kernel/bpf/helpers.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf-cgroup.h>
#include <linux/cgroup.h>
#include <linux/rcupdate.h>
#include <linux/random.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
#include <linux/ctype.h>
#include <linux/jiffies.h>
#include <linux/pid_namespace.h>
#include <linux/poison.h>
#include <linux/proc_ns.h>
#include <linux/sched/task.h>
#include <linux/security.h>
#include <linux/btf_ids.h>
#include <linux/bpf_mem_alloc.h>
#include <linux/kasan.h>
#include <linux/bpf_verifier.h>
#include <linux/uaccess.h>
#include <linux/verification.h>
#include <linux/task_work.h>
#include <linux/irq_work.h>
#include <linux/buildid.h>

#include "../../lib/kstrtox.h"

/* If a kernel subsystem allows eBPF programs to call this function, its
 * verifier_ops->get_func_proto() callback should return
 * bpf_map_lookup_elem_proto, so that the verifier can properly check the
 * arguments.
 *
 * Different map implementations rely on rcu in the lookup/update/delete
 * map methods, therefore eBPF programs must run under an rcu lock if the
 * program is allowed to access maps, so check rcu_read_lock_held() or
 * rcu_read_lock_trace_held() in all three functions.
 */
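/* As a minimal illustration of the convention above (hypothetical subsystem,
 * not taken from any in-tree verifier_ops; note that the exact
 * bpf_base_func_proto() signature varies across kernel versions):
 *
 *	static const struct bpf_func_proto *
 *	example_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *	{
 *		switch (func_id) {
 *		case BPF_FUNC_map_lookup_elem:
 *			return &bpf_map_lookup_elem_proto;
 *		case BPF_FUNC_map_update_elem:
 *			return &bpf_map_update_elem_proto;
 *		case BPF_FUNC_map_delete_elem:
 *			return &bpf_map_delete_elem_proto;
 *		default:
 *			return bpf_base_func_proto(func_id, prog);
 *		}
 *	}
 */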
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
        WARN_ON_ONCE(!bpf_rcu_lock_held());
        return (unsigned long) map->ops->map_lookup_elem(map, key);
}

const struct bpf_func_proto bpf_map_lookup_elem_proto = {
        .func           = bpf_map_lookup_elem,
        .gpl_only       = false,
        .pkt_access     = true,
        .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_PTR_TO_MAP_KEY,
};

BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
           void *, value, u64, flags)
{
        WARN_ON_ONCE(!bpf_rcu_lock_held());
        return map->ops->map_update_elem(map, key, value, flags);
}

const struct bpf_func_proto bpf_map_update_elem_proto = {
        .func           = bpf_map_update_elem,
        .gpl_only       = false,
        .pkt_access     = true,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_PTR_TO_MAP_KEY,
        .arg3_type      = ARG_PTR_TO_MAP_VALUE,
        .arg4_type      = ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
        WARN_ON_ONCE(!bpf_rcu_lock_held());
        return map->ops->map_delete_elem(map, key);
}

const struct bpf_func_proto bpf_map_delete_elem_proto = {
        .func           = bpf_map_delete_elem,
        .gpl_only       = false,
        .pkt_access     = true,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_PTR_TO_MAP_KEY,
};

BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
{
        return map->ops->map_push_elem(map, value, flags);
}

const struct bpf_func_proto bpf_map_push_elem_proto = {
        .func           = bpf_map_push_elem,
        .gpl_only       = false,
        .pkt_access     = true,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_PTR_TO_MAP_VALUE,
        .arg3_type      = ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
{
        return map->ops->map_pop_elem(map, value);
}

const struct bpf_func_proto bpf_map_pop_elem_proto = {
        .func           = bpf_map_pop_elem,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
};

BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
{
        return map->ops->map_peek_elem(map, value);
}

const struct bpf_func_proto bpf_map_peek_elem_proto = {
        .func           = bpf_map_peek_elem,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
};

BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
{
        WARN_ON_ONCE(!bpf_rcu_lock_held());
        return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
}

const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = {
        .func           = bpf_map_lookup_percpu_elem,
        .gpl_only       = false,
        .pkt_access     = true,
        .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_PTR_TO_MAP_KEY,
        .arg3_type      = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_get_prandom_u32_proto = {
        .func           = bpf_user_rnd_u32,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
};

BPF_CALL_0(bpf_get_smp_processor_id)
{
        return smp_processor_id();
}

const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
        .func           = bpf_get_smp_processor_id,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .allow_fastcall = true,
};

BPF_CALL_0(bpf_get_numa_node_id)
{
        return numa_node_id();
}

const struct bpf_func_proto bpf_get_numa_node_id_proto = {
        .func           = bpf_get_numa_node_id,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_ns)
{
        /* NMI safe access to clock monotonic */
        return ktime_get_mono_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_ns_proto = {
        .func           = bpf_ktime_get_ns,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_boot_ns)
{
        /* NMI safe access to clock boottime */
        return ktime_get_boot_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
        .func           = bpf_ktime_get_boot_ns,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_coarse_ns)
{
        return ktime_get_coarse_ns();
}

const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
        .func           = bpf_ktime_get_coarse_ns,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_tai_ns)
{
        /* NMI safe access to clock tai */
        return ktime_get_tai_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_tai_ns_proto = {
        .func           = bpf_ktime_get_tai_ns,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_pid_tgid)
{
        struct task_struct *task = current;

        if (unlikely(!task))
                return -EINVAL;

        return (u64) task->tgid << 32 | task->pid;
}

const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
        .func           = bpf_get_current_pid_tgid,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_uid_gid)
{
        struct task_struct *task = current;
        kuid_t uid;
        kgid_t gid;

        if (unlikely(!task))
                return -EINVAL;

        current_uid_gid(&uid, &gid);
        return (u64) from_kgid(&init_user_ns, gid) << 32 |
                     from_kuid(&init_user_ns, uid);
}

const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
        .func           = bpf_get_current_uid_gid,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
};

BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
        struct task_struct *task = current;

        if (unlikely(!task))
                goto err_clear;

        /* Verifier guarantees that size > 0 */
        strscpy_pad(buf, task->comm, size);
        return 0;
err_clear:
        memset(buf, 0, size);
        return -EINVAL;
}

const struct bpf_func_proto bpf_get_current_comm_proto = {
        .func           = bpf_get_current_comm,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
        .arg2_type      = ARG_CONST_SIZE,
};

#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
        arch_spinlock_t *l = (void *)lock;
        union {
                __u32 val;
                arch_spinlock_t lock;
        } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };

        compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
        BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
        BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
        preempt_disable();
        arch_spin_lock(l);
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
        arch_spinlock_t *l = (void *)lock;

        arch_spin_unlock(l);
        preempt_enable();
}

#else

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
        atomic_t *l = (void *)lock;

        BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
        do {
                atomic_cond_read_relaxed(l, !VAL);
        } while (atomic_xchg(l, 1));
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
        atomic_t *l = (void *)lock;

        atomic_set_release(l, 0);
}

#endif

static DEFINE_PER_CPU(unsigned long, irqsave_flags);

static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
{
        unsigned long flags;

        local_irq_save(flags);
        __bpf_spin_lock(lock);
        __this_cpu_write(irqsave_flags, flags);
}

NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
{
        __bpf_spin_lock_irqsave(lock);
        return 0;
}

const struct bpf_func_proto bpf_spin_lock_proto = {
        .func           = bpf_spin_lock,
        .gpl_only       = false,
        .ret_type       = RET_VOID,
        .arg1_type      = ARG_PTR_TO_SPIN_LOCK,
        .arg1_btf_id    = BPF_PTR_POISON,
};

static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
{
        unsigned long flags;

        flags = __this_cpu_read(irqsave_flags);
        __bpf_spin_unlock(lock);
        local_irq_restore(flags);
}

NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
{
        __bpf_spin_unlock_irqrestore(lock);
        return 0;
}

const struct bpf_func_proto bpf_spin_unlock_proto = {
        .func           = bpf_spin_unlock,
        .gpl_only       = false,
        .ret_type       = RET_VOID,
        .arg1_type      = ARG_PTR_TO_SPIN_LOCK,
        .arg1_btf_id    = BPF_PTR_POISON,
};

void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
                           bool lock_src)
{
        struct bpf_spin_lock *lock;

        if (lock_src)
                lock = src + map->record->spin_lock_off;
        else
                lock = dst + map->record->spin_lock_off;
        preempt_disable();
        __bpf_spin_lock_irqsave(lock);
        copy_map_value(map, dst, src);
        __bpf_spin_unlock_irqrestore(lock);
        preempt_enable();
}

BPF_CALL_0(bpf_jiffies64)
{
        return get_jiffies_64();
}

const struct bpf_func_proto bpf_jiffies64_proto = {
        .func           = bpf_jiffies64,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
};

#ifdef CONFIG_CGROUPS
BPF_CALL_0(bpf_get_current_cgroup_id)
{
        struct cgroup *cgrp;
        u64 cgrp_id;

        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
        cgrp_id = cgroup_id(cgrp);
        rcu_read_unlock();

        return cgrp_id;
}

const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
        .func           = bpf_get_current_cgroup_id,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
};

BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
{
        struct cgroup *cgrp;
        struct cgroup *ancestor;
        u64 cgrp_id;

        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
        ancestor = cgroup_ancestor(cgrp, ancestor_level);
        cgrp_id = ancestor ? cgroup_id(ancestor) : 0;
        rcu_read_unlock();

        return cgrp_id;
}

const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
        .func           = bpf_get_current_ancestor_cgroup_id,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_ANYTHING,
};
#endif /* CONFIG_CGROUPS */

#define BPF_STRTOX_BASE_MASK 0x1F

static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
                          unsigned long long *res, bool *is_negative)
{
        unsigned int base = flags & BPF_STRTOX_BASE_MASK;
        const char *cur_buf = buf;
        size_t cur_len = buf_len;
        unsigned int consumed;
        size_t val_len;
        char str[64];

        if (!buf || !buf_len || !res || !is_negative)
                return -EINVAL;

        if (base != 0 && base != 8 && base != 10 && base != 16)
                return -EINVAL;

        if (flags & ~BPF_STRTOX_BASE_MASK)
                return -EINVAL;

        while (cur_buf < buf + buf_len && isspace(*cur_buf))
                ++cur_buf;

        *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
        if (*is_negative)
                ++cur_buf;

        consumed = cur_buf - buf;
        cur_len -= consumed;
        if (!cur_len)
                return -EINVAL;

        cur_len = min(cur_len, sizeof(str) - 1);
        memcpy(str, cur_buf, cur_len);
        str[cur_len] = '\0';
        cur_buf = str;

        cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
        val_len = _parse_integer(cur_buf, base, res);

        if (val_len & KSTRTOX_OVERFLOW)
                return -ERANGE;

        if (val_len == 0)
                return -EINVAL;

        cur_buf += val_len;
        consumed += cur_buf - str;

        return consumed;
}

static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
                         long long *res)
{
        unsigned long long _res;
        bool is_negative;
        int err;

        err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
        if (err < 0)
                return err;
        if (is_negative) {
                if ((long long)-_res > 0)
                        return -ERANGE;
                *res = -_res;
        } else {
                if ((long long)_res < 0)
                        return -ERANGE;
                *res = _res;
        }
        return err;
}

BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
           s64 *, res)
{
        long long _res;
        int err;

        *res = 0;
        err = __bpf_strtoll(buf, buf_len, flags, &_res);
        if (err < 0)
                return err;
        *res = _res;
        return err;
}

const struct bpf_func_proto bpf_strtol_proto = {
        .func           = bpf_strtol,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
        .arg2_type      = ARG_CONST_SIZE,
        .arg3_type      = ARG_ANYTHING,
        .arg4_type      = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
        .arg4_size      = sizeof(s64),
};

BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
           u64 *, res)
{
        unsigned long long _res;
        bool is_negative;
        int err;

        *res = 0;
        err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
        if (err < 0)
                return err;
        if (is_negative)
                return -EINVAL;
        *res = _res;
        return err;
}

const struct bpf_func_proto bpf_strtoul_proto = {
        .func           = bpf_strtoul,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
        .arg2_type      = ARG_CONST_SIZE,
        .arg3_type      = ARG_ANYTHING,
        .arg4_type      = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
        .arg4_size      = sizeof(u64),
};

BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
{
        return strncmp(s1, s2, s1_sz);
}

static const struct bpf_func_proto bpf_strncmp_proto = {
        .func           = bpf_strncmp,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
        .arg2_type      = ARG_CONST_SIZE,
        .arg3_type      = ARG_PTR_TO_CONST_STR,
};

BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
           struct bpf_pidns_info *, nsdata, u32, size)
{
        struct task_struct *task = current;
        struct pid_namespace *pidns;
        int err = -EINVAL;

        if (unlikely(size != sizeof(struct bpf_pidns_info)))
                goto clear;

        if (unlikely((u64)(dev_t)dev != dev))
                goto clear;

        if (unlikely(!task))
                goto clear;

        pidns = task_active_pid_ns(task);
        if (unlikely(!pidns)) {
                err = -ENOENT;
                goto clear;
        }

        if (!ns_match(&pidns->ns, (dev_t)dev, ino))
                goto clear;

        nsdata->pid = task_pid_nr_ns(task, pidns);
        nsdata->tgid = task_tgid_nr_ns(task, pidns);
        return 0;
clear:
        memset((void *)nsdata, 0, (size_t) size);
        return err;
}

const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
        .func           = bpf_get_ns_current_pid_tgid,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_ANYTHING,
        .arg2_type      = ARG_ANYTHING,
        .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
        .arg4_type      = ARG_CONST_SIZE,
};

static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
        .func           = bpf_get_raw_cpu_id,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
};

BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
           u64, flags, void *, data, u64, size)
{
        if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
                return -EINVAL;

        return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
}

const struct bpf_func_proto bpf_event_output_data_proto =  {
        .func           = bpf_event_output_data,
        .gpl_only       = true,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_CTX,
        .arg2_type      = ARG_CONST_MAP_PTR,
        .arg3_type      = ARG_ANYTHING,
        .arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
           const void __user *, user_ptr)
{
        int ret = copy_from_user(dst, user_ptr, size);

        if (unlikely(ret)) {
                memset(dst, 0, size);
                ret = -EFAULT;
        }

        return ret;
}

const struct bpf_func_proto bpf_copy_from_user_proto = {
        .func           = bpf_copy_from_user,
        .gpl_only       = false,
        .might_sleep    = true,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
        .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
        .arg3_type      = ARG_ANYTHING,
};

BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
           const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
{
        int ret;

        /* flags is not used yet */
        if (unlikely(flags))
                return -EINVAL;

        if (unlikely(!size))
                return 0;

        ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
        if (ret == size)
                return 0;

        memset(dst, 0, size);
        /* Return -EFAULT for partial read */
        return ret < 0 ? ret : -EFAULT;
}

const struct bpf_func_proto bpf_copy_from_user_task_proto = {
        .func           = bpf_copy_from_user_task,
        .gpl_only       = true,
        .might_sleep    = true,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
        .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
        .arg3_type      = ARG_ANYTHING,
        .arg4_type      = ARG_PTR_TO_BTF_ID,
        .arg4_btf_id    = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
        .arg5_type      = ARG_ANYTHING
};

BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
{
        if (cpu >= nr_cpu_ids)
                return (unsigned long)NULL;

        return (unsigned long)per_cpu_ptr((const void __percpu *)(const uintptr_t)ptr, cpu);
}

const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
        .func           = bpf_per_cpu_ptr,
        .gpl_only       = false,
        .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
        .arg1_type      = ARG_PTR_TO_PERCPU_BTF_ID,
        .arg2_type      = ARG_ANYTHING,
};

BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
{
        return (unsigned long)this_cpu_ptr((const void __percpu *)(const uintptr_t)percpu_ptr);
}

const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
        .func           = bpf_this_cpu_ptr,
        .gpl_only       = false,
        .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
        .arg1_type      = ARG_PTR_TO_PERCPU_BTF_ID,
};

static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
                size_t bufsz)
{
        void __user *user_ptr = (__force void __user *)unsafe_ptr;

        buf[0] = 0;

        switch (fmt_ptype) {
        case 's':
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
                if ((unsigned long)unsafe_ptr < TASK_SIZE)
                        return strncpy_from_user_nofault(buf, user_ptr, bufsz);
                fallthrough;
#endif
        case 'k':
                return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
        case 'u':
                return strncpy_from_user_nofault(buf, user_ptr, bufsz);
        }

        return -EINVAL;
}

/* Support executing three nested bprintf helper calls on a given CPU */
#define MAX_BPRINTF_NEST_LEVEL  3

static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs);
static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);

int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs)
{
        int nest_level;

        preempt_disable();
        nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
        if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
                this_cpu_dec(bpf_bprintf_nest_level);
                preempt_enable();
                return -EBUSY;
        }
        *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]);

        return 0;
}

void bpf_put_buffers(void)
{
        if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
                return;
        this_cpu_dec(bpf_bprintf_nest_level);
        preempt_enable();
}

void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
{
        if (!data->bin_args && !data->buf)
                return;
        bpf_put_buffers();
}

/*
 * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
 *
 * Returns a negative value if fmt is an invalid format string or 0 otherwise.
 *
 * This can be used in two ways:
 * - Format string verification only: when data->get_bin_args is false
 * - Arguments preparation: in addition to the above verification, it writes in
 *   data->bin_args a binary representation of arguments usable by bstr_printf
 *   where pointers from BPF have been sanitized.
 *
 * In argument preparation mode, if 0 is returned, safe temporary buffers are
 * allocated and bpf_bprintf_cleanup should be called to free them after use.
 */
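/* A typical argument-preparation sequence is sketched below (it mirrors what
 * bpf_snprintf() does further down; names and error handling are abbreviated):
 *
 *	struct bpf_bprintf_data data = { .get_bin_args = true };
 *	int err;
 *
 *	err = bpf_bprintf_prepare(fmt, fmt_size, raw_args, num_args, &data);
 *	if (err < 0)
 *		return err;
 *	err = bstr_printf(out, out_size, fmt, data.bin_args);
 *	bpf_bprintf_cleanup(&data);
 */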
int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args,
                        u32 num_args, struct bpf_bprintf_data *data)
{
        bool get_buffers = (data->get_bin_args && num_args) || data->get_buf;
        char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
        struct bpf_bprintf_buffers *buffers = NULL;
        size_t sizeof_cur_arg, sizeof_cur_ip;
        int err, i, num_spec = 0;
        u64 cur_arg;
        char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";

        fmt_end = strnchr(fmt, fmt_size, 0);
        if (!fmt_end)
                return -EINVAL;
        fmt_size = fmt_end - fmt;

        if (get_buffers && bpf_try_get_buffers(&buffers))
                return -EBUSY;

        if (data->get_bin_args) {
                if (num_args)
                        tmp_buf = buffers->bin_args;
                tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS;
                data->bin_args = (u32 *)tmp_buf;
        }

        if (data->get_buf)
                data->buf = buffers->buf;

        for (i = 0; i < fmt_size; i++) {
                if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
                        err = -EINVAL;
                        goto out;
                }

                if (fmt[i] != '%')
                        continue;

                if (fmt[i + 1] == '%') {
                        i++;
                        continue;
                }

                if (num_spec >= num_args) {
                        err = -EINVAL;
                        goto out;
                }

                /* The string is zero-terminated, so if fmt[i] != 0 we can
                 * always access fmt[i + 1]; in the worst case it will be a 0.
                 */
                i++;

                /* skip optional "[0 +-][num]" width formatting field */
                while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
                       fmt[i] == ' ')
                        i++;
                if (fmt[i] >= '1' && fmt[i] <= '9') {
                        i++;
                        while (fmt[i] >= '0' && fmt[i] <= '9')
                                i++;
                }

                if (fmt[i] == 'p') {
                        sizeof_cur_arg = sizeof(long);

                        if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
                            ispunct(fmt[i + 1])) {
                                if (tmp_buf)
                                        cur_arg = raw_args[num_spec];
                                goto nocopy_fmt;
                        }

                        if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
                            fmt[i + 2] == 's') {
                                fmt_ptype = fmt[i + 1];
                                i += 2;
                                goto fmt_str;
                        }

                        if (fmt[i + 1] == 'K' ||
                            fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
                            fmt[i + 1] == 'S') {
                                if (tmp_buf)
                                        cur_arg = raw_args[num_spec];
                                i++;
                                goto nocopy_fmt;
                        }

                        if (fmt[i + 1] == 'B') {
                                if (tmp_buf)  {
                                        err = snprintf(tmp_buf,
                                                       (tmp_buf_end - tmp_buf),
                                                       "%pB",
                                                       (void *)(long)raw_args[num_spec]);
                                        tmp_buf += (err + 1);
                                }

                                i++;
                                num_spec++;
                                continue;
                        }

                        /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
                        if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
                            (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
                                err = -EINVAL;
                                goto out;
                        }

                        i += 2;
                        if (!tmp_buf)
                                goto nocopy_fmt;

                        sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
                        if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
                                err = -ENOSPC;
                                goto out;
                        }

                        unsafe_ptr = (char *)(long)raw_args[num_spec];
                        err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
                                                       sizeof_cur_ip);
                        if (err < 0)
                                memset(cur_ip, 0, sizeof_cur_ip);

                        /* hack: bstr_printf expects IP addresses to be
                         * pre-formatted as strings; ironically, the easiest
                         * way to do that is to call snprintf.
                         */
                        ip_spec[2] = fmt[i - 1];
                        ip_spec[3] = fmt[i];
                        err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
                                       ip_spec, &cur_ip);

                        tmp_buf += err + 1;
                        num_spec++;

                        continue;
                } else if (fmt[i] == 's') {
                        fmt_ptype = fmt[i];
fmt_str:
                        if (fmt[i + 1] != 0 &&
                            !isspace(fmt[i + 1]) &&
                            !ispunct(fmt[i + 1])) {
                                err = -EINVAL;
                                goto out;
                        }

                        if (!tmp_buf)
                                goto nocopy_fmt;

                        if (tmp_buf_end == tmp_buf) {
                                err = -ENOSPC;
                                goto out;
                        }

                        unsafe_ptr = (char *)(long)raw_args[num_spec];
                        err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
                                                    fmt_ptype,
                                                    tmp_buf_end - tmp_buf);
                        if (err < 0) {
                                tmp_buf[0] = '\0';
                                err = 1;
                        }

                        tmp_buf += err;
                        num_spec++;

                        continue;
                } else if (fmt[i] == 'c') {
                        if (!tmp_buf)
                                goto nocopy_fmt;

                        if (tmp_buf_end == tmp_buf) {
                                err = -ENOSPC;
                                goto out;
                        }

                        *tmp_buf = raw_args[num_spec];
                        tmp_buf++;
                        num_spec++;

                        continue;
                }

                sizeof_cur_arg = sizeof(int);

                if (fmt[i] == 'l') {
                        sizeof_cur_arg = sizeof(long);
                        i++;
                }
                if (fmt[i] == 'l') {
                        sizeof_cur_arg = sizeof(long long);
                        i++;
                }

                if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
                    fmt[i] != 'x' && fmt[i] != 'X') {
                        err = -EINVAL;
                        goto out;
                }

                if (tmp_buf)
                        cur_arg = raw_args[num_spec];
nocopy_fmt:
                if (tmp_buf) {
                        tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
                        if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
                                err = -ENOSPC;
                                goto out;
                        }

                        if (sizeof_cur_arg == 8) {
                                *(u32 *)tmp_buf = *(u32 *)&cur_arg;
                                *(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
                        } else {
                                *(u32 *)tmp_buf = (u32)(long)cur_arg;
                        }
                        tmp_buf += sizeof_cur_arg;
                }
                num_spec++;
        }

        err = 0;
out:
        if (err)
                bpf_bprintf_cleanup(data);
        return err;
}

BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
           const void *, args, u32, data_len)
{
        struct bpf_bprintf_data data = {
                .get_bin_args   = true,
        };
        int err, num_args;

        if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
            (data_len && !args))
                return -EINVAL;
        num_args = data_len / 8;

        /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
         * can safely give an unbounded size.
         */
        err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data);
        if (err < 0)
                return err;

        err = bstr_printf(str, str_size, fmt, data.bin_args);

        bpf_bprintf_cleanup(&data);

        return err + 1;
}

const struct bpf_func_proto bpf_snprintf_proto = {
        .func           = bpf_snprintf,
        .gpl_only       = true,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_MEM_OR_NULL | MEM_WRITE,
        .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
        .arg3_type      = ARG_PTR_TO_CONST_STR,
        .arg4_type      = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
};

static void *map_key_from_value(struct bpf_map *map, void *value, u32 *arr_idx)
{
        if (map->map_type == BPF_MAP_TYPE_ARRAY) {
                struct bpf_array *array = container_of(map, struct bpf_array, map);

                *arr_idx = ((char *)value - array->value) / array->elem_size;
                return arr_idx;
        }
        return (void *)value - round_up(map->key_size, 8);
}

enum bpf_async_type {
        BPF_ASYNC_TYPE_TIMER = 0,
        BPF_ASYNC_TYPE_WQ,
};

enum bpf_async_op {
        BPF_ASYNC_START,
        BPF_ASYNC_CANCEL
};

struct bpf_async_cmd {
        struct llist_node node;
        u64 nsec;
        u32 mode;
        enum bpf_async_op op;
};

struct bpf_async_cb {
        struct bpf_map *map;
        struct bpf_prog *prog;
        void __rcu *callback_fn;
        void *value;
        struct rcu_head rcu;
        u64 flags;
        struct irq_work worker;
        refcount_t refcnt;
        enum bpf_async_type type;
        struct llist_head async_cmds;
};

/* BPF map elements can contain 'struct bpf_timer'.
 * Such a map owns all of its BPF timers.
 * 'struct bpf_timer' is allocated as part of map element allocation
 * and it is zero initialized.
 * That space is used to keep 'struct bpf_async_kern'.
 * bpf_timer_init() allocates 'struct bpf_hrtimer', inits the hrtimer, and
 * remembers the 'struct bpf_map *' pointer it's part of.
 * bpf_timer_set_callback() increments the prog refcnt and assigns the bpf callback_fn.
 * bpf_timer_start() arms the timer.
 * If the user space reference to a map goes to zero at this point,
 * the ops->map_release_uref callback is responsible for cancelling the timers,
 * freeing their memory, and decrementing the progs' refcnts.
 * bpf_timer_cancel() cancels the timer and decrements the prog's refcnt.
 * Inner maps can contain bpf timers as well. ops->map_release_uref
 * frees the timers when an inner map is replaced or deleted by user space.
 */
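/* A sketch of the lifecycle above from the BPF program side (hypothetical
 * map/element names; the map value is assumed to embed a 'struct bpf_timer'
 * field named 'timer', and the callback must return 0, which the verifier
 * enforces):
 *
 *	static int timer_cb(void *map, int *key, struct map_elem *val)
 *	{
 *		return 0;
 *	}
 *
 *	val = bpf_map_lookup_elem(&array, &key);
 *	if (val) {
 *		bpf_timer_init(&val->timer, &array, CLOCK_MONOTONIC);
 *		bpf_timer_set_callback(&val->timer, timer_cb);
 *		bpf_timer_start(&val->timer, 1000 * 1000, 0);
 *	}
 */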
struct bpf_hrtimer {
        struct bpf_async_cb cb;
        struct hrtimer timer;
        atomic_t cancelling;
};

struct bpf_work {
        struct bpf_async_cb cb;
        struct work_struct work;
};

/* the actual struct hidden inside uapi struct bpf_timer and bpf_wq */
struct bpf_async_kern {
        union {
                struct bpf_async_cb *cb;
                struct bpf_hrtimer *timer;
                struct bpf_work *work;
        };
} __attribute__((aligned(8)));

static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);

static void bpf_async_refcount_put(struct bpf_async_cb *cb);

static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
{
        struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
        struct bpf_map *map = t->cb.map;
        void *value = t->cb.value;
        bpf_callback_t callback_fn;
        void *key;
        u32 idx;

        BTF_TYPE_EMIT(struct bpf_timer);
        callback_fn = rcu_dereference_check(t->cb.callback_fn, rcu_read_lock_bh_held());
        if (!callback_fn)
                goto out;

        /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
         * cannot be preempted by another bpf_timer_cb() on the same cpu.
         * Remember the timer this callback is servicing to prevent
         * deadlock if callback_fn() calls bpf_timer_cancel() or
         * bpf_map_delete_elem() on the same timer.
         */
        this_cpu_write(hrtimer_running, t);

        key = map_key_from_value(map, value, &idx);

        callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
        /* The verifier checked that return value is zero. */

        this_cpu_write(hrtimer_running, NULL);
out:
        return HRTIMER_NORESTART;
}

static void bpf_wq_work(struct work_struct *work)
{
        struct bpf_work *w = container_of(work, struct bpf_work, work);
        struct bpf_async_cb *cb = &w->cb;
        struct bpf_map *map = cb->map;
        bpf_callback_t callback_fn;
        void *value = cb->value;
        void *key;
        u32 idx;

        BTF_TYPE_EMIT(struct bpf_wq);

        callback_fn = READ_ONCE(cb->callback_fn);
        if (!callback_fn)
                return;

        key = map_key_from_value(map, value, &idx);

        rcu_read_lock_trace();
        migrate_disable();

        callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);

        migrate_enable();
        rcu_read_unlock_trace();
}

static void bpf_async_cb_rcu_free(struct rcu_head *rcu)
{
        struct bpf_async_cb *cb = container_of(rcu, struct bpf_async_cb, rcu);

        /*
         * Drop the last reference to prog only after RCU GP, as set_callback()
         * may race with cancel_and_free()
         */
        if (cb->prog)
                bpf_prog_put(cb->prog);

        kfree_nolock(cb);
}

/* Callback from call_rcu_tasks_trace, chains to call_rcu for final free */
static void bpf_async_cb_rcu_tasks_trace_free(struct rcu_head *rcu)
{
        struct bpf_async_cb *cb = container_of(rcu, struct bpf_async_cb, rcu);
        struct bpf_hrtimer *t = container_of(cb, struct bpf_hrtimer, cb);
        struct bpf_work *w = container_of(cb, struct bpf_work, cb);
        bool retry = false;

        /*
         * bpf_async_cancel_and_free() tried to cancel timer/wq, but it
         * could have raced with timer/wq_start. Now refcnt is zero and
         * srcu/rcu GP completed. Cancel timer/wq again.
         */
        switch (cb->type) {
        case BPF_ASYNC_TYPE_TIMER:
                if (hrtimer_try_to_cancel(&t->timer) < 0)
                        retry = true;
                break;
        case BPF_ASYNC_TYPE_WQ:
                if (!cancel_work(&w->work) && work_busy(&w->work))
                        retry = true;
                break;
        }
        if (retry) {
                /*
                 * hrtimer or wq callback may still be running. It must be
                 * in rcu_tasks_trace or rcu CS, so wait for GP again.
                 * It won't retry forever, since refcnt zero prevents all
                 * operations on timer/wq.
                 */
                call_rcu_tasks_trace(&cb->rcu, bpf_async_cb_rcu_tasks_trace_free);
                return;
        }

        /* rcu_trace_implies_rcu_gp() is true and will remain so */
        bpf_async_cb_rcu_free(rcu);
}

static void worker_for_call_rcu(struct irq_work *work)
{
        struct bpf_async_cb *cb = container_of(work, struct bpf_async_cb, worker);

        call_rcu_tasks_trace(&cb->rcu, bpf_async_cb_rcu_tasks_trace_free);
}

static void bpf_async_refcount_put(struct bpf_async_cb *cb)
{
        if (!refcount_dec_and_test(&cb->refcnt))
                return;

        if (irqs_disabled()) {
                cb->worker = IRQ_WORK_INIT(worker_for_call_rcu);
                irq_work_queue(&cb->worker);
        } else {
                call_rcu_tasks_trace(&cb->rcu, bpf_async_cb_rcu_tasks_trace_free);
        }
}

static void bpf_async_cancel_and_free(struct bpf_async_kern *async);
static void bpf_async_irq_worker(struct irq_work *work);

static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
                            enum bpf_async_type type)
{
        struct bpf_async_cb *cb, *old_cb;
        struct bpf_hrtimer *t;
        struct bpf_work *w;
        clockid_t clockid;
        size_t size;

        switch (type) {
        case BPF_ASYNC_TYPE_TIMER:
                size = sizeof(struct bpf_hrtimer);
                break;
        case BPF_ASYNC_TYPE_WQ:
                size = sizeof(struct bpf_work);
                break;
        default:
                return -EINVAL;
        }

        old_cb = READ_ONCE(async->cb);
        if (old_cb)
                return -EBUSY;

        cb = bpf_map_kmalloc_nolock(map, size, 0, map->numa_node);
        if (!cb)
                return -ENOMEM;

        switch (type) {
        case BPF_ASYNC_TYPE_TIMER:
                clockid = flags & (MAX_CLOCKS - 1);
                t = (struct bpf_hrtimer *)cb;

                atomic_set(&t->cancelling, 0);
                hrtimer_setup(&t->timer, bpf_timer_cb, clockid, HRTIMER_MODE_REL_SOFT);
                cb->value = (void *)async - map->record->timer_off;
                break;
        case BPF_ASYNC_TYPE_WQ:
                w = (struct bpf_work *)cb;

                INIT_WORK(&w->work, bpf_wq_work);
                cb->value = (void *)async - map->record->wq_off;
                break;
        }
        cb->map = map;
        cb->prog = NULL;
        cb->flags = flags;
        cb->worker = IRQ_WORK_INIT(bpf_async_irq_worker);
        init_llist_head(&cb->async_cmds);
        refcount_set(&cb->refcnt, 1); /* map's reference */
        cb->type = type;
        rcu_assign_pointer(cb->callback_fn, NULL);

        old_cb = cmpxchg(&async->cb, NULL, cb);
        if (old_cb) {
                /* Lost the race to initialize this bpf_async_kern, drop the allocated object */
                kfree_nolock(cb);
                return -EBUSY;
        }
        /* Guarantee the ordering between async->cb and map->usercnt. So
         * when there are a concurrent uref release and a bpf timer init, either
         * bpf_timer_cancel_and_free() called by the uref release reads a non-NULL
         * timer or atomic64_read() below returns a zero usercnt.
         */
        smp_mb();
        if (!atomic64_read(&map->usercnt)) {
                /* maps with timers must be either held by user space
                 * or pinned in bpffs.
                 */
                bpf_async_cancel_and_free(async);
                return -EPERM;
        }

        return 0;
}

BPF_CALL_3(bpf_timer_init, struct bpf_async_kern *, timer, struct bpf_map *, map,
           u64, flags)
{
        clock_t clockid = flags & (MAX_CLOCKS - 1);

        BUILD_BUG_ON(MAX_CLOCKS != 16);
        BUILD_BUG_ON(sizeof(struct bpf_async_kern) > sizeof(struct bpf_timer));
        BUILD_BUG_ON(__alignof__(struct bpf_async_kern) != __alignof__(struct bpf_timer));

        if (flags >= MAX_CLOCKS ||
            /* similar to timerfd except _ALARM variants are not supported */
            (clockid != CLOCK_MONOTONIC &&
             clockid != CLOCK_REALTIME &&
             clockid != CLOCK_BOOTTIME))
                return -EINVAL;

        return __bpf_async_init(timer, map, flags, BPF_ASYNC_TYPE_TIMER);
}

static const struct bpf_func_proto bpf_timer_init_proto = {
        .func           = bpf_timer_init,
        .gpl_only       = true,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_TIMER,
        .arg2_type      = ARG_CONST_MAP_PTR,
        .arg3_type      = ARG_ANYTHING,
};

static int bpf_async_update_prog_callback(struct bpf_async_cb *cb,
                                          struct bpf_prog *prog,
                                          void *callback_fn)
{
        struct bpf_prog *prev;

        /* Acquire a guard reference on prog to prevent it from being freed during the loop */
        if (prog) {
                prog = bpf_prog_inc_not_zero(prog);
                if (IS_ERR(prog))
                        return PTR_ERR(prog);
        }

        do {
                if (prog)
                        prog = bpf_prog_inc_not_zero(prog);
                prev = xchg(&cb->prog, prog);
                rcu_assign_pointer(cb->callback_fn, callback_fn);

                /*
                 * Release the previous prog and make sure that, if another CPU is
                 * contending to set bpf_prog, references are not leaked, as each
                 * iteration acquires and releases one reference.
                 */
                if (prev)
                        bpf_prog_put(prev);

        } while (READ_ONCE(cb->prog) != prog ||
                 (void __force *)READ_ONCE(cb->callback_fn) != callback_fn);

        if (prog)
                bpf_prog_put(prog);

        return 0;
}

static DEFINE_PER_CPU(struct bpf_async_cb *, async_cb_running);

static int bpf_async_schedule_op(struct bpf_async_cb *cb, enum bpf_async_op op,
                                 u64 nsec, u32 timer_mode)
{
        /*
         * Do not schedule another operation on this cpu if it's in irq_work
         * callback that is processing async_cmds queue. Otherwise the following
         * loop is possible:
         * bpf_timer_start() -> bpf_async_schedule_op() -> irq_work_queue().
         * irqrestore -> bpf_async_irq_worker() -> tracepoint -> bpf_timer_start().
         */
        if (this_cpu_read(async_cb_running) == cb) {
                bpf_async_refcount_put(cb);
                return -EDEADLK;
        }

        struct bpf_async_cmd *cmd = kmalloc_nolock(sizeof(*cmd), 0, NUMA_NO_NODE);

        if (!cmd) {
                bpf_async_refcount_put(cb);
                return -ENOMEM;
        }
        init_llist_node(&cmd->node);
        cmd->nsec = nsec;
        cmd->mode = timer_mode;
        cmd->op = op;
        if (llist_add(&cmd->node, &cb->async_cmds))
                irq_work_queue(&cb->worker);
        return 0;
}

static int __bpf_async_set_callback(struct bpf_async_kern *async, void *callback_fn,
                                    struct bpf_prog *prog)
{
        struct bpf_async_cb *cb;

        cb = READ_ONCE(async->cb);
        if (!cb)
                return -EINVAL;

        return bpf_async_update_prog_callback(cb, prog, callback_fn);
}

BPF_CALL_3(bpf_timer_set_callback, struct bpf_async_kern *, timer, void *, callback_fn,
           struct bpf_prog_aux *, aux)
{
        return __bpf_async_set_callback(timer, callback_fn, aux->prog);
}

static const struct bpf_func_proto bpf_timer_set_callback_proto = {
        .func           = bpf_timer_set_callback,
        .gpl_only       = true,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_TIMER,
        .arg2_type      = ARG_PTR_TO_FUNC,
};

static bool defer_timer_wq_op(void)
{
        return in_hardirq() || irqs_disabled();
}

BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, async, u64, nsecs, u64, flags)
{
        struct bpf_hrtimer *t;
        u32 mode;

        if (flags & ~(BPF_F_TIMER_ABS | BPF_F_TIMER_CPU_PIN))
                return -EINVAL;

        t = READ_ONCE(async->timer);
        if (!t || !READ_ONCE(t->cb.prog))
                return -EINVAL;

        if (flags & BPF_F_TIMER_ABS)
                mode = HRTIMER_MODE_ABS_SOFT;
        else
                mode = HRTIMER_MODE_REL_SOFT;

        if (flags & BPF_F_TIMER_CPU_PIN)
                mode |= HRTIMER_MODE_PINNED;

        /*
         * bpf_async_cancel_and_free() could have dropped refcnt to zero. In
         * such case BPF progs are not allowed to arm the timer to prevent UAF.
         */
        if (!refcount_inc_not_zero(&t->cb.refcnt))
                return -ENOENT;

        if (!defer_timer_wq_op()) {
                hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
                bpf_async_refcount_put(&t->cb);
                return 0;
        } else {
                return bpf_async_schedule_op(&t->cb, BPF_ASYNC_START, nsecs, mode);
        }
}

static const struct bpf_func_proto bpf_timer_start_proto = {
        .func           = bpf_timer_start,
        .gpl_only       = true,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_TIMER,
        .arg2_type      = ARG_ANYTHING,
        .arg3_type      = ARG_ANYTHING,
};

BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, async)
{
        struct bpf_hrtimer *t, *cur_t;
        bool inc = false;
        int ret = 0;

        if (defer_timer_wq_op())
                return -EOPNOTSUPP;

        t = READ_ONCE(async->timer);
        if (!t)
                return -EINVAL;

        cur_t = this_cpu_read(hrtimer_running);
        if (cur_t == t) {
                /* If bpf callback_fn is trying to bpf_timer_cancel()
                 * its own timer the hrtimer_cancel() will deadlock
                 * since it waits for callback_fn to finish.
                 */
                return -EDEADLK;
        }

        /* Only account in-flight cancellations when invoked from a timer
         * callback, since we only need to avoid waiting when other _callbacks_
         * are waiting on us, which could introduce lockups. Non-callback paths
         * are ok, since nobody synchronously waits for their completion.
         */
        if (!cur_t)
                goto drop;
        atomic_inc(&t->cancelling);
        /* Need full barrier after relaxed atomic_inc */
        smp_mb__after_atomic();
        inc = true;
        if (atomic_read(&cur_t->cancelling)) {
                /* We're cancelling timer t, while some other timer callback is
                 * attempting to cancel us. In such a case, it might be possible
                 * that timer t belongs to the other callback, or some other
                 * callback waiting upon it (creating transitive dependencies
                 * upon us), and we will enter a deadlock if we continue
                 * cancelling and waiting for it synchronously, since it might
                 * do the same. Bail!
                 */
                atomic_dec(&t->cancelling);
                return -EDEADLK;
        }
drop:
        bpf_async_update_prog_callback(&t->cb, NULL, NULL);
        /* Cancel the timer and wait for associated callback to finish
         * if it was running.
         */
        ret = hrtimer_cancel(&t->timer);
        if (inc)
                atomic_dec(&t->cancelling);
        return ret;
}

static const struct bpf_func_proto bpf_timer_cancel_proto = {
        .func           = bpf_timer_cancel,
        .gpl_only       = true,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_TIMER,
};

static void bpf_async_process_op(struct bpf_async_cb *cb, u32 op,
                                 u64 timer_nsec, u32 timer_mode)
{
        switch (cb->type) {
        case BPF_ASYNC_TYPE_TIMER: {
                struct bpf_hrtimer *t = container_of(cb, struct bpf_hrtimer, cb);

                switch (op) {
                case BPF_ASYNC_START:
                        hrtimer_start(&t->timer, ns_to_ktime(timer_nsec), timer_mode);
                        break;
                case BPF_ASYNC_CANCEL:
                        hrtimer_try_to_cancel(&t->timer);
                        break;
                }
                break;
        }
        case BPF_ASYNC_TYPE_WQ: {
                struct bpf_work *w = container_of(cb, struct bpf_work, cb);

                switch (op) {
                case BPF_ASYNC_START:
                        schedule_work(&w->work);
                        break;
                case BPF_ASYNC_CANCEL:
                        cancel_work(&w->work);
                        break;
                }
                break;
        }
        }
        bpf_async_refcount_put(cb);
}

static void bpf_async_irq_worker(struct irq_work *work)
{
        struct bpf_async_cb *cb = container_of(work, struct bpf_async_cb, worker);
        struct llist_node *pos, *n, *list;

        list = llist_del_all(&cb->async_cmds);
        if (!list)
                return;

        list = llist_reverse_order(list);
        this_cpu_write(async_cb_running, cb);
        llist_for_each_safe(pos, n, list) {
                struct bpf_async_cmd *cmd;

                cmd = container_of(pos, struct bpf_async_cmd, node);
                bpf_async_process_op(cb, cmd->op, cmd->nsec, cmd->mode);
                kfree_nolock(cmd);
        }
        this_cpu_write(async_cb_running, NULL);
}

static void bpf_async_cancel_and_free(struct bpf_async_kern *async)
{
        struct bpf_async_cb *cb;

        if (!READ_ONCE(async->cb))
                return;

        cb = xchg(&async->cb, NULL);
        if (!cb)
                return;

        bpf_async_update_prog_callback(cb, NULL, NULL);
        /*
         * No refcount_inc_not_zero(&cb->refcnt) here; we are dropping the last
         * refcnt, either synchronously or asynchronously in irq_work.
         */

        if (!defer_timer_wq_op()) {
                bpf_async_process_op(cb, BPF_ASYNC_CANCEL, 0, 0);
        } else {
                (void)bpf_async_schedule_op(cb, BPF_ASYNC_CANCEL, 0, 0);
                /*
                 * bpf_async_schedule_op() either enqueues the allocated cmd into the
                 * llist or fails with ENOMEM and drops the last refcnt.
                 * This is unlikely, but safe, since the bpf_async_cb_rcu_tasks_trace_free()
                 * callback will do an additional timer/wq cancel due to races anyway.
                 */
        }
}

/*
 * This function is called by map_delete/update_elem for individual element and
 * by ops->map_release_uref when the user space reference to a map reaches zero.
 */
void bpf_timer_cancel_and_free(void *val)
{
        bpf_async_cancel_and_free(val);
}

/*
 * This function is called by map_delete/update_elem for individual element and
 * by ops->map_release_uref when the user space reference to a map reaches zero.
 */
void bpf_wq_cancel_and_free(void *val)
{
        bpf_async_cancel_and_free(val);
}

BPF_CALL_2(bpf_kptr_xchg, void *, dst, void *, ptr)
{
        unsigned long *kptr = dst;

        /* This helper may be inlined by the verifier. */
        return xchg(kptr, (unsigned long)ptr);
}

/* Unlike other PTR_TO_BTF_ID helpers, the btf_id in the bpf_kptr_xchg()
 * helper is determined dynamically by the verifier. Use BPF_PTR_POISON to
 * denote a type that the verifier will determine.
 */
static const struct bpf_func_proto bpf_kptr_xchg_proto = {
        .func         = bpf_kptr_xchg,
        .gpl_only     = false,
        .ret_type     = RET_PTR_TO_BTF_ID_OR_NULL,
        .ret_btf_id   = BPF_PTR_POISON,
        .arg1_type    = ARG_KPTR_XCHG_DEST,
        .arg2_type    = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
        .arg2_btf_id  = BPF_PTR_POISON,
};

struct bpf_dynptr_file_impl {
        struct freader freader;
        /* 64 bit offset and size overriding 32 bit ones in bpf_dynptr_kern */
        u64 offset;
        u64 size;
};

/* Since the upper 8 bits of dynptr->size are reserved, the
 * maximum supported size is 2^24 - 1.
 */
#define DYNPTR_MAX_SIZE ((1UL << 24) - 1)
#define DYNPTR_TYPE_SHIFT       28
#define DYNPTR_SIZE_MASK        0xFFFFFF
#define DYNPTR_RDONLY_BIT       BIT(31)
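
/* Resulting layout of the 32-bit size field (a sketch derived from the
 * definitions above; bits 24-27 are currently unused):
 *
 *	 31        30..28     27..24    23..............0
 *	[ RDONLY ][  type  ][ unused ][       size       ]
 */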

bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
{
        return ptr->size & DYNPTR_RDONLY_BIT;
}

void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
        ptr->size |= DYNPTR_RDONLY_BIT;
}

static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
{
        ptr->size |= type << DYNPTR_TYPE_SHIFT;
}

static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
{
        return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
}

u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
{
        if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) {
                struct bpf_dynptr_file_impl *df = ptr->data;

                return df->size;
        }

        return ptr->size & DYNPTR_SIZE_MASK;
}

static void bpf_dynptr_advance_offset(struct bpf_dynptr_kern *ptr, u64 off)
{
        if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) {
                struct bpf_dynptr_file_impl *df = ptr->data;

                df->offset += off;
                return;
        }
        ptr->offset += off;
}

static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u64 new_size)
{
        u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;

        if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) {
                struct bpf_dynptr_file_impl *df = ptr->data;

                df->size = new_size;
                return;
        }
        ptr->size = (u32)new_size | metadata;
}

int bpf_dynptr_check_size(u64 size)
{
        return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
}

static int bpf_file_fetch_bytes(struct bpf_dynptr_file_impl *df, u64 offset, void *buf, u64 len)
{
        const void *ptr;

        if (!buf)
                return -EINVAL;

        df->freader.buf = buf;
        df->freader.buf_sz = len;
        ptr = freader_fetch(&df->freader, offset + df->offset, len);
        if (!ptr)
                return df->freader.err;

        if (ptr != buf) /* Force copying into the buffer */
                memcpy(buf, ptr, len);

        return 0;
}

void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
                     enum bpf_dynptr_type type, u32 offset, u32 size)
{
        ptr->data = data;
        ptr->offset = offset;
        ptr->size = size;
        bpf_dynptr_set_type(ptr, type);
}

void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
{
        memset(ptr, 0, sizeof(*ptr));
}

BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u64, size, u64, flags, struct bpf_dynptr_kern *, ptr)
{
        int err;

        BTF_TYPE_EMIT(struct bpf_dynptr);

        err = bpf_dynptr_check_size(size);
        if (err)
                goto error;

        /* flags is currently unsupported */
        if (flags) {
                err = -EINVAL;
                goto error;
        }

        bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size);

        return 0;

error:
        bpf_dynptr_set_null(ptr);
        return err;
}

static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
        .func           = bpf_dynptr_from_mem,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
        .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
        .arg3_type      = ARG_ANYTHING,
        .arg4_type      = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE,
};
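
/*
 * Illustrative only: a minimal BPF-program-side sketch of creating a local
 * dynptr over stack memory and reading through it; error handling is
 * abbreviated.
 *
 *	char buf[16] = {};
 *	struct bpf_dynptr dptr;
 *	__u32 first_word;
 *
 *	if (bpf_dynptr_from_mem(buf, sizeof(buf), 0, &dptr))
 *		return 0;
 *	if (bpf_dynptr_read(&first_word, sizeof(first_word), &dptr, 0, 0))
 *		return 0;
 */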

static int __bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr_kern *src,
                             u64 offset, u64 flags)
{
        enum bpf_dynptr_type type;
        int err;

        if (!src->data || flags)
                return -EINVAL;

        err = bpf_dynptr_check_off_len(src, offset, len);
        if (err)
                return err;

        type = bpf_dynptr_get_type(src);

        switch (type) {
        case BPF_DYNPTR_TYPE_LOCAL:
        case BPF_DYNPTR_TYPE_RINGBUF:
                /* Source and destination may possibly overlap, hence use memmove to
                 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
                 * pointing to overlapping PTR_TO_MAP_VALUE regions.
                 */
                memmove(dst, src->data + src->offset + offset, len);
                return 0;
        case BPF_DYNPTR_TYPE_SKB:
                return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
        case BPF_DYNPTR_TYPE_XDP:
                return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
        case BPF_DYNPTR_TYPE_SKB_META:
                memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len);
                return 0;
        case BPF_DYNPTR_TYPE_FILE:
                return bpf_file_fetch_bytes(src->data, offset, dst, len);
        default:
                WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
                return -EFAULT;
        }
}

BPF_CALL_5(bpf_dynptr_read, void *, dst, u64, len, const struct bpf_dynptr_kern *, src,
           u64, offset, u64, flags)
{
        return __bpf_dynptr_read(dst, len, src, offset, flags);
}

static const struct bpf_func_proto bpf_dynptr_read_proto = {
        .func           = bpf_dynptr_read,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
        .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
        .arg3_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
        .arg4_type      = ARG_ANYTHING,
        .arg5_type      = ARG_ANYTHING,
};

int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, void *src,
                       u64 len, u64 flags)
{
        enum bpf_dynptr_type type;
        int err;

        if (!dst->data || __bpf_dynptr_is_rdonly(dst))
                return -EINVAL;

        err = bpf_dynptr_check_off_len(dst, offset, len);
        if (err)
                return err;

        type = bpf_dynptr_get_type(dst);

        switch (type) {
        case BPF_DYNPTR_TYPE_LOCAL:
        case BPF_DYNPTR_TYPE_RINGBUF:
                if (flags)
                        return -EINVAL;
                /* Source and destination may possibly overlap, hence use memmove to
                 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
                 * pointing to overlapping PTR_TO_MAP_VALUE regions.
                 */
                memmove(dst->data + dst->offset + offset, src, len);
                return 0;
        case BPF_DYNPTR_TYPE_SKB:
                return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
                                             flags);
        case BPF_DYNPTR_TYPE_XDP:
                if (flags)
                        return -EINVAL;
                return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
        case BPF_DYNPTR_TYPE_SKB_META:
                return __bpf_skb_meta_store_bytes(dst->data, dst->offset + offset, src,
                                                  len, flags);
        default:
                WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
                return -EFAULT;
        }
}

BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u64, offset, void *, src,
           u64, len, u64, flags)
{
        return __bpf_dynptr_write(dst, offset, src, len, flags);
}

static const struct bpf_func_proto bpf_dynptr_write_proto = {
        .func           = bpf_dynptr_write,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
        .arg2_type      = ARG_ANYTHING,
        .arg3_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
        .arg4_type      = ARG_CONST_SIZE_OR_ZERO,
        .arg5_type      = ARG_ANYTHING,
};

BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u64, offset, u64, len)
{
        enum bpf_dynptr_type type;
        int err;

        if (!ptr->data)
                return 0;

        err = bpf_dynptr_check_off_len(ptr, offset, len);
        if (err)
                return 0;

        if (__bpf_dynptr_is_rdonly(ptr))
                return 0;

        type = bpf_dynptr_get_type(ptr);

        switch (type) {
        case BPF_DYNPTR_TYPE_LOCAL:
        case BPF_DYNPTR_TYPE_RINGBUF:
                return (unsigned long)(ptr->data + ptr->offset + offset);
        case BPF_DYNPTR_TYPE_SKB:
        case BPF_DYNPTR_TYPE_XDP:
        case BPF_DYNPTR_TYPE_SKB_META:
                /* skb, xdp and skb_meta dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
                return 0;
        default:
                WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
                return 0;
        }
}

static const struct bpf_func_proto bpf_dynptr_data_proto = {
        .func           = bpf_dynptr_data,
        .gpl_only       = false,
        .ret_type       = RET_PTR_TO_DYNPTR_MEM_OR_NULL,
        .arg1_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
        .arg2_type      = ARG_ANYTHING,
        .arg3_type      = ARG_CONST_ALLOC_SIZE_OR_ZERO,
};
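
/*
 * Illustrative only: bpf_dynptr_data() from the BPF program side. It returns
 * a direct pointer for local/ringbuf dynptrs and NULL otherwise, so the
 * result must always be NULL-checked before use; the length must be constant.
 *
 *	__u8 *p = bpf_dynptr_data(&dptr, 0, 8);
 *
 *	if (!p)
 *		return 0;
 *	p[0] = 0xab;	// direct access within the 8-byte slice
 */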

const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
const struct bpf_func_proto bpf_perf_event_read_proto __weak;
const struct bpf_func_proto bpf_send_signal_proto __weak;
const struct bpf_func_proto bpf_send_signal_thread_proto __weak;
const struct bpf_func_proto bpf_get_task_stack_sleepable_proto __weak;
const struct bpf_func_proto bpf_get_task_stack_proto __weak;
const struct bpf_func_proto bpf_get_branch_snapshot_proto __weak;

const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
        switch (func_id) {
        case BPF_FUNC_map_lookup_elem:
                return &bpf_map_lookup_elem_proto;
        case BPF_FUNC_map_update_elem:
                return &bpf_map_update_elem_proto;
        case BPF_FUNC_map_delete_elem:
                return &bpf_map_delete_elem_proto;
        case BPF_FUNC_map_push_elem:
                return &bpf_map_push_elem_proto;
        case BPF_FUNC_map_pop_elem:
                return &bpf_map_pop_elem_proto;
        case BPF_FUNC_map_peek_elem:
                return &bpf_map_peek_elem_proto;
        case BPF_FUNC_map_lookup_percpu_elem:
                return &bpf_map_lookup_percpu_elem_proto;
        case BPF_FUNC_get_prandom_u32:
                return &bpf_get_prandom_u32_proto;
        case BPF_FUNC_get_smp_processor_id:
                return &bpf_get_raw_smp_processor_id_proto;
        case BPF_FUNC_get_numa_node_id:
                return &bpf_get_numa_node_id_proto;
        case BPF_FUNC_tail_call:
                return &bpf_tail_call_proto;
        case BPF_FUNC_ktime_get_ns:
                return &bpf_ktime_get_ns_proto;
        case BPF_FUNC_ktime_get_boot_ns:
                return &bpf_ktime_get_boot_ns_proto;
        case BPF_FUNC_ktime_get_tai_ns:
                return &bpf_ktime_get_tai_ns_proto;
        case BPF_FUNC_ringbuf_output:
                return &bpf_ringbuf_output_proto;
        case BPF_FUNC_ringbuf_reserve:
                return &bpf_ringbuf_reserve_proto;
        case BPF_FUNC_ringbuf_submit:
                return &bpf_ringbuf_submit_proto;
        case BPF_FUNC_ringbuf_discard:
                return &bpf_ringbuf_discard_proto;
        case BPF_FUNC_ringbuf_query:
                return &bpf_ringbuf_query_proto;
        case BPF_FUNC_strncmp:
                return &bpf_strncmp_proto;
        case BPF_FUNC_strtol:
                return &bpf_strtol_proto;
        case BPF_FUNC_strtoul:
                return &bpf_strtoul_proto;
        case BPF_FUNC_get_current_pid_tgid:
                return &bpf_get_current_pid_tgid_proto;
        case BPF_FUNC_get_ns_current_pid_tgid:
                return &bpf_get_ns_current_pid_tgid_proto;
        case BPF_FUNC_get_current_uid_gid:
                return &bpf_get_current_uid_gid_proto;
        default:
                break;
        }

        if (!bpf_token_capable(prog->aux->token, CAP_BPF))
                return NULL;

        switch (func_id) {
        case BPF_FUNC_spin_lock:
                return &bpf_spin_lock_proto;
        case BPF_FUNC_spin_unlock:
                return &bpf_spin_unlock_proto;
        case BPF_FUNC_jiffies64:
                return &bpf_jiffies64_proto;
        case BPF_FUNC_per_cpu_ptr:
                return &bpf_per_cpu_ptr_proto;
        case BPF_FUNC_this_cpu_ptr:
                return &bpf_this_cpu_ptr_proto;
        case BPF_FUNC_timer_init:
                return &bpf_timer_init_proto;
        case BPF_FUNC_timer_set_callback:
                return &bpf_timer_set_callback_proto;
        case BPF_FUNC_timer_start:
                return &bpf_timer_start_proto;
        case BPF_FUNC_timer_cancel:
                return &bpf_timer_cancel_proto;
        case BPF_FUNC_kptr_xchg:
                return &bpf_kptr_xchg_proto;
        case BPF_FUNC_for_each_map_elem:
                return &bpf_for_each_map_elem_proto;
        case BPF_FUNC_loop:
                return &bpf_loop_proto;
        case BPF_FUNC_user_ringbuf_drain:
                return &bpf_user_ringbuf_drain_proto;
        case BPF_FUNC_ringbuf_reserve_dynptr:
                return &bpf_ringbuf_reserve_dynptr_proto;
        case BPF_FUNC_ringbuf_submit_dynptr:
                return &bpf_ringbuf_submit_dynptr_proto;
        case BPF_FUNC_ringbuf_discard_dynptr:
                return &bpf_ringbuf_discard_dynptr_proto;
        case BPF_FUNC_dynptr_from_mem:
                return &bpf_dynptr_from_mem_proto;
        case BPF_FUNC_dynptr_read:
                return &bpf_dynptr_read_proto;
        case BPF_FUNC_dynptr_write:
                return &bpf_dynptr_write_proto;
        case BPF_FUNC_dynptr_data:
                return &bpf_dynptr_data_proto;
#ifdef CONFIG_CGROUPS
        case BPF_FUNC_cgrp_storage_get:
                return &bpf_cgrp_storage_get_proto;
        case BPF_FUNC_cgrp_storage_delete:
                return &bpf_cgrp_storage_delete_proto;
        case BPF_FUNC_get_current_cgroup_id:
                return &bpf_get_current_cgroup_id_proto;
        case BPF_FUNC_get_current_ancestor_cgroup_id:
                return &bpf_get_current_ancestor_cgroup_id_proto;
        case BPF_FUNC_current_task_under_cgroup:
                return &bpf_current_task_under_cgroup_proto;
#endif
#ifdef CONFIG_CGROUP_NET_CLASSID
        case BPF_FUNC_get_cgroup_classid:
                return &bpf_get_cgroup_classid_curr_proto;
#endif
        case BPF_FUNC_task_storage_get:
                return &bpf_task_storage_get_proto;
        case BPF_FUNC_task_storage_delete:
                return &bpf_task_storage_delete_proto;
        default:
                break;
        }

        if (!bpf_token_capable(prog->aux->token, CAP_PERFMON))
                return NULL;

        switch (func_id) {
        case BPF_FUNC_trace_printk:
                return bpf_get_trace_printk_proto();
        case BPF_FUNC_get_current_task:
                return &bpf_get_current_task_proto;
        case BPF_FUNC_get_current_task_btf:
                return &bpf_get_current_task_btf_proto;
        case BPF_FUNC_get_current_comm:
                return &bpf_get_current_comm_proto;
        case BPF_FUNC_probe_read_user:
                return &bpf_probe_read_user_proto;
        case BPF_FUNC_probe_read_kernel:
                return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
                       NULL : &bpf_probe_read_kernel_proto;
        case BPF_FUNC_probe_read_user_str:
                return &bpf_probe_read_user_str_proto;
        case BPF_FUNC_probe_read_kernel_str:
                return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
                       NULL : &bpf_probe_read_kernel_str_proto;
        case BPF_FUNC_copy_from_user:
                return &bpf_copy_from_user_proto;
        case BPF_FUNC_copy_from_user_task:
                return &bpf_copy_from_user_task_proto;
        case BPF_FUNC_snprintf_btf:
                return &bpf_snprintf_btf_proto;
        case BPF_FUNC_snprintf:
                return &bpf_snprintf_proto;
        case BPF_FUNC_task_pt_regs:
                return &bpf_task_pt_regs_proto;
        case BPF_FUNC_trace_vprintk:
                return bpf_get_trace_vprintk_proto();
        case BPF_FUNC_perf_event_read_value:
                return bpf_get_perf_event_read_value_proto();
        case BPF_FUNC_perf_event_read:
                return &bpf_perf_event_read_proto;
        case BPF_FUNC_send_signal:
                return &bpf_send_signal_proto;
        case BPF_FUNC_send_signal_thread:
                return &bpf_send_signal_thread_proto;
        case BPF_FUNC_get_task_stack:
                return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
                                       : &bpf_get_task_stack_proto;
        case BPF_FUNC_get_branch_snapshot:
                return &bpf_get_branch_snapshot_proto;
        case BPF_FUNC_find_vma:
                return &bpf_find_vma_proto;
        default:
                return NULL;
        }
}
EXPORT_SYMBOL_GPL(bpf_base_func_proto);

void bpf_list_head_free(const struct btf_field *field, void *list_head,
                        struct bpf_spin_lock *spin_lock)
{
        struct list_head *head = list_head, *orig_head = list_head;

        BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head));
        BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head));

        /* Do the actual list draining outside the lock to not hold the lock for
         * too long, and also prevent deadlocks if tracing programs end up
         * executing on entry/exit of functions called inside the critical
         * section, and end up doing map ops that call bpf_list_head_free for
         * the same map value again.
         */
        __bpf_spin_lock_irqsave(spin_lock);
        if (!head->next || list_empty(head))
                goto unlock;
        head = head->next;
unlock:
        INIT_LIST_HEAD(orig_head);
        __bpf_spin_unlock_irqrestore(spin_lock);

        while (head != orig_head) {
                void *obj = head;

                obj -= field->graph_root.node_offset;
                head = head->next;
                /* The contained type can also have resources, including a
                 * bpf_list_head which needs to be freed.
                 */
                __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
        }
}

/* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are
 * 'rb_node *', so field name of rb_node within containing struct is not
 * needed.
 *
 * Since bpf_rb_tree's node type has a corresponding struct btf_field with
 * graph_root.node_offset, it's not necessary to know field name
 * or type of node struct
 */
#define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \
        for (pos = rb_first_postorder(root); \
            pos && ({ n = rb_next_postorder(pos); 1; }); \
            pos = n)

void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
                      struct bpf_spin_lock *spin_lock)
{
        struct rb_root_cached orig_root, *root = rb_root;
        struct rb_node *pos, *n;
        void *obj;

        BUILD_BUG_ON(sizeof(struct rb_root_cached) > sizeof(struct bpf_rb_root));
        BUILD_BUG_ON(__alignof__(struct rb_root_cached) > __alignof__(struct bpf_rb_root));

        __bpf_spin_lock_irqsave(spin_lock);
        orig_root = *root;
        *root = RB_ROOT_CACHED;
        __bpf_spin_unlock_irqrestore(spin_lock);

        bpf_rbtree_postorder_for_each_entry_safe(pos, n, &orig_root.rb_root) {
                obj = pos;
                obj -= field->graph_root.node_offset;

                __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
        }
}

__bpf_kfunc_start_defs();

__bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
{
        struct btf_struct_meta *meta = meta__ign;
        u64 size = local_type_id__k;
        void *p;

        p = bpf_mem_alloc(&bpf_global_ma, size);
        if (!p)
                return NULL;
        if (meta)
                bpf_obj_init(meta->record, p);
        return p;
}

__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
{
        u64 size = local_type_id__k;

        /* The verifier has ensured that meta__ign must be NULL */
        return bpf_mem_alloc(&bpf_global_percpu_ma, size);
}

/* Must be called under migrate_disable(), as required by bpf_mem_free */
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
{
        struct bpf_mem_alloc *ma;

        if (rec && rec->refcount_off >= 0 &&
            !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) {
                /* Object is refcounted and refcount_dec didn't result in 0
                 * refcount. Return without freeing the object
                 */
                return;
        }

        if (rec)
                bpf_obj_free_fields(rec, p);

        if (percpu)
                ma = &bpf_global_percpu_ma;
        else
                ma = &bpf_global_ma;
        bpf_mem_free_rcu(ma, p);
}

__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
{
        struct btf_struct_meta *meta = meta__ign;
        void *p = p__alloc;

        __bpf_obj_drop_impl(p, meta ? meta->record : NULL, false);
}

__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
{
        /* The verifier has ensured that meta__ign must be NULL */
        bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc);
}

__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
{
        struct btf_struct_meta *meta = meta__ign;
        struct bpf_refcount *ref;

        /* Could just cast directly to refcount_t *, but need some code using
         * bpf_refcount type so that it is emitted in vmlinux BTF
         */
        ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off);
        if (!refcount_inc_not_zero((refcount_t *)ref))
                return NULL;

        /* Verifier strips KF_RET_NULL if input is owned ref, see is_kfunc_ret_null
         * in verifier.c
         */
        return (void *)p__refcounted_kptr;
}

static int __bpf_list_add(struct bpf_list_node_kern *node,
                          struct bpf_list_head *head,
                          bool tail, struct btf_record *rec, u64 off)
{
        struct list_head *n = &node->list_head, *h = (void *)head;

        /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
         * called on its fields, so init here
         */
        if (unlikely(!h->next))
                INIT_LIST_HEAD(h);

        /* node->owner != NULL implies !list_empty(n), no need to separately
         * check the latter
         */
        if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
                /* Only called from BPF prog, no need to migrate_disable */
                __bpf_obj_drop_impl((void *)n - off, rec, false);
                return -EINVAL;
        }

        tail ? list_add_tail(n, h) : list_add(n, h);
        WRITE_ONCE(node->owner, head);

        return 0;
}

__bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head,
                                         struct bpf_list_node *node,
                                         void *meta__ign, u64 off)
{
        struct bpf_list_node_kern *n = (void *)node;
        struct btf_struct_meta *meta = meta__ign;

        return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
}

__bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
                                        struct bpf_list_node *node,
                                        void *meta__ign, u64 off)
{
        struct bpf_list_node_kern *n = (void *)node;
        struct btf_struct_meta *meta = meta__ign;

        return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
}

static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
{
        struct list_head *n, *h = (void *)head;
        struct bpf_list_node_kern *node;

        /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
         * called on its fields, so init here
         */
        if (unlikely(!h->next))
                INIT_LIST_HEAD(h);
        if (list_empty(h))
                return NULL;

        n = tail ? h->prev : h->next;
        node = container_of(n, struct bpf_list_node_kern, list_head);
        if (WARN_ON_ONCE(READ_ONCE(node->owner) != head))
                return NULL;

        list_del_init(n);
        WRITE_ONCE(node->owner, NULL);
        return (struct bpf_list_node *)n;
}

__bpf_kfunc struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
{
        return __bpf_list_del(head, false);
}

__bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
{
        return __bpf_list_del(head, true);
}

__bpf_kfunc struct bpf_list_node *bpf_list_front(struct bpf_list_head *head)
{
        struct list_head *h = (struct list_head *)head;

        if (list_empty(h) || unlikely(!h->next))
                return NULL;

        return (struct bpf_list_node *)h->next;
}

__bpf_kfunc struct bpf_list_node *bpf_list_back(struct bpf_list_head *head)
{
        struct list_head *h = (struct list_head *)head;

        if (list_empty(h) || unlikely(!h->next))
                return NULL;

        return (struct bpf_list_node *)h->prev;
}
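
/*
 * Illustrative only: a minimal BPF-program-side sketch of the list kfuncs
 * above, assuming a map value containing a bpf_spin_lock and a bpf_list_head
 * declared with __contains(), as in the bpf_experimental.h selftests; the
 * names are hypothetical.
 *
 *	struct elem { struct bpf_list_node node; int val; };
 *	// map value: struct bpf_spin_lock lock;
 *	//            struct bpf_list_head head __contains(elem, node);
 *
 *	struct elem *e = bpf_obj_new(typeof(*e));
 *
 *	if (!e)
 *		return 0;
 *	bpf_spin_lock(&v->lock);
 *	bpf_list_push_back(&v->head, &e->node);	// ownership passes to the list
 *	bpf_spin_unlock(&v->lock);
 */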

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
                                                  struct bpf_rb_node *node)
{
        struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
        struct rb_root_cached *r = (struct rb_root_cached *)root;
        struct rb_node *n = &node_internal->rb_node;

        /* node_internal->owner != root implies either RB_EMPTY_NODE(n) or
         * n is owned by some other tree. No need to check RB_EMPTY_NODE(n)
         */
        if (READ_ONCE(node_internal->owner) != root)
                return NULL;

        rb_erase_cached(n, r);
        RB_CLEAR_NODE(n);
        WRITE_ONCE(node_internal->owner, NULL);
        return (struct bpf_rb_node *)n;
}

/* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
 * program
 */
static int __bpf_rbtree_add(struct bpf_rb_root *root,
                            struct bpf_rb_node_kern *node,
                            void *less, struct btf_record *rec, u64 off)
{
        struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
        struct rb_node *parent = NULL, *n = &node->rb_node;
        bpf_callback_t cb = (bpf_callback_t)less;
        bool leftmost = true;

        /* node->owner != NULL implies !RB_EMPTY_NODE(n), no need to separately
         * check the latter
         */
        if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
                /* Only called from BPF prog, no need to migrate_disable */
                __bpf_obj_drop_impl((void *)n - off, rec, false);
                return -EINVAL;
        }

        while (*link) {
                parent = *link;
                if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) {
                        link = &parent->rb_left;
                } else {
                        link = &parent->rb_right;
                        leftmost = false;
                }
        }

        rb_link_node(n, parent, link);
        rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost);
        WRITE_ONCE(node->owner, root);
        return 0;
}

__bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
                                    bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
                                    void *meta__ign, u64 off)
{
        struct btf_struct_meta *meta = meta__ign;
        struct bpf_rb_node_kern *n = (void *)node;

        return __bpf_rbtree_add(root, n, (void *)less, meta ? meta->record : NULL, off);
}

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
{
        struct rb_root_cached *r = (struct rb_root_cached *)root;

        return (struct bpf_rb_node *)rb_first_cached(r);
}

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_root(struct bpf_rb_root *root)
{
        struct rb_root_cached *r = (struct rb_root_cached *)root;

        return (struct bpf_rb_node *)r->rb_root.rb_node;
}

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_left(struct bpf_rb_root *root, struct bpf_rb_node *node)
{
        struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;

        if (READ_ONCE(node_internal->owner) != root)
                return NULL;

        return (struct bpf_rb_node *)node_internal->rb_node.rb_left;
}

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, struct bpf_rb_node *node)
{
        struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;

        if (READ_ONCE(node_internal->owner) != root)
                return NULL;

        return (struct bpf_rb_node *)node_internal->rb_node.rb_right;
}
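
/*
 * Illustrative only: a minimal BPF-program-side sketch of bpf_rbtree_add()
 * with a BPF 'less' callback, assuming a bpf_rb_root/bpf_spin_lock pair in a
 * map value declared via __contains(); the names are hypothetical.
 *
 *	struct item { struct bpf_rb_node node; int key; };
 *
 *	static bool item_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 *	{
 *		struct item *ia = container_of(a, struct item, node);
 *		struct item *ib = container_of(b, struct item, node);
 *
 *		return ia->key < ib->key;
 *	}
 *
 *	bpf_spin_lock(&v->lock);
 *	bpf_rbtree_add(&v->root, &it->node, item_less);
 *	bpf_spin_unlock(&v->lock);
 */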

/**
 * bpf_task_acquire - Acquire a reference to a task. A task acquired by this
 * kfunc that is not stored in a map as a kptr must be released by calling
 * bpf_task_release().
 * @p: The task on which a reference is being acquired.
 */
__bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
{
        if (refcount_inc_not_zero(&p->rcu_users))
                return p;
        return NULL;
}

/**
 * bpf_task_release - Release the reference acquired on a task.
 * @p: The task on which a reference is being released.
 */
__bpf_kfunc void bpf_task_release(struct task_struct *p)
{
        put_task_struct_rcu_user(p);
}

__bpf_kfunc void bpf_task_release_dtor(void *p)
{
        put_task_struct_rcu_user(p);
}
CFI_NOSEAL(bpf_task_release_dtor);

#ifdef CONFIG_CGROUPS
/**
 * bpf_cgroup_acquire - Acquire a reference to a cgroup. A cgroup acquired by
 * this kfunc that is not stored in a map as a kptr must be released by
 * calling bpf_cgroup_release().
 * @cgrp: The cgroup on which a reference is being acquired.
 */
__bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
{
        return cgroup_tryget(cgrp) ? cgrp : NULL;
}

/**
 * bpf_cgroup_release - Release the reference acquired on a cgroup.
 * If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to
 * not be freed until the current grace period has ended, even if its refcount
 * drops to 0.
 * @cgrp: The cgroup on which a reference is being released.
 */
__bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp)
{
        cgroup_put(cgrp);
}

__bpf_kfunc void bpf_cgroup_release_dtor(void *cgrp)
{
        cgroup_put(cgrp);
}
CFI_NOSEAL(bpf_cgroup_release_dtor);

/**
 * bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor
 * array. A cgroup returned by this kfunc that is not subsequently stored in a
 * map must be released by calling bpf_cgroup_release().
 * @cgrp: The cgroup for which we're performing a lookup.
 * @level: The level of ancestor to look up.
 */
__bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
{
        struct cgroup *ancestor;

        if (level > cgrp->level || level < 0)
                return NULL;

        /* cgrp's refcnt could be 0 here, but ancestors can still be accessed */
        ancestor = cgrp->ancestors[level];
        if (!cgroup_tryget(ancestor))
                return NULL;
        return ancestor;
}

/**
 * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this
 * kfunc that is not subsequently stored in a map must be released by calling
 * bpf_cgroup_release().
 * @cgid: cgroup id.
 */
__bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
{
        struct cgroup *cgrp;

        cgrp = __cgroup_get_from_id(cgid);
        if (IS_ERR(cgrp))
                return NULL;
        return cgrp;
}

/**
 * bpf_task_under_cgroup - wrap task_under_cgroup_hierarchy() as a kfunc to test
 * a task's membership of a cgroup ancestry.
 * @task: the task to be tested
 * @ancestor: possible ancestor of @task's cgroup
 *
 * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
 * It follows all the same rules as cgroup_is_descendant, and only applies
 * to the default hierarchy.
 */
__bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task,
                                       struct cgroup *ancestor)
{
        long ret;

        rcu_read_lock();
        ret = task_under_cgroup_hierarchy(task, ancestor);
        rcu_read_unlock();
        return ret;
}

BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct cgroup *cgrp;

        if (unlikely(idx >= array->map.max_entries))
                return -E2BIG;

        cgrp = READ_ONCE(array->ptrs[idx]);
        if (unlikely(!cgrp))
                return -EAGAIN;

        return task_under_cgroup_hierarchy(current, cgrp);
}

const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
        .func           = bpf_current_task_under_cgroup,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_ANYTHING,
};

/**
 * bpf_task_get_cgroup1 - Acquires the associated cgroup of a task within a
 * specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its
 * hierarchy ID.
 * @task: The target task
 * @hierarchy_id: The ID of a cgroup1 hierarchy
 *
 * On success, the cgroup is returned. On failure, NULL is returned.
 */
__bpf_kfunc struct cgroup *
bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id)
{
        struct cgroup *cgrp = task_get_cgroup1(task, hierarchy_id);

        if (IS_ERR(cgrp))
                return NULL;
        return cgrp;
}
#endif /* CONFIG_CGROUPS */

/**
 * bpf_task_from_pid - Find a struct task_struct from its pid by looking it up
 * in the root pid namespace idr. If a task is returned, it must either be
 * stored in a map, or released with bpf_task_release().
 * @pid: The pid of the task being looked up.
 */
__bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
{
        struct task_struct *p;

        rcu_read_lock();
        p = find_task_by_pid_ns(pid, &init_pid_ns);
        if (p)
                p = bpf_task_acquire(p);
        rcu_read_unlock();

        return p;
}

/**
 * bpf_task_from_vpid - Find a struct task_struct from its vpid by looking it up
 * in the pid namespace of the current task. If a task is returned, it must
 * either be stored in a map, or released with bpf_task_release().
 * @vpid: The vpid of the task being looked up.
 */
__bpf_kfunc struct task_struct *bpf_task_from_vpid(s32 vpid)
{
        struct task_struct *p;

        rcu_read_lock();
        p = find_task_by_vpid(vpid);
        if (p)
                p = bpf_task_acquire(p);
        rcu_read_unlock();

        return p;
}
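
/*
 * Illustrative only: acquiring and releasing a task reference from a BPF
 * program using the kfuncs above; the pid value is hypothetical.
 *
 *	struct task_struct *t = bpf_task_from_pid(1);
 *
 *	if (!t)
 *		return 0;
 *	bpf_printk("comm=%s", t->comm);
 *	bpf_task_release(t);	// must be released if not stored as a kptr
 */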

/**
 * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
 * @p: The dynptr whose data slice to retrieve
 * @offset: Offset into the dynptr
 * @buffer__nullable: User-provided buffer to copy contents into.  May be NULL
 * @buffer__szk: Size (in bytes) of the buffer if present. This is the
 *               length of the requested slice. This must be a constant.
 *
 * For non-skb and non-xdp type dynptrs, there is no difference between
 * bpf_dynptr_slice and bpf_dynptr_data.
 *
 * If buffer__nullable is NULL, the call will fail if the buffer was needed.
 *
 * If the intention is to write to the data slice, please use
 * bpf_dynptr_slice_rdwr.
 *
 * The user must check that the returned pointer is not null before using it.
 *
 * Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice
 * does not change the underlying packet data pointers, so a call to
 * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in
 * the bpf program.
 *
 * Return: NULL if the call failed (eg invalid dynptr), pointer to a read-only
 * data slice (can be either direct pointer to the data or a pointer to the user
 * provided buffer, with its contents containing the data, if unable to obtain
 * direct pointer)
 */
__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u64 offset,
                                   void *buffer__nullable, u64 buffer__szk)
{
        const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
        enum bpf_dynptr_type type;
        u64 len = buffer__szk;
        int err;

        if (!ptr->data)
                return NULL;

        err = bpf_dynptr_check_off_len(ptr, offset, len);
        if (err)
                return NULL;

        type = bpf_dynptr_get_type(ptr);

        switch (type) {
        case BPF_DYNPTR_TYPE_LOCAL:
        case BPF_DYNPTR_TYPE_RINGBUF:
                return ptr->data + ptr->offset + offset;
        case BPF_DYNPTR_TYPE_SKB:
                if (buffer__nullable)
                        return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__nullable);
                else
                        return skb_pointer_if_linear(ptr->data, ptr->offset + offset, len);
        case BPF_DYNPTR_TYPE_XDP:
        {
                void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
                if (!IS_ERR_OR_NULL(xdp_ptr))
                        return xdp_ptr;

                if (!buffer__nullable)
                        return NULL;
                bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__nullable, len, false);
                return buffer__nullable;
        }
        case BPF_DYNPTR_TYPE_SKB_META:
                return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset);
        case BPF_DYNPTR_TYPE_FILE:
                err = bpf_file_fetch_bytes(ptr->data, offset, buffer__nullable, buffer__szk);
                return err ? NULL : buffer__nullable;
        default:
                WARN_ONCE(true, "unknown dynptr type %d\n", type);
                return NULL;
        }
}

/**
 * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
 * @p: The dynptr whose data slice to retrieve
 * @offset: Offset into the dynptr
 * @buffer__nullable: User-provided buffer to copy contents into. May be NULL
 * @buffer__szk: Size (in bytes) of the buffer if present. This is the
 *               length of the requested slice. This must be a constant.
 *
 * For non-skb and non-xdp type dynptrs, there is no difference between
 * bpf_dynptr_slice and bpf_dynptr_data.
 *
 * If buffer__nullable is NULL, the call will fail if the buffer was needed.
 *
 * The returned pointer is writable and may point to either directly the dynptr
 * data at the requested offset or to the buffer if unable to obtain a direct
 * data pointer to (example: the requested slice is to the paged area of an skb
 * packet). In the case where the returned pointer is to the buffer, the user
 * is responsible for persisting writes through calling bpf_dynptr_write(). This
 * usually looks something like this pattern:
 *
 * struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer));
 * if (!eth)
 *      return TC_ACT_SHOT;
 *
 * // mutate eth header //
 *
 * if (eth == buffer)
 *      bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0);
 *
 * Please note that, as in the example above, the user must check that the
 * returned pointer is not null before using it.
 *
 * Please also note that in the case of skb and xdp dynptrs, bpf_dynptr_slice_rdwr
 * does not change the underlying packet data pointers, so a call to
 * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in
 * the bpf program.
 *
 * Return: NULL if the call failed (eg invalid dynptr), pointer to a
 * data slice (can be either direct pointer to the data or a pointer to the user
 * provided buffer, with its contents containing the data, if unable to obtain
 * direct pointer)
 */
__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset,
                                        void *buffer__nullable, u64 buffer__szk)
{
        const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;

        if (!ptr->data || __bpf_dynptr_is_rdonly(ptr))
                return NULL;

        /* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice.
         *
         * For skb-type dynptrs, it is safe to write into the returned pointer
         * if the bpf program allows skb data writes. There are two possibilities
         * that may occur when calling bpf_dynptr_slice_rdwr:
         *
         * 1) The requested slice is in the head of the skb. In this case, the
         * returned pointer is directly to skb data, and if the skb is cloned, the
         * verifier will have uncloned it (see bpf_unclone_prologue()) already.
         * The pointer can be directly written into.
         *
         * 2) Some portion of the requested slice is in the paged buffer area.
         * In this case, the requested data will be copied out into the buffer
         * and the returned pointer will be a pointer to the buffer. The skb
         * will not be pulled. To persist the write, the user will need to call
         * bpf_dynptr_write(), which will pull the skb and commit the write.
         *
         * Similarly for xdp programs, if the requested slice is not across xdp
         * fragments, then a direct pointer will be returned, otherwise the data
         * will be copied out into the buffer and the user will need to call
         * bpf_dynptr_write() to commit changes.
         */
        return bpf_dynptr_slice(p, offset, buffer__nullable, buffer__szk);
}

__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u64 start, u64 end)
{
        struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
        u64 size;

        if (!ptr->data || start > end)
                return -EINVAL;

        size = __bpf_dynptr_size(ptr);

        if (start > size || end > size)
                return -ERANGE;

        bpf_dynptr_advance_offset(ptr, start);
        bpf_dynptr_set_size(ptr, end - start);

        return 0;
}
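
/*
 * Illustrative only: narrowing a dynptr view with bpf_dynptr_adjust(). After
 * the call, the dynptr covers bytes [4, 12) of the original view, i.e. its
 * size becomes 8 and its offset grows by 4.
 *
 *	if (bpf_dynptr_adjust(&dptr, 4, 12))
 *		return 0;
 *	// bpf_dynptr_size(&dptr) is now 8
 */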

__bpf_kfunc bool bpf_dynptr_is_null(const struct bpf_dynptr *p)
{
        struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;

        return !ptr->data;
}

__bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p)
{
        struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;

        if (!ptr->data)
                return false;

        return __bpf_dynptr_is_rdonly(ptr);
}

__bpf_kfunc u64 bpf_dynptr_size(const struct bpf_dynptr *p)
{
        struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;

        if (!ptr->data)
                return -EINVAL;

        return __bpf_dynptr_size(ptr);
}

__bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p,
                                 struct bpf_dynptr *clone__uninit)
{
        struct bpf_dynptr_kern *clone = (struct bpf_dynptr_kern *)clone__uninit;
        struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;

        if (!ptr->data) {
                bpf_dynptr_set_null(clone);
                return -EINVAL;
        }

        *clone = *ptr;

        return 0;
}

/**
 * bpf_dynptr_copy() - Copy data from one dynptr to another.
 * @dst_ptr: Destination dynptr - where data should be copied to
 * @dst_off: Offset into the destination dynptr
 * @src_ptr: Source dynptr - where data should be copied from
 * @src_off: Offset into the source dynptr
 * @size: Length of the data to copy from source to destination
 *
 * Copies data from source dynptr to destination dynptr.
 * Returns 0 on success; negative error, otherwise.
 */
__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u64 dst_off,
                                struct bpf_dynptr *src_ptr, u64 src_off, u64 size)
{
        struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr;
        struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr;
        void *src_slice, *dst_slice;
        char buf[256];
        u64 off;

        src_slice = bpf_dynptr_slice(src_ptr, src_off, NULL, size);
        dst_slice = bpf_dynptr_slice_rdwr(dst_ptr, dst_off, NULL, size);

        if (src_slice && dst_slice) {
                memmove(dst_slice, src_slice, size);
                return 0;
        }

        if (src_slice)
                return __bpf_dynptr_write(dst, dst_off, src_slice, size, 0);

        if (dst_slice)
                return __bpf_dynptr_read(dst_slice, size, src, src_off, 0);

        if (bpf_dynptr_check_off_len(dst, dst_off, size) ||
            bpf_dynptr_check_off_len(src, src_off, size))
                return -E2BIG;

        off = 0;
        while (off < size) {
                u64 chunk_sz = min_t(u64, sizeof(buf), size - off);
                int err;

                err = __bpf_dynptr_read(buf, chunk_sz, src, src_off + off, 0);
                if (err)
                        return err;
                err = __bpf_dynptr_write(dst, dst_off + off, buf, chunk_sz, 0);
                if (err)
                        return err;

                off += chunk_sz;
        }
        return 0;
}
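
/*
 * Illustrative only: copying between two dynptrs from the BPF program side,
 * e.g. from an skb dynptr into a local stack-backed dynptr; the names are
 * hypothetical and error handling is abbreviated.
 *
 *	char out[64];
 *	struct bpf_dynptr dst;
 *
 *	bpf_dynptr_from_mem(out, sizeof(out), 0, &dst);
 *	if (bpf_dynptr_copy(&dst, 0, &skb_ptr, 0, sizeof(out)))
 *		return 0;
 */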

/**
 * bpf_dynptr_memset() - Fill dynptr memory with a constant byte.
 * @p: Destination dynptr - where data will be filled
 * @offset: Offset into the dynptr to start filling from
 * @size: Number of bytes to fill
 * @val: Constant byte to fill the memory with
 *
 * Fills the @size bytes of the memory area pointed to by @p
 * at @offset with the constant byte @val.
 * Returns 0 on success; negative error, otherwise.
 */
__bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u64 offset, u64 size, u8 val)
{
        struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
        u64 chunk_sz, write_off;
        char buf[256];
        void *slice;
        int err;

        slice = bpf_dynptr_slice_rdwr(p, offset, NULL, size);
        if (likely(slice)) {
                memset(slice, val, size);
                return 0;
        }

        if (__bpf_dynptr_is_rdonly(ptr))
                return -EINVAL;

        err = bpf_dynptr_check_off_len(ptr, offset, size);
        if (err)
                return err;

        /* Non-linear data under the dynptr, write from a local buffer */
        chunk_sz = min_t(u64, sizeof(buf), size);
        memset(buf, val, chunk_sz);

        for (write_off = 0; write_off < size; write_off += chunk_sz) {
                chunk_sz = min_t(u64, sizeof(buf), size - write_off);
                err = __bpf_dynptr_write(ptr, offset + write_off, buf, chunk_sz, 0);
                if (err)
                        return err;
        }

        return 0;
}
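
/*
 * Illustrative only: zeroing a region of a dynptr with bpf_dynptr_memset().
 * This also works for non-linear (e.g. paged skb) data, where the kfunc falls
 * back to chunked writes internally.
 *
 *	if (bpf_dynptr_memset(&dptr, 16, 32, 0))
 *		return 0;	// on success, bytes [16, 48) are now zero
 */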

__bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
{
        return obj;
}

__bpf_kfunc void *bpf_rdonly_cast(const void *obj__ign, u32 btf_id__k)
{
        return (void *)obj__ign;
}

__bpf_kfunc void bpf_rcu_read_lock(void)
{
        rcu_read_lock();
}

__bpf_kfunc void bpf_rcu_read_unlock(void)
{
        rcu_read_unlock();
}

struct bpf_throw_ctx {
        struct bpf_prog_aux *aux;
        u64 sp;
        u64 bp;
        int cnt;
};

static bool bpf_stack_walker(void *cookie, u64 ip, u64 sp, u64 bp)
{
        struct bpf_throw_ctx *ctx = cookie;
        struct bpf_prog *prog;

        /*
         * The RCU read lock is held to safely traverse the latch tree, but we
         * don't need its protection when accessing the prog, since it has an
         * active stack frame on the current stack trace, and won't disappear.
         */
        rcu_read_lock();
        prog = bpf_prog_ksym_find(ip);
        rcu_read_unlock();
        if (!prog)
                return !ctx->cnt;
        ctx->cnt++;
        if (bpf_is_subprog(prog))
                return true;
        ctx->aux = prog->aux;
        ctx->sp = sp;
        ctx->bp = bp;
        return false;
}

__bpf_kfunc void bpf_throw(u64 cookie)
{
        struct bpf_throw_ctx ctx = {};

        arch_bpf_stack_walk(bpf_stack_walker, &ctx);
        WARN_ON_ONCE(!ctx.aux);
        if (ctx.aux)
                WARN_ON_ONCE(!ctx.aux->exception_boundary);
        WARN_ON_ONCE(!ctx.bp);
        WARN_ON_ONCE(!ctx.cnt);
        /* Prevent KASAN false positives for CONFIG_KASAN_STACK by unpoisoning
         * deeper stack depths than ctx.sp as we do not return from bpf_throw,
         * which skips compiler generated instrumentation to do the same.
         */
        kasan_unpoison_task_stack_below((void *)(long)ctx.sp);
        ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp, 0, 0);
        WARN(1, "A call to BPF exception callback should never return\n");
}

__bpf_kfunc int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags)
{
        struct bpf_async_kern *async = (struct bpf_async_kern *)wq;
        struct bpf_map *map = p__map;

        BUILD_BUG_ON(sizeof(struct bpf_async_kern) > sizeof(struct bpf_wq));
        BUILD_BUG_ON(__alignof__(struct bpf_async_kern) != __alignof__(struct bpf_wq));

        if (flags)
                return -EINVAL;

        return __bpf_async_init(async, map, flags, BPF_ASYNC_TYPE_WQ);
}

__bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags)
{
        struct bpf_async_kern *async = (struct bpf_async_kern *)wq;
        struct bpf_work *w;

        if (flags)
                return -EINVAL;

        w = READ_ONCE(async->work);
        if (!w || !READ_ONCE(w->cb.prog))
                return -EINVAL;

        if (!refcount_inc_not_zero(&w->cb.refcnt))
                return -ENOENT;

        if (!defer_timer_wq_op()) {
                schedule_work(&w->work);
                bpf_async_refcount_put(&w->cb);
                return 0;
        } else {
                return bpf_async_schedule_op(&w->cb, BPF_ASYNC_START, 0, 0);
        }
}

__bpf_kfunc int bpf_wq_set_callback(struct bpf_wq *wq,
                                    int (callback_fn)(void *map, int *key, void *value),
                                    unsigned int flags,
                                    struct bpf_prog_aux *aux)
{
        struct bpf_async_kern *async = (struct bpf_async_kern *)wq;

        if (flags)
                return -EINVAL;

        return __bpf_async_set_callback(async, callback_fn, aux->prog);
}
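
/*
 * Illustrative only: the BPF-program-side lifecycle of a bpf_wq embedded in a
 * map value, assuming the bpf_experimental.h wrappers; the names are
 * hypothetical.
 *
 *	static int wq_cb(void *map, int *key, void *value)
 *	{
 *		// runs later from workqueue context
 *		return 0;
 *	}
 *
 *	// with struct elem { struct bpf_wq wq; ... } as the map value type:
 *	if (bpf_wq_init(&e->wq, &my_map, 0))
 *		return 0;
 *	if (bpf_wq_set_callback(&e->wq, wq_cb, 0))
 *		return 0;
 *	bpf_wq_start(&e->wq, 0);
 */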

__bpf_kfunc void bpf_preempt_disable(void)
{
        preempt_disable();
}

__bpf_kfunc void bpf_preempt_enable(void)
{
        preempt_enable();
}

struct bpf_iter_bits {
        __u64 __opaque[2];
} __aligned(8);

#define BITS_ITER_NR_WORDS_MAX 511

struct bpf_iter_bits_kern {
        union {
                __u64 *bits;
                __u64 bits_copy;
        };
        int nr_bits;
        int bit;
} __aligned(8);

/* On 64-bit hosts, unsigned long and u64 have the same size, so passing
 * a u64 pointer and an unsigned long pointer to find_next_bit() will
 * return the same result, as both point to the same 8-byte area.
 *
 * For 32-bit little-endian hosts, using a u64 pointer or unsigned long
 * pointer also makes no difference. This is because the first iterated
 * unsigned long is composed of bits 0-31 of the u64 and the second unsigned
 * long is composed of bits 32-63 of the u64.
 *
 * However, for 32-bit big-endian hosts, this is not the case. The first
 * iterated unsigned long will be bits 32-63 of the u64, so swap these two
 * ulong values within the u64.
 */
static void swap_ulong_in_u64(u64 *bits, unsigned int nr)
{
#if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN)
        unsigned int i;

        for (i = 0; i < nr; i++)
                bits[i] = (bits[i] >> 32) | ((u64)(u32)bits[i] << 32);
#endif
}

/**
 * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
 * @it: The new bpf_iter_bits to be created
 * @unsafe_ptr__ign: A pointer pointing to a memory area to be iterated over
 * @nr_words: The size of the specified memory area, measured in 8-byte units.
 * The maximum value of @nr_words is @BITS_ITER_NR_WORDS_MAX. This limit may be
 * further reduced by the BPF memory allocator implementation.
 *
 * This function initializes a new bpf_iter_bits structure for iterating over
 * a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It
 * copies the data of the memory area to the newly created bpf_iter_bits @it for
 * subsequent iteration operations.
 *
 * On success, 0 is returned. On failure, a negative error code is returned.
 */
__bpf_kfunc int
bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words)
{
        struct bpf_iter_bits_kern *kit = (void *)it;
        u32 nr_bytes = nr_words * sizeof(u64);
        u32 nr_bits = BYTES_TO_BITS(nr_bytes);
        int err;

        BUILD_BUG_ON(sizeof(struct bpf_iter_bits_kern) != sizeof(struct bpf_iter_bits));
        BUILD_BUG_ON(__alignof__(struct bpf_iter_bits_kern) !=
                     __alignof__(struct bpf_iter_bits));

        kit->nr_bits = 0;
        kit->bits_copy = 0;
        kit->bit = -1;

        if (!unsafe_ptr__ign || !nr_words)
                return -EINVAL;
        if (nr_words > BITS_ITER_NR_WORDS_MAX)
                return -E2BIG;

        /* Optimization for u64 mask */
        if (nr_bits == 64) {
                err = bpf_probe_read_kernel_common(&kit->bits_copy, nr_bytes, unsafe_ptr__ign);
                if (err)
                        return -EFAULT;

                swap_ulong_in_u64(&kit->bits_copy, nr_words);

                kit->nr_bits = nr_bits;
                return 0;
        }

        if (bpf_mem_alloc_check_size(false, nr_bytes))
                return -E2BIG;

        /* Fallback to memalloc */
        kit->bits = bpf_mem_alloc(&bpf_global_ma, nr_bytes);
        if (!kit->bits)
                return -ENOMEM;

        err = bpf_probe_read_kernel_common(kit->bits, nr_bytes, unsafe_ptr__ign);
        if (err) {
                bpf_mem_free(&bpf_global_ma, kit->bits);
                return err;
        }

        swap_ulong_in_u64(kit->bits, nr_words);

        kit->nr_bits = nr_bits;
        return 0;
}

/**
 * bpf_iter_bits_next() - Get the next bit in a bpf_iter_bits
 * @it: The bpf_iter_bits to be checked
 *
 * This function returns a pointer to a number representing the index of the
 * next set bit in the bits.
 *
 * If there are no further bits available, it returns NULL.
 */
__bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
{
        struct bpf_iter_bits_kern *kit = (void *)it;
        int bit = kit->bit, nr_bits = kit->nr_bits;
        const void *bits;

        if (!nr_bits || bit >= nr_bits)
                return NULL;

        bits = nr_bits == 64 ? &kit->bits_copy : kit->bits;
        bit = find_next_bit(bits, nr_bits, bit + 1);
        if (bit >= nr_bits) {
                kit->bit = bit;
                return NULL;
        }

        kit->bit = bit;
        return &kit->bit;
}

/**
 * bpf_iter_bits_destroy() - Destroy a bpf_iter_bits
 * @it: The bpf_iter_bits to be destroyed
 *
 * Destroy the resource associated with the bpf_iter_bits.
 */
__bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it)
{
        struct bpf_iter_bits_kern *kit = (void *)it;

        if (kit->nr_bits <= 64)
                return;
        bpf_mem_free(&bpf_global_ma, kit->bits);
}
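
/*
 * Illustrative only: walking set bits from a BPF program with the open-coded
 * bits iterator, here via the bpf_for_each() convenience macro from libbpf's
 * bpf_helpers.h (assumption); the mask value is hypothetical.
 *
 *	__u64 mask = 0xf0;
 *	int *bit;
 *
 *	bpf_for_each(bits, bit, &mask, 1)
 *		bpf_printk("set bit at %d", *bit);
 */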

/**
 * bpf_copy_from_user_str() - Copy a string from an unsafe user address
 * @dst:             Destination address, in kernel space.  This buffer must be
 *                   at least @dst__sz bytes long.
 * @dst__sz:         Maximum number of bytes to copy, includes the trailing NUL.
 * @unsafe_ptr__ign: Source address, in user space.
 * @flags:           The only supported flag is BPF_F_PAD_ZEROS
 *
 * Copies a NUL-terminated string from userspace into @dst. If the user string
 * is too long, this still ensures NUL termination in the @dst buffer, unless
 * the buffer size is 0.
 *
 * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success and
 * memset all of @dst on failure.
 */
__bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, u64 flags)
{
        int ret;

        if (unlikely(flags & ~BPF_F_PAD_ZEROS))
                return -EINVAL;

        if (unlikely(!dst__sz))
                return 0;

        ret = strncpy_from_user(dst, unsafe_ptr__ign, dst__sz - 1);
        if (ret < 0) {
                if (flags & BPF_F_PAD_ZEROS)
                        memset((char *)dst, 0, dst__sz);

                return ret;
        }

        if (flags & BPF_F_PAD_ZEROS)
                memset((char *)dst + ret, 0, dst__sz - ret);
        else
                ((char *)dst)[ret] = '\0';

        return ret + 1;
}
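
/*
 * Illustrative only: copying a user string from a sleepable BPF program;
 * BPF_F_PAD_ZEROS additionally zero-fills the tail of the buffer. user_ptr
 * is a hypothetical user-space address.
 *
 *	char name[32];
 *	int n = bpf_copy_from_user_str(name, sizeof(name), user_ptr, BPF_F_PAD_ZEROS);
 *
 *	if (n < 0)
 *		return 0;	// on success, n includes the trailing NUL
 */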

/**
 * bpf_copy_from_user_task_str() - Copy a string from a task's address space
 * @dst:             Destination address, in kernel space.  This buffer must be
 *                   at least @dst__sz bytes long.
 * @dst__sz:         Maximum number of bytes to copy, includes the trailing NUL.
 * @unsafe_ptr__ign: Source address in the task's address space.
 * @tsk:             The task whose address space will be used
 * @flags:           The only supported flag is BPF_F_PAD_ZEROS
 *
 * Copies a NUL terminated string from a task's address space into the @dst
 * buffer. If the user string is too long, this still ensures NUL termination
 * in the @dst buffer unless the buffer size is 0.
 *
 * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success
 * and memset all of @dst on failure.
 *
 * Return: The number of copied bytes on success including the NUL terminator.
 * A negative error code on failure.
 */
__bpf_kfunc int bpf_copy_from_user_task_str(void *dst, u32 dst__sz,
                                            const void __user *unsafe_ptr__ign,
                                            struct task_struct *tsk, u64 flags)
{
        int ret;

        if (unlikely(flags & ~BPF_F_PAD_ZEROS))
                return -EINVAL;

        if (unlikely(dst__sz == 0))
                return 0;

        ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_ptr__ign, dst, dst__sz, 0);
        if (ret < 0) {
                if (flags & BPF_F_PAD_ZEROS)
                        memset(dst, 0, dst__sz);
                return ret;
        }

        if (flags & BPF_F_PAD_ZEROS)
                memset(dst + ret, 0, dst__sz - ret);

        return ret + 1;
}
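
/*
 * Example usage (illustrative sketch; `tsk`, `user_ptr` and the surrounding
 * program are hypothetical, and the kfunc declaration is assumed to come from
 * vmlinux.h). Same contract as bpf_copy_from_user_str(), but the string is
 * read from the address space of `tsk` rather than from current:
 *
 *	char arg[32];
 *	int n;
 *
 *	n = bpf_copy_from_user_task_str(arg, sizeof(arg), user_ptr,
 *					tsk, BPF_F_PAD_ZEROS);
 *	if (n < 0)
 *		return 0;
 */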

/* Keep unsigned long in the prototype so that the kfunc is usable when emitted
 * to vmlinux.h in BPF programs directly, but note that while in a BPF prog the
 * unsigned long always points to an 8-byte region on the stack, the kernel may
 * only read and write the lower 4 bytes of it on 32-bit architectures.
 */
__bpf_kfunc void bpf_local_irq_save(unsigned long *flags__irq_flag)
{
        local_irq_save(*flags__irq_flag);
}

__bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag)
{
        local_irq_restore(*flags__irq_flag);
}
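
/*
 * Example pairing in a BPF program (illustrative sketch; assumes the kfunc
 * declarations from vmlinux.h). The flags value lives in an 8-byte stack slot
 * as described above, and every save is expected to be paired with a restore
 * of the same slot:
 *
 *	unsigned long flags;
 *
 *	bpf_local_irq_save(&flags);
 *	// ... short critical section ...
 *	bpf_local_irq_restore(&flags);
 */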

__bpf_kfunc void __bpf_trap(void)
{
}

/*
 * Kfuncs for string operations.
 *
 * Since strings are not necessarily %NUL-terminated, we cannot directly call
 * the in-kernel implementations. Instead, we open-code them using
 * __get_kernel_nofault rather than plain dereferences to make them safe.
 */

static int __bpf_strncasecmp(const char *s1, const char *s2, bool ignore_case, size_t len)
{
        char c1, c2;
        int i;

        if (!copy_from_kernel_nofault_allowed(s1, 1) ||
            !copy_from_kernel_nofault_allowed(s2, 1)) {
                return -ERANGE;
        }

        guard(pagefault)();
        for (i = 0; i < len && i < XATTR_SIZE_MAX; i++) {
                __get_kernel_nofault(&c1, s1, char, err_out);
                __get_kernel_nofault(&c2, s2, char, err_out);
                if (ignore_case) {
                        c1 = tolower(c1);
                        c2 = tolower(c2);
                }
                if (c1 != c2)
                        return c1 < c2 ? -1 : 1;
                if (c1 == '\0')
                        return 0;
                s1++;
                s2++;
        }
        return i == XATTR_SIZE_MAX ? -E2BIG : 0;
err_out:
        return -EFAULT;
}

/**
 * bpf_strcmp - Compare two strings
 * @s1__ign: One string
 * @s2__ign: Another string
 *
 * Return:
 * * %0       - Strings are equal
 * * %-1      - @s1__ign is smaller
 * * %1       - @s2__ign is smaller
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG  - One of the strings is too large
 * * %-ERANGE - One of strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign)
{
        return __bpf_strncasecmp(s1__ign, s2__ign, false, XATTR_SIZE_MAX);
}
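
/*
 * Example usage (illustrative sketch; `task` is assumed to be obtained e.g.
 * via bpf_get_current_task_btf(), and the string literal lives in the BPF
 * program's read-only data, so both pointers are readable kernel memory):
 *
 *	int ret;
 *
 *	ret = bpf_strcmp(task->comm, "rsyslogd");
 *	if (ret == 0) {
 *		// strings are equal
 *	} else if (ret < -1) {
 *		// -EFAULT/-E2BIG/-ERANGE: comparison could not be performed
 *	}
 */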

/**
 * bpf_strcasecmp - Compare two strings, ignoring the case of the characters
 * @s1__ign: One string
 * @s2__ign: Another string
 *
 * Return:
 * * %0       - Strings are equal
 * * %-1      - @s1__ign is smaller
 * * %1       - @s2__ign is smaller
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG  - One of the strings is too large
 * * %-ERANGE - One of strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strcasecmp(const char *s1__ign, const char *s2__ign)
{
        return __bpf_strncasecmp(s1__ign, s2__ign, true, XATTR_SIZE_MAX);
}

/**
 * bpf_strncasecmp - Compare two length-limited strings, ignoring case
 * @s1__ign: One string
 * @s2__ign: Another string
 * @len: The maximum number of characters to compare
 *
 * Return:
 * * %0       - Strings are equal
 * * %-1      - @s1__ign is smaller
 * * %1       - @s2__ign is smaller
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG  - One of the strings is too large
 * * %-ERANGE - One of strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strncasecmp(const char *s1__ign, const char *s2__ign, size_t len)
{
        return __bpf_strncasecmp(s1__ign, s2__ign, true, len);
}

/**
 * bpf_strnchr - Find a character in a length limited string
 * @s__ign: The string to be searched
 * @count: The number of characters to be searched
 * @c: The character to search for
 *
 * Note that the %NUL-terminator is considered part of the string, and can
 * be searched for.
 *
 * Return:
 * * >=0      - Index of the first occurrence of @c within @s__ign
 * * %-ENOENT - @c not found in the first @count characters of @s__ign
 * * %-EFAULT - Cannot read @s__ign
 * * %-E2BIG  - @s__ign is too large
 * * %-ERANGE - @s__ign is outside of kernel address space
 */
__bpf_kfunc int bpf_strnchr(const char *s__ign, size_t count, char c)
{
        char sc;
        int i;

        if (!copy_from_kernel_nofault_allowed(s__ign, 1))
                return -ERANGE;

        guard(pagefault)();
        for (i = 0; i < count && i < XATTR_SIZE_MAX; i++) {
                __get_kernel_nofault(&sc, s__ign, char, err_out);
                if (sc == c)
                        return i;
                if (sc == '\0')
                        return -ENOENT;
                s__ign++;
        }
        return i == XATTR_SIZE_MAX ? -E2BIG : -ENOENT;
err_out:
        return -EFAULT;
}
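
/*
 * Example usage (illustrative sketch; `name` is hypothetical and assumed to
 * point to readable kernel memory):
 *
 *	int idx = bpf_strnchr(name, 16, '/');
 *
 *	// idx >= 0:      '/' first occurs at offset idx within the first 16 chars
 *	// idx == -ENOENT: no '/' before the NUL or within the first 16 chars
 */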

/**
 * bpf_strchr - Find the first occurrence of a character in a string
 * @s__ign: The string to be searched
 * @c: The character to search for
 *
 * Note that the %NUL-terminator is considered part of the string, and can
 * be searched for.
 *
 * Return:
 * * >=0      - The index of the first occurrence of @c within @s__ign
 * * %-ENOENT - @c not found in @s__ign
 * * %-EFAULT - Cannot read @s__ign
 * * %-E2BIG  - @s__ign is too large
 * * %-ERANGE - @s__ign is outside of kernel address space
 */
__bpf_kfunc int bpf_strchr(const char *s__ign, char c)
{
        return bpf_strnchr(s__ign, XATTR_SIZE_MAX, c);
}

/**
 * bpf_strchrnul - Find and return a character in a string, or end of string
 * @s__ign: The string to be searched
 * @c: The character to search for
 *
 * Return:
 * * >=0      - Index of the first occurrence of @c within @s__ign or index of
 *              the null byte at the end of @s__ign when @c is not found
 * * %-EFAULT - Cannot read @s__ign
 * * %-E2BIG  - @s__ign is too large
 * * %-ERANGE - @s__ign is outside of kernel address space
 */
__bpf_kfunc int bpf_strchrnul(const char *s__ign, char c)
{
        char sc;
        int i;

        if (!copy_from_kernel_nofault_allowed(s__ign, 1))
                return -ERANGE;

        guard(pagefault)();
        for (i = 0; i < XATTR_SIZE_MAX; i++) {
                __get_kernel_nofault(&sc, s__ign, char, err_out);
                if (sc == '\0' || sc == c)
                        return i;
                s__ign++;
        }
        return -E2BIG;
err_out:
        return -EFAULT;
}

/**
 * bpf_strrchr - Find the last occurrence of a character in a string
 * @s__ign: The string to be searched
 * @c: The character to search for
 *
 * Return:
 * * >=0      - Index of the last occurrence of @c within @s__ign
 * * %-ENOENT - @c not found in @s__ign
 * * %-EFAULT - Cannot read @s__ign
 * * %-E2BIG  - @s__ign is too large
 * * %-ERANGE - @s__ign is outside of kernel address space
 */
__bpf_kfunc int bpf_strrchr(const char *s__ign, int c)
{
        char sc;
        int i, last = -ENOENT;

        if (!copy_from_kernel_nofault_allowed(s__ign, 1))
                return -ERANGE;

        guard(pagefault)();
        for (i = 0; i < XATTR_SIZE_MAX; i++) {
                __get_kernel_nofault(&sc, s__ign, char, err_out);
                if (sc == c)
                        last = i;
                if (sc == '\0')
                        return last;
                s__ign++;
        }
        return -E2BIG;
err_out:
        return -EFAULT;
}

/**
 * bpf_strnlen - Calculate the length of a length-limited string
 * @s__ign: The string
 * @count: The maximum number of characters to count
 *
 * Return:
 * * >=0      - The length of @s__ign
 * * %-EFAULT - Cannot read @s__ign
 * * %-E2BIG  - @s__ign is too large
 * * %-ERANGE - @s__ign is outside of kernel address space
 */
__bpf_kfunc int bpf_strnlen(const char *s__ign, size_t count)
{
        char c;
        int i;

        if (!copy_from_kernel_nofault_allowed(s__ign, 1))
                return -ERANGE;

        guard(pagefault)();
        for (i = 0; i < count && i < XATTR_SIZE_MAX; i++) {
                __get_kernel_nofault(&c, s__ign, char, err_out);
                if (c == '\0')
                        return i;
                s__ign++;
        }
        return i == XATTR_SIZE_MAX ? -E2BIG : i;
err_out:
        return -EFAULT;
}

/**
 * bpf_strlen - Calculate the length of a string
 * @s__ign: The string
 *
 * Return:
 * * >=0      - The length of @s__ign
 * * %-EFAULT - Cannot read @s__ign
 * * %-E2BIG  - @s__ign is too large
 * * %-ERANGE - @s__ign is outside of kernel address space
 */
__bpf_kfunc int bpf_strlen(const char *s__ign)
{
        return bpf_strnlen(s__ign, XATTR_SIZE_MAX);
}

/**
 * bpf_strspn - Calculate the length of the initial substring of @s__ign which
 *              only contains characters from @accept__ign
 * @s__ign: The string to be searched
 * @accept__ign: The string of characters to accept
 *
 * Return:
 * * >=0      - The length of the initial substring of @s__ign which only
 *              contains characters from @accept__ign
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG  - One of the strings is too large
 * * %-ERANGE - One of the strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strspn(const char *s__ign, const char *accept__ign)
{
        char cs, ca;
        int i, j;

        if (!copy_from_kernel_nofault_allowed(s__ign, 1) ||
            !copy_from_kernel_nofault_allowed(accept__ign, 1)) {
                return -ERANGE;
        }

        guard(pagefault)();
        for (i = 0; i < XATTR_SIZE_MAX; i++) {
                __get_kernel_nofault(&cs, s__ign, char, err_out);
                if (cs == '\0')
                        return i;
                for (j = 0; j < XATTR_SIZE_MAX; j++) {
                        __get_kernel_nofault(&ca, accept__ign + j, char, err_out);
                        if (cs == ca || ca == '\0')
                                break;
                }
                if (j == XATTR_SIZE_MAX)
                        return -E2BIG;
                if (ca == '\0')
                        return i;
                s__ign++;
        }
        return -E2BIG;
err_out:
        return -EFAULT;
}

/**
 * bpf_strcspn - Calculate the length of the initial substring of @s__ign which
 *               does not contain characters from @reject__ign
 * @s__ign: The string to be searched
 * @reject__ign: The string of characters to reject
 *
 * Return:
 * * >=0      - The length of the initial substring of @s__ign which does not
 *              contain characters from @reject__ign
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG  - One of the strings is too large
 * * %-ERANGE - One of the strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strcspn(const char *s__ign, const char *reject__ign)
{
        char cs, cr;
        int i, j;

        if (!copy_from_kernel_nofault_allowed(s__ign, 1) ||
            !copy_from_kernel_nofault_allowed(reject__ign, 1)) {
                return -ERANGE;
        }

        guard(pagefault)();
        for (i = 0; i < XATTR_SIZE_MAX; i++) {
                __get_kernel_nofault(&cs, s__ign, char, err_out);
                if (cs == '\0')
                        return i;
                for (j = 0; j < XATTR_SIZE_MAX; j++) {
                        __get_kernel_nofault(&cr, reject__ign + j, char, err_out);
                        if (cs == cr || cr == '\0')
                                break;
                }
                if (j == XATTR_SIZE_MAX)
                        return -E2BIG;
                if (cr != '\0')
                        return i;
                s__ign++;
        }
        return -E2BIG;
err_out:
        return -EFAULT;
}

static int __bpf_strnstr(const char *s1, const char *s2, size_t len,
                         bool ignore_case)
{
        char c1, c2;
        int i, j;

        if (!copy_from_kernel_nofault_allowed(s1, 1) ||
            !copy_from_kernel_nofault_allowed(s2, 1)) {
                return -ERANGE;
        }

        guard(pagefault)();
        for (i = 0; i < XATTR_SIZE_MAX; i++) {
                for (j = 0; i + j <= len && j < XATTR_SIZE_MAX; j++) {
                        __get_kernel_nofault(&c2, s2 + j, char, err_out);
                        if (c2 == '\0')
                                return i;
                        /*
                         * We allow reading an extra byte from s2 (note the
                         * `i + j <= len` above) to cover the case when s2 is
                         * a suffix of the first len chars of s1.
                         */
                        if (i + j == len)
                                break;
                        __get_kernel_nofault(&c1, s1 + j, char, err_out);

                        if (ignore_case) {
                                c1 = tolower(c1);
                                c2 = tolower(c2);
                        }

                        if (c1 == '\0')
                                return -ENOENT;
                        if (c1 != c2)
                                break;
                }
                if (j == XATTR_SIZE_MAX)
                        return -E2BIG;
                if (i + j == len)
                        return -ENOENT;
                s1++;
        }
        return -E2BIG;
err_out:
        return -EFAULT;
}

/**
 * bpf_strstr - Find the first substring in a string
 * @s1__ign: The string to be searched
 * @s2__ign: The string to search for
 *
 * Return:
 * * >=0      - Index of the first character of the first occurrence of @s2__ign
 *              within @s1__ign
 * * %-ENOENT - @s2__ign is not a substring of @s1__ign
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG  - One of the strings is too large
 * * %-ERANGE - One of the strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign)
{
        return __bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX, false);
}

/**
 * bpf_strcasestr - Find the first substring in a string, ignoring the case of
 *                  the characters
 * @s1__ign: The string to be searched
 * @s2__ign: The string to search for
 *
 * Return:
 * * >=0      - Index of the first character of the first occurrence of @s2__ign
 *              within @s1__ign
 * * %-ENOENT - @s2__ign is not a substring of @s1__ign
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG  - One of the strings is too large
 * * %-ERANGE - One of the strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strcasestr(const char *s1__ign, const char *s2__ign)
{
        return __bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX, true);
}

/**
 * bpf_strnstr - Find the first substring in a length-limited string
 * @s1__ign: The string to be searched
 * @s2__ign: The string to search for
 * @len: the maximum number of characters to search
 *
 * Return:
 * * >=0      - Index of the first character of the first occurrence of @s2__ign
 *              within the first @len characters of @s1__ign
 * * %-ENOENT - @s2__ign not found in the first @len characters of @s1__ign
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG  - One of the strings is too large
 * * %-ERANGE - One of the strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign,
                            size_t len)
{
        return __bpf_strnstr(s1__ign, s2__ign, len, false);
}

/**
 * bpf_strncasestr - Find the first substring in a length-limited string,
 *                   ignoring the case of the characters
 * @s1__ign: The string to be searched
 * @s2__ign: The string to search for
 * @len: the maximum number of characters to search
 *
 * Return:
 * * >=0      - Index of the first character of the first occurrence of @s2__ign
 *              within the first @len characters of @s1__ign
 * * %-ENOENT - @s2__ign not found in the first @len characters of @s1__ign
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG  - One of the strings is too large
 * * %-ERANGE - One of the strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strncasestr(const char *s1__ign, const char *s2__ign,
                                size_t len)
{
        return __bpf_strnstr(s1__ign, s2__ign, len, true);
}
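
/*
 * Example usage of the substring kfuncs (illustrative sketch; `path` is
 * hypothetical and assumed to point to readable kernel memory):
 *
 *	int pos;
 *
 *	pos = bpf_strstr(path, ".so");
 *	if (pos >= 0)
 *		bpf_printk(".so first occurs at byte offset %d", pos);
 *
 *	pos = bpf_strnstr(path, ".so", 64);
 *	// same, but only the first 64 characters of path are considered
 */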

#ifdef CONFIG_KEYS
/**
 * bpf_lookup_user_key - lookup a key by its serial
 * @serial: key handle serial number
 * @flags: lookup-specific flags
 *
 * Search a key with a given *serial* and the provided *flags*.
 * If found, increment the reference count of the key by one, and
 * return it in the bpf_key structure.
 *
 * The bpf_key structure must be passed to bpf_key_put() when done
 * with it, so that the key reference count is decremented and the
 * bpf_key structure is freed.
 *
 * Permission checks are deferred to the time the key is used by
 * one of the available key-specific kfuncs.
 *
 * Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested
 * special keyring (e.g. session keyring), if it doesn't yet exist.
 * Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting
 * for the key construction, and to retrieve uninstantiated keys (keys
 * without data attached to them).
 *
 * Return: a bpf_key pointer with a valid key pointer if the key is found, a
 *         NULL pointer otherwise.
 */
__bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags)
{
        key_ref_t key_ref;
        struct bpf_key *bkey;

        if (flags & ~KEY_LOOKUP_ALL)
                return NULL;

        /*
         * Permission check is deferred until the key is used, as the
         * intent of the caller is unknown here.
         */
        key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
        if (IS_ERR(key_ref))
                return NULL;

        bkey = kmalloc_obj(*bkey);
        if (!bkey) {
                key_put(key_ref_to_ptr(key_ref));
                return NULL;
        }

        bkey->key = key_ref_to_ptr(key_ref);
        bkey->has_ref = true;

        return bkey;
}
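
/*
 * Example usage (illustrative sketch; `serial` is assumed to be a key serial
 * known to the program, e.g. passed in from user space):
 *
 *	struct bpf_key *bkey;
 *
 *	bkey = bpf_lookup_user_key(serial, KEY_LOOKUP_CREATE);
 *	if (!bkey)
 *		return 0;
 *	// ... use bkey, e.g. with bpf_verify_pkcs7_signature() ...
 *	bpf_key_put(bkey);
 */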

/**
 * bpf_lookup_system_key - lookup a key by a system-defined ID
 * @id: key ID
 *
 * Obtain a bpf_key structure with a key pointer set to the passed key ID.
 * The key pointer is marked as invalid, to prevent bpf_key_put() from
 * attempting to decrement the key reference count on that pointer. The key
 * pointer set in such way is currently understood only by
 * verify_pkcs7_signature().
 *
 * Set *id* to one of the values defined in include/linux/verification.h:
 * 0 for the primary keyring (immutable keyring of system keys);
 * VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
 * (where keys can be added only if they are vouched for by existing keys
 * in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
 * keyring (primarily used by the integrity subsystem to verify a kexec'ed
 * kernel image and, possibly, the initramfs signature).
 *
 * Return: a bpf_key pointer with an invalid key pointer set from the
 *         pre-determined ID on success, a NULL pointer otherwise
 */
__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
{
        struct bpf_key *bkey;

        if (system_keyring_id_check(id) < 0)
                return NULL;

        bkey = kmalloc_obj(*bkey, GFP_ATOMIC);
        if (!bkey)
                return NULL;

        bkey->key = (struct key *)(unsigned long)id;
        bkey->has_ref = false;

        return bkey;
}

/**
 * bpf_key_put - decrement key reference count if key is valid and free bpf_key
 * @bkey: bpf_key structure
 *
 * Decrement the reference count of the key inside *bkey*, if the pointer
 * is valid, and free *bkey*.
 */
__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
{
        if (bkey->has_ref)
                key_put(bkey->key);

        kfree(bkey);
}

/**
 * bpf_verify_pkcs7_signature - verify a PKCS#7 signature
 * @data_p: data to verify
 * @sig_p: signature of the data
 * @trusted_keyring: keyring with keys trusted for signature verification
 *
 * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr*
 * with keys in a keyring referenced by *trusted_keyring*.
 *
 * Return: 0 on success, a negative value on error.
 */
__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
                               struct bpf_dynptr *sig_p,
                               struct bpf_key *trusted_keyring)
{
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
        struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p;
        struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p;
        const void *data, *sig;
        u32 data_len, sig_len;
        int ret;

        if (trusted_keyring->has_ref) {
                /*
                 * Do the permission check deferred in bpf_lookup_user_key().
                 * See bpf_lookup_user_key() for more details.
                 *
                 * A call to key_task_permission() here would be redundant, as
                 * it is already done by keyring_search() called by
                 * find_asymmetric_key().
                 */
                ret = key_validate(trusted_keyring->key);
                if (ret < 0)
                        return ret;
        }

        data_len = __bpf_dynptr_size(data_ptr);
        data = __bpf_dynptr_data(data_ptr, data_len);
        sig_len = __bpf_dynptr_size(sig_ptr);
        sig = __bpf_dynptr_data(sig_ptr, sig_len);

        return verify_pkcs7_signature(data, data_len, sig, sig_len,
                                      trusted_keyring->key,
                                      VERIFYING_BPF_SIGNATURE, NULL,
                                      NULL);
#else
        return -EOPNOTSUPP;
#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
}
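
/*
 * Example verification flow (illustrative sketch; the data/sig buffers and
 * their sizes are hypothetical, bpf_dynptr_from_mem() is the generic dynptr
 * helper, and the kfunc declarations are assumed to come from vmlinux.h):
 *
 *	struct bpf_dynptr data_ptr, sig_ptr;
 *	struct bpf_key *trusted;
 *	int err;
 *
 *	bpf_dynptr_from_mem(data, data_len, 0, &data_ptr);
 *	bpf_dynptr_from_mem(sig, sig_len, 0, &sig_ptr);
 *
 *	trusted = bpf_lookup_system_key(VERIFY_USE_SECONDARY_KEYRING);
 *	if (!trusted)
 *		return 0;
 *	err = bpf_verify_pkcs7_signature(&data_ptr, &sig_ptr, trusted);
 *	bpf_key_put(trusted);
 */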
#endif /* CONFIG_KEYS */

typedef int (*bpf_task_work_callback_t)(struct bpf_map *map, void *key, void *value);

enum bpf_task_work_state {
        /* bpf_task_work is ready to be used */
        BPF_TW_STANDBY = 0,
        /* irq work scheduling in progress */
        BPF_TW_PENDING,
        /* task work scheduling in progress */
        BPF_TW_SCHEDULING,
        /* task work is scheduled successfully */
        BPF_TW_SCHEDULED,
        /* callback is running */
        BPF_TW_RUNNING,
        /* associated BPF map value is deleted */
        BPF_TW_FREED,
};
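
/*
 * Typical bpf_task_work_ctx state transitions (driven by the functions below):
 *
 *   STANDBY    -> PENDING      bpf_task_work_schedule() claimed the context
 *   PENDING    -> SCHEDULING   bpf_task_work_irq() is about to call task_work_add()
 *   SCHEDULING -> SCHEDULED    task_work was queued successfully
 *   SCHEDULING/SCHEDULED -> RUNNING  bpf_task_work_callback() started executing
 *   RUNNING    -> STANDBY      callback finished, context may be reused
 *   any state  -> FREED        map value was deleted (bpf_task_work_cancel_and_free())
 */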

struct bpf_task_work_ctx {
        enum bpf_task_work_state state;
        refcount_t refcnt;
        struct callback_head work;
        struct irq_work irq_work;
        /* bpf_prog that schedules task work */
        struct bpf_prog *prog;
        /* task for which callback is scheduled */
        struct task_struct *task;
        /* the map and map value associated with this context */
        struct bpf_map *map;
        void *map_val;
        enum task_work_notify_mode mode;
        bpf_task_work_callback_t callback_fn;
        struct rcu_head rcu;
} __aligned(8);

/* Actual type for struct bpf_task_work */
struct bpf_task_work_kern {
        struct bpf_task_work_ctx *ctx;
};

static void bpf_task_work_ctx_reset(struct bpf_task_work_ctx *ctx)
{
        if (ctx->prog) {
                bpf_prog_put(ctx->prog);
                ctx->prog = NULL;
        }
        if (ctx->task) {
                bpf_task_release(ctx->task);
                ctx->task = NULL;
        }
}

static bool bpf_task_work_ctx_tryget(struct bpf_task_work_ctx *ctx)
{
        return refcount_inc_not_zero(&ctx->refcnt);
}

static void bpf_task_work_ctx_put(struct bpf_task_work_ctx *ctx)
{
        if (!refcount_dec_and_test(&ctx->refcnt))
                return;

        bpf_task_work_ctx_reset(ctx);

        /* bpf_mem_free expects migration to be disabled */
        migrate_disable();
        bpf_mem_free(&bpf_global_ma, ctx);
        migrate_enable();
}

static void bpf_task_work_cancel(struct bpf_task_work_ctx *ctx)
{
        /*
         * Scheduled task_work callback holds ctx ref, so if we successfully
         * cancelled, we put that ref on callback's behalf. If we couldn't
         * cancel, callback will inevitably run or has already completed
         * running, and it would have taken care of its ctx ref itself.
         */
        if (task_work_cancel(ctx->task, &ctx->work))
                bpf_task_work_ctx_put(ctx);
}

static void bpf_task_work_callback(struct callback_head *cb)
{
        struct bpf_task_work_ctx *ctx = container_of(cb, struct bpf_task_work_ctx, work);
        enum bpf_task_work_state state;
        u32 idx;
        void *key;

        /* Read lock is needed to protect ctx and map key/value access */
        guard(rcu_tasks_trace)();
        /*
         * This callback may start running before bpf_task_work_irq() switched to
         * SCHEDULED state, so handle both transition variants SCHEDULING|SCHEDULED -> RUNNING.
         */
        state = cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_RUNNING);
        if (state == BPF_TW_SCHEDULED)
                state = cmpxchg(&ctx->state, BPF_TW_SCHEDULED, BPF_TW_RUNNING);
        if (state == BPF_TW_FREED) {
                bpf_task_work_ctx_put(ctx);
                return;
        }

        key = (void *)map_key_from_value(ctx->map, ctx->map_val, &idx);

        migrate_disable();
        ctx->callback_fn(ctx->map, key, ctx->map_val);
        migrate_enable();

        bpf_task_work_ctx_reset(ctx);
        (void)cmpxchg(&ctx->state, BPF_TW_RUNNING, BPF_TW_STANDBY);

        bpf_task_work_ctx_put(ctx);
}

static void bpf_task_work_irq(struct irq_work *irq_work)
{
        struct bpf_task_work_ctx *ctx = container_of(irq_work, struct bpf_task_work_ctx, irq_work);
        enum bpf_task_work_state state;
        int err;

        guard(rcu_tasks_trace)();

        if (cmpxchg(&ctx->state, BPF_TW_PENDING, BPF_TW_SCHEDULING) != BPF_TW_PENDING) {
                bpf_task_work_ctx_put(ctx);
                return;
        }

        err = task_work_add(ctx->task, &ctx->work, ctx->mode);
        if (err) {
                bpf_task_work_ctx_reset(ctx);
                /*
                 * try to switch back to STANDBY for another task_work reuse, but we might have
                 * gone to FREED already, which is fine as we already cleaned up after ourselves
                 */
                (void)cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_STANDBY);
                bpf_task_work_ctx_put(ctx);
                return;
        }

        /*
	 * It's technically possible for the just-scheduled task_work callback to
	 * complete running by now, going SCHEDULING -> RUNNING and then dropping
	 * its ctx refcount. Instead of capturing an extra ref just to protect the
	 * ctx->state access below, we rely on RCU protection to perform the
	 * SCHEDULING -> SCHEDULED attempt below.
         */
        state = cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_SCHEDULED);
        if (state == BPF_TW_FREED)
                bpf_task_work_cancel(ctx); /* clean up if we switched into FREED state */
}

static struct bpf_task_work_ctx *bpf_task_work_fetch_ctx(struct bpf_task_work *tw,
                                                         struct bpf_map *map)
{
        struct bpf_task_work_kern *twk = (void *)tw;
        struct bpf_task_work_ctx *ctx, *old_ctx;

        ctx = READ_ONCE(twk->ctx);
        if (ctx)
                return ctx;

        ctx = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_task_work_ctx));
        if (!ctx)
                return ERR_PTR(-ENOMEM);

        memset(ctx, 0, sizeof(*ctx));
        refcount_set(&ctx->refcnt, 1); /* map's own ref */
        ctx->state = BPF_TW_STANDBY;

        old_ctx = cmpxchg(&twk->ctx, NULL, ctx);
        if (old_ctx) {
                /*
                 * tw->ctx is set by concurrent BPF program, release allocated
                 * memory and try to reuse already set context.
                 */
                bpf_mem_free(&bpf_global_ma, ctx);
                return old_ctx;
        }

        return ctx; /* Success */
}

static struct bpf_task_work_ctx *bpf_task_work_acquire_ctx(struct bpf_task_work *tw,
                                                           struct bpf_map *map)
{
        struct bpf_task_work_ctx *ctx;

        ctx = bpf_task_work_fetch_ctx(tw, map);
        if (IS_ERR(ctx))
                return ctx;

        /* try to get ref for task_work callback to hold */
        if (!bpf_task_work_ctx_tryget(ctx))
                return ERR_PTR(-EBUSY);

        if (cmpxchg(&ctx->state, BPF_TW_STANDBY, BPF_TW_PENDING) != BPF_TW_STANDBY) {
                /* lost acquiring race or map_release_uref() stole it from us, put ref and bail */
                bpf_task_work_ctx_put(ctx);
                return ERR_PTR(-EBUSY);
        }

        /*
         * If no process or bpffs is holding a reference to the map, no new callbacks should be
         * scheduled. This does not address any race or correctness issue, but rather is a policy
         * choice: dropping user references should stop everything.
         */
        if (!atomic64_read(&map->usercnt)) {
                /* drop ref we just got for task_work callback itself */
                bpf_task_work_ctx_put(ctx);
                /* transfer map's ref into cancel_and_free() */
                bpf_task_work_cancel_and_free(tw);
                return ERR_PTR(-EBUSY);
        }

        return ctx;
}

static int bpf_task_work_schedule(struct task_struct *task, struct bpf_task_work *tw,
                                  struct bpf_map *map, bpf_task_work_callback_t callback_fn,
                                  struct bpf_prog_aux *aux, enum task_work_notify_mode mode)
{
        struct bpf_prog *prog;
        struct bpf_task_work_ctx *ctx;
        int err;

        BTF_TYPE_EMIT(struct bpf_task_work);

        prog = bpf_prog_inc_not_zero(aux->prog);
        if (IS_ERR(prog))
                return -EBADF;
        task = bpf_task_acquire(task);
        if (!task) {
                err = -EBADF;
                goto release_prog;
        }

        ctx = bpf_task_work_acquire_ctx(tw, map);
        if (IS_ERR(ctx)) {
                err = PTR_ERR(ctx);
                goto release_all;
        }

        ctx->task = task;
        ctx->callback_fn = callback_fn;
        ctx->prog = prog;
        ctx->mode = mode;
        ctx->map = map;
        ctx->map_val = (void *)tw - map->record->task_work_off;
        init_task_work(&ctx->work, bpf_task_work_callback);
        init_irq_work(&ctx->irq_work, bpf_task_work_irq);

        irq_work_queue(&ctx->irq_work);
        return 0;

release_all:
        bpf_task_release(task);
release_prog:
        bpf_prog_put(prog);
        return err;
}

/**
 * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add() in TWA_SIGNAL mode
 * @task: Task struct for which callback should be scheduled
 * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
 * @map__map: bpf_map that embeds struct bpf_task_work in the values
 * @callback: pointer to BPF subprogram to call
 * @aux: pointer to bpf_prog_aux of the caller BPF program, implicitly set by the verifier
 *
 * Return: 0 if task work has been scheduled successfully, negative error code otherwise
 */
__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw,
                                              void *map__map, bpf_task_work_callback_t callback,
                                              struct bpf_prog_aux *aux)
{
        return bpf_task_work_schedule(task, tw, map__map, callback, aux, TWA_SIGNAL);
}

/**
 * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add() in TWA_RESUME mode
 * @task: Task struct for which callback should be scheduled
 * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
 * @map__map: bpf_map that embeds struct bpf_task_work in the values
 * @callback: pointer to BPF subprogram to call
 * @aux: pointer to bpf_prog_aux of the caller BPF program, implicitly set by the verifier
 *
 * Return: 0 if task work has been scheduled successfully, negative error code otherwise
 */
__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw,
                                              void *map__map, bpf_task_work_callback_t callback,
                                              struct bpf_prog_aux *aux)
{
        return bpf_task_work_schedule(task, tw, map__map, callback, aux, TWA_RESUME);
}
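
/*
 * Example usage from a BPF program (illustrative sketch; the map, map value
 * layout, callback body and program context are hypothetical, and the kfunc
 * declarations are assumed to come from vmlinux.h). @aux is supplied
 * implicitly by the verifier, so the BPF-side call takes four arguments:
 *
 *	struct elem {
 *		struct bpf_task_work tw;
 *	};
 *
 *	static int process_in_task(struct bpf_map *map, void *key, void *value)
 *	{
 *		// runs later, in the context of the target task
 *		return 0;
 *	}
 *
 *	// with `task` and `val` (a pointer to struct elem) already looked up:
 *	bpf_task_work_schedule_signal(task, &val->tw, &task_work_map,
 *				      process_in_task);
 */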

static int make_file_dynptr(struct file *file, u32 flags, bool may_sleep,
                            struct bpf_dynptr_kern *ptr)
{
        struct bpf_dynptr_file_impl *state;

        /* flags is currently unsupported */
        if (flags) {
                bpf_dynptr_set_null(ptr);
                return -EINVAL;
        }

        state = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_dynptr_file_impl));
        if (!state) {
                bpf_dynptr_set_null(ptr);
                return -ENOMEM;
        }
        state->offset = 0;
	state->size = U64_MAX; /* Don't restrict size, as the file may change anyway */
        freader_init_from_file(&state->freader, NULL, 0, file, may_sleep);
        bpf_dynptr_init(ptr, state, BPF_DYNPTR_TYPE_FILE, 0, 0);
        bpf_dynptr_set_rdonly(ptr);
        return 0;
}

__bpf_kfunc int bpf_dynptr_from_file(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit)
{
        return make_file_dynptr(file, flags, false, (struct bpf_dynptr_kern *)ptr__uninit);
}

int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit)
{
        return make_file_dynptr(file, flags, true, (struct bpf_dynptr_kern *)ptr__uninit);
}

__bpf_kfunc int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr)
{
        struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)dynptr;
        struct bpf_dynptr_file_impl *df = ptr->data;

        if (!df)
                return 0;

        freader_cleanup(&df->freader);
        bpf_mem_free(&bpf_global_ma, df);
        bpf_dynptr_set_null(ptr);
        return 0;
}
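
/*
 * Example usage (illustrative sketch; `file` is assumed to be a valid struct
 * file pointer available to the program, and reading through the dynptr is
 * assumed to go through the usual dynptr read interfaces, e.g.
 * bpf_dynptr_read()):
 *
 *	struct bpf_dynptr fptr;
 *	char hdr[16];
 *
 *	if (!bpf_dynptr_from_file(file, 0, &fptr)) {
 *		bpf_dynptr_read(hdr, sizeof(hdr), &fptr, 0, 0);
 *		bpf_dynptr_file_discard(&fptr);
 *	}
 */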

/**
 * bpf_timer_cancel_async - try to deactivate a timer
 * @timer:      bpf_timer to stop
 *
 * Returns:
 *
 *  *  0 when the timer was not active
 *  *  1 when the timer was active
 *  * -1 when the timer is currently executing the callback function and
 *       cannot be stopped
 *  * -ECANCELED when the timer will be cancelled asynchronously
 *  * -ENOMEM when out of memory
 *  * -EINVAL when the timer was not initialized
 *  * -ENOENT when this kfunc is racing with timer deletion
 */
__bpf_kfunc int bpf_timer_cancel_async(struct bpf_timer *timer)
{
        struct bpf_async_kern *async = (void *)timer;
        struct bpf_async_cb *cb;
        int ret;

        cb = READ_ONCE(async->cb);
        if (!cb)
                return -EINVAL;

	/*
	 * Unlike hrtimer_start(), it's ok to synchronously call
	 * hrtimer_try_to_cancel() when the refcnt has reached zero, but deferring
	 * to irq_work is not, since the irq callback may execute after an RCU
	 * grace period and cb could be freed by then. Check for refcnt zero
	 * anyway, for consistency.
	 */
        if (!refcount_inc_not_zero(&cb->refcnt))
                return -ENOENT;

        if (!defer_timer_wq_op()) {
                struct bpf_hrtimer *t = container_of(cb, struct bpf_hrtimer, cb);

                ret = hrtimer_try_to_cancel(&t->timer);
                bpf_async_refcount_put(cb);
                return ret;
        } else {
                ret = bpf_async_schedule_op(cb, BPF_ASYNC_CANCEL, 0, 0);
                return ret ? ret : -ECANCELED;
        }
}
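
/*
 * Example usage (illustrative sketch; assumes a map value embedding a struct
 * bpf_timer that was previously set up with bpf_timer_init(),
 * bpf_timer_set_callback() and bpf_timer_start()):
 *
 *	int ret = bpf_timer_cancel_async(&val->timer);
 *
 *	// ret == 0:          timer was not active
 *	// ret == 1:          timer was active and has been cancelled
 *	// ret == -ECANCELED: cancellation deferred, completes asynchronously
 *	// other ret < 0:     error, see the list above
 */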

__bpf_kfunc_end_defs();

static void bpf_task_work_cancel_scheduled(struct irq_work *irq_work)
{
        struct bpf_task_work_ctx *ctx = container_of(irq_work, struct bpf_task_work_ctx, irq_work);

        bpf_task_work_cancel(ctx); /* this might put task_work callback's ref */
        bpf_task_work_ctx_put(ctx); /* and here we put map's own ref that was transferred to us */
}

void bpf_task_work_cancel_and_free(void *val)
{
        struct bpf_task_work_kern *twk = val;
        struct bpf_task_work_ctx *ctx;
        enum bpf_task_work_state state;

        ctx = xchg(&twk->ctx, NULL);
        if (!ctx)
                return;

        state = xchg(&ctx->state, BPF_TW_FREED);
        if (state == BPF_TW_SCHEDULED) {
                /* run in irq_work to avoid locks in NMI */
                init_irq_work(&ctx->irq_work, bpf_task_work_cancel_scheduled);
                irq_work_queue(&ctx->irq_work);
                return;
        }

        bpf_task_work_ctx_put(ctx); /* put bpf map's ref */
}

BTF_KFUNCS_START(generic_btf_ids)
#ifdef CONFIG_CRASH_DUMP
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU)
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_left, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_right, KF_RET_NULL)

#ifdef CONFIG_CGROUPS
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_from_vpid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_throw)
#ifdef CONFIG_BPF_EVENTS
BTF_ID_FLAGS(func, bpf_send_signal_task)
#endif
#ifdef CONFIG_KEYS
BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
#endif
#endif
BTF_KFUNCS_END(generic_btf_ids)

static const struct btf_kfunc_id_set generic_kfunc_set = {
        .owner = THIS_MODULE,
        .set   = &generic_btf_ids,
};


BTF_ID_LIST(generic_dtor_ids)
BTF_ID(struct, task_struct)
BTF_ID(func, bpf_task_release_dtor)
#ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup)
BTF_ID(func, bpf_cgroup_release_dtor)
#endif

BTF_KFUNCS_START(common_btf_ids)
BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx, KF_FASTCALL)
BTF_ID_FLAGS(func, bpf_rdonly_cast, KF_FASTCALL)
BTF_ID_FLAGS(func, bpf_rcu_read_lock)
BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU)
BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY)
#ifdef CONFIG_CGROUPS
BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY)
#endif
BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_dynptr_adjust)
BTF_ID_FLAGS(func, bpf_dynptr_is_null)
BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
BTF_ID_FLAGS(func, bpf_dynptr_size)
BTF_ID_FLAGS(func, bpf_dynptr_clone)
BTF_ID_FLAGS(func, bpf_dynptr_copy)
BTF_ID_FLAGS(func, bpf_dynptr_memset)
#ifdef CONFIG_NET
BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
#endif
BTF_ID_FLAGS(func, bpf_wq_init)
BTF_ID_FLAGS(func, bpf_wq_set_callback, KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_wq_start)
BTF_ID_FLAGS(func, bpf_preempt_disable)
BTF_ID_FLAGS(func, bpf_preempt_enable)
BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_copy_from_user_task_str, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_get_kmem_cache)
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_local_irq_save)
BTF_ID_FLAGS(func, bpf_local_irq_restore)
#ifdef CONFIG_BPF_EVENTS
BTF_ID_FLAGS(func, bpf_probe_read_user_dynptr)
BTF_ID_FLAGS(func, bpf_probe_read_kernel_dynptr)
BTF_ID_FLAGS(func, bpf_probe_read_user_str_dynptr)
BTF_ID_FLAGS(func, bpf_probe_read_kernel_str_dynptr)
BTF_ID_FLAGS(func, bpf_copy_from_user_dynptr, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_copy_from_user_str_dynptr, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE)
#endif
#ifdef CONFIG_DMA_SHARED_BUFFER
BTF_ID_FLAGS(func, bpf_iter_dmabuf_new, KF_ITER_NEW | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
#endif
BTF_ID_FLAGS(func, __bpf_trap)
BTF_ID_FLAGS(func, bpf_strcmp)
BTF_ID_FLAGS(func, bpf_strcasecmp)
BTF_ID_FLAGS(func, bpf_strncasecmp)
BTF_ID_FLAGS(func, bpf_strchr)
BTF_ID_FLAGS(func, bpf_strchrnul)
BTF_ID_FLAGS(func, bpf_strnchr)
BTF_ID_FLAGS(func, bpf_strrchr)
BTF_ID_FLAGS(func, bpf_strlen)
BTF_ID_FLAGS(func, bpf_strnlen)
BTF_ID_FLAGS(func, bpf_strspn)
BTF_ID_FLAGS(func, bpf_strcspn)
BTF_ID_FLAGS(func, bpf_strstr)
BTF_ID_FLAGS(func, bpf_strcasestr)
BTF_ID_FLAGS(func, bpf_strnstr)
BTF_ID_FLAGS(func, bpf_strncasestr)
#if defined(CONFIG_BPF_LSM) && defined(CONFIG_CGROUPS)
BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU)
#endif
BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_stream_print_stack, KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, bpf_dynptr_from_file)
BTF_ID_FLAGS(func, bpf_dynptr_file_discard)
BTF_ID_FLAGS(func, bpf_timer_cancel_async)
BTF_KFUNCS_END(common_btf_ids)

static const struct btf_kfunc_id_set common_kfunc_set = {
        .owner = THIS_MODULE,
        .set   = &common_btf_ids,
};

static int __init kfunc_init(void)
{
        int ret;
        const struct btf_id_dtor_kfunc generic_dtors[] = {
                {
                        .btf_id       = generic_dtor_ids[0],
                        .kfunc_btf_id = generic_dtor_ids[1]
                },
#ifdef CONFIG_CGROUPS
                {
                        .btf_id       = generic_dtor_ids[2],
                        .kfunc_btf_id = generic_dtor_ids[3]
                },
#endif
        };

        ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set);
        ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set);
        ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &generic_kfunc_set);
        ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
        ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &generic_kfunc_set);
        ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &generic_kfunc_set);
        ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
                                                  ARRAY_SIZE(generic_dtors),
                                                  THIS_MODULE);
        return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}

late_initcall(kfunc_init);

/* Get a pointer to dynptr data up to len bytes for read only access. If
 * the dynptr doesn't have continuous data up to len bytes, return NULL.
 */
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len)
{
        const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr;

        return bpf_dynptr_slice(p, 0, NULL, len);
}

/* Get a pointer to dynptr data up to len bytes for read write access. If
 * the dynptr doesn't have continuous data up to len bytes, or the dynptr
 * is read only, return NULL.
 */
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len)
{
        if (__bpf_dynptr_is_rdonly(ptr))
                return NULL;
        return (void *)__bpf_dynptr_data(ptr, len);
}

void bpf_map_free_internal_structs(struct bpf_map *map, void *val)
{
        if (btf_record_has_field(map->record, BPF_TIMER))
                bpf_obj_free_timer(map->record, val);
        if (btf_record_has_field(map->record, BPF_WORKQUEUE))
                bpf_obj_free_workqueue(map->record, val);
        if (btf_record_has_field(map->record, BPF_TASK_WORK))
                bpf_obj_free_task_work(map->record, val);
}