#include <linux/sched/task_stack.h>
#include <linux/unwind_deferred.h>
#include <linux/sched/clock.h>
#include <linux/task_work.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/mm.h>
#if defined(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)
# define CAN_USE_IN_NMI 1
/*
 * Atomically assign @cnt as the task's context-id counter, but only if
 * no counter has been assigned yet (id.cnt still zero).  Returns true
 * if this caller performed the assignment.  On these architectures
 * cmpxchg is NMI safe, so this may race with an NMI doing the same.
 */
static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt)
{
	u32 old = 0;
	return try_cmpxchg(&info->id.cnt, &old, cnt);
}
#else
# define CAN_USE_IN_NMI 0
/*
 * Without NMI-safe cmpxchg, requests are never made from NMI context
 * (see CAN_USE_IN_NMI checks), so a plain store cannot race here.
 */
static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt)
{
	info->id.cnt = cnt;
	return true;
}
#endif
/*
 * Maximum number of stack entries that fit in a 4K allocation after the
 * unwind_cache header.
 */
#define UNWIND_MAX_ENTRIES \
((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long))

/* Guards modification of the callbacks list and the global unwind_mask */
static DEFINE_MUTEX(callback_mutex);
static LIST_HEAD(callbacks);

/* Bits of unwind_mask that are never handed out to callback users */
#define RESERVED_BITS (UNWIND_PENDING | UNWIND_USED)

/* Zero'd bits are available for assigning to registered callbacks */
static unsigned long unwind_mask = RESERVED_BITS;
/* Protects readers iterating the callbacks list against concurrent removal */
DEFINE_STATIC_SRCU(unwind_srcu);
/* Does @info have a deferred unwind queued but not yet processed? */
static inline bool unwind_pending(struct unwind_task_info *info)
{
	unsigned long mask = atomic_long_read(&info->unwind_mask);

	return (mask & UNWIND_PENDING) != 0;
}
/* Per-CPU counter used to generate unique context-id cookies */
static DEFINE_PER_CPU(u32, unwind_ctx_ctr);
/*
 * Return the unique identifier ("cookie") for the current task's current
 * entry into the kernel.  The cookie is built from a per-CPU counter
 * (id.cnt) and the CPU number plus one (id.cpu), assigned lazily on the
 * first request in this entry context and reused afterwards.
 *
 * Must be called with interrupts disabled so the CPU cannot change
 * between reading the counter and recording the CPU number.
 */
static u64 get_cookie(struct unwind_task_info *info)
{
	u32 cnt = 1; /* LSB always set so a zero cookie is never valid */

	lockdep_assert_irqs_disabled();

	/* Non-zero id.cpu means a cookie was already assigned; reuse it */
	if (info->id.cpu)
		return info->id.id;

	cnt |= __this_cpu_read(unwind_ctx_ctr) + 2;
	if (try_assign_cnt(info, cnt)) {
		/* This context won the assignment; update the per-CPU counter */
		__this_cpu_write(unwind_ctx_ctr, cnt);
	}

	/* Interrupts are disabled, the CPU stays the same */
	info->id.cpu = smp_processor_id() + 1; /* Must be non-zero */

	return info->id.id;
}
/*
 * unwind_user_faultable - Unwind the user stack of the current task
 * @trace: storage for the resulting entries pointer and count
 *
 * Unwinds the user-space stack of @current into the per-task cache,
 * faulting in user pages as needed.  If the cache already holds entries
 * from a previous unwind in this entry context they are reused and no
 * new unwind is performed.
 *
 * Must be called from a faultable (sleepable) context of a task that
 * has a user address space.
 *
 * Returns 0 on success, -EINVAL if @current has no mm, or -ENOMEM if
 * the cache cannot be allocated.
 *
 * Fix: "&current" had been corrupted to the mojibake "¤t"
 * (HTML entity &curren;), which does not compile.
 */
int unwind_user_faultable(struct unwind_stacktrace *trace)
{
	struct unwind_task_info *info = &current->unwind_info;
	struct unwind_cache *cache;

	/* Should always be called from a faultable context */
	might_fault();

	if (!current->mm)
		return -EINVAL;

	if (!info->cache) {
		/* Header plus UNWIND_MAX_ENTRIES stack slots, zeroed */
		info->cache = kzalloc_flex(*cache, entries, UNWIND_MAX_ENTRIES);
		if (!info->cache)
			return -ENOMEM;
	}

	cache = info->cache;
	trace->entries = cache->entries;
	trace->nr = cache->nr_entries;

	/* Reuse the trace cached by an earlier unwind in this entry context */
	if (trace->nr)
		return 0;

	unwind_user(trace, UNWIND_MAX_ENTRIES);

	cache->nr_entries = trace->nr;

	/* Record that the cache now holds a valid trace for this context */
	atomic_long_or(UNWIND_USED, &info->unwind_mask);

	return 0;
}
/*
 * Run all pending deferred-unwind callbacks for @task.
 *
 * Atomically clears UNWIND_PENDING while snapshotting the requested
 * callback bits, produces (or reuses) the user stacktrace, and invokes
 * every registered callback whose bit was requested and has not already
 * completed for this entry context.
 */
static void process_unwind_deferred(struct task_struct *task)
{
	struct unwind_task_info *info = &task->unwind_info;
	struct unwind_stacktrace trace;
	struct unwind_work *work;
	unsigned long bits;
	u64 cookie;

	if (WARN_ON_ONCE(!unwind_pending(info)))
		return;

	/* Clear pending bit but make sure to have the current bits */
	bits = atomic_long_fetch_andnot(UNWIND_PENDING,
	&info->unwind_mask);

	/*
	 * Initialize an empty trace: the callbacks below run even if the
	 * unwind fails and the trace stays empty.
	 */
	trace.nr = 0;
	trace.entries = NULL;

	unwind_user_faultable(&trace);

	/* Skip callbacks that already ran for this entry context */
	if (info->cache)
		bits &= ~(info->cache->unwind_completed);

	cookie = info->id.id;

	/* SRCU keeps the callback entries alive across concurrent cancels */
	guard(srcu)(&unwind_srcu);
	list_for_each_entry_srcu(work, &callbacks, list,
	srcu_read_lock_held(&unwind_srcu)) {
		if (test_bit(work->bit, &bits)) {
			work->func(work, &trace, cookie);
			/* Remember completion so a re-request does not repeat it */
			if (info->cache)
				info->cache->unwind_completed |= BIT(work->bit);
		}
	}
}
/*
 * Task-work callback: process the deferred unwind for the current task
 * (queued by unwind_deferred_request()).
 */
static void unwind_deferred_task_work(struct callback_head *head)
{
	process_unwind_deferred(current);
}
void unwind_deferred_task_exit(struct task_struct *task)
{
struct unwind_task_info *info = ¤t->unwind_info;
if (!unwind_pending(info))
return;
process_unwind_deferred(task);
task_work_cancel(task, &info->work);
}
/*
 * unwind_deferred_request - Request a deferred user stacktrace for current
 * @work:   the registered unwind_work requesting the trace
 * @cookie: set to the identifier of this entry context
 *
 * Queues task_work so that @work's callback runs with the user
 * stacktrace when the current task heads back to user space.  May be
 * called from interrupt (and, where cmpxchg is NMI safe, NMI) context.
 *
 * Returns 0 if the callback was successfully queued, 1 if it was
 * already queued for this entry context, and a negative errno on
 * failure (kernel thread, exiting task, kernel-mode entry, cancelled
 * work, or task_work_add() failure).
 *
 * Fixes: (1) "&current" had been corrupted to "¤t" (HTML entity),
 * which does not compile.  (2) work->bit was read into an unsigned
 * long, making the "< 0" cancelled-work check dead code and letting a
 * cancelled work's bit of -1 reach BIT() (undefined shift); the bit is
 * now read into a signed local before being converted to a mask.
 */
int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
{
	struct unwind_task_info *info = &current->unwind_info;
	int twa_mode = TWA_RESUME;
	unsigned long old, bits;
	unsigned long bit;
	int bit_nr;
	int ret;

	*cookie = 0;

	/* Only tasks that can return to user space have a user stack */
	if ((current->flags & (PF_KTHREAD | PF_EXITING)) ||
	    !user_mode(task_pt_regs(current)))
		return -EINVAL;

	if (in_nmi()) {
		/* NMI requires having safe cmpxchg operations */
		if (WARN_ON_ONCE(!CAN_USE_IN_NMI))
			return -EINVAL;
		twa_mode = TWA_NMI_CURRENT;
	}

	/* Do not allow cancelled works (bit == -1) to request again */
	bit_nr = READ_ONCE(work->bit);
	if (WARN_ON_ONCE(bit_nr < 0))
		return -EINVAL;

	/* Only the mask form is needed from here on */
	bit = BIT(bit_nr);

	guard(irqsave)();

	*cookie = get_cookie(info);

	old = atomic_long_read(&info->unwind_mask);

	/* Is this work already queued or executed for this context? */
	if (old & bit)
		return 1;

	/*
	 * Set this work's bit together with PENDING and fetch the previous
	 * mask.  If either was already set, someone else has already queued
	 * the task_work for this context.
	 */
	bits = UNWIND_PENDING | bit;
	old = atomic_long_fetch_or(bits, &info->unwind_mask);
	if (old & bits) {
		/*
		 * If the work's bit was set, whatever set it had better have
		 * also set PENDING and queued the callback.
		 */
		WARN_ON_ONCE(!(old & UNWIND_PENDING));
		return old & bit;
	}

	/* The work has been claimed, now schedule it. */
	ret = task_work_add(current, &info->work, twa_mode);

	/* On failure, drop the claim so a later request can retry */
	if (WARN_ON_ONCE(ret))
		atomic_long_set(&info->unwind_mask, 0);

	return ret;
}
/*
 * unwind_deferred_cancel - Unregister a deferred-unwind callback
 * @work: the registered unwind_work to remove (NULL is a no-op)
 *
 * Removes @work from the callbacks list, frees its bit in the global
 * unwind_mask, and scrubs that bit from every task's pending mask and
 * completed-cache mask so the bit can safely be reused by a future
 * registration.
 */
void unwind_deferred_cancel(struct unwind_work *work)
{
	struct task_struct *g, *t;
	int bit;

	if (!work)
		return;

	bit = work->bit;

	/* No work should be using a reserved bit */
	if (WARN_ON_ONCE(BIT(bit) & RESERVED_BITS))
		return;

	guard(mutex)(&callback_mutex);
	list_del_rcu(&work->list);

	/* Mark cancelled so new requests via this work are rejected */
	work->bit = -1;

	__clear_bit(bit, &unwind_mask);

	/* Wait for in-flight SRCU readers that may still reference @work */
	synchronize_srcu(&unwind_srcu);

	guard(rcu)();
	/* Clear this bit from all threads so a reused bit starts clean */
	for_each_process_thread(g, t) {
		atomic_long_andnot(BIT(bit),
		&t->unwind_info.unwind_mask);
		if (t->unwind_info.cache)
			clear_bit(bit, &t->unwind_info.cache->unwind_completed);
	}
}
/*
 * unwind_deferred_init - Register a deferred-unwind callback
 * @work: unwind_work to initialize and register
 * @func: callback to invoke with the user stacktrace and cookie
 *
 * Assigns @work the first free bit of the global unwind_mask and
 * publishes it on the callbacks list.
 *
 * NOTE(review): @func is assigned after list_add_rcu() publishes @work;
 * this appears safe because readers only invoke callbacks whose bit a
 * task has requested, which cannot happen before this returns — confirm.
 *
 * Returns 0 on success, -EBUSY if all callback bits are in use.
 */
int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
{
	memset(work, 0, sizeof(*work));

	guard(mutex)(&callback_mutex);

	/* See if there's a bit in the mask available */
	if (unwind_mask == ~0UL)
		return -EBUSY;

	work->bit = ffz(unwind_mask);
	__set_bit(work->bit, &unwind_mask);

	list_add_rcu(&work->list, &callbacks);
	work->func = func;
	return 0;
}
/* Initialize the deferred-unwind state of a newly created task. */
void unwind_task_init(struct task_struct *task)
{
	struct unwind_task_info *ui = &task->unwind_info;

	/* Start from a fully zeroed state: no cache, no cookie, no bits */
	memset(ui, 0, sizeof(*ui));
	atomic_long_set(&ui->unwind_mask, 0);
	init_task_work(&ui->work, unwind_deferred_task_work);
}
/* Release the deferred-unwind resources of a task being freed. */
void unwind_task_free(struct task_struct *task)
{
	/* kfree(NULL) is a no-op, so an unallocated cache is fine */
	kfree(task->unwind_info.cache);
	task_work_cancel(task, &task->unwind_info.work);
}