#ifndef _LINUX_RSEQ_ENTRY_H
#define _LINUX_RSEQ_ENTRY_H
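/*
 * Inline fast path helpers for restartable sequences (rseq) handling on
 * exit to user mode: statistics, critical section fixup, CPU/node/MM CID
 * updates and the optional time slice extension grant machinery.
 */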
#ifdef CONFIG_RSEQ_STATS
#include <linux/percpu.h>
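/*
 * Lightweight per CPU statistics for the rseq exit machinery: exit path
 * invocations, ID updates, critical section inspection (cs), descriptor
 * clearing (clear), IP fixups (fixup) and the slice extension states
 * (s_*). Counters which are not incremented in this header (signal,
 * slowpath, s_expired, s_yielded, s_aborted) are presumably updated by
 * the out of line slow path code.
 */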
struct rseq_stats {
unsigned long exit;
unsigned long signal;
unsigned long slowpath;
unsigned long fastpath;
unsigned long ids;
unsigned long cs;
unsigned long clear;
unsigned long fixup;
unsigned long s_granted;
unsigned long s_expired;
unsigned long s_revoked;
unsigned long s_yielded;
unsigned long s_aborted;
};
DECLARE_PER_CPU(struct rseq_stats, rseq_stats);
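/*
 * The slow path build uses this_cpu_inc() so the accounting is safe in
 * preemptible context. The inlined fast path uses raw_cpu_inc(), which
 * presumably suffices because its callers run late in the exit to user
 * mode path with interrupts disabled.
 */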
#ifdef RSEQ_BUILD_SLOW_PATH
#define rseq_stat_inc(which) this_cpu_inc((which))
#else
#define rseq_stat_inc(which) raw_cpu_inc((which))
#endif
#else
#define rseq_stat_inc(x) do { } while (0)
#endif
#ifdef CONFIG_RSEQ
#include <linux/jump_label.h>
#include <linux/rseq.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/tracepoint-defs.h>
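/*
 * The tracepoints are declared via tracepoint-defs.h and the actual
 * emission lives in out of line helpers, so this header only needs the
 * cheap tracepoint_enabled() static key check in the inline wrappers.
 */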
#ifdef CONFIG_TRACEPOINTS
DECLARE_TRACEPOINT(rseq_update);
DECLARE_TRACEPOINT(rseq_ip_fixup);
void __rseq_trace_update(struct task_struct *t);
void __rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
unsigned long offset, unsigned long abort_ip);
static inline void rseq_trace_update(struct task_struct *t, struct rseq_ids *ids)
{
if (tracepoint_enabled(rseq_update) && ids)
__rseq_trace_update(t);
}
static inline void rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
unsigned long offset, unsigned long abort_ip)
{
if (tracepoint_enabled(rseq_ip_fixup))
__rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
}
#else
static inline void rseq_trace_update(struct task_struct *t, struct rseq_ids *ids) { }
static inline void rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
unsigned long offset, unsigned long abort_ip) { }
#endif
DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
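/*
 * rseq_debug_enabled gates the paranoid validation paths. The functions
 * below are force inlined into the entry code, except when this header
 * is built into the out of line slow path (RSEQ_BUILD_SLOW_PATH), which
 * also provides the debug implementations further down.
 */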
#ifdef RSEQ_BUILD_SLOW_PATH
#define rseq_inline
#else
#define rseq_inline __always_inline
#endif
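/*
 * Time slice extension: user space can set the request bit in
 * rseq->slice_ctrl to ask for a brief extension when it is about to be
 * preempted inside a short critical section. On return from an
 * interrupt from user space the kernel may grant the request instead of
 * rescheduling: it sets the granted bit, records an expiry time and
 * clears the need_resched state. The grant is enforced by a timer armed
 * on exit to user mode and is revoked whenever other exit work is
 * pending or the IDs are updated. User space is expected to relinquish
 * the CPU promptly once it observes the granted bit; the exact
 * relinquish protocol lives outside this header.
 */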
#ifdef CONFIG_RSEQ_SLICE_EXTENSION
DECLARE_STATIC_KEY_TRUE(rseq_slice_extension_key);
static __always_inline bool rseq_slice_extension_enabled(void)
{
return static_branch_likely(&rseq_slice_extension_key);
}
extern unsigned int rseq_slice_ext_nsecs;
bool __rseq_arm_slice_extension_timer(void);
static __always_inline bool rseq_arm_slice_extension_timer(void)
{
if (!rseq_slice_extension_enabled())
return false;
if (likely(!current->rseq.slice.state.granted))
return false;
return __rseq_arm_slice_extension_timer();
}
static __always_inline void rseq_slice_clear_grant(struct task_struct *t)
{
if (IS_ENABLED(CONFIG_RSEQ_STATS) && t->rseq.slice.state.granted)
rseq_stat_inc(rseq_stats.s_revoked);
t->rseq.slice.state.granted = false;
}
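/*
 * Invoked when the task is about to be rescheduled on exit to user
 * mode. A grant requires that the extension is enabled for the task,
 * that the exit was caused by an interrupt from user space, that no
 * other exit work is pending and that no grant is already active;
 * otherwise an existing grant and the user visible control word are
 * cleared. On success the request bit is replaced by the granted bit in
 * user space, the expiry time is recorded and need_resched is cleared
 * so the task gets another short run. A faulting user access results in
 * SIGSEGV.
 */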
static __always_inline bool rseq_grant_slice_extension(bool work_pending)
{
struct task_struct *curr = current;
struct rseq_slice_ctrl usr_ctrl;
union rseq_slice_state state;
struct rseq __user *rseq;
if (!rseq_slice_extension_enabled())
return false;
state = curr->rseq.slice.state;
state.enabled &= curr->rseq.event.user_irq;
if (likely(!state.state))
return false;
rseq = curr->rseq.usrptr;
scoped_user_rw_access(rseq, efault) {
if (unlikely(work_pending || state.granted)) {
unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
rseq_slice_clear_grant(curr);
return false;
}
unsafe_get_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault);
	if (likely(!usr_ctrl.request))
return false;
usr_ctrl.request = 0;
usr_ctrl.granted = 1;
unsafe_put_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault);
}
rseq_stat_inc(rseq_stats.s_granted);
curr->rseq.slice.state.granted = true;
curr->rseq.slice.expires = data_race(rseq_slice_ext_nsecs) + ktime_get_mono_fast_ns();
scoped_guard(irq) {
clear_tsk_need_resched(curr);
clear_preempt_need_resched();
}
return true;
efault:
force_sig(SIGSEGV);
return false;
}
#else
static __always_inline bool rseq_slice_extension_enabled(void) { return false; }
static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; }
static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { }
static __always_inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
#endif
bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
bool rseq_debug_validate_ids(struct task_struct *t);
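/*
 * Marks the current exit as originating from an interrupt which hit
 * user space; only effective with the generic IRQ entry code. The flag
 * gates critical section fixups in debug mode and slice extension
 * grants.
 */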
static __always_inline void rseq_note_user_irq_entry(void)
{
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
current->rseq.event.user_irq = true;
}
#ifdef RSEQ_BUILD_SLOW_PATH
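/*
 * Debug variant of the critical section fixup. It validates the
 * descriptor thoroughly: start_ip, the commit range and abort_ip must
 * be below TASK_SIZE, the first 64 bits of the descriptor (version and
 * flags) must be zero, abort_ip must not point into the critical
 * section, the abort handler must be preceded by the correct signature
 * and, with generic IRQ entry, the event must stem from an interrupt
 * from user space. A malformed descriptor sets event.fatal; a faulting
 * user access just reports failure.
 */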
bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs,
unsigned long csaddr)
{
struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
u64 start_ip, abort_ip, offset, cs_end, head, tasksize = TASK_SIZE;
unsigned long ip = instruction_pointer(regs);
u64 __user *uc_head = (u64 __user *) ucs;
u32 usig, __user *uc_sig;
scoped_user_rw_access(ucs, efault) {
unsafe_get_user(start_ip, &ucs->start_ip, efault);
if (unlikely(start_ip >= tasksize))
goto die;
if (ip < start_ip)
goto clear;
unsafe_get_user(offset, &ucs->post_commit_offset, efault);
cs_end = start_ip + offset;
if (unlikely(cs_end >= tasksize || cs_end < start_ip))
goto die;
if (ip >= cs_end)
goto clear;
unsafe_get_user(abort_ip, &ucs->abort_ip, efault);
if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig)))
goto die;
if (unlikely(abort_ip - start_ip < offset))
goto die;
unsafe_get_user(head, uc_head, efault);
if (unlikely(head))
goto die;
uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
unsafe_get_user(usig, uc_sig, efault);
if (unlikely(usig != t->rseq.sig))
goto die;
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
if (unlikely(!t->rseq.event.user_irq))
goto die;
}
unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
instruction_pointer_set(regs, (unsigned long)abort_ip);
rseq_stat_inc(rseq_stats.fixup);
break;
clear:
unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
rseq_stat_inc(rseq_stats.clear);
abort_ip = 0ULL;
}
if (unlikely(abort_ip))
rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
return true;
die:
t->rseq.event.fatal = true;
efault:
return false;
}
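/*
 * Debug check that the user space rseq area still contains the CPU,
 * node and MM CID values which the kernel wrote last. A mismatch, which
 * usually means user space scribbled into the area, is fatal.
 */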
bool rseq_debug_validate_ids(struct task_struct *t)
{
struct rseq __user *rseq = t->rseq.usrptr;
u32 cpu_id, uval, node_id;
node_id = t->rseq.ids.cpu_id != RSEQ_CPU_ID_UNINITIALIZED ?
cpu_to_node(t->rseq.ids.cpu_id) : 0;
scoped_user_read_access(rseq, efault) {
unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault);
if (cpu_id != t->rseq.ids.cpu_id)
goto die;
unsafe_get_user(uval, &rseq->cpu_id, efault);
if (uval != cpu_id)
goto die;
unsafe_get_user(uval, &rseq->node_id, efault);
if (uval != node_id)
goto die;
unsafe_get_user(uval, &rseq->mm_cid, efault);
if (uval != t->rseq.ids.mm_cid)
goto die;
}
return true;
die:
t->rseq.event.fatal = true;
efault:
return false;
}
#endif
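/*
 * Fast path handling of the critical section descriptor: read start_ip,
 * the post commit offset and abort_ip, clear rseq_cs in user space and,
 * if the instruction pointer is inside the section, verify the abort
 * signature and redirect user space to abort_ip. A descriptor address
 * outside TASK_SIZE is fatal. When debug mode is enabled the fully
 * validating variant above is used instead.
 */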
static rseq_inline bool
rseq_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr)
{
struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
unsigned long ip = instruction_pointer(regs);
unsigned long tasksize = TASK_SIZE;
u64 start_ip, abort_ip, offset;
u32 usig, __user *uc_sig;
rseq_stat_inc(rseq_stats.cs);
if (unlikely(csaddr >= tasksize)) {
t->rseq.event.fatal = true;
return false;
}
if (static_branch_unlikely(&rseq_debug_enabled))
return rseq_debug_update_user_cs(t, regs, csaddr);
scoped_user_rw_access(ucs, efault) {
unsafe_get_user(start_ip, &ucs->start_ip, efault);
unsafe_get_user(offset, &ucs->post_commit_offset, efault);
unsafe_get_user(abort_ip, &ucs->abort_ip, efault);
if (ip - start_ip >= offset)
goto clear;
if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig)))
goto die;
uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
unsafe_get_user(usig, uc_sig, efault);
if (unlikely(usig != t->rseq.sig))
goto die;
unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
instruction_pointer_set(regs, (unsigned long)abort_ip);
rseq_stat_inc(rseq_stats.fixup);
break;
clear:
unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
rseq_stat_inc(rseq_stats.clear);
abort_ip = 0ULL;
}
if (unlikely(abort_ip))
rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
return true;
die:
t->rseq.event.fatal = true;
efault:
return false;
}
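/*
 * Write the CPU ID, node ID and MM CID into the user space rseq area
 * and, if requested, read back the critical section descriptor address
 * within the same user access section. The user visible slice control
 * word and a pending grant are cleared as well, presumably because an
 * ID update implies that the task went through schedule().
 */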
static rseq_inline
bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids,
u32 node_id, u64 *csaddr)
{
struct rseq __user *rseq = t->rseq.usrptr;
if (static_branch_unlikely(&rseq_debug_enabled)) {
if (!rseq_debug_validate_ids(t))
return false;
}
scoped_user_rw_access(rseq, efault) {
unsafe_put_user(ids->cpu_id, &rseq->cpu_id_start, efault);
unsafe_put_user(ids->cpu_id, &rseq->cpu_id, efault);
unsafe_put_user(node_id, &rseq->node_id, efault);
unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault);
if (csaddr)
unsafe_get_user(*csaddr, &rseq->rseq_cs, efault);
if (rseq_slice_extension_enabled()) {
unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
}
}
rseq_slice_clear_grant(t);
t->rseq.ids.cpu_cid = ids->cpu_cid;
rseq_stat_inc(rseq_stats.ids);
rseq_trace_update(t, ids);
return true;
efault:
return false;
}
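/*
 * Combined update: store the IDs and then handle a registered critical
 * section. With generic IRQ entry and debug mode disabled, the
 * descriptor check is skipped unless the task was interrupted in user
 * space, presumably because a task which entered the kernel via syscall
 * cannot legitimately be inside a critical section.
 */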
static rseq_inline bool rseq_update_usr(struct task_struct *t, struct pt_regs *regs,
struct rseq_ids *ids, u32 node_id)
{
u64 csaddr;
if (!rseq_set_ids_get_csaddr(t, ids, node_id, &csaddr))
return false;
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
if (!static_branch_unlikely(&rseq_debug_enabled)) {
if (likely(!t->rseq.event.user_irq))
return true;
}
}
if (likely(!csaddr))
return true;
return rseq_update_user_cs(t, regs, csaddr);
}
#ifdef CONFIG_GENERIC_ENTRY
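/*
 * Fast path update on exit to user mode, running with page faults
 * disabled. If the IDs did not change, only the critical section
 * descriptor needs to be inspected and the slice control word cleared;
 * otherwise a full ID update plus critical section fixup is performed.
 * Returns false when the user access faulted or the descriptor was
 * invalid, so the caller can defer to the slow path.
 */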
static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct task_struct *t)
{
guard(pagefault)();
if (likely(!t->rseq.event.ids_changed)) {
struct rseq __user *rseq = t->rseq.usrptr;
u64 csaddr;
scoped_user_rw_access(rseq, efault) {
unsafe_get_user(csaddr, &rseq->rseq_cs, efault);
if (rseq_slice_extension_enabled()) {
unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
}
}
rseq_slice_clear_grant(t);
if (static_branch_unlikely(&rseq_debug_enabled) || unlikely(csaddr)) {
if (unlikely(!rseq_update_user_cs(t, regs, csaddr)))
return false;
}
return true;
}
struct rseq_ids ids = {
.cpu_id = task_cpu(t),
.mm_cid = task_mm_cid(t),
};
u32 node_id = cpu_to_node(ids.cpu_id);
return rseq_update_usr(t, regs, &ids, node_id);
efault:
return false;
}
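/*
 * Returns true when the fast path could not complete, in which case the
 * caller has to schedule the slow path via TIF_NOTIFY_RESUME. On
 * success all recorded events are cleared.
 */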
static __always_inline bool __rseq_exit_to_user_mode_restart(struct pt_regs *regs)
{
struct task_struct *t = current;
	if (unlikely(t->rseq.event.sched_switch)) {
rseq_stat_inc(rseq_stats.fastpath);
if (unlikely(!rseq_exit_user_update(regs, t)))
return true;
}
t->rseq.event.events = 0;
return false;
}
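/*
 * Architectures with the generic TIF bits provide a dedicated TIF_RSEQ
 * flag so the fixup only runs when rseq work is actually pending and
 * the flag can be cleared independently of TIF_NOTIFY_RESUME. Without
 * it the check is unconditionally true and nothing is cleared.
 */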
#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
static __always_inline bool test_tif_rseq(unsigned long ti_work)
{
return ti_work & _TIF_RSEQ;
}
static __always_inline void clear_tif_rseq(void)
{
static_assert(TIF_RSEQ != TIF_NOTIFY_RESUME);
clear_thread_flag(TIF_RSEQ);
}
#else
static __always_inline bool test_tif_rseq(unsigned long ti_work) { return true; }
static __always_inline void clear_tif_rseq(void) { }
#endif
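/*
 * Invoked from the exit to user mode work loop. If the fast path fails,
 * the task is flagged for the rseq slow path and TIF_NOTIFY_RESUME is
 * raised so the loop runs again. Otherwise the result of arming the
 * slice extension timer is returned, presumably to tell the caller
 * whether further exit handling is required.
 */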
static __always_inline bool
rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
{
if (unlikely(test_tif_rseq(ti_work))) {
if (unlikely(__rseq_exit_to_user_mode_restart(regs))) {
current->rseq.event.slowpath = true;
set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
return true;
}
clear_tif_rseq();
}
return rseq_arm_slice_extension_timer();
}
#else
static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
{
return false;
}
#endif
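/*
 * Exit accounting and debug checks for the various exit to user mode
 * flavors. A still set sched_switch event at this point indicates that
 * the fixup did not run and triggers a (config dependent) warning.
 */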
static __always_inline void rseq_syscall_exit_to_user_mode(void)
{
	struct rseq_event *ev = &current->rseq.event;
rseq_stat_inc(rseq_stats.exit);
if (IS_ENABLED(CONFIG_LOCKDEP)) {
WARN_ON_ONCE(ev->sched_switch);
ev->events = 0;
}
}
static __always_inline void rseq_irqentry_exit_to_user_mode(void)
{
	struct rseq_event *ev = &current->rseq.event;
rseq_stat_inc(rseq_stats.exit);
lockdep_assert_once(!ev->sched_switch);
ev->events = 0;
}
static __always_inline void rseq_exit_to_user_mode_legacy(void)
{
	struct rseq_event *ev = &current->rseq.event;
rseq_stat_inc(rseq_stats.exit);
if (static_branch_unlikely(&rseq_debug_enabled))
WARN_ON_ONCE(ev->sched_switch);
ev->events = 0;
}
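/*
 * Debug check on syscall return, implemented out of line and only
 * invoked when rseq debug mode is enabled.
 */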
void __rseq_debug_syscall_return(struct pt_regs *regs);
static __always_inline void rseq_debug_syscall_return(struct pt_regs *regs)
{
if (static_branch_unlikely(&rseq_debug_enabled))
__rseq_debug_syscall_return(regs);
}
#else
static inline void rseq_note_user_irq_entry(void) { }
static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
{
return false;
}
static inline void rseq_syscall_exit_to_user_mode(void) { }
static inline void rseq_irqentry_exit_to_user_mode(void) { }
static inline void rseq_exit_to_user_mode_legacy(void) { }
static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
#endif
#endif