root/include/linux/rseq_entry.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_RSEQ_ENTRY_H
#define _LINUX_RSEQ_ENTRY_H

/* Must be outside the CONFIG_RSEQ guard to resolve the stubs */
#ifdef CONFIG_RSEQ_STATS
#include <linux/percpu.h>

/*
 * Event counters for rseq. One instance exists per CPU and the counters
 * are bumped through rseq_stat_inc(). Counters without a note are not
 * updated in this header.
 */
struct rseq_stats {
        unsigned long   exit;           /* Exit to user mode hook invoked */
        unsigned long   signal;
        unsigned long   slowpath;
        unsigned long   fastpath;       /* Inlined exit path handled a sched_switch event */
        unsigned long   ids;            /* ID fields written to user space */
        unsigned long   cs;             /* Critical section descriptor evaluation started */
        unsigned long   clear;          /* rseq_cs cleared, IP was outside of the section */
        unsigned long   fixup;          /* rseq_cs cleared and IP redirected to the abort IP */
        unsigned long   s_granted;      /* Slice extension granted */
        unsigned long   s_expired;
        unsigned long   s_revoked;      /* Active slice extension grant cleared */
        unsigned long   s_yielded;
        unsigned long   s_aborted;
};

DECLARE_PER_CPU(struct rseq_stats, rseq_stats);

/*
 * Slow path has interrupts and preemption enabled, but the fast path
 * runs with interrupts disabled so there is no point in having the
 * preemption checks implied in __this_cpu_inc() for every operation.
 */
#ifdef RSEQ_BUILD_SLOW_PATH
/* Slow path build: use the this_cpu_inc() variant */
#define rseq_stat_inc(which)    this_cpu_inc((which))
#else
/* Fast path (header inlined into the exit code): use the raw variant */
#define rseq_stat_inc(which)    raw_cpu_inc((which))
#endif

#else /* CONFIG_RSEQ_STATS */
/* Statistics are compiled out: the increment expands to an empty statement */
#define rseq_stat_inc(x)        do { } while (0)
#endif /* !CONFIG_RSEQ_STATS */

#ifdef CONFIG_RSEQ
#include <linux/jump_label.h>
#include <linux/rseq.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>

#include <linux/tracepoint-defs.h>

#ifdef CONFIG_TRACEPOINTS
DECLARE_TRACEPOINT(rseq_update);
DECLARE_TRACEPOINT(rseq_ip_fixup);
void __rseq_trace_update(struct task_struct *t);
void __rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
                           unsigned long offset, unsigned long abort_ip);

/*
 * Emit the rseq_update trace event for @t. Nothing is emitted when the
 * tracepoint is disabled or when no @ids were handed in.
 */
static inline void rseq_trace_update(struct task_struct *t, struct rseq_ids *ids)
{
        if (!tracepoint_enabled(rseq_update) || !ids)
                return;

        __rseq_trace_update(t);
}

/* Emit the rseq_ip_fixup trace event unless the tracepoint is disabled */
static inline void rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
                                       unsigned long offset, unsigned long abort_ip)
{
        if (!tracepoint_enabled(rseq_ip_fixup))
                return;

        __rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
}

#else /* CONFIG_TRACEPOINTS */
/* Tracepoints are compiled out: the trace helpers are empty stubs */
static inline void rseq_trace_update(struct task_struct *t, struct rseq_ids *ids) { }
static inline void rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
                                       unsigned long offset, unsigned long abort_ip) { }
#endif /* !CONFIG_TRACEPOINTS */

DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);

/*
 * Inlining policy for the helpers marked rseq_inline: when the slow path
 * is built (RSEQ_BUILD_SLOW_PATH) the marker expands to nothing, so they
 * are plain static functions. Everywhere else they are forced inline.
 */
#ifdef RSEQ_BUILD_SLOW_PATH
#define rseq_inline
#else
#define rseq_inline __always_inline
#endif

#ifdef CONFIG_RSEQ_SLICE_EXTENSION
DECLARE_STATIC_KEY_TRUE(rseq_slice_extension_key);

/* Tell whether the slice extension static key is currently enabled */
static __always_inline bool rseq_slice_extension_enabled(void)
{
        if (static_branch_likely(&rseq_slice_extension_key))
                return true;

        return false;
}

extern unsigned int rseq_slice_ext_nsecs;
bool __rseq_arm_slice_extension_timer(void);

/*
 * Hand over to __rseq_arm_slice_extension_timer() when slice extensions
 * are enabled and current holds a grant. Returns its result in that case
 * and false otherwise.
 */
static __always_inline bool rseq_arm_slice_extension_timer(void)
{
        if (rseq_slice_extension_enabled() &&
            unlikely(current->rseq.slice.state.granted))
                return __rseq_arm_slice_extension_timer();

        return false;
}

/*
 * Drop the slice extension grant of @t. A grant which was still active is
 * accounted as revoked when statistics are enabled.
 */
static __always_inline void rseq_slice_clear_grant(struct task_struct *t)
{
        if (IS_ENABLED(CONFIG_RSEQ_STATS)) {
                if (t->rseq.slice.state.granted)
                        rseq_stat_inc(rseq_stats.s_revoked);
        }

        t->rseq.slice.state.granted = false;
}

/*
 * Try to grant a time slice extension to current.
 *
 * @work_pending:       True if work other than rescheduling is pending,
 *                      which prevents a grant
 *
 * Returns true if the extension was granted. In that case the grant is
 * recorded in the task, the expiry time is stored and the reschedule
 * request of current is cleared. Returns false if nothing was granted,
 * which includes the case that an existing grant was revoked.
 *
 * If the user space rseq area cannot be accessed, SIGSEGV is forced and
 * false is returned.
 */
static __always_inline bool rseq_grant_slice_extension(bool work_pending)
{
        struct task_struct *curr = current;
        struct rseq_slice_ctrl usr_ctrl;
        union rseq_slice_state state;
        struct rseq __user *rseq;

        if (!rseq_slice_extension_enabled())
                return false;

        /* If not enabled or not a return from interrupt, nothing to do. */
        state = curr->rseq.slice.state;
        state.enabled &= curr->rseq.event.user_irq;
        if (likely(!state.state))
                return false;

        rseq = curr->rseq.usrptr;
        scoped_user_rw_access(rseq, efault) {

                /*
                 * Quick check conditions where a grant is not possible or
                 * needs to be revoked.
                 *
                 *  1) Any TIF bit which needs to do extra work aside of
                 *     rescheduling prevents a grant.
                 *
                 *  2) A previous rescheduling request resulted in a slice
                 *     extension grant.
                 */
                if (unlikely(work_pending || state.granted)) {
                        /* Clear user control unconditionally. No point for checking */
                        unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
                        rseq_slice_clear_grant(curr);
                        return false;
                }

                /* No request from user space, no grant */
                unsafe_get_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault);
                if (likely(!(usr_ctrl.request)))
                        return false;

                /* Grant the slice extension */
                usr_ctrl.request = 0;
                usr_ctrl.granted = 1;
                unsafe_put_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault);
        }

        rseq_stat_inc(rseq_stats.s_granted);

        curr->rseq.slice.state.granted = true;
        /* Store expiry time for arming the timer on the way out */
        curr->rseq.slice.expires = data_race(rseq_slice_ext_nsecs) + ktime_get_mono_fast_ns();
        /*
         * This is racy against a remote CPU setting TIF_NEED_RESCHED in
         * several ways:
         *
         * 1)
         *      CPU0                    CPU1
         *      clear_tsk()
         *                              set_tsk()
         *      clear_preempt()
         *                              Raise scheduler IPI on CPU0
         *      --> IPI
         *          fold_need_resched() -> Folds correctly
         * 2)
         *      CPU0                    CPU1
         *                              set_tsk()
         *      clear_tsk()
         *      clear_preempt()
         *                              Raise scheduler IPI on CPU0
         *      --> IPI
         *          fold_need_resched() <- NOOP as TIF_NEED_RESCHED is false
         *
         * #1 is not any different from a regular remote reschedule as it
         *    sets the previously not set bit and then raises the IPI which
         *    folds it into the preempt counter
         *
         * #2 is obviously incorrect from a scheduler POV, but it's not
         *    differently incorrect than the code below clearing the
         *    reschedule request with the safety net of the timer.
         *
         * The important part is that the clearing is protected against the
         * scheduler IPI and also against any other interrupt which might
         * end up waking up a task and setting the bits in the middle of
         * the operation:
         *
         *      clear_tsk()
         *      ---> Interrupt
         *              wakeup_on_this_cpu()
         *              set_tsk()
         *              set_preempt()
         *      clear_preempt()
         *
         * which would be inconsistent state.
         */
        scoped_guard(irq) {
                clear_tsk_need_resched(curr);
                clear_preempt_need_resched();
        }
        return true;

efault:
        /* The user space rseq area could not be accessed */
        force_sig(SIGSEGV);
        return false;
}

#else /* CONFIG_RSEQ_SLICE_EXTENSION */
/* Slice extensions are compiled out: nothing is enabled, armed or granted */
static __always_inline bool rseq_slice_extension_enabled(void) { return false; }
static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; }
static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { }
static __always_inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
#endif /* !CONFIG_RSEQ_SLICE_EXTENSION */

bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
bool rseq_debug_validate_ids(struct task_struct *t);

/*
 * Record in the rseq event state of current that the kernel was entered
 * via an interrupt from user space. Only done when the generic IRQ entry
 * code is in use.
 */
static __always_inline void rseq_note_user_irq_entry(void)
{
        if (!IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
                return;

        current->rseq.event.user_irq = true;
}

/*
 * Check whether there is a valid critical section and whether the
 * instruction pointer in @regs is inside the critical section.
 *
 *  - If the critical section is invalid, terminate the task.
 *
 *  - If valid and the instruction pointer is inside, set it to the abort IP.
 *
 *  - If valid and the instruction pointer is outside, clear the critical
 *    section address.
 *
 * Returns true, if the section was valid and either fixup or clear was
 * done, false otherwise.
 *
 * In the failure case task::rseq_event::fatal is set when an invalid
 * section was found. It's clear when the failure was an unresolved page
 * fault.
 *
 * If inlined into the exit to user path with interrupts disabled, the
 * caller has to protect against page faults with pagefault_disable().
 *
 * In preemptible task context this would be counterproductive as the page
 * faults could not be fully resolved. As a consequence unresolved page
 * faults in task context are fatal too.
 */

#ifdef RSEQ_BUILD_SLOW_PATH
/*
 * The debug version is put out of line, but kept here so the code stays
 * together.
 *
 * Fully validates the critical section descriptor at @csaddr for task @t
 * and, if the instruction pointer in @regs is inside the section,
 * redirects it to the abort IP.
 *
 * @csaddr has already been checked by the caller to be in user space
 *
 * Returns true if the critical section address in user space was cleared,
 * with or without an IP fixup. Returns false with t->rseq.event.fatal set
 * when the descriptor is invalid, and false without it when a user space
 * access faulted.
 */
bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs,
                               unsigned long csaddr)
{
        struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
        u64 start_ip, abort_ip, offset, cs_end, head, tasksize = TASK_SIZE;
        unsigned long ip = instruction_pointer(regs);
        /* The first 64 bits of the descriptor, checked for 0 below */
        u64 __user *uc_head = (u64 __user *) ucs;
        u32 usig, __user *uc_sig;

        scoped_user_rw_access(ucs, efault) {
                /*
                 * Evaluate the user pile and exit if one of the conditions
                 * is not fulfilled.
                 */
                unsafe_get_user(start_ip, &ucs->start_ip, efault);
                if (unlikely(start_ip >= tasksize))
                        goto die;
                /* If outside, just clear the critical section. */
                if (ip < start_ip)
                        goto clear;

                unsafe_get_user(offset, &ucs->post_commit_offset, efault);
                cs_end = start_ip + offset;
                /* Check for overflow and wraparound */
                if (unlikely(cs_end >= tasksize || cs_end < start_ip))
                        goto die;

                /* If not inside, clear it. */
                if (ip >= cs_end)
                        goto clear;

                unsafe_get_user(abort_ip, &ucs->abort_ip, efault);
                /* Ensure it's "valid" */
                if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig)))
                        goto die;
                /*
                 * Validate that the abort IP is not in the critical section.
                 * The subtraction is unsigned, so an abort IP below start_ip
                 * wraps to a large value and passes.
                 */
                if (unlikely(abort_ip - start_ip < offset))
                        goto die;

                /*
                 * Check version and flags for 0. No point in emitting
                 * deprecated warnings before dying. That could be done in
                 * the slow path eventually, but *shrug*.
                 */
                unsafe_get_user(head, uc_head, efault);
                if (unlikely(head))
                        goto die;

                /* abort_ip - 4 is >= 0. See abort_ip check above */
                uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
                unsafe_get_user(usig, uc_sig, efault);
                if (unlikely(usig != t->rseq.sig))
                        goto die;

                /* rseq_event.user_irq is only valid if CONFIG_GENERIC_IRQ_ENTRY=y */
                if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
                        /* If not in interrupt from user context, let it die */
                        if (unlikely(!t->rseq.event.user_irq))
                                goto die;
                }
                /* Invalidate the critical section and redirect to the abort IP */
                unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
                instruction_pointer_set(regs, (unsigned long)abort_ip);
                rseq_stat_inc(rseq_stats.fixup);
                /* Done, do not fall into the clear path below */
                break;
        clear:
                unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
                rseq_stat_inc(rseq_stats.clear);
                /* No fixup happened, which also suppresses the trace below */
                abort_ip = 0ULL;
        }

        if (unlikely(abort_ip))
                rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
        return true;
die:
        t->rseq.event.fatal = true;
efault:
        return false;
}

/*
 * On debug kernels validate that user space did not mess with it if the
 * debug branch is enabled.
 */
bool rseq_debug_validate_ids(struct task_struct *t)
{
        struct rseq __user *rseq = t->rseq.usrptr;
        u32 cpu_id, uval, node_id;

        /*
         * On the first exit after registering the rseq region CPU ID is
         * RSEQ_CPU_ID_UNINITIALIZED and node_id in user space is 0!
         */
        node_id = t->rseq.ids.cpu_id != RSEQ_CPU_ID_UNINITIALIZED ?
                  cpu_to_node(t->rseq.ids.cpu_id) : 0;

        scoped_user_read_access(rseq, efault) {
                unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault);
                if (cpu_id != t->rseq.ids.cpu_id)
                        goto die;
                unsafe_get_user(uval, &rseq->cpu_id, efault);
                if (uval != cpu_id)
                        goto die;
                unsafe_get_user(uval, &rseq->node_id, efault);
                if (uval != node_id)
                        goto die;
                unsafe_get_user(uval, &rseq->mm_cid, efault);
                if (uval != t->rseq.ids.mm_cid)
                        goto die;
        }
        return true;
die:
        t->rseq.event.fatal = true;
efault:
        return false;
}

#endif /* RSEQ_BUILD_SLOW_PATH */

/*
 * This only ensures that abort_ip is in the user address space and
 * validates that it is preceded by the signature.
 *
 * No other sanity checks are done here, that's what the debug code is for.
 * When the rseq_debug_enabled key is on, the work is handed over to
 * rseq_debug_update_user_cs().
 *
 * Returns true if the critical section address in user space was cleared,
 * either after redirecting the instruction pointer in @regs to the abort
 * IP or because the instruction pointer was outside of the section.
 *
 * Returns false with t->rseq.event.fatal set if @csaddr or the abort IP is
 * not a user space address or if the signature does not match. Returns
 * false without it when a user space access faulted.
 */
static rseq_inline bool
rseq_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr)
{
        struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
        unsigned long ip = instruction_pointer(regs);
        unsigned long tasksize = TASK_SIZE;
        u64 start_ip, abort_ip, offset;
        u32 usig, __user *uc_sig;

        rseq_stat_inc(rseq_stats.cs);

        /* A descriptor address outside of user space is fatal */
        if (unlikely(csaddr >= tasksize)) {
                t->rseq.event.fatal = true;
                return false;
        }

        if (static_branch_unlikely(&rseq_debug_enabled))
                return rseq_debug_update_user_cs(t, regs, csaddr);

        scoped_user_rw_access(ucs, efault) {
                unsafe_get_user(start_ip, &ucs->start_ip, efault);
                unsafe_get_user(offset, &ucs->post_commit_offset, efault);
                unsafe_get_user(abort_ip, &ucs->abort_ip, efault);

                /*
                 * No sanity checks. If user space screwed it up, it can
                 * keep the pieces. That's what debug code is for.
                 *
                 * If outside, just clear the critical section.
                 *
                 * The unsigned subtraction covers both sides: an IP below
                 * start_ip wraps around to a large value.
                 */
                if (ip - start_ip >= offset)
                        goto clear;

                /*
                 * Two requirements for @abort_ip:
                 *   - Must be in user space as x86 IRET would happily return to
                 *     the kernel.
                 *   - The four bytes preceding the instruction at @abort_ip must
                 *     contain the signature.
                 *
                 * The latter protects against the following attack vector:
                 *
                 * An attacker with limited abilities to write, creates a critical
                 * section descriptor, sets the abort IP to a library function or
                 * some other ROP gadget and stores the address of the descriptor
                 * in TLS::rseq::rseq_cs. An RSEQ abort would then evade ROP
                 * protection.
                 */
                if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig)))
                        goto die;

                /* The address is guaranteed to be >= 0 and < TASK_SIZE */
                uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
                unsafe_get_user(usig, uc_sig, efault);
                if (unlikely(usig != t->rseq.sig))
                        goto die;

                /* Invalidate the critical section */
                unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
                /* Update the instruction pointer */
                instruction_pointer_set(regs, (unsigned long)abort_ip);
                rseq_stat_inc(rseq_stats.fixup);
                /* Done, do not fall into the clear path below */
                break;
        clear:
                unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
                rseq_stat_inc(rseq_stats.clear);
                /* No fixup happened, which also suppresses the trace below */
                abort_ip = 0ULL;
        }

        if (unlikely(abort_ip))
                rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
        return true;
die:
        t->rseq.event.fatal = true;
efault:
        return false;
}

/*
 * Updates CPU ID, Node ID and MM CID and reads the critical section
 * address, when @csaddr != NULL. This allows to put the ID update and the
 * read under the same uaccess region to spare a separate begin/end.
 *
 * As this is either invoked from a C wrapper with @csaddr = NULL or from
 * the fast path code with a valid pointer, a clever compiler should be
 * able to optimize the read out. Spares a duplicate implementation.
 *
 * Returns true, if the operation was successful, false otherwise.
 *
 * In the failure case task::rseq_event::fatal is set when invalid data
 * was found on debug kernels. It's clear when the failure was an unresolved page
 * fault.
 *
 * If inlined into the exit to user path with interrupts disabled, the
 * caller has to protect against page faults with pagefault_disable().
 *
 * In preemptible task context this would be counterproductive as the page
 * faults could not be fully resolved. As a consequence unresolved page
 * faults in task context are fatal too.
 */
static rseq_inline
bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids,
                             u32 node_id, u64 *csaddr)
{
        struct rseq __user *rseq = t->rseq.usrptr;

        /* Debug mode: check the current user space values before overwriting them */
        if (static_branch_unlikely(&rseq_debug_enabled)) {
                if (!rseq_debug_validate_ids(t))
                        return false;
        }

        scoped_user_rw_access(rseq, efault) {
                /* Both CPU ID fields receive the same value */
                unsafe_put_user(ids->cpu_id, &rseq->cpu_id_start, efault);
                unsafe_put_user(ids->cpu_id, &rseq->cpu_id, efault);
                unsafe_put_user(node_id, &rseq->node_id, efault);
                unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault);
                /* Optional read of the critical section address */
                if (csaddr)
                        unsafe_get_user(*csaddr, &rseq->rseq_cs, efault);

                /* Open coded, so it's in the same user access region */
                if (rseq_slice_extension_enabled()) {
                        /* Unconditionally clear it, no point in conditionals */
                        unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
                }
        }

        /* User space control was cleared above, drop the kernel side grant too */
        rseq_slice_clear_grant(t);
        /*
         * Cache the new values
         *
         * NOTE(review): cpu_cid presumably overlays cpu_id and mm_cid in
         * struct rseq_ids, so one store caches both - confirm against the
         * struct definition.
         */
        t->rseq.ids.cpu_cid = ids->cpu_cid;
        rseq_stat_inc(rseq_stats.ids);
        rseq_trace_update(t, ids);
        return true;
efault:
        /* Nothing is cached and no statistics are updated on a fault */
        return false;
}

/*
 * Write the new IDs to user space and, if required, evaluate the critical
 * section the task might be in.
 *
 * Returns true on success, false if the ID update or the critical section
 * handling failed.
 */
static rseq_inline bool rseq_update_usr(struct task_struct *t, struct pt_regs *regs,
                                        struct rseq_ids *ids, u32 node_id)
{
        u64 cs_uaddr;

        if (!rseq_set_ids_get_csaddr(t, ids, node_id, &cs_uaddr))
                return false;

        /*
         * With the generic IRQ entry code the critical section can only be
         * affected when the kernel was entered by an interrupt from user
         * space. Skip the evaluation otherwise, except in debug mode.
         */
        if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY) &&
            !static_branch_unlikely(&rseq_debug_enabled) &&
            likely(!t->rseq.event.user_irq))
                return true;

        /* Only a non-zero critical section address requires real work */
        if (unlikely(cs_uaddr))
                return rseq_update_user_cs(t, regs, cs_uaddr);

        return true;
}

/*
 * If you want to use this then convert your architecture to the generic
 * entry code. I'm tired of building workarounds for people who can't be
 * bothered to make the maintenance of generic infrastructure less
 * burdensome. Just sucking everything into the architecture code and
 * thereby making others chase the horrible hacks and keep them working is
 * neither acceptable nor sustainable.
 */
#ifdef CONFIG_GENERIC_ENTRY

/*
 * This is inlined into the exit path because:
 *
 * 1) It's a one time comparison in the fast path when there is no event to
 *    handle
 *
 * 2) The access to the user space rseq memory (TLS) is unlikely to fault
 *    so the straight inline operation is:
 *
 *      - Four 32-bit stores only if CPU ID/ MM CID need to be updated
 *      - One 64-bit load to retrieve the critical section address
 *
 * 3) In the unlikely case that the critical section address is != NULL:
 *
 *     - One 64-bit load to retrieve the start IP
 *     - One 64-bit load to retrieve the offset for calculating the end
 *     - One 64-bit load to retrieve the abort IP
 *     - One 32-bit load to retrieve the signature
 *     - One store to clear the critical section address
 *
 * The non-debug case implements only the minimal required checking. It
 * provides protection against a rogue abort IP in kernel space, which
 * would be exploitable at least on x86, and also against a rogue CS
 * descriptor by checking the signature at the abort IP. Any fallout from
 * invalid critical section descriptors is a user space problem. The debug
 * case provides the full set of checks and terminates the task if a
 * condition is not met.
 *
 * In case of a fault or an invalid value, this sets TIF_NOTIFY_RESUME and
 * tells the caller to loop back into exit_to_user_mode_loop(). The rseq
 * slow path there will handle the failure.
 */
static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct task_struct *t)
{
        /*
         * Page faults need to be disabled as this is called with
         * interrupts disabled
         */
        guard(pagefault)();
        /* Common case: the IDs are unchanged, only the critical section matters */
        if (likely(!t->rseq.event.ids_changed)) {
                struct rseq __user *rseq = t->rseq.usrptr;
                /*
                 * If IDs have not changed rseq_event::user_irq must be true
                 * See rseq_sched_switch_event().
                 */
                u64 csaddr;

                scoped_user_rw_access(rseq, efault) {
                        unsafe_get_user(csaddr, &rseq->rseq_cs, efault);

                        /* Open coded, so it's in the same user access region */
                        if (rseq_slice_extension_enabled()) {
                                /* Unconditionally clear it, no point in conditionals */
                                unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
                        }
                }

                /* User space control was cleared above, drop the kernel side grant too */
                rseq_slice_clear_grant(t);

                /*
                 * NOTE(review): with the debug key enabled this invokes
                 * rseq_update_user_cs() even when csaddr is 0, while
                 * rseq_update_usr() returns early on a zero csaddr in
                 * debug mode too. Confirm that this is intended.
                 */
                if (static_branch_unlikely(&rseq_debug_enabled) || unlikely(csaddr)) {
                        if (unlikely(!rseq_update_user_cs(t, regs, csaddr)))
                                return false;
                }
                return true;
        }

        /* IDs changed: write the new values and handle the critical section */
        struct rseq_ids ids = {
                .cpu_id = task_cpu(t),
                .mm_cid = task_mm_cid(t),
        };
        u32 node_id = cpu_to_node(ids.cpu_id);

        return rseq_update_usr(t, regs, &ids, node_id);
efault:
        /* Fault in the user access region above. The caller handles the failure */
        return false;
}

/*
 * Handle a pending sched_switch event of current in the inlined exit path.
 *
 * Returns true if the user space update failed, in which case the event
 * state is left untouched. Returns false if there was nothing to do or
 * the update succeeded; the event state is cleared in that case.
 */
static __always_inline bool __rseq_exit_to_user_mode_restart(struct pt_regs *regs)
{
        struct task_struct *t = current;

        /*
         * If the task did not go through schedule or got the flag enforced
         * by the rseq syscall or execve, then nothing to do here.
         *
         * CPU ID and MM CID can only change when going through a context
         * switch.
         *
         * rseq_sched_switch_event() sets the rseq_event::sched_switch bit
         * only when rseq_event::has_rseq is true. That conditional is
         * required to avoid setting the TIF bit if RSEQ is not registered
         * for a task. rseq_event::sched_switch is cleared when RSEQ is
         * unregistered by a task so it's sufficient to check for the
         * sched_switch bit alone.
         *
         * A sane compiler requires three instructions for the nothing to do
         * case including clearing the events, but your mileage might vary.
         */
        if (unlikely((t->rseq.event.sched_switch))) {
                rseq_stat_inc(rseq_stats.fastpath);

                /* On failure keep the events, so they are still set for the caller */
                if (unlikely(!rseq_exit_user_update(regs, t)))
                        return true;
        }
        /* Clear state so next entry starts from a clean slate */
        t->rseq.event.events = 0;
        return false;
}

/* Required to allow conversion to GENERIC_ENTRY w/o GENERIC_TIF_BITS */
#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
/* Check whether the rseq bit is set in the thread info work mask @ti_work */
static __always_inline bool test_tif_rseq(unsigned long ti_work)
{
        return (ti_work & _TIF_RSEQ) != 0;
}

/*
 * Clear TIF_RSEQ for current. The build time assert ensures that TIF_RSEQ
 * is a different flag than TIF_NOTIFY_RESUME, so clearing it cannot drop
 * a pending TIF_NOTIFY_RESUME.
 */
static __always_inline void clear_tif_rseq(void)
{
        static_assert(TIF_RSEQ != TIF_NOTIFY_RESUME);
        clear_thread_flag(TIF_RSEQ);
}
#else
/*
 * Without the generic TIF bits the test always succeeds, so the rseq
 * event check is done unconditionally and there is no flag to clear.
 */
static __always_inline bool test_tif_rseq(unsigned long ti_work) { return true; }
static __always_inline void clear_tif_rseq(void) { }
#endif

/*
 * Entry point for the rseq handling on the way out to user space.
 *
 * @regs:       Register state of the interrupted user space context
 * @ti_work:    Thread info work mask, which is tested for the rseq bit
 *
 * Returns true if the inlined update failed. In that case the slowpath
 * event and TIF_NOTIFY_RESUME are set for current so the failure gets
 * handled out of line. Otherwise the result of
 * rseq_arm_slice_extension_timer() is returned.
 */
static __always_inline bool
rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
{
        if (unlikely(test_tif_rseq(ti_work))) {
                if (unlikely(__rseq_exit_to_user_mode_restart(regs))) {
                        current->rseq.event.slowpath = true;
                        set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
                        return true;
                }
                /* Handled inline, the flag is not longer required */
                clear_tif_rseq();
        }
        /*
         * Arm the slice extension timer if nothing to do anymore and the
         * task really goes out to user space.
         */
        return rseq_arm_slice_extension_timer();
}

#else /* CONFIG_GENERIC_ENTRY */
/* Without the generic entry code there is no inlined exit path: never restart */
static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
{
        return false;
}
#endif /* !CONFIG_GENERIC_ENTRY */

/*
 * Final rseq hook on the syscall exit path. Accounts the exit and, on
 * lockdep enabled kernels only, warns about a leftover sched_switch event
 * and resets the event state.
 */
static __always_inline void rseq_syscall_exit_to_user_mode(void)
{
        rseq_stat_inc(rseq_stats.exit);

        /* Bailing out here removes the store for the !lockdep case */
        if (!IS_ENABLED(CONFIG_LOCKDEP))
                return;

        WARN_ON_ONCE(current->rseq.event.sched_switch);
        current->rseq.event.events = 0;
}

static __always_inline void rseq_irqentry_exit_to_user_mode(void)
{
        struct rseq_event *ev = &current->rseq.event;

        rseq_stat_inc(rseq_stats.exit);

        lockdep_assert_once(!ev->sched_switch);

        /*
         * Ensure that event (especially user_irq) is cleared when the
         * interrupt did not result in a schedule and therefore the
         * rseq processing could not clear it.
         */
        ev->events = 0;
}

/* Required to keep ARM64 working */
static __always_inline void rseq_exit_to_user_mode_legacy(void)
{
        struct rseq_event *ev = &current->rseq.event;

        rseq_stat_inc(rseq_stats.exit);

        if (static_branch_unlikely(&rseq_debug_enabled))
                WARN_ON_ONCE(ev->sched_switch);

        /*
         * Ensure that event (especially user_irq) is cleared when the
         * interrupt did not result in a schedule and therefore the
         * rseq processing did not clear it.
         */
        ev->events = 0;
}

void __rseq_debug_syscall_return(struct pt_regs *regs);

/* Invoke the out of line syscall return debug check, but only in debug mode */
static __always_inline void rseq_debug_syscall_return(struct pt_regs *regs)
{
        if (!static_branch_unlikely(&rseq_debug_enabled))
                return;

        __rseq_debug_syscall_return(regs);
}
#else /* CONFIG_RSEQ */
/*
 * RSEQ is compiled out: all hooks are empty and the functions returning
 * bool report false, i.e. no restart and no slice extension grant.
 */
static inline void rseq_note_user_irq_entry(void) { }
static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
{
        return false;
}
static inline void rseq_syscall_exit_to_user_mode(void) { }
static inline void rseq_irqentry_exit_to_user_mode(void) { }
static inline void rseq_exit_to_user_mode_legacy(void) { }
static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
#endif /* !CONFIG_RSEQ */

#endif /* _LINUX_RSEQ_ENTRY_H */