/* root/usr/src/uts/intel/syscall/getcontext.c */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/*        All Rights Reserved   */

/*
 * Copyright 2023 Oxide Computer Company
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/vmparam.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/stack.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/frame.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/psw.h>
#include <sys/ucontext.h>
#include <sys/asm_linkage.h>
#include <sys/errno.h>
#include <sys/archsystm.h>
#include <sys/schedctl.h>
#include <sys/debug.h>
#include <sys/sysmacros.h>

/*
 * Copy callback that gives copyout_noerr() a definite return value.
 *
 * copyout_noerr() reports nothing itself, so the copy is bracketed with
 * on_fault()/no_fault() and a fault is turned into EFAULT. The protected
 * window is deliberately kept to the copy alone: we are called back from the
 * FPU code, which is holding things with kpreempt_disable() and related, and
 * a wider on_fault()/no_fault() region would both disguise our own mistakes
 * as user errors and make it harder to reason about restoring our state
 * correctly.
 *
 * Returns 0 when the copy completed and EFAULT when a fault was taken.
 */
static int
savecontext_copyout(const void *kaddr, void *uaddr, size_t size)
{
        label_t fault_jb;

        if (on_fault(&fault_jb)) {
                /* A fault was taken during the copy below. */
                no_fault();
                return (EFAULT);
        }

        copyout_noerr(kaddr, uaddr, size);
        no_fault();
        return (0);
}

/*
 * Save user context.
 *
 * Fills in the kernel's copy of a ucontext_t at ucp for the current thread.
 * Callers are responsible for copying that state out if required.
 *
 *   ucp    kernel copy of the ucontext_t to populate
 *   mask   signal mask to record in uc_sigmask
 *   flags  SAVECTXT_F_EXTD, optionally combined with SAVECTXT_F_ONFAULT, or
 *          no flags at all; anything else trips a VERIFY
 *
 * In the traditional version of this (when flags is 0), we write and fill
 * out all of the ucontext_t without any care for what was there ahead of
 * this. When there is extended state to deal with (flags include
 * SAVECTXT_F_EXTD), our callers will have already copied in and
 * pre-populated uc_xsave with the value from userland. That value is
 * preserved and, when it is non-zero, the processor has xsave enabled and
 * the lwp has the FPU enabled, the extended state is copied out directly to
 * that user pointer and UC_XSAVE is set in uc_flags. Currently this is only
 * done for uc_xsave. Every other member is overwritten in both modes.
 *
 * We allow the copying to happen in two different ways mostly because this is
 * also used in the signal handling context where we must be much more careful
 * about how to copy out data.
 *
 * Returns 0 on success, otherwise the error from fpu_signal_copyout().
 */
int
savecontext(ucontext_t *ucp, const k_sigset_t *mask, savecontext_flags_t flags)
{
        proc_t *p = ttoproc(curthread);
        klwp_t *lwp = ttolwp(curthread);
        struct regs *rp = lwptoregs(lwp);
        boolean_t need_xsave = B_FALSE;
        boolean_t fpu_en;
        long user_xsave = 0;
        int ret;

        VERIFY0(flags & ~(SAVECTXT_F_EXTD | SAVECTXT_F_ONFAULT));

        /*
         * If we have been asked to save extended state, then we must make sure
         * that we don't clobber the user's uc_xsave value when we zero the
         * structure below. We must also determine if the processor has xsave
         * state. If it does not, then we just simply honor the pointer, but do
         * not write anything out and do not set the flag.
         */
        if ((flags & SAVECTXT_F_EXTD) != 0) {
                user_xsave = ucp->uc_xsave;
                if (fpu_xsave_enabled() && user_xsave != 0) {
                        need_xsave = B_TRUE;
                }
        } else {
                /*
                 * The only other flag that we have right now is about modifying
                 * the copyout behavior when we're copying out extended
                 * information. If it's not here, we should not do anything.
                 */
                VERIFY0(flags);
        }

        /*
         * Zero the entire structure before filling it in. Every member
         * through the end of the gregs is assigned below, but member
         * assignment does not initialize any padding the compiler may have
         * placed between or inside those members, and the members after the
         * gregs are only partially written. getsetcontext() hands us an
         * uninitialized on-stack ucontext_t and then copies out all of it, so
         * anything left untouched here would let kernel stack garbage escape
         * to userland.
         */
        bzero(ucp, sizeof (*ucp));
        ucp->uc_xsave = user_xsave;

        ucp->uc_flags = UC_ALL;
        ucp->uc_link = (struct ucontext *)lwp->lwp_oldcontext;

        /*
         * Try to copyin() the ustack if one is registered. If the stack
         * has zero size, this indicates that stack bounds checking has
         * been disabled for this LWP. If stack bounds checking is disabled
         * or the copyin() fails, we fall back to the legacy behavior.
         */
        if (lwp->lwp_ustack == (uintptr_t)NULL ||
            copyin((void *)lwp->lwp_ustack, &ucp->uc_stack,
            sizeof (ucp->uc_stack)) != 0 ||
            ucp->uc_stack.ss_size == 0) {

                if (lwp->lwp_sigaltstack.ss_flags == SS_ONSTACK) {
                        ucp->uc_stack = lwp->lwp_sigaltstack;
                } else {
                        ucp->uc_stack.ss_sp = p->p_usrstack - p->p_stksize;
                        ucp->uc_stack.ss_size = p->p_stksize;
                        ucp->uc_stack.ss_flags = 0;
                }
        }

        /*
         * If either the trace flag or REQUEST_STEP is set,
         * arrange for single-stepping and turn off the trace flag.
         */
        if ((rp->r_ps & PS_T) || (lwp->lwp_pcb.pcb_flags & REQUEST_STEP)) {
                /*
                 * Clear PS_T so that saved user context won't have trace
                 * flag set.
                 */
                rp->r_ps &= ~PS_T;

                if (!(lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP)) {
                        lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
                        /*
                         * trap() always checks DEBUG_PENDING before
                         * checking for any pending signal. This at times
                         * can potentially lead to DEBUG_PENDING not being
                         * honoured. (for eg: the lwp is stopped by
                         * stop_on_fault() called from trap(), after being
                         * awakened it might see a pending signal and call
                         * savecontext(), however on the way back to userland
                         * there is no place it can be detected). Hence in
                         * anticipation of such occasions, set AST flag for
                         * the thread which will make the thread take an
                         * excursion through trap() where it will be handled
                         * appropriately.
                         */
                        aston(curthread);
                }
        }

        /*
         * The general registers are always saved. The floating point
         * registers are only saved when the lwp has the FPU enabled;
         * otherwise UC_FPU is dropped from uc_flags.
         */
        getgregs(lwp, ucp->uc_mcontext.gregs);
        fpu_en = (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN) != 0;
        if (fpu_en)
                getfpregs(lwp, &ucp->uc_mcontext.fpregs);
        else
                ucp->uc_flags &= ~UC_FPU;

        sigktou(mask, &ucp->uc_sigmask);

        /*
         * Determine if we need to get the rest of the xsave context out here.
         * If the thread doesn't actually have the FPU enabled, then we don't
         * actually need to do this. We also don't have to if it wasn't
         * requested.
         */
        if (!need_xsave || !fpu_en) {
                return (0);
        }

        ucp->uc_flags |= UC_XSAVE;

        /*
         * While you might be asking why and contemplating despair, just know
         * that some things need to just be done in the face of signal (half the
         * reason this function exists). Basically when in signal context we
         * can't trigger watch points. This means we need to tell the FPU copy
         * logic to actually use the on_fault/no_fault and the non-error form of
         * copyout (which still checks if it's a user address at least).
         */
        if ((flags & SAVECTXT_F_ONFAULT) != 0) {
                ret = fpu_signal_copyout(lwp, ucp->uc_xsave,
                    savecontext_copyout);
        } else {
                ret = fpu_signal_copyout(lwp, ucp->uc_xsave, copyout);
        }

        return (ret);
}

/*
 * Restore user context.
 *
 * Applies to the current thread/lwp whichever parts of the kernel-resident
 * ucontext_t at ucp are selected by uc_flags: alternate signal stack state
 * (UC_STACK), general registers (UC_CPU), floating point or extended state
 * (UC_FPU / UC_XSAVE) and the held signal mask (UC_SIGMASK). uc_link is
 * always installed as the lwp's old context.
 */
void
restorecontext(ucontext_t *ucp)
{
        kthread_t *thr = curthread;
        klwp_t *curlwp = ttolwp(thr);

        curlwp->lwp_oldcontext = (uintptr_t)ucp->uc_link;

        if ((ucp->uc_flags & UC_STACK) != 0) {
                if (ucp->uc_stack.ss_flags != SS_ONSTACK) {
                        curlwp->lwp_sigaltstack.ss_flags &= ~SS_ONSTACK;
                } else {
                        curlwp->lwp_sigaltstack = ucp->uc_stack;
                }
        }

        if ((ucp->uc_flags & UC_CPU) != 0) {
                /*
                 * A set trace flag means the lwp must take a single-step
                 * trap on its way back to user level, so note that before
                 * the registers are replaced. The x86 lcall interface and
                 * sysenter have already done this and cleared the flag; the
                 * amd64 syscall interface has not.
                 */
                if ((lwptoregs(curlwp)->r_ps & PS_T) != 0) {
                        curlwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
                }
                setgregs(curlwp, ucp->uc_mcontext.gregs);
                curlwp->lwp_eosys = JUSTRETURN;
                thr->t_post_sys = 1;
                aston(curthread);
        }

        /*
         * The code that copied in the ucontext_t has already folded UC_FPU
         * and UC_XSAVE together, so at most one of the two is set by the
         * time we get here.
         */
        if ((ucp->uc_flags & UC_XSAVE) == 0) {
                if ((ucp->uc_flags & UC_FPU) != 0) {
                        setfpregs(curlwp, &ucp->uc_mcontext.fpregs);
                }
        } else {
                ASSERT0(ucp->uc_flags & UC_FPU);
                ASSERT3U((uintptr_t)ucp->uc_xsave, >=, _kernelbase);
                fpu_set_xsave(curlwp, (const void *)ucp->uc_xsave);
        }

        if ((ucp->uc_flags & UC_SIGMASK) != 0) {
                /*
                 * No need to take p->p_lock: only thread-private data is
                 * being manipulated.
                 */
                schedctl_finish_sigblock(thr);
                sigutok(&ucp->uc_sigmask, &thr->t_hold);
                if (sigcheck(ttoproc(thr), thr)) {
                        thr->t_sig_check = 1;
                }
        }
}


/*
 * Handler for the context requests GETCONTEXT, GETCONTEXT_EXTD, SETCONTEXT,
 * GETUSTACK and SETUSTACK.
 *
 *   flag   which request to perform; an unrecognized value yields EINVAL
 *   arg    user address whose meaning depends on flag: a ucontext_t for the
 *          GETCONTEXT, GETCONTEXT_EXTD and SETCONTEXT requests, the location
 *          to store the registered ustack address for GETUSTACK, and the
 *          address of a stack_t to register for SETUSTACK
 *
 * Returns 0 on success; on failure the error is posted with set_errno() and
 * its return value is handed back.
 */
int
getsetcontext(int flag, void *arg)
{
        ucontext_t uc;
        ucontext_t *ucp;
        klwp_t *lwp = ttolwp(curthread);
        stack_t dummy_stk;
        int ret;

        /*
         * In future releases, when the ucontext structure grows,
         * getcontext should be modified to only return the fields
         * specified in the uc_flags.  That way, the structure can grow
         * and still be binary compatible with all .o's which will only
         * have old fields defined in uc_flags
         */

        switch (flag) {
        default:
                return (set_errno(EINVAL));

        case GETCONTEXT:
                schedctl_finish_sigblock(curthread);
                ret = savecontext(&uc, &curthread->t_hold, SAVECTXT_F_NONE);
                if (ret != 0)
                        return (set_errno(ret));
                if (uc.uc_flags & UC_SIGMASK)
                        SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
                if (copyout(&uc, arg, sizeof (uc)))
                        return (set_errno(EFAULT));
                return (0);

        /*
         * In the case of GETCONTEXT_EXTD, we've theoretically been given all
         * the required pointers of the appropriate length by libc in the
         * ucontext_t. We must first copyin the offsets that we care about to
         * seed the known extensions. Right now that is just the uc_xsave
         * member. As we are setting uc_flags, we only look at the members we
         * need to care about.
         *
         * The main reason that we have a different entry point is that we don't
         * want to assume that callers have always properly zeroed their
         * ucontext_t ahead of calling into libc. In fact, it often is just
         * declared on the stack so we can't assume that at all. Instead,
         * getcontext_extd does require that.
         */
        case GETCONTEXT_EXTD:
                schedctl_finish_sigblock(curthread);
                ucp = arg;
                if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
                    sizeof (uc.uc_xsave)) != 0) {
                        return (set_errno(EFAULT));
                }
                ret = savecontext(&uc, &curthread->t_hold, SAVECTXT_F_EXTD);
                if (ret != 0)
                        return (set_errno(ret));
                if (uc.uc_flags & UC_SIGMASK)
                        SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
                if (copyout(&uc, arg, sizeof (uc)))
                        return (set_errno(EFAULT));
                return (0);


        case SETCONTEXT:
                ucp = arg;
                if (ucp == NULL)
                        exit(CLD_EXITED, 0);
                /*
                 * Don't copyin filler or floating state unless we need it.
                 * The ucontext_t struct and fields are specified in the ABI.
                 */
                if (copyin(ucp, &uc, offsetof(ucontext_t, uc_filler) -
                    sizeof (uc.uc_mcontext.fpregs))) {
                        return (set_errno(EFAULT));
                }
                if (uc.uc_flags & UC_SIGMASK)
                        SIGSET_BRAND_TO_NATIVE(&uc.uc_sigmask);

                if ((uc.uc_flags & UC_FPU) &&
                    copyin(&ucp->uc_mcontext.fpregs, &uc.uc_mcontext.fpregs,
                    sizeof (uc.uc_mcontext.fpregs))) {
                        return (set_errno(EFAULT));
                }

                /*
                 * Only trust uc_xsave when the caller's flags say it is
                 * meaningful; otherwise force it to zero rather than use
                 * whatever the partial copyin above left there.
                 */
                uc.uc_xsave = 0;
                if ((uc.uc_flags & UC_XSAVE) != 0) {
                        if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
                            sizeof (uc.uc_xsave)) != 0) {
                                return (set_errno(EFAULT));
                        }

                        ret = fpu_signal_copyin(lwp, &uc);
                        if (ret != 0) {
                                return (set_errno(ret));
                        }
                }

                restorecontext(&uc);

                /*
                 * Best effort: a failure to update the registered ustack is
                 * deliberately ignored.
                 */
                if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0))
                        (void) copyout(&uc.uc_stack, (stack_t *)lwp->lwp_ustack,
                            sizeof (uc.uc_stack));
                return (0);

        case GETUSTACK:
                if (copyout(&lwp->lwp_ustack, arg, sizeof (caddr_t)))
                        return (set_errno(EFAULT));
                return (0);

        case SETUSTACK:
                /*
                 * The copyin only checks that arg is readable as a stack_t;
                 * the contents are discarded and just the address is kept.
                 */
                if (copyin(arg, &dummy_stk, sizeof (dummy_stk)))
                        return (set_errno(EFAULT));
                lwp->lwp_ustack = (uintptr_t)arg;
                return (0);
        }
}

#ifdef _SYSCALL32_IMPL

/*
 * Save user context for 32-bit processes.
 *
 * This is the ucontext32_t counterpart of savecontext() and takes the same
 * flags: SAVECTXT_F_EXTD, optionally combined with SAVECTXT_F_ONFAULT, or no
 * flags at all. Anything else trips a VERIFY.
 *
 *   ucp    kernel copy of the ucontext32_t to populate
 *   mask   signal mask to record in uc_sigmask
 *   flags  see above
 *
 * Returns 0 on success, otherwise the error from fpu_signal_copyout().
 */
int
savecontext32(ucontext32_t *ucp, const k_sigset_t *mask,
    savecontext_flags_t flags)
{
        proc_t *p = ttoproc(curthread);
        klwp_t *lwp = ttolwp(curthread);
        struct regs *rp = lwptoregs(lwp);
        boolean_t need_xsave = B_FALSE;
        boolean_t fpu_en;
        int32_t user_xsave = 0;
        uintptr_t uaddr;
        int ret;

        /*
         * Reject unknown flag bits up front, exactly as savecontext() does.
         */
        VERIFY0(flags & ~(SAVECTXT_F_EXTD | SAVECTXT_F_ONFAULT));

        /*
         * See savecontext for an explanation of this.
         */
        if ((flags & SAVECTXT_F_EXTD) != 0) {
                user_xsave = ucp->uc_xsave;
                if (fpu_xsave_enabled() && user_xsave != 0) {
                        need_xsave = B_TRUE;
                }
        } else {
                VERIFY0(flags);
        }

        /*
         * Zero the entire structure before filling it in. Every member
         * through the end of the gregs is assigned below, but member
         * assignment does not initialize any padding the compiler may have
         * placed between or inside those members, and the members after the
         * gregs are only partially written. getsetcontext32() hands us an
         * uninitialized on-stack ucontext32_t and then copies out all of it,
         * so anything left untouched here would let kernel stack garbage
         * escape to userland.
         */
        bzero(ucp, sizeof (*ucp));
        ucp->uc_xsave = user_xsave;

        ucp->uc_flags = UC_ALL;
        ucp->uc_link = (caddr32_t)lwp->lwp_oldcontext;

        /*
         * Use the registered ustack if there is one, it can be copied in and
         * it has a non-zero size; otherwise fall back to the alternate
         * signal stack or the process stack.
         */
        if (lwp->lwp_ustack == (uintptr_t)NULL ||
            copyin((void *)lwp->lwp_ustack, &ucp->uc_stack,
            sizeof (ucp->uc_stack)) != 0 ||
            ucp->uc_stack.ss_size == 0) {

                if (lwp->lwp_sigaltstack.ss_flags == SS_ONSTACK) {
                        ucp->uc_stack.ss_sp =
                            (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
                        ucp->uc_stack.ss_size =
                            (size32_t)lwp->lwp_sigaltstack.ss_size;
                        ucp->uc_stack.ss_flags = SS_ONSTACK;
                } else {
                        ucp->uc_stack.ss_sp = (caddr32_t)(uintptr_t)
                            (p->p_usrstack - p->p_stksize);
                        ucp->uc_stack.ss_size = (size32_t)p->p_stksize;
                        ucp->uc_stack.ss_flags = 0;
                }
        }

        /*
         * If either the trace flag or REQUEST_STEP is set, arrange
         * for single-stepping and turn off the trace flag.
         */
        if ((rp->r_ps & PS_T) || (lwp->lwp_pcb.pcb_flags & REQUEST_STEP)) {
                /*
                 * Clear PS_T so that saved user context won't have trace
                 * flag set.
                 */
                rp->r_ps &= ~PS_T;

                if (!(lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP)) {
                        lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
                        /*
                         * See comments in savecontext().
                         */
                        aston(curthread);
                }
        }

        /*
         * The general registers are always saved. The floating point
         * registers are only saved when the lwp has the FPU enabled;
         * otherwise UC_FPU is dropped from uc_flags.
         */
        getgregs32(lwp, ucp->uc_mcontext.gregs);
        fpu_en = (lwp->lwp_pcb.pcb_fpu.fpu_flags & FPU_EN) != 0;
        if (fpu_en)
                getfpregs32(lwp, &ucp->uc_mcontext.fpregs);
        else
                ucp->uc_flags &= ~UC_FPU;

        sigktou(mask, &ucp->uc_sigmask);

        /*
         * Extended state is only copied out when it was requested and the
         * lwp actually has the FPU enabled.
         */
        if (!need_xsave || !fpu_en) {
                return (0);
        }

        ucp->uc_flags |= UC_XSAVE;

        /*
         * Due to not wanting to change or break programs, the filler in the
         * ucontext_t was always declared as a long, which is signed. Because
         * this is the 32-bit version, this is an int32_t. We cannot directly go
         * to a uintptr_t otherwise we might get sign extension, so we first
         * have to go through a uint32_t and then a uintptr_t. Otherwise, see
         * savecontext().
         */
        uaddr = (uintptr_t)(uint32_t)ucp->uc_xsave;
        if ((flags & SAVECTXT_F_ONFAULT) != 0) {
                ret = fpu_signal_copyout(lwp, uaddr, savecontext_copyout);
        } else {
                ret = fpu_signal_copyout(lwp, uaddr, copyout);
        }

        return (ret);
}

/*
 * 32-bit counterpart of getsetcontext(): handler for the GETCONTEXT,
 * GETCONTEXT_EXTD, SETCONTEXT, GETUSTACK and SETUSTACK requests, operating on
 * ucontext32_t and stack32_t.
 *
 *   flag   which request to perform; an unrecognized value yields EINVAL
 *   arg    user address whose meaning depends on flag
 *
 * Returns 0 on success; on failure the error is posted with set_errno() and
 * its return value is handed back.
 */
int
getsetcontext32(int flag, void *arg)
{
        ucontext32_t uc;
        ucontext_t ucnat;
        ucontext32_t *ucp;
        klwp_t *lwp = ttolwp(curthread);
        caddr32_t ustack32;
        stack32_t dummy_stk32;
        int ret;

        switch (flag) {
        default:
                return (set_errno(EINVAL));

        case GETCONTEXT:
                schedctl_finish_sigblock(curthread);
                ret = savecontext32(&uc, &curthread->t_hold, SAVECTXT_F_NONE);
                if (ret != 0)
                        return (set_errno(ret));
                if (uc.uc_flags & UC_SIGMASK)
                        SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
                if (copyout(&uc, arg, sizeof (uc)))
                        return (set_errno(EFAULT));
                return (0);

        /*
         * See getsetcontext() for an explanation of what is going on here.
         */
        case GETCONTEXT_EXTD:
                schedctl_finish_sigblock(curthread);
                ucp = arg;
                if (copyin(&ucp->uc_xsave, &uc.uc_xsave,
                    sizeof (uc.uc_xsave)) != 0) {
                        return (set_errno(EFAULT));
                }
                ret = savecontext32(&uc, &curthread->t_hold, SAVECTXT_F_EXTD);
                if (ret != 0)
                        return (set_errno(ret));
                if (uc.uc_flags & UC_SIGMASK)
                        SIGSET_NATIVE_TO_BRAND(&uc.uc_sigmask);
                if (copyout(&uc, arg, sizeof (uc)))
                        return (set_errno(EFAULT));
                return (0);

        case SETCONTEXT:
                ucp = arg;
                if (ucp == NULL)
                        exit(CLD_EXITED, 0);
                /*
                 * Copy in everything ahead of the floating point state and
                 * filler first; the fpregs and uc_xsave are only fetched
                 * when uc_flags says they are needed.
                 */
                if (copyin(ucp, &uc, offsetof(ucontext32_t, uc_filler) -
                    sizeof (uc.uc_mcontext.fpregs))) {
                        return (set_errno(EFAULT));
                }
                if (uc.uc_flags & UC_SIGMASK)
                        SIGSET_BRAND_TO_NATIVE(&uc.uc_sigmask);
                if ((uc.uc_flags & UC_FPU) &&
                    copyin(&ucp->uc_mcontext.fpregs, &uc.uc_mcontext.fpregs,
                    sizeof (uc.uc_mcontext.fpregs))) {
                        return (set_errno(EFAULT));
                }

                uc.uc_xsave = 0;
                if ((uc.uc_flags & UC_XSAVE) != 0 &&
                    copyin(&ucp->uc_xsave, &uc.uc_xsave,
                    sizeof (uc.uc_xsave)) != 0) {
                        return (set_errno(EFAULT));
                }

                /*
                 * restorecontext() works on the native structure, so convert
                 * before pulling in any extended state.
                 */
                ucontext_32ton(&uc, &ucnat);

                if ((ucnat.uc_flags & UC_XSAVE) != 0) {
                        ret = fpu_signal_copyin(lwp, &ucnat);
                        if (ret != 0) {
                                return (set_errno(ret));
                        }
                }

                restorecontext(&ucnat);

                /*
                 * Best effort: a failure to update the registered ustack is
                 * deliberately ignored.
                 */
                if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0))
                        (void) copyout(&uc.uc_stack,
                            (stack32_t *)lwp->lwp_ustack, sizeof (uc.uc_stack));
                return (0);

        case GETUSTACK:
                ustack32 = (caddr32_t)lwp->lwp_ustack;
                if (copyout(&ustack32, arg, sizeof (ustack32)))
                        return (set_errno(EFAULT));
                return (0);

        case SETUSTACK:
                /*
                 * The copyin only checks that arg is readable as a
                 * stack32_t; the contents are discarded and just the address
                 * is kept.
                 */
                if (copyin(arg, &dummy_stk32, sizeof (dummy_stk32)))
                        return (set_errno(EFAULT));
                lwp->lwp_ustack = (uintptr_t)arg;
                return (0);
        }
}

#endif  /* _SYSCALL32_IMPL */