root/usr/src/uts/intel/dtrace/dtrace_isa.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
 * Copyright (c) 2017 Joyent, Inc.
 */

#include <sys/dtrace_impl.h>
#include <sys/stack.h>
#include <sys/frame.h>
#include <sys/cmn_err.h>
#include <sys/privregs.h>
#include <sys/sysmacros.h>

/*
 * Defined outside this file.  Within this file, dtrace_copycheck() compares
 * user and kernel addresses against it.
 */
extern uintptr_t kernelbase;

/*
 * Cap on the number of trips dtrace_getustack_common() makes around its
 * frame-walking loop; going past it flags the walk with CPU_DTRACE_BADSTACK.
 */
int     dtrace_ustackdepth_max = 2048;

/*
 * Fill pcstack with the saved program counters found by following the
 * frame-pointer chain that starts at the current frame.
 *
 *   pcstack        output array
 *   pcstack_limit  number of slots available in pcstack
 *   aframes        number of leading ("artificial") frames to leave out; one
 *                  more is added internally, presumably to cover this
 *                  function's own frame
 *   intrpc         when non-NULL, stored as the very first entry
 *
 * When the last artificial frame is consumed and CPU->cpu_dtrace_caller is
 * non-zero, that caller value is recorded in place of the frame's own pc.
 * If the chain ends before pcstack is full, the unused slots are zeroed.
 */
void
dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
    uint32_t *intrpc)
{
        struct frame *fp = (struct frame *)dtrace_getfp();
        struct frame *parent, *lower, *upper;
        uintptr_t caller = CPU->cpu_dtrace_caller;
        uintptr_t pc;
        int on_intr = CPU_ON_INTR(CPU);
        int final = 0;
        int depth = 0;

        /*
         * A saved frame pointer is only followed while it lies strictly
         * between lower and upper.
         */
        if (on_intr != 0) {
                upper = (struct frame *)(CPU->cpu_intr_stack + SA(MINFRAME));
        } else {
                upper = (struct frame *)curthread->t_stk;
        }
        lower = fp;

        aframes += 1;

        if (intrpc != NULL && pcstack_limit > depth)
                pcstack[depth++] = (pc_t)intrpc;

        while (depth < pcstack_limit) {
                parent = (struct frame *)fp->fr_savfp;
                pc = fp->fr_savpc;

                if (parent <= lower || parent >= upper) {
                        if (on_intr) {
                                /*
                                 * The chain left the interrupt stack; retry
                                 * this same frame against the bounds of the
                                 * thread stack.
                                 */
                                upper = (struct frame *)curthread->t_stk;
                                lower = (struct frame *)curthread->t_stkbase;
                                on_intr = 0;
                                continue;
                        }

                        /*
                         * Nothing valid lies beyond this frame: handle it
                         * and then finish.
                         */
                        final = 1;
                }

                if (aframes <= 0) {
                        /* Past the artificial frames: record this pc. */
                        if (depth < pcstack_limit)
                                pcstack[depth++] = (pc_t)pc;
                } else if (--aframes == 0 && caller != 0) {
                        /*
                         * That was the last artificial frame and a caller
                         * was supplied, so record the caller now.
                         */
                        ASSERT(depth < pcstack_limit);
                        pcstack[depth++] = (pc_t)caller;
                        caller = 0;
                }

                if (final) {
                        for (; depth < pcstack_limit; depth++)
                                pcstack[depth] = 0;
                        return;
                }

                fp = parent;
                lower = fp;
        }
}

static int
dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc,
    uintptr_t sp)
{
        klwp_t *lwp = ttolwp(curthread);
        proc_t *p = curproc;
        uintptr_t oldcontext = lwp->lwp_oldcontext;
        uintptr_t oldsp;
        volatile uint16_t *flags =
            (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
        size_t s1, s2;
        int ret = 0;

        ASSERT(pcstack == NULL || pcstack_limit > 0);
        ASSERT(dtrace_ustackdepth_max > 0);

        if (p->p_model == DATAMODEL_NATIVE) {
                s1 = sizeof (struct frame) + 2 * sizeof (long);
                s2 = s1 + sizeof (siginfo_t);
        } else {
                s1 = sizeof (struct frame32) + 3 * sizeof (int);
                s2 = s1 + sizeof (siginfo32_t);
        }

        while (pc != 0) {
                /*
                 * We limit the number of times we can go around this
                 * loop to account for a circular stack.
                 */
                if (ret++ >= dtrace_ustackdepth_max) {
                        *flags |= CPU_DTRACE_BADSTACK;
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = sp;
                        break;
                }

                if (pcstack != NULL) {
                        *pcstack++ = (uint64_t)pc;
                        pcstack_limit--;
                        if (pcstack_limit <= 0)
                                break;
                }

                if (sp == 0)
                        break;

                oldsp = sp;

                if (oldcontext == sp + s1 || oldcontext == sp + s2) {
                        if (p->p_model == DATAMODEL_NATIVE) {
                                ucontext_t *ucp = (ucontext_t *)oldcontext;
                                greg_t *gregs = ucp->uc_mcontext.gregs;

                                sp = dtrace_fulword(&gregs[REG_FP]);
                                pc = dtrace_fulword(&gregs[REG_PC]);

                                oldcontext = dtrace_fulword(&ucp->uc_link);
                        } else {
                                ucontext32_t *ucp = (ucontext32_t *)oldcontext;
                                greg32_t *gregs = ucp->uc_mcontext.gregs;

                                sp = dtrace_fuword32(&gregs[EBP]);
                                pc = dtrace_fuword32(&gregs[EIP]);

                                oldcontext = dtrace_fuword32(&ucp->uc_link);
                        }
                } else {
                        if (p->p_model == DATAMODEL_NATIVE) {
                                struct frame *fr = (struct frame *)sp;

                                pc = dtrace_fulword(&fr->fr_savpc);
                                sp = dtrace_fulword(&fr->fr_savfp);
                        } else {
                                struct frame32 *fr = (struct frame32 *)sp;

                                pc = dtrace_fuword32(&fr->fr_savpc);
                                sp = dtrace_fuword32(&fr->fr_savfp);
                        }
                }

                if (sp == oldsp) {
                        *flags |= CPU_DTRACE_BADSTACK;
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = sp;
                        break;
                }

                /*
                 * This is totally bogus:  if we faulted, we're going to clear
                 * the fault and break.  This is to deal with the apparently
                 * broken Java stacks on x86.
                 */
                if (*flags & CPU_DTRACE_FAULT) {
                        *flags &= ~CPU_DTRACE_FAULT;
                        break;
                }
        }

        return (ret);
}

/*
 * Fill pcstack with the current thread's user-level stack.
 *
 * The first slot receives the process id.  If CPU_DTRACE_ENTRY is set, the
 * register-file pc is stored next and the word at the user stack pointer is
 * used as the pc from which the frame walk begins.  The frames themselves
 * are gathered by dtrace_getustack_common(), and any slots left over are
 * zeroed.  With no user context available the whole array is zeroed.
 *
 * Nothing is written when a fault is already flagged or when pcstack_limit
 * is not positive.  The caller must have CPU_DTRACE_NOFAULT set.
 */
void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{
        klwp_t *lwp = ttolwp(curthread);
        proc_t *p = curproc;
        struct regs *rp;
        uintptr_t pc, sp;
        int nfilled;

        ASSERT(DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT));

        if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT) || pcstack_limit <= 0)
                return;

        /*
         * Only walk when there is a user context; either way the tail of
         * the array is zeroed below.
         */
        if (lwp != NULL && p != NULL && (rp = lwp->lwp_regs) != NULL) {
                *pcstack++ = (uint64_t)p->p_pid;
                if (--pcstack_limit <= 0)
                        return;

                pc = rp->r_pc;
                sp = rp->r_fp;

                if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
                        *pcstack++ = (uint64_t)pc;
                        if (--pcstack_limit <= 0)
                                return;

                        pc = (p->p_model == DATAMODEL_NATIVE) ?
                            dtrace_fulword((void *)rp->r_sp) :
                            dtrace_fuword32((void *)rp->r_sp);
                }

                nfilled = dtrace_getustack_common(pcstack, pcstack_limit,
                    pc, sp);
                ASSERT(nfilled >= 0);
                ASSERT(nfilled <= pcstack_limit);

                pcstack += nfilled;
                pcstack_limit -= nfilled;
        }

        while (pcstack_limit > 0) {
                *pcstack++ = 0;
                pcstack_limit--;
        }
}

/*
 * Compute the depth of the current thread's user-level stack.
 *
 * Returns 0 when there is no user context and -1 when a fault is already
 * flagged.  Otherwise returns the count produced by
 * dtrace_getustack_common(), plus one when CPU_DTRACE_ENTRY is set.
 */
int
dtrace_getustackdepth(void)
{
        klwp_t *lwp = ttolwp(curthread);
        proc_t *p = curproc;
        struct regs *rp;
        uintptr_t pc, sp;
        int depth = 0;

        if (lwp == NULL || p == NULL)
                return (0);

        rp = lwp->lwp_regs;
        if (rp == NULL)
                return (0);

        if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
                return (-1);

        pc = rp->r_pc;
        sp = rp->r_fp;

        if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
                /*
                 * Count the register-file pc as a frame and start the walk
                 * from the word found at the user stack pointer.
                 */
                depth = 1;

                pc = (p->p_model == DATAMODEL_NATIVE) ?
                    dtrace_fulword((void *)rp->r_sp) :
                    dtrace_fuword32((void *)rp->r_sp);
        }

        depth += dtrace_getustack_common(NULL, 0, pc, sp);

        return (depth);
}

void
dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
{
        klwp_t *lwp = ttolwp(curthread);
        proc_t *p = curproc;
        struct regs *rp;
        uintptr_t pc, sp, oldcontext;
        volatile uint16_t *flags =
            (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
        size_t s1, s2;

        if (*flags & CPU_DTRACE_FAULT)
                return;

        if (pcstack_limit <= 0)
                return;

        /*
         * If there's no user context we still need to zero the stack.
         */
        if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL)
                goto zero;

        *pcstack++ = (uint64_t)p->p_pid;
        pcstack_limit--;

        if (pcstack_limit <= 0)
                return;

        pc = rp->r_pc;
        sp = rp->r_fp;
        oldcontext = lwp->lwp_oldcontext;

        if (p->p_model == DATAMODEL_NATIVE) {
                s1 = sizeof (struct frame) + 2 * sizeof (long);
                s2 = s1 + sizeof (siginfo_t);
        } else {
                s1 = sizeof (struct frame32) + 3 * sizeof (int);
                s2 = s1 + sizeof (siginfo32_t);
        }

        if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
                *pcstack++ = (uint64_t)pc;
                *fpstack++ = 0;
                pcstack_limit--;
                if (pcstack_limit <= 0)
                        return;

                if (p->p_model == DATAMODEL_NATIVE)
                        pc = dtrace_fulword((void *)rp->r_sp);
                else
                        pc = dtrace_fuword32((void *)rp->r_sp);
        }

        while (pc != 0) {
                *pcstack++ = (uint64_t)pc;
                *fpstack++ = sp;
                pcstack_limit--;
                if (pcstack_limit <= 0)
                        break;

                if (sp == 0)
                        break;

                if (oldcontext == sp + s1 || oldcontext == sp + s2) {
                        if (p->p_model == DATAMODEL_NATIVE) {
                                ucontext_t *ucp = (ucontext_t *)oldcontext;
                                greg_t *gregs = ucp->uc_mcontext.gregs;

                                sp = dtrace_fulword(&gregs[REG_FP]);
                                pc = dtrace_fulword(&gregs[REG_PC]);

                                oldcontext = dtrace_fulword(&ucp->uc_link);
                        } else {
                                ucontext_t *ucp = (ucontext_t *)oldcontext;
                                greg_t *gregs = ucp->uc_mcontext.gregs;

                                sp = dtrace_fuword32(&gregs[EBP]);
                                pc = dtrace_fuword32(&gregs[EIP]);

                                oldcontext = dtrace_fuword32(&ucp->uc_link);
                        }
                } else {
                        if (p->p_model == DATAMODEL_NATIVE) {
                                struct frame *fr = (struct frame *)sp;

                                pc = dtrace_fulword(&fr->fr_savpc);
                                sp = dtrace_fulword(&fr->fr_savfp);
                        } else {
                                struct frame32 *fr = (struct frame32 *)sp;

                                pc = dtrace_fuword32(&fr->fr_savpc);
                                sp = dtrace_fuword32(&fr->fr_savfp);
                        }
                }

                /*
                 * This is totally bogus:  if we faulted, we're going to clear
                 * the fault and break.  This is to deal with the apparently
                 * broken Java stacks on x86.
                 */
                if (*flags & CPU_DTRACE_FAULT) {
                        *flags &= ~CPU_DTRACE_FAULT;
                        break;
                }
        }

zero:
        while (pcstack_limit-- > 0)
                *pcstack++ = 0;
}

/*
 * Return probe argument number arg by reading it from saved register state
 * or from the stack.
 *
 *   arg      zero-based index of the wanted argument
 *   aframes  number of frames to step through while looking for the
 *            dtrace_invop() call site
 *
 * If the argument should have been in a register but no trap frame was
 * found, CPU_DTRACE_ILLOP is set and 0 is returned.  The final load is done
 * with CPU_DTRACE_NOFAULT set.
 */
/*ARGSUSED*/
uint64_t
dtrace_getarg(int arg, int aframes)
{
        struct frame *fp = (struct frame *)dtrace_getfp();
        uintptr_t *stack;
        uintptr_t val;
        int hop;
        /*
         * Six arguments travel in registers, so any argument whose index is
         * 5 or less lives in a register.
         */
        int inreg = 5;

        for (hop = 1; hop <= aframes; hop++) {
                struct regs *rp;

                fp = (struct frame *)(fp->fr_savfp);

                if (fp->fr_savpc != (pc_t)dtrace_invop_callsite)
                        continue;

                /*
                 * On amd64 we use the regs structure that was pushed when
                 * the trap was taken.  It sits beyond the frame structure,
                 * the calling RIP, and the padding stored in
                 * dtrace_invop().  An argument passed on the stack is found
                 * through the saved stack pointer, with its index reduced
                 * by the number of arguments passed in registers; an
                 * argument passed in registers is loaded directly from the
                 * saved registers.
                 */
                rp = (struct regs *)((uintptr_t)&fp[1] +
                    sizeof (uintptr_t) * 2);

                if (arg > inreg) {
                        stack = (uintptr_t *)(rp->r_rsp);
                        arg -= inreg;
                } else {
                        stack = (uintptr_t *)&rp->r_rdi;
                }
                goto load;
        }

        /*
         * No trap brought us into dtrace_probe(): the provider called
         * dtrace_probe() directly.  Its first argument is the probe ID, so
         * argument n is found where argument (n + 1) would normally be.
         */
        arg++;

        if (arg <= inreg) {
                /*
                 * This should not happen: an argument passed in a register
                 * should already have been delivered in a register.
                 */
                DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
                return (0);
        }

        arg -= (inreg + 1);
        stack = (uintptr_t *)&fp[1];

load:
        DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
        val = stack[arg];
        DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

        return (val);
}

/*
 * Count the kernel stack frames reachable from the current frame pointer
 * and return that count less (aframes + 1), or 0 if the count does not
 * exceed that amount.
 */
/*ARGSUSED*/
int
dtrace_getstackdepth(int aframes)
{
        struct frame *fp = (struct frame *)dtrace_getfp();
        struct frame *parent, *lower, *upper;
        int on_intr = CPU_ON_INTR(CPU);
        int depth = 0;

        if (on_intr != 0) {
                upper = (struct frame *)(CPU->cpu_intr_stack + SA(MINFRAME));
        } else {
                upper = (struct frame *)curthread->t_stk;
        }
        lower = fp;

        aframes += 1;

        for (;;) {
                depth++;

                parent = (struct frame *)fp->fr_savfp;

                if (parent > lower && parent < upper) {
                        /* Still inside the current stack: keep climbing. */
                        fp = parent;
                        lower = fp;
                        continue;
                }

                if (!on_intr)
                        break;

                /*
                 * The chain left the interrupt stack; go around again with
                 * the bounds of the thread stack.
                 */
                upper = (struct frame *)curthread->t_stk;
                lower = (struct frame *)curthread->t_stkbase;
                on_intr = 0;
        }

        return (depth <= aframes ? 0 : depth - aframes);
}

/*
 * Translation table used by dtrace_getreg() and dtrace_setreg() for register
 * numbers that are <= SS: the incoming number indexes this table and the
 * entry found is the REG_* constant that is then looked up.  The trailing
 * comment on each entry names the slot's index (presumably the 32-bit
 * register numbering -- confirm against the i386 register definitions).
 * Both the ESP and UESP slots resolve to REG_RSP.
 */
static const int dtrace_regmap[] = {
        REG_GS,         /* GS */
        REG_FS,         /* FS */
        REG_ES,         /* ES */
        REG_DS,         /* DS */
        REG_RDI,        /* EDI */
        REG_RSI,        /* ESI */
        REG_RBP,        /* EBP */
        REG_RSP,        /* ESP */
        REG_RBX,        /* EBX */
        REG_RDX,        /* EDX */
        REG_RCX,        /* ECX */
        REG_RAX,        /* EAX */
        REG_TRAPNO,     /* TRAPNO */
        REG_ERR,        /* ERR */
        REG_RIP,        /* EIP */
        REG_CS,         /* CS */
        REG_RFL,        /* EFL */
        REG_RSP,        /* UESP */
        REG_SS          /* SS */
};


/*
 * Return the value of one saved register from rp.
 *
 * Register numbers up to and including SS are translated through
 * dtrace_regmap[]; larger numbers are reduced by (SS + 1) and then used
 * directly as REG_* values.  A number that cannot be resolved sets
 * CPU_DTRACE_ILLOP and yields 0.
 */
ulong_t
dtrace_getreg(struct regs *rp, uint_t reg)
{
        ulong_t val;

        if (reg > SS) {
                reg -= SS + 1;
        } else if (reg < sizeof (dtrace_regmap) / sizeof (int)) {
                reg = dtrace_regmap[reg];
        } else {
                DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
                return (0);
        }

        switch (reg) {
        /* General-purpose registers. */
        case REG_RAX:
                val = rp->r_rax;
                break;
        case REG_RBX:
                val = rp->r_rbx;
                break;
        case REG_RCX:
                val = rp->r_rcx;
                break;
        case REG_RDX:
                val = rp->r_rdx;
                break;
        case REG_RSI:
                val = rp->r_rsi;
                break;
        case REG_RDI:
                val = rp->r_rdi;
                break;
        case REG_RBP:
                val = rp->r_rbp;
                break;
        case REG_RSP:
                val = rp->r_rsp;
                break;
        case REG_R8:
                val = rp->r_r8;
                break;
        case REG_R9:
                val = rp->r_r9;
                break;
        case REG_R10:
                val = rp->r_r10;
                break;
        case REG_R11:
                val = rp->r_r11;
                break;
        case REG_R12:
                val = rp->r_r12;
                break;
        case REG_R13:
                val = rp->r_r13;
                break;
        case REG_R14:
                val = rp->r_r14;
                break;
        case REG_R15:
                val = rp->r_r15;
                break;
        /* Segment registers. */
        case REG_CS:
                val = rp->r_cs;
                break;
        case REG_DS:
                val = rp->r_ds;
                break;
        case REG_ES:
                val = rp->r_es;
                break;
        case REG_FS:
                val = rp->r_fs;
                break;
        case REG_GS:
                val = rp->r_gs;
                break;
        case REG_SS:
                val = rp->r_ss;
                break;
        /* Trap and control state. */
        case REG_TRAPNO:
                val = rp->r_trapno;
                break;
        case REG_ERR:
                val = rp->r_err;
                break;
        case REG_RIP:
                val = rp->r_rip;
                break;
        case REG_RFL:
                val = rp->r_rfl;
                break;
        default:
                DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
                val = 0;
                break;
        }

        return (val);
}

/*
 * Store val into one saved register in rp.
 *
 * The register number is resolved as in dtrace_getreg(): numbers up to and
 * including SS go through dtrace_regmap[], larger ones are reduced by
 * (SS + 1).  Only the sixteen registers listed below can be written; any
 * other register sets CPU_DTRACE_ILLOP and leaves rp untouched.
 */
void
dtrace_setreg(struct regs *rp, uint_t reg, ulong_t val)
{
        if (reg > SS) {
                reg -= SS + 1;
        } else {
                ASSERT(reg < (sizeof (dtrace_regmap) / sizeof (int)));

                reg = dtrace_regmap[reg];
        }

        switch (reg) {
        case REG_RAX:
                rp->r_rax = val;
                break;
        case REG_RBX:
                rp->r_rbx = val;
                break;
        case REG_RCX:
                rp->r_rcx = val;
                break;
        case REG_RDX:
                rp->r_rdx = val;
                break;
        case REG_RSI:
                rp->r_rsi = val;
                break;
        case REG_RDI:
                rp->r_rdi = val;
                break;
        case REG_RBP:
                rp->r_rbp = val;
                break;
        case REG_RSP:
                rp->r_rsp = val;
                break;
        case REG_R8:
                rp->r_r8 = val;
                break;
        case REG_R9:
                rp->r_r9 = val;
                break;
        case REG_R10:
                rp->r_r10 = val;
                break;
        case REG_R11:
                rp->r_r11 = val;
                break;
        case REG_R12:
                rp->r_r12 = val;
                break;
        case REG_R13:
                rp->r_r13 = val;
                break;
        case REG_R14:
                rp->r_r14 = val;
                break;
        case REG_R15:
                rp->r_r15 = val;
                break;
        default:
                /* Not a writable register. */
                DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
                break;
        }
}

/*
 * Validate a user address range before a copy.
 *
 * Returns 1 when [uaddr, uaddr + size) neither wraps around nor reaches
 * kernelbase.  Otherwise sets CPU_DTRACE_BADADDR, records uaddr as the
 * illegal value for this CPU, and returns 0.  kaddr is only sanity-checked
 * by an assertion.
 */
static int
dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size)
{
        uintptr_t uend = uaddr + size;

        ASSERT(kaddr >= kernelbase && kaddr + size >= kaddr);

        if (uend < kernelbase && uend >= uaddr)
                return (1);

        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr;
        return (0);
}

/*
 * If dtrace_copycheck() accepts the user range, pass (uaddr, kaddr, size)
 * to dtrace_copy(); otherwise do nothing more (the check has already
 * flagged the bad address).  flags is not used.
 */
/*ARGSUSED*/
void
dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
{
        if (!dtrace_copycheck(uaddr, kaddr, size))
                return;

        dtrace_copy(uaddr, kaddr, size);
}

/*
 * If dtrace_copycheck() accepts the user range, pass (kaddr, uaddr, size)
 * to dtrace_copy(); otherwise do nothing more (the check has already
 * flagged the bad address).  flags is not used.
 */
/*ARGSUSED*/
void
dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size,
    volatile uint16_t *flags)
{
        if (!dtrace_copycheck(uaddr, kaddr, size))
                return;

        dtrace_copy(kaddr, uaddr, size);
}

/*
 * If dtrace_copycheck() accepts the user range, pass
 * (uaddr, kaddr, size, flags) to dtrace_copystr(); otherwise do nothing
 * more (the check has already flagged the bad address).
 */
void
dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
{
        if (!dtrace_copycheck(uaddr, kaddr, size))
                return;

        dtrace_copystr(uaddr, kaddr, size, flags);
}

/*
 * If dtrace_copycheck() accepts the user range, pass
 * (kaddr, uaddr, size, flags) to dtrace_copystr(); otherwise do nothing
 * more (the check has already flagged the bad address).
 */
void
dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size,
    volatile uint16_t *flags)
{
        if (!dtrace_copycheck(uaddr, kaddr, size))
                return;

        dtrace_copystr(kaddr, uaddr, size, flags);
}

/*
 * Fetch an 8-bit value from user address uaddr.  An address at or above
 * _userlimit is rejected: CPU_DTRACE_BADADDR is set, the address is
 * recorded as this CPU's illegal value, and 0 is returned.  Otherwise the
 * read is delegated to dtrace_fuword8_nocheck().
 */
uint8_t
dtrace_fuword8(void *uaddr)
{
        extern uint8_t dtrace_fuword8_nocheck(void *);

        if ((uintptr_t)uaddr < _userlimit)
                return (dtrace_fuword8_nocheck(uaddr));

        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (uintptr_t)uaddr;
        return (0);
}

/*
 * Fetch a 16-bit value from user address uaddr.  An address at or above
 * _userlimit is rejected: CPU_DTRACE_BADADDR is set, the address is
 * recorded as this CPU's illegal value, and 0 is returned.  Otherwise the
 * read is delegated to dtrace_fuword16_nocheck().
 */
uint16_t
dtrace_fuword16(void *uaddr)
{
        extern uint16_t dtrace_fuword16_nocheck(void *);

        if ((uintptr_t)uaddr < _userlimit)
                return (dtrace_fuword16_nocheck(uaddr));

        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (uintptr_t)uaddr;
        return (0);
}

/*
 * Fetch a 32-bit value from user address uaddr.  An address at or above
 * _userlimit is rejected: CPU_DTRACE_BADADDR is set, the address is
 * recorded as this CPU's illegal value, and 0 is returned.  Otherwise the
 * read is delegated to dtrace_fuword32_nocheck().
 */
uint32_t
dtrace_fuword32(void *uaddr)
{
        extern uint32_t dtrace_fuword32_nocheck(void *);

        if ((uintptr_t)uaddr < _userlimit)
                return (dtrace_fuword32_nocheck(uaddr));

        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (uintptr_t)uaddr;
        return (0);
}

/*
 * Fetch a 64-bit value from user address uaddr.  An address at or above
 * _userlimit is rejected: CPU_DTRACE_BADADDR is set, the address is
 * recorded as this CPU's illegal value, and 0 is returned.  Otherwise the
 * read is delegated to dtrace_fuword64_nocheck().
 */
uint64_t
dtrace_fuword64(void *uaddr)
{
        extern uint64_t dtrace_fuword64_nocheck(void *);

        if ((uintptr_t)uaddr < _userlimit)
                return (dtrace_fuword64_nocheck(uaddr));

        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (uintptr_t)uaddr;
        return (0);
}