root/usr/src/uts/sparc/dtrace/fbt.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cpuvar.h>
#include <sys/kmem.h>
#include <sys/strsubr.h>
#include <sys/dtrace.h>
#include <sys/kobj.h>
#include <sys/modctl.h>
#include <sys/atomic.h>
#include <vm/seg_kmem.h>
#include <sys/stack.h>
#include <sys/ctf_api.h>
#include <sys/sysmacros.h>

static dev_info_t               *fbt_devi;
static dtrace_provider_id_t     fbt_id;
static uintptr_t                fbt_trampoline;
static caddr_t                  fbt_trampoline_window;
static size_t                   fbt_trampoline_size;
static int                      fbt_verbose = 0;

/*
 * Various interesting bean counters.
 */
static int                      fbt_entry;
static int                      fbt_ret;
static int                      fbt_retl;
static int                      fbt_retl_jmptab;
static int                      fbt_retl_twoinstr;
static int                      fbt_retl_tailcall;
static int                      fbt_retl_tailjmpl;
static int                      fbt_leaf_functions;

extern char                     stubs_base[];
extern char                     stubs_end[];

#define FBT_REG_G0              0
#define FBT_REG_G1              1
#define FBT_REG_O0              8
#define FBT_REG_O1              9
#define FBT_REG_O2              10
#define FBT_REG_O3              11
#define FBT_REG_O4              12
#define FBT_REG_O5              13
#define FBT_REG_O6              14
#define FBT_REG_O7              15
#define FBT_REG_I0              24
#define FBT_REG_I1              25
#define FBT_REG_I2              26
#define FBT_REG_I3              27
#define FBT_REG_I4              28
#define FBT_REG_I7              31
#define FBT_REG_L0              16
#define FBT_REG_L1              17
#define FBT_REG_L2              18
#define FBT_REG_L3              19
#define FBT_REG_PC              5

#define FBT_REG_ISGLOBAL(r)     ((r) < 8)
#define FBT_REG_ISOUTPUT(r)     ((r) >= 8 && (r) < 16)
#define FBT_REG_ISLOCAL(r)      ((r) >= 16 && (r) < 24)
#define FBT_REG_ISVOLATILE(r)   \
        ((FBT_REG_ISGLOBAL(r) || FBT_REG_ISOUTPUT(r)) && (r) != FBT_REG_G0)
#define FBT_REG_NLOCALS         8

#define FBT_REG_MARKLOCAL(locals, r)    \
        if (FBT_REG_ISLOCAL(r)) \
                (locals)[(r) - FBT_REG_L0] = 1;

#define FBT_REG_INITLOCALS(local, locals)       \
        for ((local) = 0; (local) < FBT_REG_NLOCALS; (local)++)  \
                (locals)[(local)] = 0; \
        (local) = FBT_REG_L0

#define FBT_REG_ALLOCLOCAL(local, locals)       \
        while ((locals)[(local) - FBT_REG_L0]) \
                (local)++; \
        (locals)[(local) - FBT_REG_L0] = 1;

#define FBT_OP_MASK             0xc0000000
#define FBT_OP_SHIFT            30
#define FBT_OP(val)             ((val) & FBT_FMT1_MASK)

#define FBT_SIMM13_MASK         0x1fff
#define FBT_SIMM13_MAX          ((int32_t)0xfff)
#define FBT_IMM22_MASK          0x3fffff
#define FBT_IMM22_SHIFT         10
#define FBT_IMM10_MASK          0x3ff

#define FBT_DISP30_MASK         0x3fffffff
#define FBT_DISP30(from, to)    \
        (((uintptr_t)(to) - (uintptr_t)(from) >> 2) & FBT_DISP30_MASK)

#define FBT_DISP22_MASK         0x3fffff
#define FBT_DISP22(from, to)    \
        (((uintptr_t)(to) - (uintptr_t)(from) >> 2) & FBT_DISP22_MASK)

#define FBT_DISP19_MASK         0x7ffff
#define FBT_DISP19(from, to)    \
        (((uintptr_t)(to) - (uintptr_t)(from) >> 2) & FBT_DISP19_MASK)

#define FBT_DISP16_HISHIFT      20
#define FBT_DISP16_HIMASK       (0x3 << FBT_DISP16_HISHIFT)
#define FBT_DISP16_LOMASK       (0x3fff)
#define FBT_DISP16_MASK         (FBT_DISP16_HIMASK | FBT_DISP16_LOMASK)
#define FBT_DISP16(val) \
        ((((val) & FBT_DISP16_HIMASK) >> 6) | ((val) & FBT_DISP16_LOMASK))

#define FBT_DISP14_MASK         0x3fff
#define FBT_DISP14(from, to)    \
        (((uintptr_t)(to) - (uintptr_t)(from) >> 2) & FBT_DISP14_MASK)

#define FBT_OP0                 (((uint32_t)0) << FBT_OP_SHIFT)
#define FBT_OP1                 (((uint32_t)1) << FBT_OP_SHIFT)
#define FBT_OP2                 (((uint32_t)2) << FBT_OP_SHIFT)
#define FBT_ILLTRAP             0

#define FBT_ANNUL_SHIFT         29
#define FBT_ANNUL               (1 << FBT_ANNUL_SHIFT)

#define FBT_FMT3_OP3_SHIFT      19
#define FBT_FMT3_OP_MASK        0xc1f80000
#define FBT_FMT3_OP(val)        ((val) & FBT_FMT3_OP_MASK)

#define FBT_FMT3_RD_SHIFT       25
#define FBT_FMT3_RD_MASK        (0x1f << FBT_FMT3_RD_SHIFT)
#define FBT_FMT3_RD(val)        \
        (((val) & FBT_FMT3_RD_MASK) >> FBT_FMT3_RD_SHIFT)

#define FBT_FMT3_RS1_SHIFT      14
#define FBT_FMT3_RS1_MASK       (0x1f << FBT_FMT3_RS1_SHIFT)
#define FBT_FMT3_RS1(val)       \
        (((val) & FBT_FMT3_RS1_MASK) >> FBT_FMT3_RS1_SHIFT)
#define FBT_FMT3_RS1_SET(val, rs1) \
        (val) = ((val) & ~FBT_FMT3_RS1_MASK) | ((rs1) << FBT_FMT3_RS1_SHIFT)

#define FBT_FMT3_RS2_SHIFT      0
#define FBT_FMT3_RS2_MASK       (0x1f << FBT_FMT3_RS2_SHIFT)
#define FBT_FMT3_RS2(val)       \
        (((val) & FBT_FMT3_RS2_MASK) >> FBT_FMT3_RS2_SHIFT)
#define FBT_FMT3_RS2_SET(val, rs2) \
        (val) = ((val) & ~FBT_FMT3_RS2_MASK) | ((rs2) << FBT_FMT3_RS2_SHIFT)

#define FBT_FMT3_IMM_SHIFT      13
#define FBT_FMT3_IMM            (1 << FBT_FMT3_IMM_SHIFT)
#define FBT_FMT3_SIMM13_MASK    FBT_SIMM13_MASK

#define FBT_FMT3_ISIMM(val)     ((val) & FBT_FMT3_IMM)
#define FBT_FMT3_SIMM13(val)    ((val) & FBT_FMT3_SIMM13_MASK)

#define FBT_FMT2_OP2_SHIFT      22
#define FBT_FMT2_OP2_MASK       (0x7 << FBT_FMT2_OP2_SHIFT)
#define FBT_FMT2_RD_SHIFT       25

#define FBT_FMT1_OP(val)        ((val) & FBT_OP_MASK)
#define FBT_FMT1_DISP30(val)    ((val) & FBT_DISP30_MASK)

#define FBT_FMT2_OP2_BPCC       (0x01 << FBT_FMT2_OP2_SHIFT)
#define FBT_FMT2_OP2_BCC        (0x02 << FBT_FMT2_OP2_SHIFT)
#define FBT_FMT2_OP2_BPR        (0x03 << FBT_FMT2_OP2_SHIFT)
#define FBT_FMT2_OP2_SETHI      (0x04 << FBT_FMT2_OP2_SHIFT)

#define FBT_FMT2_COND_SHIFT     25
#define FBT_FMT2_COND_BA        (0x8 << FBT_FMT2_COND_SHIFT)
#define FBT_FMT2_COND_BL        (0x3 << FBT_FMT2_COND_SHIFT)
#define FBT_FMT2_COND_BGE       (0xb << FBT_FMT2_COND_SHIFT)

#define FBT_OP_RESTORE          (FBT_OP2 | (0x3d << FBT_FMT3_OP3_SHIFT))
#define FBT_OP_SAVE             (FBT_OP2 | (0x3c << FBT_FMT3_OP3_SHIFT))
#define FBT_OP_JMPL             (FBT_OP2 | (0x38 << FBT_FMT3_OP3_SHIFT))
#define FBT_OP_RETURN           (FBT_OP2 | (0x39 << FBT_FMT3_OP3_SHIFT))
#define FBT_OP_CALL             FBT_OP1
#define FBT_OP_SETHI            (FBT_OP0 | FBT_FMT2_OP2_SETHI)
#define FBT_OP_ADD              (FBT_OP2 | (0x00 << FBT_FMT3_OP3_SHIFT))
#define FBT_OP_OR               (FBT_OP2 | (0x02 << FBT_FMT3_OP3_SHIFT))
#define FBT_OP_SUB              (FBT_OP2 | (0x04 << FBT_FMT3_OP3_SHIFT))
#define FBT_OP_CC               (FBT_OP2 | (0x10 << FBT_FMT3_OP3_SHIFT))
#define FBT_OP_BA               (FBT_OP0 | FBT_FMT2_OP2_BCC | FBT_FMT2_COND_BA)
#define FBT_OP_BL               (FBT_OP0 | FBT_FMT2_OP2_BCC | FBT_FMT2_COND_BL)
#define FBT_OP_BGE              (FBT_OP0 | FBT_FMT2_OP2_BCC | FBT_FMT2_COND_BGE)
#define FBT_OP_BAPCC            (FBT_OP0 | FBT_FMT2_OP2_BPCC | FBT_FMT2_COND_BA)
#define FBT_OP_RD               (FBT_OP2 | (0x28 << FBT_FMT3_OP3_SHIFT))

#define FBT_ORLO(rs, val, rd) \
        (FBT_OP_OR | ((rs) << FBT_FMT3_RS1_SHIFT) | \
        ((rd) << FBT_FMT3_RD_SHIFT) | FBT_FMT3_IMM | ((val) & FBT_IMM10_MASK))

#define FBT_ORSIMM13(rs, val, rd) \
        (FBT_OP_OR | ((rs) << FBT_FMT3_RS1_SHIFT) | \
        ((rd) << FBT_FMT3_RD_SHIFT) | FBT_FMT3_IMM | ((val) & FBT_SIMM13_MASK))

#define FBT_ADDSIMM13(rs, val, rd) \
        (FBT_OP_ADD | ((rs) << FBT_FMT3_RS1_SHIFT) | \
        ((rd) << FBT_FMT3_RD_SHIFT) | FBT_FMT3_IMM | ((val) & FBT_SIMM13_MASK))

#define FBT_ADD(rs1, rs2, rd) \
        (FBT_OP_ADD | ((rs1) << FBT_FMT3_RS1_SHIFT) | \
        ((rs2) << FBT_FMT3_RS2_SHIFT) | ((rd) << FBT_FMT3_RD_SHIFT))

#define FBT_CMP(rs1, rs2) \
        (FBT_OP_SUB | FBT_OP_CC | ((rs1) << FBT_FMT3_RS1_SHIFT) | \
        ((rs2) << FBT_FMT3_RS2_SHIFT) | (FBT_REG_G0 << FBT_FMT3_RD_SHIFT))

#define FBT_MOV(rs, rd) \
        (FBT_OP_OR | (FBT_REG_G0 << FBT_FMT3_RS1_SHIFT) | \
        ((rs) << FBT_FMT3_RS2_SHIFT) | ((rd) << FBT_FMT3_RD_SHIFT))

#define FBT_SETHI(val, reg)     \
        (FBT_OP_SETHI | (reg << FBT_FMT2_RD_SHIFT) | \
        ((val >> FBT_IMM22_SHIFT) & FBT_IMM22_MASK))

#define FBT_CALL(orig, dest)    (FBT_OP_CALL | FBT_DISP30(orig, dest))

#define FBT_RET \
        (FBT_OP_JMPL | (FBT_REG_I7 << FBT_FMT3_RS1_SHIFT) | \
        (FBT_REG_G0 << FBT_FMT3_RD_SHIFT) | FBT_FMT3_IMM | (sizeof (pc_t) << 1))

#define FBT_SAVEIMM(rd, val, rs1)       \
        (FBT_OP_SAVE | ((rs1) << FBT_FMT3_RS1_SHIFT) | \
        ((rd) << FBT_FMT3_RD_SHIFT) | FBT_FMT3_IMM | ((val) & FBT_SIMM13_MASK))

#define FBT_RESTORE(rd, rs1, rs2)       \
        (FBT_OP_RESTORE | ((rs1) << FBT_FMT3_RS1_SHIFT) | \
        ((rd) << FBT_FMT3_RD_SHIFT) | ((rs2) << FBT_FMT3_RS2_SHIFT))

#define FBT_RETURN(rs1, val)            \
        (FBT_OP_RETURN | ((rs1) << FBT_FMT3_RS1_SHIFT) | \
        FBT_FMT3_IMM | ((val) & FBT_SIMM13_MASK))

#define FBT_BA(orig, dest)      (FBT_OP_BA | FBT_DISP22(orig, dest))
#define FBT_BAA(orig, dest)     (FBT_BA(orig, dest) | FBT_ANNUL)
#define FBT_BL(orig, dest)      (FBT_OP_BL | FBT_DISP22(orig, dest))
#define FBT_BGE(orig, dest)     (FBT_OP_BGE | FBT_DISP22(orig, dest))
#define FBT_BDEST(va, instr)    ((uintptr_t)(va) + \
        (((int32_t)(((instr) & FBT_DISP22_MASK) << 10)) >> 8))
#define FBT_BPCCDEST(va, instr) ((uintptr_t)(va) + \
        (((int32_t)(((instr) & FBT_DISP19_MASK) << 13)) >> 11))
#define FBT_BPRDEST(va, instr)  ((uintptr_t)(va) + \
        (((int32_t)((FBT_DISP16(instr)) << 16)) >> 14))

/*
 * We're only going to treat a save as safe if (a) both rs1 and rd are
 * %sp and (b) if the instruction has a simm, the value isn't 0.
 */
#define FBT_IS_SAVE(instr)      \
        (FBT_FMT3_OP(instr) == FBT_OP_SAVE && \
        FBT_FMT3_RD(instr) == FBT_REG_O6 && \
        FBT_FMT3_RS1(instr) == FBT_REG_O6 && \
        !(FBT_FMT3_ISIMM(instr) && FBT_FMT3_SIMM13(instr) == 0))

#define FBT_IS_BA(instr)        (((instr) & ~FBT_DISP22_MASK) == FBT_OP_BA)
#define FBT_IS_BAPCC(instr)     (((instr) & ~FBT_DISP22_MASK) == FBT_OP_BAPCC)

#define FBT_IS_RDPC(instr)      ((FBT_FMT3_OP(instr) == FBT_OP_RD) && \
        (FBT_FMT3_RD(instr) == FBT_REG_PC))

#define FBT_IS_PCRELATIVE(instr)        \
        ((((instr) & FBT_OP_MASK) == FBT_OP0 && \
        ((instr) & FBT_FMT2_OP2_MASK) != FBT_FMT2_OP2_SETHI) || \
        ((instr) & FBT_OP_MASK) == FBT_OP1 || \
        FBT_IS_RDPC(instr))

#define FBT_IS_CTI(instr)       \
        ((((instr) & FBT_OP_MASK) == FBT_OP0 && \
        ((instr) & FBT_FMT2_OP2_MASK) != FBT_FMT2_OP2_SETHI) || \
        ((instr) & FBT_OP_MASK) == FBT_OP1 || \
        (FBT_FMT3_OP(instr) == FBT_OP_JMPL) || \
        (FBT_FMT3_OP(instr) == FBT_OP_RETURN))

#define FBT_PROBENAME_ENTRY     "entry"
#define FBT_PROBENAME_RETURN    "return"
#define FBT_ESTIMATE_ID         (UINT32_MAX)
#define FBT_COUNTER(id, count)  if ((id) != FBT_ESTIMATE_ID) (count)++

#define FBT_ENTENT_MAXSIZE      (16 * sizeof (uint32_t))
#define FBT_RETENT_MAXSIZE      (11 * sizeof (uint32_t))
#define FBT_RETLENT_MAXSIZE     (23 * sizeof (uint32_t))
#define FBT_ENT_MAXSIZE         \
        MAX(MAX(FBT_ENTENT_MAXSIZE, FBT_RETENT_MAXSIZE), FBT_RETLENT_MAXSIZE)

typedef struct fbt_probe {
        char            *fbtp_name;
        dtrace_id_t     fbtp_id;
        uintptr_t       fbtp_addr;
        struct modctl   *fbtp_ctl;
        int             fbtp_loadcnt;
        int             fbtp_symndx;
        int             fbtp_primary;
        int             fbtp_return;
        uint32_t        *fbtp_patchpoint;
        uint32_t        fbtp_patchval;
        uint32_t        fbtp_savedval;
        struct fbt_probe *fbtp_next;
} fbt_probe_t;

typedef struct fbt_trampoline {
        uintptr_t       fbtt_va;
        uintptr_t       fbtt_limit;
        uintptr_t       fbtt_next;
} fbt_trampoline_t;

static caddr_t
fbt_trampoline_map(uintptr_t tramp, size_t size)
{
        uintptr_t offs;
        page_t **ppl;

        ASSERT(fbt_trampoline_window == NULL);
        ASSERT(fbt_trampoline_size == 0);
        ASSERT(fbt_trampoline == NULL);

        size += tramp & PAGEOFFSET;
        fbt_trampoline = tramp & PAGEMASK;
        fbt_trampoline_size = (size + PAGESIZE - 1) & PAGEMASK;
        fbt_trampoline_window =
            vmem_alloc(heap_arena, fbt_trampoline_size, VM_SLEEP);

        (void) as_pagelock(&kas, &ppl, (caddr_t)fbt_trampoline,
            fbt_trampoline_size, S_WRITE);

        for (offs = 0; offs < fbt_trampoline_size; offs += PAGESIZE) {
                hat_devload(kas.a_hat, fbt_trampoline_window + offs, PAGESIZE,
                    hat_getpfnum(kas.a_hat, (caddr_t)fbt_trampoline + offs),
                    PROT_READ | PROT_WRITE,
                    HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
        }

        as_pageunlock(&kas, ppl, (caddr_t)fbt_trampoline, fbt_trampoline_size,
            S_WRITE);

        return (fbt_trampoline_window + (tramp & PAGEOFFSET));
}

static void
fbt_trampoline_unmap()
{
        ASSERT(fbt_trampoline_window != NULL);
        ASSERT(fbt_trampoline_size != 0);
        ASSERT(fbt_trampoline != NULL);

        membar_enter();
        sync_icache((caddr_t)fbt_trampoline, fbt_trampoline_size);
        sync_icache(fbt_trampoline_window, fbt_trampoline_size);

        hat_unload(kas.a_hat, fbt_trampoline_window, fbt_trampoline_size,
            HAT_UNLOAD_UNLOCK);

        vmem_free(heap_arena, fbt_trampoline_window, fbt_trampoline_size);

        fbt_trampoline_window = NULL;
        fbt_trampoline = 0;
        fbt_trampoline_size = 0;
}

static uintptr_t
fbt_patch_entry(uint32_t *instr, uint32_t id, fbt_trampoline_t *tramp,
    int nargs)
{
        uint32_t *tinstr = (uint32_t *)tramp->fbtt_next;
        uint32_t first = *instr;
        uintptr_t va = tramp->fbtt_va;
        uintptr_t base = tramp->fbtt_next;

        if (tramp->fbtt_next + FBT_ENTENT_MAXSIZE > tramp->fbtt_limit) {
                /*
                 * There isn't sufficient room for this entry; return failure.
                 */
                return (0);
        }

        FBT_COUNTER(id, fbt_entry);

        if (FBT_IS_SAVE(first)) {
                *tinstr++ = first;
        } else {
                *tinstr++ = FBT_SAVEIMM(FBT_REG_O6, -SA(MINFRAME), FBT_REG_O6);
        }

        if (id > (uint32_t)FBT_SIMM13_MAX) {
                *tinstr++ = FBT_SETHI(id, FBT_REG_O0);
                *tinstr++ = FBT_ORLO(FBT_REG_O0, id, FBT_REG_O0);
        } else {
                *tinstr++ = FBT_ORSIMM13(FBT_REG_G0, id, FBT_REG_O0);
        }

        if (nargs >= 1)
                *tinstr++ = FBT_MOV(FBT_REG_I0, FBT_REG_O1);

        if (nargs >= 2)
                *tinstr++ = FBT_MOV(FBT_REG_I1, FBT_REG_O2);

        if (nargs >= 3)
                *tinstr++ = FBT_MOV(FBT_REG_I2, FBT_REG_O3);

        if (nargs >= 4)
                *tinstr++ = FBT_MOV(FBT_REG_I3, FBT_REG_O4);

        if (nargs >= 5)
                *tinstr++ = FBT_MOV(FBT_REG_I4, FBT_REG_O5);

        if (FBT_IS_SAVE(first)) {
                uintptr_t ret = (uintptr_t)instr - sizeof (uint32_t);

                *tinstr++ = FBT_SETHI(ret, FBT_REG_G1);
                *tinstr = FBT_CALL((uintptr_t)tinstr - base + va, dtrace_probe);
                tinstr++;
                *tinstr++ = FBT_ORLO(FBT_REG_G1, ret, FBT_REG_O7);
        } else {
                uintptr_t slot = *--tinstr;
                uintptr_t ret = (uintptr_t)instr + sizeof (uint32_t);
                uint32_t delay = first;

                *tinstr = FBT_CALL((uintptr_t)tinstr - base + va, dtrace_probe);
                tinstr++;
                *tinstr++ = slot;
                *tinstr++ = FBT_RESTORE(FBT_REG_G0, FBT_REG_G0, FBT_REG_G0);

                if (FBT_IS_BA(first) || FBT_IS_BAPCC(first)) {
                        /*
                         * This is a special case:  we are instrumenting a
                         * a non-annulled branch-always (or variant).  We'll
                         * return directly to the destination of the branch,
                         * copying the instruction in the delay slot here,
                         * and then executing it in the slot of a ba.
                         */
                        if (FBT_IS_BA(first)) {
                                ret = FBT_BDEST(instr, *instr);
                        } else {
                                ret = FBT_BPCCDEST(instr, *instr);
                        }

                        delay = *(instr + 1);
                }

                if ((first & FBT_OP_MASK) != FBT_OP0 ||
                    (first & FBT_FMT2_OP2_MASK) != FBT_FMT2_OP2_BPR) {
                        *tinstr = FBT_BA((uintptr_t)tinstr - base + va, ret);
                        tinstr++;
                        *tinstr++ = delay;
                } else {
                        /*
                         * If this is a branch-on-register, we have a little
                         * more work to do:  because the displacement is only
                         * sixteen bits, we're going to thunk the branch into
                         * the trampoline, and then ba,a to the appropriate
                         * destination in the branch targets.  That is, we're
                         * constructing this sequence in the trampoline:
                         *
                         *              br[cc]  %[rs], 1f
                         *              <delay-instruction>
                         *              ba,a    <not-taken-destination>
                         *      1:      ba,a    <taken-destination>
                         *
                         */
                        uintptr_t targ = FBT_BPRDEST(instr, first);

                        *tinstr = first & ~(FBT_DISP16_MASK);
                        *tinstr |= FBT_DISP14(tinstr, &tinstr[3]);
                        tinstr++;
                        *tinstr++ = *(instr + 1);
                        *tinstr = FBT_BAA((uintptr_t)tinstr - base + va,
                            ret + sizeof (uint32_t));
                        tinstr++;
                        *tinstr = FBT_BAA((uintptr_t)tinstr - base + va, targ);
                        tinstr++;
                }
        }

        tramp->fbtt_va += (uintptr_t)tinstr - tramp->fbtt_next;
        tramp->fbtt_next = (uintptr_t)tinstr;

        return (1);
}

/*
 * We are patching control-transfer/restore couplets.  There are three
 * variants of couplet:
 *
 * (a)  return          rs1 + imm
 *      delay
 *
 * (b)  jmpl            rs1 + (rs2 | offset), rd
 *      restore         rs1, rs2 | imm, rd
 *
 * (c)  call            displacement
 *      restore         rs1, rs2 | imm, rd
 *
 * If rs1 in (a) is anything other than %i7, or imm is anything other than 8,
 * or delay is a DCTI, we fail.  If rd from the jmpl in (b) is something other
 * than %g0 (a ret or a tail-call through a function pointer) or %o7 (a call
 * through a register), we fail.
 *
 * Note that rs1 and rs2 in the restore instructions in (b) and (c) are
 * potentially outputs and/or globals.  Because these registers cannot be
 * relied upon across the call to dtrace_probe(), we move rs1 into an unused
 * local, ls0, and rs2 into an unused local, ls1, and restructure the restore
 * to be:
 *
 *      restore         ls0, ls1, rd
 *
 * Likewise, rs1 and rs2 in the jmpl of case (b) may be outputs and/or globals.
 * If the jmpl uses outputs or globals, we restructure it to be:
 *
 *      jmpl            ls2 + (ls3 | offset), (%g0 | %o7)
 *
 */
/*ARGSUSED*/
static int
fbt_canpatch_return(uint32_t *instr, int offset, const char *name)
{
        int rd;

        if (FBT_FMT3_OP(*instr) == FBT_OP_RETURN) {
                uint32_t delay = *(instr + 1);

                if (*instr != FBT_RETURN(FBT_REG_I7, 8)) {
                        /*
                         * It's unclear if we should warn about this or not.
                         * We really wouldn't expect the compiler to generate
                         * return instructions with something other than %i7
                         * as rs1 and 8 as the simm13 -- it would just be
                         * mean-spirited.  That said, such a construct isn't
                         * necessarily incorrect.  Sill, we err on the side of
                         * caution and warn about it...
                         */
                        cmn_err(CE_NOTE, "cannot instrument return of %s at "
                            "%p: non-canonical return instruction", name,
                            (void *)instr);
                        return (0);
                }

                if (FBT_IS_CTI(delay)) {
                        /*
                         * This is even weirder -- a DCTI coupled with a
                         * return instruction.  Similar constructs are used to
                         * return from utraps, but these typically have the
                         * return in the slot -- and we wouldn't expect to see
                         * it in the kernel regardless.  At any rate, we don't
                         * want to try to instrument this construct, whatever
                         * it may be.
                         */
                        cmn_err(CE_NOTE, "cannot instrument return of %s at "
                            "%p: CTI in delay slot of return instruction",
                            name, (void *)instr);
                        return (0);
                }

                if (FBT_IS_PCRELATIVE(delay)) {
                        /*
                         * This is also very weird, but might be correct code
                         * if the function is (for example) returning the
                         * address of the delay instruction of the return as
                         * its return value (e.g. "rd %pc, %o0" in the slot).
                         * Perhaps correct, but still too weird to not warn
                         * about it...
                         */
                        cmn_err(CE_NOTE, "cannot instrument return of %s at "
                            "%p: PC-relative instruction in delay slot of "
                            "return instruction", name, (void *)instr);
                        return (0);
                }

                return (1);
        }

        if (FBT_FMT3_OP(*(instr + 1)) != FBT_OP_RESTORE)
                return (0);

        if (FBT_FMT1_OP(*instr) == FBT_OP_CALL)
                return (1);

        if (FBT_FMT3_OP(*instr) != FBT_OP_JMPL)
                return (0);

        rd = FBT_FMT3_RD(*instr);

        if (rd == FBT_REG_I7 || rd == FBT_REG_O7 || rd == FBT_REG_G0)
                return (1);

        /*
         * We have encountered a jmpl that is storing the calling %pc in
         * some register besides %i7, %o7 or %g0.  This is strange; emit
         * a warning and fail.
         */
        cmn_err(CE_NOTE, "cannot instrument return of %s at %p: unexpected "
            "jmpl destination register", name, (void *)instr);
        return (0);
}

static int
fbt_canpatch_retl(uint32_t *instr, int offset, const char *name)
{
        if (FBT_FMT1_OP(*instr) == FBT_OP_CALL ||
            (FBT_FMT3_OP(*instr) == FBT_OP_JMPL &&
            FBT_FMT3_RD(*instr) == FBT_REG_O7)) {
                /*
                 * If this is a call (or a jmpl that links into %o7), we can
                 * patch it iff the next instruction uses %o7 as a destination
                 * register.  Because there is an ABI responsibility to
                 * restore %o7 to the value before the call/jmpl, we don't
                 * particularly care how this routine is managing to restore
                 * it (mov, add, ld or divx for all we care).  If it doesn't
                 * seem to be restoring it at all, however, we'll refuse
                 * to patch it.
                 */
                uint32_t delay = *(instr + 1);
                uint32_t op, rd;

                op = FBT_FMT1_OP(delay);
                rd = FBT_FMT3_RD(delay);

                if (op != FBT_OP2 || rd != FBT_REG_O7) {
                        /*
                         * This is odd.  Before we assume that we're looking
                         * at something bizarre (and warn accordingly), we'll
                         * check to see if it's obviously a jump table entry.
                         */
                        if (*instr < (uintptr_t)instr &&
                            *instr >= (uintptr_t)instr - offset)
                                return (0);

                        cmn_err(CE_NOTE, "cannot instrument return of %s at "
                            "%p: leaf jmpl/call delay isn't restoring %%o7",
                            name, (void *)instr);
                        return (0);
                }

                return (1);
        }

        if (offset == sizeof (uint32_t)) {
                /*
                 * If this is the second instruction in the function, we're
                 * going to allow it to be patched if the first instruction
                 * is a patchable return-from-leaf instruction.
                 */
                if (fbt_canpatch_retl(instr - 1, 0, name))
                        return (1);
        }

        if (FBT_FMT3_OP(*instr) != FBT_OP_JMPL)
                return (0);

        if (FBT_FMT3_RD(*instr) != FBT_REG_G0)
                return (0);

        return (1);
}

/*ARGSUSED*/
static uint32_t
fbt_patch_return(uint32_t *instr, uint32_t *funcbase, uint32_t *funclim,
    int offset, uint32_t id, fbt_trampoline_t *tramp, const char *name)
{
        uint32_t *tinstr = (uint32_t *)tramp->fbtt_next;
        uint32_t cti = *instr, restore = *(instr + 1), rs1, dest;
        uintptr_t va = tramp->fbtt_va;
        uintptr_t base = tramp->fbtt_next;
        uint32_t locals[FBT_REG_NLOCALS], local;

        if (tramp->fbtt_next + FBT_RETENT_MAXSIZE > tramp->fbtt_limit) {
                /*
                 * There isn't sufficient room for this entry; return failure.
                 */
                return (FBT_ILLTRAP);
        }

        FBT_COUNTER(id, fbt_ret);

        if (FBT_FMT3_OP(*instr) == FBT_OP_RETURN) {
                /*
                 * To handle the case of the return instruction, we'll emit a
                 * restore, followed by the instruction in the slot (which
                 * we'll transplant here), and then another save.  While it
                 * may seem intellectually unsatisfying to emit the additional
                 * restore/save couplet, one can take solace in the fact that
                 * we don't do this if the instruction in the return delay
                 * slot is a nop -- which it is nearly 90% of the time with
                 * gcc.  (And besides, this couplet can't induce unnecessary
                 * spill/fill traps; rewriting the delay instruction to be
                 * in terms of the current window hardly seems worth the
                 * trouble -- let alone the risk.)
                 */
                uint32_t delay = *(instr + 1);
                ASSERT(*instr == FBT_RETURN(FBT_REG_I7, 8));

                cti = FBT_RET;
                restore = FBT_RESTORE(FBT_REG_G0, FBT_REG_G0, FBT_REG_G0);

                if (delay != FBT_SETHI(0, FBT_REG_G0)) {
                        *tinstr++ = restore;
                        *tinstr++ = delay;
                        *tinstr++ = FBT_SAVEIMM(FBT_REG_O6,
                            -SA(MINFRAME), FBT_REG_O6);
                }
        }

        FBT_REG_INITLOCALS(local, locals);

        /*
         * Mark the locals used in the jmpl.
         */
        if (FBT_FMT3_OP(cti) == FBT_OP_JMPL) {
                uint32_t rs1 = FBT_FMT3_RS1(cti);
                FBT_REG_MARKLOCAL(locals, rs1);

                if (!FBT_FMT3_ISIMM(cti)) {
                        uint32_t rs2 = FBT_FMT3_RS2(cti);
                        FBT_REG_MARKLOCAL(locals, rs2);
                }
        }

        /*
         * And mark the locals used in the restore.
         */
        rs1 = FBT_FMT3_RS1(restore);
        FBT_REG_MARKLOCAL(locals, rs1);

        if (!FBT_FMT3_ISIMM(restore)) {
                uint32_t rs2 = FBT_FMT3_RS2(restore);
                FBT_REG_MARKLOCAL(locals, rs2);
        }

        if (FBT_FMT3_OP(cti) == FBT_OP_JMPL) {
                uint32_t rs1 = FBT_FMT3_RS1(cti);

                if (FBT_REG_ISVOLATILE(rs1)) {
                        FBT_REG_ALLOCLOCAL(local, locals);
                        FBT_FMT3_RS1_SET(cti, local);
                        *tinstr++ = FBT_MOV(rs1, local);
                }

                if (!FBT_FMT3_ISIMM(cti)) {
                        uint32_t rs2 = FBT_FMT3_RS2(cti);

                        if (FBT_REG_ISVOLATILE(rs2)) {
                                FBT_REG_ALLOCLOCAL(local, locals);
                                FBT_FMT3_RS2_SET(cti, local);
                                *tinstr++ = FBT_MOV(rs2, local);
                        }
                }
        }

        rs1 = FBT_FMT3_RS1(restore);

        if (FBT_REG_ISVOLATILE(rs1)) {
                FBT_REG_ALLOCLOCAL(local, locals);
                FBT_FMT3_RS1_SET(restore, local);
                *tinstr++ = FBT_MOV(rs1, local);
        }

        if (!FBT_FMT3_ISIMM(restore)) {
                uint32_t rs2 = FBT_FMT3_RS2(restore);

                if (FBT_REG_ISVOLATILE(rs2)) {
                        FBT_REG_ALLOCLOCAL(local, locals);
                        FBT_FMT3_RS2_SET(restore, local);
                        *tinstr++ = FBT_MOV(rs2, local);
                }
        }

        if (id > (uint32_t)FBT_SIMM13_MAX) {
                *tinstr++ = FBT_SETHI(id, FBT_REG_O0);
                *tinstr++ = FBT_ORLO(FBT_REG_O0, id, FBT_REG_O0);
        } else {
                *tinstr++ = FBT_ORSIMM13(FBT_REG_G0, id, FBT_REG_O0);
        }

        if (offset > (uint32_t)FBT_SIMM13_MAX) {
                *tinstr++ = FBT_SETHI(offset, FBT_REG_O1);
                *tinstr++ = FBT_ORLO(FBT_REG_O1, offset, FBT_REG_O1);
        } else {
                *tinstr++ = FBT_ORSIMM13(FBT_REG_G0, offset, FBT_REG_O1);
        }

        *tinstr = FBT_CALL((uintptr_t)tinstr - base + va, dtrace_probe);
        tinstr++;

        if (FBT_FMT3_RD(restore) == FBT_REG_O0) {
                /*
                 * If the destination register of the restore is %o0, we
                 * need to perform the implied calculation to derive the
                 * return value.
                 */
                uint32_t add = (restore & ~FBT_FMT3_OP_MASK) | FBT_OP_ADD;
                add &= ~FBT_FMT3_RD_MASK;
                *tinstr++ = add | (FBT_REG_O2 << FBT_FMT3_RD_SHIFT);
        } else {
                *tinstr++ = FBT_MOV(FBT_REG_I0, FBT_REG_O2);
        }

        /*
         * If the control transfer instruction is %pc-relative (i.e. a
         * call), we need to reset it appropriately.
         */
        if (FBT_FMT1_OP(cti) == FBT_OP_CALL) {
                dest = (uintptr_t)instr + (FBT_FMT1_DISP30(cti) << 2);
                *tinstr = FBT_CALL((uintptr_t)tinstr - base + va, dest);
                tinstr++;
        } else {
                *tinstr++ = cti;
        }

        *tinstr++ = restore;
        tramp->fbtt_va += (uintptr_t)tinstr - tramp->fbtt_next;
        tramp->fbtt_next = (uintptr_t)tinstr;

        return (FBT_BAA(instr, va));
}

static uint32_t
fbt_patch_retl(uint32_t *instr, uint32_t *funcbase, uint32_t *funclim,
    int offset, uint32_t id, fbt_trampoline_t *tramp, const char *name)
{
        uint32_t *tinstr = (uint32_t *)tramp->fbtt_next;
        uintptr_t va = tramp->fbtt_va;
        uintptr_t base = tramp->fbtt_next;
        uint32_t cti = *instr, dest;
        int annul = 0;

        FBT_COUNTER(id, fbt_retl);

        if (tramp->fbtt_next + FBT_RETLENT_MAXSIZE > tramp->fbtt_limit) {
                /*
                 * There isn't sufficient room for this entry; return failure.
                 */
                return (FBT_ILLTRAP);
        }

        if (offset == sizeof (uint32_t) &&
            fbt_canpatch_retl(instr - 1, 0, name)) {
                *tinstr++ = *instr;
                annul = 1;
                FBT_COUNTER(id, fbt_retl_twoinstr);
        } else {
                if (FBT_FMT3_OP(cti) == FBT_OP_JMPL &&
                    FBT_FMT3_RD(cti) != FBT_REG_O7 &&
                    FBT_FMT3_RS1(cti) != FBT_REG_O7) {
                        annul = 1;
                        *tinstr++ = *(instr + 1);
                }
        }

        *tinstr++ = FBT_SAVEIMM(FBT_REG_O6, -SA(MINFRAME), FBT_REG_O6);

        if (FBT_FMT3_OP(cti) == FBT_OP_JMPL) {
                uint32_t rs1, rs2, o2i = FBT_REG_I0 - FBT_REG_O0;

                /*
                 * If we have a jmpl and it's in terms of output registers, we
                 * need to rewrite it to be in terms of the corresponding input
                 * registers.  If it's in terms of the globals, we'll rewrite
                 * it to be in terms of locals.
                 */
                rs1 = FBT_FMT3_RS1(cti);

                if (FBT_REG_ISOUTPUT(rs1))
                        rs1 += o2i;

                if (FBT_REG_ISGLOBAL(rs1)) {
                        *tinstr++ = FBT_MOV(rs1, FBT_REG_L0);
                        rs1 = FBT_REG_L0;
                }

                FBT_FMT3_RS1_SET(cti, rs1);

                if (!FBT_FMT3_ISIMM(cti)) {
                        rs2 = FBT_FMT3_RS2(cti);

                        if (FBT_REG_ISOUTPUT(rs2))
                                rs2 += o2i;

                        if (FBT_REG_ISGLOBAL(rs2)) {
                                *tinstr++ = FBT_MOV(rs2, FBT_REG_L1);
                                rs2 = FBT_REG_L1;
                        }

                        FBT_FMT3_RS2_SET(cti, rs2);
                }

                /*
                 * Now we need to check the rd and source register for the jmpl;
                 * If neither rd nor the source register is %o7, then we might
                 * have a jmp that is actually part of a jump table.  We need
                 * to generate the code to compare it to the base and limit of
                 * the function.
                 */
                if (FBT_FMT3_RD(cti) != FBT_REG_O7 && rs1 != FBT_REG_I7) {
                        uintptr_t base = (uintptr_t)funcbase;
                        uintptr_t limit = (uintptr_t)funclim;

                        FBT_COUNTER(id, fbt_retl_jmptab);

                        if (FBT_FMT3_ISIMM(cti)) {
                                *tinstr++ = FBT_ADDSIMM13(rs1,
                                    FBT_FMT3_SIMM13(cti), FBT_REG_L2);
                        } else {
                                *tinstr++ = FBT_ADD(rs1, rs2, FBT_REG_L2);
                        }

                        *tinstr++ = FBT_SETHI(base, FBT_REG_L3);
                        *tinstr++ = FBT_ORLO(FBT_REG_L3, base, FBT_REG_L3);
                        *tinstr++ = FBT_CMP(FBT_REG_L2, FBT_REG_L3);
                        *tinstr++ = FBT_BL(0, 8 * sizeof (uint32_t));
                        *tinstr++ = FBT_SETHI(limit, FBT_REG_L3);
                        *tinstr++ = FBT_ORLO(FBT_REG_L3, limit, FBT_REG_L3);
                        *tinstr++ = FBT_CMP(FBT_REG_L2, FBT_REG_L3);
                        *tinstr++ = FBT_BGE(0, 4 * sizeof (uint32_t));
                        *tinstr++ = FBT_SETHI(0, FBT_REG_G0);
                        *tinstr++ = cti;
                        *tinstr++ = FBT_RESTORE(FBT_REG_G0,
                            FBT_REG_G0, FBT_REG_G0);
                }
        }

        if (id > (uint32_t)FBT_SIMM13_MAX) {
                *tinstr++ = FBT_SETHI(id, FBT_REG_O0);
                *tinstr++ = FBT_ORLO(FBT_REG_O0, id, FBT_REG_O0);
        } else {
                *tinstr++ = FBT_ORSIMM13(FBT_REG_G0, id, FBT_REG_O0);
        }

        if (offset > (uint32_t)FBT_SIMM13_MAX) {
                *tinstr++ = FBT_SETHI(offset, FBT_REG_O1);
                *tinstr++ = FBT_ORLO(FBT_REG_O1, offset, FBT_REG_O1);
        } else {
                *tinstr++ = FBT_ORSIMM13(FBT_REG_G0, offset, FBT_REG_O1);
        }

        *tinstr = FBT_CALL((uintptr_t)tinstr - base + va, dtrace_probe);
        tinstr++;
        *tinstr++ = FBT_MOV(FBT_REG_I0, FBT_REG_O2);

        /*
         * If the control transfer instruction is %pc-relative (i.e. a
         * call), we need to reset it appropriately.
         */
        if (FBT_FMT1_OP(cti) == FBT_OP_CALL) {
                FBT_COUNTER(id, fbt_retl_tailcall);
                dest = (uintptr_t)instr + (FBT_FMT1_DISP30(cti) << 2);
                *tinstr = FBT_CALL((uintptr_t)tinstr - base + va, dest);
                tinstr++;
                annul = 1;
        } else {
                if (FBT_FMT3_OP(cti) == FBT_OP_JMPL) {
                        *tinstr++ = cti;

                        if (FBT_FMT3_RD(cti) == FBT_REG_O7) {
                                FBT_COUNTER(id, fbt_retl_tailjmpl);
                                annul = 1;
                        }
                } else {
                        *tinstr++ = FBT_RET;
                }
        }

        *tinstr++ = FBT_RESTORE(FBT_REG_G0, FBT_REG_G0, FBT_REG_G0);

        tramp->fbtt_va += (uintptr_t)tinstr - tramp->fbtt_next;
        tramp->fbtt_next = (uintptr_t)tinstr;

        return (annul ? FBT_BAA(instr, va) : FBT_BA(instr, va));
}

/*ARGSUSED*/
static void
fbt_provide_module(void *arg, struct modctl *ctl)
{
        struct module *mp = ctl->mod_mp;
        char *modname = ctl->mod_modname;
        char *str = mp->strings;
        int nsyms = mp->nsyms;
        Shdr *symhdr = mp->symhdr;
        size_t symsize;
        char *name;
        int i;
        fbt_probe_t *fbt, *retfbt;
        fbt_trampoline_t tramp;
        uintptr_t offset;
        int primary = 0;
        ctf_file_t *fp = NULL;
        int error;
        int estimate = 1;
        uint32_t faketramp[50];
        size_t fbt_size = 0;

        /*
         * Employees of dtrace and their families are ineligible.  Void
         * where prohibited.
         */
        if (strcmp(modname, "dtrace") == 0)
                return;

        if (ctl->mod_requisites != NULL) {
                struct modctl_list *list;

                list = (struct modctl_list *)ctl->mod_requisites;

                for (; list != NULL; list = list->modl_next) {
                        if (strcmp(list->modl_modp->mod_modname, "dtrace") == 0)
                                return;
                }
        }

        /*
         * KMDB is ineligible for instrumentation -- it may execute in
         * any context, including probe context.
         */
        if (strcmp(modname, "kmdbmod") == 0)
                return;

        if (str == NULL || symhdr == NULL || symhdr->sh_addr == 0) {
                /*
                 * If this module doesn't (yet) have its string or symbol
                 * table allocated, clear out.
                 */
                return;
        }

        symsize = symhdr->sh_entsize;

        if (mp->fbt_nentries) {
                /*
                 * This module has some FBT entries allocated; we're afraid
                 * to screw with it.
                 */
                return;
        }

        if (mp->fbt_tab != NULL)
                estimate = 0;

        /*
         * This is a hack for unix/genunix/krtld.
         */
        primary = vmem_contains(heap_arena, (void *)ctl,
            sizeof (struct modctl)) == 0;
        kobj_textwin_alloc(mp);

        /*
         * Open the CTF data for the module.  We'll use this to determine the
         * functions that can be instrumented.  Note that this call can fail,
         * in which case we'll use heuristics to determine the functions that
         * can be instrumented.  (But in particular, leaf functions will not be
         * instrumented.)
         */
        fp = ctf_modopen(mp, &error);

forreal:
        if (!estimate) {
                tramp.fbtt_next =
                    (uintptr_t)fbt_trampoline_map((uintptr_t)mp->fbt_tab,
                    mp->fbt_size);
                tramp.fbtt_limit = tramp.fbtt_next + mp->fbt_size;
                tramp.fbtt_va = (uintptr_t)mp->fbt_tab;
        }

        for (i = 1; i < nsyms; i++) {
                ctf_funcinfo_t f;
                uint32_t *instr, *base, *limit;
                Sym *sym = (Sym *)(symhdr->sh_addr + i * symsize);
                int have_ctf = 0, is_leaf = 0, nargs, cti = 0;
                int (*canpatch)(uint32_t *, int, const char *);
                uint32_t (*patch)(uint32_t *, uint32_t *, uint32_t *, int,
                    uint32_t, fbt_trampoline_t *, const char *);

                if (ELF_ST_TYPE(sym->st_info) != STT_FUNC)
                        continue;

                /*
                 * Weak symbols are not candidates.  This could be made to
                 * work (where weak functions and their underlying function
                 * appear as two disjoint probes), but it's not simple.
                 */
                if (ELF_ST_BIND(sym->st_info) == STB_WEAK)
                        continue;

                name = str + sym->st_name;

                if (strstr(name, "dtrace_") == name &&
                    strstr(name, "dtrace_safe_") != name) {
                        /*
                         * Anything beginning with "dtrace_" may be called
                         * from probe context unless it explitly indicates
                         * that it won't be called from probe context by
                         * using the prefix "dtrace_safe_".
                         */
                        continue;
                }

                if (strstr(name, "kdi_") == name ||
                    strstr(name, "_kdi_") != NULL) {
                        /*
                         * Any function name beginning with "kdi_" or
                         * containing the string "_kdi_" is a part of the
                         * kernel debugger interface and may be called in
                         * arbitrary context -- including probe context.
                         */
                        continue;
                }

                if (strstr(name, "__relocatable") != NULL) {
                        /*
                         * Anything with the string "__relocatable" anywhere
                         * in the function name is considered to be a function
                         * that may be manually relocated before execution.
                         * Because FBT uses a PC-relative technique for
                         * instrumentation, these functions cannot safely
                         * be instrumented by us.
                         */
                        continue;
                }

                if (strstr(name, "ip_ocsum") == name) {
                        /*
                         * The ip_ocsum_* family of routines are all ABI
                         * violators.  (They expect incoming arguments in the
                         * globals!)  Break the ABI?  No soup for you!
                         */
                        continue;
                }

                /*
                 * We want to scan the function for one (and only one) save.
                 * Any more indicates that something fancy is going on.
                 */
                base = (uint32_t *)sym->st_value;
                limit = (uint32_t *)(sym->st_value + sym->st_size);

                /*
                 * We don't want to interpose on the module stubs.
                 */
                if (base >= (uint32_t *)stubs_base &&
                    base <= (uint32_t *)stubs_end)
                        continue;

                /*
                 * We can't safely trace a zero-length function...
                 */
                if (base == limit)
                        continue;

                /*
                 * Due to 4524008, _init and _fini may have a bloated st_size.
                 * While this bug was fixed quite some time ago, old drivers
                 * may be lurking.  We need to develop a better solution to
                 * this problem, such that correct _init and _fini functions
                 * (the vast majority) may be correctly traced.  One solution
                 * may be to scan through the entire symbol table to see if
                 * any symbol overlaps with _init.  If none does, set a bit in
                 * the module structure that this module has correct _init and
                 * _fini sizes.  This will cause some pain the first time a
                 * module is scanned, but at least it would be O(N) instead of
                 * O(N log N)...
                 */
                if (strcmp(name, "_init") == 0)
                        continue;

                if (strcmp(name, "_fini") == 0)
                        continue;

                instr = base;

                /*
                 * While we try hard to only trace safe functions (that is,
                 * functions at TL=0), one unsafe function manages to otherwise
                 * appear safe:  prom_trap().  We could discover prom_trap()
                 * if we added an additional rule:  in order to trace a
                 * function, we must either (a) discover a restore or (b)
                 * determine that the function does not have any unlinked
                 * control transfers to another function (i.e., the function
                 * never returns).  Unfortunately, as of this writing, one
                 * legitimate function (resume_from_zombie()) transfers
                 * control to a different function (_resume_from_idle())
                 * without executing a restore.  Barring a rule to figure out
                 * that resume_from_zombie() is safe while prom_trap() is not,
                 * we resort to hard-coding prom_trap() here.
                 */
                if (strcmp(name, "prom_trap") == 0)
                        continue;

                if (fp != NULL && ctf_func_info(fp, i, &f) != CTF_ERR) {
                        nargs = f.ctc_argc;
                        have_ctf = 1;
                } else {
                        nargs = 32;
                }

                /*
                 * If the first instruction of the function is a branch and
                 * it's not a branch-always-not-annulled, we're going to refuse
                 * to patch it.
                 */
                if ((*instr & FBT_OP_MASK) == FBT_OP0 &&
                    (*instr & FBT_FMT2_OP2_MASK) != FBT_FMT2_OP2_SETHI &&
                    (*instr & FBT_FMT2_OP2_MASK) != FBT_FMT2_OP2_BPR) {
                        if (!FBT_IS_BA(*instr) && !FBT_IS_BAPCC(*instr)) {
                                if (have_ctf) {
                                        cmn_err(CE_NOTE, "cannot instrument %s:"
                                            " begins with non-ba, "
                                            "non-br CTI", name);
                                }
                                continue;
                        }
                }

                while (!FBT_IS_SAVE(*instr)) {
                        /*
                         * Before we assume that this is a leaf routine, check
                         * forward in the basic block for a save.
                         */
                        int op = *instr & FBT_OP_MASK;
                        int op2 = *instr & FBT_FMT2_OP2_MASK;

                        if (op == FBT_OP0 && op2 != FBT_FMT2_OP2_SETHI) {
                                /*
                                 * This is a CTI.  If we see a subsequent
                                 * save, we will refuse to process this
                                 * routine unless both of the following are
                                 * true:
                                 *
                                 *  (a) The branch is not annulled
                                 *
                                 *  (b) The subsequent save is in the delay
                                 *      slot of the branch
                                 */
                                if ((*instr & FBT_ANNUL) ||
                                    !FBT_IS_SAVE(*(instr + 1))) {
                                        cti = 1;
                                } else {
                                        instr++;
                                        break;
                                }
                        }

                        if (op == FBT_OP1)
                                cti = 1;

                        if (++instr == limit)
                                break;
                }

                if (instr < limit && cti) {
                        /*
                         * If we found a CTI before the save, we need to not
                         * do anything.  But if we have CTF information, this
                         * is weird enough that it merits a message.
                         */
                        if (!have_ctf)
                                continue;

                        cmn_err(CE_NOTE, "cannot instrument %s: "
                            "save not in first basic block", name);
                        continue;
                }

                if (instr == limit) {
                        if (!have_ctf)
                                continue;
                        is_leaf = 1;

                        if (!estimate)
                                fbt_leaf_functions++;

                        canpatch = fbt_canpatch_retl;
                        patch = fbt_patch_retl;
                } else {
                        canpatch = fbt_canpatch_return;
                        patch = fbt_patch_return;
                }

                if (!have_ctf && !is_leaf) {
                        /*
                         * Before we assume that this isn't something tricky,
                         * look for other saves.  If we find them, there are
                         * multiple entry points here (or something), and we'll
                         * leave it alone.
                         */
                        while (++instr < limit) {
                                if (FBT_IS_SAVE(*instr))
                                        break;
                        }

                        if (instr != limit)
                                continue;
                }

                instr = base;

                if (FBT_IS_CTI(*instr)) {
                        /*
                         * If we have a CTI, we want to be sure that we don't
                         * have a CTI or a PC-relative instruction in the
                         * delay slot -- we want to be able to thunk the
                         * instruction into the trampoline without worrying
                         * about either DCTIs or relocations.  It would be
                         * very odd for the compiler to generate this kind of
                         * code, so we warn about it if we have CTF
                         * information.
                         */
                        if (FBT_IS_CTI(*(instr + 1))) {
                                if (!have_ctf)
                                        continue;

                                cmn_err(CE_NOTE, "cannot instrument %s: "
                                    "CTI in delay slot of first instruction",
                                    name);
                                continue;
                        }

                        if (FBT_IS_PCRELATIVE(*(instr + 1))) {
                                if (!have_ctf)
                                        continue;

                                cmn_err(CE_NOTE, "cannot instrument %s: "
                                    "PC-relative instruction in delay slot of"
                                    " first instruction", name);
                                continue;
                        }
                }

                if (estimate) {
                        tramp.fbtt_next = (uintptr_t)faketramp;
                        tramp.fbtt_limit = tramp.fbtt_next + sizeof (faketramp);
                        (void) fbt_patch_entry(instr, FBT_ESTIMATE_ID,
                            &tramp, nargs);
                        fbt_size += tramp.fbtt_next - (uintptr_t)faketramp;
                } else {
                        fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
                        fbt->fbtp_name = name;
                        fbt->fbtp_ctl = ctl;
                        fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
                            name, FBT_PROBENAME_ENTRY, 1, fbt);
                        fbt->fbtp_patchval = FBT_BAA(instr, tramp.fbtt_va);

                        if (!fbt_patch_entry(instr, fbt->fbtp_id,
                            &tramp, nargs)) {
                                cmn_err(CE_WARN, "unexpectedly short FBT table "
                                    "in module %s (sym %d of %d)", modname,
                                    i, nsyms);
                                break;
                        }

                        fbt->fbtp_patchpoint =
                            (uint32_t *)((uintptr_t)mp->textwin +
                            ((uintptr_t)instr - (uintptr_t)mp->text));
                        fbt->fbtp_savedval = *instr;

                        fbt->fbtp_loadcnt = ctl->mod_loadcnt;
                        fbt->fbtp_primary = primary;
                        fbt->fbtp_symndx = i;
                        mp->fbt_nentries++;
                }

                retfbt = NULL;
again:
                if (++instr == limit)
                        continue;

                offset = (uintptr_t)instr - (uintptr_t)base;

                if (!(*canpatch)(instr, offset, name))
                        goto again;

                if (estimate) {
                        tramp.fbtt_next = (uintptr_t)faketramp;
                        tramp.fbtt_limit = tramp.fbtt_next + sizeof (faketramp);
                        (void) (*patch)(instr, base, limit,
                            offset, FBT_ESTIMATE_ID, &tramp, name);
                        fbt_size += tramp.fbtt_next - (uintptr_t)faketramp;

                        goto again;
                }

                fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
                fbt->fbtp_name = name;
                fbt->fbtp_ctl = ctl;

                if (retfbt == NULL) {
                        fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
                            name, FBT_PROBENAME_RETURN, 1, fbt);
                } else {
                        retfbt->fbtp_next = fbt;
                        fbt->fbtp_id = retfbt->fbtp_id;
                }

                fbt->fbtp_return = 1;
                retfbt = fbt;

                if ((fbt->fbtp_patchval = (*patch)(instr, base, limit, offset,
                    fbt->fbtp_id, &tramp, name)) == FBT_ILLTRAP) {
                        cmn_err(CE_WARN, "unexpectedly short FBT table "
                            "in module %s (sym %d of %d)", modname, i, nsyms);
                        break;
                }

                fbt->fbtp_patchpoint = (uint32_t *)((uintptr_t)mp->textwin +
                    ((uintptr_t)instr - (uintptr_t)mp->text));
                fbt->fbtp_savedval = *instr;
                fbt->fbtp_loadcnt = ctl->mod_loadcnt;
                fbt->fbtp_primary = primary;
                fbt->fbtp_symndx = i;
                mp->fbt_nentries++;

                goto again;
        }

        if (estimate) {
                /*
                 * Slosh on another entry's worth...
                 */
                fbt_size += FBT_ENT_MAXSIZE;
                mp->fbt_size = fbt_size;
                mp->fbt_tab = kobj_texthole_alloc(mp->text, fbt_size);

                if (mp->fbt_tab == NULL) {
                        cmn_err(CE_WARN, "couldn't allocate FBT table "
                            "for module %s", modname);
                } else {
                        estimate = 0;
                        goto forreal;
                }
        } else {
                fbt_trampoline_unmap();
        }

error:
        if (fp != NULL)
                ctf_close(fp);
}

/*ARGSUSED*/
static void
fbt_destroy(void *arg, dtrace_id_t id, void *parg)
{
        fbt_probe_t *fbt = parg, *next;
        struct modctl *ctl = fbt->fbtp_ctl;

        do {
                if (ctl != NULL && ctl->mod_loadcnt == fbt->fbtp_loadcnt) {
                        if ((ctl->mod_loadcnt == fbt->fbtp_loadcnt &&
                            ctl->mod_loaded) || fbt->fbtp_primary) {
                                ((struct module *)
                                    (ctl->mod_mp))->fbt_nentries--;
                        }
                }

                next = fbt->fbtp_next;
                kmem_free(fbt, sizeof (fbt_probe_t));
                fbt = next;
        } while (fbt != NULL);
}

/*ARGSUSED*/
static int
fbt_enable(void *arg, dtrace_id_t id, void *parg)
{
        fbt_probe_t *fbt = parg, *f;
        struct modctl *ctl = fbt->fbtp_ctl;

        ctl->mod_nenabled++;

        for (f = fbt; f != NULL; f = f->fbtp_next) {
                if (f->fbtp_patchpoint == NULL) {
                        /*
                         * Due to a shortened FBT table, this entry was never
                         * completed; refuse to enable it.
                         */
                        if (fbt_verbose) {
                                cmn_err(CE_NOTE, "fbt is failing for probe %s "
                                    "(short FBT table in %s)",
                                    fbt->fbtp_name, ctl->mod_modname);
                        }

                        return (0);
                }
        }

        /*
         * If this module has disappeared since we discovered its probes,
         * refuse to enable it.
         */
        if (!fbt->fbtp_primary && !ctl->mod_loaded) {
                if (fbt_verbose) {
                        cmn_err(CE_NOTE, "fbt is failing for probe %s "
                            "(module %s unloaded)",
                            fbt->fbtp_name, ctl->mod_modname);
                }

                return (0);
        }

        /*
         * Now check that our modctl has the expected load count.  If it
         * doesn't, this module must have been unloaded and reloaded -- and
         * we're not going to touch it.
         */
        if (ctl->mod_loadcnt != fbt->fbtp_loadcnt) {
                if (fbt_verbose) {
                        cmn_err(CE_NOTE, "fbt is failing for probe %s "
                            "(module %s reloaded)",
                            fbt->fbtp_name, ctl->mod_modname);
                }

                return (0);
        }

        for (; fbt != NULL; fbt = fbt->fbtp_next)
                *fbt->fbtp_patchpoint = fbt->fbtp_patchval;

        return (0);
}

/*ARGSUSED*/
static void
fbt_disable(void *arg, dtrace_id_t id, void *parg)
{
        fbt_probe_t *fbt = parg, *f;
        struct modctl *ctl = fbt->fbtp_ctl;

        ASSERT(ctl->mod_nenabled > 0);
        ctl->mod_nenabled--;

        for (f = fbt; f != NULL; f = f->fbtp_next) {
                if (f->fbtp_patchpoint == NULL)
                        return;
        }

        if ((!fbt->fbtp_primary && !ctl->mod_loaded) ||
            (ctl->mod_loadcnt != fbt->fbtp_loadcnt))
                return;

        for (; fbt != NULL; fbt = fbt->fbtp_next)
                *fbt->fbtp_patchpoint = fbt->fbtp_savedval;
}

/*ARGSUSED*/
static void
fbt_suspend(void *arg, dtrace_id_t id, void *parg)
{
        fbt_probe_t *fbt = parg;
        struct modctl *ctl = fbt->fbtp_ctl;

        if (!fbt->fbtp_primary && !ctl->mod_loaded)
                return;

        if (ctl->mod_loadcnt != fbt->fbtp_loadcnt)
                return;

        ASSERT(ctl->mod_nenabled > 0);

        for (; fbt != NULL; fbt = fbt->fbtp_next)
                *fbt->fbtp_patchpoint = fbt->fbtp_savedval;
}

/*ARGSUSED*/
static void
fbt_resume(void *arg, dtrace_id_t id, void *parg)
{
        fbt_probe_t *fbt = parg;
        struct modctl *ctl = fbt->fbtp_ctl;

        if (!fbt->fbtp_primary && !ctl->mod_loaded)
                return;

        if (ctl->mod_loadcnt != fbt->fbtp_loadcnt)
                return;

        ASSERT(ctl->mod_nenabled > 0);

        for (; fbt != NULL; fbt = fbt->fbtp_next)
                *fbt->fbtp_patchpoint = fbt->fbtp_patchval;
}

/*ARGSUSED*/
static void
fbt_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc)
{
        fbt_probe_t *fbt = parg;
        struct modctl *ctl = fbt->fbtp_ctl;
        struct module *mp = ctl->mod_mp;
        ctf_file_t *fp = NULL, *pfp;
        ctf_funcinfo_t f;
        int error;
        ctf_id_t argv[32], type;
        int argc = sizeof (argv) / sizeof (ctf_id_t);
        const char *parent;

        if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt))
                goto err;

        if (fbt->fbtp_return && desc->dtargd_ndx == 0) {
                (void) strcpy(desc->dtargd_native, "int");
                return;
        }

        if ((fp = ctf_modopen(mp, &error)) == NULL) {
                /*
                 * We have no CTF information for this module -- and therefore
                 * no args[] information.
                 */
                goto err;
        }

        /*
         * If we have a parent container, we must manually import it.
         */
        if ((parent = ctf_parent_name(fp)) != NULL) {
                struct modctl *mp = &modules;
                struct modctl *mod = NULL;

                /*
                 * We must iterate over all modules to find the module that
                 * is our parent.
                 */
                do {
                        if (strcmp(mp->mod_modname, parent) == 0) {
                                mod = mp;
                                break;
                        }
                } while ((mp = mp->mod_next) != &modules);

                if (mod == NULL)
                        goto err;

                if ((pfp = ctf_modopen(mod->mod_mp, &error)) == NULL)
                        goto err;

                if (ctf_import(fp, pfp) != 0) {
                        ctf_close(pfp);
                        goto err;
                }

                ctf_close(pfp);
        }

        if (ctf_func_info(fp, fbt->fbtp_symndx, &f) == CTF_ERR)
                goto err;

        if (fbt->fbtp_return) {
                if (desc->dtargd_ndx > 1)
                        goto err;

                ASSERT(desc->dtargd_ndx == 1);
                type = f.ctc_return;
        } else {
                if (desc->dtargd_ndx + 1 > f.ctc_argc)
                        goto err;

                if (ctf_func_args(fp, fbt->fbtp_symndx, argc, argv) == CTF_ERR)
                        goto err;

                type = argv[desc->dtargd_ndx];
        }

        if (ctf_type_name(fp, type, desc->dtargd_native,
            DTRACE_ARGTYPELEN) != NULL) {
                ctf_close(fp);
                return;
        }
err:
        if (fp != NULL)
                ctf_close(fp);

        desc->dtargd_ndx = DTRACE_ARGNONE;
}

static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};

static dtrace_pops_t fbt_pops = {
        NULL,
        fbt_provide_module,
        fbt_enable,
        fbt_disable,
        fbt_suspend,
        fbt_resume,
        fbt_getargdesc,
        NULL,
        NULL,
        fbt_destroy
};

static int
fbt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
        switch (cmd) {
        case DDI_ATTACH:
                break;
        case DDI_RESUME:
                return (DDI_SUCCESS);
        default:
                return (DDI_FAILURE);
        }

        if (ddi_create_minor_node(devi, "fbt", S_IFCHR, 0,
            DDI_PSEUDO, 0) == DDI_FAILURE ||
            dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_KERNEL, NULL,
            &fbt_pops, NULL, &fbt_id) != 0) {
                ddi_remove_minor_node(devi, NULL);
                return (DDI_FAILURE);
        }

        ddi_report_dev(devi);
        fbt_devi = devi;
        return (DDI_SUCCESS);
}

static int
fbt_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
        switch (cmd) {
        case DDI_DETACH:
                break;
        case DDI_SUSPEND:
                return (DDI_SUCCESS);
        default:
                return (DDI_FAILURE);
        }

        if (dtrace_unregister(fbt_id) != 0)
                return (DDI_FAILURE);

        ddi_remove_minor_node(devi, NULL);
        return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
fbt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
        int error;

        switch (infocmd) {
        case DDI_INFO_DEVT2DEVINFO:
                *result = (void *)fbt_devi;
                error = DDI_SUCCESS;
                break;
        case DDI_INFO_DEVT2INSTANCE:
                *result = (void *)0;
                error = DDI_SUCCESS;
                break;
        default:
                error = DDI_FAILURE;
        }
        return (error);
}

/*ARGSUSED*/
static int
fbt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
        return (0);
}

static struct cb_ops fbt_cb_ops = {
        fbt_open,               /* open */
        nodev,                  /* close */
        nulldev,                /* strategy */
        nulldev,                /* print */
        nodev,                  /* dump */
        nodev,                  /* read */
        nodev,                  /* write */
        nodev,                  /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        nochpoll,               /* poll */
        ddi_prop_op,            /* cb_prop_op */
        0,                      /* streamtab  */
        D_NEW | D_MP            /* Driver compatibility flag */
};

static struct dev_ops fbt_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* refcnt */
        fbt_info,               /* get_dev_info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        fbt_attach,             /* attach */
        fbt_detach,             /* detach */
        nodev,                  /* reset */
        &fbt_cb_ops,            /* driver operations */
        NULL,                   /* bus operations */
        nodev,                  /* dev power */
        ddi_quiesce_not_needed,         /* quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
        &mod_driverops,         /* module type (this is a pseudo driver) */
        "Function Boundary Tracing",    /* name of module */
        &fbt_ops,               /* driver ops */
};

static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&modldrv,
        NULL
};

int
_init(void)
{
        return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
        return (mod_remove(&modlinkage));
}