root/usr/src/uts/common/dtrace/systrace.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/dtrace.h>
#include <sys/systrace.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>

/*
 * Value handed to dtrace_probe_create() for every probe this provider
 * creates.
 * NOTE(review): presumably the number of provider-introduced stack frames
 * that DTrace should skip -- confirm against dtrace_probe_create().
 */
#define SYSTRACE_ARTIFICIAL_FRAMES      1

/*
 * Each probe's private argument packs two things into one integer: the
 * system call number in the low SYSTRACE_SHIFT bits and, for entry probes
 * only, a flag in bit SYSTRACE_SHIFT.  Return probes carry the bare system
 * call number.
 *
 *   SYSTRACE_ENTRY(id)   builds the argument for an entry probe
 *   SYSTRACE_RETURN(id)  builds the argument for a return probe
 *   SYSTRACE_ISENTRY(x)  is non-zero when x was built by SYSTRACE_ENTRY()
 *   SYSTRACE_SYSNUM(x)   recovers the system call number from x
 */
#define SYSTRACE_SHIFT                  16
#define SYSTRACE_ISENTRY(x)             ((int)(x) >> SYSTRACE_SHIFT)
#define SYSTRACE_SYSNUM(x)              ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
#define SYSTRACE_ENTRY(id)              ((1 << SYSTRACE_SHIFT) | (id))
#define SYSTRACE_RETURN(id)             (id)

/*
 * The encoding above is only unambiguous if every system call number fits
 * strictly below the entry flag bit, so refuse to build otherwise.
 */
#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of system calls
#endif

/*
 * systrace_devi: the dev_info node recorded by systrace_attach() and handed
 *                back by systrace_info() for DDI_INFO_DEVT2DEVINFO.
 * systrace_id:   provider identifier filled in by dtrace_register() in
 *                systrace_attach(); used for probe lookup/creation and for
 *                dtrace_unregister() in systrace_detach().
 */
static dev_info_t *systrace_devi;
static dtrace_provider_id_t systrace_id;

/*
 * Capture the current handler of every system call in 'actual' into the
 * shadow table referenced by '*interposed'.
 *
 * actual:     system call table to read handlers from (NSYSCALL entries).
 * interposed: address of the shadow table pointer; if it is still NULL a
 *             zero-filled table of NSYSCALL entries is allocated (sleeping
 *             allocation) and stored through it.
 *
 * Entries are left untouched for loadable system calls that are not loaded
 * and for system calls whose handler is already one of the systrace
 * interposition routines.
 */
static void
systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
{
        systrace_sysent_t *shadow;
        int sysnum;

        if (*interposed == NULL) {
                *interposed = kmem_zalloc(NSYSCALL *
                    sizeof (systrace_sysent_t), KM_SLEEP);
        }
        shadow = *interposed;

        for (sysnum = 0; sysnum < NSYSCALL; sysnum++) {
                /*
                 * Keep a plain pointer variable: it is what gets handed to
                 * the LOADABLE_SYSCALL()/LOADED_SYSCALL() macros.
                 */
                struct sysent *real = &actual[sysnum];

                /* Loadable system call that is not currently loaded. */
                if (LOADABLE_SYSCALL(real) && !LOADED_SYSCALL(real))
                        continue;

                /*
                 * The slot currently holds the interposition routine that
                 * systrace_enable() installs; copying it would clobber the
                 * real handler recorded on an earlier pass.
                 */
                if (real->sy_callc == dtrace_systrace_syscall)
                        continue;
#ifdef _SYSCALL32_IMPL
                if (real->sy_callc == dtrace_systrace_syscall32)
                        continue;
#endif

                shadow[sysnum].stsy_underlying = real->sy_callc;
        }
}

/*ARGSUSED*/
static void
systrace_provide(void *arg, const dtrace_probedesc_t *desc)
{
        int i;

        if (desc != NULL)
                return;

        systrace_init(sysent, &systrace_sysent);
#ifdef _SYSCALL32_IMPL
        systrace_init(sysent32, &systrace_sysent32);
#endif

        for (i = 0; i < NSYSCALL; i++) {
                if (systrace_sysent[i].stsy_underlying == NULL)
                        continue;

                if (dtrace_probe_lookup(systrace_id, NULL,
                    syscallnames[i], "entry") != 0)
                        continue;

                (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
                    "entry", SYSTRACE_ARTIFICIAL_FRAMES,
                    (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
                (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
                    "return", SYSTRACE_ARTIFICIAL_FRAMES,
                    (void *)((uintptr_t)SYSTRACE_RETURN(i)));

                systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
                systrace_sysent[i].stsy_return = DTRACE_IDNONE;
#ifdef _SYSCALL32_IMPL
                systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
                systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
#endif
        }
}

/*
 * Provider "destroy" entry point (installed in systrace_pops).
 *
 * parg is the probe argument built by SYSTRACE_ENTRY()/SYSTRACE_RETURN().
 * No state is released here; the function only asserts that the shadow id
 * matching the probe kind has already been reset to DTRACE_IDNONE, i.e.
 * that the probe was disabled before being destroyed.
 */
/*ARGSUSED*/
static void
systrace_destroy(void *arg, dtrace_id_t id, void *parg)
{
        uintptr_t cookie = (uintptr_t)parg;
        int sysnum = SYSTRACE_SYSNUM(cookie);

        if (!SYSTRACE_ISENTRY(cookie)) {
                /* Return probe. */
                ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
#ifdef _SYSCALL32_IMPL
                ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
#endif
        } else {
                /* Entry probe. */
                ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
#ifdef _SYSCALL32_IMPL
                ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
#endif
        }
}

/*
 * Provider "enable" entry point (installed in systrace_pops).
 *
 * id:   DTrace probe id being enabled; stored in the shadow entry so it is
 *       available to the interposition routine.
 * parg: probe argument built by SYSTRACE_ENTRY()/SYSTRACE_RETURN().
 *
 * The probe id is recorded in both the native and (when built with
 * _SYSCALL32_IMPL) the 32-bit shadow tables.  If neither probe of this
 * system call was enabled beforehand, the system call table slot is
 * switched from the recorded underlying handler to the systrace
 * interposition routine with a compare-and-swap.  Always returns 0.
 */
/*ARGSUSED*/
static int
systrace_enable(void *arg, dtrace_id_t id, void *parg)
{
        uintptr_t cookie = (uintptr_t)parg;
        int sysnum = SYSTRACE_SYSNUM(cookie);
        systrace_sysent_t *native = &systrace_sysent[sysnum];
#ifdef _SYSCALL32_IMPL
        systrace_sysent_t *compat = &systrace_sysent32[sysnum];
#endif
        int was_interposed;

        /*
         * Sample the state before the new id is stored below; afterwards
         * at least one id is always set.
         */
        was_interposed = !(native->stsy_entry == DTRACE_IDNONE &&
            native->stsy_return == DTRACE_IDNONE);

        if (SYSTRACE_ISENTRY(cookie)) {
                native->stsy_entry = id;
#ifdef _SYSCALL32_IMPL
                compat->stsy_entry = id;
#endif
        } else {
                native->stsy_return = id;
#ifdef _SYSCALL32_IMPL
                compat->stsy_return = id;
#endif
        }

        if (was_interposed) {
                /* The sibling probe already installed the interposer. */
                ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
        } else {
                (void) atomic_cas_ptr(&sysent[sysnum].sy_callc,
                    (void *)native->stsy_underlying,
                    (void *)dtrace_systrace_syscall);
#ifdef _SYSCALL32_IMPL
                (void) atomic_cas_ptr(&sysent32[sysnum].sy_callc,
                    (void *)compat->stsy_underlying,
                    (void *)dtrace_systrace_syscall32);
#endif
        }

        return (0);
}

/*
 * Provider "disable" entry point (installed in systrace_pops).
 *
 * parg is the probe argument built by SYSTRACE_ENTRY()/SYSTRACE_RETURN().
 *
 * If at least one of the two shadow ids of this system call is already
 * DTRACE_IDNONE, the system call table slot is switched back from the
 * interposition routine to the recorded underlying handler with a
 * compare-and-swap.  Afterwards the shadow id matching the probe kind is
 * reset to DTRACE_IDNONE in the native and (with _SYSCALL32_IMPL) 32-bit
 * shadow tables.
 * NOTE(review): assuming the probe being disabled is currently enabled, an
 * already-clear id means this is the last enabled probe -- confirm.
 */
/*ARGSUSED*/
static void
systrace_disable(void *arg, dtrace_id_t id, void *parg)
{
        uintptr_t cookie = (uintptr_t)parg;
        int sysnum = SYSTRACE_SYSNUM(cookie);
        systrace_sysent_t *native = &systrace_sysent[sysnum];
#ifdef _SYSCALL32_IMPL
        systrace_sysent_t *compat = &systrace_sysent32[sysnum];
#endif
        int last_probe;

        /* Must be evaluated before the id is cleared further down. */
        last_probe = !(native->stsy_entry != DTRACE_IDNONE &&
            native->stsy_return != DTRACE_IDNONE);

        if (last_probe) {
                (void) atomic_cas_ptr(&sysent[sysnum].sy_callc,
                    (void *)dtrace_systrace_syscall,
                    (void *)native->stsy_underlying);
#ifdef _SYSCALL32_IMPL
                (void) atomic_cas_ptr(&sysent32[sysnum].sy_callc,
                    (void *)dtrace_systrace_syscall32,
                    (void *)compat->stsy_underlying);
#endif
        }

        if (SYSTRACE_ISENTRY(cookie)) {
                native->stsy_entry = DTRACE_IDNONE;
#ifdef _SYSCALL32_IMPL
                compat->stsy_entry = DTRACE_IDNONE;
#endif
        } else {
                native->stsy_return = DTRACE_IDNONE;
#ifdef _SYSCALL32_IMPL
                compat->stsy_return = DTRACE_IDNONE;
#endif
        }
}

/*
 * Stability attributes passed to dtrace_register() for the "syscall"
 * provider.  Each row is a { stability, stability, class } triple.
 * NOTE(review): presumably the five rows describe, in order, the provider,
 * module, function, name and argument attributes, and the two stability
 * columns are name and data stability -- confirm against dtrace_pattr_t.
 */
static dtrace_pattr_t systrace_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};

/*
 * Provider operations vector passed to dtrace_register().  The initializer
 * is positional: this provider supplies only its provide, enable, disable
 * and destroy routines and leaves every other slot NULL.
 * NOTE(review): slot order must match the dtrace_pops_t declaration, which
 * is not visible in this file -- confirm before adding or moving entries.
 */
static dtrace_pops_t systrace_pops = {
        systrace_provide,
        NULL,
        systrace_enable,
        systrace_disable,
        NULL,
        NULL,
        NULL,
        NULL,
        NULL,
        systrace_destroy
};

/*
 * Driver attach entry point (installed in systrace_ops).
 *
 * DDI_RESUME succeeds without doing anything; any command other than
 * DDI_ATTACH fails.  For DDI_ATTACH the probe hook systrace_probe is pointed
 * at dtrace_probe, the "systrace" minor node is created and the "syscall"
 * provider is registered with DTrace.  If either step fails, the hook is
 * reset to systrace_stub, the minor nodes are removed and DDI_FAILURE is
 * returned.  On success the dev_info node is remembered in systrace_devi.
 */
static int
systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
        if (cmd == DDI_RESUME)
                return (DDI_SUCCESS);
        if (cmd != DDI_ATTACH)
                return (DDI_FAILURE);

        /* Publish the real probe function before any probe can be enabled. */
        systrace_probe = (void (*)())dtrace_probe;
        membar_enter();

        if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
            DDI_PSEUDO, 0) == DDI_FAILURE)
                goto fail;

        if (dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
            &systrace_pops, NULL, &systrace_id) != 0)
                goto fail;

        ddi_report_dev(devi);
        systrace_devi = devi;

        return (DDI_SUCCESS);

fail:
        systrace_probe = systrace_stub;
        ddi_remove_minor_node(devi, NULL);
        return (DDI_FAILURE);
}

/*
 * Driver detach entry point (installed in systrace_ops).
 *
 * DDI_SUSPEND succeeds without doing anything; any command other than
 * DDI_DETACH fails.  For DDI_DETACH the provider is unregistered from
 * DTrace first; if that is refused the detach fails and nothing else is
 * changed.  Otherwise the minor nodes are removed and the probe hook
 * systrace_probe is reset to systrace_stub.
 */
static int
systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
        if (cmd == DDI_SUSPEND)
                return (DDI_SUCCESS);
        if (cmd != DDI_DETACH)
                return (DDI_FAILURE);

        if (dtrace_unregister(systrace_id) != 0)
                return (DDI_FAILURE);

        ddi_remove_minor_node(devi, NULL);
        systrace_probe = systrace_stub;

        return (DDI_SUCCESS);
}

/*
 * Driver getinfo entry point (installed in systrace_ops).
 *
 * DDI_INFO_DEVT2DEVINFO:  stores the dev_info node recorded at attach time
 *                         in *result and returns DDI_SUCCESS.
 * DDI_INFO_DEVT2INSTANCE: stores instance 0 in *result and returns
 *                         DDI_SUCCESS.
 * Any other command returns DDI_FAILURE and leaves *result untouched.
 */
/*ARGSUSED*/
static int
systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
        if (infocmd == DDI_INFO_DEVT2DEVINFO) {
                *result = (void *)systrace_devi;
                return (DDI_SUCCESS);
        }

        if (infocmd == DDI_INFO_DEVT2INSTANCE) {
                *result = (void *)0;
                return (DDI_SUCCESS);
        }

        return (DDI_FAILURE);
}

/*
 * Character-device open entry point (installed in systrace_cb_ops).
 * Every open is accepted unconditionally: none of the arguments are
 * examined and 0 is always returned.
 */
/*ARGSUSED*/
static int
systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
        return (0);
}

/*
 * Character/block entry points for the pseudo device, referenced from
 * systrace_ops.  Only open is implemented in this file (systrace_open);
 * every other slot is filled with a generic routine (nodev, nulldev,
 * nochpoll, ddi_prop_op) or 0.  The initializer is positional, so the
 * per-line comments name the slot each value occupies.
 */
static struct cb_ops systrace_cb_ops = {
        systrace_open,          /* open */
        nodev,                  /* close */
        nulldev,                /* strategy */
        nulldev,                /* print */
        nodev,                  /* dump */
        nodev,                  /* read */
        nodev,                  /* write */
        nodev,                  /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        nochpoll,               /* poll */
        ddi_prop_op,            /* cb_prop_op */
        0,                      /* streamtab  */
        D_NEW | D_MP            /* Driver compatibility flag */
};

/*
 * Device operations for the pseudo driver, referenced from modldrv.  This
 * file supplies getinfo (systrace_info), attach (systrace_attach) and
 * detach (systrace_detach) and points at systrace_cb_ops for the driver
 * entry points; the remaining slots use generic routines or NULL.  The
 * initializer is positional, so the per-line comments name each slot.
 */
static struct dev_ops systrace_ops = {
        DEVO_REV,               /* devo_rev, */
        0,                      /* refcnt  */
        systrace_info,          /* get_dev_info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        systrace_attach,        /* attach */
        systrace_detach,        /* detach */
        nodev,                  /* reset */
        &systrace_cb_ops,       /* driver operations */
        NULL,                   /* bus operations */
        nodev,                  /* dev power */
        ddi_quiesce_not_needed,         /* quiesce */
};

/*
 * Module linkage information for the kernel.
 *
 * modldrv describes this module as a driver: it names the module and ties
 * it to the systrace_ops device operations defined in this file.
 */
static struct modldrv modldrv = {
        &mod_driverops,         /* module type (this is a pseudo driver) */
        "System Call Tracing",  /* name of module */
        &systrace_ops,          /* driver ops */
};

/*
 * Linkage structure handed to mod_install(), mod_info() and mod_remove()
 * by _init(), _info() and _fini().  It holds a NULL-terminated list with a
 * single entry, the modldrv driver description.
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&modldrv,
        NULL
};

/*
 * Install this module's linkage with mod_install() and return its result
 * unchanged.
 */
int
_init(void)
{
        return (mod_install(&modlinkage));
}

/*
 * Report information about this module: forwards modinfop together with the
 * module linkage to mod_info() and returns its result unchanged.
 */
int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

/*
 * Remove this module's linkage with mod_remove() and return its result
 * unchanged.
 */
int
_fini(void)
{
        return (mod_remove(&modlinkage));
}