#include <sys/dtrace.h>
#include <sys/systrace.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
/*
 * Value passed as the fifth argument of dtrace_probe_create() for every
 * probe this provider creates.
 */
#define SYSTRACE_ARTIFICIAL_FRAMES 1
/*
 * Encoding of the per-probe private argument (parg).  The system call number
 * occupies the low SYSTRACE_SHIFT bits; the bit immediately above them is set
 * for an entry probe and clear for a return probe.
 */
#define SYSTRACE_SHIFT 16
/* Non-zero when the encoded argument denotes an entry probe. */
#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
/* Extracts the system call number from an encoded argument. */
#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
/* Builds the encoded argument for the entry probe of syscall 'id'. */
#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
/* Builds the encoded argument for the return probe of syscall 'id'. */
#define SYSTRACE_RETURN(id) (id)
/*
 * The encoding only works if every syscall number fits below the entry bit,
 * so refuse to compile otherwise.
 */
#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of system calls
#endif
/* Device node recorded by systrace_attach() and reported by systrace_info(). */
static dev_info_t *systrace_devi;
/*
 * Provider id filled in by dtrace_register() in systrace_attach(); used when
 * looking up and creating probes and when unregistering in systrace_detach().
 */
static dtrace_provider_id_t systrace_id;
/*
 * Record the current handler (sy_callc) of each system call in 'actual' into
 * the shadow table referenced by '*interposed'.  If '*interposed' is NULL a
 * zeroed table of NSYSCALL entries is allocated with KM_SLEEP and stored back
 * through 'interposed'.
 *
 * An entry is left untouched when the syscall is loadable but not loaded, or
 * when its handler is already one of the dtrace_systrace_syscall interposers;
 * the latter keeps a previously captured underlying handler from being
 * replaced by the interposer itself.
 */
static void
systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
{
	systrace_sysent_t *shadow = *interposed;
	int num;

	if (shadow == NULL) {
		shadow = kmem_zalloc(sizeof (systrace_sysent_t) * NSYSCALL,
		    KM_SLEEP);
		*interposed = shadow;
	}

	for (num = 0; num < NSYSCALL; num++) {
		struct sysent *real = &actual[num];
		int skip;

		/* Loadable syscall that is not currently loaded. */
		skip = (LOADABLE_SYSCALL(real) && !LOADED_SYSCALL(real));

		/* Handler has already been swapped for the interposer. */
		skip = skip || (real->sy_callc == dtrace_systrace_syscall);
#ifdef _SYSCALL32_IMPL
		skip = skip || (real->sy_callc == dtrace_systrace_syscall32);
#endif

		if (!skip)
			shadow[num].stsy_underlying = real->sy_callc;
	}
}
static void
systrace_provide(void *arg, const dtrace_probedesc_t *desc)
{
int i;
if (desc != NULL)
return;
systrace_init(sysent, &systrace_sysent);
#ifdef _SYSCALL32_IMPL
systrace_init(sysent32, &systrace_sysent32);
#endif
for (i = 0; i < NSYSCALL; i++) {
if (systrace_sysent[i].stsy_underlying == NULL)
continue;
if (dtrace_probe_lookup(systrace_id, NULL,
syscallnames[i], "entry") != 0)
continue;
(void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
"entry", SYSTRACE_ARTIFICIAL_FRAMES,
(void *)((uintptr_t)SYSTRACE_ENTRY(i)));
(void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
"return", SYSTRACE_ARTIFICIAL_FRAMES,
(void *)((uintptr_t)SYSTRACE_RETURN(i)));
systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
systrace_sysent[i].stsy_return = DTRACE_IDNONE;
#ifdef _SYSCALL32_IMPL
systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
#endif
}
}
/*
 * Provider "destroy" callback (installed in systrace_pops).  Performs no
 * work; it only asserts that the id slot matching the probe encoded in
 * 'parg' (entry or return) is already DTRACE_IDNONE in the native and, if
 * built, 32-bit shadow tables.
 *
 * 'arg' and 'id' are unused.
 */
/*ARGSUSED*/
static void
systrace_destroy(void *arg, dtrace_id_t id, void *parg)
{
	uintptr_t cookie = (uintptr_t)parg;
	int num = SYSTRACE_SYSNUM(cookie);

	if (!SYSTRACE_ISENTRY(cookie)) {
		ASSERT(systrace_sysent[num].stsy_return == DTRACE_IDNONE);
#ifdef _SYSCALL32_IMPL
		ASSERT(systrace_sysent32[num].stsy_return == DTRACE_IDNONE);
#endif
		return;
	}

	ASSERT(systrace_sysent[num].stsy_entry == DTRACE_IDNONE);
#ifdef _SYSCALL32_IMPL
	ASSERT(systrace_sysent32[num].stsy_entry == DTRACE_IDNONE);
#endif
}
/*
 * Provider "enable" callback (installed in systrace_pops).  Records 'id' in
 * the entry or return slot selected by 'parg', in the native and, if built,
 * 32-bit shadow tables.  If neither slot was set beforehand, the syscall's
 * handler in sysent (and sysent32) is then switched from the captured
 * underlying handler to the dtrace_systrace_syscall interposer with a
 * compare-and-swap.
 *
 * Ordering matters: the "was anything enabled" state is sampled before the
 * id is stored, and the id is stored before the interposer is installed.
 *
 * 'arg' is unused.  Always returns 0.
 */
/*ARGSUSED*/
static int
systrace_enable(void *arg, dtrace_id_t id, void *parg)
{
	uintptr_t cookie = (uintptr_t)parg;
	int num = SYSTRACE_SYSNUM(cookie);
	systrace_sysent_t *native = &systrace_sysent[num];
#ifdef _SYSCALL32_IMPL
	systrace_sysent_t *compat = &systrace_sysent32[num];
#endif
	int already;

	/* Sample the prior state before touching either slot. */
	already = !(native->stsy_entry == DTRACE_IDNONE &&
	    native->stsy_return == DTRACE_IDNONE);

	if (SYSTRACE_ISENTRY(cookie)) {
		native->stsy_entry = id;
#ifdef _SYSCALL32_IMPL
		compat->stsy_entry = id;
#endif
	} else {
		native->stsy_return = id;
#ifdef _SYSCALL32_IMPL
		compat->stsy_return = id;
#endif
	}

	if (!already) {
		/* First probe for this syscall: install the interposer. */
		(void) atomic_cas_ptr(&sysent[num].sy_callc,
		    (void *)native->stsy_underlying,
		    (void *)dtrace_systrace_syscall);
#ifdef _SYSCALL32_IMPL
		(void) atomic_cas_ptr(&sysent32[num].sy_callc,
		    (void *)compat->stsy_underlying,
		    (void *)dtrace_systrace_syscall32);
#endif
		return (0);
	}

	/* The sibling probe was enabled, so the interposer must be in place. */
	ASSERT(sysent[num].sy_callc == dtrace_systrace_syscall);
	return (0);
}
/*
 * Provider "disable" callback (installed in systrace_pops).  If either id
 * slot for the syscall is already DTRACE_IDNONE on entry, the handler in
 * sysent (and sysent32) is switched back from the dtrace_systrace_syscall
 * interposer to the captured underlying handler with a compare-and-swap.
 * The slot selected by 'parg' is then reset to DTRACE_IDNONE in the native
 * and, if built, 32-bit shadow tables.
 *
 * The slot for the probe being disabled is cleared only at the end, so an
 * empty slot seen at the top is presumably the sibling probe's, meaning this
 * is the last enabled probe for the syscall.
 *
 * 'arg' and 'id' are unused.
 */
/*ARGSUSED*/
static void
systrace_disable(void *arg, dtrace_id_t id, void *parg)
{
	uintptr_t cookie = (uintptr_t)parg;
	int num = SYSTRACE_SYSNUM(cookie);
	systrace_sysent_t *native = &systrace_sysent[num];
#ifdef _SYSCALL32_IMPL
	systrace_sysent_t *compat = &systrace_sysent32[num];
#endif
	int last;

	last = (native->stsy_entry == DTRACE_IDNONE ||
	    native->stsy_return == DTRACE_IDNONE);

	if (last) {
		/* Restore the original handler before dropping the id. */
		(void) atomic_cas_ptr(&sysent[num].sy_callc,
		    (void *)dtrace_systrace_syscall,
		    (void *)native->stsy_underlying);
#ifdef _SYSCALL32_IMPL
		(void) atomic_cas_ptr(&sysent32[num].sy_callc,
		    (void *)dtrace_systrace_syscall32,
		    (void *)compat->stsy_underlying);
#endif
	}

	if (!SYSTRACE_ISENTRY(cookie)) {
		native->stsy_return = DTRACE_IDNONE;
#ifdef _SYSCALL32_IMPL
		compat->stsy_return = DTRACE_IDNONE;
#endif
	} else {
		native->stsy_entry = DTRACE_IDNONE;
#ifdef _SYSCALL32_IMPL
		compat->stsy_entry = DTRACE_IDNONE;
#endif
	}
}
/*
 * Stability attributes passed to dtrace_register() for the "syscall"
 * provider.  Initializers are positional: five rows, each holding two
 * stability levels followed by a dependency class.
 * NOTE(review): per the dtrace_pattr_t layout in <sys/dtrace.h> the rows are
 * expected to describe, in order, the provider, module, function, name and
 * arguments -- confirm against the header in this tree.
 */
static dtrace_pattr_t systrace_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
/*
 * Provider operations vector passed to dtrace_register().  Initializers are
 * positional; only provide, enable, disable and destroy are implemented and
 * every other slot is NULL.
 * NOTE(review): the names of the NULL slots are not visible in this file;
 * check the dtrace_pops_t definition in <sys/dtrace.h> before adding or
 * reordering entries.
 */
static dtrace_pops_t systrace_pops = {
systrace_provide,
NULL,
systrace_enable,
systrace_disable,
NULL,
NULL,
NULL,
NULL,
NULL,
systrace_destroy
};
/*
 * Driver attach entry point (installed in systrace_ops).
 *
 * DDI_RESUME succeeds immediately and any command other than DDI_ATTACH
 * fails.  For DDI_ATTACH the systrace_probe hook is pointed at dtrace_probe,
 * followed by membar_enter(); then the "systrace" minor node is created and
 * the "syscall" provider is registered.  If either step fails the hook is
 * reset to systrace_stub, minor nodes are removed, and DDI_FAILURE is
 * returned.  On success the device is reported and remembered in
 * systrace_devi.
 */
static int
systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	if (cmd == DDI_RESUME)
		return (DDI_SUCCESS);
	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	systrace_probe = (void (*)())dtrace_probe;
	membar_enter();

	if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
	    DDI_PSEUDO, 0) == DDI_FAILURE)
		goto fail;

	if (dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
	    &systrace_pops, NULL, &systrace_id) != 0)
		goto fail;

	ddi_report_dev(devi);
	systrace_devi = devi;
	return (DDI_SUCCESS);

fail:
	/* Undo the hook and any minor node created above. */
	systrace_probe = systrace_stub;
	ddi_remove_minor_node(devi, NULL);
	return (DDI_FAILURE);
}
/*
 * Driver detach entry point (installed in systrace_ops).
 *
 * DDI_SUSPEND succeeds immediately and any command other than DDI_DETACH
 * fails.  For DDI_DETACH the provider is unregistered first; if that fails
 * nothing else is torn down and DDI_FAILURE is returned.  Otherwise the
 * minor nodes are removed and the systrace_probe hook is reset to
 * systrace_stub.
 */
static int
systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	if (cmd == DDI_SUSPEND)
		return (DDI_SUCCESS);
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	if (dtrace_unregister(systrace_id) == 0) {
		ddi_remove_minor_node(devi, NULL);
		systrace_probe = systrace_stub;
		return (DDI_SUCCESS);
	}

	return (DDI_FAILURE);
}
/*
 * Driver getinfo entry point (installed in systrace_ops).
 *
 * DDI_INFO_DEVT2DEVINFO stores systrace_devi in '*result';
 * DDI_INFO_DEVT2INSTANCE stores 0.  Both return DDI_SUCCESS.  Any other
 * command returns DDI_FAILURE and leaves '*result' untouched.
 *
 * 'dip' and 'arg' are unused.
 */
/*ARGSUSED*/
static int
systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
		*result = (void *)systrace_devi;
		return (DDI_SUCCESS);
	}

	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
		*result = (void *)0;
		return (DDI_SUCCESS);
	}

	return (DDI_FAILURE);
}
/*
 * Open entry point (first slot of systrace_cb_ops).  Ignores all of its
 * arguments and always returns 0.
 */
/*ARGSUSED*/
static int
systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
return (0);
}
/*
 * Character/block entry points for the systrace pseudo-device, referenced
 * from systrace_ops.  Initializers are positional; only the first slot is a
 * function defined in this file (systrace_open).
 * NOTE(review): slot labels below follow the cb_ops(9S) field order --
 * confirm against <sys/devops.h> in this tree.
 */
static struct cb_ops systrace_cb_ops = {
systrace_open,	/* open */
nodev,	/* close */
nulldev,	/* strategy */
nulldev,	/* print */
nodev,	/* dump */
nodev,	/* read */
nodev,	/* write */
nodev,	/* ioctl */
nodev,	/* devmap */
nodev,	/* mmap */
nodev,	/* segmap */
nochpoll,	/* poll */
ddi_prop_op,	/* cb_prop_op */
0,	/* streamtab */
D_NEW | D_MP	/* driver compatibility flags */
};
/*
 * Device operations for the systrace pseudo-driver, referenced from modldrv.
 * Initializers are positional; the functions defined in this file are
 * systrace_info, systrace_attach and systrace_detach.
 * NOTE(review): slot labels below follow the dev_ops(9S) field order --
 * confirm against <sys/devops.h> in this tree.
 */
static struct dev_ops systrace_ops = {
DEVO_REV,	/* devo_rev */
0,	/* refcnt */
systrace_info,	/* get_dev_info */
nulldev,	/* identify */
nulldev,	/* probe */
systrace_attach,	/* attach */
systrace_detach,	/* detach */
nodev,	/* reset */
&systrace_cb_ops,	/* driver operations */
NULL,	/* bus operations */
nodev,	/* dev power */
ddi_quiesce_not_needed,	/* quiesce */
};
/*
 * Driver linkage record: ties the generic driver module operations
 * (mod_driverops) and a descriptive name to this driver's systrace_ops.
 */
static struct modldrv modldrv = {
&mod_driverops,
"System Call Tracing",
&systrace_ops,
};
/*
 * Module linkage passed to mod_install(), mod_info() and mod_remove().
 * Holds a NULL-terminated list whose only element is modldrv.
 */
static struct modlinkage modlinkage = {
MODREV_1,
(void *)&modldrv,
NULL
};
/*
 * Loadable-module entry point: installs the module described by modlinkage
 * and returns mod_install()'s result unchanged.
 */
int
_init(void)
{
return (mod_install(&modlinkage));
}
/*
 * Loadable-module entry point: fills 'modinfop' via mod_info() for the
 * module described by modlinkage and returns its result unchanged.
 */
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}
/*
 * Loadable-module entry point: removes the module described by modlinkage
 * and returns mod_remove()'s result unchanged.
 */
int
_fini(void)
{
return (mod_remove(&modlinkage));
}