#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/clockintr.h>
#include <sys/device.h>
#include <sys/exec_elf.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
#include <sys/vnode.h>
#include <uvm/uvm.h>
#include <uvm/uvm_map.h>
#include <uvm/uvm_vnode.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <machine/intr.h>
#include <dev/dt/dtvar.h>
/*
 * DT_FA_PROFILE and DT_FA_STATIC are the per-architecture values that
 * dt_pcb_ring_get() passes as the second argument of stacktrace_save_at():
 * DT_FA_PROFILE when the caller says it is profiling, DT_FA_STATIC otherwise.
 * NOTE(review): presumably the number of leading stack frames to skip so the
 * saved trace starts at the traced code -- confirm against
 * stacktrace_save_at().
 */
#if defined(__amd64__)
#define DT_FA_PROFILE 5
#define DT_FA_STATIC 2
#elif defined(__i386__)
#define DT_FA_PROFILE 5
#define DT_FA_STATIC 2
#elif defined(__macppc__)
#define DT_FA_PROFILE 5
#define DT_FA_STATIC 2
#elif defined(__octeon__)
#define DT_FA_PROFILE 6
#define DT_FA_STATIC 2
#elif defined(__powerpc64__)
#define DT_FA_PROFILE 6
#define DT_FA_STATIC 2
#elif defined(__sparc64__)
#define DT_FA_PROFILE 7
#define DT_FA_STATIC 1
#else
/* Any other architecture: both values are 0. */
#define DT_FA_STATIC 0
#define DT_FA_PROFILE 0
#endif
/* Number of event slots in each per-CPU ring (see dtalloc()). */
#define DT_EVTRING_SIZE 16
/* Debug print hook; expands to nothing, so its arguments are discarded. */
#define DPRINTF(x...)
/*
 * Per-CPU event ring together with its counters.  Every dt_softc embeds
 * an array of these, indexed by CPU number.
 *
 * Note on naming: in this file events are written at dc_cons and copied
 * out to the reader starting at dc_prod.
 */
struct dt_cpubuf {
/* Index of the next slot dt_ring_copy() copies out; advanced there. */
unsigned int dc_prod;
/* Index of the slot dt_pcb_ring_get() fills; advanced by dt_pcb_ring_consume(). */
unsigned int dc_cons;
/* Array of DT_EVTRING_SIZE events, allocated in dtalloc(). */
struct dt_evt *dc_ring;
/* Set to 1 by dt_pcb_ring_get(), cleared when the event is finished or dropped. */
unsigned int dc_inevt;
/* Events dropped by dt_pcb_ring_get() because no slot was available. */
unsigned int dc_dropevt;
/* Running total of the values passed to dt_pcb_ring_skiptick(). */
unsigned int dc_skiptick;
/* Events refused by dt_pcb_ring_get() because dc_inevt was already set. */
unsigned int dc_recurevt;
/* Events copied out by dt_ring_copy(). */
unsigned int dc_readevt;
};
/*
 * State of one open dt(4) instance.  Created by dtopen(), destroyed by
 * dtclose().
 */
struct dt_softc {
/* Linkage on dtdev_list. */
SLIST_ENTRY(dt_softc) ds_next;
/* Device minor number this instance was opened with. */
int ds_unit;
/* PID of the process that opened the device. */
pid_t ds_pid;
/* Soft interrupt handle that runs dt_deferred_wakeup(). */
void *ds_si;
/* PCBs enabled on this instance (linked through dp_snext). */
struct dt_pcb_list ds_pcbs;
/* Nonzero between dt_ioctl_record_start() and dt_ioctl_record_stop(). */
int ds_recording;
/* Events recorded but not yet read; dtread() sleeps while this is zero. */
unsigned int ds_evtcnt;
/* Per-CPU rings; only the first ncpusfound entries get a ring allocated. */
struct dt_cpubuf ds_cpu[MAXCPUS];
/* Offset of the CPU buffer dtread() starts scanning from. */
unsigned int ds_lastcpu;
};
/* All currently open instances; searched by dtlookup(). */
SLIST_HEAD(, dt_softc) dtdev_list;
/* Sum of the values returned by the provider init routines in dtattach(). */
unsigned int dt_nprobes;
/* Every registered probe, appended by dt_dev_register_probe(). */
SIMPLEQ_HEAD(, dt_probe) dt_probe_list;
/* Write-held while recording is started or stopped. */
struct rwlock dt_lock = RWLOCK_INITIALIZER("dtlk");
/* Incremented when an instance starts recording, decremented when it stops. */
volatile uint32_t dt_tracing = 0;
/* dtopen() fails with EPERM while this is zero. */
int allowdt;
/* Device entry points. */
void dtattach(struct device *, struct device *, void *);
int dtopen(dev_t, int, int, struct proc *);
int dtclose(dev_t, int, int, struct proc *);
int dtread(dev_t, struct uio *, int);
int dtioctl(dev_t, u_long, caddr_t, int, struct proc *);
/* Instance lookup, allocation and release. */
struct dt_softc *dtlookup(int);
struct dt_softc *dtalloc(void);
void dtfree(struct dt_softc *);
/* Handlers for the individual ioctl commands dispatched by dtioctl(). */
int dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *);
int dt_ioctl_get_args(struct dt_softc *, struct dtioc_arg *);
int dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
int dt_ioctl_record_start(struct dt_softc *);
void dt_ioctl_record_stop(struct dt_softc *);
int dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
int dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
int dt_ioctl_rd_vnode(struct dt_softc *, struct dtioc_rdvn *);
/* Ring copy-out used by dtread() and reader wakeup helpers. */
int dt_ring_copy(struct dt_cpubuf *, struct uio *, size_t, size_t *);
void dt_wakeup(struct dt_softc *);
void dt_deferred_wakeup(void *);
/*
 * Attach routine: initialize the global instance list and probe list, then
 * add the value returned by each provider's init routine to dt_nprobes.
 * The kprobe provider is only initialized when DDBPROF is defined.
 * None of the three arguments is used.
 */
void
dtattach(struct device *parent, struct device *self, void *aux)
{
SLIST_INIT(&dtdev_list);
SIMPLEQ_INIT(&dt_probe_list);
/* Providers are initialized in this fixed order. */
dt_nprobes += dt_prov_profile_init();
dt_nprobes += dt_prov_syscall_init();
dt_nprobes += dt_prov_static_init();
#ifdef DDBPROF
dt_nprobes += dt_prov_kprobe_init();
#endif
}
/*
 * Open a dt(4) instance for the minor number of `dev'.
 *
 * Returns EPERM while allowdt is zero, ENOMEM if the instance cannot be
 * allocated, EBUSY if an instance with the same minor number is already
 * open, and 0 once the new instance has been linked on dtdev_list.
 */
int
dtopen(dev_t dev, int flags, int mode, struct proc *p)
{
	struct dt_softc *newsc;
	int minornum = minor(dev);

	if (atomic_load_int(&allowdt) == 0)
		return EPERM;

	/* Allocate first; the duplicate check is only done afterwards. */
	newsc = dtalloc();
	if (newsc == NULL)
		return ENOMEM;

	if (dtlookup(minornum) != NULL) {
		dtfree(newsc);
		return EBUSY;
	}

	TAILQ_INIT(&newsc->ds_pcbs);
	newsc->ds_unit = minornum;
	newsc->ds_pid = p->p_p->ps_pid;
	newsc->ds_evtcnt = 0;
	newsc->ds_lastcpu = 0;

	SLIST_INSERT_HEAD(&dtdev_list, newsc, ds_next);

	DPRINTF("dt%d: pid %d open\n", newsc->ds_unit, newsc->ds_pid);

	return 0;
}
/*
 * Close the instance matching the minor number of `dev'.
 *
 * The instance is unlinked from dtdev_list, recording is stopped, all of
 * its PCBs are freed and finally the instance itself is released.
 * Always returns 0; a missing instance trips the KASSERT.
 */
int
dtclose(dev_t dev, int flags, int mode, struct proc *p)
{
	struct dt_softc *closing = dtlookup(minor(dev));

	KASSERT(closing != NULL);

	DPRINTF("dt%d: pid %d close\n", closing->ds_unit, closing->ds_pid);

	SLIST_REMOVE(&dtdev_list, closing, dt_softc, ds_next);

	/* Recording must be stopped before the PCBs are purged. */
	dt_ioctl_record_stop(closing);
	dt_pcb_purge(&closing->ds_pcbs);
	dtfree(closing);

	return 0;
}
/*
 * Copy recorded events to the reader.
 *
 * The request must have room for at least one complete struct dt_evt,
 * otherwise EMSGSIZE is returned.  The call sleeps (interruptibly) until
 * the instance has at least one pending event, then drains the per-CPU
 * rings, starting at the offset remembered in ds_lastcpu, until the budget
 * of whole events is used up or every ring has been visited.
 *
 * Returns 0 on success, EMSGSIZE for a too-small buffer, EINTR/ERESTART
 * if the sleep was interrupted, or the error reported by dt_ring_copy().
 */
int
dtread(dev_t dev, struct uio *uio, int flags)
{
	struct dt_softc *sc;
	struct dt_cpubuf *dc;
	int i, error = 0, unit = minor(dev);
	size_t count, max, read = 0;

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	/*
	 * Only whole events may be handed out, so round the budget down.
	 * Rounding up would let dt_ring_copy() consume an event from the
	 * ring while delivering only part of it to the caller.
	 */
	max = uio->uio_resid / sizeof(struct dt_evt);
	if (max < 1)
		return (EMSGSIZE);

	/* Wait until at least one event is pending. */
	while (!atomic_load_int(&sc->ds_evtcnt)) {
		sleep_setup(sc, PWAIT | PCATCH, "dtread");
		error = sleep_finish(INFSLP, !atomic_load_int(&sc->ds_evtcnt));
		if (error == EINTR || error == ERESTART)
			break;
	}
	if (error)
		return error;

	KERNEL_ASSERT_LOCKED();
	for (i = 0; i < ncpusfound; i++) {
		count = 0;
		dc = &sc->ds_cpu[(sc->ds_lastcpu + i) % ncpusfound];
		error = dt_ring_copy(dc, uio, max, &count);
		/* An error is only fatal when nothing was copied from this ring. */
		if (error && count == 0)
			break;

		read += count;
		max -= count;
		if (max == 0)
			break;
	}
	/* Adds i modulo ncpusfound: nothing when every ring was visited. */
	sc->ds_lastcpu += i % ncpusfound;

	atomic_sub_int(&sc->ds_evtcnt, read);

	return error;
}
/*
 * ioctl entry point.
 *
 * DTIOCGPLIST, DTIOCGARGS and DTIOCGSTATS are served without a privilege
 * check.  DTIOCRECORD, DTIOCPRBENABLE, DTIOCPRBDISABLE and DTIOCRDVNODE
 * require suser() to succeed.  Any other command yields ENOTTY.
 *
 * Returns the handler's result, or the suser() error.
 */
int
dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct dt_softc *sc = dtlookup(minor(dev));
	int error;

	KASSERT(sc != NULL);

	/* Queries that need no privilege are answered right away. */
	if (cmd == DTIOCGPLIST)
		return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr);
	if (cmd == DTIOCGARGS)
		return dt_ioctl_get_args(sc, (struct dtioc_arg *)addr);
	if (cmd == DTIOCGSTATS)
		return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);

	/* Unknown commands are rejected before the privilege check. */
	if (cmd != DTIOCRECORD && cmd != DTIOCPRBENABLE &&
	    cmd != DTIOCPRBDISABLE && cmd != DTIOCRDVNODE)
		return ENOTTY;

	error = suser(p);
	if (error != 0)
		return error;

	switch (cmd) {
	case DTIOCRECORD:
		/* Nonzero argument starts recording, zero stops it. */
		if (*(int *)addr)
			error = dt_ioctl_record_start(sc);
		else
			dt_ioctl_record_stop(sc);
		break;
	case DTIOCPRBENABLE:
		error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
		break;
	case DTIOCPRBDISABLE:
		error = dt_ioctl_probe_disable(sc, (struct dtioc_req *)addr);
		break;
	case DTIOCRDVNODE:
		error = dt_ioctl_rd_vnode(sc, (struct dtioc_rdvn *)addr);
		break;
	default:
		KASSERT(0);
	}

	return error;
}
/*
 * Find the open instance whose minor number is `unit'.
 * Asserts that the kernel lock is held.
 * Returns the instance, or NULL when none matches.
 */
struct dt_softc *
dtlookup(int unit)
{
	struct dt_softc *cur;

	KERNEL_ASSERT_LOCKED();

	SLIST_FOREACH(cur, &dtdev_list, ds_next) {
		if (cur->ds_unit == unit)
			return cur;
	}

	return NULL;
}
/*
 * Allocate a zeroed instance with one event ring per found CPU and a
 * soft interrupt that runs dt_deferred_wakeup().
 *
 * Returns the new instance, or NULL if any allocation or the soft
 * interrupt establishment fails; whatever was already allocated is
 * released through dtfree().
 */
struct dt_softc *
dtalloc(void)
{
	struct dt_softc *sc;
	int cpu;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
	if (sc == NULL)
		return NULL;

	for (cpu = 0; cpu < ncpusfound; cpu++) {
		sc->ds_cpu[cpu].dc_ring = mallocarray(DT_EVTRING_SIZE,
		    sizeof(struct dt_evt), M_DEVBUF,
		    M_WAITOK|M_CANFAIL|M_ZERO);
		if (sc->ds_cpu[cpu].dc_ring == NULL)
			goto fail;
	}

	sc->ds_si = softintr_establish(IPL_SOFTCLOCK | IPL_MPSAFE,
	    dt_deferred_wakeup, sc);
	if (sc->ds_si == NULL)
		goto fail;

	return sc;

fail:
	/* Rings that were never allocated are still NULL thanks to M_ZERO. */
	dtfree(sc);
	return NULL;
}
/*
 * Release an instance: disestablish its soft interrupt if one was set up,
 * free the event ring of every found CPU, then free the instance itself.
 */
void
dtfree(struct dt_softc *sc)
{
	int cpu;

	if (sc->ds_si != NULL)
		softintr_disestablish(sc->ds_si);

	for (cpu = 0; cpu < ncpusfound; cpu++) {
		free(sc->ds_cpu[cpu].dc_ring, M_DEVBUF,
		    DT_EVTRING_SIZE * sizeof(struct dt_evt));
	}

	free(sc, M_DEVBUF, sizeof(*sc));
}
int
dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr)
{
struct dtioc_probe_info info, *dtpi;
struct dt_probe *dtp;
size_t size;
int error = 0;
size = dtpr->dtpr_size;
dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi);
if (size == 0)
return 0;
dtpi = dtpr->dtpr_probes;
SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
if (size < sizeof(*dtpi)) {
error = ENOSPC;
break;
}
memset(&info, 0, sizeof(info));
info.dtpi_pbn = dtp->dtp_pbn;
info.dtpi_nargs = dtp->dtp_nargs;
strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name,
sizeof(info.dtpi_prov));
strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func));
strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name));
error = copyout(&info, dtpi, sizeof(*dtpi));
if (error)
break;
size -= sizeof(*dtpi);
dtpi++;
}
return error;
}
int
dt_ioctl_get_args(struct dt_softc *sc, struct dtioc_arg *dtar)
{
struct dtioc_arg_info info, *dtai;
struct dt_probe *dtp;
size_t size, n, t;
uint32_t pbn;
int error = 0;
pbn = dtar->dtar_pbn;
if (pbn == 0 || pbn > dt_nprobes)
return EINVAL;
SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
if (pbn == dtp->dtp_pbn)
break;
}
if (dtp == NULL)
return EINVAL;
if (dtp->dtp_sysnum != 0) {
dtar->dtar_size = 0;
return 0;
}
size = dtar->dtar_size;
dtar->dtar_size = dtp->dtp_nargs * sizeof(*dtar);
if (size == 0)
return 0;
t = 0;
dtai = dtar->dtar_args;
for (n = 0; n < dtp->dtp_nargs; n++) {
if (size < sizeof(*dtai)) {
error = ENOSPC;
break;
}
if (n >= DTMAXARGTYPES || dtp->dtp_argtype[n] == NULL)
continue;
memset(&info, 0, sizeof(info));
info.dtai_pbn = dtp->dtp_pbn;
info.dtai_argn = t++;
strlcpy(info.dtai_argtype, dtp->dtp_argtype[n],
sizeof(info.dtai_argtype));
error = copyout(&info, dtai, sizeof(*dtai));
if (error)
break;
size -= sizeof(*dtai);
dtai++;
}
dtar->dtar_size = t * sizeof(*dtar);
return error;
}
/*
 * Report the instance's statistics in *dtst.
 *
 * Each of the four counters (read, dropped, skipped ticks, recursive
 * events) is the sum of the corresponding per-CPU counter over all found
 * CPUs.  Always returns 0.
 */
int
dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst)
{
	struct dt_cpubuf *dc;
	uint64_t readevt = 0, dropevt = 0, skiptick = 0, recurevt = 0;
	int i;

	for (i = 0; i < ncpusfound; i++) {
		dc = &sc->ds_cpu[i];

		membar_consumer();
		dropevt += dc->dc_dropevt;
		/*
		 * Accumulate like the other counters; plain assignment would
		 * report the last CPU's value only.
		 */
		skiptick += dc->dc_skiptick;
		recurevt += dc->dc_recurevt;
		readevt += dc->dc_readevt;
	}

	dtst->dtst_readevt = readevt;
	dtst->dtst_dropevt = dropevt;
	dtst->dtst_skiptick = skiptick;
	dtst->dtst_recurevt = recurevt;

	return 0;
}
/*
 * Start recording on this instance.
 *
 * With dt_lock write-held, every PCB of the instance is linked onto its
 * probe's PCB list and the probe and provider recording counters are
 * bumped.  PCBs with a nonzero dp_nsecs additionally get a clock interrupt
 * bound to dp_cpu and scheduled dp_nsecs after the current uptime.
 *
 * Returns 0, EBUSY if the instance is already recording, or ENOENT if no
 * probe has been enabled.
 */
int
dt_ioctl_record_start(struct dt_softc *sc)
{
	struct dt_pcb *pcb;
	struct dt_probe *probe;
	uint64_t start;

	rw_enter_write(&dt_lock);

	if (sc->ds_recording) {
		rw_exit_write(&dt_lock);
		return EBUSY;
	}

	KERNEL_ASSERT_LOCKED();
	if (TAILQ_EMPTY(&sc->ds_pcbs)) {
		rw_exit_write(&dt_lock);
		return ENOENT;
	}

	start = nsecuptime();
	TAILQ_FOREACH(pcb, &sc->ds_pcbs, dp_snext) {
		probe = pcb->dp_dtp;

		SMR_SLIST_INSERT_HEAD_LOCKED(&probe->dtp_pcbs, pcb, dp_pnext);
		probe->dtp_recording++;
		probe->dtp_prov->dtpv_recording++;

		if (pcb->dp_nsecs == 0)
			continue;

		/* Interval-driven PCB: arm its clock interrupt. */
		clockintr_bind(&pcb->dp_clockintr, pcb->dp_cpu, dt_clock,
		    pcb);
		clockintr_schedule(&pcb->dp_clockintr,
		    start + pcb->dp_nsecs);
	}

	sc->ds_recording = 1;
	dt_tracing++;

	rw_exit_write(&dt_lock);
	return 0;
}
/*
 * Stop recording on this instance; does nothing when it is not recording.
 *
 * With dt_lock write-held: decrement dt_tracing, clear ds_recording and,
 * for every PCB, unbind its clock interrupt (only when dp_nsecs is
 * nonzero), decrement the probe and provider recording counters and unlink
 * the PCB from its probe's PCB list.  smr_barrier() is called after the
 * lock has been dropped.
 */
void
dt_ioctl_record_stop(struct dt_softc *sc)
{
struct dt_pcb *dp;
rw_enter_write(&dt_lock);
if (!sc->ds_recording) {
rw_exit_write(&dt_lock);
return;
}
DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid);
dt_tracing--;
sc->ds_recording = 0;
TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
struct dt_probe *dtp = dp->dp_dtp;
/* Mirrors record_start: only PCBs with an interval had a clockintr bound. */
if (dp->dp_nsecs != 0)
clockintr_unbind(&dp->dp_clockintr, CL_BARRIER);
dtp->dtp_recording--;
dtp->dtp_prov->dtpv_recording--;
SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
}
rw_exit_write(&dt_lock);
/*
 * NOTE(review): presumably waits for concurrent readers of the probes'
 * PCB lists so callers may free the PCBs afterwards (dtclose() purges
 * them right after this call) -- confirm.
 */
smr_barrier();
}
int
dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
{
struct dt_pcb_list plist;
struct dt_probe *dtp;
struct dt_pcb *dp;
int error;
if (sc->ds_recording)
return EBUSY;
SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
if (dtp->dtp_pbn == dtrq->dtrq_pbn)
break;
}
if (dtp == NULL)
return ENOENT;
TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
if (dp->dp_dtp->dtp_pbn == dtrq->dtrq_pbn)
return EEXIST;
}
TAILQ_INIT(&plist);
error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq);
if (error)
return error;
DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid,
dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS);
TAILQ_CONCAT(&sc->ds_pcbs, &plist, dp_snext);
return 0;
}
/*
 * Disable probe dtrq->dtrq_pbn on this instance.
 *
 * The work is delegated to the provider's dtpv_dealloc hook when the
 * provider has one; without a hook the call succeeds without doing
 * anything.
 *
 * Returns 0, EBUSY while recording, ENOENT for an unknown probe number, or
 * the error returned by the provider.
 */
int
dt_ioctl_probe_disable(struct dt_softc *sc, struct dtioc_req *dtrq)
{
	struct dt_probe *dtp;
	int error;

	if (sc->ds_recording)
		return EBUSY;

	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
			break;
	}
	if (dtp == NULL)
		return ENOENT;

	if (dtp->dtp_prov->dtpv_dealloc) {
		error = dtp->dtp_prov->dtpv_dealloc(dtp, sc, dtrq);
		if (error)
			return error;
	}

	/*
	 * The format now has a conversion for the probe number, so the
	 * argument list matches once DPRINTF is turned into a real printf.
	 */
	DPRINTF("dt%d: pid %d dealloc %u\n", sc->ds_unit, sc->ds_pid,
	    dtrq->dtrq_pbn);

	return 0;
}
/*
 * Give the calling process a read-only file descriptor for the vnode that
 * backs the mapping containing address dtrv_va in process dtrv_pid.
 *
 * The mapping must be object-backed, executable and backed by a vnode.
 * On success dtrv_len, dtrv_start, dtrv_offset and dtrv_fd are filled in.
 *
 * Returns 0, ESRCH if the process does not exist, ENOENT if no suitable
 * mapping is found, or the error from falloc() or VOP_OPEN().
 */
int
dt_ioctl_rd_vnode(struct dt_softc *sc, struct dtioc_rdvn *dtrv)
{
struct process *ps;
struct proc *p = curproc;
boolean_t ok;
struct vm_map_entry *e;
int err = 0;
int fd;
struct uvm_vnode *uvn;
struct vnode *vn;
struct file *fp;
if ((ps = prfind(dtrv->dtrv_pid)) == NULL)
return ESRCH;
/* The map entry is only inspected while the target's map is read-locked. */
vm_map_lock_read(&ps->ps_vmspace->vm_map);
ok = uvm_map_lookup_entry(&ps->ps_vmspace->vm_map,
(vaddr_t)dtrv->dtrv_va, &e);
if (ok == 0 || (e->etype & UVM_ET_OBJ) == 0 ||
(e->protection & PROT_EXEC) == 0 ||
!UVM_OBJ_IS_VNODE(e->object.uvm_obj)) {
err = ENOENT;
vn = NULL;
DPRINTF("%s no mapping for %p\n", __func__, dtrv->dtrv_va);
} else {
uvn = (struct uvm_vnode *)e->object.uvm_obj;
vn = uvn->u_vnode;
/* Take a vnode reference before the map lock is released. */
vref(vn);
dtrv->dtrv_len = (size_t)uvn->u_size;
dtrv->dtrv_start = (caddr_t)e->start;
dtrv->dtrv_offset = (caddr_t)e->offset;
}
vm_map_unlock_read(&ps->ps_vmspace->vm_map);
if (vn != NULL) {
/* Allocate the file and descriptor in the calling process. */
fdplock(p->p_fd);
err = falloc(p, &fp, &fd);
fdpunlock(p->p_fd);
if (err != 0) {
/* Drop the reference taken with vref() above. */
vrele(vn);
DPRINTF("%s fdopen failed (%d)\n", __func__, err);
return err;
}
err = VOP_OPEN(vn, O_RDONLY, p->p_p->ps_ucred, p);
if (err == 0) {
/* Turn the new file into a read-only vnode file and publish the fd. */
fp->f_flag = FREAD;
fp->f_type = DTYPE_VNODE;
fp->f_ops = &vnops;
fp->f_data = vn;
dtrv->dtrv_fd = fd;
fdplock(p->p_fd);
fdinsert(p->p_fd, fd, UF_EXCLOSE, fp);
fdpunlock(p->p_fd);
FRELE(fp, p);
} else {
/* Open failed: drop the vnode reference and give the descriptor back. */
DPRINTF("%s vopen() failed (%d)\n", __func__,
err);
vrele(vn);
fdplock(p->p_fd);
fdremove(p->p_fd, fd);
fdpunlock(p->p_fd);
FRELE(fp, p);
}
}
return err;
}
/*
 * Allocate a zeroed probe without sleeping and fill in its provider,
 * function and probe names.  The name pointers are stored as given, not
 * copied.  dtp_sysnum starts at -1 and dtp_ref at 0.
 *
 * Returns the probe, or NULL if the allocation fails.
 */
struct dt_probe *
dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
{
	struct dt_probe *probe;

	probe = malloc(sizeof(struct dt_probe), M_DT, M_NOWAIT|M_ZERO);
	if (probe == NULL)
		return NULL;

	SMR_SLIST_INIT(&probe->dtp_pcbs);
	probe->dtp_ref = 0;
	probe->dtp_sysnum = -1;
	probe->dtp_name = name;
	probe->dtp_func = func;
	probe->dtp_prov = dtpv;

	return probe;
}
/*
 * Assign the next probe number to `dtp' and append it to the global probe
 * list.  Numbers come from a function-local counter, so the first
 * registered probe gets number 1.
 */
void
dt_dev_register_probe(struct dt_probe *dtp)
{
	static uint64_t last_pbn;

	last_pbn++;
	dtp->dtp_pbn = last_pbn;

	SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next);
}
/*
 * Allocate a zeroed PCB tying probe `dtp' to instance `sc'.
 * Returns the PCB, or NULL if the allocation fails.
 */
struct dt_pcb *
dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc)
{
	struct dt_pcb *pcb;

	pcb = malloc(sizeof(struct dt_pcb), M_DT, M_WAITOK|M_CANFAIL|M_ZERO);
	if (pcb != NULL) {
		pcb->dp_dtp = dtp;
		pcb->dp_sc = sc;
	}

	return pcb;
}
/*
 * Release a PCB allocated by dt_pcb_alloc().
 */
void
dt_pcb_free(struct dt_pcb *dp)
{
free(dp, M_DT, sizeof(*dp));
}
/*
 * Remove and free every PCB on `plist', leaving the list empty.
 */
void
dt_pcb_purge(struct dt_pcb_list *plist)
{
	struct dt_pcb *head;

	while (!TAILQ_EMPTY(plist)) {
		head = TAILQ_FIRST(plist);
		TAILQ_REMOVE(plist, head, dp_snext);
		dt_pcb_free(head);
	}
}
/*
 * Add `skip' to the skipped-tick counter of the current CPU's buffer in
 * the instance that owns `dp'.
 */
void
dt_pcb_ring_skiptick(struct dt_pcb *dp, unsigned int skip)
{
struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];
dc->dc_skiptick += skip;
/* Pairs with the membar_consumer() in dt_ioctl_get_stats(). */
membar_producer();
}
/*
 * Reserve the next event slot of the current CPU's ring and fill in the
 * common fields: probe number, CPU, pid, tid and timestamp, plus the
 * command name and kernel/user stack traces when requested by the PCB's
 * event flags.  `profiling' selects DT_FA_PROFILE instead of DT_FA_STATIC
 * for the kernel stack trace.
 *
 * Returns the slot, which the caller finishes with dt_pcb_ring_consume(),
 * or NULL when an event is already being recorded on this CPU (counted in
 * dc_recurevt) or when no slot is free (counted in dc_dropevt).
 */
struct dt_evt *
dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
{
struct proc *p = curproc;
struct dt_evt *dtev;
int prod, cons, distance;
struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];
/* An event is already in progress on this CPU's buffer: refuse to nest. */
if (dc->dc_inevt == 1) {
dc->dc_recurevt++;
membar_producer();
return NULL;
}
/* Stays set until dt_pcb_ring_consume() or the drop path below clears it. */
dc->dc_inevt = 1;
membar_consumer();
prod = dc->dc_prod;
cons = dc->dc_cons;
distance = prod - cons;
/*
 * The fill index sits directly behind the copy-out index (modulo the
 * ring size): advancing it would make the two equal, which
 * dt_ring_copy() treats as an empty ring, so the event is dropped.
 */
if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) {
dc->dc_dropevt++;
membar_producer();
dc->dc_inevt = 0;
return NULL;
}
/* Events are written at dc_cons. */
dtev = &dc->dc_ring[cons];
memset(dtev, 0, sizeof(*dtev));
dtev->dtev_pbn = dp->dp_dtp->dtp_pbn;
dtev->dtev_cpu = cpu_number();
dtev->dtev_pid = p->p_p->ps_pid;
dtev->dtev_tid = p->p_tid + THREAD_PID_OFFSET;
nanotime(&dtev->dtev_tsp);
if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
strlcpy(dtev->dtev_comm, p->p_p->ps_comm, sizeof(dtev->dtev_comm));
if (ISSET(dp->dp_evtflags, DTEVT_KSTACK)) {
if (profiling)
stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE);
else
stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
}
if (ISSET(dp->dp_evtflags, DTEVT_USTACK))
stacktrace_save_utrace(&dtev->dtev_ustack);
return dtev;
}
/*
 * Finish the event obtained from dt_pcb_ring_get(): advance the current
 * CPU's fill index, count the event as pending on the instance, clear the
 * in-event flag and schedule a wakeup of the reader.
 *
 * `dtev' must be the slot at the current fill index (asserted).
 */
void
dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev)
{
struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];
KASSERT(dtev == &dc->dc_ring[dc->dc_cons]);
dc->dc_cons = (dc->dc_cons + 1) % DT_EVTRING_SIZE;
/* Issued after the index update and before the event is counted as pending. */
membar_producer();
atomic_inc_int(&dp->dp_sc->ds_evtcnt);
dc->dc_inevt = 0;
dt_wakeup(dp->dp_sc);
}
/*
 * Copy up to `max' events from one CPU's ring to `uio'.
 *
 * Events are taken from dc_prod up to (not including) dc_cons, in at most
 * two chunks when that range wraps past the end of the ring.  The number
 * of events copied is stored in *rcvd whenever the function gets past the
 * first uiomove(); on the early returns *rcvd is left untouched.
 *
 * Returns 0 or the uiomove() error.  An error in the second chunk still
 * accounts for the events of the first one.
 */
int
dt_ring_copy(struct dt_cpubuf *dc, struct uio *uio, size_t max, size_t *rcvd)
{
size_t count, copied = 0;
unsigned int cons, prod;
int error = 0;
KASSERT(max > 0);
membar_consumer();
cons = dc->dc_cons;
prod = dc->dc_prod;
/* First chunk: up to the end of the ring when the range wraps, else up to cons. */
if (cons < prod)
count = DT_EVTRING_SIZE - prod;
else
count = cons - prod;
/* Equal indices mean there is nothing to read. */
if (count == 0)
return 0;
count = MIN(count, max);
error = uiomove(&dc->dc_ring[prod], count * sizeof(struct dt_evt), uio);
if (error)
return error;
copied += count;
prod = (prod + count) % DT_EVTRING_SIZE;
/*
 * A second chunk is only needed when the first one ended exactly at the
 * end of the ring, events remain at its start and the budget is not
 * used up.
 */
if (max == copied || prod != 0 || cons == 0)
goto out;
count = MIN(cons, (max - copied));
error = uiomove(&dc->dc_ring[0], count * sizeof(struct dt_evt), uio);
if (error)
goto out;
copied += count;
prod += count;
out:
/* Publish the new copy-out index and the read statistics. */
dc->dc_readevt += copied;
dc->dc_prod = prod;
membar_producer();
*rcvd = copied;
return error;
}
/*
 * Request a wakeup of the instance's reader by scheduling its soft
 * interrupt, whose handler is dt_deferred_wakeup().
 */
void
dt_wakeup(struct dt_softc *sc)
{
softintr_schedule(sc->ds_si);
}
/*
 * Soft interrupt handler established in dtalloc(): wake up whoever sleeps
 * on the instance passed as `arg' (dtread() sleeps on it).
 */
void
dt_deferred_wakeup(void *arg)
{
struct dt_softc *sc = arg;
wakeup(sc);
}