root/sys/cddl/dev/sdt/sdt.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
 * Copyright 2024 Mark Johnston <markj@FreeBSD.org>
 */

/*
 * This file contains a reimplementation of the statically-defined tracing (SDT)
 * framework for DTrace. Probes and SDT providers are defined using the macros
 * in sys/sdt.h, which append all the needed structures to linker sets. When
 * this module is loaded, it iterates over all of the loaded modules and
 * registers probes and providers with the DTrace framework based on the
 * contents of these linker sets.
 *
 * A list of SDT providers is maintained here since a provider may span multiple
 * modules. When a kernel module is unloaded, a provider defined in that module
 * is unregistered only if no other modules refer to it. The DTrace framework is
 * responsible for destroying individual probes when a kernel module is
 * unloaded; in particular, probes may not span multiple kernel modules.
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <sys/conf.h>
#include <sys/endian.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/linker_set.h>
#include <sys/lock.h>
#include <sys/lockstat.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sdt.h>

#include <sys/dtrace.h>
#include <sys/dtrace_bsd.h>

#include <cddl/dev/dtrace/dtrace_cddl.h>

/*
 * sdt_enable() stores a dtrace_id_t directly into sdt_probe.id, so the two
 * must have the same size.
 */
_Static_assert(sizeof((struct sdt_probe *)NULL)->id == sizeof(dtrace_id_t),
    "sdt_probe.id and dtrace_id_t size mismatch");

/* DTrace methods; these are installed in sdt_pops below. */
static void     sdt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static uint64_t sdt_getargval(void *, dtrace_id_t, void *, int, int);
static void     sdt_provide_probes(void *, dtrace_probedesc_t *);
static void     sdt_destroy(void *, dtrace_id_t, void *);
static void     sdt_enable(void *, dtrace_id_t, void *);
static void     sdt_disable(void *, dtrace_id_t, void *);

/* Module setup/teardown and linker file event handling. */
static void     sdt_load(void);
static int      sdt_unload(void);
static void     sdt_create_provider(struct sdt_provider *);
static void     sdt_create_probe(struct sdt_probe *);
static void     sdt_init_probe(struct sdt_probe *, linker_file_t);
static void     sdt_kld_load(void *, struct linker_file *);
static void     sdt_kld_unload_try(void *, struct linker_file *, int *);

/* Malloc type used for the provider copies kept on sdt_prov_list. */
static MALLOC_DEFINE(M_SDT, "SDT", "DTrace SDT providers");

/*
 * Counts of currently enabled probes: all SDT probes, and the subset that
 * belongs to the "lockstat" provider.  sdt_enable() and sdt_disable() use
 * them to toggle sdt_probes_enabled and lockstat_enabled when the counts
 * move between zero and one.
 */
static int sdt_probes_enabled_count;
static int lockstat_enabled_count;

/*
 * Stability attributes handed to dtrace_register() for every SDT provider
 * created by sdt_create_provider().
 */
static dtrace_pattr_t sdt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};

/*
 * Provider operations vector handed to dtrace_register().  Methods that SDT
 * does not implement are left NULL.
 */
static dtrace_pops_t sdt_pops = {
        .dtps_provide =         sdt_provide_probes,
        .dtps_provide_module =  NULL,
        .dtps_enable =          sdt_enable,
        .dtps_disable =         sdt_disable,
        .dtps_suspend =         NULL,
        .dtps_resume =          NULL,
        .dtps_getargdesc =      sdt_getargdesc,
        .dtps_getargval =       sdt_getargval,
        .dtps_usermode =        NULL,
        .dtps_destroy =         sdt_destroy,
};

/* All providers registered by this module, one entry per provider name. */
static TAILQ_HEAD(, sdt_provider) sdt_prov_list;

/* Handles for the kld_load/kld_unload_try handlers set up in sdt_load(). */
static eventhandler_tag sdt_kld_load_tag;
static eventhandler_tag sdt_kld_unload_try_tag;

static void
sdt_create_provider(struct sdt_provider *prov)
{
        struct sdt_provider *curr, *newprov;

        TAILQ_FOREACH(curr, &sdt_prov_list, prov_entry)
                if (strcmp(prov->name, curr->name) == 0) {
                        /* The provider has already been defined. */
                        curr->sdt_refs++;
                        return;
                }

        /*
         * Make a copy of prov so that we don't lose fields if its module is
         * unloaded but the provider isn't destroyed. This could happen with
         * a provider that spans multiple modules.
         */
        newprov = malloc(sizeof(*newprov), M_SDT, M_WAITOK | M_ZERO);
        newprov->name = strdup(prov->name, M_SDT);
        prov->sdt_refs = newprov->sdt_refs = 1;

        TAILQ_INSERT_TAIL(&sdt_prov_list, newprov, prov_entry);

        (void)dtrace_register(newprov->name, &sdt_attr, DTRACE_PRIV_USER, NULL,
            &sdt_pops, NULL, (dtrace_provider_id_t *)&newprov->id);
        prov->id = newprov->id;
}

/*
 * Create the DTrace probe for a single SDT probe definition.
 *
 * The probe is ignored if its version field does not equal the size of
 * struct sdt_probe, and nothing is created if a probe with the same
 * provider, module, function and name already exists.  The probe's provider
 * must already be on sdt_prov_list.
 */
static void
sdt_create_probe(struct sdt_probe *probe)
{
        struct sdt_provider *prov;
        char mod[DTRACE_MODNAMELEN];
        char func[DTRACE_FUNCNAMELEN];
        char name[DTRACE_NAMELEN];
        const char *from;
        char *to;
        size_t len;
        int aframes;

        if (probe->version != (int)sizeof(*probe)) {
                printf("ignoring probe %p, version %u expected %u\n",
                    probe, probe->version, (int)sizeof(*probe));
                return;
        }

        TAILQ_FOREACH(prov, &sdt_prov_list, prov_entry)
                if (strcmp(prov->name, probe->prov->name) == 0)
                        break;

        KASSERT(prov != NULL, ("probe defined without a provider"));

        /* If no module name was specified, use the module filename. */
        if (*probe->mod == 0) {
                /*
                 * strlcpy() returns the length of the source string, which
                 * is at least sizeof(mod) when the filename was truncated.
                 * Only look for a ".ko" suffix when the whole name fit, so
                 * that mod + len - 3 always points inside mod[].
                 */
                len = strlcpy(mod, probe->sdtp_lf->filename, sizeof(mod));
                if (len < sizeof(mod) && len > 3 &&
                    strcmp(mod + len - 3, ".ko") == 0)
                        mod[len - 3] = '\0';
        } else
                strlcpy(mod, probe->mod, sizeof(mod));

        /*
         * Unfortunately this is necessary because the Solaris DTrace
         * code mixes consts and non-consts with casts to override
         * the incompatibilities. On FreeBSD, we use strict warnings
         * in the C compiler, so we have to respect const vs non-const.
         */
        strlcpy(func, probe->func, sizeof(func));
        if (func[0] == '\0')
                strlcpy(func, "none", sizeof(func));

        /*
         * Copy the probe name, collapsing each "__" in the C identifier into
         * a single '-'.  The copy is bounded to leave room for the NUL.
         */
        from = probe->name;
        to = name;
        for (len = 0; len < (sizeof(name) - 1) && *from != '\0';
            len++, from++, to++) {
                if (from[0] == '_' && from[1] == '_') {
                        *to = '-';
                        from++;
                } else
                        *to = *from;
        }
        *to = '\0';

        /* Another linker file may already have created this probe. */
        if (dtrace_probe_lookup(prov->id, mod, func, name) != DTRACE_IDNONE)
                return;

        aframes = 1; /* unwind past sdt_probe() */
        if (strcmp(prov->name, "lockstat") == 0) {
                /*
                 * Locking primitives instrumented by lockstat automatically
                 * disable inlining.  Step forward an extra frame so that DTrace
                 * variables like "caller" provide the function trying to
                 * acquire or release the lock rather than an internal function.
                 */
                aframes++;
        }
        (void)dtrace_probe_create(prov->id, mod, func, name, aframes, probe);
}

/*
 * Give a probe empty argument-type and tracepoint lists and record the
 * linker file it was found in.
 */
static void
sdt_init_probe(struct sdt_probe *probe, linker_file_t lf)
{
        STAILQ_INIT(&probe->tracepoint_list);
        TAILQ_INIT(&probe->argtype_list);
        probe->sdtp_lf = lf;
}

/*
 * DTrace "provide" method.
 *
 * Probes are created through the SDT module load/unload hook, so this function
 * has nothing to do. It only exists because the DTrace provider framework
 * requires one of provide_probes and provide_module to be defined.  Both
 * arguments are ignored.
 */
static void
sdt_provide_probes(void *arg, dtrace_probedesc_t *desc)
{
}

/*
 * State shared between sdt_probe_update() and the sdt_probe_update_cb()
 * callback that runs on each CPU.
 */
struct sdt_enable_cb_arg {
        struct sdt_probe *probe;        /* probe whose tracepoints change */
        int cpu;                        /* CPU that does the patching */
        int arrived;                    /* other CPUs that have checked in */
        int done;                       /* set to 1 once patching is done */
        bool enable;                    /* true: patch, false: restore */
};

/*
 * Per-CPU callback for sdt_probe_update().
 *
 * The CPU recorded in arg->cpu patches or restores every tracepoint of the
 * probe; every other CPU announces its arrival and then spins until the
 * patching CPU reports that it is done.
 */
static void
sdt_probe_update_cb(void *_arg)
{
        struct sdt_enable_cb_arg *arg;
        struct sdt_tracepoint *tp;

        arg = _arg;
        if (arg->cpu != curcpu) {
                /* Not the patching CPU: check in, then wait for completion. */
                atomic_add_rel_int(&arg->arrived, 1);
                while (atomic_load_acq_int(&arg->done) == 0)
                        cpu_spinwait();
                return;
        } else {
                /* Patching CPU: wait until all other CPUs have checked in. */
                while (atomic_load_acq_int(&arg->arrived) != mp_ncpus - 1)
                        cpu_spinwait();
        }

        STAILQ_FOREACH(tp, &arg->probe->tracepoint_list, tracepoint_entry) {
                if (arg->enable)
                        sdt_tracepoint_patch(tp->patchpoint, tp->target);
                else
                        sdt_tracepoint_restore(tp->patchpoint);
        }

        /* Release the CPUs spinning above. */
        atomic_store_rel_int(&arg->done, 1);
}

/*
 * Patch (enable) or restore (disable) all tracepoints of a probe by running
 * sdt_probe_update_cb() through smp_rendezvous(), with the calling CPU
 * recorded as the one that performs the update.
 */
static void
sdt_probe_update(struct sdt_probe *probe, bool enable)
{
        struct sdt_enable_cb_arg req;

        /* Pinned for as long as req.cpu has to keep naming this CPU. */
        sched_pin();
        req.enable = enable;
        req.probe = probe;
        req.cpu = curcpu;
        atomic_store_rel_int(&req.done, 0);
        atomic_store_rel_int(&req.arrived, 0);
        smp_rendezvous(NULL, sdt_probe_update_cb, NULL, &req);
        sched_unpin();
}

/*
 * DTrace "enable" method: record the probe id, bump the enabled-probe
 * counters (setting the global enable flags on the first enable), and patch
 * the probe's tracepoints.  parg is the struct sdt_probe handed to
 * dtrace_probe_create().
 */
static void
sdt_enable(void *arg __unused, dtrace_id_t id, void *parg)
{
        struct sdt_probe *probe = parg;
        bool is_lockstat;

        is_lockstat = strcmp(probe->prov->name, "lockstat") == 0;

        probe->id = id;
        probe->sdtp_lf->nenabled++;

        if (is_lockstat && ++lockstat_enabled_count == 1)
                lockstat_enabled = true;
        if (++sdt_probes_enabled_count == 1)
                sdt_probes_enabled = true;

        sdt_probe_update(probe, true);
}

/*
 * DTrace "disable" method: restore the probe's tracepoints first, then drop
 * the enabled-probe counters (clearing the global enable flags when they
 * reach zero) and reset the probe id.
 */
static void
sdt_disable(void *arg __unused, dtrace_id_t id, void *parg)
{
        struct sdt_probe *probe = parg;

        KASSERT(probe->sdtp_lf->nenabled > 0, ("no probes enabled"));

        sdt_probe_update(probe, false);

        if (--sdt_probes_enabled_count == 0)
                sdt_probes_enabled = false;
        if (strcmp(probe->prov->name, "lockstat") == 0 &&
            --lockstat_enabled_count == 0)
                lockstat_enabled = false;

        probe->sdtp_lf->nenabled--;
        probe->id = 0;
}

/*
 * DTrace "getargdesc" method: fill in the native and translated type names
 * for argument desc->dtargd_ndx of the probe passed in parg.  An index at or
 * beyond the probe's argument count is reported as DTRACE_ARGNONE.
 */
static void
sdt_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc)
{
        struct sdt_probe *probe = parg;
        struct sdt_argtype *at;

        if (desc->dtargd_ndx >= probe->n_args) {
                desc->dtargd_ndx = DTRACE_ARGNONE;
                return;
        }

        TAILQ_FOREACH(at, &probe->argtype_list, argtype_entry) {
                if (desc->dtargd_ndx != at->ndx)
                        continue;

                desc->dtargd_mapping = desc->dtargd_ndx;
                if (at->type == NULL) {
                        /* No type recorded: report empty type strings. */
                        desc->dtargd_native[0] = '\0';
                        desc->dtargd_xlate[0] = '\0';
                        continue;
                }

                strlcpy(desc->dtargd_native, at->type,
                    sizeof(desc->dtargd_native));
                if (at->xtype == NULL)
                        continue;
                strlcpy(desc->dtargd_xlate, at->xtype,
                    sizeof(desc->dtargd_xlate));
        }
}

/*
 * Fetch arguments beyond the first five passed directly to dtrace_probe().
 * FreeBSD's SDT implementation currently only supports up to 6 arguments, so
 * we just need to handle arg5 here; it is read back from the per-thread slot
 * filled in by sdt_dtrace_probe().  Any other argument number flags an
 * illegal operation and yields 0.
 */
static uint64_t
sdt_getargval(void *arg __unused, dtrace_id_t id __unused,
    void *parg __unused, int argno, int aframes __unused)
{
        if (argno == 5)
                return (curthread->t_dtrace_sdt_arg[argno - 5]);

        DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
        return (0);
}

/*
 * DTrace "destroy" method.  Intentionally empty: this file releases nothing
 * per probe here, and all three arguments are ignored.
 */
static void
sdt_destroy(void *arg, dtrace_id_t id, void *parg)
{
}

static void
sdt_kld_load_providers(struct linker_file *lf)
{
        struct sdt_provider **prov, **begin, **end;
        struct sdt_probe **p_begin, **p_end;

        if (linker_file_lookup_set(lf, "sdt_providers_set", &begin, &end,
            NULL) == 0) {
                for (prov = begin; prov < end; prov++)
                        sdt_create_provider(*prov);
        }

        if (linker_file_lookup_set(lf, "sdt_probes_set", &p_begin, &p_end,
            NULL) == 0) {
                for (struct sdt_probe **probe = p_begin; probe < p_end; probe++)
                        sdt_init_probe(*probe, lf);
        }
}

/*
 * Second pass over a linker file: create its probes, attach its argument
 * type descriptions to their probes, and attach each valid tracepoint to its
 * probe.  Invalid tracepoints are reported and skipped.
 */
static void
sdt_kld_load_probes(struct linker_file *lf)
{
        struct sdt_probe **pp, **p_begin, **p_end;
        struct sdt_argtype **ap, **a_begin, **a_end;
        struct sdt_tracepoint *tp, *tp_begin, *tp_end;

        if (linker_file_lookup_set(lf, "sdt_probes_set", &p_begin, &p_end,
            NULL) == 0) {
                for (pp = p_begin; pp < p_end; pp++)
                        sdt_create_probe(*pp);
        }

        if (linker_file_lookup_set(lf, "sdt_argtypes_set", &a_begin, &a_end,
            NULL) == 0) {
                for (ap = a_begin; ap < a_end; ap++) {
                        struct sdt_argtype *at = *ap;

                        at->probe->n_args++;
                        TAILQ_INSERT_TAIL(&at->probe->argtype_list, at,
                            argtype_entry);
                }
        }

        if (linker_file_lookup_set(lf, __XSTRING(_SDT_TRACEPOINT_SET),
            &tp_begin, &tp_end, NULL) == 0) {
                for (tp = tp_begin; tp < tp_end; tp++) {
                        if (sdt_tracepoint_valid(tp->patchpoint, tp->target)) {
                                STAILQ_INSERT_TAIL(&tp->probe->tracepoint_list,
                                    tp, tracepoint_entry);
                                continue;
                        }

                        printf(
                            "invalid tracepoint %#jx->%#jx for %s:%s:%s:%s\n",
                            (uintmax_t)tp->patchpoint, (uintmax_t)tp->target,
                            tp->probe->prov->name, tp->probe->mod,
                            tp->probe->func, tp->probe->name);
                }
        }
}

/*
 * Called from the kernel linker when a module is loaded, before
 * dtrace_module_loaded() is called. This is done so that it's possible to
 * register new providers when modules are loaded. The DTrace framework
 * explicitly disallows calling into the framework from the provide_module
 * provider method, so we cannot do this there.
 *
 * Providers are registered before probes are created because
 * sdt_create_probe() looks up each probe's provider on sdt_prov_list.
 */
static void
sdt_kld_load(void *arg __unused, struct linker_file *lf)
{
        sdt_kld_load_providers(lf);
        sdt_kld_load_probes(lf);
}

static bool
sdt_kld_unload_providers(struct linker_file *lf)
{
        struct sdt_provider *prov, **curr, **begin, **end, *tmp;

        if (linker_file_lookup_set(lf, "sdt_providers_set", &begin, &end,
            NULL))
                /* No DTrace providers are declared in this file. */
                return (true);

        /*
         * Go through all the providers declared in this linker file and
         * unregister any that aren't declared in another loaded file.
         */
        for (curr = begin; curr < end; curr++) {
                TAILQ_FOREACH_SAFE(prov, &sdt_prov_list, prov_entry, tmp) {
                        if (strcmp(prov->name, (*curr)->name) != 0)
                                continue;

                        if (prov->sdt_refs == 1) {
                                if (dtrace_unregister(prov->id) != 0) {
                                        return (false);
                                }
                                TAILQ_REMOVE(&sdt_prov_list, prov, prov_entry);
                                free(prov->name, M_SDT);
                                free(prov, M_SDT);
                        } else
                                prov->sdt_refs--;
                        break;
                }
        }

        return (true);
}

/*
 * Detach this linker file's tracepoints and argument types from their
 * probes.  An entry is only unlinked if it is actually found on the probe's
 * list.
 *
 * Returns false if, afterwards, a probe owned by this file still has
 * argument types or tracepoints attached; true otherwise.
 */
static bool
sdt_kld_unload_probes(struct linker_file *lf)
{
        struct sdt_tracepoint *tp, *tp_begin, *tp_end, *cur, *prev;
        struct sdt_argtype **ap, **a_begin, **a_end, *at;
        struct sdt_probe **pp, **p_begin, **p_end;

        if (linker_file_lookup_set(lf, __XSTRING(_SDT_TRACEPOINT_SET),
            &tp_begin, &tp_end, NULL) == 0) {
                for (tp = tp_begin; tp < tp_end; tp++) {
                        if (!sdt_tracepoint_valid(tp->patchpoint, tp->target))
                                continue;

                        /* Locate tp and remember its predecessor, if any. */
                        prev = NULL;
                        STAILQ_FOREACH(cur, &tp->probe->tracepoint_list,
                            tracepoint_entry) {
                                if (cur == tp)
                                        break;
                                prev = cur;
                        }
                        if (cur == NULL)
                                continue;

                        if (prev == NULL) {
                                STAILQ_REMOVE_HEAD(&tp->probe->tracepoint_list,
                                    tracepoint_entry);
                        } else {
                                STAILQ_REMOVE_AFTER(
                                    &tp->probe->tracepoint_list, prev,
                                    tracepoint_entry);
                        }
                }
        }

        if (linker_file_lookup_set(lf, "sdt_argtypes_set", &a_begin, &a_end,
            NULL) == 0) {
                for (ap = a_begin; ap < a_end; ap++) {
                        TAILQ_FOREACH(at, &(*ap)->probe->argtype_list,
                            argtype_entry) {
                                if (at != *ap)
                                        continue;
                                (*ap)->probe->n_args--;
                                TAILQ_REMOVE(&(*ap)->probe->argtype_list, *ap,
                                    argtype_entry);
                                break;
                        }
                }
        }

        if (linker_file_lookup_set(lf, "sdt_probes_set", &p_begin, &p_end,
            NULL) == 0) {
                for (pp = p_begin; pp < p_end; pp++) {
                        if ((*pp)->sdtp_lf != lf)
                                continue;

                        /*
                         * Don't destroy the probe as there might be multiple
                         * instances of the same probe in different modules.
                         */
                        if (!TAILQ_EMPTY(&(*pp)->argtype_list) ||
                            !STAILQ_EMPTY(&(*pp)->tracepoint_list))
                                return (false);
                }
        }

        return (true);
}

/*
 * kld_unload_try handler: tear down the file's probe state and then its
 * providers, setting *error to 1 if either step fails.  Nothing is done if
 * *error is already non-zero on entry.
 */
static void
sdt_kld_unload_try(void *arg __unused, struct linker_file *lf, int *error)
{
        /* An error has already been recorded; leave it untouched. */
        if (*error != 0)
                return;

        if (!sdt_kld_unload_probes(lf) || !sdt_kld_unload_providers(lf))
                *error = 1;
}

/*
 * linker_file_foreach() callback used by sdt_load(): run the provider pass
 * on one linker file.  Always returns 0.
 */
static int
sdt_load_providers_cb(linker_file_t lf, void *arg __unused)
{
        sdt_kld_load_providers(lf);
        return (0);
}

/*
 * linker_file_foreach() callback used by sdt_load(): run the probe pass on
 * one linker file.  Always returns 0.
 */
static int
sdt_load_probes_cb(linker_file_t lf, void *arg __unused)
{
        sdt_kld_load_probes(lf);
        return (0);
}

/*
 * Probe entry point installed as sdt_probe_func while this module is loaded.
 * dtrace_probe() is called with five arguments, so the sixth (arg5) is
 * stashed in the current thread first; sdt_getargval() reads it back from
 * there.
 */
static void
sdt_dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
{
        curthread->t_dtrace_sdt_arg[0] = arg5;
        dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
}

/*
 * Module initialization: set up the provider list, install the probe entry
 * point, hook linker file load/unload events, and register the providers
 * and probes of everything that is already loaded.
 */
static void
sdt_load(void)
{

        TAILQ_INIT(&sdt_prov_list);

        /* From here on, SDT probe sites call into DTrace. */
        sdt_probe_func = sdt_dtrace_probe;

        sdt_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, sdt_kld_load, NULL,
            EVENTHANDLER_PRI_ANY);
        sdt_kld_unload_try_tag = EVENTHANDLER_REGISTER(kld_unload_try,
            sdt_kld_unload_try, NULL, EVENTHANDLER_PRI_ANY);

        /*
         * Pick up probes from the kernel and already-loaded linker files.
         * Define providers in a separate pass since a linker file may be using
         * providers defined in a file that appears later in the list.
         */
        linker_file_foreach(sdt_load_providers_cb, NULL);
        linker_file_foreach(sdt_load_probes_cb, NULL);
}

static int
sdt_unload(void)
{
        struct sdt_provider *prov, *tmp;
        int ret;

        EVENTHANDLER_DEREGISTER(kld_load, sdt_kld_load_tag);
        EVENTHANDLER_DEREGISTER(kld_unload_try, sdt_kld_unload_try_tag);

        sdt_probe_func = sdt_probe_stub;

        TAILQ_FOREACH_SAFE(prov, &sdt_prov_list, prov_entry, tmp) {
                ret = dtrace_unregister(prov->id);
                if (ret != 0)
                        return (ret);
                TAILQ_REMOVE(&sdt_prov_list, prov, prov_entry);
                free(prov->name, M_SDT);
                free(prov, M_SDT);
        }

        return (0);
}

/*
 * Module event handler.  Load, unload and shutdown are accepted without any
 * work here; every other event type is rejected with EOPNOTSUPP.
 */
static int
sdt_modevent(module_t mod __unused, int type, void *data __unused)
{
        if (type == MOD_LOAD || type == MOD_UNLOAD || type == MOD_SHUTDOWN)
                return (0);

        return (EOPNOTSUPP);
}

/* Run sdt_load()/sdt_unload() at the SI_SUB_DTRACE_PROVIDER stage. */
SYSINIT(sdt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, sdt_load, NULL);
SYSUNINIT(sdt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, sdt_unload, NULL);

/* Module declaration; sdt depends on the dtrace and opensolaris modules. */
DEV_MODULE(sdt, sdt_modevent, NULL);
MODULE_VERSION(sdt, 1);
MODULE_DEPEND(sdt, dtrace, 1, 1, 1);
MODULE_DEPEND(sdt, opensolaris, 1, 1, 1);