root/sys/kern/kern_devctl.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2002-2020 M. Warner Losh <imp@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
#include "opt_bus.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/eventhandler.h>
#include <sys/filio.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/condvar.h>
#include <sys/queue.h>
#include <machine/bus.h>
#include <sys/sbuf.h>
#include <sys/selinfo.h>
#include <sys/smp.h>
#include <sys/stdarg.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/bus.h>

#include <machine/cpu.h>

#include <vm/uma.h>
#include <vm/vm.h>

#include <ddb/ddb.h>

STAILQ_HEAD(devq, dev_event_info);

static struct dev_softc {
        int             inuse;
        int             nonblock;
        int             queued;
        int             async;
        struct mtx      mtx;
        struct cv       cv;
        struct selinfo  sel;
        struct devq     devq;
        struct sigio    *sigio;
        uma_zone_t      zone;
} devsoftc;

/*
 * This design allows only one reader for /dev/devctl.  This is not desirable
 * in the long run, but will get a lot of hair out of this implementation.
 * Maybe we should make this device a clonable device.
 *
 * Also note: we specifically do not attach a device to the device_t tree
 * to avoid potential chicken and egg problems.  One could argue that all
 * of this belongs to the root node.
 */

#define DEVCTL_DEFAULT_QUEUE_LEN 1000
static int sysctl_devctl_queue(SYSCTL_HANDLER_ARGS);
static int devctl_queue_length = DEVCTL_DEFAULT_QUEUE_LEN;
SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_queue, CTLTYPE_INT | CTLFLAG_RWTUN |
    CTLFLAG_MPSAFE, NULL, 0, sysctl_devctl_queue, "I", "devctl queue length");
static bool nomatch_enabled = true;
SYSCTL_BOOL(_hw_bus, OID_AUTO, devctl_nomatch_enabled, CTLFLAG_RWTUN,
    &nomatch_enabled, 0, "enable nomatch events");

static void devctl_attach_handler(void *arg __unused, device_t dev);
static void devctl_detach_handler(void *arg __unused, device_t dev,
    enum evhdev_detach state);
static void devctl_nomatch_handler(void *arg __unused, device_t dev);

static d_open_t         devopen;
static d_close_t        devclose;
static d_read_t         devread;
static d_ioctl_t        devioctl;
static d_poll_t         devpoll;
static d_kqfilter_t     devkqfilter;

#define DEVCTL_BUFFER (1024 - sizeof(void *))
struct dev_event_info {
        STAILQ_ENTRY(dev_event_info) dei_link;
        char dei_data[DEVCTL_BUFFER];
};


static struct cdevsw dev_cdevsw = {
        .d_version =    D_VERSION,
        .d_open =       devopen,
        .d_close =      devclose,
        .d_read =       devread,
        .d_ioctl =      devioctl,
        .d_poll =       devpoll,
        .d_kqfilter =   devkqfilter,
        .d_name =       "devctl",
};

static void     filt_devctl_detach(struct knote *kn);
static int      filt_devctl_read(struct knote *kn, long hint);

static const struct filterops devctl_rfiltops = {
        .f_isfd = 1,
        .f_detach = filt_devctl_detach,
        .f_event = filt_devctl_read,
        .f_copy = knote_triv_copy,
};

static struct cdev *devctl_dev;
static void devaddq(const char *type, const char *what, device_t dev);

static struct devctlbridge {
        send_event_f *send_f;
} devctl_notify_hook = { .send_f = NULL };

static void
devctl_init(void *dummy __unused)
{
        int reserve;
        uma_zone_t z;

        devctl_dev = make_dev_credf(MAKEDEV_ETERNAL, &dev_cdevsw, 0, NULL,
            UID_ROOT, GID_WHEEL, 0600, "devctl");
        mtx_init(&devsoftc.mtx, "dev mtx", "devd", MTX_DEF);
        cv_init(&devsoftc.cv, "dev cv");
        STAILQ_INIT(&devsoftc.devq);
        knlist_init_mtx(&devsoftc.sel.si_note, &devsoftc.mtx);
        if (devctl_queue_length > 0) {
                /*
                 * Allocate a zone for the messages. Preallocate 2% of these for
                 * a reserve. Allow only devctl_queue_length slabs to cap memory
                 * usage.  The reserve usually allows coverage of surges of
                 * events during memory shortages. Normally we won't have to
                 * re-use events from the queue, but will in extreme shortages.
                 */
                z = devsoftc.zone = uma_zcreate("DEVCTL",
                    sizeof(struct dev_event_info), NULL, NULL, NULL, NULL,
                    UMA_ALIGN_PTR, 0);
                reserve = max(devctl_queue_length / 50, 100);   /* 2% reserve */
                uma_zone_set_max(z, devctl_queue_length);
                uma_zone_set_maxcache(z, 0);
                uma_zone_reserve(z, reserve);
                uma_prealloc(z, reserve);
        }
        EVENTHANDLER_REGISTER(device_attach, devctl_attach_handler,
            NULL, EVENTHANDLER_PRI_LAST);
        EVENTHANDLER_REGISTER(device_detach, devctl_detach_handler,
            NULL, EVENTHANDLER_PRI_LAST);
        EVENTHANDLER_REGISTER(device_nomatch, devctl_nomatch_handler,
            NULL, EVENTHANDLER_PRI_LAST);
}
SYSINIT(devctl_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, devctl_init, NULL);

/*
 * A device was added to the tree.  We are called just after it successfully
 * attaches (that is, probe and attach success for this device).  No call
 * is made if a device is merely parented into the tree.  See devnomatch
 * if probe fails.  If attach fails, no notification is sent (but maybe
 * we should have a different message for this).
 */
static void
devctl_attach_handler(void *arg __unused, device_t dev)
{
        devaddq("+", device_get_nameunit(dev), dev);
}

/*
 * A device was removed from the tree.  We are called just before this
 * happens.
 */
static void
devctl_detach_handler(void *arg __unused, device_t dev, enum evhdev_detach state)
{
        if (state == EVHDEV_DETACH_COMPLETE)
                devaddq("-", device_get_nameunit(dev), dev);
}

/*
 * Called when there's no match for this device.  This is only called
 * the first time that no match happens, so we don't keep getting this
 * message.  Should that prove to be undesirable, we can change it.
 * This is called when all drivers that can attach to a given bus
 * decline to accept this device.  Other errors may not be detected.
 */
static void
devctl_nomatch_handler(void *arg __unused, device_t dev)
{
        if (nomatch_enabled)
                devaddq("?", "", dev);
}

static int
devopen(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
        mtx_lock(&devsoftc.mtx);
        if (devsoftc.inuse) {
                mtx_unlock(&devsoftc.mtx);
                return (EBUSY);
        }
        /* move to init */
        devsoftc.inuse = 1;
        mtx_unlock(&devsoftc.mtx);
        return (0);
}

static int
devclose(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
        mtx_lock(&devsoftc.mtx);
        devsoftc.inuse = 0;
        devsoftc.nonblock = 0;
        devsoftc.async = 0;
        cv_broadcast(&devsoftc.cv);
        funsetown(&devsoftc.sigio);
        mtx_unlock(&devsoftc.mtx);
        return (0);
}

/*
 * The read channel for this device is used to report changes to
 * userland in realtime.  We are required to free the data as well as
 * the n1 object because we allocate them separately.  Also note that
 * we return one record at a time.  If you try to read this device a
 * character at a time, you will lose the rest of the data.  Listening
 * programs are expected to cope.
 */
static int
devread(struct cdev *dev, struct uio *uio, int ioflag)
{
        struct dev_event_info *n1;
        int rv;

        mtx_lock(&devsoftc.mtx);
        while (STAILQ_EMPTY(&devsoftc.devq)) {
                if (devsoftc.nonblock) {
                        mtx_unlock(&devsoftc.mtx);
                        return (EAGAIN);
                }
                rv = cv_wait_sig(&devsoftc.cv, &devsoftc.mtx);
                if (rv) {
                        /*
                         * Need to translate ERESTART to EINTR here? -- jake
                         */
                        mtx_unlock(&devsoftc.mtx);
                        return (rv);
                }
        }
        n1 = STAILQ_FIRST(&devsoftc.devq);
        STAILQ_REMOVE_HEAD(&devsoftc.devq, dei_link);
        devsoftc.queued--;
        mtx_unlock(&devsoftc.mtx);
        rv = uiomove(n1->dei_data, strlen(n1->dei_data), uio);
        uma_zfree(devsoftc.zone, n1);
        return (rv);
}

static  int
devioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
{
        switch (cmd) {
        case FIONBIO:
                if (*(int*)data)
                        devsoftc.nonblock = 1;
                else
                        devsoftc.nonblock = 0;
                return (0);
        case FIOASYNC:
                if (*(int*)data)
                        devsoftc.async = 1;
                else
                        devsoftc.async = 0;
                return (0);
        case FIOSETOWN:
                return fsetown(*(int *)data, &devsoftc.sigio);
        case FIOGETOWN:
                *(int *)data = fgetown(&devsoftc.sigio);
                return (0);

                /* (un)Support for other fcntl() calls. */
        case FIOCLEX:
        case FIONCLEX:
        case FIONREAD:
        default:
                break;
        }
        return (ENOTTY);
}

static  int
devpoll(struct cdev *dev, int events, struct thread *td)
{
        int     revents = 0;

        mtx_lock(&devsoftc.mtx);
        if (events & (POLLIN | POLLRDNORM)) {
                if (!STAILQ_EMPTY(&devsoftc.devq))
                        revents = events & (POLLIN | POLLRDNORM);
                else
                        selrecord(td, &devsoftc.sel);
        }
        mtx_unlock(&devsoftc.mtx);

        return (revents);
}

static int
devkqfilter(struct cdev *dev, struct knote *kn)
{
        int error;

        if (kn->kn_filter == EVFILT_READ) {
                kn->kn_fop = &devctl_rfiltops;
                knlist_add(&devsoftc.sel.si_note, kn, 0);
                error = 0;
        } else
                error = EINVAL;
        return (error);
}

static void
filt_devctl_detach(struct knote *kn)
{
        knlist_remove(&devsoftc.sel.si_note, kn, 0);
}

static int
filt_devctl_read(struct knote *kn, long hint)
{
        kn->kn_data = devsoftc.queued;
        return (kn->kn_data != 0);
}

/**
 * @brief Return whether the userland process is running
 */
bool
devctl_process_running(void)
{
        return (devsoftc.inuse == 1);
}

static struct dev_event_info *
devctl_alloc_dei(void)
{
        struct dev_event_info *dei = NULL;

        mtx_lock(&devsoftc.mtx);
        if (devctl_queue_length == 0)
                goto out;
        dei = uma_zalloc(devsoftc.zone, M_NOWAIT);
        if (dei == NULL)
                dei = uma_zalloc(devsoftc.zone, M_NOWAIT | M_USE_RESERVE);
        if (dei == NULL) {
                /*
                 * Guard against no items in the queue. Normally, this won't
                 * happen, but if lots of events happen all at once and there's
                 * a chance we're out of allocated space but none have yet been
                 * queued when we get here, leaving nothing to steal. This can
                 * also happen with error injection. Fail safe by returning
                 * NULL in that case..
                 */
                if (devsoftc.queued == 0)
                        goto out;
                dei = STAILQ_FIRST(&devsoftc.devq);
                STAILQ_REMOVE_HEAD(&devsoftc.devq, dei_link);
                devsoftc.queued--;
        }
        MPASS(dei != NULL);
        *dei->dei_data = '\0';
out:
        mtx_unlock(&devsoftc.mtx);
        return (dei);
}

static struct dev_event_info *
devctl_alloc_dei_sb(struct sbuf *sb)
{
        struct dev_event_info *dei;

        dei = devctl_alloc_dei();
        if (dei != NULL)
                sbuf_new(sb, dei->dei_data, sizeof(dei->dei_data), SBUF_FIXEDLEN);
        return (dei);
}

static void
devctl_free_dei(struct dev_event_info *dei)
{
        uma_zfree(devsoftc.zone, dei);
}

static void
devctl_queue(struct dev_event_info *dei)
{
        mtx_lock(&devsoftc.mtx);
        STAILQ_INSERT_TAIL(&devsoftc.devq, dei, dei_link);
        devsoftc.queued++;
        cv_broadcast(&devsoftc.cv);
        KNOTE_LOCKED(&devsoftc.sel.si_note, 0);
        mtx_unlock(&devsoftc.mtx);
        selwakeup(&devsoftc.sel);
        if (devsoftc.async && devsoftc.sigio != NULL)
                pgsigio(&devsoftc.sigio, SIGIO, 0);
}

/**
 * @brief Send a 'notification' to userland, using standard ways
 */
void
devctl_notify(const char *system, const char *subsystem, const char *type,
    const char *data)
{
        struct dev_event_info *dei;
        struct sbuf sb;

        if (system == NULL || subsystem == NULL || type == NULL)
                return;
        if (devctl_notify_hook.send_f != NULL)
                devctl_notify_hook.send_f(system, subsystem, type, data);
        dei = devctl_alloc_dei_sb(&sb);
        if (dei == NULL)
                return;
        sbuf_cpy(&sb, "!system=");
        sbuf_cat(&sb, system);
        sbuf_cat(&sb, " subsystem=");
        sbuf_cat(&sb, subsystem);
        sbuf_cat(&sb, " type=");
        sbuf_cat(&sb, type);
        if (data != NULL) {
                sbuf_putc(&sb, ' ');
                sbuf_cat(&sb, data);
        }
        sbuf_putc(&sb, '\n');
        if (sbuf_finish(&sb) != 0)
                devctl_free_dei(dei);   /* overflow -> drop it */
        else
                devctl_queue(dei);
}

/*
 * Common routine that tries to make sending messages as easy as possible.
 * We allocate memory for the data, copy strings into that, but do not
 * free it unless there's an error.  The dequeue part of the driver should
 * free the data.  We don't send data when the device is disabled.  We do
 * send data, even when we have no listeners, because we wish to avoid
 * races relating to startup and restart of listening applications.
 *
 * devaddq is designed to string together the type of event, with the
 * object of that event, plus the plug and play info and location info
 * for that event.  This is likely most useful for devices, but less
 * useful for other consumers of this interface.  Those should use
 * the devctl_notify() interface instead.
 *
 * Output:
 *      ${type}${what} at $(location dev) $(pnp-info dev) on $(parent dev)
 */
static void
devaddq(const char *type, const char *what, device_t dev)
{
        struct dev_event_info *dei;
        const char *parstr;
        struct sbuf sb;
        size_t beginlen;

        dei = devctl_alloc_dei_sb(&sb);
        if (dei == NULL)
                return;
        sbuf_cpy(&sb, type);
        sbuf_cat(&sb, what);
        sbuf_cat(&sb, " at ");
        beginlen = sbuf_len(&sb);

        /* Add in the location */
        bus_child_location(dev, &sb);
        sbuf_putc(&sb, ' ');

        /* Add in pnpinfo */
        bus_child_pnpinfo(dev, &sb);

        /* Get the parent of this device, or / if high enough in the tree. */
        if (device_get_parent(dev) == NULL)
                parstr = ".";   /* Or '/' ? */
        else
                parstr = device_get_nameunit(device_get_parent(dev));
        sbuf_cat(&sb, " on ");
        sbuf_cat(&sb, parstr);
        sbuf_putc(&sb, '\n');
        if (sbuf_finish(&sb) != 0)
                goto bad;
        if (devctl_notify_hook.send_f != NULL) {
                const char *t;

                switch (*type) {
                case '+':
                        t = "ATTACH";
                        break;
                case '-':
                        t = "DETACH";
                        break;
                default:
                        t = "NOMATCH";
                        break;
                }
                devctl_notify_hook.send_f("device",
                    what, t, sbuf_data(&sb) + beginlen);
        }
        devctl_queue(dei);
        return;
bad:
        devctl_free_dei(dei);
}

static int
sysctl_devctl_queue(SYSCTL_HANDLER_ARGS)
{
        int q, error;

        q = devctl_queue_length;
        error = sysctl_handle_int(oidp, &q, 0, req);
        if (error || !req->newptr)
                return (error);
        if (q < 0)
                return (EINVAL);

        /*
         * When set as a tunable, we've not yet initialized the mutex.
         * It is safe to just assign to devctl_queue_length and return
         * as we're racing no one. We'll use whatever value set in
         * devinit.
         */
        if (!mtx_initialized(&devsoftc.mtx)) {
                devctl_queue_length = q;
                return (0);
        }

        /*
         * XXX It's hard to grow or shrink the UMA zone. Only allow
         * disabling the queue size for the moment until underlying
         * UMA issues can be sorted out.
         */
        if (q != 0)
                return (EINVAL);
        if (q == devctl_queue_length)
                return (0);
        mtx_lock(&devsoftc.mtx);
        devctl_queue_length = 0;
        uma_zdestroy(devsoftc.zone);
        devsoftc.zone = 0;
        mtx_unlock(&devsoftc.mtx);
        return (0);
}

/**
 * @brief safely quotes strings that might have double quotes in them.
 *
 * The devctl protocol relies on quoted strings having matching quotes.
 * This routine quotes any internal quotes so the resulting string
 * is safe to pass to snprintf to construct, for example pnp info strings.
 *
 * @param sb    sbuf to place the characters into
 * @param src   Original buffer.
 */
void
devctl_safe_quote_sb(struct sbuf *sb, const char *src)
{
        while (*src != '\0') {
                if (*src == '"' || *src == '\\')
                        sbuf_putc(sb, '\\');
                sbuf_putc(sb, *src++);
        }
}

void
devctl_set_notify_hook(send_event_f *hook)
{
        devctl_notify_hook.send_f = hook;
}

void
devctl_unset_notify_hook(void)
{
        devctl_notify_hook.send_f = NULL;
}