#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/signalfd.h>
#include <sys/conf.h>
#include <sys/sysmacros.h>
#include <sys/filio.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/schedctl.h>
#include <sys/id_space.h>
#include <sys/sdt.h>
#include <sys/disp.h>
#include <sys/taskq_impl.h>
#include <sys/condvar.h>
#include <sys/stdbool.h>
/*
 * Per-open state for one signalfd instance.  One of these is allocated as
 * DDI soft state, keyed by minor number, each time the device is opened.
 */
typedef struct signalfd_state {
kmutex_t sfd_lock;	/* taken around accesses to sfd_mask and sfd_pollers */
list_t sfd_pollers;	/* signalfd_poller_t entries, linked by sp_state_node */
k_sigset_t sfd_mask;	/* signals this instance reads/polls for */
minor_t sfd_minor;	/* minor number allocated for this instance */
} signalfd_state_t;
/*
 * Association between one signalfd instance and one process which has polled
 * it.  Each poller sits on two lists at once: the owning instance's
 * sfd_pollers list and the process's sigfd_list.
 */
typedef struct signalfd_poller {
list_node_t sp_state_node;	/* linkage in signalfd_state_t`sfd_pollers */
list_node_t sp_proc_node;	/* linkage in the process's sigfd_list */
pollhead_t sp_pollhead;		/* pollhead handed back from the poll entry point */
signalfd_state_t *sp_state;	/* instance this poller belongs to */
proc_t *sp_proc;		/* associated process; NULL once dissociated */
kmutex_t sp_lock;		/* held when touching sp_pollev/sp_pending/sp_mask */
kcondvar_t sp_cv;		/* signalled when the wake task has finished */
short sp_pollev;		/* poll events accumulated for the wake task */
bool sp_pending;		/* true while a wake task is dispatched/running */
taskq_ent_t sp_taskent;		/* embedded entry used for taskq_dispatch_ent() */
k_sigset_t sp_mask;		/* copy of the instance's signal mask */
} signalfd_poller_t;
/* Module-wide state, set up in signalfd_attach() and torn down in detach. */
static dev_info_t *signalfd_devi;	/* devinfo of the attached instance */
static id_space_t *signalfd_minors;	/* allocator for per-open minor numbers */
static void *signalfd_softstate;	/* soft state handle for signalfd_state_t */
static taskq_t *signalfd_wakeq;		/* taskq which runs signalfd_wake_task() */
/*
 * Release the per-process signalfd state attached to `p`.
 *
 * The caller must hold p->p_lock, and every poller must already have been
 * removed from the process's list (this is enforced with VERIFY).
 */
static void
signalfd_proc_clean(proc_t *p)
{
	sigfd_proc_state_t *procstate;

	ASSERT(MUTEX_HELD(&p->p_lock));

	procstate = p->p_sigfd;
	ASSERT(procstate != NULL);
	VERIFY(list_is_empty(&procstate->sigfd_list));

	/* Unhook the state from the process before releasing its memory. */
	p->p_sigfd = NULL;
	list_destroy(&procstate->sigfd_list);
	kmem_free(procstate, sizeof (*procstate));
}
/*
 * Taskq callback which delivers accumulated poll events for a poller.
 *
 * arg is the signalfd_poller_t whose sp_pollev holds the events to deliver.
 * The task is only dispatched with sp_pending set and at least one event
 * recorded (both are VERIFY'd).  Events which arrive while sp_lock is dropped
 * for the pollwakeup() call are picked up by another pass through the loop.
 */
static void
signalfd_wake_task(void *arg)
{
signalfd_poller_t *sp = arg;
mutex_enter(&sp->sp_lock);
VERIFY(sp->sp_pollev != 0);
VERIFY(sp->sp_pending);
do {
/* Snapshot and clear the pending events while sp_lock is held. */
const short pollev = sp->sp_pollev;
const bool is_err = (pollev & POLLERR) != 0;
sp->sp_pollev = 0;
/* sp_lock is dropped for the duration of the pollhead operations. */
mutex_exit(&sp->sp_lock);
pollwakeup(&sp->sp_pollhead, pollev);
if (is_err) {
/* POLLERR is the teardown notification: also clean out the pollhead. */
pollhead_clean(&sp->sp_pollhead);
}
mutex_enter(&sp->sp_lock);
if (is_err) {
/* Nothing further is delivered once POLLERR has been sent. */
break;
}
} while (sp->sp_pollev != 0);
/* Let any thread waiting in signalfd_pollers_free() know we are done. */
sp->sp_pending = false;
cv_signal(&sp->sp_cv);
mutex_exit(&sp->sp_lock);
}
/*
 * Record poll event(s) `ev` against poller `sp` and make sure a wake task is
 * scheduled to deliver them.  The caller must hold sp->sp_lock.
 *
 * Only one task is ever outstanding per poller: if one is already pending,
 * the new events are simply merged into sp_pollev for it to pick up.
 */
static void
signalfd_poller_wake(signalfd_poller_t *sp, short ev)
{
	ASSERT(MUTEX_HELD(&sp->sp_lock));

	sp->sp_pollev |= ev;

	if (sp->sp_pending) {
		/* A task is already on its way; it will see the new bits. */
		return;
	}

	sp->sp_pending = true;
	taskq_dispatch_ent(signalfd_wakeq, signalfd_wake_task, sp, 0,
	    &sp->sp_taskent);
}
static void
signalfd_pollwake_cb(void *arg0, int sig)
{
proc_t *p = (proc_t *)arg0;
sigfd_proc_state_t *pstate = (sigfd_proc_state_t *)p->p_sigfd;
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(pstate != NULL);
list_t *pollers = &pstate->sigfd_list;
for (signalfd_poller_t *sp = list_head(pollers); sp != NULL;
sp = list_next(pollers, sp)) {
mutex_enter(&sp->sp_lock);
if (sigismember(&sp->sp_mask, sig)) {
signalfd_poller_wake(sp, POLLRDNORM | POLLIN);
}
mutex_exit(&sp->sp_lock);
}
}
/*
 * Return the per-process signalfd state for `p`, allocating it if absent.
 *
 * Called and returns with p->p_lock held, but the lock is dropped around the
 * sleeping allocation.  Because of that window, p_sigfd is re-checked once
 * the lock is re-taken; if another thread installed state in the meantime,
 * the fresh allocation is discarded in favour of the existing one.
 */
static sigfd_proc_state_t *
signalfd_proc_pstate(proc_t *p)
{
	sigfd_proc_state_t *fresh;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if (p->p_sigfd != NULL) {
		return (p->p_sigfd);
	}

	/* KM_SLEEP allocation: do not hold p_lock across it. */
	mutex_exit(&p->p_lock);
	fresh = kmem_zalloc(sizeof (*fresh), KM_SLEEP);
	list_create(&fresh->sigfd_list, sizeof (signalfd_poller_t),
	    offsetof(signalfd_poller_t, sp_proc_node));
	fresh->sigfd_pollwake_cb = signalfd_pollwake_cb;
	mutex_enter(&p->p_lock);

	if (p->p_sigfd != NULL) {
		/* Lost the race: throw ours away and use the winner's. */
		list_destroy(&fresh->sigfd_list);
		kmem_free(fresh, sizeof (*fresh));
		return (p->p_sigfd);
	}

	p->p_sigfd = fresh;
	return (fresh);
}
/*
 * Find, or create, the poller which links signalfd instance `state` to
 * process `p`.
 *
 * The caller must hold state->sfd_lock.  p->p_lock is taken and released
 * internally.  Returns the existing poller for the state/process pair if
 * there is one, otherwise a newly allocated poller which has been inserted
 * on both the process's sigfd_list and the instance's sfd_pollers list.
 */
static signalfd_poller_t *
signalfd_poller_associate(signalfd_state_t *state, proc_t *p)
{
sigfd_proc_state_t *pstate;
list_t *pollers;
signalfd_poller_t *sp;
ASSERT(MUTEX_HELD(&state->sfd_lock));
mutex_enter(&p->p_lock);
pstate = signalfd_proc_pstate(p);
pollers = &pstate->sigfd_list;
/* Look for a poller already tying this process to this instance. */
for (sp = list_head(pollers); sp != NULL; sp = list_next(pollers, sp)) {
if (sp->sp_state == state) {
mutex_exit(&p->p_lock);
return (sp);
}
}
/* None found: drop p_lock for the sleeping allocation of a new poller. */
mutex_exit(&p->p_lock);
sp = kmem_zalloc(sizeof (*sp), KM_SLEEP);
mutex_init(&sp->sp_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&sp->sp_cv, NULL, CV_DEFAULT, NULL);
/* sp_mask starts zeroed, so this leaves it equal to the instance mask. */
sigorset(&sp->sp_mask, &state->sfd_mask);
sp->sp_state = state;
sp->sp_proc = p;
mutex_enter(&p->p_lock);
/*
 * Look the process state up again rather than reusing the earlier pointer:
 * p_lock was dropped above, and signalfd_proc_clean() may have freed it.
 */
pstate = signalfd_proc_pstate(p);
list_insert_tail(&pstate->sigfd_list, sp);
list_insert_tail(&state->sfd_pollers, sp);
mutex_exit(&p->p_lock);
return (sp);
}
/*
 * Detach every poller of `state` from the process it is associated with.
 *
 * The caller must hold state->sfd_lock.  Each poller still attached to a
 * process is removed from that process's sigfd_list, has sp_proc cleared, and
 * is sent POLLERR through the wake task.  The pollers stay on sfd_pollers;
 * signalfd_pollers_free() is what frees them.
 */
static void
signalfd_pollers_dissociate(signalfd_state_t *state)
{
ASSERT(MUTEX_HELD(&state->sfd_lock));
/*
 * NOTE(review): pidlock is held across the whole walk, presumably so a
 * process cannot finish exiting (and be freed) while we reach for its
 * p_lock -- confirm against proc_exit().
 */
mutex_enter(&pidlock);
signalfd_poller_t *sp;
list_t *pollers = &state->sfd_pollers;
for (sp = list_head(pollers); sp != NULL; sp = list_next(pollers, sp)) {
proc_t *p = sp->sp_proc;
if (p == NULL) {
/* Already dissociated (e.g. by signalfd_exit_helper()). */
continue;
}
mutex_enter(&p->p_lock);
/* Re-check under p_lock: sp_proc may have been cleared in the meantime. */
if (sp->sp_proc == NULL) {
mutex_exit(&p->p_lock);
continue;
}
VERIFY3P(sp->sp_proc, ==, p);
VERIFY3P(sp->sp_state, ==, state);
VERIFY3P(p->p_sigfd, !=, NULL);
sigfd_proc_state_t *pstate = p->p_sigfd;
list_remove(&pstate->sigfd_list, sp);
sp->sp_proc = NULL;
/* Notify anything still polling on this pollhead that it is going away. */
mutex_enter(&sp->sp_lock);
signalfd_poller_wake(sp, POLLERR);
mutex_exit(&sp->sp_lock);
if (list_is_empty(&pstate->sigfd_list)) {
/* That was the process's last poller: free its signalfd state too. */
signalfd_proc_clean(p);
}
mutex_exit(&p->p_lock);
}
mutex_exit(&pidlock);
}
/*
 * Free every poller on `state`'s sfd_pollers list.
 *
 * The caller must hold state->sfd_lock and must already have dissociated the
 * pollers from their processes (sp_proc == NULL).  For each poller, wait for
 * any in-flight wake task to finish before destroying and freeing it.
 */
static void
signalfd_pollers_free(signalfd_state_t *state)
{
	signalfd_poller_t *sp;

	ASSERT(MUTEX_HELD(&state->sfd_lock));

	while ((sp = list_remove_head(&state->sfd_pollers)) != NULL) {
		ASSERT3P(sp->sp_proc, ==, NULL);

		/*
		 * signalfd_wake_task() clears sp_pending and signals sp_cv as
		 * its final act under sp_lock, so once this loop ends the task
		 * will not touch the poller again.
		 */
		mutex_enter(&sp->sp_lock);
		while (sp->sp_pending) {
			cv_wait(&sp->sp_cv, &sp->sp_lock);
		}
		/*
		 * Drop the lock before tearing it down: a mutex should not be
		 * held when handed to mutex_destroy().
		 */
		mutex_exit(&sp->sp_lock);

		/* The POLLERR wake task should have emptied the pollhead. */
		ASSERT3P(sp->sp_pollhead.ph_list, ==, NULL);

		cv_destroy(&sp->sp_cv);
		mutex_destroy(&sp->sp_lock);
		kmem_free(sp, sizeof (*sp));
	}
}
/*
 * Exit hook (installed as sigfd_exit_helper at attach time).
 *
 * Acts on the calling process (curproc): if it carries signalfd state, every
 * poller is pulled off its list, marked as process-less, and sent POLLERR;
 * the per-process state is then freed.
 */
static void
signalfd_exit_helper(void)
{
	proc_t *self = curproc;
	sigfd_proc_state_t *procstate;
	signalfd_poller_t *poller;

	mutex_enter(&self->p_lock);

	procstate = self->p_sigfd;
	if (procstate != NULL) {
		for (poller = list_remove_head(&procstate->sigfd_list);
		    poller != NULL;
		    poller = list_remove_head(&procstate->sigfd_list)) {
			poller->sp_proc = NULL;

			mutex_enter(&poller->sp_lock);
			signalfd_poller_wake(poller, POLLERR);
			mutex_exit(&poller->sp_lock);
		}
		signalfd_proc_clean(self);
	}

	mutex_exit(&self->p_lock);
}
/*
 * open(9E) entry point.
 *
 * Only the well-known SIGNALFDMNRN_SIGNALFD minor may be opened.  Each open
 * is given its own freshly allocated minor number and soft state, and *devp
 * is rewritten to point at that new minor.
 *
 * Returns 0 on success, ENXIO for any other minor, ENOMEM when no minor
 * number is available, or ENODEV when soft state allocation fails.
 */
_NOTE(ARGSUSED(1))
static int
signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cr)
{
	minor_t minor;
	signalfd_state_t *state;

	if (getminor(*devp) != SIGNALFDMNRN_SIGNALFD) {
		return (ENXIO);
	}

	minor = (minor_t)id_allocff_nosleep(signalfd_minors);
	if (minor == -1) {
		return (ENOMEM);
	}

	if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
		/* Give the minor number back; nothing else was set up. */
		id_free(signalfd_minors, minor);
		return (ENODEV);
	}

	state = ddi_get_soft_state(signalfd_softstate, minor);
	mutex_init(&state->sfd_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&state->sfd_pollers, sizeof (signalfd_poller_t),
	    offsetof(signalfd_poller_t, sp_state_node));
	state->sfd_minor = minor;

	/* Redirect the caller to the per-open minor on the same major. */
	*devp = makedevice(getemajor(*devp), minor);

	return (0);
}
/*
 * Consume one pending signal from `set` on behalf of the current thread and
 * copy it out through `uio` as a signalfd_siginfo_t.
 *
 * set		signals the caller is willing to consume
 * uio		destination for the signalfd_siginfo_t record
 * should_block	wait for a signal if true; otherwise only check once
 *
 * Returns 0 (or the uiomove() error) once a signal has been consumed, EAGAIN
 * when ret ends up as -1 (the non-blocking check found nothing, or the wait
 * itself returned -1), and EINTR when the thread was woken without a current
 * signal from `set`.
 */
static int
signalfd_consume_signal(k_sigset_t set, uio_t *uio, bool should_block)
{
kthread_t *t = curthread;
klwp_t *lwp = ttolwp(t);
proc_t *p = ttoproc(t);
int ret = 0;
/* Record the signals of interest on the thread for the duration. */
t->t_sigwait = set;
mutex_enter(&p->p_lock);
schedctl_finish_sigblock(t);
/* Temporarily remove the requested signals from the thread's held mask. */
const k_sigset_t oldmask = t->t_hold;
sigdiffset(&t->t_hold, &t->t_sigwait);
if (should_block) {
/*
 * NOTE(review): a positive return is treated as "keep waiting"; presumably
 * that is a wakeup not caused by a signal -- confirm against
 * cv_waituntil_sig().
 */
do {
ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock,
NULL, 0);
} while (ret > 0);
} else {
/* issig() is called without p_lock held; -1 marks "nothing pending". */
mutex_exit(&p->p_lock);
if (issig(FORREAL) == 0) {
ret = -1;
}
mutex_enter(&p->p_lock);
}
/* Put the original held mask back and flag the thread to re-check signals. */
t->t_hold = oldmask;
t->t_sig_check = 1;
if (ret == -1) {
mutex_exit(&p->p_lock);
sigemptyset(&t->t_sigwait);
return (EAGAIN);
}
/* No current signal, or one that is not in the requested set. */
if (lwp->lwp_cursig == 0 ||
!sigismember(&t->t_sigwait, lwp->lwp_cursig)) {
mutex_exit(&p->p_lock);
sigemptyset(&t->t_sigwait);
return (EINTR);
}
/* Build the record to hand to userland, starting from all zeroes. */
signalfd_siginfo_t ssi;
bzero(&ssi, sizeof (ssi));
if (lwp->lwp_curinfo != NULL) {
/* Full siginfo is available for the current signal: copy its fields. */
k_siginfo_t *infop = &lwp->lwp_curinfo->sq_info;
ssi.ssi_signo = infop->si_signo;
ssi.ssi_errno = infop->si_errno;
ssi.ssi_code = infop->si_code;
ssi.ssi_pid = infop->si_pid;
ssi.ssi_uid = infop->si_uid;
ssi.ssi_fd = infop->si_fd;
ssi.ssi_band = infop->si_band;
ssi.ssi_trapno = infop->si_trapno;
ssi.ssi_status = infop->si_status;
ssi.ssi_utime = infop->si_utime;
ssi.ssi_stime = infop->si_stime;
ssi.ssi_addr = (uint64_t)(intptr_t)infop->si_addr;
DTRACE_PROC2(signal__clear, int, 0, ksiginfo_t *, infop);
} else {
/* No queued siginfo: report just the signal number with SI_NOINFO. */
k_siginfo_t info = {
.si_signo = lwp->lwp_cursig,
.si_code = SI_NOINFO,
};
ssi.ssi_signo = info.si_signo;
ssi.ssi_code = info.si_code;
DTRACE_PROC2(signal__clear, int, 0, ksiginfo_t *, &info);
}
/* The signal is now consumed: count it and clear the lwp's current signal. */
lwp->lwp_ru.nsignals++;
lwp->lwp_cursig = 0;
lwp->lwp_extsig = 0;
if (lwp->lwp_curinfo != NULL) {
siginfofree(lwp->lwp_curinfo);
lwp->lwp_curinfo = NULL;
}
mutex_exit(&p->p_lock);
/* Copy the record out to the caller's buffer after dropping p_lock. */
ret = uiomove(&ssi, sizeof (ssi), UIO_READ, uio);
sigemptyset(&t->t_sigwait);
return (ret);
}
/*
 * read(9E) entry point.
 *
 * Fills the caller's buffer with as many signalfd_siginfo_t records as fit
 * and are currently available.  The first record is waited for unless the
 * descriptor is non-blocking (FNDELAY/FNONBLOCK); later records are only
 * collected if already pending.
 *
 * Returns 0 if at least one record was copied out.  Otherwise returns ENXIO
 * (no state for this minor), EINVAL (buffer smaller than one record, or the
 * instance's mask is empty), or the error from signalfd_consume_signal().
 */
_NOTE(ARGSUSED(2))
static int
signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
{
	signalfd_state_t *state;
	k_sigset_t set;
	bool should_block = true, got_one = false;
	int res;

	state = ddi_get_soft_state(signalfd_softstate, getminor(dev));
	if (state == NULL) {
		return (ENXIO);
	}

	if (uio->uio_resid < sizeof (signalfd_siginfo_t)) {
		return (EINVAL);
	}

	if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
		should_block = false;
	}

	mutex_enter(&state->sfd_lock);
	set = state->sfd_mask;
	mutex_exit(&state->sfd_lock);

	/*
	 * A driver read routine reports failure by returning the errno value
	 * itself, as the paths above do; it must not go through set_errno().
	 */
	if (sigisempty(&set)) {
		return (EINVAL);
	}

	do {
		res = signalfd_consume_signal(set, uio, should_block);

		if (res == 0) {
			/* Having delivered one signal, never block for more. */
			got_one = true;
			should_block = false;

			/* Pick up any mask change made while we were waiting. */
			mutex_enter(&state->sfd_lock);
			set = state->sfd_mask;
			mutex_exit(&state->sfd_lock);
			if (sigisempty(&set)) {
				break;
			}
		}
	} while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));

	/* A partial read is still a success. */
	if (got_one) {
		res = 0;
	}

	return (res);
}
/*
 * Report whether any signal in `set` is pending against either process `p`
 * or thread `t`.
 *
 * Returns non-zero if so.  The value is the bitwise OR of the matching bits
 * from each of the three mask words, with the last word limited to FILLSET2.
 */
static int
signalfd_sig_pending(proc_t *p, kthread_t *t, k_sigset_t set)
{
	const unsigned int word0 =
	    (p->p_sig.__sigbits[0] | t->t_sig.__sigbits[0]) &
	    set.__sigbits[0];
	const unsigned int word1 =
	    (p->p_sig.__sigbits[1] | t->t_sig.__sigbits[1]) &
	    set.__sigbits[1];
	const unsigned int word2 =
	    ((p->p_sig.__sigbits[2] | t->t_sig.__sigbits[2]) &
	    set.__sigbits[2]) & FILLSET2;

	return (word0 | word1 | word2);
}
/*
 * chpoll(9E) entry point.
 *
 * Reports POLLRDNORM|POLLIN (filtered by `events`) when a signal in the
 * instance's mask is pending for the calling process or thread.  When nothing
 * is ready and no earlier descriptor has fired (!anyyet), or when POLLET was
 * requested, a poller is associated with the calling process and its
 * pollhead is returned through phpp.
 *
 * Returns 0, or ENXIO if the minor has no state.
 */
static int
signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	kthread_t *thr = curthread;
	proc_t *proc = ttoproc(thr);
	signalfd_state_t *state;
	short ready = 0;

	state = ddi_get_soft_state(signalfd_softstate, getminor(dev));
	if (state == NULL) {
		return (ENXIO);
	}

	mutex_enter(&state->sfd_lock);

	if (signalfd_sig_pending(proc, thr, state->sfd_mask) != 0) {
		ready |= POLLRDNORM | POLLIN;
	}
	*reventsp = ready & events;

	if ((events & POLLET) != 0 || (*reventsp == 0 && !anyyet)) {
		signalfd_poller_t *poller;

		poller = signalfd_poller_associate(state, proc);
		*phpp = &poller->sp_pollhead;
	}

	mutex_exit(&state->sfd_lock);

	return (0);
}
/*
 * Install a new signal mask on a signalfd instance.
 *
 * The user-format mask is converted to kernel form, stored in the instance,
 * and copied into every existing poller so that wake callbacks test against
 * the updated set.
 */
static void
signalfd_set_mask(signalfd_state_t *state, const sigset_t *umask)
{
	k_sigset_t newmask;
	list_t *plist;
	signalfd_poller_t *poller;

	sigutok(umask, &newmask);

	mutex_enter(&state->sfd_lock);

	state->sfd_mask = newmask;

	plist = &state->sfd_pollers;
	poller = list_head(plist);
	while (poller != NULL) {
		mutex_enter(&poller->sp_lock);
		poller->sp_mask = newmask;
		mutex_exit(&poller->sp_lock);

		poller = list_next(plist, poller);
	}

	mutex_exit(&state->sfd_lock);
}
/*
 * ioctl(9E) entry point.
 *
 * The only supported command is SIGNALFDIOC_MASK, whose argument is the user
 * address of a sigset_t to install as the instance's mask.
 *
 * Returns 0 on success, ENXIO if the minor has no state, EFAULT if the mask
 * cannot be copied in, and ENOTTY for any other command.
 */
_NOTE(ARGSUSED(4))
static int
signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	signalfd_state_t *state;
	sigset_t mask;

	state = ddi_get_soft_state(signalfd_softstate, getminor(dev));
	if (state == NULL) {
		return (ENXIO);
	}

	if (cmd != SIGNALFDIOC_MASK) {
		return (ENOTTY);
	}

	if (ddi_copyin((caddr_t)arg, &mask, sizeof (mask), md) != 0) {
		return (EFAULT);
	}

	signalfd_set_mask(state, &mask);
	return (0);
}
/*
 * close(9E) entry point.
 *
 * Tears down one signalfd instance: every poller is detached from its
 * process and freed, then the instance's list, lock, soft state and minor
 * number are released.
 *
 * Returns 0 on success or ENXIO if the minor has no state.
 */
_NOTE(ARGSUSED(1))
static int
signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	signalfd_state_t *state;
	const minor_t minor = getminor(dev);

	state = ddi_get_soft_state(signalfd_softstate, minor);
	if (state == NULL) {
		return (ENXIO);
	}

	/* Both helpers below assert that sfd_lock is held. */
	mutex_enter(&state->sfd_lock);

	/* Detach the pollers from their processes... */
	signalfd_pollers_dissociate(state);

	/* ...then wait out any pending wake tasks and free them. */
	signalfd_pollers_free(state);

	ASSERT(list_is_empty(&state->sfd_pollers));
	/* Pairs with the list_create() done in signalfd_open(). */
	list_destroy(&state->sfd_pollers);

	/*
	 * Release the lock before destroying it; it must not still be held
	 * when it is handed to mutex_destroy() and its memory is freed.
	 */
	mutex_exit(&state->sfd_lock);
	mutex_destroy(&state->sfd_lock);

	ddi_soft_state_free(signalfd_softstate, minor);
	id_free(signalfd_minors, minor);

	return (0);
}
/*
 * attach(9E) entry point.
 *
 * Only DDI_ATTACH is handled, and only a single instance may be attached.
 * Sets up the minor-number id space, the soft state, the "signalfd" minor
 * node, the process-exit helper hook and the wake taskq.  Anything created
 * before a failure is undone in reverse order.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}
	if (signalfd_devi != NULL) {
		/* Already attached. */
		return (DDI_FAILURE);
	}

	signalfd_minors = id_space_create("signalfd_minors", 1, L_MAXMIN32 + 1);
	if (signalfd_minors == NULL) {
		cmn_err(CE_WARN, "signalfd couldn't create id space");
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_init(&signalfd_softstate,
	    sizeof (signalfd_state_t), 0) != 0) {
		cmn_err(CE_WARN, "signalfd failed to create soft state");
		goto fail_minors;
	}

	if (ddi_create_minor_node(devi, "signalfd", S_IFCHR,
	    SIGNALFDMNRN_SIGNALFD, DDI_PSEUDO, 0) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "signalfd couldn't create minor node");
		goto fail_softstate;
	}

	sigfd_exit_helper = signalfd_exit_helper;

	signalfd_wakeq = taskq_create("signalfd_wake", 1, minclsyspri,
	    0, INT_MAX, TASKQ_PREPOPULATE);

	ddi_report_dev(devi);
	signalfd_devi = devi;

	return (DDI_SUCCESS);

fail_softstate:
	ddi_soft_state_fini(&signalfd_softstate);
fail_minors:
	id_space_destroy(signalfd_minors);
	return (DDI_FAILURE);
}
/*
 * detach(9E) entry point.
 *
 * For DDI_DETACH, releases everything signalfd_attach() set up: the wake
 * taskq, the exit helper hook, the minor id space, the soft state and the
 * minor node.  Any other command fails.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
_NOTE(ARGSUSED(0))
static int
signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd == DDI_DETACH) {
		taskq_destroy(signalfd_wakeq);
		sigfd_exit_helper = NULL;

		id_space_destroy(signalfd_minors);
		ddi_soft_state_fini(&signalfd_softstate);

		ddi_remove_minor_node(signalfd_devi, NULL);
		signalfd_devi = NULL;

		return (DDI_SUCCESS);
	}

	return (DDI_FAILURE);
}
/*
 * getinfo(9E) entry point.
 *
 * DDI_INFO_DEVT2DEVINFO yields the attached dev_info pointer, and
 * DDI_INFO_DEVT2INSTANCE always yields instance 0.  Any other request fails.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE; *result is only written on success.
 */
_NOTE(ARGSUSED(0))
static int
signalfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
		*result = (void *)signalfd_devi;
		return (DDI_SUCCESS);
	}

	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
		*result = (void *)0;
		return (DDI_SUCCESS);
	}

	return (DDI_FAILURE);
}
/*
 * Character/block entry points for the driver.  Positional initializer; the
 * trailing comments name the cb_ops(9S) slot each value fills.
 */
static struct cb_ops signalfd_cb_ops = {
signalfd_open,		/* open */
signalfd_close,		/* close */
nulldev,		/* strategy */
nulldev,		/* print */
nodev,			/* dump */
signalfd_read,		/* read */
nodev,			/* write */
signalfd_ioctl,		/* ioctl */
nodev,			/* devmap */
nodev,			/* mmap */
nodev,			/* segmap */
signalfd_poll,		/* poll */
ddi_prop_op,		/* cb_prop_op */
0,			/* streamtab */
D_NEW | D_MP		/* driver compatibility flags */
};
/*
 * Device operations for the driver.  Positional initializer; the trailing
 * comments name the dev_ops(9S) slot each value fills.
 */
static struct dev_ops signalfd_ops = {
DEVO_REV,		/* devo_rev */
0,			/* refcnt */
signalfd_info,		/* get_dev_info */
nulldev,		/* identify */
nulldev,		/* probe */
signalfd_attach,	/* attach */
signalfd_detach,	/* detach */
nodev,			/* reset */
&signalfd_cb_ops,	/* driver operations */
NULL,			/* bus operations */
nodev,			/* dev power */
ddi_quiesce_not_needed,	/* quiesce */
};
/* Module linkage information describing this module as a device driver. */
static struct modldrv modldrv = {
&mod_driverops,		/* module type (this is a pseudo driver) */
"signalfd support",	/* name of module */
&signalfd_ops,		/* driver ops */
};
/* Top-level linkage structure handed to mod_install/mod_info/mod_remove. */
static struct modlinkage modlinkage = {
MODREV_1,
(void *)&modldrv,
NULL
};
/*
 * Loadable module entry point: install the module.
 * Returns the result of mod_install().
 */
int
_init(void)
{
	int rv;

	rv = mod_install(&modlinkage);
	return (rv);
}
/*
 * Loadable module entry point: report module information into modinfop.
 * Returns the result of mod_info().
 */
int
_info(struct modinfo *modinfop)
{
	int rv;

	rv = mod_info(&modlinkage, modinfop);
	return (rv);
}
/*
 * Loadable module entry point: remove the module.
 * Returns the result of mod_remove().
 */
int
_fini(void)
{
	int rv;

	rv = mod_remove(&modlinkage);
	return (rv);
}