#include <sys/types.h>
#include <sys/devops.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/sunddi.h>
#include <sys/stat.h>
#include <sys/poll_impl.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/debug.h>
#include <sys/file.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/bitmap.h>
#include <sys/devpoll.h>
#include <sys/rctl.h>
#include <sys/resource.h>
#include <sys/schedctl.h>
#include <sys/epoll.h>
/*
 * Placeholder value stored in a devpolltbl slot by dpopen() to claim a
 * minor number while the real dp_entry_t is still being allocated.
 */
#define RESERVED 1
/* Table of per-open /dev/poll entries, indexed by minor number. */
static dp_entry_t **devpolltbl;
/* Number of slots currently allocated in devpolltbl. */
static size_t dptblsize;
/* Taken by the entry points when looking up or changing a devpolltbl slot. */
static kmutex_t devpoll_lock;
/* Set to 1 by _init(); reset to 0 there if mod_install() fails. */
int devpoll_init;
/* Character-device entry points; they are wired into dp_cb_ops below. */
static int dpopen(dev_t *devp, int flag, int otyp, cred_t *credp);
static int dpwrite(dev_t dev, struct uio *uiop, cred_t *credp);
static int dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
int *rvalp);
static int dppoll(dev_t dev, short events, int anyyet, short *reventsp,
struct pollhead **phpp);
static int dpclose(dev_t dev, int flag, int otyp, cred_t *credp);
/* Device node saved by dpattach() and handed back by dpinfo(). */
static dev_info_t *dpdevi;
/*
 * Character/block entry-point table for the driver.  The initializer is
 * positional; the slot names in the comments follow the cb_ops(9S) field
 * order.  Only open, close, write, ioctl and poll are implemented here.
 */
static struct cb_ops dp_cb_ops = {
dpopen, /* open */
dpclose, /* close */
nodev, /* strategy */
nodev, /* print */
nodev, /* dump */
nodev, /* read */
dpwrite, /* write */
dpioctl, /* ioctl */
nodev, /* devmap */
nodev, /* mmap */
nodev, /* segmap */
dppoll, /* poll */
ddi_prop_op, /* prop_op */
(struct streamtab *)0, /* streamtab */
D_MP, /* flags */
CB_REV, /* cb_ops revision */
nodev, /* aread */
nodev /* awrite */
};
/* Autoconfiguration entry points; they are wired into dp_ops below. */
static int dpattach(dev_info_t *, ddi_attach_cmd_t);
static int dpdetach(dev_info_t *, ddi_detach_cmd_t);
static int dpinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
/*
 * Device operations table.  The initializer is positional; the slot names
 * in the comments follow the dev_ops(9S) field order.
 */
static struct dev_ops dp_ops = {
DEVO_REV, /* devo_rev */
0, /* refcnt */
dpinfo, /* getinfo */
nulldev, /* identify */
nulldev, /* probe */
dpattach, /* attach */
dpdetach, /* detach */
nodev, /* reset */
&dp_cb_ops, /* cb_ops */
(struct bus_ops *)NULL, /* bus_ops */
nulldev, /* power */
ddi_quiesce_not_needed, /* quiesce */
};
/* Module linkage for a device driver: module type, description, dev_ops. */
static struct modldrv modldrv = {
&mod_driverops,
"/dev/poll driver",
&dp_ops,
};
/* Top-level linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
MODREV_1,
(void *)&modldrv,
NULL
};
/*
 * Helpers that maintain the parent/child links between pollcaches; they
 * are defined at the end of this file.
 */
static void pcachelink_assoc(pollcache_t *, pollcache_t *);
static void pcachelink_mark_stale(pollcache_t *);
static void pcachelink_purge_stale(pollcache_t *);
static void pcachelink_purge_all(pollcache_t *);
/*
 * Loadable-module entry point: set up the driver-global state and install
 * the module.
 *
 * Allocates the initial minor-number table (DEVPOLLSIZE zeroed slots),
 * initializes devpoll_lock and sets devpoll_init before calling
 * mod_install().  If installation fails, everything set up here is torn
 * down again so that neither the table nor the lock is left behind.
 *
 * Returns 0 on success, otherwise the error reported by mod_install().
 */
int
_init()
{
	int error;

	dptblsize = DEVPOLLSIZE;
	devpolltbl = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP);
	mutex_init(&devpoll_lock, NULL, MUTEX_DEFAULT, NULL);
	devpoll_init = 1;

	if ((error = mod_install(&modlinkage)) != 0) {
		/*
		 * Undo the setup above in the same way _fini() does: the
		 * lock has to be destroyed as well as the table freed, and
		 * the stale table pointer must not be left dangling.
		 */
		mutex_destroy(&devpoll_lock);
		kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize);
		devpolltbl = NULL;
		devpoll_init = 0;
	}
	return (error);
}
/*
 * Loadable-module exit point.  Asks the module framework to remove the
 * driver; only when that succeeds are the global lock and the minor table
 * released.  Returns 0 on success or the mod_remove() error otherwise.
 */
int
_fini()
{
	int rv = mod_remove(&modlinkage);

	if (rv == 0) {
		mutex_destroy(&devpoll_lock);
		kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize);
	}
	return (rv);
}
/*
 * Loadable-module information entry point: hands the request straight to
 * mod_info() for this module's linkage and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	int rv;

	rv = mod_info(&modlinkage, modinfop);
	return (rv);
}
/*
 * attach entry point.  Creates the single "poll" character minor node
 * (minor 0, DDI_PSEUDO) and remembers the dev_info node in dpdevi.
 * The cmd argument is not examined.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if the minor node could not be
 * created (any minor nodes on devi are removed in that case).
 */
static int
dpattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int rv;

	rv = ddi_create_minor_node(devi, "poll", S_IFCHR, 0, DDI_PSEUDO, 0);
	if (rv != DDI_FAILURE) {
		dpdevi = devi;
		return (DDI_SUCCESS);
	}

	ddi_remove_minor_node(devi, NULL);
	return (DDI_FAILURE);
}
/*
 * detach entry point.  Only DDI_DETACH is honoured: the minor nodes of
 * devi are removed and DDI_SUCCESS is returned.  Every other command
 * yields DDI_FAILURE without side effects.
 */
static int
dpdetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	if (cmd == DDI_DETACH) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_SUCCESS);
	}
	return (DDI_FAILURE);
}
/*
 * getinfo entry point.
 *
 *   DDI_INFO_DEVT2DEVINFO  - *result is the dev_info node saved by
 *                            dpattach().
 *   DDI_INFO_DEVT2INSTANCE - *result is 0.
 *
 * Both return DDI_SUCCESS; any other request returns DDI_FAILURE and
 * leaves *result untouched.
 */
static int
dpinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
		*result = (void *)dpdevi;
		return (DDI_SUCCESS);
	}

	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
		*result = (void *)0;
		return (DDI_SUCCESS);
	}

	return (DDI_FAILURE);
}
/*
 * Scan the pollcache bitmap for cached fds with pending events and report
 * them to the caller.
 *
 *   dpep   - /dev/poll handle; only dpe_flag is read, to learn whether the
 *            handle is in epoll-compatibility mode.
 *   dpbuf  - result buffer, viewed as epoll_event_t[] in epoll mode and as
 *            pollfd_t[] otherwise.  It may be NULL (dppoll() passes NULL);
 *            then no per-fd result is stored and the scan stops at the
 *            first fd that reports an event.
 *   pcp    - pollcache to scan; the caller must hold pcp->pc_lock.
 *   nfds   - maximum number of results to produce.
 *   fdcntp - out: number of results produced.  Asserted to be 0 on entry.
 *            It is NOT written when the cache has no bitmap yet.
 *
 * Returns 0, or the first non-zero error returned by VOP_POLL().
 *
 * The scan covers [pc_mapstart, pc_mapend] and then wraps once to
 * [0, pc_mapstart - 1].  If it stops before covering the whole range, the
 * resume position is saved back into pc_mapstart.
 */
static int
dp_pcache_poll(dp_entry_t *dpep, void *dpbuf, pollcache_t *pcp, nfds_t nfds,
int *fdcntp)
{
int start, ostart, end, fdcnt, error = 0;
boolean_t done, no_wrap;
pollfd_t *pfdp;
epoll_event_t *epoll;
/* Event bits that are translated to their EPOLL* counterparts below. */
const short mask = POLLRDHUP | POLLWRBAND;
const boolean_t is_epoll = (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0;
ASSERT(MUTEX_HELD(&pcp->pc_lock));
/* Nothing has been cached yet, so there is nothing to scan. */
if (pcp->pc_bitmap == NULL) {
return (0);
}
/* Exactly one of pfdp/epoll views dpbuf; both are NULL when dpbuf is. */
if (is_epoll) {
pfdp = NULL;
epoll = (epoll_event_t *)dpbuf;
} else {
pfdp = (pollfd_t *)dpbuf;
epoll = NULL;
}
/*
 * A jump back here restarts the whole scan from pc_mapstart and discards
 * the results counted so far (fdcnt is reset below).
 */
retry:
start = ostart = pcp->pc_mapstart;
end = pcp->pc_mapend;
/* Starting at bit 0 covers the whole map in one pass: no wrap needed. */
if (start == 0) {
no_wrap = B_TRUE;
} else {
no_wrap = B_FALSE;
}
done = B_FALSE;
fdcnt = 0;
while ((fdcnt < nfds) && !done) {
pollhead_t *php = NULL;
short revent = 0;
uf_entry_gen_t gen;
int fd;
/*
 * NOTE(review): bt_getlowbit() presumably returns the lowest set bit in
 * [start, end]; the code below treats a negative value as "none found".
 */
fd = bt_getlowbit(pcp->pc_bitmap, start, end);
ASSERT(fd <= end);
if (fd >= 0) {
file_t *fp;
polldat_t *pdp;
/*
 * Advance the scan window past this fd.  Hitting the end of the window
 * either finishes the scan or wraps it around to [0, ostart - 1].
 */
if (fd == end) {
if (no_wrap) {
done = B_TRUE;
} else {
start = 0;
end = ostart - 1;
no_wrap = B_TRUE;
}
} else {
start = fd + 1;
}
pdp = pcache_lookup_fd(pcp, fd);
/* Re-entered after a pollhead has been attached for the first time. */
repoll:
ASSERT(pdp != NULL);
ASSERT(pdp->pd_fd == fd);
/* pd_fp == NULL marks an entry that is no longer active; skip it. */
if (pdp->pd_fp == NULL) {
continue;
}
/* NOTE(review): a NULL file_t presumably means the fd is not open. */
if ((fp = getf_gen(fd, &gen)) == NULL) {
if (is_epoll) {
/* epoll mode: drop the entry from the cache. */
pdp->pd_fp = NULL;
pdp->pd_events = 0;
polldat_disassociate(pdp);
BT_CLEAR(pcp->pc_bitmap, fd);
} else if (pfdp != NULL) {
/*
 * Non-epoll mode: report POLLNVAL and keep the entry cached.  The
 * events member of the result is not filled in on this path.
 */
pfdp[fdcnt].fd = fd;
pfdp[fdcnt].revents = POLLNVAL;
fdcnt++;
}
continue;
}
/*
 * The fd now refers to something other than what was cached (different
 * generation or different file_t).  The old pollhead association is
 * dropped in either mode.
 */
if (gen != pdp->pd_gen || fp != pdp->pd_fp) {
polldat_disassociate(pdp);
if (is_epoll) {
/* epoll mode: remove the entry and report nothing for it. */
pdp->pd_fp = NULL;
pdp->pd_events = 0;
BT_CLEAR(pcp->pc_bitmap, fd);
releasef(fd);
continue;
} else {
/* Non-epoll mode: adopt the new file and keep polling the fd. */
pdp->pd_fp = fp;
pdp->pd_gen = gen;
}
}
/*
 * pd_events equal to exactly POLLONESHOT is the marker left below once a
 * one-shot entry has fired; such entries are skipped.
 */
if (pdp->pd_events == POLLONESHOT) {
releasef(fd);
BT_CLEAR(pcp->pc_bitmap, fd);
continue;
}
/*
 * The pollcache is published in the thread for the duration of the
 * VOP_POLL() calls.  NOTE(review): presumably consumed by the poll
 * framework while the driver's poll routine runs - confirm.
 */
curthread->t_pollcache = pcp;
error = VOP_POLL(fp->f_vnode, pdp->pd_events, 0,
&revent, &php, NULL);
/*
 * Edge-triggered entry with events pending but no pollhead, cached or
 * returned: poll once more asking only for POLLET in an attempt to
 * obtain a pollhead.  The events from this second call are discarded.
 */
if ((pdp->pd_events & POLLET) && error == 0 &&
pdp->pd_php == NULL && php == NULL && revent != 0) {
short levent = 0;
error = VOP_POLL(fp->f_vnode, POLLET,
0, &levent, &php, NULL);
}
curthread->t_pollcache = NULL;
releasef(fd);
/* Leaving with !done, so the resume position is saved below. */
if (error != 0) {
break;
}
/*
 * The pollhead differs from the cached one: switch the association and
 * restart the whole scan.
 */
if (php != NULL && pdp->pd_php != NULL &&
php != pdp->pd_php) {
polldat_disassociate(pdp);
polldat_associate(pdp, php);
ASSERT(BT_TEST(pcp->pc_bitmap, fd));
goto retry;
}
if (revent != 0) {
if (pfdp != NULL) {
/* /dev/poll result record. */
pfdp[fdcnt].fd = fd;
pfdp[fdcnt].events = pdp->pd_events;
pfdp[fdcnt].revents = revent;
} else if (epoll != NULL) {
/* epoll result record. */
epoll_event_t *ep = &epoll[fdcnt];
ASSERT(epoll != NULL);
ep->data.u64 = pdp->pd_epolldata;
/* POLLNVAL is reported to epoll consumers as POLLERR. */
if (revent & POLLNVAL) {
revent &= ~POLLNVAL;
revent |= POLLERR;
}
/* Replace POLLRDHUP/POLLWRBAND with EPOLLRDHUP/EPOLLWRBAND. */
if (revent & mask) {
ep->events = (revent & ~mask) |
((revent & POLLRDHUP) ?
EPOLLRDHUP : 0) |
((revent & POLLWRBAND) ?
EPOLLWRBAND : 0);
} else {
ep->events = revent;
}
/* Also report EPOLLWRNORM when it was requested and POLLOUT fired. */
if ((revent & POLLOUT) &&
(pdp->pd_events & EPOLLWRNORM)) {
ep->events |= EPOLLWRNORM;
}
} else {
/*
 * No result buffer at all (dpbuf was NULL).  Either skip this fd, if the
 * thread's pollstate has POLLSTATE_ULFAIL set (the flag is cleared), or
 * count a single hit and stop scanning.
 */
pollstate_t *ps =
curthread->t_pollstate;
VERIFY(fdcnt == 0);
VERIFY(ps != NULL);
if ((ps->ps_flags & POLLSTATE_ULFAIL)
!= 0) {
ps->ps_flags &=
~POLLSTATE_ULFAIL;
continue;
} else {
fdcnt++;
break;
}
}
if (pdp->pd_events & POLLONESHOT) {
/*
 * One-shot entry has fired: leave only the POLLONESHOT marker in
 * pd_events, drop the pollhead and clear the bitmap bit.
 */
pdp->pd_events = POLLONESHOT;
polldat_disassociate(pdp);
BT_CLEAR(pcp->pc_bitmap, fd);
} else if (pdp->pd_events & POLLET) {
/*
 * Edge-triggered entry: attach the pollhead if there was none, and clear
 * the bitmap bit once a pollhead is attached.  NOTE(review): presumably
 * the bit is set again by a later wakeup - confirm.
 */
if (php != NULL &&
pdp->pd_php == NULL) {
polldat_associate(pdp, php);
}
if (pdp->pd_php != NULL) {
BT_CLEAR(pcp->pc_bitmap, fd);
}
}
fdcnt++;
} else if (php != NULL) {
/*
 * No events, but a pollhead was returned.  With a pollhead already
 * attached and no PC_POLLWAKE flagged, the bit is cleared.
 */
if ((pdp->pd_php != NULL) &&
((pcp->pc_flag & PC_POLLWAKE) == 0)) {
BT_CLEAR(pcp->pc_bitmap, fd);
}
/* First pollhead for this entry: attach it and poll the fd again. */
if (pdp->pd_php == NULL) {
polldat_associate(pdp, php);
goto repoll;
}
}
} else {
/* No bit set in the current window: wrap around once, then finish. */
if (!no_wrap) {
start = 0;
end = ostart - 1;
no_wrap = B_TRUE;
} else {
done = B_TRUE;
}
}
}
/* The scan stopped early: remember where to resume. */
if (!done) {
pcp->pc_mapstart = start;
}
ASSERT(*fdcntp == 0);
*fdcntp = fdcnt;
return (error);
}
/*
 * open entry point.  Every open gets its own minor number, so each handle
 * has a private dp_entry_t and pollcache.
 *
 * A free slot in devpolltbl is claimed with the RESERVED placeholder while
 * devpoll_lock is held.  If the table is full it is grown by DEVPOLLSIZE
 * slots (capped at MAXMIN); ENXIO is returned once it cannot grow any
 * further.  The entry and its pollcache are allocated with the lock
 * dropped, *devp is rewritten to carry the chosen minor, and finally the
 * entry replaces the placeholder.
 *
 * Returns 0 on success or ENXIO when no minor number is available.
 */
static int
dpopen(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	minor_t slot;
	dp_entry_t *entry;
	pollcache_t *cache;

	ASSERT(devpoll_init);
	ASSERT(dptblsize <= MAXMIN);

	mutex_enter(&devpoll_lock);

	/* Find the first unused slot, if there is one. */
	slot = 0;
	while (slot < dptblsize && devpolltbl[slot] != NULL) {
		slot++;
	}

	if (slot < dptblsize) {
		devpolltbl[slot] = (dp_entry_t *)RESERVED;
	} else {
		const size_t prevsize = dptblsize;
		dp_entry_t **grown;

		if (prevsize >= MAXMIN) {
			mutex_exit(&devpoll_lock);
			return (ENXIO);
		}

		/* Grow the table; slot already indexes the first new entry. */
		dptblsize = prevsize + DEVPOLLSIZE;
		if (dptblsize > MAXMIN) {
			dptblsize = MAXMIN;
		}
		grown = kmem_zalloc(sizeof (caddr_t) * dptblsize, KM_SLEEP);
		bcopy(devpolltbl, grown, sizeof (caddr_t) * prevsize);
		kmem_free(devpolltbl, sizeof (caddr_t) * prevsize);
		devpolltbl = grown;
		devpolltbl[slot] = (dp_entry_t *)RESERVED;
	}
	mutex_exit(&devpoll_lock);

	/* Build the per-open state without holding the table lock. */
	entry = kmem_zalloc(sizeof (dp_entry_t), KM_SLEEP);
	cache = pcache_alloc();
	entry->dpe_pcache = cache;
	cache->pc_pid = -1;	/* no process has used the handle yet */
	*devp = makedevice(getmajor(*devp), slot);

	/* Publish the entry in place of the placeholder. */
	mutex_enter(&devpoll_lock);
	ASSERT(slot < dptblsize);
	ASSERT(devpolltbl[slot] == (dp_entry_t *)RESERVED);
	devpolltbl[slot] = entry;
	mutex_exit(&devpoll_lock);

	return (0);
}
/*
 * write entry point: add, update or remove cached fds.
 *
 * The written data is an array of pollfd_t records, or dvpoll_epollfd_t
 * records when the handle is in epoll-compatibility mode.  A record whose
 * events member equals POLLREMOVE removes the fd from the cache; any other
 * record adds the fd or merges new events into an existing entry.
 *
 * Only one writer is active on a handle at a time (DP_WRITER_PRESENT);
 * others wait on dpe_cv.  Non-epoll writers wait interruptibly, epoll
 * writers do not.
 *
 * Returns 0 on success, otherwise one of:
 *   EACCES - non-epoll handle already bound to a different process
 *   EINVAL - negative uio_resid, more than two records on an epoll
 *            handle, or a record count above the RLIMIT_NOFILE value
 *   EINTR  - interrupted while waiting for the writer slot (non-epoll)
 *   EBUSY  - epoll mode was enabled while this non-epoll write waited
 *   EBADF  - epoll: fd out of range or not open
 *   EEXIST - epoll: fd already cached with the same generation
 *   ENOENT - epoll: removal of an fd that is not cached
 *   or the error returned by uiocopy() or VOP_POLL().
 *
 * uio_resid is consumed (uioskip) only when the whole write succeeded.
 */
static int
dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
{
	minor_t minor;
	dp_entry_t *dpep;
	pollcache_t *pcp;
	pollfd_t *pollfdp, *pfdp;
	dvpoll_epollfd_t *epfdp;
	uintptr_t limit;
	int error;
	uint_t size;
	size_t copysize, uiosize;
	nfds_t pollfdnum;
	boolean_t is_epoll, fds_added = B_FALSE;

	minor = getminor(dev);

	mutex_enter(&devpoll_lock);
	ASSERT(minor < dptblsize);
	dpep = devpolltbl[minor];
	ASSERT(dpep != NULL);
	mutex_exit(&devpoll_lock);

	/* Snapshot the mode; it selects the record size of the input. */
	mutex_enter(&dpep->dpe_lock);
	pcp = dpep->dpe_pcache;
	is_epoll = (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0;
	size = (is_epoll) ? sizeof (dvpoll_epollfd_t) : sizeof (pollfd_t);
	mutex_exit(&dpep->dpe_lock);

	/*
	 * A non-epoll handle belongs to the first process that uses it
	 * (pc_pid starts out as -1); other processes are refused.
	 */
	if (!is_epoll && curproc->p_pid != pcp->pc_pid) {
		if (pcp->pc_pid != -1) {
			return (EACCES);
		}
		pcp->pc_pid = curproc->p_pid;
	}

	if (uiop->uio_resid < 0) {
		return (EINVAL);
	}

	uiosize = (size_t)uiop->uio_resid;
	pollfdnum = uiosize / size;

	/* epoll handles accept at most two records per write. */
	if (is_epoll && pollfdnum > 2) {
		return (EINVAL);
	}

	/*
	 * Small writes skip the resource-limit check (and p_lock); larger
	 * ones must not exceed the enforced RLIMIT_NOFILE value.
	 */
	if (pollfdnum > UINT8_MAX) {
		mutex_enter(&curproc->p_lock);
		if (pollfdnum >
		    (uint_t)rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE],
		    curproc->p_rctls, curproc)) {
			(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
			    curproc->p_rctls, curproc, RCA_SAFE);
			mutex_exit(&curproc->p_lock);
			return (EINVAL);
		}
		mutex_exit(&curproc->p_lock);
	}

	/*
	 * Copy in the record array.  limit marks the end of the last whole
	 * record; trailing bytes that do not fill a record are ignored by
	 * the loop below.
	 */
	pollfdp = kmem_alloc(uiosize, KM_SLEEP);
	limit = (uintptr_t)pollfdp + (pollfdnum * size);

	/* The device is not seekable: reset the offset on every call. */
	uiop->uio_loffset = 0;

	/*
	 * uiocopy() reports the number of bytes copied through copysize;
	 * uio_resid is only advanced (uioskip below) once the write has
	 * fully succeeded.
	 */
	if ((error = uiocopy((caddr_t)pollfdp, uiosize, UIO_WRITE, uiop,
	    &copysize)) != 0) {
		kmem_free(pollfdp, uiosize);
		return (error);
	}

	/* Wait for our turn as the single active writer on this handle. */
	mutex_enter(&dpep->dpe_lock);
	dpep->dpe_writerwait++;
	while ((dpep->dpe_flag & DP_WRITER_PRESENT) != 0) {
		ASSERT(dpep->dpe_refcnt != 0);

		/* epoll writers wait without being interruptible. */
		if (is_epoll) {
			cv_wait(&dpep->dpe_cv, &dpep->dpe_lock);
			continue;
		}

		/* Non-epoll writers give up with EINTR when signalled. */
		if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
			dpep->dpe_writerwait--;
			mutex_exit(&dpep->dpe_lock);
			kmem_free(pollfdp, uiosize);
			return (EINTR);
		}
	}
	dpep->dpe_writerwait--;
	dpep->dpe_flag |= DP_WRITER_PRESENT;
	dpep->dpe_refcnt++;

	/*
	 * The handle was switched to epoll mode while we waited, but the
	 * input was sized and validated as non-epoll: bail out.
	 */
	if (!is_epoll && (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0) {
		error = EBUSY;
		goto bypass;
	}
	mutex_exit(&dpep->dpe_lock);

	(void) pollstate_create();
	VERIFY(pollstate_enter(pcp) == PSE_SUCCESS);

	if (pcp->pc_bitmap == NULL) {
		pcache_create(pcp, pollfdnum);
	}

	/* Walk the records; size is the stride of the active record type. */
	for (pfdp = pollfdp; (uintptr_t)pfdp < limit;
	    pfdp = (pollfd_t *)((uintptr_t)pfdp + size)) {
		int fd = pfdp->fd;
		polldat_t *pdp;

		/* Out-of-range fd: an error for epoll, ignored otherwise. */
		if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles) {
			if (is_epoll) {
				error = EBADF;
				break;
			}
			continue;
		}

		pdp = pcache_lookup_fd(pcp, fd);
		if (pfdp->events != POLLREMOVE) {
			uf_entry_gen_t gen;
			file_t *fp = NULL;
			struct pollhead *php = NULL;

			/*
			 * epoll mode validates the fd before anything is
			 * allocated for it.
			 */
			if (is_epoll) {
				if ((fp = getf_gen(fd, &gen)) == NULL) {
					error = EBADF;
					break;
				}
			}

			if (pdp == NULL) {
				pdp = pcache_alloc_fd(0);
				pdp->pd_fd = fd;
				pdp->pd_pcache = pcp;
				pcache_insert_fd(pcp, pdp, pollfdnum);
			}

			if (is_epoll) {
				/*
				 * An active entry with the same generation
				 * is a duplicate add.
				 */
				if (pdp->pd_fp != NULL && pdp->pd_gen == gen) {
					error = EEXIST;
					releasef(fd);
					break;
				}

				/*
				 * Start from a clean event set that always
				 * includes POLLERR and POLLHUP, and record
				 * the caller's epoll data.
				 */
				pdp->pd_events = POLLERR|POLLHUP;
				epfdp = (dvpoll_epollfd_t *)pfdp;
				pdp->pd_epolldata = epfdp->dpep_data;
			}

			ASSERT(pdp->pd_fd == fd);
			ASSERT(pdp->pd_pcache == pcp);

			/* pc_lock is dropped while the bitmap is grown. */
			if (fd >= pcp->pc_mapsize) {
				mutex_exit(&pcp->pc_lock);
				pcache_grow_map(pcp, fd);
				mutex_enter(&pcp->pc_lock);
			}
			if (fd > pcp->pc_mapend) {
				pcp->pc_mapend = fd;
			}

			if (!is_epoll) {
				ASSERT(fp == NULL);

				/*
				 * The fd cannot be resolved.  No error is
				 * returned; the bit is set so that a later
				 * scan examines the fd.
				 */
				if ((fp = getf_gen(fd, &gen)) == NULL) {
					BT_SET(pcp->pc_bitmap, fd);
					pdp->pd_events |= pfdp->events;
					continue;
				}

				/* Same file, same events: nothing to do. */
				if ((pdp->pd_events == pfdp->events) &&
				    (pdp->pd_fp == fp)) {
					releasef(fd);
					continue;
				}
			}

			/*
			 * Poll the fd once.  The pollcache is published in
			 * the thread for the duration of the VOP_POLL()
			 * calls.
			 */
			curthread->t_pollcache = pcp;
			error = VOP_POLL(fp->f_vnode, pfdp->events, 0,
			    &pfdp->revents, &php, NULL);

			/*
			 * Edge-triggered request that produced no pollhead:
			 * poll again asking only for POLLET in an attempt
			 * to obtain one.  The events from this second call
			 * are discarded.
			 */
			if ((pfdp->events & POLLET) != 0 && error == 0 &&
			    php == NULL) {
				short levent = 0;

				error = VOP_POLL(fp->f_vnode, POLLET, 0,
				    &levent, &php, NULL);
			}
			curthread->t_pollcache = NULL;

			/*
			 * The bit is set even when VOP_POLL() failed, so
			 * the fd is examined by the next scan.
			 */
			BT_SET(pcp->pc_bitmap, fd);
			if (error != 0) {
				releasef(fd);
				break;
			}

			pdp->pd_fp = fp;
			pdp->pd_gen = gen;
			pdp->pd_events |= pfdp->events;

			/* Attach the pollhead, replacing a different one. */
			if (php != NULL) {
				if (pdp->pd_php == NULL) {
					polldat_associate(pdp, php);
				} else {
					if (pdp->pd_php != php) {
						polldat_disassociate(pdp);
						polldat_associate(pdp, php);
					}
				}
			}
			fds_added = B_TRUE;
			releasef(fd);
		} else {
			/*
			 * POLLREMOVE.  Removing an fd that is not cached is
			 * an error for epoll and a no-op otherwise.
			 */
			if (pdp == NULL || pdp->pd_fp == NULL) {
				if (is_epoll) {
					error = ENOENT;
					break;
				}
				continue;
			}
			ASSERT(pdp->pd_fd == fd);
			pdp->pd_fp = NULL;
			pdp->pd_events = 0;
			ASSERT(pdp->pd_thread == NULL);
			polldat_disassociate(pdp);
			BT_CLEAR(pcp->pc_bitmap, fd);
		}
	}

	/* Wake waiters on this cache, and its parents, to see the new fds. */
	if (fds_added) {
		cv_broadcast(&pcp->pc_cv);
		pcache_wake_parents(pcp);
	}
	pollstate_exit(pcp);

	mutex_enter(&dpep->dpe_lock);
bypass:
	/* Give up the writer slot and wake anyone waiting on the handle. */
	dpep->dpe_flag &= ~DP_WRITER_PRESENT;
	dpep->dpe_refcnt--;
	cv_broadcast(&dpep->dpe_cv);
	mutex_exit(&dpep->dpe_lock);
	kmem_free(pollfdp, uiosize);

	if (error == 0) {
		uioskip(uiop, copysize);
	}
	return (error);
}
/*
 * Undo the temporary signal mask that DP_PPOLL installs.
 *
 * Does nothing when ksetp is NULL.  Otherwise, under p->p_lock, the
 * thread's held-signal mask is restored from lwp->lwp_sigoldmask and
 * T_TOMASK is cleared, but only if lwp->lwp_cursig is 0.
 *
 * The macro relies on the locals p, lwp and t of the calling scope.  It is
 * wrapped in do { } while (0) so that it behaves as a single statement
 * (for example as the body of an if/else), and its argument is
 * parenthesized.
 */
#define DP_SIGMASK_RESTORE(ksetp) do { \
	if ((ksetp) != NULL) { \
		mutex_enter(&p->p_lock); \
		if (lwp->lwp_cursig == 0) { \
			t->t_hold = lwp->lwp_sigoldmask; \
			t->t_flag &= ~T_TOMASK; \
		} \
		mutex_exit(&p->p_lock); \
	} \
} while (0)
/*
 * ioctl entry point.
 *
 *   DP_EPOLLCOMPAT - switch the handle to epoll-compatibility mode.  Only
 *                    allowed while dpe_refcnt is 0 (EBUSY otherwise).
 *                    Nothing in this function clears the flag again.
 *   DP_POLL        - copy in a dvpoll request, scan the pollcache and copy
 *                    out up to dp_nfds results; *rvalp is the count.
 *   DP_PPOLL       - as DP_POLL, with the signal mask from dp_setp (if not
 *                    NULL) installed for the duration of the call.
 *   DP_ISPOLLED    - report whether pollfd.fd is cached: if so, revents is
 *                    set to the cached event mask and *rvalp is 1.
 *
 * Returns 0 on success, otherwise EBUSY, EACCES, EINTR, EFAULT, EINVAL
 * (unknown cmd) or an error propagated from dp_pcache_poll().
 *
 * Apart from DP_EPOLLCOMPAT, every command takes a reference on the entry
 * (dpe_refcnt) after waiting for active and waiting writers to clear, and
 * each exit path drops it with DP_REFRELE().
 */
static int
dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
minor_t minor;
dp_entry_t *dpep;
pollcache_t *pcp;
hrtime_t now;
int error = 0;
boolean_t is_epoll;
STRUCT_DECL(dvpoll, dvpoll);
/*
 * Capture the start time before anything below can sleep.  now is only
 * initialized for the two commands that read it.
 */
if (cmd == DP_POLL || cmd == DP_PPOLL) {
now = gethrtime();
}
minor = getminor(dev);
mutex_enter(&devpoll_lock);
ASSERT(minor < dptblsize);
dpep = devpolltbl[minor];
mutex_exit(&devpoll_lock);
ASSERT(dpep != NULL);
pcp = dpep->dpe_pcache;
mutex_enter(&dpep->dpe_lock);
is_epoll = (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0;
if (cmd == DP_EPOLLCOMPAT) {
/* The mode cannot change while other operations hold a reference. */
if (dpep->dpe_refcnt != 0) {
mutex_exit(&dpep->dpe_lock);
return (EBUSY);
}
dpep->dpe_flag |= DP_ISEPOLLCOMPAT;
/* Mirror the mode in the pollcache flags. */
mutex_enter(&pcp->pc_lock);
pcp->pc_flag |= PC_EPOLL;
mutex_exit(&pcp->pc_lock);
mutex_exit(&dpep->dpe_lock);
return (0);
}
/*
 * A non-epoll handle belongs to the first process that uses it (pc_pid
 * is -1 until then); other processes are refused.
 */
if (!is_epoll && curproc->p_pid != pcp->pc_pid) {
if (pcp->pc_pid != -1) {
mutex_exit(&dpep->dpe_lock);
return (EACCES);
}
pcp->pc_pid = curproc->p_pid;
}
/* Wait, interruptibly, until no writer is active or waiting. */
while ((dpep->dpe_flag & DP_WRITER_PRESENT) != 0 ||
(dpep->dpe_writerwait != 0)) {
if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) {
mutex_exit(&dpep->dpe_lock);
return (EINTR);
}
}
dpep->dpe_refcnt++;
mutex_exit(&dpep->dpe_lock);
switch (cmd) {
case DP_POLL:
case DP_PPOLL:
{
pollstate_t *ps;
nfds_t nfds;
int fdcnt = 0;
size_t size, fdsize, dpsize;
hrtime_t deadline = 0;
k_sigset_t *ksetp = NULL;
k_sigset_t kset;
sigset_t set;
/* t, lwp and p are also used by DP_SIGMASK_RESTORE(). */
kthread_t *t = curthread;
klwp_t *lwp = ttolwp(t);
struct proc *p = ttoproc(curthread);
STRUCT_INIT(dvpoll, mode);
/*
 * DP_POLL copies in only the part of the structure between dp_fds and
 * dp_setp; DP_PPOLL copies in all of it.  NOTE(review): this assumes
 * dp_fds is the first member of struct dvpoll - confirm.
 */
if (cmd == DP_POLL) {
dpsize = (uintptr_t)STRUCT_FADDR(dvpoll, dp_setp) -
(uintptr_t)STRUCT_FADDR(dvpoll, dp_fds);
} else {
ASSERT(cmd == DP_PPOLL);
dpsize = STRUCT_SIZE(dvpoll);
}
/* With FKIOCTL set, arg is treated as a kernel address. */
if ((mode & FKIOCTL) != 0) {
bcopy((caddr_t)arg, STRUCT_BUF(dvpoll), dpsize);
error = 0;
} else {
error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll),
dpsize);
}
if (error) {
DP_REFRELE(dpep);
return (EFAULT);
}
/*
 * A positive dp_timeout (milliseconds) becomes an absolute deadline in
 * nanoseconds, at least one tick after the time captured on entry.
 * Zero and negative values are kept as they are; zero means "do not
 * block" below.
 */
deadline = STRUCT_FGET(dvpoll, dp_timeout);
if (deadline > 0) {
deadline = MSEC2NSEC(deadline);
deadline = MAX(deadline, nsec_per_tick);
deadline += now;
}
if (cmd == DP_PPOLL) {
void *setp = STRUCT_FGETP(dvpoll, dp_setp);
if (setp != NULL) {
if ((mode & FKIOCTL) != 0) {
/* With FKIOCTL set, setp is used directly as a k_sigset_t. */
ksetp = (k_sigset_t *)setp;
} else {
if (copyin(setp, &set, sizeof (set))) {
DP_REFRELE(dpep);
return (EFAULT);
}
sigutok(&set, &kset);
ksetp = &kset;
}
/*
 * Install the temporary mask: the old one is saved in lwp_sigoldmask
 * and T_TOMASK is set.
 */
mutex_enter(&p->p_lock);
schedctl_finish_sigblock(t);
lwp->lwp_sigoldmask = t->t_hold;
t->t_hold = *ksetp;
t->t_flag |= T_TOMASK;
/*
 * A zero-length interruptible wait.  NOTE(review): presumably this is
 * used only to detect a signal that is pending under the new mask; a
 * return of 0 is treated as "interrupted" - confirm.
 */
if (!cv_reltimedwait_sig(&t->t_delay_cv,
&p->p_lock, 0, TR_CLOCK_TICK)) {
/* Put the old mask back if no signal is actually current. */
if (lwp->lwp_cursig == 0) {
t->t_hold = lwp->lwp_sigoldmask;
t->t_flag &= ~T_TOMASK;
}
mutex_exit(&p->p_lock);
DP_REFRELE(dpep);
return (EINTR);
}
mutex_exit(&p->p_lock);
}
}
/*
 * dp_nfds == 0: the call is only a sleep, so the pollcache is not
 * involved and the reference is dropped first.
 */
if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) {
DP_REFRELE(dpep);
if (deadline == 0) {
DP_SIGMASK_RESTORE(ksetp);
return (0);
}
/*
 * Keep waiting while the wait returns a positive value; 0 is reported
 * as EINTR and a negative value as success.
 */
mutex_enter(&curthread->t_delay_lock);
while ((error =
cv_timedwait_sig_hrtime(&curthread->t_delay_cv,
&curthread->t_delay_lock, deadline)) > 0)
continue;
mutex_exit(&curthread->t_delay_lock);
DP_SIGMASK_RESTORE(ksetp);
return (error == 0 ? EINTR : 0);
}
/* Result records are epoll_event_t in epoll mode, pollfd_t otherwise. */
if (is_epoll) {
size = nfds * (fdsize = sizeof (epoll_event_t));
} else {
size = nfds * (fdsize = sizeof (pollfd_t));
}
ps = pollstate_create();
/*
 * The result buffer hangs off the thread's pollstate and only ever
 * grows.  Before growing, nfds is clamped to p_fno_ctl when it is more
 * than twice that limit.
 */
if (ps->ps_dpbufsize < size) {
mutex_enter(&p->p_lock);
if ((nfds >> 1) > p->p_fno_ctl) {
nfds = p->p_fno_ctl;
size = nfds * fdsize;
}
mutex_exit(&p->p_lock);
if (ps->ps_dpbufsize < size) {
kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize);
ps->ps_dpbuf = kmem_zalloc(size, KM_SLEEP);
ps->ps_dpbufsize = size;
}
}
VERIFY(pollstate_enter(pcp) == PSE_SUCCESS);
for (;;) {
pcp->pc_flag &= ~PC_POLLWAKE;
/*
 * Mark all child links stale before scanning; links still stale after
 * an empty scan are purged below.
 */
pcachelink_mark_stale(pcp);
error = dp_pcache_poll(dpep, ps->ps_dpbuf,
pcp, nfds, &fdcnt);
if (fdcnt > 0 || error != 0)
break;
pcachelink_purge_stale(pcp);
/* PC_POLLWAKE was set during the scan: scan again right away. */
if (pcp->pc_flag & PC_POLLWAKE)
continue;
/* Zero timeout: return immediately with no results. */
if (deadline == 0) {
break;
}
error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
&pcp->pc_lock, deadline);
/*
 * A return of 0 is reported as EINTR and a negative value ends the
 * call with success and no results; a positive value means poll again.
 */
if (error <= 0) {
error = (error == 0) ? EINTR : 0;
break;
} else {
error = 0;
}
}
pollstate_exit(pcp);
DP_SIGMASK_RESTORE(ksetp);
/* copyout() is used for the results whether or not FKIOCTL is set. */
if (error == 0 && fdcnt > 0) {
if (copyout(ps->ps_dpbuf,
STRUCT_FGETP(dvpoll, dp_fds), fdcnt * fdsize)) {
DP_REFRELE(dpep);
return (EFAULT);
}
*rvalp = fdcnt;
}
break;
}
case DP_ISPOLLED:
{
pollfd_t pollfd;
polldat_t *pdp;
/* dvpoll is initialized here but not otherwise used by this case. */
STRUCT_INIT(dvpoll, mode);
error = copyin((caddr_t)arg, &pollfd, sizeof (pollfd_t));
if (error) {
DP_REFRELE(dpep);
return (EFAULT);
}
mutex_enter(&pcp->pc_lock);
/* Nothing has been cached yet, so the fd cannot be polled. */
if (pcp->pc_hash == NULL) {
mutex_exit(&pcp->pc_lock);
DP_REFRELE(dpep);
return (0);
}
/* A negative fd is never cached; error is still 0 at this point. */
if (pollfd.fd < 0) {
mutex_exit(&pcp->pc_lock);
break;
}
pdp = pcache_lookup_fd(pcp, pollfd.fd);
/* Only an active entry (pd_fp != NULL) counts as polled. */
if ((pdp != NULL) && (pdp->pd_fd == pollfd.fd) &&
(pdp->pd_fp != NULL)) {
pollfd.revents = pdp->pd_events;
if (copyout(&pollfd, (caddr_t)arg, sizeof (pollfd_t))) {
mutex_exit(&pcp->pc_lock);
DP_REFRELE(dpep);
return (EFAULT);
}
*rvalp = 1;
}
mutex_exit(&pcp->pc_lock);
break;
}
default:
DP_REFRELE(dpep);
return (EINVAL);
}
DP_REFRELE(dpep);
return (error);
}
/*
 * poll entry point: lets a /dev/poll handle itself be polled.
 *
 * Handles that are not in epoll-compatibility mode always report POLLERR.
 * For epoll handles the pollcache is entered through pollstate_enter();
 * on success the cache is scanned for at most one ready fd (only when the
 * caller asked for POLLIN or POLLRDNORM), POLLIN|POLLRDNORM is reported
 * if one was found, and this cache is linked as a child of the pollcache
 * at the bottom of the thread's pollstate stack.
 *
 * Returns 0, EINVAL (PSE_FAIL_DEPTH), ELOOP (PSE_FAIL_LOOP or
 * PSE_FAIL_DEADLOCK), or the error from dp_pcache_poll().  Any other
 * pollstate_enter() failure is reported as POLLERR with a return of 0.
 */
static int
dppoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	minor_t idx = getminor(dev);
	dp_entry_t *dpep;
	pollcache_t *cache;
	int status;
	int rv = 0;

	mutex_enter(&devpoll_lock);
	ASSERT(idx < dptblsize);
	dpep = devpolltbl[idx];
	ASSERT(dpep != NULL);
	mutex_exit(&devpoll_lock);

	mutex_enter(&dpep->dpe_lock);
	if ((dpep->dpe_flag & DP_ISEPOLLCOMPAT) == 0) {
		/* Non-epoll handles do not take part in recursive polling. */
		*reventsp = POLLERR;
		mutex_exit(&dpep->dpe_lock);
		return (0);
	}
	dpep->dpe_refcnt++;
	cache = dpep->dpe_pcache;
	mutex_exit(&dpep->dpe_lock);

	status = pollstate_enter(cache);
	if (status == PSE_SUCCESS) {
		pollstate_t *ps = curthread->t_pollstate;
		int found = 0;

		/* Only readable-type interest warrants a scan of the cache. */
		if ((events & (POLLIN|POLLRDNORM)) != 0) {
			rv = dp_pcache_poll(dpep, NULL, cache, 1, &found);
		}

		if (rv == 0 && found > 0) {
			*reventsp = POLLIN|POLLRDNORM;
		} else {
			*reventsp = 0;
		}

		pcachelink_assoc(cache, ps->ps_pc_stack[0]);
		pollstate_exit(cache);
	} else if (status == PSE_FAIL_DEPTH) {
		rv = EINVAL;
	} else if (status == PSE_FAIL_LOOP || status == PSE_FAIL_DEADLOCK) {
		rv = ELOOP;
	} else {
		/* Anything else is reported as an error event, not a failure. */
		*reventsp = POLLERR;
		rv = 0;
	}

	DP_REFRELE(dpep);
	return (rv);
}
/*
 * close entry point: tear down the per-open state created by dpopen().
 *
 * The entry is first unhooked from devpolltbl under devpoll_lock.  Every
 * cached polldat is then detached from its pollhead and marked inactive;
 * this walk is done without taking pc_lock.  The function waits for
 * pc_busy to drain, purges any parent/child pollcache links (under
 * pc_lock), destroys the pollcache and frees the entry.  Always returns 0.
 */
static int
dpclose(dev_t dev, int flag, int otyp, cred_t *credp)
{
	minor_t idx = getminor(dev);
	dp_entry_t *entry;
	pollcache_t *cache;
	polldat_t **buckets;
	int b;

	/* Unpublish the entry so the minor number can be reused. */
	mutex_enter(&devpoll_lock);
	entry = devpolltbl[idx];
	ASSERT(entry != NULL);
	devpolltbl[idx] = NULL;
	mutex_exit(&devpoll_lock);

	cache = entry->dpe_pcache;
	ASSERT(cache != NULL);

	/* Detach every cached fd from its pollhead. */
	buckets = cache->pc_hash;
	for (b = 0; b < cache->pc_hashsize; b++) {
		polldat_t *pd = buckets[b];

		while (pd != NULL) {
			polldat_disassociate(pd);
			pd->pd_fp = NULL;
			pd = pd->pd_hashnext;
		}
	}

	/* Wait until nobody is marked busy on this pollcache any more. */
	mutex_enter(&cache->pc_no_exit);
	ASSERT(cache->pc_busy >= 0);
	while (cache->pc_busy > 0) {
		cv_wait(&cache->pc_busy_cv, &cache->pc_no_exit);
	}
	mutex_exit(&cache->pc_no_exit);

	/* Link manipulation requires pc_lock, so take it for the purge. */
	if (cache->pc_parents != NULL || cache->pc_children != NULL) {
		mutex_enter(&cache->pc_lock);
		pcachelink_purge_all(cache);
		mutex_exit(&cache->pc_lock);
	}

	pcache_destroy(cache);
	ASSERT(entry->dpe_refcnt == 0);
	kmem_free(entry, sizeof (dp_entry_t));
	return (0);
}
/*
 * Drop one reference on a pollcache link.  The caller must hold
 * pl->pcl_lock; the lock is always released (or destroyed) on return.
 *
 * When the last reference goes away the link must already be PCL_INVALID
 * and fully unhooked from both pollcaches; it is then marked PCL_FREE and
 * freed together with its lock.
 */
static void
pcachelink_locked_rele(pcachelink_t *pl)
{
	ASSERT(MUTEX_HELD(&pl->pcl_lock));
	VERIFY(pl->pcl_refcnt >= 1);

	pl->pcl_refcnt -= 1;
	if (pl->pcl_refcnt != 0) {
		/* Still referenced from the other side: just unlock. */
		mutex_exit(&pl->pcl_lock);
		return;
	}

	VERIFY(pl->pcl_state == PCL_INVALID);
	ASSERT(pl->pcl_parent_pc == NULL);
	ASSERT(pl->pcl_child_pc == NULL);
	ASSERT(pl->pcl_parent_next == NULL);
	ASSERT(pl->pcl_child_next == NULL);

	pl->pcl_state = PCL_FREE;
	mutex_destroy(&pl->pcl_lock);
	kmem_free(pl, sizeof (pcachelink_t));
}
/*
 * Make sure a valid link exists from child to parent.  Both pollcaches'
 * pc_lock must be held by the caller.
 *
 * The child's list of parent links is walked: invalid links encountered on
 * the way are unhooked and released, and an existing link to parent is
 * simply (re)marked PCL_VALID.  If no such link exists, a new one is
 * created, inserted at the head of both the parent's child list and the
 * child's parent list, and holds one reference for each list.
 */
static void
pcachelink_assoc(pollcache_t *child, pollcache_t *parent)
{
	pcachelink_t **linkp = &child->pc_parents;
	pcachelink_t *link;

	ASSERT(MUTEX_HELD(&child->pc_lock));
	ASSERT(MUTEX_HELD(&parent->pc_lock));

	while ((link = *linkp) != NULL) {
		mutex_enter(&link->pcl_lock);

		if (link->pcl_state == PCL_INVALID) {
			/* Unhook the dead link from the child's side. */
			*linkp = link->pcl_parent_next;
			link->pcl_child_pc = NULL;
			link->pcl_parent_next = NULL;
			pcachelink_locked_rele(link);
			continue;
		}

		if (link->pcl_parent_pc == parent) {
			/* Found the link; make sure it is marked valid. */
			ASSERT(link->pcl_state == PCL_VALID ||
			    link->pcl_state == PCL_STALE);
			link->pcl_state = PCL_VALID;
			mutex_exit(&link->pcl_lock);
			return;
		}

		linkp = &link->pcl_parent_next;
		mutex_exit(&link->pcl_lock);
	}

	/* No link to this parent yet: build one. */
	link = kmem_zalloc(sizeof (pcachelink_t), KM_SLEEP);
	mutex_init(&link->pcl_lock, NULL, MUTEX_DEFAULT, NULL);

	/* Hook into the parent's list of children. */
	link->pcl_parent_pc = parent;
	link->pcl_child_next = parent->pc_children;
	parent->pc_children = link;
	link->pcl_refcnt++;

	/* Hook into the child's list of parents. */
	link->pcl_child_pc = child;
	link->pcl_parent_next = child->pc_parents;
	child->pc_parents = link;
	link->pcl_refcnt++;

	link->pcl_state = PCL_VALID;
}
/*
 * Mark every child link of pcp as PCL_STALE.  The caller must hold
 * pcp->pc_lock.  Links that are already PCL_INVALID are unhooked from the
 * child list and released instead.
 */
static void
pcachelink_mark_stale(pollcache_t *pcp)
{
	pcachelink_t **linkp = &pcp->pc_children;
	pcachelink_t *link;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));

	while ((link = *linkp) != NULL) {
		mutex_enter(&link->pcl_lock);

		if (link->pcl_state != PCL_INVALID) {
			link->pcl_state = PCL_STALE;
			linkp = &link->pcl_child_next;
			mutex_exit(&link->pcl_lock);
			continue;
		}

		/* Dead link: remove it from the parent's side. */
		*linkp = link->pcl_child_next;
		link->pcl_parent_pc = NULL;
		link->pcl_child_next = NULL;
		pcachelink_locked_rele(link);
	}
}
/*
 * Remove every child link of pcp that is PCL_STALE or PCL_INVALID.  The
 * caller must hold pcp->pc_lock.  Stale links are invalidated first; the
 * link is then unhooked from the child list and released.  Links in any
 * other state are left in place.
 */
static void
pcachelink_purge_stale(pollcache_t *pcp)
{
	pcachelink_t **linkp = &pcp->pc_children;
	pcachelink_t *link;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));

	while ((link = *linkp) != NULL) {
		mutex_enter(&link->pcl_lock);

		/* A stale link is invalidated and then handled as invalid. */
		if (link->pcl_state == PCL_STALE) {
			link->pcl_state = PCL_INVALID;
		}

		if (link->pcl_state == PCL_INVALID) {
			*linkp = link->pcl_child_next;
			link->pcl_parent_pc = NULL;
			link->pcl_child_next = NULL;
			pcachelink_locked_rele(link);
		} else {
			linkp = &link->pcl_child_next;
			mutex_exit(&link->pcl_lock);
		}
	}
}
/*
 * Invalidate and unhook every link of pcp, in both directions.  The caller
 * must hold pcp->pc_lock.  Each link is marked PCL_INVALID, removed from
 * this pollcache's list and has this side's reference dropped.  On return
 * both pc_parents and pc_children are empty.
 */
static void
pcachelink_purge_all(pollcache_t *pcp)
{
	pcachelink_t *link;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));

	/* Links in which pcp is the child: pop them off the head. */
	while ((link = pcp->pc_parents) != NULL) {
		mutex_enter(&link->pcl_lock);
		link->pcl_state = PCL_INVALID;
		pcp->pc_parents = link->pcl_parent_next;
		link->pcl_child_pc = NULL;
		link->pcl_parent_next = NULL;
		pcachelink_locked_rele(link);
	}

	/* Links in which pcp is the parent: pop them off the head. */
	while ((link = pcp->pc_children) != NULL) {
		mutex_enter(&link->pcl_lock);
		link->pcl_state = PCL_INVALID;
		pcp->pc_children = link->pcl_child_next;
		link->pcl_parent_pc = NULL;
		link->pcl_child_next = NULL;
		pcachelink_locked_rele(link);
	}

	ASSERT(pcp->pc_parents == NULL);
	ASSERT(pcp->pc_children == NULL);
}