#include <sys/param.h>
#include <sys/isa_defs.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mode.h>
#include <sys/proc.h>
#include <sys/uio.h>
#include <sys/poll_impl.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/bitmap.h>
#include <sys/kstat.h>
#include <sys/rctl.h>
#include <sys/port_impl.h>
#include <sys/schedctl.h>
#include <sys/cpu.h>
/*
 * Pollhead locking.  A pollhead's lock is looked up in the plocks[] array:
 * the pollhead address is shifted right by 8 and masked with (NPHLOCKS - 1)
 * to pick an entry.  The mask only works if NPHLOCKS is a power of two.
 */
#define NPHLOCKS 64
/*
 * NOTE: the PHLOCKADDR() expansion is deliberately left unparenthesized.
 * PHLOCK() appends ".pp_lock" to it, so by operator precedence the result is
 * &(plocks[i].pp_lock), i.e. a pointer to the mutex.  Adding parentheses
 * around PHLOCKADDR() alone would break PHLOCK().
 */
#define PHLOCKADDR(php) &plocks[(((uintptr_t)(php)) >> 8) & (NPHLOCKS - 1)]
#define PHLOCK(php) PHLOCKADDR(php).pp_lock
/* Acquire / release the hashed lock covering pollhead php. */
#define PH_ENTER(php) mutex_enter(PHLOCK(php))
#define PH_EXIT(php) mutex_exit(PHLOCK(php))
/* Event bits honoured in a pollfd; any other bits are masked off. */
#define VALID_POLL_EVENTS (POLLIN | POLLPRI | POLLOUT | POLLRDNORM \
| POLLRDBAND | POLLWRBAND | POLLHUP | POLLERR | POLLNVAL)
/*
 * Named kstat counters for the poll code.  Within this file:
 *   polllistmiss   - bumped in poll_common() when no cached list matches
 *   pollcachehit   - bumped in pcacheset_resolve() on a complete match
 *   pollcachephit  - bumped in pcacheset_resolve() on a partial match
 *   pollcachemiss  - bumped in pcacheset_resolve() on a complete mismatch
 *   pollunlockfail - bumped in pollunlock() when unlocking is refused
 */
static struct {
kstat_named_t polllistmiss;
kstat_named_t pollcachehit;
kstat_named_t pollcachephit;
kstat_named_t pollcachemiss;
kstat_named_t pollunlockfail;
} pollstats = {
{ "polllistmiss", KSTAT_DATA_UINT64 },
{ "pollcachehit", KSTAT_DATA_UINT64 },
{ "pollcachephit", KSTAT_DATA_UINT64 },
{ "pollcachemiss", KSTAT_DATA_UINT64 },
{ "pollunlockfail", KSTAT_DATA_UINT64 }
};
/*
 * Non-static view of the counters as a flat kstat_named_t array plus its
 * element count.  Presumably consumed by kstat setup code elsewhere.
 */
kstat_named_t *pollstats_ptr = (kstat_named_t *)&pollstats;
uint_t pollstats_ndata = sizeof (pollstats) / sizeof (kstat_named_t);
/*
 * One entry of the hashed pollhead lock table.  Only pp_lock is used by the
 * code visible here (via PHLOCK()); the purpose of pp_flag, pp_wait_cv and
 * pp_pad cannot be determined from this part of the file.
 */
struct pplock {
kmutex_t pp_lock;
short pp_flag;
kcondvar_t pp_wait_cv;
int32_t pp_pad;
};
/* The lock table indexed by PHLOCKADDR(). */
static struct pplock plocks[NPHLOCKS];
/*
 * List of pollstates currently blocked waiting for a pollcache lock, and the
 * mutex protecting that list.  Both are used by pollstate_contend().
 */
static kmutex_t pollstate_contenders_lock;
static pollstate_t *pollstate_contenders = NULL;
/* Consistency checkers that exist only in DEBUG builds. */
#ifdef DEBUG
static int pollchecksanity(pollstate_t *, nfds_t);
static int pollcheckxref(pollstate_t *, int);
static void pollcheckphlist(void);
static int pollcheckrevents(pollstate_t *, int, int, int);
static void checkpolldat(pollstate_t *);
#endif
/*
 * Called by pcache_poll() for an fd that appears more than once in a poll
 * list.  pcache_poll() treats a positive return as an error and a negative
 * return as a request to restart its scan.
 */
static int plist_chkdupfd(file_t *, polldat_t *, pollstate_t *, pollfd_t *, int,
int *);
/*
 * Release the calling thread's pollcache lock if the thread holds it.
 *
 * The pollcache used is curthread->t_pollcache when that is set, otherwise
 * the pollstate's own ps_pcache.  On success *lockstate is set to 1 if the
 * lock was held (and has now been dropped) or 0 if it was not held; that
 * value is meant to be handed to pollrelock() afterwards.
 *
 * Returns 0 on success.  Returns -1 without touching any lock when the
 * thread's pollstate has a recursion depth greater than 1; in that case
 * POLLSTATE_ULFAIL is set in ps_flags and the pollunlockfail counter is
 * incremented.
 */
int
pollunlock(int *lockstate)
{
	pollstate_t *ps = curthread->t_pollstate;
	pollcache_t *pcp;

	ASSERT(lockstate != NULL);

	if (ps != NULL && ps->ps_depth > 1) {
		pollstats.pollunlockfail.value.ui64++;
		ps->ps_flags |= POLLSTATE_ULFAIL;
		return (-1);
	}

	pcp = (curthread->t_pollcache != NULL) ?
	    curthread->t_pollcache : ps->ps_pcache;

	if (mutex_owned(&pcp->pc_lock)) {
		*lockstate = 1;
		mutex_exit(&pcp->pc_lock);
		return (0);
	}

	*lockstate = 0;
	return (0);
}
/*
 * Compile-time checks that pc_lock and pc_flag sit at the same offsets in
 * pollcache_t and port_fdcache_t.  Presumably this lets code that treats
 * curthread->t_pollcache as a pollcache_t (see pollunlock()/pollrelock())
 * work when the pointer really refers to a port_fdcache_t -- confirm against
 * the event port code.
 */
CTASSERT(offsetof(pollcache_t, pc_lock) == offsetof(port_fdcache_t, pc_lock));
CTASSERT(offsetof(pollcache_t, pc_flag) == offsetof(port_fdcache_t, pc_flag));
/*
 * Counterpart of pollunlock(): re-acquire the pollcache lock when lockstate
 * (as produced by pollunlock()) is non-zero.  A lockstate of 0 means the
 * lock was not held beforehand, so nothing is done.  The pollcache is chosen
 * the same way as in pollunlock(): t_pollcache if set, else ps_pcache.
 */
void
pollrelock(int lockstate)
{
	pollcache_t *pcp;

	if (lockstate != 0) {
		pcp = curthread->t_pollcache;
		if (pcp == NULL)
			pcp = curthread->t_pollstate->ps_pcache;
		mutex_enter(&pcp->pc_lock);
	}
}
/*
 * Acquire mutex lp on behalf of a poll routine.
 *
 * A non-blocking attempt is made first.  If that fails, the thread's
 * pollcache lock is dropped via pollunlock(), lp is acquired with a blocking
 * mutex_enter(), and the pollcache lock state is then restored with
 * pollrelock().
 *
 * Returns 0 with lp held, or -1 (lp not held) if pollunlock() refused to
 * drop the pollcache lock.  The php argument is not used.
 */
int
polllock(pollhead_t *php, kmutex_t *lp)
{
	int held;

	if (mutex_tryenter(lp) != 0)
		return (0);

	if (pollunlock(&held) != 0)
		return (-1);

	mutex_enter(lp);
	pollrelock(held);
	return (0);
}
/*
 * Common implementation behind pollsys().
 *
 *   fds   - user address of the pollfd array
 *   nfds  - number of entries in fds
 *   tsp   - kernel copy of the timeout: NULL means wait indefinitely, a zero
 *           timespec means do not wait
 *   ksetp - optional signal mask to install for the duration of the call
 *
 * Returns the number of descriptors with non-zero revents, or the result of
 * set_errno() on failure.
 */
static int
poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
{
kthread_t *t = curthread;
klwp_t *lwp = ttolwp(t);
proc_t *p = ttoproc(t);
int fdcnt = 0;
int i;
hrtime_t deadline;
pollfd_t *pollfdp;
pollstate_t *ps;
pollcache_t *pcp;
int error = 0;
nfds_t old_nfds;
int cacheindex = 0;
/*
 * Convert the relative timeout to an absolute hrtime deadline:
 * -1 when there is no timeout, 0 for "do not block", otherwise now plus
 * the requested interval, which is rounded up to at least one clock tick.
 */
if (tsp == NULL) {
deadline = -1;
} else if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
deadline = 0;
} else {
deadline = ((hrtime_t)tsp->tv_sec * NANOSEC) + tsp->tv_nsec;
deadline = MAX(deadline, nsec_per_tick);
deadline += gethrtime();
}
/*
 * Install the caller's signal mask, saving the old one in the lwp.  The
 * zero-length cv_reltimedwait_sig() is used purely as a pending-signal
 * check; a zero return is turned into EINTR.
 */
if (ksetp != NULL) {
mutex_enter(&p->p_lock);
schedctl_finish_sigblock(t);
lwp->lwp_sigoldmask = t->t_hold;
t->t_hold = *ksetp;
t->t_flag |= T_TOMASK;
if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
TR_CLOCK_TICK)) {
mutex_exit(&p->p_lock);
error = EINTR;
goto pollout;
}
mutex_exit(&p->p_lock);
}
/*
 * With no descriptors the call is just a sleep.  Keep waiting while the
 * wait returns a positive value; afterwards 0 maps to EINTR and a
 * negative value to success.
 */
if (nfds == 0) {
if (deadline != 0) {
mutex_enter(&t->t_delay_lock);
while ((error = cv_timedwait_sig_hrtime(&t->t_delay_cv,
&t->t_delay_lock, deadline)) > 0)
continue;
mutex_exit(&t->t_delay_lock);
error = (error == 0) ? EINTR : 0;
}
goto pollout;
}
/* Reject lists longer than the process limit p_fno_ctl. */
if (nfds > p->p_fno_ctl) {
mutex_enter(&p->p_lock);
(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
p->p_rctls, p, RCA_SAFE);
mutex_exit(&p->p_lock);
error = EINVAL;
goto pollout;
}
/* Get (or create) this thread's pollstate and its pollcache. */
ps = pollstate_create();
if (ps->ps_pcache == NULL)
ps->ps_pcache = pcache_alloc();
pcp = ps->ps_pcache;
/*
 * The kernel copy of the pollfd list is kept across calls and only
 * reallocated when nfds differs from the previous call.
 */
old_nfds = ps->ps_nfds;
if (nfds != old_nfds) {
kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
ps->ps_pollfd = pollfdp;
ps->ps_nfds = nfds;
}
pollfdp = ps->ps_pollfd;
if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) {
error = EFAULT;
goto pollout;
}
/*
 * A NULL fds is rejected even when the copyin succeeded: cached lists
 * are keyed by the user address and a pcs_usradr of NULL marks an unused
 * cache set (see the search loop below).
 */
if (fds == NULL) {
error = EINVAL;
goto pollout;
}
mutex_enter(&ps->ps_lock);
pcp = ps->ps_pcache;
ASSERT(pcp != NULL);
if (pcp->pc_bitmap == NULL) {
/*
 * The pollcache has no bitmap yet: build the cache and store this list
 * in set 0 (cacheindex is still 0).  Return at once if that pass
 * already found events or failed.
 */
pcache_create(pcp, nfds);
error = pcacheset_cache_list(ps, fds, &fdcnt, cacheindex);
if (fdcnt || error) {
mutex_exit(&ps->ps_lock);
goto pollout;
}
} else {
pollcacheset_t *pcset = ps->ps_pcacheset;
/*
 * Look for a cache set keyed by this user address.  A match is
 * reconciled against the new list; the first unused set encountered
 * is filled with the new list instead.
 */
for (cacheindex = 0; cacheindex < ps->ps_nsets; cacheindex++) {
if (pcset[cacheindex].pcs_usradr == (uintptr_t)fds) {
/* Usage counter wrapped to 0: reset the counters. */
if ((++pcset[cacheindex].pcs_count) == 0) {
pcacheset_reset_count(ps, cacheindex);
}
error = pcacheset_resolve(ps, nfds, &fdcnt,
cacheindex);
if (error) {
mutex_exit(&ps->ps_lock);
goto pollout;
}
break;
}
if (pcset[cacheindex].pcs_usradr == (uintptr_t)NULL) {
error = pcacheset_cache_list(ps, fds, &fdcnt,
cacheindex);
if (fdcnt || error) {
mutex_exit(&ps->ps_lock);
goto pollout;
}
break;
}
}
/*
 * No matching and no free set: take over the set chosen by
 * pcacheset_replace() and reconcile it with the new list.
 */
if (cacheindex == ps->ps_nsets) {
pollstats.polllistmiss.value.ui64++;
cacheindex = pcacheset_replace(ps);
ASSERT(cacheindex < ps->ps_nsets);
pcset[cacheindex].pcs_usradr = (uintptr_t)fds;
error = pcacheset_resolve(ps, nfds, &fdcnt, cacheindex);
if (error) {
mutex_exit(&ps->ps_lock);
goto pollout;
}
}
}
/*
 * Scan/sleep loop.  Both ps_lock and pc_lock are held at the top of each
 * iteration; pollnotify() sets bitmap bits and PC_POLLWAKE under pc_lock.
 */
mutex_enter(&pcp->pc_lock);
for (;;) {
pcp->pc_flag = 0;
error = pcache_poll(pollfdp, ps, nfds, &fdcnt, cacheindex);
if (fdcnt || error) {
mutex_exit(&pcp->pc_lock);
mutex_exit(&ps->ps_lock);
break;
}
/* A wakeup was posted during the scan: rescan without sleeping. */
if (pcp->pc_flag & PC_POLLWAKE)
continue;
mutex_exit(&ps->ps_lock);
/* deadline 0 means "never block"; -1 is handled like a timeout. */
if (deadline == 0) {
error = -1;
} else {
error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
&pcp->pc_lock, deadline);
}
mutex_exit(&pcp->pc_lock);
/* 0 => interrupted (EINTR); negative => timed out (success). */
if (error <= 0) {
error = (error == 0) ? EINTR : 0;
break;
}
mutex_enter(&ps->ps_lock);
mutex_enter(&pcp->pc_lock);
}
pollout:
/*
 * Restore the saved signal mask, but only when lwp_cursig is 0 (no
 * current signal).
 */
if (ksetp != NULL) {
mutex_enter(&p->p_lock);
if (lwp->lwp_cursig == 0) {
t->t_hold = lwp->lwp_sigoldmask;
t->t_flag &= ~T_TOMASK;
}
mutex_exit(&p->p_lock);
}
if (error)
return (set_errno(error));
/* Copy the results (revents) back to the user's array. */
if (nfds != 0 &&
copyout(pollfdp, fds, nfds * sizeof (pollfd_t)))
return (set_errno(EFAULT));
#ifdef DEBUG
/* Cross-check the returned count against the revents actually set. */
if (fdcnt) {
int reventcnt = 0;
for (i = 0; i < nfds; i++) {
if (pollfdp[i].fd < 0) {
ASSERT(pollfdp[i].revents == 0);
continue;
}
if (pollfdp[i].revents) {
reventcnt++;
}
}
ASSERT(fdcnt == reventcnt);
} else {
for (i = 0; i < nfds; i++) {
ASSERT(pollfdp[i].revents == 0);
}
}
#endif
return (fdcnt);
}
/*
 * System call entry point.  Copies the optional timeout and signal mask in
 * from user space and hands off to poll_common().
 *
 *   fds, nfds - user pollfd array and its length (passed through untouched)
 *   timeoutp  - user timespec, or NULL for no timeout; a 32-bit layout is
 *               read when the caller's data model is not native
 *   setp      - user signal set, or NULL to leave the mask alone
 *
 * Fails with EFAULT if either copyin fails and with EINVAL if
 * itimerspecfix() rejects the timeout.
 */
int
pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp)
{
	timespec_t ts;
	timespec_t *tsp = NULL;
	sigset_t set;
	k_sigset_t kset;
	k_sigset_t *ksetp = NULL;
	model_t datamodel = get_udatamodel();

	if (timeoutp != NULL) {
		if (datamodel != DATAMODEL_NATIVE) {
			timespec32_t ts32;

			if (copyin(timeoutp, &ts32, sizeof (ts32)))
				return (set_errno(EFAULT));
			TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
		} else if (copyin(timeoutp, &ts, sizeof (ts))) {
			return (set_errno(EFAULT));
		}

		if (itimerspecfix(&ts))
			return (set_errno(EINVAL));
		tsp = &ts;
	}

	if (setp != NULL) {
		if (copyin(setp, &set, sizeof (set)))
			return (set_errno(EFAULT));
		/* Convert the user sigset_t to the kernel representation. */
		sigutok(&set, &kset);
		ksetp = &kset;
	}

	return (poll_common(fds, nfds, tsp, ksetp));
}
/*
 * Tear down the calling thread's poll state.
 *
 * Does nothing if the thread has no pollstate.  If the pollstate owns a
 * pollcache, every cached entry is cleaned under ps_lock (only when the
 * cache has a bitmap), and the thread then waits until pc_busy drops to
 * zero so that nobody still references the cache.  Finally the pollstate is
 * destroyed and t_pollstate is cleared.
 */
void
pollcleanup()
{
	pollstate_t *ps = curthread->t_pollstate;
	pollcache_t *pcp;

	if (ps == NULL)
		return;

	pcp = ps->ps_pcache;
	if (pcp != NULL) {
		if (pcp->pc_bitmap != NULL) {
			ASSERT(MUTEX_NOT_HELD(&ps->ps_lock));
			mutex_enter(&ps->ps_lock);
			pcache_clean(pcp);
			mutex_exit(&ps->ps_lock);
#ifdef DEBUG
			checkfpollinfo();
			pollcheckphlist();
#endif
		}

		/* Wait for outstanding references to the pollcache. */
		mutex_enter(&pcp->pc_no_exit);
		ASSERT(pcp->pc_busy >= 0);
		while (pcp->pc_busy > 0)
			cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit);
		mutex_exit(&pcp->pc_no_exit);
	}

	pollstate_destroy(ps);
	curthread->t_pollstate = NULL;
}
/*
 * Notify every poller registered on pollhead php that the events in
 * events_arg have occurred.
 *
 * Each polldat on php->ph_list is examined under the pollhead lock.  Entries
 * whose pd_events intersect the events are notified; POLLHUP and POLLERR
 * match every entry.  Entries with pd_portev set are delivered as event port
 * events; all others have their pollcache notified through pollnotify().
 */
void
pollwakeup(pollhead_t *php, short events_arg)
{
polldat_t *pdp;
/* Go through ushort_t so a negative short is not sign-extended. */
int events = (ushort_t)events_arg;
/*
 * Ports that themselves have pollers (port_pollwkup() returned non-zero)
 * are queued on this list and woken after the current pollhead is done,
 * by looping back to retry rather than by a recursive call.
 */
struct plist {
port_t *pp;
int pevents;
struct plist *next;
};
struct plist *plhead = NULL, *pltail = NULL;
retry:
PH_ENTER(php);
for (pdp = php->ph_list; pdp; pdp = pdp->pd_next) {
if ((pdp->pd_events & events) ||
(events & (POLLHUP | POLLERR))) {
pollcache_t *pcp;
if (pdp->pd_portev != NULL) {
port_kevent_t *pkevp = pdp->pd_portev;
ASSERT(pkevp->portkev_source == PORT_SOURCE_FD);
mutex_enter(&pkevp->portkev_lock);
if (pkevp->portkev_flags & PORT_KEV_VALID) {
int pevents;
pkevp->portkev_flags &= ~PORT_KEV_VALID;
pkevp->portkev_events |= events &
(pdp->pd_events | POLLHUP |
POLLERR);
/*
 * NOTE: portkev_lock is not released on this path in this
 * function; presumably port_send_event() drops it -- confirm
 * against the event port code.
 */
port_send_event(pkevp);
pevents =
port_pollwkup(pkevp->portkev_port);
if (pevents) {
struct plist *t;
t = kmem_zalloc(
sizeof (struct plist),
KM_SLEEP);
t->pp = pkevp->portkev_port;
t->pevents = pevents;
if (plhead == NULL) {
plhead = t;
} else {
pltail->next = t;
}
pltail = t;
}
} else {
mutex_exit(&pkevp->portkev_lock);
}
continue;
}
pcp = pdp->pd_pcache;
if (mutex_tryenter(&pcp->pc_lock)) {
pollnotify(pcp, pdp->pd_fd);
mutex_exit(&pcp->pc_lock);
} else {
/*
 * pc_lock could not be taken without blocking.  Rather than
 * block while holding the pollhead lock, raise pc_busy (which
 * pollcleanup() waits on before destroying the pollcache),
 * drop the pollhead lock, wait for pc_lock to become free by
 * acquiring and releasing it, lower pc_busy again and restart
 * the whole scan.
 */
mutex_enter(&pcp->pc_no_exit);
pcp->pc_busy++;
mutex_exit(&pcp->pc_no_exit);
PH_EXIT(php);
mutex_enter(&pcp->pc_lock);
mutex_exit(&pcp->pc_lock);
mutex_enter(&pcp->pc_no_exit);
pcp->pc_busy--;
if (pcp->pc_busy == 0) {
cv_signal(&pcp->pc_busy_cv);
}
mutex_exit(&pcp->pc_no_exit);
goto retry;
}
}
}
/*
 * If the pollhead just processed belongs to the port at the head of the
 * deferred list, finish that port's wakeup (while still holding the
 * pollhead lock) and free the list entry.
 */
if (plhead != NULL && &plhead->pp->port_pollhd == php) {
struct plist *t;
port_pollwkdone(plhead->pp);
t = plhead;
plhead = plhead->next;
kmem_free(t, sizeof (struct plist));
}
PH_EXIT(php);
/* Process the next deferred port, if any, as a new pollwakeup pass. */
if (plhead != NULL) {
php = &plhead->pp->port_pollhd;
events = plhead->pevents;
goto retry;
}
}
/*
 * Record an event on fd in pollcache pcp and wake its waiters.
 *
 * The caller must hold pcp->pc_lock.  The fd's bit is set in pc_bitmap,
 * PC_POLLWAKE is set in pc_flag, all threads waiting on pc_cv are woken, and
 * pcache_wake_parents() is called to wake parent pollcaches.
 */
void
pollnotify(pollcache_t *pcp, int fd)
{
ASSERT(fd < pcp->pc_mapsize);
ASSERT(MUTEX_HELD(&pcp->pc_lock));
BT_SET(pcp->pc_bitmap, fd);
pcp->pc_flag |= PC_POLLWAKE;
cv_broadcast(&pcp->pc_cv);
pcache_wake_parents(pcp);
}
/*
 * Link polldat pdp onto the front of pollhead php's list and record php in
 * pdp->pd_php, all under the pollhead lock.  The polldat must not currently
 * be associated with any pollhead (pd_php and pd_next are both NULL).
 */
void
polldat_associate(polldat_t *pdp, pollhead_t *php)
{
	ASSERT3P(pdp->pd_php, ==, NULL);
	ASSERT3P(pdp->pd_next, ==, NULL);

	PH_ENTER(php);
#ifdef DEBUG
	{
		/* The polldat must not already be on this list. */
		polldat_t *cur = php->ph_list;

		while (cur != NULL) {
			ASSERT3P(cur, !=, pdp);
			cur = cur->pd_next;
		}
	}
#endif
	pdp->pd_next = php->ph_list;
	php->ph_list = pdp;
	pdp->pd_php = php;
	PH_EXIT(php);
}
/*
 * Remove polldat pdp from the pollhead it is associated with, if any.
 *
 * pd_php is read without the lock and re-checked once the corresponding
 * pollhead lock is held; if it changed in between, the lock is dropped and
 * the attempt is repeated.  Returns immediately when pd_php is NULL.
 */
void
polldat_disassociate(polldat_t *pdp)
{
	pollhead_t *php;
	polldat_t **linkp;
	polldat_t *cur;

	while ((php = pdp->pd_php) != NULL) {
		PH_ENTER(php);
		if (pdp->pd_php == php)
			break;		/* lock held, association stable */
		PH_EXIT(php);
	}
	if (php == NULL)
		return;

	/* Walk the list by link pointer so unlinking needs no special case. */
	for (linkp = &php->ph_list; (cur = *linkp) != NULL;
	    linkp = &cur->pd_next) {
		if (cur == pdp) {
			*linkp = pdp->pd_next;
			pdp->pd_next = NULL;
			break;
		}
	}
#ifdef DEBUG
	/* The polldat must have been found, and only once. */
	ASSERT(cur != NULL);
	for (cur = *linkp; cur != NULL; cur = cur->pd_next) {
		ASSERT(cur != pdp);
	}
#endif
	pdp->pd_php = NULL;
	PH_EXIT(php);
}
/*
 * Compare the first n entries of the current poll list against the cached
 * one and return the index of the first entry that differs (n if none do).
 *
 * Two entries are considered the same when
 *   - their fds are equal and, after invalid event bits have been masked out
 *     of current[].events, their events are equal; or
 *   - both fds are negative (such entries are ignored by poll).
 * For every entry found to be the same, current[].revents is cleared.
 *
 * When invalid event bits are stripped from current[ix].events and newlist
 * is non-NULL, the cleaned value is also stored in newlist[ix].events.
 */
int
pcacheset_cmp(pollfd_t *current, pollfd_t *cached, pollfd_t *newlist, int n)
{
	int ix;

	for (ix = 0; ix < n; ix++) {
		/* Every 8 entries, prefetch the entries 8 positions ahead. */
		if ((ix & 0x7) == 0) {
			prefetch_write_many((caddr_t)&current[ix + 8]);
			prefetch_write_many((caddr_t)&cached[ix + 8]);
		}
		if (current[ix].fd == cached[ix].fd) {
			if (current[ix].events & ~VALID_POLL_EVENTS) {
				current[ix].events &= VALID_POLL_EVENTS;
				if (newlist != NULL)
					newlist[ix].events = current[ix].events;
			}
			if (current[ix].events == cached[ix].events) {
				current[ix].revents = 0;
				continue;
			}
		}
		if ((current[ix].fd < 0) && (cached[ix].fd < 0)) {
			current[ix].revents = 0;
			continue;
		}
		return (ix);
	}
	return (ix);
}
/*
 * Find the polldat for fd in pollcache pcp's hash table.  Returns the entry,
 * or NULL if the fd is not cached.
 */
polldat_t *
pcache_lookup_fd(pollcache_t *pcp, int fd)
{
	polldat_t *cur;

	for (cur = pcp->pc_hash[POLLHASH(pcp->pc_hashsize, fd)];
	    cur != NULL; cur = cur->pd_hashnext) {
		if (cur->pd_fd == fd)
			return (cur);
	}
	return (NULL);
}
/*
 * Allocate a zeroed polldat.  When nsets is positive, a zeroed array of
 * nsets cross-reference (xref_t) slots is attached as pd_ref and pd_nsets
 * records its length; otherwise both stay zero.  Allocations use KM_SLEEP.
 */
polldat_t *
pcache_alloc_fd(int nsets)
{
	polldat_t *pdp = kmem_zalloc(sizeof (polldat_t), KM_SLEEP);

	if (nsets <= 0)
		return (pdp);

	pdp->pd_ref = kmem_zalloc(sizeof (xref_t) * nsets, KM_SLEEP);
	pdp->pd_nsets = nsets;
	return (pdp);
}
/*
 * Insert polldat pdp (keyed by pdp->pd_fd) at the head of its hash chain in
 * pollcache pcp and bump pc_fdcount.
 *
 * The hash table is grown first when either the number of cached fds or the
 * caller-supplied nfds exceeds pc_hashsize * POLLHASHTHRESHOLD.
 */
void
pcache_insert_fd(pollcache_t *pcp, polldat_t *pdp, nfds_t nfds)
{
	int bucket;

	if ((nfds > pcp->pc_hashsize * POLLHASHTHRESHOLD) ||
	    (pcp->pc_fdcount > pcp->pc_hashsize * POLLHASHTHRESHOLD)) {
		pcache_grow_hashtbl(pcp, nfds);
	}

	bucket = POLLHASH(pcp->pc_hashsize, pdp->pd_fd);
	pdp->pd_hashnext = pcp->pc_hash[bucket];
	pcp->pc_hash[bucket] = pdp;
	pcp->pc_fdcount++;

#ifdef DEBUG
	{
		/* No other entry on this chain may carry the same fd. */
		polldat_t *other = pdp->pd_hashnext;

		while (other != NULL) {
			ASSERT(pdp->pd_fd != other->pd_fd);
			other = other->pd_hashnext;
		}
	}
#endif
}
/*
 * Enlarge pollcache pcp's hash table and rehash every existing entry.
 *
 * The new size is nfds rounded up to a multiple of POLLHASHCHUNKSZ when nfds
 * exceeds the current size times POLLHASHINC; otherwise the current size is
 * multiplied by POLLHASHINC.  pc_fdcount is reset and rebuilt by re-inserting
 * each entry through pcache_insert_fd(); the old table is then freed.
 */
void
pcache_grow_hashtbl(pollcache_t *pcp, nfds_t nfds)
{
	int prev_size;
	polldat_t **prev_tbl;
	polldat_t *ent, *follow;
	int bucket;
#ifdef DEBUG
	int count = 0;
#endif

	ASSERT(pcp->pc_hashsize % POLLHASHCHUNKSZ == 0);
	prev_tbl = pcp->pc_hash;
	prev_size = pcp->pc_hashsize;

	if (nfds > pcp->pc_hashsize * POLLHASHINC) {
		pcp->pc_hashsize = (nfds + POLLHASHCHUNKSZ - 1) &
		    ~(POLLHASHCHUNKSZ - 1);
	} else {
		pcp->pc_hashsize *= POLLHASHINC;
	}
	pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (polldat_t *),
	    KM_SLEEP);

	/* Rehash: the insert routine recounts pc_fdcount from zero. */
	pcp->pc_fdcount = 0;
	for (bucket = 0; bucket < prev_size; bucket++) {
		for (ent = prev_tbl[bucket]; ent != NULL; ent = follow) {
			/* Save the old chain link; insertion overwrites it. */
			follow = ent->pd_hashnext;
			pcache_insert_fd(pcp, ent, nfds);
#ifdef DEBUG
			count++;
#endif
		}
	}

	kmem_free(prev_tbl, prev_size * sizeof (polldat_t *));
	ASSERT(pcp->pc_fdcount == count);
}
/*
 * Grow pollcache pcp's bitmap so that it can hold bit fd.
 *
 * The new size is (fd + POLLMAPCHUNK) rounded down to a multiple of
 * POLLMAPCHUNK (the mask arithmetic assumes POLLMAPCHUNK is a power of two).
 * The new map is allocated before pc_lock is taken; the old contents are
 * copied and the maps swapped while holding pc_lock.  The caller must not
 * hold pc_lock.
 */
void
pcache_grow_map(pollcache_t *pcp, int fd)
{
	int newsize = (fd + POLLMAPCHUNK) & ~(POLLMAPCHUNK - 1);
	ulong_t *newmap;
	size_t oldbytes;

	newmap = kmem_zalloc((newsize / BT_NBIPUL) * sizeof (ulong_t),
	    KM_SLEEP);

	ASSERT(mutex_owned(&pcp->pc_lock) == 0);
	mutex_enter(&pcp->pc_lock);
	oldbytes = (pcp->pc_mapsize / BT_NBIPUL) * sizeof (ulong_t);
	bcopy(pcp->pc_bitmap, newmap, oldbytes);
	kmem_free(pcp->pc_bitmap, oldbytes);
	pcp->pc_bitmap = newmap;
	pcp->pc_mapsize = newsize;
	mutex_exit(&pcp->pc_lock);
}
/*
 * Detach every polldat cached in pcp: each entry is removed from its
 * pollhead and, if it still references a file, its fpollinfo record is
 * deleted and pd_fp cleared.  The entries themselves are not freed here.
 * The calling thread's ps_lock must be held.
 */
void
pcache_clean(pollcache_t *pcp)
{
	polldat_t **hashtbl = pcp->pc_hash;
	polldat_t *pdp;
	int bucket;

	ASSERT(MUTEX_HELD(&curthread->t_pollstate->ps_lock));

	for (bucket = 0; bucket < pcp->pc_hashsize; bucket++) {
		pdp = hashtbl[bucket];
		while (pdp != NULL) {
			polldat_disassociate(pdp);
			if (pdp->pd_fp != NULL) {
				delfpollinfo(pdp->pd_fd);
				pdp->pd_fp = NULL;
			}
			pdp = pdp->pd_hashnext;
		}
	}
}
/*
 * Drop every reference that the cached poll lists of ps hold on polldat pdp.
 *
 * Only valid for an entry whose file pointer has already been cleared
 * (pd_fp == NULL); ps_lock must be held.  In each cache set that references
 * the fd, the matching pollfd slots get fd = -1, the set's reference count
 * goes to zero and its recorded position becomes POLLPOSINVAL.  pd_events is
 * cleared, and pd_count is expected to reach zero.
 */
void
pcacheset_invalidate(pollstate_t *ps, polldat_t *pdp)
{
	int fd = pdp->pd_fd;
	int set;

	ASSERT(pdp->pd_fp == NULL);
	ASSERT(MUTEX_HELD(&ps->ps_lock));

	pdp->pd_events = 0;
	for (set = 0; set < ps->ps_nsets; set++) {
		xref_t *refp;
		pollcacheset_t *pcsp;

		ASSERT(pdp->pd_ref != NULL);
		refp = &pdp->pd_ref[set];
		if (refp->xf_refcnt == 0)
			continue;

		ASSERT(refp->xf_position >= 0);
		pcsp = &ps->ps_pcacheset[set];
		if (refp->xf_refcnt == 1) {
			/* Single appearance: xf_position names it directly. */
			pcsp->pcs_pollfd[refp->xf_position].fd = -1;
			refp->xf_refcnt = 0;
			pdp->pd_count--;
		} else if (refp->xf_refcnt > 1) {
			/* Several appearances: scan on from the first one. */
			int idx = refp->xf_position;

			while (idx < pcsp->pcs_nfds) {
				if (pcsp->pcs_pollfd[idx].fd == fd) {
					pcsp->pcs_pollfd[idx].fd = -1;
					refp->xf_refcnt--;
					pdp->pd_count--;
				}
				idx++;
			}
		}
		ASSERT(refp->xf_refcnt == 0);
		refp->xf_position = POLLPOSINVAL;
	}
	ASSERT(pdp->pd_count == 0);
}
/*
 * Poll one descriptor through VOP_POLL and enter it into the poll cache.
 *
 *   ps      - calling thread's pollstate (ps_lock must be held)
 *   fp      - file structure for pollfdp->fd
 *   pollfdp - the pollfd entry being cached; its revents is filled in
 *   fdcntp  - incremented when VOP_POLL reports any revents
 *   pos     - index of this entry in the poll list
 *   which   - index of the cache set the poll list belongs to
 *
 * Returns 0 on success or the error from VOP_POLL, in which case nothing has
 * been cached.
 */
int
pcache_insert(pollstate_t *ps, file_t *fp, pollfd_t *pollfdp, int *fdcntp,
ssize_t pos, int which)
{
pollcache_t *pcp = ps->ps_pcache;
polldat_t *pdp;
int error;
int fd;
pollhead_t *memphp = NULL;
xref_t *refp;
int newpollfd = 0;
ASSERT(MUTEX_HELD(&ps->ps_lock));
ASSERT(curthread->t_pollcache == NULL);
/* The third argument is always passed as 0 here. */
error = VOP_POLL(fp->f_vnode, pollfdp->events, 0, &pollfdp->revents,
&memphp, NULL);
if (error) {
return (error);
}
if (pollfdp->revents) {
(*fdcntp)++;
}
/* Find the cache entry for this fd, allocating one if there is none. */
fd = pollfdp->fd;
pdp = pcache_lookup_fd(pcp, fd);
if (pdp == NULL) {
ASSERT(ps->ps_nsets > 0);
pdp = pcache_alloc_fd(ps->ps_nsets);
newpollfd = 1;
}
/*
 * An entry that is still referenced but whose file pointer has been
 * cleared is stale: drop its old references before reusing it.
 */
if ((pdp->pd_count > 0) && (pdp->pd_fp == NULL)) {
pcacheset_invalidate(ps, pdp);
ASSERT(pdp->pd_next == NULL);
}
/* First reference to this entry: bind it to the fd, file and thread. */
if (pdp->pd_count == 0) {
pdp->pd_fd = fd;
pdp->pd_fp = fp;
addfpollinfo(fd);
pdp->pd_thread = curthread;
pdp->pd_pcache = pcp;
ASSERT(pdp->pd_next == NULL);
}
ASSERT(infpollinfo(fd));
ASSERT(pdp->pd_fp == fp);
/* pd_events accumulates the union of all events polled on this fd. */
pdp->pd_events |= pollfdp->events;
pdp->pd_count++;
ASSERT(pdp->pd_ref != NULL);
/*
 * Per-set cross reference: xf_position tracks the lowest list index at
 * which the fd appears and xf_refcnt the number of appearances.
 */
refp = &pdp->pd_ref[which];
if (refp->xf_refcnt == 0) {
refp->xf_position = pos;
} else {
if (pos < refp->xf_position) {
refp->xf_position = pos;
}
}
ASSERT(pollfdp->fd == ps->ps_pollfd[refp->xf_position].fd);
refp->xf_refcnt++;
/* Make sure the bitmap covers fd and track the highest fd seen. */
if (fd >= pcp->pc_mapsize) {
pcache_grow_map(pcp, fd);
}
if (fd > pcp->pc_mapend) {
pcp->pc_mapend = fd;
}
/* A newly allocated entry is hashed only now that pd_fd is set. */
if (newpollfd != 0) {
pcache_insert_fd(ps->ps_pcache, pdp, ps->ps_nfds);
}
/*
 * Hook the polldat onto the pollhead returned by VOP_POLL, moving it if
 * it is currently associated with a different pollhead.
 */
if (memphp) {
if (pdp->pd_php == NULL) {
polldat_associate(pdp, memphp);
} else {
if (memphp != pdp->pd_php) {
polldat_disassociate(pdp);
polldat_associate(pdp, memphp);
}
}
}
/*
 * Always mark the fd in the bitmap so that the next pcache_poll() scan
 * polls it.
 */
mutex_enter(&pcp->pc_lock);
BT_SET(pcp->pc_bitmap, fd);
mutex_exit(&pcp->pc_lock);
return (0);
}
/*
 * Remove one reference to fd (at list position pos of cache set 'which')
 * from the poll cache.  ps_lock must be held.
 *
 * cevent is the cached events value of the entry being removed; if it has
 * all POLLCLOSED bits set, every reference to the fd is invalidated.
 *
 * Returns 1 when the removed reference was the one recorded in xf_position
 * and other references remain in the same set; xf_position is then left as
 * POLLPOSTRANS and the caller is expected to supply a new position through
 * pcache_update_xref().  Returns 0 in all other cases.
 */
int
pcache_delete_fd(pollstate_t *ps, int fd, size_t pos, int which, uint_t cevent)
{
pollcache_t *pcp = ps->ps_pcache;
polldat_t *pdp;
xref_t *refp;
ASSERT(fd < pcp->pc_mapsize);
ASSERT(MUTEX_HELD(&ps->ps_lock));
pdp = pcache_lookup_fd(pcp, fd);
ASSERT(pdp != NULL);
ASSERT(pdp->pd_count > 0);
ASSERT(pdp->pd_ref != NULL);
refp = &pdp->pd_ref[which];
/*
 * Last reference overall: reset the entry completely, detach it from its
 * pollhead, drop the fpollinfo record and clear its bitmap bit.
 */
if (pdp->pd_count == 1) {
pdp->pd_events = 0;
refp->xf_position = POLLPOSINVAL;
ASSERT(refp->xf_refcnt == 1);
refp->xf_refcnt = 0;
polldat_disassociate(pdp);
pdp->pd_count = 0;
if (pdp->pd_fp != NULL) {
pdp->pd_fp = NULL;
delfpollinfo(fd);
}
mutex_enter(&pcp->pc_lock);
BT_CLEAR(pcp->pc_bitmap, fd);
mutex_exit(&pcp->pc_lock);
return (0);
}
/*
 * The cached entry is marked POLLCLOSED: invalidate every reference to
 * the fd across all cache sets and clear its bitmap bit.
 */
if ((cevent & POLLCLOSED) == POLLCLOSED) {
pcacheset_invalidate(ps, pdp);
ASSERT(pdp->pd_php == NULL);
mutex_enter(&pcp->pc_lock);
BT_CLEAR(pcp->pc_bitmap, fd);
mutex_exit(&pcp->pc_lock);
return (0);
}
#ifdef DEBUG
if (getf(fd) != NULL) {
ASSERT(infpollinfo(fd));
releasef(fd);
}
#endif
/* Other references remain: drop just this one. */
pdp->pd_count--;
ASSERT(refp->xf_refcnt > 0);
if (--refp->xf_refcnt == 0) {
refp->xf_position = POLLPOSINVAL;
} else {
ASSERT(pos >= refp->xf_position);
if (pos == refp->xf_position) {
/* The recorded first appearance is going away. */
refp->xf_position = POLLPOSTRANS;
return (1);
}
}
return (0);
}
/*
 * Record pos as the position of fd within cache set 'which'.  The fd must
 * already be present in pollcache pcp.
 */
void
pcache_update_xref(pollcache_t *pcp, int fd, ssize_t pos, int which)
{
	polldat_t *entry = pcache_lookup_fd(pcp, fd);
	xref_t *refp;

	ASSERT(entry != NULL);
	ASSERT(entry->pd_ref != NULL);
	refp = &entry->pd_ref[which];
	refp->xf_position = pos;
}
#ifdef DEBUG
/*
 * DEBUG-only consistency check of the first nfds entries of ps->ps_pollfd
 * against the poll cache.  ps_lock must be held.
 *
 * Entries with a negative fd, with revents equal to POLLNVAL, or whose fd
 * can no longer be looked up with getf() are skipped.  Every remaining fd
 * must be cached with a matching file pointer.  Returns 0 if such an fd has
 * neither its bitmap bit set nor a pollhead association, and 1 otherwise.
 */
static int
pollchecksanity(pollstate_t *ps, nfds_t nfds)
{
	pollcache_t *pcp = ps->ps_pcache;
	pollfd_t *list = ps->ps_pollfd;
	int idx;

	ASSERT(MUTEX_HELD(&ps->ps_lock));
	for (idx = 0; idx < nfds; idx++) {
		int fd = list[idx].fd;
		file_t *fp;
		polldat_t *pdp;

		if (fd < 0) {
			ASSERT(list[idx].revents == 0);
			continue;
		}
		if (list[idx].revents == POLLNVAL || (fp = getf(fd)) == NULL)
			continue;

		pdp = pcache_lookup_fd(pcp, fd);
		ASSERT(pdp != NULL);
		ASSERT(infpollinfo(fd));
		ASSERT(pdp->pd_fp == fp);
		releasef(fd);

		if (!BT_TEST(pcp->pc_bitmap, fd) && pdp->pd_php == NULL)
			return (0);
	}
	return (1);
}
#endif
/*
 * Reconcile the poll list just copied in (ps->ps_pollfd, nfds entries) with
 * the list cached in set 'which', updating the poll cache to match.
 *
 * The overlapping part of the two lists is compared with pcacheset_cmp().
 * For every entry that differs, the cached fd (if any) is removed from the
 * cache and the new fd (if any) is polled and inserted.  Entries beyond the
 * old length are inserted; cached entries beyond the new length are removed.
 * When the length changed, a new cached list is built alongside and swapped
 * in at the end.
 *
 * On success *fdcntp receives the number of entries marked POLLNVAL here and
 * 0 is returned.  On failure the error from pcache_insert() is returned and
 * any newly allocated list is freed.  ps_lock must be held.
 */
int
pcacheset_resolve(pollstate_t *ps, nfds_t nfds, int *fdcntp, int which)
{
	int i;
	pollcache_t *pcp = ps->ps_pcache;
	pollfd_t *newlist = NULL;
	pollfd_t *current = ps->ps_pollfd;
	pollfd_t *cached;
	pollcacheset_t *pcsp;
	int common;
	int count = 0;
	int offset;
	int remain;
	int fd;
	file_t *fp;
	int fdcnt = 0;
	int cnt = 0;
	nfds_t old_nfds;
	int error = 0;
	int mismatch = 0;

	ASSERT(MUTEX_HELD(&ps->ps_lock));
#ifdef DEBUG
	checkpolldat(ps);
#endif
	pcsp = &ps->ps_pcacheset[which];
	old_nfds = pcsp->pcs_nfds;
	common = (nfds > old_nfds) ? old_nfds : nfds;
	if (nfds != old_nfds) {
		/* Length changed: build a replacement cached list. */
		newlist = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
		bcopy(current, newlist, sizeof (pollfd_t) * nfds);
	}

	/*
	 * Walk the overlapping part.  Each pcacheset_cmp() call skips over a
	 * run of identical entries; the entry it stops at is then resolved.
	 */
	cached = pcsp->pcs_pollfd;
	remain = common;
	while (count < common) {
		int tmpfd;
		pollfd_t *np;

		np = (newlist != NULL) ? &newlist[count] : NULL;
		offset = pcacheset_cmp(&current[count], &cached[count], np,
		    remain);
		/* A full match on the very first comparison is a cache hit. */
		if ((count == 0) && (offset == common)) {
			pollstats.pollcachehit.value.ui64++;
		} else {
			mismatch++;
		}
		count += offset;
		if (offset < remain) {
			ASSERT(count < common);
			ASSERT((current[count].fd != cached[count].fd) ||
			    (current[count].events != cached[count].events));
			/* Strip invalid event bits from the differing entry. */
			if (current[count].events & ~VALID_POLL_EVENTS) {
				if (newlist != NULL) {
					newlist[count].events =
					    current[count].events &=
					    VALID_POLL_EVENTS;
				} else {
					current[count].events &=
					    VALID_POLL_EVENTS;
				}
			}
			/* Remove the old fd before inserting the new one. */
			if (cached[count].fd >= 0) {
				tmpfd = cached[count].fd;
				if (pcache_delete_fd(ps, tmpfd, count, which,
				    (uint_t)cached[count].events)) {
					/*
					 * The recorded first appearance of a
					 * duplicated fd was removed; point
					 * the xref at the next appearance.
					 */
					for (i = count + 1; i < old_nfds; i++) {
						if (cached[i].fd == tmpfd) {
							pcache_update_xref(pcp,
							    tmpfd, (ssize_t)i,
							    which);
							break;
						}
					}
					ASSERT(i <= old_nfds);
				}
				/*
				 * Keep both lists in step: the new list is
				 * discarded if a later step fails.
				 */
				cached[count].fd = -1;
				if (newlist != NULL) {
					newlist[count].fd = -1;
				}
			}
			if ((tmpfd = current[count].fd) >= 0) {
				if ((fp = getf(tmpfd)) == NULL) {
					current[count].revents = POLLNVAL;
					if (newlist != NULL) {
						newlist[count].fd = -1;
					}
					cached[count].fd = -1;
					fdcnt++;
				} else {
					/* 'cnt' is a scratch count; unused. */
					error = pcache_insert(ps, fp,
					    &current[count], &cnt,
					    (ssize_t)count, which);
					if (error) {
						releasef(tmpfd);
						if (newlist != NULL) {
							kmem_free(newlist,
							    nfds *
							    sizeof (pollfd_t));
						}
						return (error);
					}
					if (newlist != NULL) {
						newlist[count].fd =
						    current[count].fd;
						newlist[count].events =
						    current[count].events;
					}
					cached[count].fd = current[count].fd;
					cached[count].events =
					    current[count].events;
					/*
					 * Hold on the file is dropped only
					 * after the cached entry is updated.
					 */
					releasef(tmpfd);
				}
			} else {
				current[count].revents = 0;
			}
			count++;
			remain = common - count;
		}
	}
	if (mismatch != 0) {
		if (mismatch == common) {
			pollstats.pollcachemiss.value.ui64++;
		} else {
			pollstats.pollcachephit.value.ui64++;
		}
	}

	/* The list grew: poll and cache the additional entries. */
	if (nfds > old_nfds) {
		ASSERT(newlist != NULL);
		for (i = old_nfds; i < nfds; i++) {
			if (current[i].events & ~VALID_POLL_EVENTS) {
				newlist[i].events = current[i].events =
				    current[i].events & VALID_POLL_EVENTS;
			}
			if ((fd = current[i].fd) < 0) {
				current[i].revents = 0;
				continue;
			}
			if ((fp = getf(fd)) == NULL) {
				current[i].revents = POLLNVAL;
				newlist[i].fd = -1;
				fdcnt++;
				continue;
			}
			error = pcache_insert(ps, fp, &current[i], &cnt,
			    (ssize_t)i, which);
			releasef(fd);
			if (error) {
				/* Undo the entries added so far. */
				pcacheset_remove_list(ps, current, old_nfds,
				    i, which, 0);
				kmem_free(newlist, nfds * sizeof (pollfd_t));
				return (error);
			}
		}
	}

	/* The list shrank: remove the fds that are no longer polled. */
	if (old_nfds > nfds) {
		pcacheset_remove_list(ps, pcsp->pcs_pollfd, nfds, old_nfds,
		    which, 1);
	}

	/* Swap in the replacement cached list, if one was built. */
	if (newlist != NULL) {
		kmem_free(pcsp->pcs_pollfd, old_nfds * sizeof (pollfd_t));
		pcsp->pcs_pollfd = newlist;
		pcsp->pcs_nfds = nfds;
	}
	ASSERT(*fdcntp == 0);
	*fdcntp = fdcnt;
	ASSERT(pollchecksanity(ps, nfds));
	ASSERT(pollcheckxref(ps, which));
#ifdef DEBUG
	checkpolldat(ps);
#endif
	return (0);
}
#ifdef DEBUG
/*
 * DEBUG-only helper: count the entries of pollfdp[0..nfds) that have a
 * non-negative fd and non-zero revents.  Along the way it asserts that
 * entries with a negative fd have no revents, and that any fd reporting
 * something other than exactly POLLNVAL has its bit set in the bitmap.
 */
static int
pollscanrevents(pollcache_t *pcp, pollfd_t *pollfdp, nfds_t nfds)
{
	int found = 0;
	int idx;

	for (idx = 0; idx < nfds; idx++) {
		pollfd_t *pfd = &pollfdp[idx];

		if (pfd->fd < 0) {
			ASSERT(pfd->revents == 0);
			continue;
		}
		if (pfd->revents == 0)
			continue;

		found++;
		if (pfd->revents != POLLNVAL) {
			ASSERT(BT_TEST(pcp->pc_bitmap, pfd->fd));
		}
	}
	return (found);
}
#endif
/*
 * Poll the descriptors whose bits are set in the pollcache bitmap and fill
 * in revents for the matching entries of pollfdp (the list cached in set
 * 'which').  Both ps_lock and pc_lock must be held.
 *
 * On success returns 0 and adds the number of entries with revents set to
 * *fdcntp.  Returns the VOP_POLL (or plist_chkdupfd) error otherwise, in
 * which case *fdcntp is left untouched.
 */
int
pcache_poll(pollfd_t *pollfdp, pollstate_t *ps, nfds_t nfds, int *fdcntp,
int which)
{
int i;
pollcache_t *pcp;
int fd;
int begin, end, done;
pollhead_t *php;
int fdcnt;
int error = 0;
file_t *fp;
polldat_t *pdp;
xref_t *refp;
int entry;
pcp = ps->ps_pcache;
ASSERT(MUTEX_HELD(&ps->ps_lock));
ASSERT(MUTEX_HELD(&pcp->pc_lock));
/* The scan restarts from scratch here (fdcnt is reset as well). */
retry:
done = 0;
begin = 0;
fdcnt = 0;
end = pcp->pc_mapend;
while ((fdcnt < nfds) && !done) {
php = NULL;
/* Lowest set bit in [begin, end]; negative means none is left. */
fd = bt_getlowbit(pcp->pc_bitmap, begin, end);
ASSERT(fd <= end);
if (fd >= 0) {
ASSERT(pollcheckrevents(ps, begin, fd, which));
if (fd == end) {
done = 1;
} else {
begin = fd + 1;
}
pdp = pcache_lookup_fd(pcp, fd);
ASSERT(pdp != NULL);
ASSERT(pdp->pd_ref != NULL);
/*
 * The bitmap is shared by all cache sets; skip fds that do not
 * appear in this particular list.
 */
refp = &pdp->pd_ref[which];
if (refp->xf_refcnt == 0)
continue;
entry = refp->xf_position;
ASSERT((entry >= 0) && (entry < nfds));
ASSERT(pollfdp[entry].fd == fd);
/*
 * The cached file pointer has been cleared (see
 * pcache_clean_entry()): report POLLNVAL on every appearance of
 * the fd and invalidate the cache entry.
 */
if (pdp->pd_fp == NULL) {
ASSERT(pdp->pd_count > 0);
pollfdp[entry].revents = POLLNVAL;
fdcnt++;
if (refp->xf_refcnt > 1) {
for (i = entry + 1; i < nfds; i++) {
if (pollfdp[i].fd == fd) {
pollfdp[i].revents =
POLLNVAL;
fdcnt++;
}
}
}
pcacheset_invalidate(ps, pdp);
continue;
}
/*
 * The fd can no longer be looked up although the cache entry has
 * not been cleaned yet: report POLLNVAL but leave the entry.
 */
if ((fp = getf(fd)) == NULL) {
pollfdp[entry].revents = POLLNVAL;
fdcnt++;
if (refp->xf_refcnt > 1) {
for (i = entry + 1; i < nfds; i++) {
if (pollfdp[i].fd == fd) {
pollfdp[i].revents =
POLLNVAL;
fdcnt++;
}
}
}
continue;
}
ASSERT(pdp->pd_fp == fp);
ASSERT(infpollinfo(fd));
ASSERT(pdp->pd_php == NULL ||
MUTEX_NOT_HELD(PHLOCK(pdp->pd_php)));
ASSERT(curthread->t_pollcache == NULL);
/*
 * Poll with the events of this list entry rather than pd_events,
 * which is the union over all lists.
 */
error = VOP_POLL(fp->f_vnode, pollfdp[entry].events, 0,
&pollfdp[entry].revents, &php, NULL);
if (error) {
releasef(fd);
break;
}
/*
 * VOP_POLL returned a different pollhead than the one recorded:
 * move the polldat, force the fd to be polled again and restart.
 */
if (php != NULL && pdp->pd_php != NULL &&
php != pdp->pd_php) {
releasef(fd);
polldat_disassociate(pdp);
polldat_associate(pdp, php);
BT_SET(pcp->pc_bitmap, fd);
goto retry;
}
if (pollfdp[entry].revents) {
ASSERT(refp->xf_refcnt >= 1);
fdcnt++;
if (refp->xf_refcnt > 1) {
/* fd appears more than once in the list. */
error = plist_chkdupfd(fp, pdp, ps,
pollfdp, entry, &fdcnt);
if (error > 0) {
releasef(fd);
break;
}
/*
 * NOTE(review): no releasef() on this path; presumably
 * plist_chkdupfd() has released the fd when it returns a
 * negative value -- confirm.
 */
if (error < 0) {
goto retry;
}
}
releasef(fd);
} else {
/*
 * No events.  The bit is cleared only if the polldat is on a
 * pollhead, this entry polls exactly pd_events, and no wakeup
 * was flagged in pc_flag.
 */
if ((pdp->pd_php != NULL) &&
(pollfdp[entry].events == pdp->pd_events) &&
((pcp->pc_flag & PC_POLLWAKE) == 0)) {
BT_CLEAR(pcp->pc_bitmap, fd);
}
/*
 * First time a pollhead is available for this entry: associate
 * and poll again.
 */
if ((pdp->pd_php == NULL) && (php != NULL)) {
polldat_associate(pdp, php);
releasef(fd);
goto retry;
}
if (refp->xf_refcnt > 1) {
error = plist_chkdupfd(fp, pdp, ps,
pollfdp, entry, &fdcnt);
if (error > 0) {
releasef(fd);
break;
}
if (error < 0) {
goto retry;
}
}
releasef(fd);
}
} else {
done = 1;
ASSERT(pollcheckrevents(ps, begin, end + 1, which));
}
}
if (!error) {
ASSERT(*fdcntp + fdcnt == pollscanrevents(pcp, pollfdp, nfds));
*fdcntp += fdcnt;
}
return (error);
}
/*
 * Cache the poll list currently held in ps->ps_pollfd in the (unused) cache
 * set 'which', keyed by the user address fds, polling every descriptor as it
 * is inserted.  ps_lock must be held.
 *
 * revents is filled in for every entry: 0 for negative fds, POLLNVAL for fds
 * that cannot be looked up (counted in *fdcntp and stored as fd -1 in the
 * cached copy), and the VOP_POLL result otherwise (counted by
 * pcache_insert()).  Invalid event bits are stripped from both the working
 * list and the cached copy.
 *
 * Returns 0 on success.  If an insert fails, the entries inserted so far are
 * removed again, the cached copy is freed, the set is marked unused and the
 * error is returned.
 */
int
pcacheset_cache_list(pollstate_t *ps, pollfd_t *fds, int *fdcntp, int which)
{
	pollfd_t *cur = ps->ps_pollfd;
	pollcacheset_t *sets = ps->ps_pcacheset;
	pollfd_t *copy;
	int idx;

	ASSERT(MUTEX_HELD(&ps->ps_lock));
	ASSERT(which < ps->ps_nsets);
	ASSERT(sets != NULL);
	ASSERT(sets[which].pcs_pollfd == NULL);

	copy = kmem_alloc(ps->ps_nfds * sizeof (pollfd_t), KM_SLEEP);
	bcopy(cur, copy, sizeof (pollfd_t) * ps->ps_nfds);
	sets[which].pcs_pollfd = copy;
	sets[which].pcs_nfds = ps->ps_nfds;
	sets[which].pcs_usradr = (uintptr_t)fds;

	for (idx = 0; idx < ps->ps_nfds; idx++) {
		int fd = cur[idx].fd;
		file_t *fp;
		int error;

		if (cur[idx].events & ~VALID_POLL_EVENTS) {
			cur[idx].events &= VALID_POLL_EVENTS;
			copy[idx].events = cur[idx].events;
		}
		if (fd < 0) {
			cur[idx].revents = 0;
			continue;
		}

		fp = getf(fd);
		if (fp == NULL) {
			cur[idx].revents = POLLNVAL;
			copy[idx].fd = -1;
			(*fdcntp)++;
			continue;
		}

		error = pcache_insert(ps, fp, &cur[idx], fdcntp, (ssize_t)idx,
		    which);
		releasef(fd);
		if (error != 0) {
			/* Undo everything cached so far for this list. */
			pcacheset_remove_list(ps, cur, 0, idx, which, 0);
			kmem_free(copy, ps->ps_nfds * sizeof (pollfd_t));
			sets[which].pcs_pollfd = NULL;
			sets[which].pcs_usradr = (uintptr_t)NULL;
			return (error);
		}
	}
	return (0);
}
/*
 * Mark the cache entry for fd as belonging to a closed file.  ps_lock must
 * be held and the fd must be cached.
 *
 * pd_fp is cleared and, in every cache set that references the fd, the
 * events field of each matching cached pollfd is set to POLLCLOSED.  If the
 * entry is on a pollhead, that pollhead is sent POLLHUP via pollwakeup() and
 * the entry is then disassociated from it.
 */
void
pcache_clean_entry(pollstate_t *ps, int fd)
{
	pollcache_t *pcp;
	polldat_t *pdp;
	int set;

	ASSERT(ps != NULL);
	ASSERT(MUTEX_HELD(&ps->ps_lock));
	pcp = ps->ps_pcache;
	ASSERT(pcp);
	pdp = pcache_lookup_fd(pcp, fd);
	ASSERT(pdp != NULL);
	pdp->pd_fp = NULL;
	ASSERT(pdp->pd_ref != NULL);

	for (set = 0; set < ps->ps_nsets; set++) {
		xref_t *refp = &pdp->pd_ref[set];
		pollcacheset_t *pcsp;

		if (refp->xf_refcnt == 0)
			continue;

		ASSERT(refp->xf_position >= 0);
		pcsp = &ps->ps_pcacheset[set];
		if (refp->xf_refcnt == 1) {
			pcsp->pcs_pollfd[refp->xf_position].events =
			    (short)POLLCLOSED;
		} else if (refp->xf_refcnt > 1) {
			/* Mark every appearance, starting at the first. */
			int idx;

			for (idx = refp->xf_position;
			    idx < pcsp->pcs_nfds; idx++) {
				if (pcsp->pcs_pollfd[idx].fd == fd) {
					pcsp->pcs_pollfd[idx].events =
					    (short)POLLCLOSED;
				}
			}
		}
	}

	if (pdp->pd_php != NULL) {
		pollwakeup(pdp->pd_php, POLLHUP);
		polldat_disassociate(pdp);
	}
}
/*
 * Wake the waiters of every parent pollcache linked from pcp.  The caller
 * must hold pcp->pc_lock.  Each link is examined under its own pcl_lock and
 * only links in state PCL_VALID have their parent's pc_cv broadcast.
 */
void
pcache_wake_parents(pollcache_t *pcp)
{
	pcachelink_t *link;
	pcachelink_t *up;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));

	link = pcp->pc_parents;
	while (link != NULL) {
		mutex_enter(&link->pcl_lock);
		if (link->pcl_state == PCL_VALID) {
			ASSERT(link->pcl_parent_pc != NULL);
			cv_broadcast(&link->pcl_parent_pc->pc_cv);
		}
		/* Fetch the successor while the link lock is still held. */
		up = link->pcl_parent_next;
		mutex_exit(&link->pcl_lock);
		link = up;
	}
}
/*
 * Return the calling thread's pollstate, creating it on first use.
 *
 * A new pollstate is zero-filled, given POLLFDSETS cache sets and stored in
 * curthread->t_pollstate.  An existing pollstate is expected to be idle:
 * zero depth, no flags and an empty pollcache stack.
 */
pollstate_t *
pollstate_create()
{
	pollstate_t *ps = curthread->t_pollstate;

	if (ps != NULL) {
		ASSERT(ps->ps_depth == 0);
		ASSERT(ps->ps_flags == 0);
		ASSERT(ps->ps_pc_stack[0] == 0);
		return (ps);
	}

	ps = kmem_zalloc(sizeof (pollstate_t), KM_SLEEP);
	ps->ps_nsets = POLLFDSETS;
	ps->ps_pcacheset = pcacheset_create(ps->ps_nsets);
	curthread->t_pollstate = ps;
	return (ps);
}
/*
 * Free pollstate ps and everything it owns: the kernel pollfd buffer, the
 * pollcache, the cache sets and the ps_dpbuf buffer.  The caller is
 * responsible for clearing any pointer to ps (see pollcleanup()).
 */
void
pollstate_destroy(pollstate_t *ps)
{
	pollfd_t *fdlist = ps->ps_pollfd;
	pollcache_t *pcp = ps->ps_pcache;

	if (fdlist != NULL) {
		kmem_free(fdlist, ps->ps_nfds * sizeof (pollfd_t));
		ps->ps_pollfd = NULL;
	}

	if (pcp != NULL) {
		pcache_destroy(pcp);
		ps->ps_pcache = NULL;
	}

	pcacheset_destroy(ps->ps_pcacheset, ps->ps_nsets);
	ps->ps_pcacheset = NULL;

	if (ps->ps_dpbuf != NULL) {
		kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize);
		ps->ps_dpbuf = NULL;
	}

	mutex_destroy(&ps->ps_lock);
	kmem_free(ps, sizeof (pollstate_t));
}
/*
 * Acquire pcp->pc_lock for pollstate ps when a simple tryenter has failed,
 * checking first whether blocking on it could deadlock.
 *
 * Returns 0 with pc_lock held, or -1 with pc_lock not held when a deadlock
 * (or a resulting depth of POLLMAXDEPTH or more) is detected, or when this
 * thread was flagged POLLSTATE_STALEMATE by another contender while it was
 * waiting.
 */
static int
pollstate_contend(pollstate_t *ps, pollcache_t *pcp)
{
pollstate_t *rem, *next;
pollcache_t *desired_pc;
int result = 0, depth_total;
mutex_enter(&pollstate_contenders_lock);
/* The lock may have become free in the meantime; try once more. */
if (mutex_tryenter(&pcp->pc_lock) != 0) {
goto out;
}
/*
 * Walk the list of blocked pollstates looking for one that holds the
 * pollcache we want (desired_pc appears on its ps_pc_stack).
 */
depth_total = ps->ps_depth;
desired_pc = pcp;
for (rem = pollstate_contenders; rem != NULL; rem = next) {
int i, j;
next = rem->ps_contend_nextp;
for (i = 0; i < rem->ps_depth; i++) {
if (rem->ps_pc_stack[i] != desired_pc) {
continue;
}
ASSERT(rem->ps_contend_pc != NULL);
/*
 * That holder is itself waiting for a pollcache we hold: deadlock.
 * Flag the remote pollstate and fail.
 */
for (j = 0; j < ps->ps_depth; j++) {
if (rem->ps_contend_pc == ps->ps_pc_stack[j]) {
rem->ps_flags |= POLLSTATE_STALEMATE;
result = -1;
goto out;
}
}
/*
 * Otherwise follow the chain: search next for whoever holds the
 * pollcache the holder is waiting on, restarting from the head of
 * the list.  The accumulated depth bounds the number of
 * restarts.
 */
desired_pc = rem->ps_contend_pc;
next = pollstate_contenders;
depth_total += (rem->ps_depth - i);
if (depth_total >= POLLMAXDEPTH) {
result = -1;
goto out;
}
}
}
/*
 * No deadlock found.  Put ourselves at the head of the contenders list
 * (doubly linked through ps_contend_nextp / ps_contend_pnextp) and block
 * on the pollcache lock with the list lock dropped.
 */
ps->ps_contend_pc = pcp;
ps->ps_contend_nextp = pollstate_contenders;
ps->ps_contend_pnextp = &pollstate_contenders;
if (pollstate_contenders != NULL) {
pollstate_contenders->ps_contend_pnextp =
&ps->ps_contend_nextp;
}
pollstate_contenders = ps;
mutex_exit(&pollstate_contenders_lock);
mutex_enter(&pcp->pc_lock);
mutex_enter(&pollstate_contenders_lock);
/*
 * If another contender flagged us as STALEMATE while we waited, give the
 * lock back and report failure as well.
 */
if ((ps->ps_flags & POLLSTATE_STALEMATE) != 0) {
result = -1;
ps->ps_flags &= ~POLLSTATE_STALEMATE;
mutex_exit(&pcp->pc_lock);
}
/* Unlink ourselves from the contenders list. */
if (ps->ps_contend_nextp != NULL) {
ps->ps_contend_nextp->ps_contend_pnextp =
ps->ps_contend_pnextp;
}
*ps->ps_contend_pnextp = ps->ps_contend_nextp;
ps->ps_contend_pc = NULL;
ps->ps_contend_nextp = NULL;
ps->ps_contend_pnextp = NULL;
out:
mutex_exit(&pollstate_contenders_lock);
return (result);
}
/*
 * Lock pollcache pcp on behalf of the calling thread and push it onto the
 * thread's stack of held pollcaches.
 *
 * Returns PSE_SUCCESS with pc_lock held, or one of:
 *   PSE_FAIL_POLLSTATE - the thread has no pollstate
 *   PSE_FAIL_DEPTH     - POLLMAXDEPTH pollcaches are already held
 *   PSE_FAIL_LOOP      - pcp is already on this thread's stack
 *   PSE_FAIL_DEADLOCK  - pollstate_contend() refused to take the lock
 * In all failure cases pc_lock is not held on return.
 */
int
pollstate_enter(pollcache_t *pcp)
{
	pollstate_t *ps = curthread->t_pollstate;
	int level;

	if (ps == NULL)
		return (PSE_FAIL_POLLSTATE);
	if (ps->ps_depth >= POLLMAXDEPTH)
		return (PSE_FAIL_DEPTH);

	level = 0;
	while (level < ps->ps_depth) {
		if (ps->ps_pc_stack[level] == pcp)
			return (PSE_FAIL_LOOP);
		level++;
	}
	ASSERT(ps->ps_pc_stack[level] == NULL);

	if (ps->ps_depth == 0) {
		/* Nothing else is held, so blocking cannot deadlock here. */
		mutex_enter(&pcp->pc_lock);
	} else if (mutex_tryenter(&pcp->pc_lock) == 0 &&
	    pollstate_contend(ps, pcp) != 0) {
		return (PSE_FAIL_DEADLOCK);
	}

	ps->ps_pc_stack[ps->ps_depth] = pcp;
	ps->ps_depth++;
	return (PSE_SUCCESS);
}
/*
 * Undo pollstate_enter(): release pcp->pc_lock and pop pcp off the calling
 * thread's pollcache stack.  pcp must be the most recently entered
 * pollcache; this is enforced with VERIFY.
 */
void
pollstate_exit(pollcache_t *pcp)
{
	pollstate_t *ps = curthread->t_pollstate;

	VERIFY(ps != NULL);
	VERIFY(ps->ps_pc_stack[ps->ps_depth - 1] == pcp);

	mutex_exit(&pcp->pc_lock);
	ps->ps_depth--;
	ps->ps_pc_stack[ps->ps_depth] = NULL;
	VERIFY(ps->ps_depth >= 0);
}
/*
 * For every fpollinfo on the list, raise pc_busy on the pollcache of the
 * thread it names.  pollcleanup() waits for pc_busy to return to zero, and
 * pollcacheclean() performs the matching decrement.
 */
void
pollblockexit(fpollinfo_t *fpip)
{
	while (fpip != NULL) {
		pollcache_t *pcp = fpip->fp_thread->t_pollstate->ps_pcache;

		mutex_enter(&pcp->pc_no_exit);
		pcp->pc_busy++;
		mutex_exit(&pcp->pc_no_exit);

		fpip = fpip->fp_next;
	}
}
/*
 * Walk the fpollinfo list fip and, for each thread on it, mark fd's cache
 * entry as closed (pcache_clean_entry() under that thread's ps_lock), lower
 * the pollcache's pc_busy count (signalling pc_busy_cv when it reaches
 * zero), and free the fpollinfo record.
 */
void
pollcacheclean(fpollinfo_t *fip, int fd)
{
	fpollinfo_t *cur, *next;

	for (cur = fip; cur != NULL; cur = next) {
		pollstate_t *ps = cur->fp_thread->t_pollstate;
		pollcache_t *pcp = ps->ps_pcache;

		mutex_enter(&ps->ps_lock);
		pcache_clean_entry(ps, fd);
		mutex_exit(&ps->ps_lock);

		mutex_enter(&pcp->pc_no_exit);
		pcp->pc_busy--;
		if (pcp->pc_busy == 0) {
			cv_signal(&pcp->pc_busy_cv);
		}
		mutex_exit(&pcp->pc_no_exit);

		/* Pick up the successor before the record is freed. */
		next = cur->fp_next;
		kmem_free(cur, sizeof (fpollinfo_t));
	}
}
/*
 * Reset the usage counters of all cache sets that hold a list to 0, then
 * set the counter of set 'index' to 1.  Called from poll_common() when a
 * counter wraps around.  ps_lock must be held.
 */
void
pcacheset_reset_count(pollstate_t *ps, int index)
{
	pollcacheset_t *sets = ps->ps_pcacheset;
	int set;

	ASSERT(MUTEX_HELD(&ps->ps_lock));

	for (set = 0; set < ps->ps_nsets; set++) {
		if (sets[set].pcs_pollfd == NULL)
			continue;
		sets[set].pcs_count = 0;
	}
	sets[index].pcs_count = 1;
}
/*
 * Choose the cache set with the smallest pcs_count (the lowest index wins a
 * tie), reset its count to zero and return its index.
 *
 * The caller must hold ps->ps_lock.
 */
int
pcacheset_replace(pollstate_t *ps)
{
	pollcacheset_t *sets;
	int victim = 0;
	int cand;

	ASSERT(MUTEX_HELD(&ps->ps_lock));

	sets = ps->ps_pcacheset;
	for (cand = 1; cand < ps->ps_nsets; cand++) {
		if (sets[cand].pcs_count < sets[victim].pcs_count) {
			victim = cand;
		}
	}
	sets[victim].pcs_count = 0;
	return (victim);
}
/*
 * Detach every polldat currently linked on php->ph_list.
 *
 * The list is processed from its head, one polldat at a time, until it is
 * empty.  The pollhead lock (PH_ENTER/PH_EXIT) is held on entry to each
 * iteration and is dropped before returning.
 */
void
pollhead_clean(pollhead_t *php)
{
polldat_t *pdp;
PH_ENTER(php);
while (php->ph_list != NULL) {
pollstate_t *ps;
pollcache_t *pcp;
pdp = php->ph_list;
ASSERT(pdp->pd_php == php);
if (pdp->pd_thread == NULL) {
/*
 * No thread is recorded on this polldat: unlink it directly
 * while still holding the pollhead lock.
 */
php->ph_list = pdp->pd_next;
pdp->pd_php = NULL;
pdp->pd_next = NULL;
continue;
}
ps = pdp->pd_thread->t_pollstate;
ASSERT(ps != NULL);
pcp = pdp->pd_pcache;
ASSERT(pcp != NULL);
/*
 * Bump pc_busy before letting go of the pollhead lock.
 * NOTE(review): presumably this keeps the pollcache from being torn
 * down while no lock is held -- confirm against the pc_busy_cv waiter.
 */
mutex_enter(&pcp->pc_no_exit);
pcp->pc_busy++;
mutex_exit(&pcp->pc_no_exit);
/*
 * Drop the pollhead lock, then take ps_lock followed by the pollhead
 * lock again, so that ps_lock is acquired first.
 */
PH_EXIT(php);
mutex_enter(&ps->ps_lock);
PH_ENTER(php);
/*
 * The pollhead lock was released above, so re-check that the polldat
 * is still associated with this pollhead before unlinking it.
 */
if (pdp->pd_php == php) {
ASSERT(pdp == php->ph_list);
php->ph_list = pdp->pd_next;
pdp->pd_php = NULL;
pdp->pd_next = NULL;
}
PH_EXIT(php);
mutex_exit(&ps->ps_lock);
/* Drop the pc_busy reference; signal pc_busy_cv when it hits zero. */
mutex_enter(&pcp->pc_no_exit);
pcp->pc_busy--;
if (pcp->pc_busy == 0) {
cv_signal(&pcp->pc_busy_cv);
}
mutex_exit(&pcp->pc_no_exit);
/* Re-take the pollhead lock before examining the list head again. */
PH_ENTER(php);
}
PH_EXIT(php);
}
/*
 * Remove the entries pollfdp[start .. end - 1] from the poll cache on behalf
 * of cache set `cacheindex`.
 *
 * Entries with a negative fd are skipped.  When `flag` is zero, entries whose
 * revents has POLLNVAL set are skipped as well.  For every remaining entry
 * pcache_delete_fd() is called; if it returns non-zero, the next entry after
 * i (and before end) with the same fd is looked up and, if one exists,
 * pcache_update_xref() is called with that position.
 *
 * The caller must hold ps->ps_lock.
 */
void
pcacheset_remove_list(pollstate_t *ps, pollfd_t *pollfdp, int start, int end,
    int cacheindex, int flag)
{
	int i;

	ASSERT(MUTEX_HELD(&ps->ps_lock));

	for (i = start; i < end; i++) {
		pollfd_t *cur = &pollfdp[i];
		int fd;
		int j;

		if (cur->fd < 0) {
			continue;
		}
		if (!flag && (cur->revents & POLLNVAL)) {
			continue;
		}
		if (!pcache_delete_fd(ps, cur->fd, i, cacheindex,
		    (uint_t)cur->events)) {
			continue;
		}

		/* Find the next occurrence of the same fd, if there is one. */
		fd = cur->fd;
		for (j = i + 1; j < end && pollfdp[j].fd != fd; j++)
			;
		if (j < end) {
			pcache_update_xref(ps->ps_pcache, fd, (ssize_t)j,
			    cacheindex);
		}
		ASSERT(j <= end);
	}
}
#ifdef DEBUG
#include<sys/strsubr.h>
/*
 * DEBUG-only consistency check: walk every open file of the current process
 * and, for each one whose vnode has a stream, assert that no polldat on the
 * stream's pollhead list belongs to the current thread.
 *
 * fi_lock is held for the whole walk; each file table entry is locked with
 * UF_ENTER/UF_EXIT while it is examined.
 */
static void
pollcheckphlist()
{
	uf_info_t *fip = P_FINFO(curproc);
	uf_entry_t *ufp;
	int slot;

	mutex_enter(&fip->fi_lock);
	for (slot = 0; slot < fip->fi_nfiles; slot++) {
		file_t *fp;
		struct stdata *stp;

		UF_ENTER(ufp, fip, slot);
		fp = ufp->uf_file;
		if (fp != NULL && (stp = fp->f_vnode->v_stream) != NULL) {
			polldat_t *pdp;

			PH_ENTER(&stp->sd_pollist);
			for (pdp = stp->sd_pollist.ph_list; pdp != NULL;
			    pdp = pdp->pd_next) {
				ASSERT(pdp->pd_thread != curthread);
			}
			PH_EXIT(&stp->sd_pollist);
		}
		UF_EXIT(ufp);
	}
	mutex_exit(&fip->fi_lock);
}
/*
 * DEBUG-only consistency check of the cross references between cache set
 * `cacheindex` and the poll cache.
 *
 * For every non-negative fd in the set's pollfd list, the fd must be present
 * in the poll cache with a pd_ref array.  When its xref for this set has a
 * non-negative position, the refcnt must be at least 1 and the pollfd entry
 * at that position must carry the same fd.  When the refcnt is above 1, the
 * number of entries with that fd from the recorded position to the end of
 * the list must equal the refcnt.
 *
 * Always returns 1; violations are reported through ASSERT.
 */
static int
pollcheckxref(pollstate_t *ps, int cacheindex)
{
	pollcacheset_t *set = &ps->ps_pcacheset[cacheindex];
	pollfd_t *fds = set->pcs_pollfd;
	pollcache_t *pcp = ps->ps_pcache;
	int pos;

	for (pos = 0; pos < set->pcs_nfds; pos++) {
		polldat_t *pdp;
		xref_t *ref;
		int scan;
		int seen;

		if (fds[pos].fd < 0) {
			continue;
		}
		pdp = pcache_lookup_fd(pcp, fds[pos].fd);
		ASSERT(pdp != NULL);
		ASSERT(pdp->pd_ref != NULL);

		ref = &pdp->pd_ref[cacheindex];
		if (ref->xf_position < 0) {
			continue;
		}
		ASSERT(ref->xf_refcnt >= 1);
		ASSERT(fds[ref->xf_position].fd == pdp->pd_fd);
		if (ref->xf_refcnt <= 1) {
			continue;
		}

		/* Count the occurrences of this fd from its first position. */
		seen = 0;
		for (scan = ref->xf_position; scan < set->pcs_nfds; scan++) {
			if (fds[scan].fd == pdp->pd_fd) {
				seen++;
			}
		}
		ASSERT(seen == ref->xf_refcnt);
	}
	return (1);
}
/*
 * DEBUG-only consistency check over every polldat in the poll cache hash
 * table.
 *
 * Each polldat must have a pd_ref array.  For a polldat with a positive
 * pd_count, every cache set whose xref has a positive refcnt must record a
 * position that lies inside that set's pollfd list, and the pollfd entry at
 * that position must carry the polldat's fd.
 */
static void
checkpolldat(pollstate_t *ps)
{
	pollcache_t *pcp = ps->ps_pcache;
	polldat_t **buckets = pcp->pc_hash;
	int b;

	for (b = 0; b < pcp->pc_hashsize; b++) {
		polldat_t *pdp = buckets[b];

		while (pdp != NULL) {
			ASSERT(pdp->pd_ref != NULL);
			if (pdp->pd_count > 0) {
				int s;

				for (s = 0; s < ps->ps_nsets; s++) {
					xref_t *ref = &pdp->pd_ref[s];
					pollcacheset_t *set;
					pollfd_t *fds;

					if (ref->xf_refcnt <= 0) {
						continue;
					}
					set = &ps->ps_pcacheset[s];
					ASSERT(ref->xf_position <
					    set->pcs_nfds);
					fds = set->pcs_pollfd;
					ASSERT(pdp->pd_fd ==
					    fds[ref->xf_position].fd);
				}
			}
			pdp = pdp->pd_hashnext;
		}
	}
}
/*
 * DEBUG-only consistency check.  If vp has a stream, every polldat on the
 * stream's pollhead list whose thread belongs to the current process must
 * have that thread present on the fpollinfo list starting at fpip.
 *
 * Does nothing when vp has no stream.  The pollhead lock is held during the
 * walk.
 */
void
checkwfdlist(vnode_t *vp, fpollinfo_t *fpip)
{
	stdata_t *stp = vp->v_stream;
	polldat_t *pdp;

	if (stp == NULL) {
		return;
	}

	PH_ENTER(&stp->sd_pollist);
	pdp = stp->sd_pollist.ph_list;
	while (pdp != NULL) {
		if (pdp->pd_thread != NULL &&
		    pdp->pd_thread->t_procp == curthread->t_procp) {
			fpollinfo_t *match = fpip;

			/* Look for this thread on the fpollinfo list. */
			while (match != NULL &&
			    match->fp_thread != pdp->pd_thread) {
				match = match->fp_next;
			}
			ASSERT(match != NULL);
		}
		pdp = pdp->pd_next;
	}
	PH_EXIT(&stp->sd_pollist);
}
/*
 * DEBUG-only consistency check over the fd range [begin, end).
 *
 * No fd in the range may have its bit set in the poll cache bitmap.  For an
 * fd that is cached with a non-NULL pd_fp and a non-zero refcnt in cache set
 * `cacheindex`, the pollfd entry at the recorded position must have zero
 * revents.  When the refcnt is above 1, every later entry in ps_pollfd with
 * the same fd must have zero revents too.
 *
 * Always returns 1; violations are reported through ASSERT.
 */
static int
pollcheckrevents(pollstate_t *ps, int begin, int end, int cacheindex)
{
	pollcache_t *pcp = ps->ps_pcache;
	pollfd_t *fds = ps->ps_pollfd;
	int fd;

	for (fd = begin; fd < end; fd++) {
		polldat_t *pdp;
		xref_t *ref;
		int first;
		int later;

		ASSERT(!BT_TEST(pcp->pc_bitmap, fd));
		pdp = pcache_lookup_fd(pcp, fd);
		if (pdp == NULL || pdp->pd_fp == NULL) {
			continue;
		}
		ASSERT(pdp->pd_ref != NULL);

		ref = &pdp->pd_ref[cacheindex];
		if (ref->xf_refcnt == 0) {
			continue;
		}
		first = ref->xf_position;
		ASSERT(first >= 0);
		ASSERT(fds[first].revents == 0);
		if (ref->xf_refcnt <= 1) {
			continue;
		}

		/* The fd appears more than once: check the later copies. */
		for (later = first + 1; later < ps->ps_nfds; later++) {
			if (fds[later].fd == fd) {
				ASSERT(fds[later].revents == 0);
			}
		}
	}
	return (1);
}
#endif
/*
 * Allocate a zero-filled pollcache_t with KM_SLEEP and return it.  The
 * bitmap and hash table are not allocated here; pcache_create() in this file
 * allocates them.
 */
pollcache_t *
pcache_alloc()
{
	pollcache_t *pcp;

	pcp = kmem_zalloc(sizeof (*pcp), KM_SLEEP);
	return (pcp);
}
/*
 * Allocate the bitmap and the hash table of pcp, both zero-filled, sized for
 * an initial request of nfds descriptors.
 *
 * The bitmap covers POLLMAPCHUNK bits when nfds is below POLLMAPCHUNK;
 * otherwise it covers nfds rounded up to a multiple of POLLMAPCHUNK.  The
 * hash table is sized the same way using POLLHASHCHUNKSZ.  The rounding
 * masks assume both constants are powers of two.
 *
 * Sets pc_bitmap, pc_mapsize, pc_hashsize and pc_hash.
 */
void
pcache_create(pollcache_t *pcp, nfds_t nfds)
{
	size_t mapsize;
	size_t bitmap_bytes;
	size_t hash_bytes;

	mapsize = POLLMAPCHUNK;
	if (mapsize <= nfds) {
		mapsize = (nfds + POLLMAPCHUNK - 1) & ~(POLLMAPCHUNK - 1);
	}
	bitmap_bytes = (mapsize / BT_NBIPUL) * sizeof (ulong_t);
	pcp->pc_bitmap = kmem_zalloc(bitmap_bytes, KM_SLEEP);
	pcp->pc_mapsize = mapsize;

	if (nfds >= POLLHASHCHUNKSZ) {
		pcp->pc_hashsize = (nfds + POLLHASHCHUNKSZ - 1) &
		    ~(POLLHASHCHUNKSZ - 1);
	} else {
		pcp->pc_hashsize = POLLHASHCHUNKSZ;
	}
	hash_bytes = pcp->pc_hashsize * sizeof (polldat_t *);
	pcp->pc_hash = kmem_zalloc(hash_bytes, KM_SLEEP);
}
/*
 * Tear down a pollcache and free it.
 *
 * Every polldat still linked in the hash table is freed together with its
 * pd_ref array (if it has one), and pc_fdcount is decremented once per
 * polldat.  The hash table, the bitmap, the two mutexes, the two condition
 * variables and finally pcp itself are then released.  pcp must not be used
 * after this call.
 */
void
pcache_destroy(pollcache_t *pcp)
{
	polldat_t **buckets = pcp->pc_hash;
	int b;

	for (b = 0; b < pcp->pc_hashsize; b++) {
		polldat_t *pdp;
		polldat_t *next;

		for (pdp = buckets[b]; pdp != NULL; pdp = next) {
			/* Save the chain link before the node is freed. */
			next = pdp->pd_hashnext;
			if (pdp->pd_ref != NULL) {
				kmem_free(pdp->pd_ref,
				    sizeof (xref_t) * pdp->pd_nsets);
			}
			kmem_free(pdp, sizeof (polldat_t));
			pcp->pc_fdcount--;
		}
	}
	ASSERT(pcp->pc_fdcount == 0);

	kmem_free(pcp->pc_hash, sizeof (polldat_t *) * pcp->pc_hashsize);
	kmem_free(pcp->pc_bitmap,
	    sizeof (ulong_t) * (pcp->pc_mapsize/BT_NBIPUL));

	mutex_destroy(&pcp->pc_no_exit);
	mutex_destroy(&pcp->pc_lock);
	cv_destroy(&pcp->pc_cv);
	cv_destroy(&pcp->pc_busy_cv);

	kmem_free(pcp, sizeof (pollcache_t));
}
pollcacheset_t *
pcacheset_create(int nsets)
{
return (kmem_zalloc(sizeof (pollcacheset_t) * nsets, KM_SLEEP));
}
/*
 * Free an array of nsets cache sets.  Each set's pollfd list, when present,
 * is freed first using the set's pcs_nfds as its length; the array itself is
 * freed last.
 */
void
pcacheset_destroy(pollcacheset_t *pcsp, int nsets)
{
	int n;

	for (n = 0; n < nsets; n++) {
		pollcacheset_t *set = &pcsp[n];

		if (set->pcs_pollfd == NULL) {
			continue;
		}
		kmem_free(set->pcs_pollfd, set->pcs_nfds * sizeof (pollfd_t));
	}
	kmem_free(pcsp, sizeof (pollcacheset_t) * nsets);
}
/*
 * Fill in revents for the later duplicates of pollfdp[entry].fd, i.e. the
 * entries after `entry` (up to psp->ps_nfds) that name the same fd.
 *
 * A duplicate asking for the same events simply copies the revents of
 * pollfdp[entry].  A duplicate asking for different events is polled again
 * through VOP_POLL() on fp's vnode.  *fdcntp is incremented for every
 * duplicate that ends up with non-zero revents.
 *
 * Returns:
 *	0	all duplicates were processed
 *	-1	VOP_POLL() handed back a pollhead different from the one
 *		recorded in pdp->pd_php; pdp has been re-associated with the
 *		new pollhead and fd has been set in the poll cache bitmap.
 *		The remaining duplicates are not processed.
 *	other	the error returned by VOP_POLL()
 */
static int
plist_chkdupfd(file_t *fp, polldat_t *pdp, pollstate_t *psp, pollfd_t *pollfdp,
    int entry, int *fdcntp)
{
	nfds_t nfds = psp->ps_nfds;
	int fd = pollfdp[entry].fd;
	int dup;

	for (dup = entry + 1; dup < nfds; dup++) {
		pollfd_t *other = &pollfdp[dup];
		pollcache_t *pcp;
		pollhead_t *php;
		int error;

		if (other->fd != fd) {
			continue;
		}

		if (other->events == pollfdp[entry].events) {
			/* Same request as the first entry: reuse its answer. */
			other->revents = pollfdp[entry].revents;
			if (other->revents != 0) {
				(*fdcntp)++;
			}
			continue;
		}

		/* Different events were requested: poll this entry itself. */
		pcp = psp->ps_pcache;
		php = NULL;
		ASSERT(curthread->t_pollcache == NULL);
		error = VOP_POLL(fp->f_vnode, other->events, 0,
		    &other->revents, &php, NULL);
		if (error) {
			return (error);
		}

		if (php != NULL && pdp->pd_php != NULL &&
		    php != pdp->pd_php) {
			/*
			 * The pollhead differs from the recorded one: move
			 * the polldat over and flag the fd in the bitmap.
			 */
			polldat_disassociate(pdp);
			polldat_associate(pdp, php);
			BT_SET(pcp->pc_bitmap, fd);
			return (-1);
		}

		if (other->revents) {
			(*fdcntp)++;
		}
	}
	return (0);
}