#include <sys/types.h>
#include <sys/param.h>
#include <sys/signal.h>
#include <sys/cmn_err.h>
#include <sys/stropts.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/atomic.h>
#include <sys/tihdr.h>
#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/sockfilter_impl.h>
#include <fs/sockfs/socktpi.h>
#include <fs/sockfs/sodirect.h>
#include <sys/ddi.h>
#include <inet/ip.h>
#include <sys/time.h>
#include <sys/cmn_err.h>
#ifdef SOCK_TEST
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif
#define MBLK_PULL_LEN 64
uint32_t so_mblk_pull_len = MBLK_PULL_LEN;
#ifdef DEBUG
boolean_t so_debug_length = B_FALSE;
static boolean_t so_check_length(sonode_t *so);
#endif
static int
so_acceptq_dequeue_locked(struct sonode *so, boolean_t dontblock,
struct sonode **nsop)
{
struct sonode *nso = NULL;
*nsop = NULL;
ASSERT(MUTEX_HELD(&so->so_acceptq_lock));
while ((nso = list_remove_head(&so->so_acceptq_list)) == NULL) {
if (dontblock)
return (EWOULDBLOCK);
if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING))
return (EINTR);
if (cv_wait_sig_swap(&so->so_acceptq_cv,
&so->so_acceptq_lock) == 0)
return (EINTR);
}
ASSERT(nso != NULL);
ASSERT(so->so_acceptq_len > 0);
so->so_acceptq_len--;
nso->so_listener = NULL;
*nsop = nso;
return (0);
}
int
so_acceptq_dequeue(struct sonode *so, boolean_t dontblock,
struct sonode **nsop)
{
int error;
mutex_enter(&so->so_acceptq_lock);
error = so_acceptq_dequeue_locked(so, dontblock, nsop);
mutex_exit(&so->so_acceptq_lock);
return (error);
}
static void
so_acceptq_flush_impl(struct sonode *so, list_t *list, boolean_t doclose)
{
struct sonode *nso;
while ((nso = list_remove_head(list)) != NULL) {
nso->so_listener = NULL;
if (doclose) {
(void) socket_close(nso, 0, CRED());
} else {
ASSERT(so->so_filter_active == 0);
ASSERT(nso->so_count == 1);
nso->so_count--;
VN_RELE(SOTOV(nso));
}
socket_destroy(nso);
}
}
void
so_acceptq_flush(struct sonode *so, boolean_t doclose)
{
so_acceptq_flush_impl(so, &so->so_acceptq_list, doclose);
so_acceptq_flush_impl(so, &so->so_acceptq_defer, doclose);
so->so_acceptq_len = 0;
}
int
so_wait_connected_locked(struct sonode *so, boolean_t nonblock,
sock_connid_t id)
{
ASSERT(MUTEX_HELD(&so->so_lock));
if (so->so_error != 0 && SOCK_CONNID_LT(so->so_proto_connid, id))
so->so_error = 0;
while (SOCK_CONNID_LT(so->so_proto_connid, id)) {
if (nonblock)
return (EINPROGRESS);
if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING))
return (EINTR);
if (cv_wait_sig_swap(&so->so_state_cv, &so->so_lock) == 0)
return (EINTR);
}
if (so->so_error != 0)
return (sogeterr(so, B_TRUE));
if ((so->so_state & SS_ISCONNECTED) == 0)
return (ECONNREFUSED);
return (0);
}
int
so_wait_connected(struct sonode *so, boolean_t nonblock, sock_connid_t id)
{
int error;
mutex_enter(&so->so_lock);
error = so_wait_connected_locked(so, nonblock, id);
mutex_exit(&so->so_lock);
return (error);
}
int
so_snd_wait_qnotfull_locked(struct sonode *so, boolean_t dontblock)
{
int error;
ASSERT(MUTEX_HELD(&so->so_lock));
while (SO_SND_FLOWCTRLD(so)) {
if (so->so_state & SS_CANTSENDMORE)
return (EPIPE);
if (dontblock)
return (EWOULDBLOCK);
if (so->so_state & (SS_CLOSING | SS_FALLBACK_PENDING))
return (EINTR);
if (so->so_sndtimeo == 0) {
error = cv_wait_sig(&so->so_snd_cv, &so->so_lock);
} else {
error = cv_reltimedwait_sig(&so->so_snd_cv,
&so->so_lock, so->so_sndtimeo, TR_CLOCK_TICK);
}
if (error == 0)
return (EINTR);
else if (error == -1)
return (EAGAIN);
}
return (0);
}
int
so_snd_wait_qnotfull(struct sonode *so, boolean_t dontblock)
{
int error = 0;
mutex_enter(&so->so_lock);
so->so_snd_wakeup = B_TRUE;
error = so_snd_wait_qnotfull_locked(so, dontblock);
so->so_snd_wakeup = B_FALSE;
mutex_exit(&so->so_lock);
return (error);
}
void
so_snd_qfull(struct sonode *so)
{
mutex_enter(&so->so_lock);
so->so_snd_qfull = B_TRUE;
mutex_exit(&so->so_lock);
}
void
so_snd_qnotfull(struct sonode *so)
{
mutex_enter(&so->so_lock);
so->so_snd_qfull = B_FALSE;
cv_broadcast(&so->so_snd_cv);
mutex_exit(&so->so_lock);
}
int
socket_chgpgrp(struct sonode *so, pid_t pid)
{
int error;
ASSERT(MUTEX_HELD(&so->so_lock));
if (pid != 0) {
error = kill(pid, 0);
if (error != 0) {
return (error);
}
}
so->so_pgrp = pid;
return (0);
}
static void
socket_sigproc(proc_t *proc, int event)
{
k_siginfo_t info;
ASSERT(event & (SOCKETSIG_WRITE | SOCKETSIG_READ | SOCKETSIG_URG));
if (event & SOCKETSIG_WRITE) {
info.si_signo = SIGPOLL;
info.si_code = POLL_OUT;
info.si_errno = 0;
info.si_fd = 0;
info.si_band = 0;
sigaddq(proc, NULL, &info, KM_NOSLEEP);
}
if (event & SOCKETSIG_READ) {
sigtoproc(proc, NULL, SIGPOLL);
}
if (event & SOCKETSIG_URG) {
sigtoproc(proc, NULL, SIGURG);
}
}
void
socket_sendsig(struct sonode *so, int event)
{
proc_t *proc;
ASSERT(MUTEX_HELD(&so->so_lock));
if (so->so_pgrp == 0 || (!(so->so_state & SS_ASYNC) &&
event != SOCKETSIG_URG)) {
return;
}
dprint(3, ("sending sig %d to %d\n", event, so->so_pgrp));
if (so->so_pgrp > 0) {
mutex_enter(&pidlock);
proc = prfind_zone(so->so_pgrp, so->so_zoneid);
if (proc == NULL) {
mutex_exit(&pidlock);
return;
}
mutex_enter(&proc->p_lock);
mutex_exit(&pidlock);
socket_sigproc(proc, event);
mutex_exit(&proc->p_lock);
} else {
pid_t pgrp = -so->so_pgrp;
mutex_enter(&pidlock);
proc = pgfind_zone(pgrp, so->so_zoneid);
while (proc != NULL) {
mutex_enter(&proc->p_lock);
socket_sigproc(proc, event);
mutex_exit(&proc->p_lock);
proc = proc->p_pglink;
}
mutex_exit(&pidlock);
}
}
#define MIN(a, b) ((a) < (b) ? (a) : (b))
mblk_t *
socopyinuio(uio_t *uiop, ssize_t iosize, size_t wroff, ssize_t maxblk,
size_t tail_len, int *errorp)
{
mblk_t *head = NULL, **tail = &head;
ASSERT(iosize == INFPSZ || iosize > 0);
if (iosize == INFPSZ || iosize > uiop->uio_resid)
iosize = uiop->uio_resid;
if (maxblk == INFPSZ)
maxblk = iosize;
if (iosize < 0 || maxblk < 0 || (maxblk == 0 && iosize > 0))
goto done;
do {
ssize_t blocksize;
mblk_t *mp;
blocksize = MIN(iosize, maxblk);
ASSERT(blocksize >= 0);
mp = allocb(wroff + blocksize + tail_len, BPRI_MED);
if (mp == NULL) {
*errorp = ENOMEM;
return (head);
}
mp->b_rptr += wroff;
mp->b_wptr = mp->b_rptr + blocksize;
*tail = mp;
tail = &mp->b_cont;
if ((*errorp = uiomove(mp->b_rptr, (size_t)blocksize,
UIO_WRITE, uiop)) != 0) {
ASSERT(*errorp != ENOMEM);
freemsg(head);
return (NULL);
}
iosize -= blocksize;
} while (iosize > 0);
done:
*errorp = 0;
return (head);
}
mblk_t *
socopyoutuio(mblk_t *mp, struct uio *uiop, ssize_t max_read, int *errorp)
{
int error;
ptrdiff_t n;
mblk_t *nmp;
ASSERT(mp->b_wptr >= mp->b_rptr);
if (max_read == INFPSZ || max_read > uiop->uio_resid)
max_read = uiop->uio_resid;
do {
if ((n = MIN(max_read, MBLKL(mp))) != 0) {
ASSERT(n > 0);
error = uiomove(mp->b_rptr, n, UIO_READ, uiop);
if (error != 0) {
freemsg(mp);
*errorp = error;
return (NULL);
}
}
mp->b_rptr += n;
max_read -= n;
while (mp != NULL && (mp->b_rptr >= mp->b_wptr)) {
nmp = mp;
mp = mp->b_cont;
freeb(nmp);
}
} while (mp != NULL && max_read > 0);
*errorp = 0;
return (mp);
}
static void
so_prepend_msg(struct sonode *so, mblk_t *mp, mblk_t *last_tail)
{
ASSERT(last_tail != NULL);
mp->b_next = so->so_rcv_q_head;
mp->b_prev = last_tail;
ASSERT(!(DB_FLAGS(mp) & DBLK_UIOA));
if (so->so_rcv_q_head == NULL) {
ASSERT(so->so_rcv_q_last_head == NULL);
so->so_rcv_q_last_head = mp;
#ifdef DEBUG
} else {
ASSERT(!(DB_FLAGS(so->so_rcv_q_head) & DBLK_UIOA));
#endif
}
so->so_rcv_q_head = mp;
#ifdef DEBUG
if (so_debug_length) {
mutex_enter(&so->so_lock);
ASSERT(so_check_length(so));
mutex_exit(&so->so_lock);
}
#endif
}
void
so_process_new_message(struct sonode *so, mblk_t *mp_head, mblk_t *mp_last_head)
{
if (so->so_filter_active > 0 &&
(mp_head = sof_filter_data_in_proc(so, mp_head,
&mp_last_head)) == NULL)
return;
ASSERT(mp_head->b_prev != NULL);
if (so->so_rcv_q_head == NULL) {
so->so_rcv_q_head = mp_head;
so->so_rcv_q_last_head = mp_last_head;
ASSERT(so->so_rcv_q_last_head->b_prev != NULL);
} else {
boolean_t flag_equal = ((DB_FLAGS(mp_head) & DBLK_UIOA) ==
(DB_FLAGS(so->so_rcv_q_last_head) & DBLK_UIOA));
if (mp_head->b_next == NULL &&
DB_TYPE(mp_head) == M_DATA &&
DB_TYPE(so->so_rcv_q_last_head) == M_DATA && flag_equal) {
so->so_rcv_q_last_head->b_prev->b_cont = mp_head;
so->so_rcv_q_last_head->b_prev = mp_head->b_prev;
mp_head->b_prev = NULL;
} else if (flag_equal && (DB_FLAGS(mp_head) & DBLK_UIOA)) {
ASSERT(mp_head->b_next != NULL);
so->so_rcv_q_last_head->b_prev->b_cont = mp_head;
so->so_rcv_q_last_head->b_prev = mp_head->b_prev;
mp_head->b_prev = NULL;
so->so_rcv_q_last_head->b_next = mp_head->b_next;
mp_head->b_next = NULL;
so->so_rcv_q_last_head = mp_last_head;
} else {
#ifdef DEBUG
{
mblk_t *tmp_mblk;
tmp_mblk = mp_head;
while (tmp_mblk != NULL) {
ASSERT(tmp_mblk->b_prev != NULL);
tmp_mblk = tmp_mblk->b_next;
}
}
#endif
so->so_rcv_q_last_head->b_next = mp_head;
so->so_rcv_q_last_head = mp_last_head;
}
}
}
boolean_t
so_check_flow_control(struct sonode *so)
{
ASSERT(MUTEX_HELD(&so->so_lock));
if (so->so_flowctrld && (so->so_rcv_queued < so->so_rcvlowat &&
!(so->so_state & SS_FIL_RCV_FLOWCTRL))) {
so->so_flowctrld = B_FALSE;
mutex_exit(&so->so_lock);
if (so->so_downcalls != NULL &&
so->so_downcalls->sd_clr_flowctrl != NULL) {
(*so->so_downcalls->sd_clr_flowctrl)
(so->so_proto_handle);
}
sof_sonode_notify_filters(so, SOF_EV_INJECT_DATA_IN_OK, 0);
return (B_TRUE);
} else {
mutex_exit(&so->so_lock);
return (B_FALSE);
}
}
int
so_dequeue_msg(struct sonode *so, mblk_t **mctlp, struct uio *uiop,
rval_t *rvalp, int flags)
{
mblk_t *mp, *nmp;
mblk_t *savemp, *savemptail;
mblk_t *new_msg_head;
mblk_t *new_msg_last_head;
mblk_t *last_tail = NULL;
boolean_t partial_read;
boolean_t reset_atmark = B_FALSE;
int more = 0;
int error;
ssize_t oobmark;
sodirect_t *sodp = so->so_direct;
partial_read = B_FALSE;
*mctlp = NULL;
again:
mutex_enter(&so->so_lock);
again1:
#ifdef DEBUG
if (so_debug_length) {
ASSERT(so_check_length(so));
}
#endif
if (so->so_state & SS_RCVATMARK) {
if (flags & MSG_NOMARK) {
mutex_exit(&so->so_lock);
return (EWOULDBLOCK);
}
reset_atmark = B_TRUE;
}
if (sodp != NULL) {
if (sodp->sod_enabled) {
if (sodp->sod_uioa.uioa_state & UIOA_ALLOC) {
sodp = NULL;
} else if (sodp->sod_uioa.uioa_state & UIOA_INIT) {
sodp->sod_uioa.uioa_state &= UIOA_CLR;
sodp->sod_uioa.uioa_state |= UIOA_ENABLED;
sod_uioa_so_init(so, sodp, uiop);
}
} else {
sodp = NULL;
}
}
new_msg_head = so->so_rcv_head;
new_msg_last_head = so->so_rcv_last_head;
so->so_rcv_head = NULL;
so->so_rcv_last_head = NULL;
oobmark = so->so_oobmark;
mutex_exit(&so->so_lock);
if (new_msg_head != NULL) {
so_process_new_message(so, new_msg_head, new_msg_last_head);
}
savemp = savemptail = NULL;
rvalp->r_vals = 0;
error = 0;
mp = so->so_rcv_q_head;
if (mp != NULL &&
(so->so_rcv_timer_tid == 0 ||
so->so_rcv_queued >= so->so_rcv_thresh)) {
partial_read = B_FALSE;
if (flags & MSG_PEEK) {
if ((nmp = dupmsg(mp)) == NULL &&
(nmp = copymsg(mp)) == NULL) {
size_t size = msgsize(mp);
error = strwaitbuf(size, BPRI_HI);
if (error) {
return (error);
}
goto again;
}
mp = nmp;
} else {
ASSERT(mp->b_prev != NULL);
last_tail = mp->b_prev;
mp->b_prev = NULL;
so->so_rcv_q_head = mp->b_next;
if (so->so_rcv_q_head == NULL) {
so->so_rcv_q_last_head = NULL;
}
mp->b_next = NULL;
}
ASSERT(mctlp != NULL);
if (DB_TYPE(mp) != M_DATA) {
*mctlp = mp;
savemp = mp;
savemptail = mp;
ASSERT(DB_TYPE(mp) == M_PROTO ||
DB_TYPE(mp) == M_PCPROTO);
while (mp->b_cont != NULL &&
DB_TYPE(mp->b_cont) != M_DATA) {
ASSERT(DB_TYPE(mp->b_cont) == M_PROTO ||
DB_TYPE(mp->b_cont) == M_PCPROTO);
mp = mp->b_cont;
savemptail = mp;
}
mp = savemptail->b_cont;
savemptail->b_cont = NULL;
}
ASSERT(DB_TYPE(mp) == M_DATA);
if (uiop->uio_resid >= 0) {
ssize_t copied = 0;
if (sodp != NULL && (DB_FLAGS(mp) & DBLK_UIOA)) {
mutex_enter(&so->so_lock);
ASSERT(uiop == (uio_t *)&sodp->sod_uioa);
copied = sod_uioa_mblk(so, mp);
if (copied > 0)
partial_read = B_TRUE;
mutex_exit(&so->so_lock);
mp = NULL;
} else {
ssize_t oldresid = uiop->uio_resid;
if (MBLKL(mp) < so_mblk_pull_len) {
if (pullupmsg(mp, -1) == 1) {
last_tail = mp;
}
}
mp = socopyoutuio(mp, uiop,
oobmark == 0 ? INFPSZ : oobmark, &error);
if (error != 0) {
freemsg(*mctlp);
*mctlp = NULL;
more = 0;
goto done;
}
ASSERT(oldresid >= uiop->uio_resid);
copied = oldresid - uiop->uio_resid;
if (oldresid > uiop->uio_resid)
partial_read = B_TRUE;
}
ASSERT(copied >= 0);
if (copied > 0 && !(flags & MSG_PEEK)) {
mutex_enter(&so->so_lock);
so->so_rcv_queued -= copied;
ASSERT(so->so_oobmark >= 0);
if (so->so_oobmark > 0) {
so->so_oobmark -= copied;
ASSERT(so->so_oobmark >= 0);
if (so->so_oobmark == 0) {
ASSERT(so->so_state &
SS_OOBPEND);
so->so_oobmark = 0;
so->so_state |= SS_RCVATMARK;
}
}
rvalp->r_val2 = so_check_flow_control(so);
}
}
if (mp != NULL) {
more |= MOREDATA;
if ((flags & (MSG_PEEK|MSG_TRUNC))) {
if (flags & MSG_PEEK) {
freemsg(mp);
} else {
unsigned int msize = msgdsize(mp);
freemsg(mp);
mutex_enter(&so->so_lock);
so->so_rcv_queued -= msize;
rvalp->r_val2 =
so_check_flow_control(so);
}
} else if (partial_read && !somsghasdata(mp)) {
freemsg(mp);
more &= ~MOREDATA;
} else {
if (savemp != NULL &&
(flags & MSG_DUPCTRL)) {
mblk_t *nmp;
ASSERT(DB_TYPE(savemp) != M_DATA &&
DB_TYPE(savemptail) != M_DATA);
try_again:
if ((nmp = dupmsg(savemp)) == NULL &&
(nmp = copymsg(savemp)) == NULL) {
size_t size = msgsize(savemp);
error = strwaitbuf(size,
BPRI_HI);
if (error != 0) {
freemsg(mp);
goto done;
}
goto try_again;
}
ASSERT(nmp != NULL);
ASSERT(DB_TYPE(nmp) != M_DATA);
savemptail->b_cont = mp;
*mctlp = nmp;
mp = savemp;
}
so_prepend_msg(so, mp, last_tail);
}
}
if (partial_read && !(so->so_state & SS_RCVATMARK) &&
*mctlp == NULL && uiop->uio_resid > 0 &&
!(flags & MSG_PEEK) && so->so_rcv_head != NULL) {
goto again;
}
} else if (!partial_read) {
mutex_enter(&so->so_lock);
if (so->so_error != 0) {
error = sogeterr(so, !(flags & MSG_PEEK));
mutex_exit(&so->so_lock);
return (error);
}
if (so->so_rcv_head != NULL)
goto again1;
if (!(so->so_state & SS_CANTRCVMORE) && uiop->uio_resid > 0) {
if ((uiop->uio_fmode & (FNDELAY|FNONBLOCK)) ||
(flags & MSG_DONTWAIT)) {
error = EWOULDBLOCK;
} else {
if (so->so_state & (SS_CLOSING |
SS_FALLBACK_PENDING)) {
mutex_exit(&so->so_lock);
error = EINTR;
goto done;
}
so->so_rcv_wakeup = B_TRUE;
so->so_rcv_wanted = uiop->uio_resid;
if (so->so_rcvtimeo == 0) {
error = cv_wait_sig(&so->so_rcv_cv,
&so->so_lock);
} else {
error = cv_reltimedwait_sig(
&so->so_rcv_cv, &so->so_lock,
so->so_rcvtimeo, TR_CLOCK_TICK);
}
so->so_rcv_wakeup = B_FALSE;
so->so_rcv_wanted = 0;
if (error == 0) {
error = EINTR;
} else if (error == -1) {
error = EAGAIN;
} else {
goto again1;
}
}
}
mutex_exit(&so->so_lock);
}
if (reset_atmark && partial_read && !(flags & MSG_PEEK)) {
mutex_enter(&so->so_lock);
ASSERT(so_verify_oobstate(so));
so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK);
freemsg(so->so_oobmsg);
so->so_oobmsg = NULL;
ASSERT(so_verify_oobstate(so));
mutex_exit(&so->so_lock);
}
ASSERT(so->so_rcv_wakeup == B_FALSE);
done:
if (sodp != NULL) {
mutex_enter(&so->so_lock);
if (sodp->sod_enabled &&
(sodp->sod_uioa.uioa_state & UIOA_ENABLED)) {
SOD_UIOAFINI(sodp);
if (sodp->sod_uioa.uioa_mbytes > 0) {
ASSERT(so->so_rcv_q_head != NULL ||
so->so_rcv_head != NULL);
so->so_rcv_queued -= sod_uioa_mblk(so, NULL);
if (error == EWOULDBLOCK)
error = 0;
}
}
mutex_exit(&so->so_lock);
}
#ifdef DEBUG
if (so_debug_length) {
mutex_enter(&so->so_lock);
ASSERT(so_check_length(so));
mutex_exit(&so->so_lock);
}
#endif
rvalp->r_val1 = more;
ASSERT(MUTEX_NOT_HELD(&so->so_lock));
return (error);
}
void
so_enqueue_msg(struct sonode *so, mblk_t *mp, size_t msg_size)
{
ASSERT(MUTEX_HELD(&so->so_lock));
#ifdef DEBUG
if (so_debug_length) {
ASSERT(so_check_length(so));
}
#endif
so->so_rcv_queued += msg_size;
if (so->so_rcv_head == NULL) {
ASSERT(so->so_rcv_last_head == NULL);
so->so_rcv_head = mp;
so->so_rcv_last_head = mp;
} else if ((DB_TYPE(mp) == M_DATA &&
DB_TYPE(so->so_rcv_last_head) == M_DATA) &&
((DB_FLAGS(mp) & DBLK_UIOA) ==
(DB_FLAGS(so->so_rcv_last_head) & DBLK_UIOA))) {
ASSERT(so->so_rcv_last_head != NULL);
ASSERT(so->so_rcv_last_head->b_prev != NULL);
so->so_rcv_last_head->b_prev->b_cont = mp;
} else {
so->so_rcv_last_head->b_next = mp;
so->so_rcv_last_head = mp;
}
while (mp->b_cont != NULL)
mp = mp->b_cont;
so->so_rcv_last_head->b_prev = mp;
#ifdef DEBUG
if (so_debug_length) {
ASSERT(so_check_length(so));
}
#endif
}
boolean_t
somsghasdata(mblk_t *mp)
{
for (; mp; mp = mp->b_cont)
if (mp->b_datap->db_type == M_DATA) {
ASSERT(mp->b_wptr >= mp->b_rptr);
if (mp->b_wptr > mp->b_rptr)
return (B_TRUE);
}
return (B_FALSE);
}
void
so_rcv_flush(struct sonode *so)
{
mblk_t *mp;
ASSERT(MUTEX_HELD(&so->so_lock));
if (so->so_oobmsg != NULL) {
freemsg(so->so_oobmsg);
so->so_oobmsg = NULL;
so->so_oobmark = 0;
so->so_state &=
~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|SS_RCVATMARK);
}
while (so->so_rcv_q_head != NULL) {
mp = so->so_rcv_q_head;
so->so_rcv_q_head = mp->b_next;
mp->b_next = mp->b_prev = NULL;
freemsg(mp);
}
while (so->so_rcv_head != NULL) {
mp = so->so_rcv_head;
so->so_rcv_head = mp->b_next;
mp->b_next = mp->b_prev = NULL;
freemsg(mp);
}
so->so_rcv_queued = 0;
so->so_rcv_q_head = NULL;
so->so_rcv_q_last_head = NULL;
so->so_rcv_head = NULL;
so->so_rcv_last_head = NULL;
}
int
sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags,
boolean_t oob_inline)
{
mblk_t *mp, *nmp;
int error;
dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", (void *)so, (void *)msg,
flags));
if (msg != NULL) {
msg->msg_controllen = 0;
msg->msg_namelen = 0;
msg->msg_flags = 0;
}
mutex_enter(&so->so_lock);
ASSERT(so_verify_oobstate(so));
if (oob_inline ||
(so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) {
dprintso(so, 1, ("sorecvoob: inline or data consumed\n"));
mutex_exit(&so->so_lock);
return (EINVAL);
}
if (!(so->so_state & SS_HAVEOOBDATA)) {
dprintso(so, 1, ("sorecvoob: no data yet\n"));
mutex_exit(&so->so_lock);
return (EWOULDBLOCK);
}
ASSERT(so->so_oobmsg != NULL);
mp = so->so_oobmsg;
if (flags & MSG_PEEK) {
mblk_t *mp1;
mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL);
ASSERT(mp1);
while (mp != NULL) {
ssize_t size;
size = MBLKL(mp);
bcopy(mp->b_rptr, mp1->b_wptr, size);
mp1->b_wptr += size;
ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim);
mp = mp->b_cont;
}
mp = mp1;
} else {
so->so_oobmsg = NULL;
so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA;
}
ASSERT(so_verify_oobstate(so));
mutex_exit(&so->so_lock);
error = 0;
nmp = mp;
while (nmp != NULL && uiop->uio_resid > 0) {
ssize_t n = MBLKL(nmp);
n = MIN(n, uiop->uio_resid);
if (n > 0)
error = uiomove(nmp->b_rptr, n,
UIO_READ, uiop);
if (error)
break;
nmp = nmp->b_cont;
}
ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
freemsg(mp);
return (error);
}
struct sonode *
socket_sonode_create(struct sockparams *sp, int family, int type,
int protocol, int version, int sflags, int *errorp, struct cred *cr)
{
sonode_t *so;
int kmflags;
if (SOCK_UC_VERSION != sp->sp_smod_info->smod_uc_version ||
SOCK_DC_VERSION != sp->sp_smod_info->smod_dc_version) {
#ifdef DEBUG
cmn_err(CE_CONT, "protocol and socket module version mismatch");
#endif
*errorp = EINVAL;
return (NULL);
}
kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
so = kmem_cache_alloc(socket_cache, kmflags);
if (so == NULL) {
*errorp = ENOMEM;
return (NULL);
}
sonode_init(so, sp, family, type, protocol, &so_sonodeops);
if (version == SOV_DEFAULT)
version = so_default_version;
so->so_version = (short)version;
so->so_proto_props.sopp_rxhiwat = SOCKET_RECVHIWATER;
so->so_proto_props.sopp_rxlowat = SOCKET_RECVLOWATER;
so->so_proto_props.sopp_maxpsz = INFPSZ;
so->so_proto_props.sopp_maxblk = INFPSZ;
return (so);
}
int
socket_init_common(struct sonode *so, struct sonode *pso, int flags, cred_t *cr)
{
int error = 0;
if (pso != NULL) {
mutex_enter(&pso->so_lock);
so->so_state |= SS_ISCONNECTED | (pso->so_state & SS_ASYNC);
so->so_pgrp = pso->so_pgrp;
so->so_rcvtimeo = pso->so_rcvtimeo;
so->so_sndtimeo = pso->so_sndtimeo;
so->so_xpg_rcvbuf = pso->so_xpg_rcvbuf;
so->so_options = pso->so_options & (SO_DEBUG|SO_REUSEADDR|
SO_KEEPALIVE|SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK|
SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER);
so->so_proto_props = pso->so_proto_props;
so->so_mode = pso->so_mode;
so->so_pollev = pso->so_pollev & SO_POLLEV_ALWAYS;
mutex_exit(&pso->so_lock);
if (pso->so_filter_active > 0 &&
(error = sof_sonode_inherit_filters(so, pso)) != 0)
return (error);
} else {
struct sockparams *sp = so->so_sockparams;
sock_upcalls_t *upcalls_to_use;
if (!list_is_empty(&sp->sp_auto_filters) &&
(error = sof_sonode_autoattach_filters(so, cr)) != 0)
return (error);
so->so_state |= SS_FILOP_OK;
upcalls_to_use = &so_upcalls;
so->so_proto_handle =
sp->sp_smod_info->smod_proto_create_func(so->so_family,
so->so_type, so->so_protocol, &so->so_downcalls,
&so->so_mode, &error, flags, cr);
if (so->so_proto_handle == NULL) {
ASSERT(error != 0);
return ((error == 0) ? ENOMEM : error);
}
ASSERT(so->so_downcalls != NULL);
ASSERT(so->so_downcalls->sd_send != NULL ||
so->so_downcalls->sd_send_uio != NULL);
if (so->so_downcalls->sd_recv_uio != NULL) {
ASSERT(so->so_downcalls->sd_poll != NULL);
so->so_pollev |= SO_POLLEV_ALWAYS;
}
(*so->so_downcalls->sd_activate)(so->so_proto_handle,
(sock_upper_handle_t)so, upcalls_to_use, 0, cr);
if (so->so_protocol != so->so_sockparams->sp_protocol) {
int protocol = so->so_protocol;
int error;
error = socket_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE,
&protocol, (t_uscalar_t)sizeof (protocol), cr);
if (error) {
(void) (*so->so_downcalls->sd_close)
(so->so_proto_handle, 0, cr);
mutex_enter(&so->so_lock);
so_rcv_flush(so);
mutex_exit(&so->so_lock);
return (EPROTONOSUPPORT);
}
}
}
if (uioasync.enabled)
sod_sock_init(so);
VN_HOLD(SOTOV(so));
return (0);
}
int
socket_ioctl_common(struct sonode *so, int cmd, intptr_t arg, int mode,
struct cred *cr, int32_t *rvalp)
{
switch (cmd) {
case SIOCSQPTR:
return (EOPNOTSUPP);
case FIONBIO: {
int32_t value;
if (so_copyin((void *)arg, &value, sizeof (int32_t),
(mode & (int)FKIOCTL)))
return (EFAULT);
mutex_enter(&so->so_lock);
if (value) {
so->so_state |= SS_NDELAY;
} else {
so->so_state &= ~SS_NDELAY;
}
mutex_exit(&so->so_lock);
return (0);
}
case FIOASYNC: {
int32_t value;
if (so_copyin((void *)arg, &value, sizeof (int32_t),
(mode & (int)FKIOCTL)))
return (EFAULT);
mutex_enter(&so->so_lock);
if (value) {
so->so_state |= SS_ASYNC;
} else {
so->so_state &= ~SS_ASYNC;
}
mutex_exit(&so->so_lock);
return (0);
}
case SIOCSPGRP:
case FIOSETOWN: {
int error;
pid_t pid;
if (so_copyin((void *)arg, &pid, sizeof (pid_t),
(mode & (int)FKIOCTL)))
return (EFAULT);
mutex_enter(&so->so_lock);
error = (pid != so->so_pgrp) ? socket_chgpgrp(so, pid) : 0;
mutex_exit(&so->so_lock);
return (error);
}
case SIOCGPGRP:
case FIOGETOWN:
if (so_copyout(&so->so_pgrp, (void *)arg,
sizeof (pid_t), (mode & (int)FKIOCTL)))
return (EFAULT);
return (0);
case SIOCATMARK: {
int retval;
if ((so->so_mode & SM_EXDATA) == 0)
return (EINVAL);
if (so->so_downcalls->sd_recv_uio != NULL)
return (-1);
retval = (so->so_state & SS_RCVATMARK) != 0;
if (so_copyout(&retval, (void *)arg, sizeof (int),
(mode & (int)FKIOCTL))) {
return (EFAULT);
}
return (0);
}
case FIONREAD: {
int retval;
if (so->so_downcalls->sd_recv_uio != NULL)
return (-1);
retval = MIN(so->so_rcv_queued, INT_MAX);
if (so_copyout(&retval, (void *)arg,
sizeof (retval), (mode & (int)FKIOCTL))) {
return (EFAULT);
}
return (0);
}
case _I_GETPEERCRED: {
int error = 0;
if ((mode & FKIOCTL) == 0)
return (EINVAL);
mutex_enter(&so->so_lock);
if ((so->so_mode & SM_CONNREQUIRED) == 0) {
error = ENOTSUP;
} else if ((so->so_state & SS_ISCONNECTED) == 0) {
error = ENOTCONN;
} else if (so->so_peercred != NULL) {
k_peercred_t *kp = (k_peercred_t *)arg;
kp->pc_cr = so->so_peercred;
kp->pc_cpid = so->so_cpid;
crhold(so->so_peercred);
} else {
error = EINVAL;
}
mutex_exit(&so->so_lock);
return (error);
}
default:
return (-1);
}
}
static int
so_strioc_nread(struct sonode *so, intptr_t arg, int mode, int32_t *rvalp)
{
size_t size = 0;
int retval;
int count = 0;
mblk_t *mp;
clock_t wakeup = drv_usectohz(10);
if (so->so_downcalls == NULL ||
so->so_downcalls->sd_recv_uio != NULL)
return (EINVAL);
mutex_enter(&so->so_lock);
while (so->so_flag & SOREADLOCKED) {
if (so->so_rcv_wakeup)
goto out;
(void) cv_reltimedwait(&so->so_read_cv, &so->so_lock, wakeup,
TR_CLOCK_TICK);
}
mp = so->so_rcv_q_head;
if (mp != NULL) {
size = msgdsize(so->so_rcv_q_head);
for (; mp != NULL; mp = mp->b_next)
count++;
} else {
size = msgdsize(so->so_rcv_head);
}
for (mp = so->so_rcv_head; mp != NULL; mp = mp->b_next)
count++;
out:
mutex_exit(&so->so_lock);
retval = MIN(size, INT_MAX);
if (so_copyout(&retval, (void *)arg, sizeof (retval),
(mode & (int)FKIOCTL))) {
return (EFAULT);
} else {
*rvalp = count;
return (0);
}
}
int
socket_strioc_common(struct sonode *so, int cmd, intptr_t arg, int mode,
struct cred *cr, int32_t *rvalp)
{
int retval;
if ((cmd & 0xffffff00U) != STR)
return (-1);
switch (cmd) {
case I_CANPUT:
return (EOPNOTSUPP);
case I_NREAD:
return (so_strioc_nread(so, arg, mode, rvalp));
case I_LOOK:
if (so_copyout("sockmod", (void *)arg, strlen("sockmod") + 1,
(mode & (int)FKIOCTL))) {
return (EFAULT);
}
return (0);
default:
break;
}
if ((retval = so_tpi_fallback(so, cr)) == 0) {
ASSERT(so->so_rcv_q_head == NULL);
return (SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
} else {
return (retval);
}
}
int
socket_getopt_common(struct sonode *so, int level, int option_name,
void *optval, socklen_t *optlenp, int flags)
{
if (level != SOL_SOCKET)
return (-1);
switch (option_name) {
case SO_ERROR:
case SO_DOMAIN:
case SO_TYPE:
case SO_ACCEPTCONN: {
int32_t value;
socklen_t optlen = *optlenp;
if (optlen < (t_uscalar_t)sizeof (int32_t)) {
return (EINVAL);
}
switch (option_name) {
case SO_ERROR:
mutex_enter(&so->so_lock);
value = sogeterr(so, B_TRUE);
mutex_exit(&so->so_lock);
break;
case SO_DOMAIN:
value = so->so_family;
break;
case SO_TYPE:
value = so->so_type;
break;
case SO_ACCEPTCONN:
if (so->so_state & SS_ACCEPTCONN)
value = SO_ACCEPTCONN;
else
value = 0;
break;
}
bcopy(&value, optval, sizeof (value));
*optlenp = sizeof (value);
return (0);
}
case SO_SNDTIMEO:
case SO_RCVTIMEO: {
clock_t value;
socklen_t optlen = *optlenp;
if (get_udatamodel() == DATAMODEL_NONE ||
get_udatamodel() == DATAMODEL_NATIVE) {
if (optlen < sizeof (struct timeval))
return (EINVAL);
} else {
if (optlen < sizeof (struct timeval32))
return (EINVAL);
}
if (option_name == SO_RCVTIMEO)
value = drv_hztousec(so->so_rcvtimeo);
else
value = drv_hztousec(so->so_sndtimeo);
if (get_udatamodel() == DATAMODEL_NONE ||
get_udatamodel() == DATAMODEL_NATIVE) {
((struct timeval *)(optval))->tv_sec =
value / (1000 * 1000);
((struct timeval *)(optval))->tv_usec =
value % (1000 * 1000);
*optlenp = sizeof (struct timeval);
} else {
((struct timeval32 *)(optval))->tv_sec =
value / (1000 * 1000);
((struct timeval32 *)(optval))->tv_usec =
value % (1000 * 1000);
*optlenp = sizeof (struct timeval32);
}
return (0);
}
case SO_DEBUG:
case SO_REUSEADDR:
case SO_KEEPALIVE:
case SO_DONTROUTE:
case SO_BROADCAST:
case SO_USELOOPBACK:
case SO_OOBINLINE:
case SO_SNDBUF:
#ifdef notyet
case SO_SNDLOWAT:
case SO_RCVLOWAT:
#endif
case SO_DGRAM_ERRIND: {
socklen_t optlen = *optlenp;
if (optlen < (t_uscalar_t)sizeof (int32_t))
return (EINVAL);
break;
}
case SO_RCVBUF: {
socklen_t optlen = *optlenp;
if (optlen < (t_uscalar_t)sizeof (int32_t))
return (EINVAL);
if ((flags & _SOGETSOCKOPT_XPG4_2) && so->so_xpg_rcvbuf != 0) {
*(int32_t *)optval = so->so_xpg_rcvbuf;
*optlenp = sizeof (so->so_xpg_rcvbuf);
return (0);
}
break;
}
case SO_LINGER: {
socklen_t optlen = *optlenp;
if (optlen < (t_uscalar_t)sizeof (struct linger))
return (EINVAL);
break;
}
case SO_SND_BUFINFO: {
socklen_t optlen = *optlenp;
if (optlen < (t_uscalar_t)sizeof (struct so_snd_bufinfo))
return (EINVAL);
((struct so_snd_bufinfo *)(optval))->sbi_wroff =
(so->so_proto_props).sopp_wroff;
((struct so_snd_bufinfo *)(optval))->sbi_maxblk =
(so->so_proto_props).sopp_maxblk;
((struct so_snd_bufinfo *)(optval))->sbi_maxpsz =
(so->so_proto_props).sopp_maxpsz;
((struct so_snd_bufinfo *)(optval))->sbi_tail =
(so->so_proto_props).sopp_tail;
*optlenp = sizeof (struct so_snd_bufinfo);
return (0);
}
case SO_SND_COPYAVOID: {
sof_instance_t *inst;
for (inst = so->so_filter_top; inst != NULL;
inst = inst->sofi_next) {
if (SOF_INTERESTED(inst, data_out))
return (EOPNOTSUPP);
}
break;
}
default:
break;
}
return (-1);
}
void
socket_sonode_destroy(struct sonode *so)
{
sonode_fini(so);
kmem_cache_free(socket_cache, so);
}
int
so_zcopy_wait(struct sonode *so)
{
int error = 0;
mutex_enter(&so->so_lock);
while (!(so->so_copyflag & STZCNOTIFY)) {
if (so->so_state & SS_CLOSING) {
mutex_exit(&so->so_lock);
return (EINTR);
}
if (cv_wait_sig(&so->so_copy_cv, &so->so_lock) == 0) {
error = EINTR;
break;
}
}
so->so_copyflag &= ~STZCNOTIFY;
mutex_exit(&so->so_lock);
return (error);
}
void
so_timer_callback(void *arg)
{
struct sonode *so = (struct sonode *)arg;
mutex_enter(&so->so_lock);
so->so_rcv_timer_tid = 0;
if (so->so_rcv_queued > 0) {
so_notify_data(so, so->so_rcv_queued);
} else {
mutex_exit(&so->so_lock);
}
}
#ifdef DEBUG
static boolean_t
so_check_length(sonode_t *so)
{
mblk_t *mp = so->so_rcv_q_head;
int len = 0;
ASSERT(MUTEX_HELD(&so->so_lock));
if (mp != NULL) {
len = msgdsize(mp);
while ((mp = mp->b_next) != NULL)
len += msgdsize(mp);
}
mp = so->so_rcv_head;
if (mp != NULL) {
len += msgdsize(mp);
while ((mp = mp->b_next) != NULL)
len += msgdsize(mp);
}
return ((len == so->so_rcv_queued) ? B_TRUE : B_FALSE);
}
#endif
int
so_get_mod_version(struct sockparams *sp)
{
ASSERT(sp != NULL && sp->sp_smod_info != NULL);
return (sp->sp_smod_info->smod_version);
}
static boolean_t
so_start_fallback(struct sonode *so)
{
ASSERT(RW_READ_HELD(&so->so_fallback_rwlock));
mutex_enter(&so->so_lock);
if (so->so_state & SS_FALLBACK_PENDING) {
mutex_exit(&so->so_lock);
return (B_FALSE);
}
so->so_state |= SS_FALLBACK_PENDING;
cv_broadcast(&so->so_state_cv);
cv_broadcast(&so->so_rcv_cv);
cv_broadcast(&so->so_snd_cv);
mutex_enter(&so->so_acceptq_lock);
cv_broadcast(&so->so_acceptq_cv);
mutex_exit(&so->so_acceptq_lock);
mutex_exit(&so->so_lock);
if (rw_tryupgrade(&so->so_fallback_rwlock) == 0) {
rw_exit(&so->so_fallback_rwlock);
DTRACE_PROBE1(pending__ops__wait, (struct sonode *), so);
rw_enter(&so->so_fallback_rwlock, RW_WRITER);
DTRACE_PROBE1(pending__ops__complete, (struct sonode *), so);
}
return (B_TRUE);
}
static void
so_end_fallback(struct sonode *so)
{
ASSERT(RW_ISWRITER(&so->so_fallback_rwlock));
mutex_enter(&so->so_lock);
so->so_state &= ~(SS_FALLBACK_PENDING|SS_FALLBACK_DRAIN);
mutex_exit(&so->so_lock);
rw_downgrade(&so->so_fallback_rwlock);
}
static mblk_t *
so_quiesced_cb(sock_upper_handle_t sock_handle, sock_quiesce_arg_t *arg,
struct T_capability_ack *tcap,
struct sockaddr *laddr, socklen_t laddrlen,
struct sockaddr *faddr, socklen_t faddrlen, short opts)
{
struct sonode *so = (struct sonode *)sock_handle;
boolean_t atmark;
mblk_t *retmp = NULL, **tailmpp = &retmp;
if (tcap != NULL)
sotpi_update_state(so, tcap, laddr, laddrlen, faddr, faddrlen,
opts);
mutex_enter(&so->so_lock);
so->so_state |= SS_FALLBACK_DRAIN;
SOCKET_TIMER_CANCEL(so);
mutex_exit(&so->so_lock);
if (so->so_rcv_head != NULL) {
if (so->so_rcv_q_last_head == NULL)
so->so_rcv_q_head = so->so_rcv_head;
else
so->so_rcv_q_last_head->b_next = so->so_rcv_head;
so->so_rcv_q_last_head = so->so_rcv_last_head;
}
atmark = (so->so_state & SS_RCVATMARK) != 0;
so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA);
ASSERT(so->so_oobmsg != NULL || so->so_oobmark <= so->so_rcv_queued);
while (so->so_rcv_q_head != NULL) {
mblk_t *mp = so->so_rcv_q_head;
size_t mlen = msgdsize(mp);
so->so_rcv_q_head = mp->b_next;
mp->b_next = NULL;
mp->b_prev = NULL;
if (atmark) {
struct T_exdata_ind *tei;
mblk_t *mp1 = arg->soqa_exdata_mp;
arg->soqa_exdata_mp = NULL;
ASSERT(mp1 != NULL);
mp1->b_datap->db_type = M_PROTO;
tei = (struct T_exdata_ind *)mp1->b_rptr;
tei->PRIM_type = T_EXDATA_IND;
tei->MORE_flag = 0;
mp1->b_wptr = (uchar_t *)&tei[1];
if (IS_SO_OOB_INLINE(so)) {
mp1->b_cont = mp;
} else {
ASSERT(so->so_oobmsg != NULL);
mp1->b_cont = so->so_oobmsg;
so->so_oobmsg = NULL;
mp->b_next = so->so_rcv_q_head;
so->so_rcv_q_head = mp;
mlen = 0;
}
mp = mp1;
atmark = B_FALSE;
} else if (so->so_oobmark > 0) {
if (so->so_oobmark < mlen) {
mblk_t *urg_mp = mp;
atmark = B_TRUE;
mp = NULL;
mlen = so->so_oobmark;
do {
so->so_oobmark -= MBLKL(urg_mp);
mp = urg_mp;
urg_mp = urg_mp->b_cont;
} while (so->so_oobmark > 0);
mp->b_cont = NULL;
if (urg_mp != NULL) {
urg_mp->b_next = so->so_rcv_q_head;
so->so_rcv_q_head = urg_mp;
}
} else {
so->so_oobmark -= mlen;
if (so->so_oobmark == 0)
atmark = B_TRUE;
}
}
so->so_rcv_queued -= mlen;
*tailmpp = mp;
tailmpp = &mp->b_next;
}
so->so_rcv_head = NULL;
so->so_rcv_last_head = NULL;
so->so_rcv_q_head = NULL;
so->so_rcv_q_last_head = NULL;
if (atmark || so->so_oobmark > 0) {
mblk_t *mp;
if (atmark && so->so_oobmsg != NULL) {
struct T_exdata_ind *tei;
mp = arg->soqa_exdata_mp;
arg->soqa_exdata_mp = NULL;
ASSERT(mp != NULL);
mp->b_datap->db_type = M_PROTO;
tei = (struct T_exdata_ind *)mp->b_rptr;
tei->PRIM_type = T_EXDATA_IND;
tei->MORE_flag = 0;
mp->b_wptr = (uchar_t *)&tei[1];
mp->b_cont = so->so_oobmsg;
so->so_oobmsg = NULL;
*tailmpp = mp;
tailmpp = &mp->b_next;
} else {
mp = arg->soqa_exdata_mp;
arg->soqa_exdata_mp = NULL;
ASSERT(mp != NULL);
DB_TYPE(mp) = M_PCSIG;
*mp->b_wptr++ = (uchar_t)SIGURG;
*tailmpp = mp;
tailmpp = &mp->b_next;
mp = arg->soqa_urgmark_mp;
arg->soqa_urgmark_mp = NULL;
mp->b_flag = atmark ? MSGMARKNEXT : MSGNOTMARKNEXT;
*tailmpp = mp;
tailmpp = &mp->b_next;
so->so_oobmark = 0;
}
}
ASSERT(so->so_oobmark == 0);
ASSERT(so->so_rcv_queued == 0);
return (retmp);
}
#ifdef DEBUG
void
so_integrity_check(struct sonode *cur, struct sonode *orig)
{
VERIFY(cur->so_vnode == orig->so_vnode);
VERIFY(cur->so_ops == orig->so_ops);
#define CHECK_STATE (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_NDELAY|SS_NONBLOCK| \
SS_ASYNC|SS_ACCEPTCONN|SS_SAVEDEOR|SS_RCVATMARK|SS_OOBPEND| \
SS_HAVEOOBDATA|SS_HADOOBDATA|SS_SENTLASTREADSIG|SS_SENTLASTWRITESIG)
VERIFY((cur->so_state & (orig->so_state & CHECK_STATE)) ==
(orig->so_state & CHECK_STATE));
VERIFY(cur->so_mode == orig->so_mode);
VERIFY(cur->so_flag == orig->so_flag);
VERIFY(cur->so_count == orig->so_count);
VERIFY(cur->so_sockparams == orig->so_sockparams);
VERIFY(cur->so_error != 0 || orig->so_error == 0);
VERIFY(cur->so_family == orig->so_family);
VERIFY(cur->so_type == orig->so_type);
VERIFY(cur->so_protocol == orig->so_protocol);
VERIFY(cur->so_version == orig->so_version);
VERIFY(cur->so_acceptq_len >= orig->so_acceptq_len);
VERIFY(list_head(&cur->so_acceptq_list) ==
list_head(&orig->so_acceptq_list));
VERIFY(cur->so_backlog == orig->so_backlog);
VERIFY(cur->so_oobmark >= orig->so_oobmark);
VERIFY(cur->so_pgrp == orig->so_pgrp);
VERIFY(cur->so_peercred == orig->so_peercred);
VERIFY(cur->so_cpid == orig->so_cpid);
VERIFY(cur->so_zoneid == orig->so_zoneid);
VERIFY(cur->so_rcv_queued >= orig->so_rcv_queued);
VERIFY(cur->so_rcv_q_head == orig->so_rcv_q_head);
VERIFY(cur->so_rcv_head == orig->so_rcv_head);
VERIFY(cur->so_proto_handle == orig->so_proto_handle);
VERIFY(cur->so_downcalls == orig->so_downcalls);
}
#endif
int
so_tpi_fallback(struct sonode *so, struct cred *cr)
{
int error;
queue_t *q;
struct sockparams *sp;
struct sockparams *newsp = NULL;
so_proto_fallback_func_t fbfunc;
const char *devpath;
boolean_t direct;
struct sonode *nso;
sock_quiesce_arg_t arg = { NULL, NULL };
#ifdef DEBUG
struct sonode origso;
#endif
error = 0;
sp = so->so_sockparams;
fbfunc = sp->sp_smod_info->smod_proto_fallback_func;
if (so->so_filter_active > 0 || so->so_krecv_cb != NULL)
return (EINVAL);
switch (so->so_family) {
case AF_INET:
devpath = sp->sp_smod_info->smod_fallback_devpath_v4;
break;
case AF_INET6:
devpath = sp->sp_smod_info->smod_fallback_devpath_v6;
break;
default:
return (EINVAL);
}
if (devpath == NULL || fbfunc == NULL)
return (EINVAL);
if (!so_start_fallback(so))
return (EAGAIN);
#ifdef DEBUG
bcopy(so, &origso, sizeof (*so));
#endif
sp->sp_stats.sps_nfallback.value.ui64++;
newsp = sockparams_hold_ephemeral_bydev(so->so_family, so->so_type,
so->so_protocol, devpath, KM_SLEEP, &error);
if (error != 0)
goto out;
if (so->so_direct != NULL) {
sodirect_t *sodp = so->so_direct;
mutex_enter(&so->so_lock);
so->so_direct->sod_enabled = B_FALSE;
so->so_state &= ~SS_SODIRECT;
ASSERT(sodp->sod_uioafh == NULL);
mutex_exit(&so->so_lock);
}
error = sotpi_convert_sonode(so, newsp, &direct, &q, cr);
if (error != 0)
goto out;
arg.soqa_exdata_mp = allocb_wait(sizeof (struct T_exdata_ind),
BPRI_MED, STR_NOSIG, NULL);
arg.soqa_urgmark_mp = allocb_wait(0, BPRI_MED, STR_NOSIG, NULL);
DTRACE_PROBE1(proto__fallback__begin, struct sonode *, so);
error = (*fbfunc)(so->so_proto_handle, q, direct, so_quiesced_cb,
&arg);
DTRACE_PROBE1(proto__fallback__end, struct sonode *, so);
if (error != 0) {
sotpi_revert_sonode(so, cr);
goto out;
}
nso = list_head(&so->so_acceptq_list);
while (nso != NULL) {
int rval;
struct sonode *next;
if (arg.soqa_exdata_mp == NULL) {
arg.soqa_exdata_mp =
allocb_wait(sizeof (struct T_exdata_ind),
BPRI_MED, STR_NOSIG, NULL);
}
if (arg.soqa_urgmark_mp == NULL) {
arg.soqa_urgmark_mp = allocb_wait(0, BPRI_MED,
STR_NOSIG, NULL);
}
DTRACE_PROBE1(proto__fallback__begin, struct sonode *, nso);
rval = (*fbfunc)(nso->so_proto_handle, NULL, direct,
so_quiesced_cb, &arg);
DTRACE_PROBE1(proto__fallback__end, struct sonode *, nso);
if (rval != 0) {
zcmn_err(getzoneid(), CE_WARN,
"Failed to convert socket in accept queue to TPI. "
"Pid = %d\n", curproc->p_pid);
next = list_next(&so->so_acceptq_list, nso);
list_remove(&so->so_acceptq_list, nso);
so->so_acceptq_len--;
(void) socket_close(nso, 0, CRED());
socket_destroy(nso);
nso = next;
} else {
nso = list_next(&so->so_acceptq_list, nso);
}
}
so_acceptq_flush(so, B_FALSE);
mutex_enter(&so->so_lock);
so->so_state |= SS_FALLBACK_COMP;
mutex_exit(&so->so_lock);
so->so_ops = &sotpi_sonodeops;
pollwakeup(&so->so_poll_list, POLLERR);
ASSERT(SOTOV(so)->v_count >= 2);
VN_RELE(SOTOV(so));
out:
so_end_fallback(so);
if (error != 0) {
#ifdef DEBUG
so_integrity_check(so, &origso);
#endif
zcmn_err(getzoneid(), CE_WARN,
"Failed to convert socket to TPI (err=%d). Pid = %d\n",
error, curproc->p_pid);
if (newsp != NULL)
SOCKPARAMS_DEC_REF(newsp);
}
if (arg.soqa_exdata_mp != NULL)
freemsg(arg.soqa_exdata_mp);
if (arg.soqa_urgmark_mp != NULL)
freemsg(arg.soqa_urgmark_mp);
return (error);
}
int
so_krecv_set(sonode_t *so, so_krecv_f cb, void *arg)
{
int ret;
if (cb == NULL && arg != NULL)
return (EINVAL);
SO_BLOCK_FALLBACK(so, so_krecv_set(so, cb, arg));
mutex_enter(&so->so_lock);
if (so->so_state & SS_FALLBACK_COMP) {
mutex_exit(&so->so_lock);
SO_UNBLOCK_FALLBACK(so);
return (ENOTSUP);
}
ret = so_lock_read(so, 0);
VERIFY(ret == 0);
so_rcv_flush(so);
so->so_krecv_cb = cb;
so->so_krecv_arg = arg;
so_unlock_read(so);
mutex_exit(&so->so_lock);
SO_UNBLOCK_FALLBACK(so);
return (0);
}
void
so_krecv_unblock(sonode_t *so)
{
mutex_enter(&so->so_lock);
VERIFY(so->so_krecv_cb != NULL);
so->so_rcv_queued = 0;
(void) so_check_flow_control(so);
}