#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/ipc.h>
#include <sys/ipc_impl.h>
#include <sys/msg.h>
#include <sys/msg_impl.h>
#include <sys/list.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/cpuvar.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/project.h>
#include <sys/modctl.h>
#include <sys/syscall.h>
#include <sys/policy.h>
#include <sys/zone.h>
#include <c2/audit.h>
/*
 * Global message-facility tunables.  Nothing in the visible part of this
 * file reads these variables; presumably they are legacy settings that
 * are consumed elsewhere in the kernel -- TODO confirm.
 */
size_t msginfo_msgmax = 2048;
size_t msginfo_msgmnb = 4096;
int msginfo_msgmni = 50;
int msginfo_msgtql = 40;
int msginfo_msgssz = 8;
int msginfo_msgmap = 0;
ushort_t msginfo_msgseg = 1024;
/*
 * Resource-control handles defined outside this file.  The zone and
 * project msgmni handles are passed to ipcs_create() in _init(); the
 * process msgmnb/msgtql handles are consulted in msgctl() and msgget().
 */
extern rctl_hndl_t rc_zone_msgmni;
extern rctl_hndl_t rc_project_msgmni;
extern rctl_hndl_t rc_process_msgmnb;
extern rctl_hndl_t rc_process_msgtql;
/* IPC service handle for message queues; created in _init(). */
static ipc_service_t *msq_svc;
/* Zone key created in _init() with msg_remove_zone() as a callback. */
static zone_key_t msg_zone_key;
/* Callbacks handed to ipcs_create() in _init(). */
static void msg_dtor(kipc_perm_t *);
static void msg_rmid(kipc_perm_t *);
/* Callback handed to zone_key_create() in _init(). */
static void msg_remove_zone(zoneid_t, void *);
/*
 * Native system call entry point.  The parameter names in this prototype
 * (a0, a1, a2, a4, a5) do not match the definition's (a1 .. a5); prototype
 * parameter names are not significant, so this is cosmetic only.
 */
static ssize_t msgsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2,
uintptr_t a4, uintptr_t a5);
/*
 * System call entry describing msgsys().  The leading 6 matches the number
 * of parameters msgsys() takes (opcode plus five arguments).  The flag word
 * selects SE_64RVAL on _LP64 kernels and SE_32RVAL1 otherwise.
 */
static struct sysent ipcmsg_sysent = {
6,
#ifdef _LP64
SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
#else
SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
#endif
(int (*)())(uintptr_t)msgsys
};
#ifdef _SYSCALL32_IMPL
/*
 * 32-bit system call entry point and its sysent description; only built
 * when _SYSCALL32_IMPL is defined.  As with msgsys(), the prototype's
 * parameter names differ from the definition's.
 */
static ssize32_t msgsys32(int opcode, uint32_t a0, uint32_t a1, uint32_t a2,
uint32_t a4, uint32_t a5);
static struct sysent ipcmsg_sysent32 = {
6,
SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
msgsys32
};
#endif
/* Module linkage element binding the native sysent to mod_syscallops. */
static struct modlsys modlsys = {
&mod_syscallops, "System V message facility", &ipcmsg_sysent
};
#ifdef _SYSCALL32_IMPL
/* Same, for the 32-bit sysent and mod_syscallops32. */
static struct modlsys modlsys32 = {
&mod_syscallops32, "32-bit System V message facility", &ipcmsg_sysent32
};
#endif
/*
 * Forward declarations for the file-local helpers defined below.
 */
static uint_t msg_type_hash(long);
static int msgq_check_err(kmsqid_t *qp, int cvres);
static int msg_rcvq_sleep(list_t *, msgq_wakeup_t *, kmutex_t **,
kmsqid_t *);
static int msg_copyout(kmsqid_t *, long, kmutex_t **, size_t *, size_t,
struct msg *, struct ipcmsgbuf *, int);
static void msg_rcvq_wakeup_all(list_t *);
static void msg_wakeup_senders(kmsqid_t *);
static void msg_wakeup_rdr(kmsqid_t *, msg_select_t **, long);
/* Selection functions referenced from the msg_fnd_sndr / msg_fnd_rdr tables. */
static msgq_wakeup_t *msg_fnd_any_snd(kmsqid_t *, int, long);
static msgq_wakeup_t *msg_fnd_any_rdr(kmsqid_t *, int, long);
static msgq_wakeup_t *msg_fnd_neg_snd(kmsqid_t *, int, long);
static msgq_wakeup_t *msg_fnd_spc_snd(kmsqid_t *, int, long);
static struct msg *msgrcv_lookup(kmsqid_t *, long);
/*
 * Circular chain of selection functions used by msg_wakeup_rdr() when a
 * message has been sent: any-type -> specific-type -> negative-type ->
 * back to any-type.  Each entry pairs a selection function with a pointer
 * to the next entry.  msgget() points a new queue's msg_fnd_sndr at
 * entry 0, and msg_wakeup_rdr() advances that pointer on every call.
 */
msg_select_t msg_fnd_sndr[] = {
{ msg_fnd_any_snd, &msg_fnd_sndr[1] },
{ msg_fnd_spc_snd, &msg_fnd_sndr[2] },
{ msg_fnd_neg_snd, &msg_fnd_sndr[0] }
};
/*
 * Single-entry chain (it links to itself) that selects a thread waiting
 * on the msg_cpy_block list.
 */
msg_select_t msg_fnd_rdr[1] = {
{ msg_fnd_any_rdr, &msg_fnd_rdr[0] },
};
/*
 * Module linkage: the native syscall element, the 32-bit one when
 * _SYSCALL32_IMPL is defined, and a terminating NULL.
 */
static struct modlinkage modlinkage = {
MODREV_1,
&modlsys,
#ifdef _SYSCALL32_IMPL
&modlsys32,
#endif
NULL
};
/*
 * Largest size_t value.  msgget() uses it as the initial value of
 * msg_snd_smallest and msg_lowest_type, and msg_wakeup_senders() uses it
 * as the "no size recorded" sentinel.
 */
#define MSG_SMALL_INIT (size_t)-1
/*
 * Module load entry point.
 *
 * Creates the "msqids" IPC service and the zone key, then installs the
 * module.  If installation fails, both are torn down again and the
 * mod_install() error is returned; on success 0 is returned.
 */
int
_init(void)
{
	int rv;

	msq_svc = ipcs_create("msqids", rc_project_msgmni, rc_zone_msgmni,
	    sizeof (kmsqid_t), msg_dtor, msg_rmid, AT_IPC_MSG,
	    offsetof(ipc_rqty_t, ipcq_msgmni));
	zone_key_create(&msg_zone_key, NULL, msg_remove_zone, NULL);

	rv = mod_install(&modlinkage);
	if (rv != 0) {
		/* Installation failed: undo the setup performed above. */
		(void) zone_key_delete(msg_zone_key);
		ipcs_destroy(msq_svc);
	}

	return (rv);
}
/*
 * Module unload entry point.  Always reports EBUSY and performs no
 * teardown (the sysent entries above also carry SE_NOUNLOAD).
 */
int
_fini(void)
{
return (EBUSY);
}
/*
 * Module information entry point: forwards to mod_info() with this
 * module's linkage structure and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}
/*
 * Destructor callback registered with ipcs_create().
 *
 * Asserts that the queue is fully drained (no waiters on any list, no
 * blocked senders counted, no bytes queued) and destroys every list that
 * msgget() created for the queue.
 */
static void
msg_dtor(kipc_perm_t *perm)
{
	kmsqid_t *msq = (kmsqid_t *)perm;
	int bucket = 0;

	/* Per-bucket receiver wait lists, positive and negative types. */
	while (bucket <= MSG_MAX_QNUM) {
		list_t *pos = &msq->msg_wait_snd[bucket];
		list_t *neg = &msq->msg_wait_snd_ngt[bucket];

		ASSERT(list_is_empty(pos));
		ASSERT(list_is_empty(neg));
		list_destroy(pos);
		list_destroy(neg);
		bucket++;
	}

	/* Nothing may still be waiting on, or stored in, the queue. */
	ASSERT(list_is_empty(&msq->msg_cpy_block));
	ASSERT(list_is_empty(&msq->msg_wait_rcv));
	ASSERT(msq->msg_snd_cnt == 0);
	ASSERT(msq->msg_cbytes == 0);

	list_destroy(&msq->msg_cpy_block);
	list_destroy(&msq->msg_list);
	list_destroy(&msq->msg_wait_rcv);
}
#define msg_hold(mp) (mp)->msg_copycnt++
static void
msg_rele(struct msg *mp)
{
ASSERT(mp->msg_copycnt > 0);
if (mp->msg_copycnt-- == 1) {
if (mp->msg_addr)
kmem_free(mp->msg_addr, mp->msg_size);
kmem_free(mp, sizeof (struct msg));
}
}
/*
 * Take a message off its queue: adjust the queue's byte and message
 * counts, remove it from msg_list, drop the queue's reference on it and
 * then give blocked senders a chance to run.
 */
static void
msgunlink(kmsqid_t *qp, struct msg *mp)
{
	/* Account for the departure while mp is still guaranteed valid. */
	qp->msg_cbytes -= mp->msg_size;
	qp->msg_qnum--;

	list_remove(&qp->msg_list, mp);
	msg_rele(mp);

	/* Space was released; see whether any waiting sender now fits. */
	msg_wakeup_senders(qp);
}
/*
 * Remove-id callback registered with ipcs_create().
 *
 * Unlinks every message still on the queue and then signals every thread
 * sleeping on any of the queue's wait lists.
 */
static void
msg_rmid(kipc_perm_t *perm)
{
	kmsqid_t *msq = (kmsqid_t *)perm;
	struct msg *head;
	int bucket;

	/* Drain the message list from the front. */
	for (head = list_head(&msq->msg_list); head != NULL;
	    head = list_head(&msq->msg_list)) {
		msgunlink(msq, head);
	}
	ASSERT(msq->msg_cbytes == 0);

	/* Kick every waiter, bucket by bucket, then the two global lists. */
	bucket = 0;
	while (bucket <= MSG_MAX_QNUM) {
		msg_rcvq_wakeup_all(&msq->msg_wait_snd[bucket]);
		msg_rcvq_wakeup_all(&msq->msg_wait_snd_ngt[bucket]);
		bucket++;
	}
	msg_rcvq_wakeup_all(&msq->msg_cpy_block);
	msg_rcvq_wakeup_all(&msq->msg_wait_rcv);
}
/*
 * msgctl system call.
 *
 *   msgid - message queue identifier
 *   cmd   - IPC_SET, IPC_SET64, IPC_STAT, IPC_STAT64 or IPC_RMID
 *   arg   - user address of a msqid_ds (IPC_SET/IPC_STAT) or a
 *           msqid_ds64 (IPC_SET64/IPC_STAT64); unused for IPC_RMID
 *
 * Returns 0 on success; otherwise the result of set_errno() with EFAULT
 * (copyin/copyout failure), EINVAL (unknown id or command), EPERM (queue
 * size increase not permitted) or an error from the ipc/ipcperm helpers.
 *
 * User memory is only touched while the queue lock is NOT held: input is
 * copied in before ipc_lookup() and results are copied out after the
 * lock has been dropped.
 */
static int
msgctl(int msgid, int cmd, void *arg)
{
	STRUCT_DECL(msqid_ds, ds);
	kmsqid_t *qp;
	int error;
	struct cred *cr;
	model_t mdl = get_udatamodel();
	struct msqid_ds64 ds64;
	kmutex_t *lock;
	proc_t *pp = curproc;

	STRUCT_INIT(ds, mdl);
	cr = CRED();

	/*
	 * Phase 1: fetch user input, or handle IPC_RMID which needs no
	 * lookup here.
	 */
	switch (cmd) {
	case IPC_SET:
		if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
			return (set_errno(EFAULT));
		break;
	case IPC_SET64:
		if (copyin(arg, &ds64, sizeof (struct msqid_ds64)))
			return (set_errno(EFAULT));
		break;
	case IPC_RMID:
		if (error = ipc_rmid(msq_svc, msgid, cr))
			return (set_errno(error));
		return (0);
	}

	/* Phase 2: operate on the queue with its lock held. */
	if ((lock = ipc_lookup(msq_svc, msgid, (kipc_perm_t **)&qp)) == NULL)
		return (set_errno(EINVAL));

	switch (cmd) {
	case IPC_SET:
		/* Raising msg_qbytes requires privilege. */
		if (STRUCT_FGET(ds, msg_qbytes) > qp->msg_qbytes &&
		    secpolicy_ipc_config(cr) != 0) {
			mutex_exit(lock);
			return (set_errno(EPERM));
		}
		if (error = ipcperm_set(msq_svc, cr, &qp->msg_perm,
		    &STRUCT_BUF(ds)->msg_perm, mdl)) {
			mutex_exit(lock);
			return (set_errno(error));
		}
		qp->msg_qbytes = STRUCT_FGET(ds, msg_qbytes);
		qp->msg_ctime = gethrestime_sec();
		break;
	case IPC_STAT:
		if (error = ipcperm_access(&qp->msg_perm, MSG_R, cr)) {
			mutex_exit(lock);
			return (set_errno(error));
		}

		/*
		 * The result buffer lives on the kernel stack and is copied
		 * out wholesale below.  Zero it first so that any member or
		 * padding not assigned here cannot carry stale stack
		 * contents out to user space.
		 */
		bzero(STRUCT_BUF(ds), STRUCT_SIZE(ds));

		/*
		 * The wait bits are reported in the returned mode only; they
		 * are set just long enough for ipcperm_stat() to see them.
		 */
		if (qp->msg_rcv_cnt)
			qp->msg_perm.ipc_mode |= MSG_RWAIT;
		if (qp->msg_snd_cnt)
			qp->msg_perm.ipc_mode |= MSG_WWAIT;
		ipcperm_stat(&STRUCT_BUF(ds)->msg_perm, &qp->msg_perm, mdl);
		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);

		/* The list pointers are always reported as NULL. */
		STRUCT_FSETP(ds, msg_first, NULL);
		STRUCT_FSETP(ds, msg_last, NULL);
		STRUCT_FSET(ds, msg_cbytes, qp->msg_cbytes);
		STRUCT_FSET(ds, msg_qnum, qp->msg_qnum);
		STRUCT_FSET(ds, msg_qbytes, qp->msg_qbytes);
		STRUCT_FSET(ds, msg_lspid, qp->msg_lspid);
		STRUCT_FSET(ds, msg_lrpid, qp->msg_lrpid);
		STRUCT_FSET(ds, msg_stime, qp->msg_stime);
		STRUCT_FSET(ds, msg_rtime, qp->msg_rtime);
		STRUCT_FSET(ds, msg_ctime, qp->msg_ctime);
		break;
	case IPC_SET64:
		/*
		 * Raising msg_qbytes is refused only when the caller lacks
		 * privilege AND the process resource control denies the new
		 * value.  p_lock is held around the rctl_test() call.
		 */
		mutex_enter(&pp->p_lock);
		if ((ds64.msgx_qbytes > qp->msg_qbytes) &&
		    secpolicy_ipc_config(cr) != 0 &&
		    rctl_test(rc_process_msgmnb, pp->p_rctls, pp,
		    ds64.msgx_qbytes, RCA_SAFE) & RCT_DENY) {
			mutex_exit(&pp->p_lock);
			mutex_exit(lock);
			return (set_errno(EPERM));
		}
		mutex_exit(&pp->p_lock);
		if (error = ipcperm_set64(msq_svc, cr, &qp->msg_perm,
		    &ds64.msgx_perm)) {
			mutex_exit(lock);
			return (set_errno(error));
		}
		qp->msg_qbytes = ds64.msgx_qbytes;
		qp->msg_ctime = gethrestime_sec();
		break;
	case IPC_STAT64:
		/*
		 * NOTE(review): unlike IPC_STAT, no ipcperm_access() check is
		 * made on this path -- confirm that this is intentional.
		 */

		/* Same stack-disclosure precaution as for IPC_STAT. */
		bzero(&ds64, sizeof (ds64));

		if (qp->msg_rcv_cnt)
			qp->msg_perm.ipc_mode |= MSG_RWAIT;
		if (qp->msg_snd_cnt)
			qp->msg_perm.ipc_mode |= MSG_WWAIT;
		ipcperm_stat64(&ds64.msgx_perm, &qp->msg_perm);
		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
		ds64.msgx_cbytes = qp->msg_cbytes;
		ds64.msgx_qnum = qp->msg_qnum;
		ds64.msgx_qbytes = qp->msg_qbytes;
		ds64.msgx_lspid = qp->msg_lspid;
		ds64.msgx_lrpid = qp->msg_lrpid;
		ds64.msgx_stime = qp->msg_stime;
		ds64.msgx_rtime = qp->msg_rtime;
		ds64.msgx_ctime = qp->msg_ctime;
		break;
	default:
		mutex_exit(lock);
		return (set_errno(EINVAL));
	}

	mutex_exit(lock);

	/* Phase 3: deliver results with the lock dropped. */
	switch (cmd) {
	case IPC_STAT:
		if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
			return (set_errno(EFAULT));
		break;
	case IPC_STAT64:
		if (copyout(&ds64, arg, sizeof (struct msqid_ds64)))
			return (set_errno(EFAULT));
		break;
	}

	return (0);
}
/*
 * Zone callback registered through zone_key_create() in _init().  Hands
 * the zone id to ipc_remove_zone() for the message queue service; the
 * arg parameter is not used.
 */
static void
msg_remove_zone(zoneid_t zoneid, void *arg)
{
ipc_remove_zone(msq_svc, zoneid);
}
/*
 * msgget system call.
 *
 *   key    - IPC key passed through to ipc_get()/ipc_commit_begin()
 *   msgflg - flags passed through to ipc_get()/ipc_commit_begin()
 *
 * Returns the queue's ipc_id on success, otherwise the result of
 * set_errno() with the error reported by ipc_get() or ipc_commit_begin().
 */
static int
msgget(key_t key, int msgflg)
{
kmsqid_t *qp;
kmutex_t *lock;
int id, error;
int ii;
proc_t *pp = curproc;
top:
if (error = ipc_get(msq_svc, key, msgflg, (kipc_perm_t **)&qp, &lock))
return (set_errno(error));
/*
 * A free entry is one that has not been committed yet, so it must be
 * initialized here.  Both the returned lock and pp->p_lock are released
 * first, so ipc_get() evidently returns with both held in this case --
 * TODO confirm against ipc_get().
 */
if (IPC_FREE(&qp->msg_perm)) {
mutex_exit(lock);
mutex_exit(&pp->p_lock);
list_create(&qp->msg_list, sizeof (struct msg),
offsetof(struct msg, msg_node));
qp->msg_qnum = 0;
qp->msg_lspid = qp->msg_lrpid = 0;
qp->msg_stime = qp->msg_rtime = 0;
qp->msg_ctime = gethrestime_sec();
qp->msg_ngt_cnt = 0;
qp->msg_neg_copy = 0;
/* One positive-type and one negative-type wait list per hash bucket. */
for (ii = 0; ii <= MSG_MAX_QNUM; ii++) {
list_create(&qp->msg_wait_snd[ii],
sizeof (msgq_wakeup_t),
offsetof(msgq_wakeup_t, msgw_list));
list_create(&qp->msg_wait_snd_ngt[ii],
sizeof (msgq_wakeup_t),
offsetof(msgq_wakeup_t, msgw_list));
}
/*
 * Start at the maximum value so the first msgsnd(), which lowers
 * msg_lowest_type whenever the new type is smaller, sets it correctly.
 */
qp->msg_lowest_type = MSG_SMALL_INIT;
list_create(&qp->msg_cpy_block,
sizeof (msgq_wakeup_t),
offsetof(msgq_wakeup_t, msgw_list));
list_create(&qp->msg_wait_rcv,
sizeof (msgq_wakeup_t),
offsetof(msgq_wakeup_t, msgw_list));
/* Start both selection chains at their first entry. */
qp->msg_fnd_sndr = &msg_fnd_sndr[0];
qp->msg_fnd_rdr = &msg_fnd_rdr[0];
qp->msg_rcv_cnt = 0;
qp->msg_snd_cnt = 0;
qp->msg_snd_smallest = MSG_SMALL_INIT;
/* EAGAIN from ipc_commit_begin() means start over from ipc_get(). */
if (error = ipc_commit_begin(msq_svc, key, msgflg,
(kipc_perm_t *)qp)) {
if (error == EAGAIN)
goto top;
return (set_errno(error));
}
/* Queue limits come from the process resource controls. */
qp->msg_qbytes = rctl_enforced_value(rc_process_msgmnb,
pp->p_rctls, pp);
qp->msg_qmax = rctl_enforced_value(rc_process_msgtql,
pp->p_rctls, pp);
/* ipc_commit_end() hands back the lock released at the end below. */
lock = ipc_commit_end(msq_svc, &qp->msg_perm);
}
if (AU_AUDITING())
audit_ipcget(AT_IPC_MSG, (void *)qp);
/* Read the id while the lock is still held. */
id = qp->msg_perm.ipc_id;
mutex_exit(lock);
return (id);
}
/*
 * msgrcv system call.
 *
 *   msqid  - message queue identifier
 *   msgp   - user buffer that receives the message type and text
 *   msgsz  - capacity of the text portion of the user buffer
 *   msgtyp - 0: first message; > 0: first message of exactly that type;
 *            < 0: lowest-typed message whose type is <= -msgtyp
 *   msgflg - IPC_NOWAIT and/or MSG_NOERROR
 *
 * Returns the number of text bytes transferred, or the result of
 * set_errno() with EINVAL (unknown id), ENOMSG (IPC_NOWAIT and nothing
 * suitable), EIDRM/EINTR (see msgq_check_err()), or an error from
 * ipcperm_access() or msg_copyout().
 */
static ssize_t
msgrcv(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, long msgtyp, int msgflg)
{
	struct msg *smp;
	kmsqid_t *qp;
	kmutex_t *lock;
	size_t xtsz;
	int error = 0;
	int cvres;
	uint_t msg_hash;
	msgq_wakeup_t msg_entry;

	CPU_STATS_ADDQ(CPU, sys, msg, 1);

	msg_hash = msg_type_hash(msgtyp);

	/*
	 * The wait-list entry lives on this thread's stack.  It must be
	 * initialized before the first possible jump to msgrcv_out, because
	 * the error path there inspects msg_entry.msgw_snd_wake.
	 */
	msg_entry.msgw_thrd = curthread;
	msg_entry.msgw_snd_wake = 0;
	msg_entry.msgw_type = msgtyp;

	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) {
		return ((ssize_t)set_errno(EINVAL));
	}
	ipc_hold(msq_svc, (kipc_perm_t *)qp);

	if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) {
		goto msgrcv_out;
	}

findmsg:
	smp = msgrcv_lookup(qp, msgtyp);
	if (smp) {
		if ((smp->msg_flags & MSG_RCVCOPY) == 0) {
			/*
			 * Snapshot both values before msg_copyout(), which
			 * drops the lock and may unlink the message.
			 */
			long t = msg_entry.msgw_snd_wake;
			long copy_type = smp->msg_type;

			error = msg_copyout(qp, msgtyp, &lock, &xtsz, msgsz,
			    smp, msgp, msgflg);

			/*
			 * We were woken for a message of type t but consumed
			 * a different type; pass the wakeup for t on to
			 * another receiver.
			 */
			if (!error && t && (copy_type != t))
				msg_wakeup_rdr(qp, &qp->msg_fnd_sndr, t);

			/* Release a receiver that blocked on our copy. */
			msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0);
			goto msgrcv_out;
		}

		/*
		 * The candidate is being copied out by someone else; wait on
		 * the copy-block list and then look again.
		 */
		cvres = msg_rcvq_sleep(&qp->msg_cpy_block,
		    &msg_entry, &lock, qp);
		error = msgq_check_err(qp, cvres);
		if (error) {
			goto msgrcv_out;
		}
		goto findmsg;
	}

	/* Nothing suitable is queued. */
	if (msgflg & IPC_NOWAIT) {
		error = ENOMSG;
		goto msgrcv_out;
	}
	msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0);

	/*
	 * Sleep until a sender wakes us.  Zero and positive types wait on
	 * msg_wait_snd[], negative types on msg_wait_snd_ngt[] (counted in
	 * msg_ngt_cnt while asleep).
	 */
	msg_entry.msgw_snd_wake = 0;
	if (msgtyp >= 0) {
		cvres = msg_rcvq_sleep(&qp->msg_wait_snd[msg_hash],
		    &msg_entry, &lock, qp);
	} else {
		qp->msg_ngt_cnt++;
		cvres = msg_rcvq_sleep(&qp->msg_wait_snd_ngt[msg_hash],
		    &msg_entry, &lock, qp);
		qp->msg_ngt_cnt--;
	}

	if (!(error = msgq_check_err(qp, cvres))) {
		goto findmsg;
	}

msgrcv_out:
	if (error) {
		/*
		 * We are bailing out: let another copy-blocked receiver run,
		 * and if a sender had woken us for a particular type, pass
		 * that wakeup on as well.
		 */
		msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0);
		if (msg_entry.msgw_snd_wake) {
			msg_wakeup_rdr(qp, &qp->msg_fnd_sndr,
			    msg_entry.msgw_snd_wake);
		}
		ipc_rele(msq_svc, (kipc_perm_t *)qp);
		return ((ssize_t)set_errno(error));
	}
	ipc_rele(msq_svc, (kipc_perm_t *)qp);
	return ((ssize_t)xtsz);
}
/*
 * Classify the outcome of a sleep on a queue.
 *
 * Returns EIDRM if the queue has been freed, EINTR if cvres (the value
 * obtained from cv_wait_sig() by the callers) is 0, and 0 otherwise.
 * The freed-queue check takes precedence.
 */
static int
msgq_check_err(kmsqid_t *qp, int cvres)
{
	if (IPC_FREE(&qp->msg_perm))
		return (EIDRM);

	return (cvres == 0 ? EINTR : 0);
}
/*
 * Copy message smp out to the user buffer msgp and, on success, remove it
 * from queue qp.
 *
 *   msgtyp   - the type the receiver asked for (only its sign is used here)
 *   lock     - in/out: the queue lock; dropped for the copy and replaced
 *              with the lock returned by ipc_lock() afterwards
 *   xtsz_ret - out: number of text bytes transferred
 *   msgsz    - capacity of the user's text buffer
 *   msgflg   - MSG_NOERROR permits truncation
 *
 * Returns 0, E2BIG (message too large and MSG_NOERROR not set; in that
 * case the lock is never dropped and *xtsz_ret is not written), EIDRM
 * (queue freed while unlocked) or EFAULT (copyout failed).
 */
static int
msg_copyout(kmsqid_t *qp, long msgtyp, kmutex_t **lock, size_t *xtsz_ret,
size_t msgsz, struct msg *smp, struct ipcmsgbuf *msgp, int msgflg)
{
size_t xtsz;
STRUCT_HANDLE(ipcmsgbuf, umsgp);
model_t mdl = get_udatamodel();
int copyerror = 0;
STRUCT_SET_HANDLE(umsgp, mdl, msgp);
/* Decide how many text bytes to transfer. */
if (msgsz < smp->msg_size) {
if ((msgflg & MSG_NOERROR) == 0) {
return (E2BIG);
} else {
xtsz = msgsz;
}
} else {
xtsz = smp->msg_size;
}
*xtsz_ret = xtsz;
/*
 * Mark the message as being copied and take a hold on it so that it
 * stays allocated while the lock is dropped.  Other receivers that
 * select it see MSG_RCVCOPY and block (see msgrcv()).
 */
ASSERT((smp->msg_flags & MSG_RCVCOPY) == 0);
smp->msg_flags |= MSG_RCVCOPY;
msg_hold(smp);
/* msgrcv_lookup() checks msg_neg_copy for negative-type requests. */
if (msgtyp < 0) {
ASSERT(qp->msg_neg_copy == 0);
qp->msg_neg_copy = 1;
}
/* User memory is accessed without the queue lock held. */
mutex_exit(*lock);
if (mdl == DATAMODEL_NATIVE) {
copyerror = copyout(&smp->msg_type, msgp,
sizeof (smp->msg_type));
} else {
/* Non-native callers receive the type as a 32-bit value. */
int32_t msg_type32 = smp->msg_type;
copyerror = copyout(&msg_type32, msgp,
sizeof (msg_type32));
}
if (copyerror == 0 && xtsz) {
copyerror = copyout(smp->msg_addr,
STRUCT_FADDR(umsgp, mtext), xtsz);
}
/* Retake the lock and undo the copy-in-progress markers. */
*lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
if (msgtyp < 0) {
qp->msg_neg_copy = 0;
}
ASSERT(smp->msg_flags & MSG_RCVCOPY);
smp->msg_flags &= ~MSG_RCVCOPY;
msg_rele(smp);
/* The queue may have been removed while the lock was dropped. */
if (IPC_FREE(&qp->msg_perm)) {
return (EIDRM);
}
/* On a failed copy the message is left on the queue. */
if (copyerror) {
return (EFAULT);
}
qp->msg_lrpid = ttoproc(curthread)->p_pid;
qp->msg_rtime = gethrestime_sec();
msgunlink(qp, smp);
return (0);
}
/*
 * Pick the message on qp that satisfies a receive request for msgtyp.
 *
 *   msgtyp == 0  the head of the message list (NULL if the list is empty)
 *   msgtyp  > 0  the first message whose type equals msgtyp
 *   msgtyp  < 0  the earliest message carrying the lowest type that is
 *                <= -msgtyp
 *
 * Returns NULL when nothing qualifies.  For negative requests made while
 * another negative-type copy-out is in progress (msg_neg_copy set), a
 * static placeholder flagged MSG_RCVCOPY is returned so that the caller
 * treats the result as "busy being copied".
 *
 * msg_lowest_type is a lower bound on the types present on the queue; it
 * is used to reject hopeless requests early and to stop a negative search
 * as soon as a message of that type is found.  It is tightened to the
 * type of the message chosen by a negative search.
 */
static struct msg *
msgrcv_lookup(kmsqid_t *qp, long msgtyp)
{
	static struct msg copy_marker;
	struct msg *cur = list_head(&qp->msg_list);
	struct msg *best = NULL;
	long floor_type;
	long limit;

	if (msgtyp == 0)
		return (cur);

	floor_type = qp->msg_lowest_type;

	if (msgtyp > 0) {
		/* Nothing as low as msgtyp can be on the queue. */
		if (floor_type > msgtyp)
			return (NULL);

		while (cur != NULL && cur->msg_type != msgtyp)
			cur = list_next(&qp->msg_list, cur);
		return (cur);
	}

	/* Negative request: accept any type up to and including limit. */
	limit = -msgtyp;
	if (limit < floor_type)
		return (NULL);

	if (qp->msg_neg_copy) {
		copy_marker.msg_flags = MSG_RCVCOPY;
		return (&copy_marker);
	}

	for (; cur != NULL; cur = list_next(&qp->msg_list, cur)) {
		if (cur->msg_type > limit)
			continue;
		/* Keep the earlier message when the types tie. */
		if (best != NULL && best->msg_type <= cur->msg_type)
			continue;

		best = cur;
		limit = cur->msg_type;
		/* Cannot do better than the known lower bound. */
		if (limit == floor_type)
			break;
	}

	if (best != NULL)
		qp->msg_lowest_type = best->msg_type;

	return (best);
}
/*
 * msgids system call: thin wrapper around ipc_ids() for the message
 * queue service.  Returns 0 on success, otherwise the result of
 * set_errno() with the error ipc_ids() reported.
 */
static int
msgids(int *buf, uint_t nids, uint_t *pnids)
{
	int rc = ipc_ids(msq_svc, buf, nids, pnids);

	if (rc == 0)
		return (0);
	return (set_errno(rc));
}
/* Round a message length up to a multiple of the caller's size_t width. */
#define RND(x) roundup((x), sizeof (size_t))
#define RND32(x) roundup((x), sizeof (size32_t))
/*
 * msgsnap system call: copy a snapshot of the messages on a queue to a
 * user buffer without removing them.
 *
 *   msqid  - message queue identifier
 *   buf    - user buffer; receives a msgsnap_head followed, per message,
 *            by a msgsnap_mhead and the message text padded with RND()
 *            or RND32() according to the caller's data model
 *   bufsz  - size of buf; must be at least the size of the header
 *   msgtyp - 0: all messages; > 0: messages of that type;
 *            < 0: messages whose type is <= -msgtyp
 *
 * If the selected messages do not fit in bufsz, only the header is
 * written: it carries the required size and a message count of 0.
 *
 * Returns 0 on success, otherwise the result of set_errno() with EINVAL,
 * EFAULT, EIDRM or an error from ipcperm_access().
 */
static int
msgsnap(int msqid, caddr_t buf, size_t bufsz, long msgtyp)
{
struct msg *mp;
kmsqid_t *qp;
kmutex_t *lock;
size_t size;
size_t nmsg;
struct msg **snaplist;
int error, i;
model_t mdl = get_udatamodel();
STRUCT_DECL(msgsnap_head, head);
STRUCT_DECL(msgsnap_mhead, mhead);
STRUCT_INIT(head, mdl);
STRUCT_INIT(mhead, mdl);
if (bufsz < STRUCT_SIZE(head))
return (set_errno(EINVAL));
if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL)
return (set_errno(EINVAL));
/* On success error is 0 here; the copy-out code below relies on that. */
if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) {
mutex_exit(lock);
return (set_errno(error));
}
ipc_hold(msq_svc, (kipc_perm_t *)qp);
/* Pass 1: count the matching messages and total their padded sizes. */
size = nmsg = 0;
for (mp = list_head(&qp->msg_list); mp;
mp = list_next(&qp->msg_list, mp)) {
if (msgtyp == 0 ||
(msgtyp > 0 && msgtyp == mp->msg_type) ||
(msgtyp < 0 && mp->msg_type <= -msgtyp)) {
nmsg++;
if (mdl == DATAMODEL_NATIVE)
size += RND(mp->msg_size);
else
size += RND32(mp->msg_size);
}
}
size += STRUCT_SIZE(head) + nmsg * STRUCT_SIZE(mhead);
/* Too big for the caller's buffer: report the size, copy no messages. */
if (size > bufsz)
nmsg = 0;
/*
 * Pass 2: take a hold on each matching message and remember it, so the
 * messages stay allocated once the lock is dropped.
 */
if (nmsg > 0) {
snaplist = (struct msg **)kmem_alloc(nmsg *
sizeof (struct msg *), KM_SLEEP);
i = 0;
for (mp = list_head(&qp->msg_list); mp;
mp = list_next(&qp->msg_list, mp)) {
if (msgtyp == 0 ||
(msgtyp > 0 && msgtyp == mp->msg_type) ||
(msgtyp < 0 && mp->msg_type <= -msgtyp)) {
msg_hold(mp);
snaplist[i] = mp;
i++;
}
}
}
/* All user-memory access below happens without the queue lock. */
mutex_exit(lock);
STRUCT_FSET(head, msgsnap_size, size);
STRUCT_FSET(head, msgsnap_nmsg, nmsg);
if (copyout(STRUCT_BUF(head), buf, STRUCT_SIZE(head)))
error = EFAULT;
buf += STRUCT_SIZE(head);
/*
 * Copy out each held message.  Once an error has been recorded the
 * copying is skipped, but the loop still runs so every hold is dropped.
 */
for (i = 0; i < nmsg; i++) {
mp = snaplist[i];
if (error == 0) {
STRUCT_FSET(mhead, msgsnap_mlen, mp->msg_size);
STRUCT_FSET(mhead, msgsnap_mtype, mp->msg_type);
if (copyout(STRUCT_BUF(mhead), buf, STRUCT_SIZE(mhead)))
error = EFAULT;
buf += STRUCT_SIZE(mhead);
if (error == 0 &&
mp->msg_size != 0 &&
copyout(mp->msg_addr, buf, mp->msg_size))
error = EFAULT;
if (mdl == DATAMODEL_NATIVE)
buf += RND(mp->msg_size);
else
buf += RND32(mp->msg_size);
}
/* The hold is dropped under the lock; also detect queue removal. */
lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
msg_rele(mp);
if (IPC_FREE(&qp->msg_perm))
error = EIDRM;
mutex_exit(lock);
}
/*
 * Retake the lock before ipc_rele(); no mutex_exit() follows, so
 * ipc_rele() presumably releases it -- TODO confirm.
 */
(void) ipc_lock(msq_svc, qp->msg_perm.ipc_id);
ipc_rele(msq_svc, (kipc_perm_t *)qp);
if (nmsg > 0)
kmem_free(snaplist, nmsg * sizeof (struct msg *));
if (error)
return (set_errno(error));
return (0);
}
/*
 * Messages up to this many bytes are allocated and copied in before the
 * queue is looked up; larger ones are allocated only after queue space
 * has been found.
 */
#define MSG_PREALLOC_LIMIT 8192
/*
 * msgsnd system call.
 *
 *   msqid  - message queue identifier
 *   msgp   - user buffer holding the message type followed by the text
 *   msgsz  - number of text bytes
 *   msgflg - IPC_NOWAIT makes a full queue fail with EAGAIN instead of
 *            sleeping
 *
 * Returns 0 on success, otherwise the result of set_errno() with EFAULT
 * (copyin failed), EINVAL (type < 1, unknown id, or msgsz larger than the
 * queue's msg_qbytes), EAGAIN, EIDRM/EINTR (see msgq_check_err()) or an
 * error from ipcperm_access().
 */
static int
msgsnd(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, int msgflg)
{
kmsqid_t *qp;
kmutex_t *lock = NULL;
struct msg *mp = NULL;
long type;
int error = 0, wait_wakeup = 0;
msgq_wakeup_t msg_entry;
model_t mdl = get_udatamodel();
STRUCT_HANDLE(ipcmsgbuf, umsgp);
CPU_STATS_ADDQ(CPU, sys, msg, 1);
STRUCT_SET_HANDLE(umsgp, mdl, msgp);
/* Fetch the message type; non-native callers supply a 32-bit value. */
if (mdl == DATAMODEL_NATIVE) {
if (copyin(msgp, &type, sizeof (type)))
return (set_errno(EFAULT));
} else {
int32_t type32;
if (copyin(msgp, &type32, sizeof (type32)))
return (set_errno(EFAULT));
type = type32;
}
if (type < 1)
return (set_errno(EINVAL));
/* Small message: allocate and copy in now, before any lock is taken. */
if (msgsz <= MSG_PREALLOC_LIMIT) {
mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP);
mp->msg_copycnt = 1;
mp->msg_size = msgsz;
if (msgsz) {
mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP);
if (copyin(STRUCT_FADDR(umsgp, mtext),
mp->msg_addr, msgsz) == -1) {
error = EFAULT;
goto msgsnd_out;
}
}
}
/* lock stays NULL on failure, so msgsnd_out skips ipc_rele(). */
if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) {
error = EINVAL;
goto msgsnd_out;
}
ipc_hold(msq_svc, (kipc_perm_t *)qp);
/* A message larger than the whole queue can never be accepted. */
if (msgsz > qp->msg_qbytes) {
error = EINVAL;
goto msgsnd_out;
}
if (error = ipcperm_access(&qp->msg_perm, MSG_W, CRED()))
goto msgsnd_out;
top:
ASSERT(qp->msg_qnum <= qp->msg_qmax);
/* Wait until there is room for both the bytes and one more message. */
while ((msgsz > qp->msg_qbytes - qp->msg_cbytes) ||
(qp->msg_qnum == qp->msg_qmax)) {
int cvres;
if (msgflg & IPC_NOWAIT) {
error = EAGAIN;
goto msgsnd_out;
}
wait_wakeup = 0;
qp->msg_snd_cnt++;
/* Queue a stack-resident entry on the blocked-senders list. */
msg_entry.msgw_snd_size = msgsz;
msg_entry.msgw_thrd = curthread;
msg_entry.msgw_type = type;
cv_init(&msg_entry.msgw_wake_cv, NULL, 0, NULL);
list_insert_tail(&qp->msg_wait_rcv, &msg_entry);
/* Track the smallest blocked size for msg_wakeup_senders(). */
if (qp->msg_snd_smallest > msgsz)
qp->msg_snd_smallest = msgsz;
cvres = cv_wait_sig(&msg_entry.msgw_wake_cv, lock);
lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, lock);
qp->msg_snd_cnt--;
/* Still linked means nobody dequeued us; remove the entry ourselves. */
if (list_link_active(&msg_entry.msgw_list))
list_remove(&qp->msg_wait_rcv, &msg_entry);
if (error = msgq_check_err(qp, cvres)) {
goto msgsnd_out;
}
wait_wakeup = 1;
}
/*
 * Large message: allocate and copy in now, with the lock dropped.  The
 * queue may have changed meanwhile, so the space check is repeated.
 */
if (mp == NULL) {
int failure;
mutex_exit(lock);
ASSERT(msgsz > 0);
mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP);
mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP);
mp->msg_size = msgsz;
mp->msg_copycnt = 1;
failure = (copyin(STRUCT_FADDR(umsgp, mtext),
mp->msg_addr, msgsz) == -1);
lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
if (IPC_FREE(&qp->msg_perm)) {
error = EIDRM;
goto msgsnd_out;
}
if (failure) {
error = EFAULT;
goto msgsnd_out;
}
goto top;
}
/* Space is available: account for the message and append it. */
qp->msg_qnum++;
qp->msg_cbytes += msgsz;
qp->msg_lspid = curproc->p_pid;
qp->msg_stime = gethrestime_sec();
mp->msg_type = type;
if (qp->msg_lowest_type > type)
qp->msg_lowest_type = type;
list_insert_tail(&qp->msg_list, mp);
/* Wake a receiver that can take a message of this type. */
msg_wakeup_rdr(qp, &qp->msg_fnd_sndr, type);
msgsnd_out:
/*
 * We were woken to use freed space but are failing instead; give other
 * blocked senders the chance to use it.
 */
if (wait_wakeup && error) {
msg_wakeup_senders(qp);
}
if (lock)
ipc_rele(msq_svc, (kipc_perm_t *)qp);
if (error) {
/* The message never reached the queue; drop our only reference. */
if (mp)
msg_rele(mp);
return (set_errno(error));
}
return (0);
}
/*
 * Wake at most one sleeping thread chosen via a selection chain.
 *
 *   flist - address of the queue's current position in a circular chain
 *           of msg_select_t entries (msg_fnd_sndr or msg_fnd_rdr)
 *   type  - message type that triggered the wakeup, or 0
 *
 * Each selection function in the chain is tried, starting at *flist,
 * until one returns a waiter or the chain has been walked once.  *flist
 * is then advanced by one entry regardless of the outcome, so the next
 * call starts with a different selection function.  If a waiter was
 * found and type is non-zero, the type is recorded in its msgw_snd_wake
 * before it is signalled.
 */
static void
msg_wakeup_rdr(kmsqid_t *qp, msg_select_t **flist, long type)
{
	msg_select_t *sel = *flist;
	msgq_wakeup_t *sleeper;
	uint_t bucket = msg_type_hash(type);

	for (;;) {
		sleeper = sel->selection(qp, bucket, type);
		sel = sel->next_selection;
		if (sleeper != NULL || sel == *flist)
			break;
	}

	*flist = (*flist)->next_selection;

	if (sleeper == NULL)
		return;

	if (type != 0)
		sleeper->msgw_snd_wake = type;
	cv_signal(&sleeper->msgw_wake_cv);
}
static uint_t
msg_type_hash(long msg_type)
{
if (msg_type < 0) {
long hash = -msg_type / MSG_NEG_INTERVAL;
if (hash > MSG_MAX_QNUM)
hash = MSG_MAX_QNUM;
return (hash);
}
if (msg_type)
return (1 + (msg_type % MSG_MAX_QNUM));
return (0);
}
/*
 * Selection function: dequeue and return the first receiver waiting in
 * bucket 0 of msg_wait_snd (the bucket msg_type_hash() assigns to type
 * 0), or NULL if none.  msg_hash and type are unused.
 */
static msgq_wakeup_t *
msg_fnd_any_snd(kmsqid_t *qp, int msg_hash, long type)
{
	list_t *waiters = &qp->msg_wait_snd[0];
	msgq_wakeup_t *first = list_head(waiters);

	if (first != NULL)
		list_remove(waiters, first);
	return (first);
}
/*
 * Selection function: dequeue and return the first thread waiting on the
 * msg_cpy_block list, or NULL if none.  msg_hash and type are unused.
 */
static msgq_wakeup_t *
msg_fnd_any_rdr(kmsqid_t *qp, int msg_hash, long type)
{
	list_t *blocked = &qp->msg_cpy_block;
	msgq_wakeup_t *first = list_head(blocked);

	if (first != NULL)
		list_remove(blocked, first);
	return (first);
}
/*
 * Selection function: within bucket msg_hash of msg_wait_snd, dequeue
 * and return the first receiver waiting for exactly this type, or NULL
 * if none.
 */
static msgq_wakeup_t *
msg_fnd_spc_snd(kmsqid_t *qp, int msg_hash, long type)
{
	list_t *waiters = &qp->msg_wait_snd[msg_hash];
	msgq_wakeup_t *w;

	for (w = list_head(waiters); w != NULL; w = list_next(waiters, w)) {
		if (w->msgw_type == type) {
			list_remove(waiters, w);
			return (w);
		}
	}
	return (NULL);
}
/*
 * Selection function: dequeue and return a receiver that is waiting with
 * a negative type able to accept a message of this type (that is,
 * -msgw_type >= type), or NULL if none.  msg_hash is unused; the bucket
 * is recomputed from -type.
 *
 * Only buckets from the one -type hashes to up to MSG_MAX_QNUM are
 * searched.  The starting bucket within that range is derived from the
 * queue's msg_stime, and the search wraps back to the first bucket of
 * the range after MSG_MAX_QNUM.
 */
static msgq_wakeup_t *
msg_fnd_neg_snd(kmsqid_t *qp, int msg_hash, long type)
{
	int first_bucket;
	int span;
	int bucket;
	int remaining;

	/* No negative-type receivers are asleep at all. */
	if (qp->msg_ngt_cnt == 0)
		return (NULL);

	first_bucket = msg_type_hash(-type);
	span = MSG_MAX_QNUM - first_bucket + 1;
	bucket = first_bucket + (qp->msg_stime % span);

	for (remaining = span; remaining > 0; remaining--) {
		list_t *waiters = &qp->msg_wait_snd_ngt[bucket];
		msgq_wakeup_t *w;

		/*
		 * A bucket covers a range of types, so each entry still has
		 * to be checked individually.
		 */
		for (w = list_head(waiters); w != NULL;
		    w = list_next(waiters, w)) {
			if (-w->msgw_type >= type) {
				list_remove(waiters, w);
				return (w);
			}
		}

		if (++bucket > MSG_MAX_QNUM)
			bucket = first_bucket;
	}

	return (NULL);
}
/*
 * Put the calling thread to sleep on a queue wait list.
 *
 *   queue - wait list to sleep on
 *   entry - caller-provided wait entry; its condition variable is
 *           initialized here
 *   lock  - in/out: the queue lock; replaced with the lock returned by
 *           ipc_relock() after waking
 *   qp    - the message queue (its msg_rcv_cnt is raised while asleep)
 *
 * Returns the cv_wait_sig() result, which callers pass to
 * msgq_check_err().
 */
static int
msg_rcvq_sleep(list_t *queue, msgq_wakeup_t *entry, kmutex_t **lock,
kmsqid_t *qp)
{
int cvres;
cv_init(&entry->msgw_wake_cv, NULL, 0, NULL);
list_insert_tail(queue, entry);
qp->msg_rcv_cnt++;
cvres = cv_wait_sig(&entry->msgw_wake_cv, *lock);
*lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, *lock);
qp->msg_rcv_cnt--;
/*
 * The waker functions in this file unlink an entry before signalling
 * it, so an entry that is still linked was not dequeued by a waker and
 * is removed here.
 */
if (list_link_active(&entry->msgw_list)) {
list_remove(queue, entry);
}
return (cvres);
}
/*
 * Empty a wait list, signalling each entry's condition variable after
 * unlinking it.
 */
static void
msg_rcvq_wakeup_all(list_t *q_ptr)
{
	msgq_wakeup_t *sleeper;

	for (sleeper = list_head(q_ptr); sleeper != NULL;
	    sleeper = list_head(q_ptr)) {
		list_remove(q_ptr, sleeper);
		cv_signal(&sleeper->msgw_wake_cv);
	}
}
/*
 * Native system call entry point: dispatch on opcode to the individual
 * message operations, casting the generic arguments to the types each
 * one expects.  An unknown opcode yields set_errno(EINVAL).
 */
static ssize_t
msgsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3,
    uintptr_t a4, uintptr_t a5)
{
	switch (opcode) {
	case MSGGET:
		return (msgget((key_t)a1, (int)a2));
	case MSGCTL:
		return (msgctl((int)a1, (int)a2, (void *)a3));
	case MSGRCV:
		return (msgrcv((int)a1, (struct ipcmsgbuf *)a2,
		    (size_t)a3, (long)a4, (int)a5));
	case MSGSND:
		return (msgsnd((int)a1, (struct ipcmsgbuf *)a2,
		    (size_t)a3, (int)a4));
	case MSGIDS:
		return (msgids((int *)a1, (uint_t)a2, (uint_t *)a3));
	case MSGSNAP:
		return (msgsnap((int)a1, (caddr_t)a2, (size_t)a3, (long)a4));
	default:
		return (set_errno(EINVAL));
	}
}
/*
 * Wake blocked senders whose messages would now fit on the queue.
 *
 * Nothing is done if the queue has been freed, is already at its message
 * limit, or has less free space than the smallest blocked message
 * (msg_snd_smallest).  Otherwise the blocked-sender list is walked in
 * order.  Each sender whose message fits in the remaining space is
 * unlinked and signalled, and the space and message count are charged as
 * if it will succeed.  The walk stops early once the projected message
 * count reaches the limit or the remaining space drops below
 * msg_snd_smallest.
 *
 * msg_snd_smallest is reset to MSG_SMALL_INIT when the list is empty.
 * It is updated to the smallest size among the senders left blocked only
 * when the walk reached the end of the list and at least one sender was
 * left blocked.
 */
static void
msg_wakeup_senders(kmsqid_t *qp)
{
	list_t *senders = &qp->msg_wait_rcv;
	struct msgq_wakeup *cur, *next;
	size_t room, min_blocked;
	int projected;

	if (IPC_FREE(&qp->msg_perm))
		return;
	if (qp->msg_qnum >= qp->msg_qmax)
		return;

	room = qp->msg_qbytes - qp->msg_cbytes;
	if (room < qp->msg_snd_smallest)
		return;

	cur = list_head(senders);
	if (cur == NULL) {
		qp->msg_snd_smallest = MSG_SMALL_INIT;
		return;
	}

	min_blocked = MSG_SMALL_INIT;
	projected = qp->msg_qnum;

	do {
		/* Fetch the successor first; cur may be unlinked below. */
		next = list_next(senders, cur);

		if (cur->msgw_snd_size > room) {
			/* Stays blocked; remember the smallest such size. */
			if (cur->msgw_snd_size < min_blocked)
				min_blocked = cur->msgw_snd_size;
		} else {
			list_remove(senders, cur);
			room -= cur->msgw_snd_size;
			cv_signal(&cur->msgw_wake_cv);
			projected++;
			if (projected == qp->msg_qmax ||
			    room < qp->msg_snd_smallest)
				break;
		}

		cur = next;
	} while (cur != NULL);

	/* next is NULL exactly when the last list entry was examined. */
	if (next == NULL && min_blocked != MSG_SMALL_INIT)
		qp->msg_snd_smallest = min_blocked;
}
#ifdef _SYSCALL32_IMPL
/*
 * 32-bit system call entry point: same dispatch as msgsys(), with the
 * 32-bit arguments widened to the types the common routines expect.
 * Pointers go through uintptr_t.  The msgrcv/msgsnap type argument and
 * the msgsnd size argument are converted via int32_t, so they are
 * sign-extended.  An unknown opcode yields set_errno(EINVAL).
 */
static ssize32_t
msgsys32(int opcode, uint32_t a1, uint32_t a2, uint32_t a3,
    uint32_t a4, uint32_t a5)
{
	switch (opcode) {
	case MSGGET:
		return (msgget((key_t)a1, (int)a2));
	case MSGCTL:
		return (msgctl((int)a1, (int)a2, (void *)(uintptr_t)a3));
	case MSGRCV:
		return (msgrcv((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2,
		    (size_t)a3, (long)(int32_t)a4, (int)a5));
	case MSGSND:
		return (msgsnd((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2,
		    (size_t)(int32_t)a3, (int)a4));
	case MSGIDS:
		return (msgids((int *)(uintptr_t)a1, (uint_t)a2,
		    (uint_t *)(uintptr_t)a3));
	case MSGSNAP:
		return (msgsnap((int)a1, (caddr_t)(uintptr_t)a2, (size_t)a3,
		    (long)(int32_t)a4));
	default:
		return (set_errno(EINVAL));
	}
}
#endif