#include <sys/param.h>
#include <sys/vmparam.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/signal.h>
#include <sys/stack.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/proc.h>
#include <sys/var.h>
#include <sys/inline.h>
#include <sys/syscall.h>
#include <sys/ucontext.h>
#include <sys/cpuvar.h>
#include <sys/siginfo.h>
#include <sys/trap.h>
#include <sys/machtrap.h>
#include <sys/sysinfo.h>
#include <sys/procfs.h>
#include <sys/prsystm.h>
#include <sys/fpu/fpusystm.h>
#include <sys/modctl.h>
#include <sys/aio_impl.h>
#include <c2/audit.h>
#include <sys/machpcb.h>
#include <sys/privregs.h>
#include <sys/copyops.h>
#include <sys/timer.h>
#include <sys/priv.h>
#include <sys/msacct.h>
/*
 * Run-time switch for system call tracing.  It is only consulted by the
 * code compiled under SYSCALLTRACE in pre_syscall() and post_syscall().
 */
int syscalltrace = 0;
#ifdef SYSCALLTRACE
/* Held around each group of trace printf() calls so that they stay together. */
static kmutex_t systrace_lock;
#endif
/* Forward declaration; the definition appears later in this file. */
static krwlock_t *lock_syscall(struct sysent *, uint_t);
#ifdef _SYSCALL32_IMPL
/*
 * Select the system call table for an lwp: the native table (sysent) when
 * the lwp's data model is DATAMODEL_NATIVE, otherwise the 32-bit table
 * (sysent32).
 */
static struct sysent *
lwp_getsysent(klwp_t *lwp)
{
	return ((lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) ?
	    sysent : sysent32);
}
#define	LWP_GETSYSENT(lwp)	(lwp_getsysent(lwp))
#else
/* Without _SYSCALL32_IMPL there is only the one table. */
#define	LWP_GETSYSENT(lwp)	(sysent)
#endif
/*
 * If the lwp's extra window registers (pcb_xregs) are marked XREGMODIFIED,
 * write them back to the register-window save area addressed by the saved
 * user stack pointer and also place a copy in the first window slot of the
 * machpcb.  In every case pcb_xregstat is reset to XREGNONE on return.
 *
 * Failures of the copyout calls are deliberately ignored.
 * The 'shared' argument is not referenced by this function.
 */
void
xregrestore(klwp_t *lwp, int shared)
{
	struct machpcb *mpcb;
	caddr_t usp;

	/* Nothing was modified: just reset the state. */
	if (lwp->lwp_pcb.pcb_xregstat != XREGMODIFIED) {
		lwp->lwp_pcb.pcb_xregstat = XREGNONE;
		return;
	}

	mpcb = lwptompcb(lwp);
	usp = (caddr_t)lwptoregs(lwp)->r_sp;

	if (lwp_getdatamodel(lwp) == DATAMODEL_LP64) {
		/* 64-bit frame: the window lives at sp + STACK_BIAS. */
		(void) copyout_nowatch(&lwp->lwp_pcb.pcb_xregs,
		    usp + STACK_BIAS, sizeof (struct rwindow));
	} else {
		struct rwindow32 win32;
		size_t winsz = sizeof (struct rwindow32);
		int was_watched;

		/* 32-bit frame: truncate the stack pointer to 32 bits. */
		usp = (caddr_t)(uintptr_t)(caddr32_t)(uintptr_t)usp;

		/*
		 * Convert to the 32-bit window layout and copy it out with
		 * any write watchpoint on the target temporarily disabled.
		 */
		was_watched = watch_disable_addr(usp, winsz, S_WRITE);
		rwindow_nto32(&lwp->lwp_pcb.pcb_xregs, &win32);
		(void) copyout(&win32, usp, winsz);
		if (was_watched)
			watch_enable_addr(usp, winsz, S_WRITE);
	}

	/* Mirror the window into slot 0 of the machpcb window buffer. */
	mpcb->mpcb_rsp[0] = usp;
	mpcb->mpcb_rsp[1] = NULL;
	bcopy(&lwp->lwp_pcb.pcb_xregs, &mpcb->mpcb_rwin[0],
	    sizeof (lwp->lwp_pcb.pcb_xregs));

	lwp->lwp_pcb.pcb_xregstat = XREGNONE;
}
/*
 * Copy the arguments of the lwp's current system call into argp[].
 *
 * lwp	   lwp whose arguments (lwp->lwp_ap) are read.
 * argp	   destination array; receives one long per argument.
 * nargsp  receives the number of arguments copied.
 *
 * Each argument is masked to its low 32 bits when the owning process is
 * ILP32.  If the system call number (t_sysnum) is 0 or is not below
 * NSYSCALL, *nargsp is set to 0 and nothing is copied.
 *
 * Returns the system call number.
 */
uint_t
get_syscall_args(klwp_t *lwp, long *argp, int *nargsp)
{
	kthread_t *thr = lwptot(lwp);
	uint_t sysnum = thr->t_sysnum;
	long keep;
	long *src;
	int count;
	int idx;

	if (lwptoproc(lwp)->p_model == DATAMODEL_ILP32)
		keep = (uint32_t)0xffffffffU;
	else
		keep = 0xffffffffffffffff;

	if (sysnum == 0 || sysnum >= NSYSCALL) {
		*nargsp = 0;
		return (sysnum);
	}

	count = LWP_GETSYSENT(lwp)[sysnum].sy_narg;
	ASSERT(count <= MAXSYSARGS);
	*nargsp = count;

	src = lwp->lwp_ap;
	for (idx = 0; idx < count; idx++)
		argp[idx] = src[idx] & keep;

	return (sysnum);
}
#ifdef _SYSCALL32_IMPL
/*
 * Variant of get_syscall_args() that delivers the arguments as ints.
 * The arguments are first collected as longs and then each one is narrowed
 * with an (int) cast into argp[].  *nargsp receives the argument count and
 * the system call number is returned.
 */
uint_t
get_syscall32_args(klwp_t *lwp, int *argp, int *nargsp)
{
	long wide[MAXSYSARGS];
	uint_t sysnum;
	uint_t idx = 0;

	sysnum = get_syscall_args(lwp, wide, nargsp);
	while (idx != *nargsp) {
		argp[idx] = (int)wide[idx];
		idx++;
	}
	return (sysnum);
}
#endif
/*
 * Copy the current thread's system call arguments into lwp->lwp_arg[] and
 * point lwp->lwp_ap at that copy.
 *
 * The leading arguments are read from the saved user out registers in the
 * lwp's struct regs; any remaining ones are fetched from the user stack.
 *
 * Returns 0 when the arguments have been saved (or nothing needed saving)
 * and -1 when a fetch from the user stack fails.  On the -1 path lwp_ap,
 * lwp_argsaved and t_post_sys are left as they were, although part of
 * lwp_arg[] may already have been overwritten.
 */
int
save_syscall_args()
{
kthread_t *t = curthread;
klwp_t *lwp = ttolwp(t);
struct regs *rp = lwptoregs(lwp);
uint_t code = t->t_sysnum;
uint_t nargs;
int i;
caddr_t ua;
model_t datamodel;
/* Already saved, or no system call number recorded: nothing to do. */
if (lwp->lwp_argsaved || code == 0)
return (0);
if (code >= NSYSCALL) {
/* Out-of-range system call number: treat as taking no arguments. */
nargs = 0;
} else {
struct sysent *se = LWP_GETSYSENT(lwp);
struct sysent *callp = se + code;
nargs = callp->sy_narg;
if (LOADABLE_SYSCALL(callp) && nargs == 0) {
krwlock_t *module_lock;
/*
 * A loadable entry reporting zero arguments: go through
 * lock_syscall() (which may load the module) and read the
 * argument count again while the entry's lock is held.
 */
module_lock = lock_syscall(se, code);
nargs = callp->sy_narg;
rw_exit(module_lock);
}
}
/* No arguments to fetch; still mark them as saved below. */
if (nargs == 0)
goto out;
ASSERT(nargs <= MAXSYSARGS);
if ((datamodel = lwp_getdatamodel(lwp)) == DATAMODEL_ILP32) {
/*
 * 32-bit caller: each register value is truncated to 32 bits
 * and zero-extended into the long-sized lwp_arg[] slot.
 */
if (rp->r_g1 == 0) {
/*
 * %g1 is zero: arguments are taken from %o1 onward, skipping
 * %o0, so only five come from registers.
 * NOTE(review): presumably the indirect system call form in
 * which %o0 carries the real system call number (compare
 * indir()) -- confirm against the trap entry code.
 */
lwp->lwp_arg[0] = (uint32_t)rp->r_o1;
lwp->lwp_arg[1] = (uint32_t)rp->r_o2;
lwp->lwp_arg[2] = (uint32_t)rp->r_o3;
lwp->lwp_arg[3] = (uint32_t)rp->r_o4;
lwp->lwp_arg[4] = (uint32_t)rp->r_o5;
if (nargs > 5) {
/* Remaining arguments: 32-bit words at sp + MINFRAME32. */
ua = (caddr_t)(uintptr_t)(caddr32_t)(uintptr_t)
(rp->r_sp + MINFRAME32);
for (i = 5; i < nargs; i++) {
uint32_t a;
if (fuword32(ua, &a) != 0)
return (-1);
lwp->lwp_arg[i] = a;
ua += sizeof (a);
}
}
} else {
/* Six arguments come from %o0-%o5. */
lwp->lwp_arg[0] = (uint32_t)rp->r_o0;
lwp->lwp_arg[1] = (uint32_t)rp->r_o1;
lwp->lwp_arg[2] = (uint32_t)rp->r_o2;
lwp->lwp_arg[3] = (uint32_t)rp->r_o3;
lwp->lwp_arg[4] = (uint32_t)rp->r_o4;
lwp->lwp_arg[5] = (uint32_t)rp->r_o5;
if (nargs > 6) {
/* Remaining arguments: 32-bit words at sp + MINFRAME32. */
ua = (caddr_t)(uintptr_t)(caddr32_t)(uintptr_t)
(rp->r_sp + MINFRAME32);
for (i = 6; i < nargs; i++) {
uint32_t a;
if (fuword32(ua, &a) != 0)
return (-1);
lwp->lwp_arg[i] = a;
ua += sizeof (a);
}
}
}
} else {
/* 64-bit caller: registers are copied at full width. */
ASSERT(datamodel == DATAMODEL_LP64);
lwp->lwp_arg[0] = rp->r_o0;
lwp->lwp_arg[1] = rp->r_o1;
lwp->lwp_arg[2] = rp->r_o2;
lwp->lwp_arg[3] = rp->r_o3;
lwp->lwp_arg[4] = rp->r_o4;
lwp->lwp_arg[5] = rp->r_o5;
if (nargs > 6) {
/* Remaining arguments: longs at sp + MINFRAME + STACK_BIAS. */
ua = (caddr_t)rp->r_sp + MINFRAME + STACK_BIAS;
for (i = 6; i < nargs; i++) {
unsigned long a;
if (fulword(ua, &a) != 0)
return (-1);
lwp->lwp_arg[i] = a;
ua += sizeof (a);
}
}
}
out:
/*
 * Switch lwp_ap to the saved copy and request post-syscall processing;
 * post_syscall() points lwp_ap back at the registers and clears
 * lwp_argsaved.
 */
lwp->lwp_ap = lwp->lwp_arg;
lwp->lwp_argsaved = 1;
t->t_post_sys = 1;
return (0);
}
void
reset_syscall_args(void)
{
klwp_t *lwp = ttolwp(curthread);
lwp->lwp_ap = (long *)&lwptoregs(lwp)->r_o0;
lwp->lwp_argsaved = 0;
}
int64_t
nosys(void)
{
tsignal(curthread, SIGSYS);
return ((int64_t)set_errno(ENOSYS));
}
int
nosys32(void)
{
return (nosys());
}
/*
 * Pre-system-call processing for the current thread.
 *
 * arg0 is used as the system call number when t_sysnum is still 0 and arg0
 * is non-zero.
 *
 * t_pre_sys is cleared on entry and set again before returning whenever one
 * of the conditions handled here (single-step, /proc syscall tracing,
 * auditing) was seen, or when the call is being aborted.
 *
 * Returns 0 on the normal path.  Returns 1 after recording EINTR when
 * lwp_sysabort was set, or the non-zero value returned by audit_start().
 * NOTE(review): a non-zero return presumably tells the caller not to run
 * the system call -- confirm against the trap handler.
 */
int
pre_syscall(int arg0)
{
unsigned int code;
kthread_t *t = curthread;
proc_t *p = ttoproc(t);
klwp_t *lwp = ttolwp(t);
struct regs *rp = lwptoregs(lwp);
int repost;
/* Clear the request flag; 'repost' records whether to set it again. */
t->t_pre_sys = repost = 0;
ASSERT(t->t_schedflag & TS_DONT_SWAP);
/* Microstate accounting: switch from user to system. */
syscall_mstate(LMS_USER, LMS_SYSTEM);
/* On entry the argument pointer must refer to the saved out registers. */
ASSERT(lwp->lwp_ap == (long *)&rp->r_o0);
/*
 * If the thread's credential pointer differs from the process's, replace
 * it with the one from crgetcred().  The new pointer is installed before
 * the old credential is released, so t_cred never refers to a freed cred.
 */
if (t->t_cred != p->p_cred) {
cred_t *oldcred = t->t_cred;
t->t_cred = crgetcred();
crfree(oldcred);
}
/* Undo any single-step arrangement and keep pre-syscall processing on. */
if (lwp->lwp_pcb.pcb_step != STEP_NONE) {
(void) prundostep();
repost = 1;
}
/*
 * Adopt arg0 as the system call number only if none is recorded yet, so
 * an already-set number is never replaced.
 */
code = t->t_sysnum;
if (code == 0 && arg0 != 0) {
code = arg0;
t->t_sysnum = arg0;
}
/*
 * /proc system call entry tracing.  The entry mask is tested once without
 * the lock and again with p_lock held before actually stopping.
 */
if (PTOU(p)->u_systrap) {
if (prismember(&PTOU(p)->u_entrymask, code)) {
mutex_enter(&p->p_lock);
if (PTOU(p)->u_systrap &&
prismember(&PTOU(p)->u_entrymask, code)) {
stop(PR_SYSENTRY, code);
/*
 * After the stop, discard any saved argument copy and
 * point lwp_ap back at the registers.
 * NOTE(review): presumably because the controlling
 * process may have changed the registers while we
 * were stopped -- confirm.
 */
lwp->lwp_argsaved = 0;
lwp->lwp_ap = (long *)&rp->r_o0;
}
mutex_exit(&p->p_lock);
}
repost = 1;
}
/*
 * A pending abort request: consume it, record EINTR (set_errno() also
 * requests post-syscall processing), repost and return non-zero.
 */
if (lwp->lwp_sysabort) {
lwp->lwp_sysabort = 0;
(void) set_errno(EINTR);
t->t_pre_sys = 1;
return (1);
}
/* Start an audit record if the audit module is loaded and active. */
if (audit_active == C2AUDIT_LOADED) {
uint32_t auditing = au_zone_getstate(NULL);
if (auditing & AU_AUDIT_MASK) {
int error;
if (error = audit_start(T_SYSCALL, code, auditing, \
0, lwp)) {
/* audit_start() failed: repost and return its error. */
t->t_pre_sys = 1;
lwp->lwp_error = 0;
return (error);
}
repost = 1;
}
}
#ifdef SYSCALLTRACE
/*
 * Debug trace: print the pid, the system call name/number, its
 * arguments, the command name and the thread pointer.
 */
if (syscalltrace) {
int i;
long *ap;
char *cp;
char *sysname;
struct sysent *callp;
if (code >= NSYSCALL)
callp = &nosys_ent;
else
callp = LWP_GETSYSENT(lwp) + code;
/* Save the arguments so they can be printed through lwp_ap. */
(void) save_syscall_args();
mutex_enter(&systrace_lock);
printf("%d: ", p->p_pid);
if (code >= NSYSCALL)
printf("0x%x", code);
else {
sysname = mod_getsysname(code);
printf("%s[0x%x]", sysname == NULL ? "NULL" :
sysname, code);
}
cp = "(";
for (i = 0, ap = lwp->lwp_ap; i < callp->sy_narg; i++, ap++) {
printf("%s%lx", cp, *ap);
cp = ", ";
}
/* Close the parenthesis only if at least one argument was printed. */
if (i)
printf(")");
printf(" %s id=0x%p\n", PTOU(p)->u_comm, curthread);
mutex_exit(&systrace_lock);
}
#endif
/* Keep pre-syscall processing enabled if any reason for it persists. */
if (repost)
t->t_pre_sys = 1;
lwp->lwp_error = 0;
lwp->lwp_badpriv = PRIV_NONE;
return (0);
}
/*
 * Post-system-call processing for the current thread.
 *
 * rval1 and rval2 are the system call's return values; on a successful
 * NORMALRETURN they are stored into the saved user registers r_o0/r_o1.
 *
 * t_post_sys is cleared on entry and set again at the end if any condition
 * handled here (auditing, /proc tracing, profiling, single-step) is still
 * in effect.  Before returning, the function also resets the per-syscall
 * lwp state (lwp_errno, lwp_eosys, lwp_ap, lwp_argsaved, t_sysnum).
 */
void
post_syscall(long rval1, long rval2)
{
kthread_t *t = curthread;
proc_t *p = curproc;
klwp_t *lwp = ttolwp(t);
struct regs *rp = lwptoregs(lwp);
uint_t error;
int code = t->t_sysnum;
int repost = 0;
/* Non-zero when a /proc stop on system call exit is wanted. */
int proc_stop = 0;
/* Non-zero when realsigprof() should be called below. */
int sigprof = 0;
t->t_post_sys = 0;
error = lwp->lwp_errno;
/*
 * No system call number recorded: skip the return-value handling.
 * NOTE(review): presumably an lwp that is not returning from a system
 * call of its own (e.g. one created by forkall) -- confirm.
 */
if (code == 0)
goto sig_check;
/* Finish the audit record; return values are truncated to int here. */
if (AU_AUDITING()) {
rval_t rval;
rval.r_val1 = (int)rval1;
rval.r_val2 = (int)rval2;
audit_finish(T_SYSCALL, code, error, &rval);
repost = 1;
}
/* Deliver and free a message left on the thread in t_pdmsg. */
if (curthread->t_pdmsg != NULL) {
char *m = curthread->t_pdmsg;
uprintf("%s", m);
kmem_free(m, strlen(m) + 1);
curthread->t_pdmsg = NULL;
}
/*
 * /proc system call exit tracing: if we may stop, save the arguments now,
 * before the return values are written over the out registers below.
 */
if (PTOU(p)->u_systrap) {
if (prismember(&PTOU(p)->u_exitmask, code)) {
proc_stop = 1;
(void) save_syscall_args();
}
repost = 1;
}
/* Likewise save the arguments if realsigprof() may need them. */
if (curthread->t_rprof != NULL &&
curthread->t_rprof->rp_anystate != 0) {
(void) save_syscall_args();
sigprof = 1;
}
if (lwp->lwp_eosys == NORMALRETURN) {
if (error == 0) {
#ifdef SYSCALLTRACE
if (syscalltrace) {
mutex_enter(&systrace_lock);
printf(
"%d: r_val1=0x%lx, r_val2=0x%lx, id 0x%p\n",
p->p_pid, rval1, rval2, curthread);
mutex_exit(&systrace_lock);
}
#endif
/*
 * Success: clear TSTATE_IC and store both return values.
 * NOTE(review): TSTATE_IC is presumably the carry bit that
 * user code tests to detect failure -- confirm.
 */
rp->r_tstate &= ~TSTATE_IC;
rp->r_o0 = rval1;
rp->r_o1 = rval2;
} else {
int sig;
#ifdef SYSCALLTRACE
if (syscalltrace) {
mutex_enter(&systrace_lock);
printf("%d: error=%d, id 0x%p\n",
p->p_pid, error, curthread);
mutex_exit(&systrace_lock);
}
#endif
/* EINTR with a stale active file descriptor becomes EBADF. */
if (error == EINTR && t->t_activefd.a_stale)
error = EBADF;
/*
 * EINTR becomes ERESTART when the current signal is in the
 * restart set and its disposition is neither SIG_DFL nor
 * SIG_IGN.
 */
if (error == EINTR &&
(sig = lwp->lwp_cursig) != 0 &&
sigismember(&PTOU(p)->u_sigrestart, sig) &&
PTOU(p)->u_signal[sig - 1] != SIG_DFL &&
PTOU(p)->u_signal[sig - 1] != SIG_IGN)
error = ERESTART;
/* Failure: error number in r_o0 with TSTATE_IC set. */
rp->r_o0 = error;
rp->r_tstate |= TSTATE_IC;
}
/*
 * Step the saved pc/npc forward by one 4-byte instruction, unless the
 * thread carries T_FORKALL.
 */
if (!(t->t_flag & T_FORKALL)) {
rp->r_pc = rp->r_npc;
rp->r_npc += 4;
}
}
/* Stop for /proc on exit; the condition is rechecked under p_lock. */
if (proc_stop) {
mutex_enter(&p->p_lock);
if (PTOU(p)->u_systrap &&
prismember(&PTOU(p)->u_exitmask, code))
stop(PR_SYSEXIT, code);
mutex_exit(&p->p_lock);
}
/* vfork parent: wait on the child whose pid is rval1, then clear flag. */
if (t->t_flag & T_VFPARENT) {
ASSERT(code == SYS_vfork || code == SYS_forksys);
ASSERT(rp->r_o1 == 0 && error == 0);
vfwait((pid_t)rval1);
t->t_flag &= ~T_VFPARENT;
}
/* Profiling enabled: bill an owed tick and keep post-processing on. */
if (p->p_prof.pr_scale) {
if (lwp->lwp_oweupc)
profil_tick(rp->r_pc);
repost = 1;
}
sig_check:
/* Reset per-call state; this happens after any PR_SYSEXIT stop above. */
lwp->lwp_eosys = NORMALRETURN;
clear_stale_fd();
t->t_flag &= ~T_FORKALL;
/* Bitwise OR: enter if either flag is non-zero. */
if (t->t_astflag | t->t_sig_check) {
/*
 * Clear the AST and signal-check flags first, then examine each
 * condition; the checks below set them again where needed.
 */
astoff(t);
t->t_sig_check = 0;
/* Tested without the lock, then rechecked with p_lock held. */
if (curthread->t_proc_flag & TP_CHANGEBIND) {
mutex_enter(&p->p_lock);
if (curthread->t_proc_flag & TP_CHANGEBIND) {
timer_lwpbind();
curthread->t_proc_flag &= ~TP_CHANGEBIND;
}
mutex_exit(&p->p_lock);
}
/* Process has kernel async I/O state: run its cleanup. */
if (p->p_aio)
aio_cleanup(0);
/* Hold this lwp if the process is held or the lwp must exit. */
if (ISHOLD(p) || (t->t_proc_flag & TP_EXITLWP))
holdlwp();
/* Deliver a pending signal and ask for another check next time. */
if (ISSIG_PENDING(t, lwp, p)) {
if (issig(FORREAL))
psig();
t->t_sig_check = 1;
}
if (sigprof) {
/* Argument count is 0 for an out-of-range system call number. */
int nargs = (code > 0 && code < NSYSCALL)?
LWP_GETSYSENT(lwp)[code].sy_narg : 0;
realsigprof(code, nargs, error);
t->t_sig_check = 1;
}
/* Re-arm the AST if either of these pcb flags is pending. */
if (lwp->lwp_pcb.pcb_flags & CPC_OVERFLOW)
aston(t);
if (lwp->lwp_pcb.pcb_flags & ASYNC_HWERR)
aston(t);
}
/* Write back window registers if their state is not XREGNONE. */
if (lwp->lwp_pcb.pcb_xregstat != XREGNONE)
xregrestore(lwp, 1);
/* Clear the error for the next system call. */
lwp->lwp_errno = 0;
/* From here on the lwp is marked as running in user state. */
lwp->lwp_state = LWP_USER;
/* Run the scheduling class trap-return hook under the thread lock. */
if (t->t_trapret) {
t->t_trapret = 0;
thread_lock(t);
CL_TRAPRET(t);
thread_unlock(t);
}
/* Yield the CPU if a reschedule is pending or a wait-queue flag is set. */
if (CPU->cpu_runrun || t->t_schedflag & TS_ANYWAITQ)
preempt();
prunstop();
/* Set up a requested single-step and keep post-processing enabled. */
if (lwp->lwp_pcb.pcb_step != STEP_NONE) {
prdostep();
repost = 1;
}
/* The thread is no longer inside a system call. */
t->t_sysnum = 0;
/* Point lwp_ap back at the registers in case the args were saved. */
lwp->lwp_ap = (long *)&rp->r_o0;
lwp->lwp_argsaved = 0;
if (repost)
t->t_post_sys = 1;
/*
 * A user stack_t is registered (lwp_ustack) and a previous stack limit
 * was recorded (lwp_old_stk_ctl): if the user's stack_t still matches the
 * old limit (or has size 0) and ends at the top of the user stack,
 * rewrite it for the new limit.  A size of 0 is written when the current
 * limit is RLIM64_INFINITY.  copyout failures are ignored.
 */
if (lwp->lwp_ustack != 0 && lwp->lwp_old_stk_ctl != 0) {
rlim64_t new_size;
model_t model;
caddr_t top;
struct rlimit64 rl;
/* Snapshot the process values under p_lock. */
mutex_enter(&p->p_lock);
new_size = p->p_stk_ctl;
model = p->p_model;
top = p->p_usrstack;
(void) rctl_rlimit_get(rctlproc_legacy[RLIMIT_STACK], p, &rl);
mutex_exit(&p->p_lock);
if (rl.rlim_cur == RLIM64_INFINITY)
new_size = 0;
if (model == DATAMODEL_NATIVE) {
stack_t stk;
if (copyin((stack_t *)lwp->lwp_ustack, &stk,
sizeof (stack_t)) == 0 &&
(stk.ss_size == lwp->lwp_old_stk_ctl ||
stk.ss_size == 0) &&
stk.ss_sp == top - stk.ss_size) {
/* Keep the top of the stack fixed; move the base. */
stk.ss_sp = (void *)((uintptr_t)stk.ss_sp +
stk.ss_size - new_size);
stk.ss_size = new_size;
(void) copyout(&stk,
(stack_t *)lwp->lwp_ustack,
sizeof (stack_t));
}
} else {
/* Same update using the 32-bit stack32_t layout. */
stack32_t stk32;
if (copyin((stack32_t *)lwp->lwp_ustack, &stk32,
sizeof (stack32_t)) == 0 &&
(stk32.ss_size == lwp->lwp_old_stk_ctl ||
stk32.ss_size == 0) &&
stk32.ss_sp ==
(caddr32_t)(uintptr_t)(top - stk32.ss_size)) {
stk32.ss_sp += stk32.ss_size - new_size;
stk32.ss_size = new_size;
(void) copyout(&stk32,
(stack32_t *)lwp->lwp_ustack,
sizeof (stack32_t));
}
}
/* The recorded old limit has been consumed. */
lwp->lwp_old_stk_ctl = 0;
}
/* Microstate accounting: switch from system back to user. */
syscall_mstate(LMS_SYSTEM, LMS_USER);
}
/*
 * Invoke the current system call through its sy_call entry, passing the
 * argument pointer (lwp_ap) and an rval_t for the results.
 *
 * When the entry takes more than six arguments they are first saved with
 * save_syscall_args(); if that fails EFAULT is recorded and returned.
 * r_val1 starts as 0 and r_val2 as the int value of the saved r_o1.
 *
 * Returns the combined r_vals on success, or the value from set_errno()
 * when the handler reports an error.
 */
int64_t
syscall_ap()
{
	klwp_t *lwp = ttolwp(curthread);
	struct regs *uregs = lwptoregs(lwp);
	struct sysent *ent = LWP_GETSYSENT(lwp) + curthread->t_sysnum;
	rval_t ret;
	uint_t err;

	if (ent->sy_narg > 6) {
		if (save_syscall_args())
			return ((int64_t)set_errno(EFAULT));
	}

	ret.r_val1 = 0;
	ret.r_val2 = (int)uregs->r_o1;
	lwp->lwp_error = 0;

	err = (*(ent->sy_call))(lwp->lwp_ap, &ret);
	if (err != 0)
		return ((int64_t)set_errno(err));

	return (ret.r_vals);
}
/*
 * Acquire, as a reader, the lock (sy_lock) of entry 'code' in the given
 * system call table, attempting to load the system call's module first if
 * the entry is not already loaded.
 *
 * Always returns the entry's lock held as RW_READER; the caller releases
 * it with rw_exit().  A modload() failure is not reported to the caller.
 */
static krwlock_t *
lock_syscall(struct sysent *table, uint_t code)
{
krwlock_t *module_lock;
struct modctl *modp;
int id;
struct sysent *callp;
module_lock = table[code].sy_lock;
callp = &table[code];
/* Fast path: already loaded, so return with the reader lock held. */
rw_enter(module_lock, RW_READER);
if (LOADED_SYSCALL(callp))
return (module_lock);
rw_exit(module_lock);
/*
 * Load the module from the "sys" directory.  The loop exits either with
 * id == -1 (modload() failed, mod_lock not held) or with mod_lock held
 * and the module marked installed.  It retries when the modctl cannot be
 * found by file name or is not installed.
 */
for (;;) {
if ((id = modload("sys", syscallnames[code])) == -1)
break;
modp = mod_find_by_filename("sys", syscallnames[code]);
if (modp == NULL)
continue;
mutex_enter(&mod_lock);
if (!modp->mod_installed) {
mutex_exit(&mod_lock);
continue;
}
break;
}
/*
 * Take the reader lock before dropping mod_lock, so on the success path
 * there is no window in which neither lock is held.
 */
rw_enter(module_lock, RW_READER);
if (id != -1)
mutex_exit(&mod_lock);
return (module_lock);
}
int64_t
loadable_syscall(
long a0, long a1, long a2, long a3,
long a4, long a5, long a6, long a7)
{
int64_t rval;
struct sysent *callp;
struct sysent *se = LWP_GETSYSENT(ttolwp(curthread));
krwlock_t *module_lock;
int code;
code = curthread->t_sysnum;
callp = se + code;
module_lock = lock_syscall(se, code);
if (callp->sy_flags & SE_ARGC) {
int64_t (*sy_call)();
sy_call = (int64_t (*)())callp->sy_call;
rval = (*sy_call)(a0, a1, a2, a3, a4, a5);
} else {
rval = syscall_ap();
}
rw_exit(module_lock);
return (rval);
}
/*
 * Indirect system call: 'code' names the system call to run and a0..a4
 * are its leading arguments.
 *
 * A code outside 1..NSYSCALL-1 is handled by nosys().  Otherwise the code
 * becomes the thread's t_sysnum.  If the target takes more than five
 * arguments they are saved with save_syscall_args() (EFAULT on failure);
 * if not, and the arguments have not been saved already, lwp_ap is moved
 * forward by one slot and post-syscall processing is requested.
 *
 * Returns the result of the target's sy_callc handler.
 */
int64_t
indir(int code, long a0, long a1, long a2, long a3, long a4)
{
	klwp_t *lwp = ttolwp(curthread);
	struct sysent *ent;

	if (code <= 0 || code >= NSYSCALL)
		return (nosys());

	ASSERT(lwp->lwp_ap != NULL);

	curthread->t_sysnum = code;
	ent = LWP_GETSYSENT(lwp) + code;

	if (ent->sy_narg > 5) {
		if (save_syscall_args() != 0)
			return ((int64_t)set_errno(EFAULT));
	} else if (lwp->lwp_argsaved == 0) {
		/* Skip the slot that held the system call number. */
		lwp->lwp_ap = lwp->lwp_ap + 1;
		curthread->t_post_sys = 1;
	}

	return ((*ent->sy_callc)(a0, a1, a2, a3, a4, lwp->lwp_arg[5]));
}
/*
 * Record a non-zero error for the current system call in the lwp's
 * lwp_errno and request post-syscall processing for the current thread.
 * Returns the value stored.
 */
uint_t
set_errno(uint_t error)
{
	klwp_t *lwp;

	ASSERT(error != 0);

	curthread->t_post_sys = 1;
	lwp = ttolwp(curthread);
	lwp->lwp_errno = error;
	return (lwp->lwp_errno);
}
/*
 * Set t_pre_sys on every thread on the process's circular thread list.
 * The caller must hold p->p_lock.
 */
void
set_proc_pre_sys(proc_t *p)
{
	kthread_t *head;
	kthread_t *cur;

	ASSERT(MUTEX_HELD(&p->p_lock));

	head = p->p_tlist;
	cur = head;
	do {
		cur->t_pre_sys = 1;
		cur = cur->t_forw;
	} while (cur != head);
}
/*
 * Set t_post_sys on every thread on the process's circular thread list.
 * The caller must hold p->p_lock.
 */
void
set_proc_post_sys(proc_t *p)
{
	kthread_t *head;
	kthread_t *cur;

	ASSERT(MUTEX_HELD(&p->p_lock));

	head = p->p_tlist;
	cur = head;
	do {
		cur->t_post_sys = 1;
		cur = cur->t_forw;
	} while (cur != head);
}
/*
 * Set both t_pre_sys and t_post_sys on every thread on the process's
 * circular thread list.  The caller must hold p->p_lock.
 */
void
set_proc_sys(proc_t *p)
{
	kthread_t *head;
	kthread_t *cur;

	ASSERT(MUTEX_HELD(&p->p_lock));

	head = p->p_tlist;
	cur = head;
	do {
		cur->t_pre_sys = 1;
		cur->t_post_sys = 1;
		cur = cur->t_forw;
	} while (cur != head);
}
/*
 * Set both t_pre_sys and t_post_sys on every thread reachable from the
 * current thread through the circular t_next list.  pidlock is held for
 * the duration of the walk.
 */
void
set_all_proc_sys()
{
	kthread_t *start;
	kthread_t *cur;

	mutex_enter(&pidlock);
	start = curthread;
	cur = start;
	do {
		cur->t_pre_sys = 1;
		cur->t_post_sys = 1;
		cur = cur->t_next;
	} while (cur != start);
	mutex_exit(&pidlock);
}
/*
 * Set both t_pre_sys and t_post_sys on every thread of each eligible
 * process on the practive list that belongs to the given zone, or to any
 * zone when zoneid is ALL_ZONES.
 *
 * A process is skipped when it has no executable vnode, uses the kernel
 * address space, is in state SIDL or SZOMB, or has any of the SSYS,
 * SEXITING or SEXITLWPS flags set.  Processes with an empty thread list
 * are skipped as well.
 *
 * pidlock is held across the whole walk and each process's p_lock is held
 * while its threads are updated.
 */
void
set_all_zone_usr_proc_sys(zoneid_t zoneid)
{
	proc_t *p;
	kthread_t *t;

	mutex_enter(&pidlock);
	for (p = practive; p != NULL; p = p->p_next) {
		if (p->p_exec == NULLVP || p->p_as == &kas ||
		    p->p_stat == SIDL || p->p_stat == SZOMB ||
		    (p->p_flag & (SSYS | SEXITING | SEXITLWPS)))
			continue;

		/* Not the requested zone. */
		if (zoneid != ALL_ZONES && p->p_zone->zone_id != zoneid)
			continue;

		mutex_enter(&p->p_lock);
		t = p->p_tlist;
		if (t != NULL) {
			do {
				t->t_pre_sys = 1;
				t->t_post_sys = 1;
				t = t->t_forw;
			} while (p->p_tlist != t);
		}
		mutex_exit(&p->p_lock);
	}
	mutex_exit(&pidlock);
}
/*
 * Call aston() on every thread on the process's circular thread list.
 * The caller must hold p->p_lock.
 */
void
set_proc_ast(proc_t *p)
{
	kthread_t *head;
	kthread_t *cur;

	ASSERT(MUTEX_HELD(&p->p_lock));

	head = p->p_tlist;
	cur = head;
	do {
		aston(cur);
		cur = cur->t_forw;
	} while (cur != head);
}