#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/user.h>
#include <sys/signal.h>
#include <sys/syscall.h>
#include <sys/syscall_mi.h>
#include <uvm/uvm_extern.h>
#include <machine/cpufunc.h>
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef DDB
#include <machine/db_machdep.h>
#endif
#include "isa.h"
/*
 * Entry points defined in this file.  upageflttrap() and kpageflttrap()
 * are the user- and kernel-mode page-fault helpers called from trap();
 * their second argument is the value trap() obtained from rcr2().
 */
int upageflttrap(struct trapframe *, uint32_t);
int kpageflttrap(struct trapframe *, uint32_t);
void trap(struct trapframe *);
void ast(struct trapframe *);
void syscall(struct trapframe *);
/*
 * Human-readable trap names, indexed by trap number (frame->tf_trapno).
 * trap() prints the matching entry when it reports a fatal trap.
 * NOTE(review): the entry order presumably has to match the T_* trap
 * numbers from <machine/trap.h> -- confirm before adding or reordering.
 */
char *trap_type[] = {
"privileged instruction fault",
"breakpoint trap",
"arithmetic trap",
"reserved trap",
"protection fault",
"trace trap",
"page fault",
"alignment fault",
"integer divide fault",
"non-maskable interrupt",
"overflow trap",
"bounds check fault",
"FPU not available fault",
"double fault",
"FPU operand fetch fault",
"invalid TSS fault",
"segment not present fault",
"stack fault",
"machine check",
"SIMD FP fault",
};
/* Number of entries in trap_type[]; trap() checks tf_trapno against it. */
int trap_types = sizeof trap_type / sizeof trap_type[0];
#ifdef DEBUG
/*
 * DEBUG kernels only: when nonzero, trap() prints the trapframe registers,
 * cr2, lapic_tpr and curproc on every entry.  Off by default.
 */
int trapdebug = 0;
#endif
/*
 * Translate a page-fault error code into the protection the faulting
 * access requires.
 *
 * PGEX_W takes priority and yields PROT_WRITE; otherwise PGEX_I yields
 * PROT_EXEC; with neither bit set the access is treated as PROT_READ.
 */
static inline int
pgex2access(int pgex)
{
	int prot = PROT_READ;

	if (pgex & PGEX_W)
		prot = PROT_WRITE;
	else if (pgex & PGEX_I)
		prot = PROT_EXEC;

	return prot;
}
/*
 * Handle a page fault taken in user mode.
 *
 * frame: trapframe of the fault; only tf_err is consulted here.
 * cr2:   faulting address.
 *
 * The fault is passed to uvm_fault() on the current process' map.  On
 * success the stack is grown if needed (uvm_grow()); on failure the
 * uvm_fault() error is translated into a signal for the process:
 *
 *	ENOMEM	-> SIGKILL (with a console message)
 *	EACCES	-> SIGSEGV / SEGV_ACCERR
 *	EIO	-> SIGBUS  / BUS_OBJERR
 *	other	-> SIGSEGV / SEGV_MAPERR
 *
 * Always returns 1.
 */
int
upageflttrap(struct trapframe *frame, uint32_t cr2)
{
	struct proc *p = curproc;
	vaddr_t va = trunc_page((vaddr_t)cr2);
	vm_prot_t prot = pgex2access(frame->tf_err);
	union sigval sv;
	int error, signo, sicode;

	error = uvm_fault(&p->p_vmspace->vm_map, va, 0, prot);

	/*
	 * With cpu_pae == 0, a read fault refused with EACCES is retried as
	 * an execute fault.
	 * NOTE(review): presumably because reads and instruction fetches
	 * cannot be told apart in that configuration -- confirm.
	 */
	if (cpu_pae == 0 && error == EACCES && prot == PROT_READ)
		error = uvm_fault(&p->p_vmspace->vm_map, va, 0, PROT_EXEC);

	if (error == 0) {
		uvm_grow(p, va);
		return 1;
	}

	switch (error) {
	case ENOMEM:
		printf("UVM: pid %d (%s), uid %d killed:"
		    " out of swap\n", p->p_p->ps_pid, p->p_p->ps_comm,
		    p->p_ucred ? (int)p->p_ucred->cr_uid : -1);
		signo = SIGKILL;
		sicode = SEGV_MAPERR;
		break;
	case EACCES:
		signo = SIGSEGV;
		sicode = SEGV_ACCERR;
		break;
	case EIO:
		signo = SIGBUS;
		sicode = BUS_OBJERR;
		break;
	default:
		signo = SIGSEGV;
		sicode = SEGV_MAPERR;
		break;
	}

	/* The signal carries the exact (untruncated) faulting address. */
	sv.sival_ptr = (void *)cr2;
	trapsignal(p, signo, T_PAGEFLT, sicode, sv);
	return 1;
}
/*
 * Handle a page fault taken in kernel mode.
 *
 * frame: trapframe of the fault; tf_eip is rewritten when recovering
 *        through pcb_onfault.
 * cr2:   faulting address.
 *
 * Returns 1 when the fault was resolved or redirected to the registered
 * pcb_onfault handler, and 0 when there is no usable process context.
 * An unrecoverable fault panics.
 */
int
kpageflttrap(struct trapframe *frame, uint32_t cr2)
{
	struct proc *p = curproc;
	struct pcb *pcb;
	struct vm_map *map;
	vaddr_t va = trunc_page((vaddr_t)cr2);
	vm_prot_t prot = pgex2access(frame->tf_err);
	caddr_t saved_onfault;
	int error;

	/* Without a process, u-area and vmspace nothing can be done here. */
	if (p == NULL || p->p_addr == NULL || p->p_vmspace == NULL)
		return 0;
	pcb = &p->p_addr->u_pcb;

	/*
	 * Faults on user addresses that must never happen in supervisor
	 * mode: an access flagged PGEX_I, or one flagged PGEX_P while no
	 * pcb_onfault handler is armed.
	 * NOTE(review): these look like the SMEP/SMAP violation cases --
	 * confirm against the CPU feature setup.
	 */
	if (cr2 <= VM_MAXUSER_ADDRESS) {
		if (frame->tf_err & PGEX_I)
			panic("attempt to execute user address %p "
			    "in supervisor mode", (void *)cr2);
		if (pcb->pcb_onfault == NULL && (frame->tf_err & PGEX_P))
			panic("attempt to access user address %p "
			    "in supervisor mode", (void *)cr2);
	}

	/* Kernel addresses fault against kernel_map, others on the process. */
	map = (va >= VM_MIN_KERNEL_ADDRESS) ? kernel_map :
	    &p->p_vmspace->vm_map;

	if (curcpu()->ci_inatomic != 0 && map != kernel_map) {
		/* User-map faults are refused while ci_inatomic is set. */
		error = EFAULT;
	} else {
		/*
		 * pcb_onfault is cleared for the duration of uvm_fault() and
		 * restored afterwards.
		 */
		saved_onfault = pcb->pcb_onfault;
		pcb->pcb_onfault = NULL;
		error = uvm_fault(map, va, 0, prot);
		pcb->pcb_onfault = saved_onfault;

		if (error == 0 && map != kernel_map)
			uvm_grow(p, va);
	}

	if (error == 0)
		return 1;

	if (pcb->pcb_onfault == NULL) {
		/* No recovery handler registered: fatal kernel access. */
		panic("uvm_fault(%p, 0x%lx, 0, %d) -> %x",
		    map, va, prot, error);
		return 0;
	}

	/* Resume execution at the registered recovery address. */
	frame->tf_eip = (u_int32_t)pcb->pcb_onfault;
	return 1;
}
/*
 * trap(frame): common handler for processor exceptions, faults and traps.
 *
 * Dispatches on frame->tf_trapno, with T_USER or'ed in when the saved
 * cs/eflags show the trap did not come from kernel mode.
 *
 * - User-mode traps are mostly converted into a signal with trapsignal()
 *   and leave through userret().
 * - Kernel-mode traps are recovered where possible (pcb_onfault, a fault
 *   on one of the recognized iret/pop-segment instructions, page faults);
 *   anything else ends at we_re_toast, which optionally enters DDB and
 *   then panics.
 */
void
trap(struct trapframe *frame)
{
struct proc *p = curproc;
int type = frame->tf_trapno;
struct pcb *pcb = NULL;
/* Resume addresses defined outside this file, one per recognized insn. */
extern char resume_iret[], resume_pop_ds[], resume_pop_es[],
resume_pop_fs[], resume_pop_gs[];
struct trapframe *vframe;
int resume;
union sigval sv;
/* Only consumed by pmap_exec_fixup() in the T_PROTFLT|T_USER case. */
vaddr_t gdt_cs = SEGDESC_LIMIT(curcpu()->ci_gdt[GUCODE_SEL].sd);
/*
 * Read cr2 once, up front; every later use goes through this copy.
 * NOTE(review): presumably so a nested fault cannot change the value
 * under us -- confirm.
 */
uint32_t cr2 = rcr2();
atomic_inc_int(&uvmexp.traps);
#ifdef DEBUG
if (trapdebug) {
printf("trap %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
frame->tf_trapno, frame->tf_err, frame->tf_eip,
frame->tf_cs, frame->tf_eflags, cr2, lapic_tpr);
printf("curproc %p\n", curproc);
}
#endif
#ifdef DIAGNOSTIC
/*
 * If the CPU advertises SMAP, EFLAGS.AC must not be set on entry.
 * Clear it before panicking.
 */
if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) {
u_int ef = read_eflags();
if (ef & PSL_AC) {
write_eflags(ef & ~PSL_AC);
panic("%s: AC set on entry", "trap");
}
}
#endif
/*
 * Trap from user mode: tag the type with T_USER, record this frame as
 * the process' register frame and refresh its credentials.
 */
if (!KERNELMODE(frame->tf_cs, frame->tf_eflags)) {
type |= T_USER;
p->p_md.md_regs = frame;
refreshcreds(p);
}
switch (type) {
/* Kernel-mode protection, segment-not-present or alignment fault. */
case T_PROTFLT:
case T_SEGNPFLT:
case T_ALIGNFLT:
/* A registered pcb_onfault handler takes precedence: resume there. */
if (p && p->p_addr) {
pcb = &p->p_addr->u_pcb;
if (pcb->pcb_onfault != 0) {
frame->tf_eip = (int)pcb->pcb_onfault;
return;
}
}
/*
 * Otherwise look at the opcode at the faulting eip.  Only iret and the
 * segment-register pops are recognized.  For each, vframe is placed so
 * that the named trapframe field coincides with &frame->tf_esp, and
 * resume is set to the matching resume_* address.
 * NOTE(review): these are presumably the instructions of the
 * return-to-user path -- confirm against the assembly that defines the
 * resume_* labels.
 */
switch (*(u_char *)frame->tf_eip) {
/* 0xcf: iret */
case 0xcf:
vframe = (void *)((int)&frame->tf_esp -
offsetof(struct trapframe, tf_eip));
resume = (int)resume_iret;
break;
/* 0x1f: pop %ds */
case 0x1f:
vframe = (void *)((int)&frame->tf_esp -
offsetof(struct trapframe, tf_ds));
resume = (int)resume_pop_ds;
break;
/* 0x07: pop %es */
case 0x07:
vframe = (void *)((int)&frame->tf_esp -
offsetof(struct trapframe, tf_es));
resume = (int)resume_pop_es;
break;
/* 0x0f: two-byte opcode; the second byte selects pop %fs / pop %gs. */
case 0x0f:
switch (*(u_char *)(frame->tf_eip + 1)) {
/* 0x0f 0xa1: pop %fs */
case 0xa1:
vframe = (void *)((int)&frame->tf_esp -
offsetof(struct trapframe, tf_fs));
resume = (int)resume_pop_fs;
break;
/* 0x0f 0xa9: pop %gs */
case 0xa9:
vframe = (void *)((int)&frame->tf_esp -
offsetof(struct trapframe, tf_gs));
resume = (int)resume_pop_gs;
break;
default:
goto we_re_toast;
}
break;
default:
goto we_re_toast;
}
/* The frame being restored must not be a kernel-mode one. */
if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
goto we_re_toast;
/* Continue at the resume_* address instead of the faulting insn. */
frame->tf_eip = resume;
return;
/*
 * User protection fault.  When cpu_pae is 0, pmap_exec_fixup() gets a
 * chance first; a nonzero return skips the signal.  Otherwise SIGSEGV.
 */
case T_PROTFLT|T_USER:
if (cpu_pae == 0 &&
pmap_exec_fixup(&p->p_vmspace->vm_map, frame, gdt_cs,
&p->p_addr->u_pcb))
goto out;
sv.sival_int = frame->tf_eip;
trapsignal(p, SIGSEGV, type &~ T_USER, SEGV_MAPERR, sv);
goto out;
/*
 * The following user-mode cases all post a signal whose sigval is the
 * faulting eip and whose trap number has T_USER stripped.
 */
case T_TSSFLT|T_USER:
sv.sival_int = frame->tf_eip;
trapsignal(p, SIGBUS, type &~ T_USER, BUS_OBJERR, sv);
goto out;
case T_SEGNPFLT|T_USER:
case T_STKFLT|T_USER:
sv.sival_int = frame->tf_eip;
trapsignal(p, SIGSEGV, type &~ T_USER, SEGV_MAPERR, sv);
goto out;
case T_ALIGNFLT|T_USER:
sv.sival_int = frame->tf_eip;
trapsignal(p, SIGBUS, type &~ T_USER, BUS_ADRALN, sv);
goto out;
case T_PRIVINFLT|T_USER:
sv.sival_int = frame->tf_eip;
trapsignal(p, SIGILL, type &~ T_USER, ILL_PRVOPC, sv);
goto out;
case T_FPOPFLT|T_USER:
sv.sival_int = frame->tf_eip;
trapsignal(p, SIGILL, type &~ T_USER, ILL_COPROC, sv);
goto out;
/* T_DNA from user mode: log the pid and kill the process. */
case T_DNA|T_USER: {
printf("pid %d killed due to lack of floating point\n",
p->p_p->ps_pid);
sv.sival_int = frame->tf_eip;
trapsignal(p, SIGKILL, type &~ T_USER, FPE_FLTINV, sv);
goto out;
}
case T_BOUND|T_USER:
sv.sival_int = frame->tf_eip;
trapsignal(p, SIGFPE, type &~ T_USER, FPE_FLTSUB, sv);
goto out;
case T_OFLOW|T_USER:
sv.sival_int = frame->tf_eip;
trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTOVF, sv);
goto out;
case T_DIVIDE|T_USER:
sv.sival_int = frame->tf_eip;
trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTDIV, sv);
goto out;
/* Unlike the cases above, this one passes tf_err as the trap number. */
case T_ARITHTRAP|T_USER:
sv.sival_int = frame->tf_eip;
trapsignal(p, SIGFPE, frame->tf_err, FPE_INTOVF, sv);
goto out;
/* T_XFTRAP from user mode is handed to npxtrap(). */
case T_XFTRAP|T_USER:
npxtrap(frame);
goto out;
/* Kernel page fault: fatal unless kpageflttrap() returns nonzero. */
case T_PAGEFLT:
if (kpageflttrap(frame, cr2))
return;
goto we_re_toast;
/*
 * User page fault.  uvm_map_inentry() is called first with the user
 * stack pointer (PROC_STACK(p)); if it returns zero the fault is not
 * serviced.  Otherwise upageflttrap() does the work.
 */
case T_PAGEFLT|T_USER: {
if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
"[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
goto out;
if (upageflttrap(frame, cr2))
goto out;
goto we_re_toast;
}
/* Compiled out: older handling of the kernel trace trap. */
#if 0
#if !defined(DDB)
case T_TRCTRAP:
return;
#endif
#endif
/*
 * User breakpoint and trace traps become SIGTRAP; note that sigval is
 * cr2 here, not eip.  The break reaches the T_USER test after the
 * switch and therefore userret().
 */
case T_BPTFLT|T_USER:
sv.sival_int = cr2;
trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_BRKPT, sv);
break;
case T_TRCTRAP|T_USER:
sv.sival_int = cr2;
trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_TRACE, sv);
break;
#if NISA > 0
/*
 * NMI, in either mode: with DDB the debugger is tried first; after
 * that isa_nmi() decides whether the NMI is fatal (nonzero return).
 */
case T_NMI:
case T_NMI|T_USER:
#ifdef DDB
printf ("NMI ... going to debugger\n");
if (db_ktrap(type, 0, frame))
return;
#endif
if (isa_nmi() == 0)
return;
else
goto we_re_toast;
#endif
/*
 * Kernel trace trap: ignored without DDB; with DDB it falls through to
 * the default case so the debugger sees it.
 */
case T_TRCTRAP:
#ifndef DDB
return;
#endif
default:
/*
 * Fatal path: give DDB a chance (return if db_ktrap() is nonzero), then
 * print the trap name and register state and panic.
 * NOTE(review): the bound check below only tests tf_trapno against the
 * upper limit; if tf_trapno is a signed type, a negative value would
 * index before trap_type[] -- confirm it can never be negative.
 */
we_re_toast:
#ifdef DDB
if (db_ktrap(type, frame->tf_err, frame))
return;
#endif
if (frame->tf_trapno < trap_types)
printf("fatal %s (%d)", trap_type[frame->tf_trapno],
frame->tf_trapno);
else
printf("unknown trap %d", frame->tf_trapno);
printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor");
printf("trap type %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
type, frame->tf_err, frame->tf_eip, frame->tf_cs,
frame->tf_eflags, cr2, lapic_tpr);
panic("trap type %d, code=%x, pc=%x",
type, frame->tf_err, frame->tf_eip);
}
/* Cases that leave the switch with break: only user traps go on. */
if ((type & T_USER) == 0)
return;
/* Common exit for user-mode traps. */
out:
userret(p);
}
void
ast(struct trapframe *frame)
{
struct proc *p = curproc;
atomic_inc_int(&uvmexp.traps);
KASSERT(!KERNELMODE(frame->tf_cs, frame->tf_eflags));
p->p_md.md_regs = frame;
refreshcreds(p);
atomic_inc_int(&uvmexp.softs);
mi_ast(p, curcpu()->ci_want_resched);
userret(p);
}
void
syscall(struct trapframe *frame)
{
caddr_t params;
const struct sysent *callp = sysent;
struct proc *p = curproc;
int error;
register_t code, args[8], rval[2];
#ifdef DIAGNOSTIC
int ocpl = lapic_tpr;
#endif
short argsize;
atomic_inc_int(&uvmexp.syscalls);
#ifdef DIAGNOSTIC
if (!USERMODE(frame->tf_cs, frame->tf_eflags))
panic("syscall");
#endif
#ifdef DIAGNOSTIC
if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) {
u_int ef = read_eflags();
if (ef & PSL_AC) {
write_eflags(ef & ~PSL_AC);
panic("%s: AC set on entry", "syscall");
}
}
#endif
p->p_md.md_regs = frame;
code = frame->tf_eax;
if (code > 0 && code < SYS_MAXSYSCALL)
callp += code;
argsize = callp->sy_argsize;
params = (caddr_t)frame->tf_esp + sizeof(int);
if (argsize && (error = copyin(params, args, argsize)))
goto bad;
rval[0] = 0;
rval[1] = frame->tf_edx;
error = mi_syscall(p, code, callp, args, rval);
switch (error) {
case 0:
frame->tf_eax = rval[0];
frame->tf_edx = rval[1];
frame->tf_eflags &= ~PSL_C;
break;
case ERESTART:
frame->tf_eip -= 2;
break;
case EJUSTRETURN:
break;
default:
bad:
frame->tf_eax = error;
frame->tf_eflags |= PSL_C;
break;
}
mi_syscall_return(p, code, error, rval);
#ifdef DIAGNOSTIC
if (lapic_tpr != ocpl) {
printf("WARNING: SPL (0x%x) NOT LOWERED ON "
"syscall(0x%lx, 0x%lx, 0x%lx, 0x%lx...) EXIT, PID %d\n",
lapic_tpr, code, args[0], args[1], args[2],
p->p_p->ps_pid);
lapic_tpr = ocpl;
}
#endif
}
/*
 * child_return(arg): arg is the struct proc to return to user mode.
 *
 * Sets up the saved user frame so that the pending call appears to have
 * succeeded with a return value of 0 (eax = 0, carry flag clear), drops
 * the kernel lock and finishes in mi_child_return().
 * NOTE(review): presumably the first code run by a newly created child --
 * confirm against the fork path.
 */
void
child_return(void *arg)
{
	struct proc *p = arg;
	struct trapframe *tf = p->p_md.md_regs;

	tf->tf_eflags &= ~PSL_C;
	tf->tf_eax = 0;

	KERNEL_UNLOCK();
	mi_child_return(p);
}