/*
 * Debug tracing hook. IPRINTF takes one fully parenthesized printf argument
 * list, e.g. IPRINTF(("fmt %d\n", x)). Tracing is compiled out by default;
 * changing the "#if 0" below to "#if 1" routes the messages to printf.
 */
#if 0
#define IPRINTF(x) printf x
#else
#define IPRINTF(x)
#endif
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/user.h>
#include <sys/device.h>
#include <uvm/uvm_extern.h>
#include <machine/intr.h>
#include <machine/npx.h>
#include <machine/pio.h>
#include <machine/cpufunc.h>
#include <machine/pcb.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
#include <machine/i8259.h>
#include <dev/isa/isavar.h>
/*
 * One-instruction wrappers for the x87 FPU. Macros that take "addr" expect a
 * pointer; the object it points to is used as the instruction's memory
 * operand (input for the loads, output for the stores).
 */
#define fldcw(addr) __asm("fldcw %0" : : "m" (*addr))
#define fnclex() __asm("fnclex")
#define fninit() __asm("fninit")
#define fnsave(addr) __asm("fnsave %0" : "=m" (*addr))
#define fnstcw(addr) __asm("fnstcw %0" : "=m" (*addr))
#define fnstsw(addr) __asm("fnstsw %0" : "=m" (*addr))
/* Loads 0.0 and 1.0 and divides; npxprobe1() uses it to provoke a zero-divide. */
#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fwait")
#define frstor(addr) __asm("frstor %0" : : "m" (*addr))
#define fwait() __asm("fwait")
/* clts() clears CR0.TS; stts() sets it again via a read-modify-write of CR0. */
#define clts() __asm("clts")
#define stts() lcr0(rcr0() | CR0_TS)
/*
 * MXCSR mask. npxinit() fills it in from an FXSAVE image (or a built-in
 * default) the first time it runs with i386_use_fxsave set.
 */
uint32_t fpu_mxcsr_mask;
/* Forward declarations for routines defined later in this file. */
int npxintr(void *);
static int npxprobe1(struct isa_attach_args *);
static int x86fpflags_to_siginfo(u_int32_t);
/* Per-device software state for the npx driver. */
struct npx_softc {
/* Generic device header; npxattach() casts its device pointer to this struct. */
struct device sc_dev;
/* Value returned by isa_intr_establish(); only assigned for NPX_INTERRUPT. */
void *sc_ih;
};
/* Autoconfiguration entry points and the tables that publish them. */
int npxprobe(struct device *, void *, void *);
void npxattach(struct device *, struct device *, void *);
/* Attachment record: softc size followed by the match and attach routines. */
const struct cfattach npx_ca = {
sizeof(struct npx_softc), npxprobe, npxattach
};
/* Driver record carrying the device name "npx". */
struct cfdriver npx_cd = {
NULL, "npx", DV_DULL
};
/* How (and whether) the FPU reports errors, as decided by the probe code. */
enum npx_type {
/* Probe failed, or npxattach() gave up on a broken unit. */
NPX_NONE = 0,
/* The probe saw the IRQ fire rather than the trap. */
NPX_INTERRUPT,
/* The probe saw the exception-16 trap fire. */
NPX_EXCEPTION,
/* An FPU answered, but neither the trap nor the IRQ fired. */
NPX_BROKEN,
/* CPUID reported an FPU; npxattach() converts this to NPX_EXCEPTION. */
NPX_CPUID,
};
/* Result of the probe; npxattach() may rewrite it. */
static enum npx_type npx_type;
/*
 * Event counters used during probing: the probeintr and probetrap stubs
 * increment them, npxprobe1() zeroes them and then inspects them. Both are
 * placed in the ".kudata" section.
 */
static volatile u_int npx_intrs_while_probing
__attribute__((section(".kudata")));
static volatile u_int npx_traps_while_probing
__attribute__((section(".kudata")));
/*
 * Wrappers for the FXSAVE/FXRSTOR state image and for loading MXCSR.
 * As with the x87 wrappers, "addr" is a pointer to the memory operand.
 */
#define fxsave(addr) __asm("fxsave %0" : "=m" (*addr))
#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*addr))
#define ldmxcsr(addr) __asm("ldmxcsr %0" : : "m" (*addr))
/*
 * Store the live FPU state into *addr and leave the FPU reinitialized.
 *
 * The image format follows i386_use_fxsave: the sv_xmm member is written
 * with FXSAVE when it is set, the sv_87 member with FNSAVE otherwise.
 */
static __inline void
fpu_save(union savefpu *addr)
{
	if (!i386_use_fxsave) {
		/* FNSAVE reinitializes the FPU as part of the store. */
		fnsave(&addr->sv_87);
		return;
	}

	/* FXSAVE leaves the x87 state in place, so reinitialize explicitly. */
	fxsave(&addr->sv_xmm);
	fninit();
}
/*
 * Placeholder device-not-available handler. It is the initial value of
 * npxdna_func and panics if it is ever called, i.e. if an FPU trap arrives
 * before npxattach() has installed a real handler.
 */
static int
npxdna_notset(struct cpu_info *ci)
{
panic("npxdna vector not initialized");
}
/*
 * Device-not-available handler hook. Starts out pointing at the panicking
 * placeholder; npxattach() switches it to npxdna_xmm or npxdna_s87 depending
 * on i386_use_fxsave.
 */
int (*npxdna_func)(struct cpu_info *) = npxdna_notset;
int npxdna_s87(struct cpu_info *);
int npxdna_xmm(struct cpu_info *);
/*
 * Temporary interrupt stub that npxprobe() installs on the candidate IRQ
 * vector while probing. It bumps npx_intrs_while_probing (through an SS
 * segment override), sends EOI (0x20) to both interrupt controllers, writes 0
 * to port 0xf0 to clear the BUSY# latch, and returns with iret. Only %eax is
 * used, and it is saved and restored around the port writes.
 */
void probeintr(void);
asm (".text\n\t"
"probeintr:\n\t"
"ss\n\t"
"incl npx_intrs_while_probing\n\t"
"pushl %eax\n\t"
"movb $0x20,%al # EOI (asm in strings loses cpp features)\n\t"
"outb %al,$0xa0 # IO_ICU2\n\t"
"outb %al,$0x20 # IO_ICU1\n\t"
"movb $0,%al\n\t"
"outb %al,$0xf0 # clear BUSY# latch\n\t"
"popl %eax\n\t"
"iret\n\t");
/*
 * Temporary trap stub that npxprobe() installs on vector 16 while probing.
 * It bumps npx_traps_while_probing (through an SS segment override), clears
 * the pending FPU exception with fnclex, and returns with iret.
 */
void probetrap(void);
asm (".text\n\t"
"probetrap:\n\t"
"ss\n\t"
"incl npx_traps_while_probing\n\t"
"fnclex\n\t"
"iret\n\t");
/*
 * Hardware probe proper; npxprobe() calls it with the temporary probeintr
 * and probetrap handlers installed.
 *
 * Fills in ia_iosize/ia_msize, then checks that FNINIT leaves a plausible
 * status word and control word. If so, the zero-divide exception is unmasked
 * and a division by zero is provoked to see which reporting path fires:
 *   trap counted       -> NPX_EXCEPTION, IRQ released (IRQUNK)
 *   interrupt counted  -> NPX_INTERRUPT, IRQ kept
 *   neither            -> NPX_BROKEN, IRQ released (IRQUNK)
 *
 * Returns 1 when an FPU answered (npx_type set as above), 0 when none did
 * (npx_type = NPX_NONE).
 */
static inline int
npxprobe1(struct isa_attach_args *ia)
{
	int cw;
	int sw;

	ia->ia_iosize = 16;
	ia->ia_msize = 0;

	/* Reset the FPU and give any resulting interrupt time to arrive. */
	fninit();
	delay(1000);

	/* Pre-load a marker so an absent FPU (no store) is recognizable. */
	sw = 0x5a5a;
	fnstsw(&sw);
	if ((sw & 0xb8ff) != 0)
		goto nofpu;

	/* Same trick for the control word: expect the post-FNINIT value. */
	cw = 0x5a5a;
	fnstcw(&cw);
	if ((cw & 0x1f3f) != 0x033f)
		goto nofpu;

	/* Clear control-word bit 2 so the zero-divide exception is unmasked. */
	cw &= ~(1 << 2);
	fldcw(&cw);
	npx_traps_while_probing = npx_intrs_while_probing = 0;
	fp_divide_by_0();
	delay(1);

	if (npx_traps_while_probing != 0) {
		npx_type = NPX_EXCEPTION;
		ia->ia_irq = IRQUNK;
	} else if (npx_intrs_while_probing != 0) {
		npx_type = NPX_INTERRUPT;
	} else {
		npx_type = NPX_BROKEN;
		ia->ia_irq = IRQUNK;
	}
	return 1;

nofpu:
	npx_type = NPX_NONE;
	return 0;
}
/*
 * Autoconf match routine for the npx device.
 *
 * If cpu_feature has CPUID_FPU set, the probe succeeds immediately with
 * npx_type = NPX_CPUID and no IRQ. Otherwise this is a wrapper around
 * npxprobe1(): it installs the temporary probeintr/probetrap handlers,
 * rewrites the interrupt mask, clears CR0_EM|CR0_TS, runs the hardware
 * probe, and then puts the IDT entries and interrupt mask back and sets
 * CR0_EM|CR0_TS again.
 *
 * parent and match are unused; aux is the struct isa_attach_args.
 * Returns 1 if an FPU was found, 0 otherwise.
 */
int
npxprobe(struct device *parent, void *match, void *aux)
{
struct isa_attach_args *ia = aux;
int irq;
int result;
u_long s;
unsigned save_imen;
struct gate_descriptor save_idt_npxintr;
struct gate_descriptor save_idt_npxtrap;
/* CPUID already vouches for an FPU: skip the hardware probe, drop the IRQ. */
if (cpu_feature & CPUID_FPU) {
npx_type = NPX_CPUID;
ia->ia_irq = IRQUNK;
ia->ia_iosize = 16;
ia->ia_msize = 0;
return 1;
}
/* IDT slot of the candidate IRQ: hardware IRQs start at NRSVIDT. */
irq = NRSVIDT + ia->ia_irq;
/* Swap in the probe handlers with interrupts off, remembering the old gates. */
s = intr_disable();
save_idt_npxintr = idt[irq];
save_idt_npxtrap = idt[16];
setgate(&idt[irq], probeintr, 0, SDT_SYS386IGT, SEL_KPL, GICODE_SEL);
setgate(&idt[16], probetrap, 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL);
/*
 * Rewrite the interrupt mask so that only the IRQ_SLAVE and candidate-IRQ
 * bits are clear, then push it to the controllers.
 */
save_imen = imen;
imen = ~((1 << IRQ_SLAVE) | (1 << ia->ia_irq));
SET_ICUS();
/*
 * Poke the coprocessor ports. Port 0xf0 is the BUSY# latch (see probeintr).
 * NOTE(review): the write to 0xf1 is presumably a coprocessor reset on some
 * systems - confirm.
 */
outb(0xf1, 0);
delay(1000);
outb(0xf0, 0);
/* FPU instructions must not trap while probing: clear EM and TS. */
lcr0(rcr0() & ~(CR0_EM|CR0_TS));
intr_restore(s);
result = npxprobe1(ia);
/* Undo everything: set EM|TS again, restore the mask and both IDT gates. */
s = intr_disable();
lcr0(rcr0() | (CR0_EM|CR0_TS));
imen = save_imen;
SET_ICUS();
idt[irq] = save_idt_npxintr;
idt[16] = save_idt_npxtrap;
intr_restore(s);
return (result);
}
/*
 * int npx586bug1(int x, int y)
 *
 * Computes x - (x / y) * y on the FPU and returns the result converted to an
 * integer. The arguments are read from the stack at 4(%esp) and 8(%esp); the
 * result is stored through a temporary stack slot and popped into %eax.
 * npxinit() treats a non-zero result for its test operands as the Pentium
 * FDIV bug.
 */
int npx586bug1(int, int);
asm (".text\n\t"
"npx586bug1:\n\t"
"fildl 4(%esp) # x\n\t"
"fildl 8(%esp) # y\n\t"
"fld %st(1)\n\t"
"fdiv %st(1),%st # x/y\n\t"
"fmulp %st,%st(1) # (x/y)*y\n\t"
"fsubrp %st,%st(1) # x-(x/y)*y\n\t"
"pushl $0\n\t"
"fistpl (%esp)\n\t"
"popl %eax\n\t"
"ret\n\t");
void
npxinit(struct cpu_info *ci)
{
lcr0(rcr0() & ~(CR0_EM|CR0_TS));
fninit();
if (npx586bug1(4195835, 3145727) != 0) {
printf("%s: WARNING: Pentium FDIV bug detected!\n",
ci->ci_dev->dv_xname);
}
if (fpu_mxcsr_mask == 0 && i386_use_fxsave) {
struct savexmm xm __attribute__((aligned(16)));
bzero(&xm, sizeof(xm));
fxsave(&xm);
if (xm.sv_env.en_mxcsr_mask)
fpu_mxcsr_mask = xm.sv_env.en_mxcsr_mask;
else
fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
}
lcr0(rcr0() | (CR0_TS));
}
void
npxattach(struct device *parent, struct device *self, void *aux)
{
struct npx_softc *sc = (void *)self;
struct isa_attach_args *ia = aux;
switch (npx_type) {
case NPX_INTERRUPT:
printf("\n");
lcr0(rcr0() & ~CR0_NE);
sc->sc_ih = isa_intr_establish(ia->ia_ic, ia->ia_irq,
IST_EDGE, IPL_NONE, npxintr, 0, sc->sc_dev.dv_xname);
break;
case NPX_EXCEPTION:
printf(": using exception 16\n");
break;
case NPX_CPUID:
printf(": reported by CPUID; using exception 16\n");
npx_type = NPX_EXCEPTION;
break;
case NPX_BROKEN:
printf(": error reporting broken; not using\n");
npx_type = NPX_NONE;
return;
case NPX_NONE:
return;
}
npxinit(&cpu_info_primary);
if (i386_use_fxsave)
npxdna_func = npxdna_xmm;
else
npxdna_func = npxdna_s87;
}
/*
 * Interrupt handler for FPU errors reported through the IRQ; npxattach()
 * registers it in the NPX_INTERRUPT case.
 *
 * arg is used as the struct intrframe of the interrupted context.
 * NOTE(review): npxattach() registers this handler with a zero argument, so
 * the frame pointer presumably comes from the interrupt dispatch code -
 * confirm.
 *
 * Saves the FPU state of the process that owns the FPU, records the exception
 * status and tag words, and delivers SIGFPE. Always returns 1, unless it
 * panics because there is no FPU owner or no usable FPU.
 */
int
npxintr(void *arg)
{
struct cpu_info *ci = curcpu();
struct proc *p = ci->ci_fpcurproc;
union savefpu *addr;
struct intrframe *frame = arg;
int code;
union sigval sv;
atomic_inc_int(&uvmexp.traps);
IPRINTF(("%s: fp intr\n", ci->ci_dev->dv_xname));
/* An FPU interrupt with no FPU owner (or no FPU at all) is fatal. */
if (p == NULL || npx_type == NPX_NONE) {
printf("npxintr: p = %lx, curproc = %lx, npx_type = %d\n",
(u_long) p, (u_long) curproc, npx_type);
panic("npxintr from nowhere");
}
/* Clear the BUSY# latch (same port write as in the probeintr stub). */
outb(0xf0, 0);
/* A state save is in progress on this CPU: acknowledge and do nothing else. */
if (ci->ci_fpsaving)
return (1);
#ifdef DIAGNOSTIC
if (p != curproc)
panic("npxintr: wrong process");
#endif
addr = &p->p_addr->u_pcb.pcb_savefpu;
/* fpu_save() leaves the FPU reinitialized, so the saved control word is reloaded. */
fpu_save(addr);
fwait();
if (i386_use_fxsave) {
fldcw(&addr->sv_xmm.sv_env.en_cw);
} else
fldcw(&addr->sv_87.sv_env.en_cw);
fwait();
/* Keep a copy of the status and tag words that describe this exception. */
if (i386_use_fxsave) {
addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw;
addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw;
} else {
addr->sv_87.sv_ex_sw = addr->sv_87.sv_env.en_sw;
addr->sv_87.sv_ex_tw = addr->sv_87.sv_env.en_tw;
}
/*
 * Interrupted in user mode in the owning process: deliver SIGFPE as a trap,
 * with an si_code derived from the status word and the faulting eip as value.
 */
if (p == curproc && USERMODE(frame->if_cs, frame->if_eflags)) {
/* Point md_regs at the register portion of the interrupt frame. */
p->p_md.md_regs = (struct trapframe *)&frame->if_fs;
if (i386_use_fxsave)
code = x86fpflags_to_siginfo(addr->sv_xmm.sv_ex_sw);
else
code = x86fpflags_to_siginfo(addr->sv_87.sv_ex_sw);
sv.sival_int = frame->if_eip;
trapsignal(p, SIGFPE, T_ARITHTRAP, code, sv);
} else {
/* Otherwise post a plain SIGFPE to the owner under the kernel lock. */
KERNEL_LOCK();
psignal(p, SIGFPE);
KERNEL_UNLOCK();
}
return (1);
}
/*
 * Trap handler for floating-point exceptions reported through MXCSR.
 * NOTE(review): presumably reached only for SSE exceptions on
 * FXSAVE-capable CPUs, since it uses FXSAVE unconditionally - confirm
 * against the caller.
 *
 * Saves the FPU state of the process that owns the FPU, clears the six
 * exception flag bits in the live MXCSR, records the status and tag words,
 * and delivers SIGFPE with an si_code derived from the MXCSR flags and the
 * faulting eip as the signal value.
 *
 * frame: trap frame of the faulting context (tf_eip and tf_err are read).
 */
void
npxtrap(struct trapframe *frame)
{
	struct proc *p = curcpu()->ci_fpcurproc;
	union savefpu *addr;
	u_int32_t mxcsr, statbits;
	int code;
	union sigval sv;

#ifdef DIAGNOSTIC
	if (p != curproc)
		panic("npxtrap: wrong process");
#endif

	/*
	 * Look up the save area only after the sanity check above, as
	 * npxintr() does, so that a NULL or stale ci_fpcurproc reaches the
	 * panic instead of faulting on the p->p_addr dereference first.
	 */
	addr = &p->p_addr->u_pcb.pcb_savefpu;

	fxsave(&addr->sv_xmm);

	/* Keep the original flags for the signal; clear them in the live MXCSR. */
	mxcsr = addr->sv_xmm.sv_env.en_mxcsr;
	statbits = mxcsr;
	mxcsr &= ~0x3f;
	ldmxcsr(&mxcsr);

	addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw;
	addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw;

	code = x86fpflags_to_siginfo(statbits);
	sv.sival_int = frame->tf_eip;
	trapsignal(p, SIGFPE, frame->tf_err, code, sv);
}
/*
 * Translate x87 status-word / MXCSR exception flags into a SIGFPE si_code.
 *
 * Only bits 0-6 of flags are examined; when several are set, the lowest set
 * bit decides. If none of them is set, FPE_FLTINV is returned.
 */
static int
x86fpflags_to_siginfo(u_int32_t flags)
{
	static const int codes[] = {
		FPE_FLTINV,	/* bit 0: invalid operation */
		FPE_FLTRES,	/* bit 1: denormal operand */
		FPE_FLTDIV,	/* bit 2: divide by zero */
		FPE_FLTOVF,	/* bit 3: overflow */
		FPE_FLTUND,	/* bit 4: underflow */
		FPE_FLTRES,	/* bit 5: precision */
		FPE_FLTINV,	/* bit 6: stack fault */
	};
	const int *code = codes;
	const int *const end = codes + sizeof(codes) / sizeof(codes[0]);
	u_int32_t mask = 1;

	/* Walk the table and the bit mask in lock step, lowest bit first. */
	while (code != end) {
		if (flags & mask)
			return (*code);
		mask <<= 1;
		code++;
	}

	return (FPE_FLTINV);
}
/*
 * Device-not-available handler for CPUs using FXSAVE/FXRSTOR; npxattach()
 * installs it in npxdna_func when i386_use_fxsave is set.
 *
 * Takes the FPU away from its current owner on this CPU (saving that state
 * unless the owner is proc0), makes the current process the owner, and loads
 * its FPU state - a freshly initialized one on first use.
 *
 * Returns 0 if called while a state save is in progress on this CPU,
 * 1 once the current process owns the FPU.
 */
int
npxdna_xmm(struct cpu_info *ci)
{
union savefpu *sfp;
struct proc *p;
int s;
if (ci->ci_fpsaving) {
printf("recursive npx trap; cr0=%x\n", rcr0());
return (0);
}
/* Raise to splipi while the previous owner is evicted. */
s = splipi();
#ifdef MULTIPROCESSOR
p = ci->ci_curproc;
#else
p = curproc;
#endif
IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev->dv_xname, (u_long)p,
(p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));
/*
 * Someone owns the FPU on this CPU: save (or, for proc0, just drop) its
 * state. Otherwise reset the FPU, briefly clearing TS to do so.
 */
if (ci->ci_fpcurproc != NULL) {
IPRINTF(("%s: fp save %lx\n", ci->ci_dev->dv_xname,
(u_long)ci->ci_fpcurproc));
npxsave_cpu(ci, ci->ci_fpcurproc != &proc0);
} else {
clts();
IPRINTF(("%s: fp init\n", ci->ci_dev->dv_xname));
fninit();
fwait();
stts();
}
splx(s);
IPRINTF(("%s: done saving\n", ci->ci_dev->dv_xname));
KDASSERT(ci->ci_fpcurproc == NULL);
#ifndef MULTIPROCESSOR
KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
#else
/* The process's state may still be live on another CPU: save it first. */
if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
npxsave_proc(p, 1);
#endif
/* Allow FPU use: clear TS in the saved CR0 and in the live one. */
p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
clts();
/* Record the new owner on both sides (CPU and process) at splipi. */
s = splipi();
ci->ci_fpcurproc = p;
p->p_addr->u_pcb.pcb_fpcpu = ci;
splx(s);
atomic_inc_int(&uvmexp.fpswtch);
sfp = &p->p_addr->u_pcb.pcb_savefpu;
if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
/* First FPU use by this process: build a clean image and load it. */
bzero(&sfp->sv_xmm, sizeof(sfp->sv_xmm));
sfp->sv_xmm.sv_env.en_cw = __INITIAL_NPXCW__;
sfp->sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
fxrstor(&sfp->sv_xmm);
p->p_md.md_flags |= MDP_USEDFPU;
} else {
/*
 * Clear pending exceptions and do a dummy load of 0.0 before FXRSTOR.
 * NOTE(review): this looks like the usual workaround for CPUs whose
 * FXRSTOR does not reload the x87 instruction/data pointers - confirm.
 */
static double zero = 0.0;
fnclex();
__asm volatile("ffree %%st(7)\n\tfldl %0" : : "m" (zero));
fxrstor(&sfp->sv_xmm);
}
return (1);
}
/*
 * Device-not-available handler for CPUs using FNSAVE/FRSTOR; npxattach()
 * installs it in npxdna_func when i386_use_fxsave is clear.
 *
 * Takes the FPU away from its current owner on this CPU (saving that state
 * unless the owner is proc0), makes the current process the owner, and loads
 * its FPU state - a freshly initialized one on first use.
 *
 * Returns 0 if called while a state save is in progress on this CPU,
 * 1 once the current process owns the FPU.
 */
int
npxdna_s87(struct cpu_info *ci)
{
union savefpu *sfp;
struct proc *p;
int s;
KDASSERT(i386_use_fxsave == 0);
if (ci->ci_fpsaving) {
printf("recursive npx trap; cr0=%x\n", rcr0());
return (0);
}
/* Raise to splipi while the previous owner is evicted. */
s = splipi();
#ifdef MULTIPROCESSOR
p = ci->ci_curproc;
#else
p = curproc;
#endif
IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev->dv_xname, (u_long)p,
(p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));
/*
 * Someone owns the FPU on this CPU: save (or, for proc0, just drop) its
 * state. Otherwise reset the FPU, briefly clearing TS to do so.
 */
if (ci->ci_fpcurproc != NULL) {
IPRINTF(("%s: fp save %lx\n", ci->ci_dev->dv_xname,
(u_long)ci->ci_fpcurproc));
npxsave_cpu(ci, ci->ci_fpcurproc != &proc0);
} else {
clts();
IPRINTF(("%s: fp init\n", ci->ci_dev->dv_xname));
fninit();
fwait();
stts();
}
splx(s);
IPRINTF(("%s: done saving\n", ci->ci_dev->dv_xname));
KDASSERT(ci->ci_fpcurproc == NULL);
#ifndef MULTIPROCESSOR
KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
#else
/* The process's state may still be live on another CPU: save it first. */
if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
npxsave_proc(p, 1);
#endif
/* Allow FPU use: clear TS in the saved CR0 and in the live one. */
p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
clts();
/* Record the new owner on both sides (CPU and process) at splipi. */
s = splipi();
ci->ci_fpcurproc = p;
p->p_addr->u_pcb.pcb_fpcpu = ci;
splx(s);
atomic_inc_int(&uvmexp.fpswtch);
sfp = &p->p_addr->u_pcb.pcb_savefpu;
if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
/*
 * First FPU use by this process: build a clean image (initial control
 * word, tag word 0xffff) and load it.
 */
bzero(&sfp->sv_87, sizeof(sfp->sv_87));
sfp->sv_87.sv_env.en_cw = __INITIAL_NPXCW__;
sfp->sv_87.sv_env.en_tw = 0xffff;
frstor(&sfp->sv_87);
p->p_md.md_flags |= MDP_USEDFPU;
} else {
/* Reload the state saved when the process last lost the FPU. */
frstor(&sfp->sv_87);
}
return (1);
}
/*
 * Release the FPU on the calling CPU from whichever process owns it.
 *
 * ci:   must be the current CPU (asserted with KDASSERT).
 * save: non-zero to write the live FPU state into the owner's PCB first;
 *       zero to discard it.
 *
 * Does nothing if no process owns the FPU on this CPU. On return CR0.TS is
 * set, the owner's saved CR0 has TS set, and both ownership links (CPU to
 * process, process to CPU) are NULL.
 */
void
npxsave_cpu(struct cpu_info *ci, int save)
{
struct proc *p;
int s;
KDASSERT(ci == curcpu());
p = ci->ci_fpcurproc;
if (p == NULL)
return;
IPRINTF(("%s: fp cpu %s %lx\n", ci->ci_dev->dv_xname,
save ? "save" : "flush", (u_long)p));
if (save) {
#ifdef DIAGNOSTIC
if (ci->ci_fpsaving != 0)
panic("npxsave_cpu: recursive save!");
#endif
/*
 * ci_fpsaving is set around the save; npxintr() and the DNA handlers
 * test it and back out if an FPU event arrives meanwhile.
 */
clts();
ci->ci_fpsaving = 1;
fpu_save(&p->p_addr->u_pcb.pcb_savefpu);
ci->ci_fpsaving = 0;
fwait();
}
/* Set TS both live and in the owner's saved CR0. */
stts();
p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
/* Break both ownership links at splipi. */
s = splipi();
p->p_addr->u_pcb.pcb_fpcpu = NULL;
ci->ci_fpcurproc = NULL;
splx(s);
}
/*
 * Release the FPU from process p, wherever its state currently lives.
 *
 * p:    process whose FPU state should be released; p->p_addr must be set.
 * save: non-zero to save the live state into p's PCB, zero to discard it.
 *
 * Does nothing if no CPU holds p's FPU state. On uniprocessor kernels the
 * holder must be the current CPU. On MULTIPROCESSOR kernels a remote holder
 * is sent a SYNCH (save) or FLUSH (discard) IPI and this CPU spins until the
 * holder has cleared p's pcb_fpcpu link.
 */
void
npxsave_proc(struct proc *p, int save)
{
	struct cpu_info *ci = curcpu();
	struct cpu_info *holder;

	KDASSERT(p->p_addr != NULL);

	holder = p->p_addr->u_pcb.pcb_fpcpu;
	if (holder == NULL)
		return;

	IPRINTF(("%s: fp proc %s %lx\n", ci->ci_dev->dv_xname,
	    save ? "save" : "flush", (u_long)p));

#if !defined(MULTIPROCESSOR)
	KASSERT(ci->ci_fpcurproc == p);
	npxsave_cpu(ci, save);
#else
	if (holder != ci) {
		IPRINTF(("%s: fp ipi to %s %s %lx\n", ci->ci_dev->dv_xname,
		    holder->ci_dev->dv_xname, save ? "save" : "flush",
		    (u_long)p));

		/* Tell the remote CPU which process to release, then wait. */
		holder->ci_fpsaveproc = p;
		i386_send_ipi(holder,
		    save ? I386_IPI_SYNCH_FPU : I386_IPI_FLUSH_FPU);
		while (p->p_addr->u_pcb.pcb_fpcpu != NULL)
			CPU_BUSY_CYCLE();
	} else {
		/* State is on this CPU: release it locally at splipi. */
		int s = splipi();

		npxsave_cpu(ci, save);
		splx(s);
	}
#endif
}
/*
 * Claim the FPU for kernel use on the current CPU.
 *
 * Kernel ownership is recorded by pointing ci_fpcurproc at proc0. If the
 * kernel already owns the FPU, only CR0.TS is cleared. Otherwise any other
 * owner's state is saved, proc0 becomes the owner, TS is cleared, and the
 * FPU is reset to the initial control word (and the initial MXCSR when SSE
 * or SSE2 is available).
 */
void
fpu_kernel_enter(void)
{
	struct cpu_info *ci = curcpu();
	uint32_t npxcw = __INITIAL_NPXCW__;
	uint32_t mxcsr = __INITIAL_MXCSR__;
	int ipl;

	/* Fast path: the kernel already owns the FPU on this CPU. */
	if (ci->ci_fpcurproc == &proc0) {
		clts();
		return;
	}

	/* Evict the current owner, if any, and take over at splipi. */
	ipl = splipi();
	if (ci->ci_fpcurproc != NULL) {
		npxsave_cpu(ci, 1);
		atomic_inc_int(&uvmexp.fpswtch);
	}
	ci->ci_fpcurproc = &proc0;
	splx(ipl);

	/* Clear TS and bring the FPU to a known state. */
	clts();
	fninit();
	fldcw(&npxcw);
	if (i386_has_sse || i386_has_sse2)
		ldmxcsr(&mxcsr);
}
/*
 * End a period of kernel FPU use started with fpu_kernel_enter().
 * Only sets CR0.TS again; the ownership recorded in ci_fpcurproc is left
 * untouched, which is what lets fpu_kernel_enter() take its fast path next
 * time.
 */
void
fpu_kernel_exit(void)
{
stts();
}