root/sys/arch/mips64/mips64/fp_emulate.c
/*      $OpenBSD: fp_emulate.c,v 1.25 2023/01/11 03:19:52 visa Exp $    */

/*
 * Copyright (c) 2010 Miodrag Vallat.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Floating Point completion/emulation code (MI softfloat code control engine).
 *
 * Supports all MIPS IV COP1 and COP1X floating-point instructions.
 *
 * Floating-point load and store instructions, as well as branch instructions,
 * are only handled if the kernel is compiled with option FPUEMUL.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>

#include <machine/cpu.h>
#include <mips64/mips_cpu.h>
#include <machine/fpu.h>
#include <machine/frame.h>
#include <machine/ieee.h>
#include <machine/ieeefp.h>
#include <machine/mips_opcode.h>
#include <machine/regnum.h>

#include <lib/libkern/softfloat.h>
#if defined(DEBUG) && defined(DDB)
#include <machine/db_machdep.h>
#endif

/*
 * Instruction-level entry points.  fpu_emulate() decodes the major opcode
 * and hands the instruction to the COP1 or COP1X emulation routine; each
 * returns 0 on success or the number of the signal to deliver.
 */
int     fpu_emulate(struct proc *, struct trapframe *, uint32_t,
            union sigval *);
int     fpu_emulate_cop1(struct proc *, struct trapframe *, uint32_t);
int     fpu_emulate_cop1x(struct proc *, struct trapframe *, uint32_t);
/*
 * Accessors for the floating-point registers kept in the saved register
 * area of the process; the two `uint' arguments are the operand format and
 * the register number.
 */
uint64_t
        fpu_load(struct proc *, struct trapframe *, uint, uint);
void    fpu_store(struct proc *, struct trapframe *, uint, uint, uint64_t);
#ifdef FPUEMUL
/*
 * Additional routines, only built with option FPUEMUL, for instructions
 * which need emulation only when there is no FPU.
 */
int     nofpu_emulate_cop1(struct proc *, struct trapframe *, uint32_t,
            union sigval *);
int     nofpu_emulate_cop1x(struct proc *, struct trapframe *, uint32_t,
            union sigval *);
int     nofpu_emulate_loadstore(struct proc *, struct trapframe *, uint32_t,
            union sigval *);
int     nofpu_emulate_movci(struct trapframe *, uint32_t);
#endif

/*
 * Per-instruction handler types.
 *
 * fpu_fn3 handlers are invoked from fpu_emulate_cop1() with
 * (p, tf, fmt, ft, fs, fd); fpu_fn4 handlers are invoked from
 * fpu_emulate_cop1x() with (p, tf, fmt, fr, ft, fs, fd).
 * All handlers return 0 on success or a signal number.
 */
typedef int (fpu_fn3)(struct proc *, struct trapframe *, uint, uint, uint,
            uint);
typedef int (fpu_fn4)(struct proc *, struct trapframe *, uint, uint, uint,
            uint, uint);
fpu_fn3 fpu_abs;
fpu_fn3 fpu_add;
/* fpu_c takes the instruction function code as an extra last argument */
int     fpu_c(struct proc *, struct trapframe *, uint, uint, uint, uint, uint);
fpu_fn3 fpu_ceil_l;
fpu_fn3 fpu_ceil_w;
fpu_fn3 fpu_cvt_d;
fpu_fn3 fpu_cvt_l;
fpu_fn3 fpu_cvt_s;
fpu_fn3 fpu_cvt_w;
fpu_fn3 fpu_div;
fpu_fn3 fpu_floor_l;
fpu_fn3 fpu_floor_w;
/*
 * Common float-to-integer conversion helpers; the extra last argument is
 * the rounding mode to use for the conversion.
 */
int     fpu_int_l(struct proc *, struct trapframe *, uint, uint, uint, uint,
            uint);
int     fpu_int_w(struct proc *, struct trapframe *, uint, uint, uint, uint,
            uint);
fpu_fn4 fpu_madd;
fpu_fn4 fpu_msub;
fpu_fn3 fpu_mov;
fpu_fn3 fpu_movcf;
fpu_fn3 fpu_movn;
fpu_fn3 fpu_movz;
fpu_fn3 fpu_mul;
fpu_fn3 fpu_neg;
fpu_fn4 fpu_nmadd;
fpu_fn4 fpu_nmsub;
fpu_fn3 fpu_recip;
fpu_fn3 fpu_round_l;
fpu_fn3 fpu_round_w;
fpu_fn3 fpu_rsqrt;
fpu_fn3 fpu_sqrt;
fpu_fn3 fpu_sub;
fpu_fn3 fpu_trunc_l;
fpu_fn3 fpu_trunc_w;

/*
 * Encoding of operand format within opcodes `fmt' and `fmt3' fields.
 *
 * The emulation routines below treat FMT_S and FMT_D operands as float32
 * and float64 values, and FMT_W and FMT_L operands as int32_t and int64_t
 * values respectively.
 */
#define FMT_S   0x00
#define FMT_D   0x01
#define FMT_W   0x04
#define FMT_L   0x05

/*
 * Inlines from softfloat-specialize.h which are not made public, needed
 * for fpu_abs.
 *
 * Both macros operate on the raw 32-bit representation of a float32.
 * The argument is fully parenthesized so that expressions such as
 * `x | y' are handled correctly.  Note that float32_is_signaling_nan()
 * evaluates its argument twice; do not pass expressions with side effects.
 */
#define float32_is_nan(a) \
        (0xff000000 < ((a) << 1))
#define float32_is_signaling_nan(a) \
        (((((a) >> 22) & 0x1ff) == 0x1fe) && ((a) & 0x003fffff))

/*
 * Precomputed results of intXX_to_floatXX(1)
 *
 * The replacement lists are wrapped in parentheses so that the constants
 * behave as a single operand wherever they are expanded.
 */
#define ONE_F32 ((float32)(SNG_EXP_BIAS << SNG_FRACBITS))
#define ONE_F64 ((float64)((uint64_t)DBL_EXP_BIAS << DBL_FRACBITS))

/*
 * Read coprocessor 1 control register $31 (the FPU status register value
 * used throughout this file as `fsr').
 *
 * The register is read twice on purpose: the first cfc1 stalls until the
 * FPU is done, the second one fetches the status which is returned.
 */
static inline uint32_t
getfsr(void)
{
        uint32_t fsr;

        __asm__ volatile (
        "       .set    push\n"
        "       .set    hardfloat\n"
        "       cfc1    %0, $31\n"      /* stall until FPU done */
        "       cfc1    %0, $31\n"      /* now get status */
        "       .set    pop\n"
        : "=r" (fsr));
        return fsr;
}

/*
 * Write `fsr' to coprocessor 1 control register $31, the counterpart of
 * getfsr().
 */
static inline void
setfsr(uint32_t fsr)
{
        __asm__ volatile (
        "       .set    push\n"
        "       .set    hardfloat\n"
        "       ctc1    %0, $31\n"
        "       .set    pop\n"
        : : "r" (fsr));
}

/*
 * Handle a floating-point exception.
 *
 * `tf' is the trap frame of the current process.  On a CPU with an FPU, a
 * trap raised without the "unimplemented operation" cause bit is a genuine
 * floating-point exception and is reported with SIGFPE right away.
 * Otherwise the faulting instruction (and, in a branch delay slot, the
 * branch preceding it) is fetched, classified, and handed to fpu_emulate()
 * when it is an instruction we are expected to complete; anything else
 * yields SIGILL.
 *
 * Before returning, the program counter is advanced (except when SIGILL is
 * delivered, or for a branch emulated without an FPU), the raised
 * exceptions are folded into the sticky flag bits of the status register,
 * and any pending signal is posted with trapsignal().
 */
void
MipsFPTrap(struct trapframe *tf)
{
        struct cpu_info *ci = curcpu();
        struct proc *p = ci->ci_curproc;
        union sigval sv;
        vaddr_t pc;
        register_t sr;
        uint32_t fsr, excbits;
        uint32_t branch = 0;
        uint32_t insn;
        InstFmt inst;
        int sig = 0;
        int fault_type = SI_NOINFO;
        int update_pcb = 0;
        int emulate = 0;
        int skip_insn = 1;

        KDASSERT(tf == p->p_md.md_regs);

        /* in a branch delay slot, the faulting instruction follows tf->pc */
        pc = (vaddr_t)tf->pc;
        if (tf->cause & CR_BR_DELAY)
                pc += 4;

        if (CPU_HAS_FPU(ci)) {
                /*
                 * Enable FPU, and read its status register.
                 */

                sr = getsr();
                setsr(sr | SR_COP_1_BIT);
                fsr = getfsr();

                /*
                 * If this is not an unimplemented operation, but a genuine
                 * FPU exception, signal the process.
                 */

                if ((fsr & FPCSR_C_E) == 0) {
                        sig = SIGFPE;
                        goto deliver;
                }
        } else {
#ifdef CPU_OCTEON
                /*
                 * SR_FR_32 is hardwired to zero on Octeon; make sure it is
                 * set in the emulation view of the FPU state.
                 */
                tf->sr |= SR_FR_32;
#endif
                fsr = tf->fsr;
        }

        /*
         * Get the faulting instruction.  This should not fail, and
         * if it does, it's probably not your lucky day.
         */

        if (copyinsn(p, pc, &insn) != 0) {
                sig = SIGBUS;
                fault_type = BUS_OBJERR;
                sv.sival_ptr = (void *)pc;
                goto deliver;
        }
        inst = *(InstFmt *)&insn;

        /* the branch instruction is needed later to compute the new pc */
        if (tf->cause & CR_BR_DELAY) {
                if (copyinsn(p, tf->pc, &branch) != 0) {
                        sig = SIGBUS;
                        fault_type = BUS_OBJERR;
                        sv.sival_ptr = (void *)tf->pc;
                        goto deliver;
                }
        }

        /*
         * Emulate the instruction.
         */

#ifdef DEBUG
#ifdef DDB
        printf("%s: unimplemented FPU completion, fsr 0x%08x\n0x%lx: ",
            p->p_p->ps_comm, fsr, pc);
        dbmd_print_insn(insn, pc, printf);
#else
        printf("%s: unimplemented FPU completion, insn 0x%08x fsr 0x%08x\n",
            p->p_p->ps_comm, insn, fsr);
#endif
#endif

        switch (inst.FRType.op) {
        default:
                /*
                 * Not a FPU instruction.
                 */
                break;
#ifdef FPUEMUL
        case OP_SPECIAL:
                switch (inst.FRType.func) {
                default:
                        /*
                         * Not a FPU instruction.
                         */
                        break;
                case OP_MOVCI:
                        /*
                         * This instruction should not require emulation,
                         * unless there is no FPU.
                         */
                        if (!CPU_HAS_FPU(ci))
                                emulate = 1;
                        break;
                }
                break;
        case OP_LDC1:
        case OP_LWC1:
        case OP_SDC1:
        case OP_SWC1:
                /*
                 * These instructions should not require emulation,
                 * unless there is no FPU.
                 */
                if (!CPU_HAS_FPU(ci))
                        emulate = 1;
                break;
#endif
        case OP_COP1:
                switch (inst.RType.rs) {
                case OP_BC:
                        /* pc is not advanced below for an emulated branch */
                        skip_insn = 0;
                        /* FALLTHROUGH */
                case OP_MF:
                case OP_DMF:
                case OP_CF:
                case OP_MT:
                case OP_DMT:
                case OP_CT:
                        /*
                         * These instructions should not require emulation,
                         * unless there is no FPU.
                         */
                        if (!CPU_HAS_FPU(ci))
                                emulate = 1;
                        break;
                default:
                        emulate = 1;
                        break;
                }
                break;
        case OP_COP1X:
                switch (inst.FQType.op4) {
                default:
                        switch (inst.FRType.func) {
#ifdef FPUEMUL
                        case OP_LDXC1:
                        case OP_LWXC1:
                        case OP_SDXC1:
                        case OP_SWXC1:
                        case OP_PREFX:
                                /*
                                 * These instructions should not require
                                 * emulation, unless there is no FPU.
                                 */
                                if (!CPU_HAS_FPU(ci))
                                        emulate = 1;
                                break;
#endif
                        default:
                                /*
                                 * Not a valid instruction.
                                 */
                                break;
                        }
                        break;
                case OP_MADD:
                case OP_MSUB:
                case OP_NMADD:
                case OP_NMSUB:
                        emulate = 1;
                        break;
                }
                break;
        }

        if (emulate) {
                if (CPU_HAS_FPU(ci)) {
                        KASSERT(p == ci->ci_fpuproc);
                        save_fpu();
                }

                update_pcb = 1;

                sig = fpu_emulate(p, tf, insn, &sv);
                /* reload fsr, possibly modified by softfloat code */
                fsr = tf->fsr;
                if (sig == 0) {
                        /* raise SIGFPE if necessary */
                        excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
                        excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
                        if (excbits != 0)
                                sig = SIGFPE;
                }
        } else {
                sig = SIGILL;
                fault_type = ILL_ILLOPC;
        }

deliver:
        switch (sig) {
        case SIGFPE:
                /*
                 * Pick the si_code matching the highest priority exception
                 * which is both raised and enabled.
                 */
                excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
                excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
                if (excbits & FP_X_INV)
                        fault_type = FPE_FLTINV;
                else if (excbits & FP_X_DZ)
                        fault_type = FPE_FLTDIV;
                else if (excbits & FP_X_OFL)
                        fault_type = FPE_FLTOVF;
                else if (excbits & FP_X_UFL)
                        fault_type = FPE_FLTUND;
                else /* if (excbits & FP_X_IMP) */
                        fault_type = FPE_FLTRES;

                break;
#ifdef FPUEMUL
        case SIGBUS:
                if (fault_type == SI_NOINFO)
                        fault_type = BUS_ADRALN;
                break;
        case SIGSEGV:
                if (fault_type == SI_NOINFO)
                        fault_type = SEGV_MAPERR;
                break;
#endif
        }

        /*
         * Skip the instruction, unless we are delivering SIGILL.
         */
        if (CPU_HAS_FPU(ci) || skip_insn) {
                if (sig != SIGILL) {
                        if (tf->cause & CR_BR_DELAY) {
                                /*
                                 * Note that it doesn't matter, at this point,
                                 * that we pass the updated FSR value, as it is
                                 * only used to decide whether to branch or not
                                 * if the faulting instruction was BC1[FT].
                                 */
                                tf->pc = MipsEmulateBranch(tf, tf->pc, fsr,
                                    branch);
                        } else
                                tf->pc += 4;
                }
        }

        /*
         * Update the FPU status register.
         * We need to make sure that this will not cause an exception
         * in kernel mode.
         */

        /* propagate raised exceptions to the sticky bits */
        fsr &= ~FPCSR_C_E;
        excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
        fsr |= excbits << FPCSR_F_SHIFT;
        /* clear all exception sources */
        fsr &= ~FPCSR_C_MASK;
        if (update_pcb)
                tf->fsr = fsr;

        if (CPU_HAS_FPU(ci)) {
                setfsr(fsr);
                /* disable fpu before returning to trap() */
                setsr(sr);
        }

        if (sig != 0) {
                /*
                 * SIGBUS and SIGSEGV already carry the faulting address;
                 * every other signal reports the instruction address.
                 */
                if (sig != SIGBUS && sig != SIGSEGV)
                        sv.sival_ptr = (void *)pc;
                trapsignal(p, sig, 0, fault_type, sv);
        }
}

/*
 * Emulate an FPU instruction.  The FPU register set has been saved in the
 * current PCB, and is pointed to by the trap frame.
 *
 * Returns 0 when the instruction has been emulated, or the number of the
 * signal to deliver; SIGILL is returned for anything not recognized.
 * `sv' is only handed to the FPUEMUL-specific routines.
 */
int
fpu_emulate(struct proc *p, struct trapframe *tf, uint32_t insn,
    union sigval *sv)
{
        InstFmt decoded;

        /* tf->zero is not written by the trap code */
        tf->zero = 0;

        decoded = *(InstFmt *)&insn;

        /*
         * With a real FPU, only COP1 and COP1X arithmetic instructions
         * ever need completion.
         */
        if (CPU_HAS_FPU(p->p_cpu)) {
                if (decoded.FRType.op == OP_COP1)
                        return fpu_emulate_cop1(p, tf, insn);
                if (decoded.FRType.op == OP_COP1X)
                        return fpu_emulate_cop1x(p, tf, insn);
                return SIGILL;
        }

#ifdef FPUEMUL
        /*
         * No FPU: moves, loads, stores and branches need emulation too.
         */
        switch (decoded.FRType.op) {
        case OP_SPECIAL:
                return nofpu_emulate_movci(tf, insn);
        case OP_LDC1:
        case OP_LWC1:
        case OP_SDC1:
        case OP_SWC1:
                return nofpu_emulate_loadstore(p, tf, insn, sv);
        case OP_COP1:
                switch (decoded.RType.rs) {
                case OP_MF:
                case OP_DMF:
                case OP_CF:
                case OP_MT:
                case OP_DMT:
                case OP_CT:
                case OP_BC:
                        return nofpu_emulate_cop1(p, tf, insn, sv);
                default:
                        break;
                }
                /* everything else is an arithmetic instruction */
                return fpu_emulate_cop1(p, tf, insn);
        case OP_COP1X:
                switch (decoded.FQType.op4) {
                case OP_MADD:
                case OP_MSUB:
                case OP_NMADD:
                case OP_NMSUB:
                        return fpu_emulate_cop1x(p, tf, insn);
                default:
                        break;
                }
                switch (decoded.FRType.func) {
                case OP_LDXC1:
                case OP_LWXC1:
                case OP_SDXC1:
                case OP_SWXC1:
                case OP_PREFX:
                        return nofpu_emulate_cop1x(p, tf, insn, sv);
                default:
                        break;
                }
                break;
        default:
                break;
        }
#endif

        return SIGILL;
}

/*
 * Emulate a COP1 FPU instruction.
 *
 * The function code selects the handler, then the format and register
 * fields are validated before the handler is invoked.  Returns the
 * handler's result, or SIGILL if any of the checks fails.
 */
int
fpu_emulate_cop1(struct proc *p, struct trapframe *tf, uint32_t insn)
{
        /* handlers indexed by function code; unlisted entries are NULL */
        static fpu_fn3 *const handlers[1 << 6] = {
                [0x00] = fpu_add,
                [0x01] = fpu_sub,
                [0x02] = fpu_mul,
                [0x03] = fpu_div,
                [0x04] = fpu_sqrt,
                [0x05] = fpu_abs,
                [0x06] = fpu_mov,
                [0x07] = fpu_neg,
                [0x08] = fpu_round_l,
                [0x09] = fpu_trunc_l,
                [0x0a] = fpu_ceil_l,
                [0x0b] = fpu_floor_l,
                [0x0c] = fpu_round_w,
                [0x0d] = fpu_trunc_w,
                [0x0e] = fpu_ceil_w,
                [0x0f] = fpu_floor_w,
                [0x11] = fpu_movcf,
                [0x12] = fpu_movz,
                [0x13] = fpu_movn,
                [0x15] = fpu_recip,
                [0x16] = fpu_rsqrt,
                [0x20] = fpu_cvt_s,
                [0x21] = fpu_cvt_d,
                [0x24] = fpu_cvt_w,
                [0x25] = fpu_cvt_l,
                [0x30] = (fpu_fn3 *)fpu_c,
                [0x31] = (fpu_fn3 *)fpu_c,
                [0x32] = (fpu_fn3 *)fpu_c,
                [0x33] = (fpu_fn3 *)fpu_c,
                [0x34] = (fpu_fn3 *)fpu_c,
                [0x35] = (fpu_fn3 *)fpu_c,
                [0x36] = (fpu_fn3 *)fpu_c,
                [0x37] = (fpu_fn3 *)fpu_c,
                [0x38] = (fpu_fn3 *)fpu_c,
                [0x39] = (fpu_fn3 *)fpu_c,
                [0x3a] = (fpu_fn3 *)fpu_c,
                [0x3b] = (fpu_fn3 *)fpu_c,
                [0x3c] = (fpu_fn3 *)fpu_c,
                [0x3d] = (fpu_fn3 *)fpu_c,
                [0x3e] = (fpu_fn3 *)fpu_c,
                [0x3f] = (fpu_fn3 *)fpu_c
        };
        InstFmt decoded = *(InstFmt *)&insn;
        fpu_fn3 *handler;
        uint fmt, ft, fs, fd;

        /* reject function codes without a handler */
        handler = handlers[decoded.FRType.func];
        if (handler == NULL)
                return SIGILL;

        /*
         * FRType assumes bit 25 is always set, so it has to be checked
         * explicitly before trusting the format field.
         */
        if ((insn & (1 << 25)) == 0)
                return SIGILL;

        fmt = decoded.FRType.fmt;
        if (fmt != FMT_S && fmt != FMT_D && fmt != FMT_W && fmt != FMT_L)
                return SIGILL;

        /*
         * When the FR bit is clear in the coprocessor 0 status register,
         * only even-numbered registers can be used.
         *
         * c.cond does not carry a register number in the fd field, but
         * that field must have zero in its low two bits, so valid c.cond
         * instructions are never rejected here.
         */
        ft = decoded.FRType.ft;
        fs = decoded.FRType.fs;
        fd = decoded.FRType.fd;
        if ((tf->sr & SR_FR_32) == 0 && ((ft | fs | fd) & 1) != 0)
                return SIGILL;

        /* fpu_c needs the function code as well; others do not */
        if (handler != (fpu_fn3 *)&fpu_c)
                return (*handler)(p, tf, fmt, ft, fs, fd);

        return fpu_c(p, tf, fmt, ft, fs, fd, decoded.FRType.func);
}

/*
 * Emulate a COP1X FPU instruction.
 *
 * The op4 field selects the handler (madd, msub, nmadd or nmsub), then
 * the fmt3 format field and the four register fields are validated before
 * the handler is invoked.  Returns the handler's result, or SIGILL if any
 * of the checks fails.
 */
int
fpu_emulate_cop1x(struct proc *p, struct trapframe *tf, uint32_t insn)
{
        InstFmt inst;
        uint fr, ft, fs, fd;
        fpu_fn4 *fpu_op;
        static fpu_fn4 *const fpu_ops1x[1 << 3] = {
                NULL,
                NULL,
                NULL,
                NULL,
                fpu_madd,
                fpu_msub,
                fpu_nmadd,
                fpu_nmsub
        };

        inst = *(InstFmt *)&insn;

        /*
         * Check for valid function code.
         */

        fpu_op = fpu_ops1x[inst.FQType.op4];
        if (fpu_op == NULL)
                return SIGILL;

        /*
         * Check for valid format.
         */

        switch (inst.FQType.fmt3) {
        default:
                return SIGILL;
        case FMT_S:
        case FMT_D:
        case FMT_W:
        case FMT_L:
                break;
        }

        /*
         * Check for valid register values. Only even-numbered registers
         * can be used if the FR bit is clear in coprocessor 0 status
         * register.
         */

        fr = inst.FQType.fr;
        ft = inst.FQType.ft;
        fs = inst.FQType.fs;
        fd = inst.FQType.fd;
        if ((tf->sr & SR_FR_32) == 0) {
                if ((fr | ft | fs | fd) & 1)
                        return SIGILL;
        }

        /*
         * Finally dispatch to the proper routine.  The operand format of
         * COP1X instructions lives in the fmt3 field which was validated
         * above; the FRType view of the instruction must not be used here.
         */

        return (*fpu_op)(p, tf, inst.FQType.fmt3, fr, ft, fs, fd);
}

/*
 * Load a floating-point argument according to the specified format.
 *
 * The value of register `regno' is fetched from the saved register area
 * of the process.  With SR_FR_32 set in tf->sr, FMT_S and FMT_W operands
 * are the low 32 bits of the register.  With SR_FR_32 clear, each register
 * only contributes its low 32 bits, and FMT_D and FMT_L operands are built
 * from the register pair (regno, regno + 1).
 */
uint64_t
fpu_load(struct proc *p, struct trapframe *tf, uint fmt, uint regno)
{
        const uint64_t *fpregs = (uint64_t *)p->p_md.md_regs + FPBASE;
        uint64_t value = fpregs[regno];

        if (tf->sr & SR_FR_32) {
                /* full width registers: only narrow operands get masked */
                if (fmt == FMT_S || fmt == FMT_W)
                        value &= 0xffffffff;
                return value;
        }

        value &= 0xffffffff;
        if (fmt == FMT_D || fmt == FMT_L) {
                /* caller has enforced regno is even */
                value |= fpregs[regno + 1] << 32;
        }

        return value;
}

/*
 * Store a floating-point result according to the specified format.
 *
 * With SR_FR_32 set in tf->sr, `rslt' is written as is to register
 * `regno'.  Otherwise it is split in two 32-bit halves, written to
 * registers `regno' (low half) and `regno + 1' (high half).  The `fmt'
 * argument is currently not used.
 */
void
fpu_store(struct proc *p, struct trapframe *tf, uint fmt, uint regno,
    uint64_t rslt)
{
        uint64_t *fpregs = (uint64_t *)p->p_md.md_regs + FPBASE;

        if ((tf->sr & SR_FR_32) == 0) {
                /* caller has enforced regno is even */
                fpregs[regno] = rslt & 0xffffffff;
                fpregs[regno + 1] = (rslt >> 32) & 0xffffffff;
                return;
        }

        fpregs[regno] = rslt;
}

/*
 * Integer conversion
 */

/*
 * Convert the FMT_S or FMT_D value in register `fs' to a 64-bit integer
 * using rounding mode `rm', and store it in register `fd'.  The result is
 * not stored if the invalid operation exception is both raised and
 * enabled.  Returns 0, or SIGILL if `ft' is not zero or the format is not
 * a floating-point format.
 */
int
fpu_int_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd, uint rm)
{
        uint64_t value;
        uint32_t saved_rm, invalid;

        if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
                return SIGILL;

        value = fpu_load(p, tf, fmt, fs);

        /* temporarily switch to the requested rounding mode */
        saved_rm = tf->fsr & FPCSR_RM_MASK;
        tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
        if (fmt == FMT_D)
                value = float64_to_int64((float64)value);
        else
                value = float32_to_int64((float32)value);
        /* and put the previous rounding mode back */
        tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | saved_rm;

        invalid = tf->fsr & (FPCSR_C_V | FPCSR_E_V);
        if (invalid == (FPCSR_C_V | FPCSR_E_V))
                return 0;

        fpu_store(p, tf, fmt, fd, value);
        return 0;
}

/*
 * Convert the FMT_S or FMT_D value in register `fs' to a 32-bit integer
 * using rounding mode `rm', and store it in register `fd'.  The result is
 * not stored if the invalid operation exception is both raised and
 * enabled.  Returns 0, or SIGILL if `ft' is not zero or the format is not
 * a floating-point format.
 */
int
fpu_int_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd, uint rm)
{
        uint64_t value;
        uint32_t saved_rm, invalid;

        if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
                return SIGILL;

        value = fpu_load(p, tf, fmt, fs);

        /* temporarily switch to the requested rounding mode */
        saved_rm = tf->fsr & FPCSR_RM_MASK;
        tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
        if (fmt == FMT_D)
                value = float64_to_int32((float64)value);
        else
                value = float32_to_int32((float32)value);
        /* and put the previous rounding mode back */
        tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | saved_rm;

        invalid = tf->fsr & (FPCSR_C_V | FPCSR_E_V);
        if (invalid == (FPCSR_C_V | FPCSR_E_V))
                return 0;

        fpu_store(p, tf, fmt, fd, value);
        return 0;
}

/*
 * FPU Instruction emulation
 */

/*
 * abs: store the absolute value of register `fs' in register `fd'.
 *
 * Only FMT_S and FMT_D are valid, and `ft' must be zero; SIGILL is
 * returned otherwise.  If the operand is a NaN, the invalid operation
 * exception is raised and the operand is stored unchanged.
 */
int
fpu_abs(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t raw;

        if (ft != 0)
                return SIGILL;
        if (fmt != FMT_S && fmt != FMT_D)
                return SIGILL;

        raw = fpu_load(p, tf, fmt, fs);
        /*
         * Clear sign bit unless NaN.  The masks are built from unsigned
         * constants: shifting a signed 1 into the sign bit is undefined.
         */
        if (fmt == FMT_S) {
                float32 f32 = (float32)raw;
                if (float32_is_nan(f32)) {
                        float_set_invalid();
                } else {
                        f32 &= ~((uint32_t)1 << 31);
                        raw = (uint64_t)f32;
                }
        } else {
                float64 f64 = (float64)raw;
                if (float64_is_nan(f64)) {
                        float_set_invalid();
                } else {
                        f64 &= ~((uint64_t)1 << 63);
                        raw = (uint64_t)f64;
                }
        }
        fpu_store(p, tf, fmt, fd, raw);

        return 0;
}

/*
 * add: store the sum of registers `fs' and `ft' in register `fd'.
 * Only FMT_S and FMT_D are valid; SIGILL is returned otherwise.
 */
int
fpu_add(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t lhs, rhs, sum;

        if (fmt != FMT_S && fmt != FMT_D)
                return SIGILL;

        lhs = fpu_load(p, tf, fmt, fs);
        rhs = fpu_load(p, tf, fmt, ft);

        if (fmt == FMT_D)
                sum = (uint64_t)float64_add((float64)lhs, (float64)rhs);
        else
                sum = (uint64_t)float32_add((float32)lhs, (float32)rhs);

        fpu_store(p, tf, fmt, fd, sum);
        return 0;
}

/*
 * c.cond: compare registers `fs' and `ft' and update a condition bit in
 * tf->fsr.
 *
 * The `fd' field carries the condition code number in its upper bits and
 * must have its two low bits clear.  `op' is the function code: its low
 * three bits select which of the unordered (bit 0), equal (bit 1) and less
 * than (bit 2) relations make the condition true, and bit 3 requests an
 * invalid operation exception on unordered operands.  Signaling NaN
 * operands always request that exception.
 *
 * Returns 0, or SIGILL for an invalid `fd' field or format.
 */
int
fpu_c(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd, uint op)
{
        uint64_t lhs_raw, rhs_raw;
        uint cond_code;
        uint less = 0, equal = 0, unordered = 0;

        if ((fd & 0x03) != 0 || (fmt != FMT_S && fmt != FMT_D))
                return SIGILL;

        cond_code = fd >> 2;

        lhs_raw = fpu_load(p, tf, fmt, fs);
        rhs_raw = fpu_load(p, tf, fmt, ft);

        if (fmt == FMT_D) {
                float64 lhs = (float64)lhs_raw;
                float64 rhs = (float64)rhs_raw;

                if (float64_is_nan(lhs)) {
                        unordered = 1 << 0;
                        /* force invalid exception */
                        if (float64_is_signaling_nan(lhs))
                                op |= 0x08;
                }
                if (float64_is_nan(rhs)) {
                        unordered = 1 << 0;
                        /* force invalid exception */
                        if (float64_is_signaling_nan(rhs))
                                op |= 0x08;
                }
                if (unordered == 0) {
                        if (float64_eq(lhs, rhs))
                                equal = 1 << 1;
                        else if (float64_lt(lhs, rhs))
                                less = 1 << 2;
                }
        } else {
                float32 lhs = (float32)lhs_raw;
                float32 rhs = (float32)rhs_raw;

                if (float32_is_nan(lhs)) {
                        unordered = 1 << 0;
                        /* force invalid exception */
                        if (float32_is_signaling_nan(lhs))
                                op |= 0x08;
                }
                if (float32_is_nan(rhs)) {
                        unordered = 1 << 0;
                        /* force invalid exception */
                        if (float32_is_signaling_nan(rhs))
                                op |= 0x08;
                }
                if (unordered == 0) {
                        if (float32_eq(lhs, rhs))
                                equal = 1 << 1;
                        else if (float32_lt(lhs, rhs))
                                less = 1 << 2;
                }
        }

        if (unordered != 0 && (op & 0x08) != 0) {
                float_set_invalid();
                /* comparison result intentionally not written */
                if (tf->fsr & FPCSR_E_V)
                        return 0;
        }

        if (((unordered | equal | less) & op) != 0)
                tf->fsr |= FPCSR_CONDVAL(cond_code);
        else
                tf->fsr &= ~FPCSR_CONDVAL(cond_code);

        return 0;
}

/*
 * ceil.l: conversion to a 64-bit integer, rounding towards positive
 * infinity.  All the work is done by fpu_int_l().
 */
int
fpu_ceil_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        const uint rounding = FP_RP;

        return fpu_int_l(p, tf, fmt, ft, fs, fd, rounding);
}

/*
 * ceil.w: conversion to a 32-bit integer, rounding towards positive
 * infinity.  All the work is done by fpu_int_w().
 */
int
fpu_ceil_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        const uint rounding = FP_RP;

        return fpu_int_w(p, tf, fmt, ft, fs, fd, rounding);
}

/*
 * cvt.d: convert the value in register `fs', of format `fmt', to a
 * float64 stored in register `fd'.  Returns 0, or SIGILL if `ft' is not
 * zero or if the source format is already FMT_D.
 */
int
fpu_cvt_d(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t value;

        if (ft != 0 || fmt == FMT_D)
                return SIGILL;

        value = fpu_load(p, tf, fmt, fs);
        if (fmt == FMT_S)
                value = float32_to_float64((float32)value);
        else if (fmt == FMT_W)
                value = int32_to_float64((int32_t)value);
        else if (fmt == FMT_L)
                value = int64_to_float64((int64_t)value);
        fpu_store(p, tf, fmt, fd, value);

        return 0;
}

/*
 * cvt.l: convert the FMT_S or FMT_D value in register `fs' to a 64-bit
 * integer stored in register `fd', using the rounding mode currently set
 * in tf->fsr.  The result is not stored if the invalid operation
 * exception is both raised and enabled.  Returns 0, or SIGILL if `ft' is
 * not zero or the format is not a floating-point format.
 */
int
fpu_cvt_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t value;
        uint32_t invalid;
        int truncate;

        if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
                return SIGILL;

        /* round-to-zero has dedicated softfloat routines */
        truncate = (tf->fsr & FPCSR_RM_MASK) == FP_RZ;
        value = fpu_load(p, tf, fmt, fs);

        if (fmt == FMT_D && truncate)
                value = float64_to_int64_round_to_zero((float64)value);
        else if (fmt == FMT_D)
                value = float64_to_int64((float64)value);
        else if (truncate)
                value = float32_to_int64_round_to_zero((float32)value);
        else
                value = float32_to_int64((float32)value);

        invalid = tf->fsr & (FPCSR_C_V | FPCSR_E_V);
        if (invalid == (FPCSR_C_V | FPCSR_E_V))
                return 0;

        fpu_store(p, tf, fmt, fd, value);
        return 0;
}

/*
 * cvt.s: convert the value in register `fs', of format `fmt', to a
 * float32 stored in register `fd'.  Returns 0, or SIGILL if `ft' is not
 * zero or if the source format is already FMT_S.
 */
int
fpu_cvt_s(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t value;

        if (ft != 0 || fmt == FMT_S)
                return SIGILL;

        value = fpu_load(p, tf, fmt, fs);
        if (fmt == FMT_D)
                value = float64_to_float32((float64)value);
        else if (fmt == FMT_W)
                value = int32_to_float32((int32_t)value);
        else if (fmt == FMT_L)
                value = int64_to_float32((int64_t)value);
        fpu_store(p, tf, fmt, fd, value);

        return 0;
}

/*
 * cvt.w: convert the FMT_S or FMT_D value in register `fs' to a 32-bit
 * integer stored in register `fd', using the rounding mode currently set
 * in tf->fsr.  The result is not stored if the invalid operation
 * exception is both raised and enabled.  Returns 0, or SIGILL if `ft' is
 * not zero or the format is not a floating-point format.
 */
int
fpu_cvt_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t value;
        uint32_t invalid;
        int truncate;

        if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
                return SIGILL;

        /* round-to-zero has dedicated softfloat routines */
        truncate = (tf->fsr & FPCSR_RM_MASK) == FP_RZ;
        value = fpu_load(p, tf, fmt, fs);

        if (fmt == FMT_D && truncate)
                value = float64_to_int32_round_to_zero((float64)value);
        else if (fmt == FMT_D)
                value = float64_to_int32((float64)value);
        else if (truncate)
                value = float32_to_int32_round_to_zero((float32)value);
        else
                value = float32_to_int32((float32)value);

        invalid = tf->fsr & (FPCSR_C_V | FPCSR_E_V);
        if (invalid == (FPCSR_C_V | FPCSR_E_V))
                return 0;

        fpu_store(p, tf, fmt, fd, value);
        return 0;
}

/*
 * DIV.fmt: divide fs by ft and store the quotient in fd.
 *
 * Only the single and double formats are accepted; any other format
 * yields SIGILL.  Returns 0 otherwise.
 */
int
fpu_div(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t dividend, divisor, quotient;

        switch (fmt) {
        case FMT_S:
        case FMT_D:
                break;
        default:
                return SIGILL;
        }

        dividend = fpu_load(p, tf, fmt, fs);
        divisor = fpu_load(p, tf, fmt, ft);
        if (fmt == FMT_D)
                quotient = (uint64_t)float64_div((float64)dividend,
                    (float64)divisor);
        else
                quotient = (uint64_t)float32_div((float32)dividend,
                    (float32)divisor);
        fpu_store(p, tf, fmt, fd, quotient);

        return 0;
}

/*
 * FLOOR.L.fmt: thin wrapper around fpu_int_l() which forces the FP_RM
 * rounding mode (round towards negative infinity).  The return value of
 * fpu_int_l() is passed back unchanged.
 */
int
fpu_floor_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        int sig;

        sig = fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RM);
        return sig;
}

/*
 * FLOOR.W.fmt: thin wrapper around fpu_int_w() which forces the FP_RM
 * rounding mode (round towards negative infinity).  The return value of
 * fpu_int_w() is passed back unchanged.
 */
int
fpu_floor_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        int sig;

        sig = fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RM);
        return sig;
}

/*
 * MADD.fmt: compute (fs * ft) + fr and store the result in fd.
 *
 * The operation is carried out as a softfloat multiplication followed by
 * a separate softfloat addition.  Only the single and double formats are
 * accepted; any other format yields SIGILL.  Returns 0 otherwise.
 */
int
fpu_madd(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
    uint fs, uint fd)
{
        uint64_t mul1, mul2, addend, result;

        if (fmt != FMT_S && fmt != FMT_D)
                return SIGILL;

        mul1 = fpu_load(p, tf, fmt, fs);
        mul2 = fpu_load(p, tf, fmt, ft);
        addend = fpu_load(p, tf, fmt, fr);
        if (fmt == FMT_D) {
                float64 prod = float64_mul((float64)mul1, (float64)mul2);

                result = (uint64_t)float64_add(prod, (float64)addend);
        } else {
                float32 prod = float32_mul((float32)mul1, (float32)mul2);

                result = (uint64_t)float32_add(prod, (float32)addend);
        }
        fpu_store(p, tf, fmt, fd, result);

        return 0;
}

/*
 * MOV.fmt: copy fs to fd without interpreting its contents.
 *
 * Returns SIGILL if the ft field is not zero or if the format is neither
 * single nor double; returns 0 otherwise.
 */
int
fpu_mov(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
                return SIGILL;

        fpu_store(p, tf, fmt, fd, fpu_load(p, tf, fmt, fs));

        return 0;
}

/*
 * MOVF.fmt / MOVT.fmt: copy fs to fd depending on a condition bit of the
 * fsr.
 *
 * The ft field encodes the condition code number in its upper bits
 * (ft >> 2) and the movf/movt selector in COPz_BC_TF_MASK; bit 1 must be
 * zero.  The move happens when the state of the condition bit matches the
 * selector (clear for movf, set for movt).
 *
 * Returns SIGILL on an invalid ft encoding or if the format is neither
 * single nor double; returns 0 otherwise.
 */
int
fpu_movcf(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint cc;
        int cond_set, want_set;

        if ((ft & 0x02) != 0)
                return SIGILL;
        if (fmt != FMT_S && fmt != FMT_D)
                return SIGILL;

        cc = ft >> 2;
        cond_set = (tf->fsr & FPCSR_CONDVAL(cc)) != 0;
        want_set = (ft & COPz_BC_TF_MASK) != 0;
        if (cond_set != want_set)
                return 0;

        fpu_store(p, tf, fmt, fd, fpu_load(p, tf, fmt, fs));

        return 0;
}

/*
 * MOVN.fmt: copy fs to fd if the general register selected by ft does not
 * contain zero.
 *
 * Here ft indexes the register array overlaid on the trapframe rather
 * than a floating-point register; the ZERO register never triggers the
 * move.
 *
 * Returns SIGILL if the format is neither single nor double; returns 0
 * otherwise.
 */
int
fpu_movn(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        register_t *gpr = (register_t *)tf;

        if (fmt != FMT_S && fmt != FMT_D)
                return SIGILL;

        /* nothing to do when the selector register reads as zero */
        if (ft == ZERO || gpr[ft] == 0)
                return 0;

        fpu_store(p, tf, fmt, fd, fpu_load(p, tf, fmt, fs));

        return 0;
}

/*
 * MOVZ.fmt: copy fs to fd if the general register selected by ft contains
 * zero.
 *
 * Here ft indexes the register array overlaid on the trapframe rather
 * than a floating-point register; the ZERO register always triggers the
 * move.
 *
 * Returns SIGILL if the format is neither single nor double; returns 0
 * otherwise.
 */
int
fpu_movz(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        register_t *gpr = (register_t *)tf;

        if (fmt != FMT_S && fmt != FMT_D)
                return SIGILL;

        /* nothing to do when the selector register is not zero */
        if (ft != ZERO && gpr[ft] != 0)
                return 0;

        fpu_store(p, tf, fmt, fd, fpu_load(p, tf, fmt, fs));

        return 0;
}

/*
 * MSUB.fmt: compute (fs * ft) - fr and store the result in fd.
 *
 * The operation is carried out as a softfloat multiplication followed by
 * a separate softfloat subtraction.  Only the single and double formats
 * are accepted; any other format yields SIGILL.  Returns 0 otherwise.
 */
int
fpu_msub(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
    uint fs, uint fd)
{
        uint64_t mul1, mul2, subtrahend, result;

        if (fmt != FMT_S && fmt != FMT_D)
                return SIGILL;

        mul1 = fpu_load(p, tf, fmt, fs);
        mul2 = fpu_load(p, tf, fmt, ft);
        subtrahend = fpu_load(p, tf, fmt, fr);
        if (fmt == FMT_D) {
                float64 prod = float64_mul((float64)mul1, (float64)mul2);

                result = (uint64_t)float64_sub(prod, (float64)subtrahend);
        } else {
                float32 prod = float32_mul((float32)mul1, (float32)mul2);

                result = (uint64_t)float32_sub(prod, (float32)subtrahend);
        }
        fpu_store(p, tf, fmt, fd, result);

        return 0;
}

/*
 * MUL.fmt: multiply fs by ft and store the product in fd.
 *
 * Only the single and double formats are accepted; any other format
 * yields SIGILL.  Returns 0 otherwise.
 */
int
fpu_mul(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t lhs, rhs, product;

        if (!(fmt == FMT_S || fmt == FMT_D))
                return SIGILL;

        lhs = fpu_load(p, tf, fmt, fs);
        rhs = fpu_load(p, tf, fmt, ft);
        if (fmt == FMT_D)
                product = (uint64_t)float64_mul((float64)lhs, (float64)rhs);
        else
                product = (uint64_t)float32_mul((float32)lhs, (float32)rhs);
        fpu_store(p, tf, fmt, fd, product);

        return 0;
}

/*
 * NEG.fmt: store in fd the value of fs with its sign bit flipped.
 *
 * If fs holds a NaN, float_set_invalid() is invoked and the operand is
 * stored back unmodified.
 *
 * Returns SIGILL if the ft field is not zero or if the format is neither
 * single nor double; returns 0 otherwise.
 */
int
fpu_neg(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t raw;

        if (ft != 0)
                return SIGILL;
        if (fmt != FMT_S && fmt != FMT_D)
                return SIGILL;

        raw = fpu_load(p, tf, fmt, fs);
        /*
         * Flip sign bit unless NaN.
         *
         * The sign masks are built from unsigned operands: shifting a
         * signed 1 into the sign bit of its type (1L << 63) is undefined
         * behaviour in C.
         */
        if (fmt == FMT_S) {
                float32 f32 = (float32)raw;
                if (float32_is_nan(f32)) {
                        float_set_invalid();
                } else {
                        f32 ^= (uint32_t)1 << 31;
                        raw = (uint64_t)f32;
                }
        } else {
                float64 f64 = (float64)raw;
                if (float64_is_nan(f64)) {
                        float_set_invalid();
                } else {
                        f64 ^= (uint64_t)1 << 63;
                        raw = (uint64_t)f64;
                }
        }
        fpu_store(p, tf, fmt, fd, raw);

        return 0;
}

/*
 * NMADD.fmt: compute -((fs * ft) + fr) and store the result in fd.
 *
 * The sum is computed as a softfloat multiplication followed by a
 * separate softfloat addition, then negated by flipping its sign bit.
 * If the sum is a NaN, float_set_invalid() is invoked and the NaN is
 * stored without touching its sign.
 *
 * Only the single and double formats are accepted; any other format
 * yields SIGILL.  Returns 0 otherwise.
 */
int
fpu_nmadd(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
    uint fs, uint fd)
{
        uint64_t raw1, raw2, raw3, rslt;

        if (fmt != FMT_S && fmt != FMT_D)
                return SIGILL;

        raw1 = fpu_load(p, tf, fmt, fs);
        raw2 = fpu_load(p, tf, fmt, ft);
        raw3 = fpu_load(p, tf, fmt, fr);
        /*
         * The sign masks are built from unsigned operands: shifting a
         * signed 1 into the sign bit of its type (1L << 63) is undefined
         * behaviour in C.
         */
        if (fmt == FMT_S) {
                float32 f32 = float32_add(
                    float32_mul((float32)raw1, (float32)raw2),
                    (float32)raw3);
                if (float32_is_nan(f32))
                        float_set_invalid();
                else
                        f32 ^= (uint32_t)1 << 31;
                rslt = (uint64_t)f32;
        } else {
                float64 f64 = float64_add(
                    float64_mul((float64)raw1, (float64)raw2),
                    (float64)raw3);
                if (float64_is_nan(f64))
                        float_set_invalid();
                else
                        f64 ^= (uint64_t)1 << 63;
                rslt = (uint64_t)f64;
        }
        fpu_store(p, tf, fmt, fd, rslt);

        return 0;
}

/*
 * NMSUB.fmt: compute -((fs * ft) - fr) and store the result in fd.
 *
 * The difference is computed as a softfloat multiplication followed by a
 * separate softfloat subtraction, then negated by flipping its sign bit.
 * If the difference is a NaN, float_set_invalid() is invoked and the NaN
 * is stored without touching its sign.
 *
 * Only the single and double formats are accepted; any other format
 * yields SIGILL.  Returns 0 otherwise.
 */
int
fpu_nmsub(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
    uint fs, uint fd)
{
        uint64_t raw1, raw2, raw3, rslt;

        if (fmt != FMT_S && fmt != FMT_D)
                return SIGILL;

        raw1 = fpu_load(p, tf, fmt, fs);
        raw2 = fpu_load(p, tf, fmt, ft);
        raw3 = fpu_load(p, tf, fmt, fr);
        /*
         * The sign masks are built from unsigned operands: shifting a
         * signed 1 into the sign bit of its type (1L << 63) is undefined
         * behaviour in C.
         */
        if (fmt == FMT_S) {
                float32 f32 = float32_sub(
                    float32_mul((float32)raw1, (float32)raw2),
                    (float32)raw3);
                if (float32_is_nan(f32))
                        float_set_invalid();
                else
                        f32 ^= (uint32_t)1 << 31;
                rslt = (uint64_t)f32;
        } else {
                float64 f64 = float64_sub(
                    float64_mul((float64)raw1, (float64)raw2),
                    (float64)raw3);
                if (float64_is_nan(f64))
                        float_set_invalid();
                else
                        f64 ^= (uint64_t)1 << 63;
                rslt = (uint64_t)f64;
        }
        fpu_store(p, tf, fmt, fd, rslt);

        return 0;
}

/*
 * RECIP.fmt: compute 1 / fs and store the result in fd.
 *
 * The reciprocal is obtained with a regular softfloat division of the
 * ONE_F32 or ONE_F64 constant by the operand.
 *
 * Returns SIGILL if the ft field is not zero or if the format is neither
 * single nor double; returns 0 otherwise.
 */
int
fpu_recip(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t operand, result;

        if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
                return SIGILL;

        operand = fpu_load(p, tf, fmt, fs);
        if (fmt == FMT_D)
                result = (uint64_t)float64_div(ONE_F64, (float64)operand);
        else
                result = (uint64_t)float32_div(ONE_F32, (float32)operand);
        fpu_store(p, tf, fmt, fd, result);

        return 0;
}

/*
 * ROUND.L.fmt: thin wrapper around fpu_int_l() which forces the FP_RN
 * rounding mode (round towards nearest).  The return value of fpu_int_l()
 * is passed back unchanged.
 */
int
fpu_round_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        int sig;

        sig = fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RN);
        return sig;
}

/*
 * ROUND.W.fmt: thin wrapper around fpu_int_w() which forces the FP_RN
 * rounding mode (round towards nearest).  The return value of fpu_int_w()
 * is passed back unchanged.
 */
int
fpu_round_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        int sig;

        sig = fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RN);
        return sig;
}

/*
 * RSQRT.fmt: compute 1 / sqrt(fs) and store the result in fd.
 *
 * The square root is computed first.  The division of one by that root is
 * skipped when both FPCSR_C_V and FPCSR_E_V are set in the fsr at that
 * point, in which case the square root result itself is what gets stored.
 *
 * Returns SIGILL if the ft field is not zero or if the format is neither
 * single nor double; returns 0 otherwise.
 */
int
fpu_rsqrt(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t raw;

        if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
                return SIGILL;

        raw = fpu_load(p, tf, fmt, fs);
        if (fmt == FMT_D) {
                float64 root = float64_sqrt((float64)raw);

                /* the fsr must be inspected after the square root */
                if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
                    (FPCSR_C_V | FPCSR_E_V))
                        root = float64_div(ONE_F64, root);
                raw = (uint64_t)root;
        } else {
                float32 root = float32_sqrt((float32)raw);

                /* the fsr must be inspected after the square root */
                if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
                    (FPCSR_C_V | FPCSR_E_V))
                        root = float32_div(ONE_F32, root);
                raw = (uint64_t)root;
        }
        fpu_store(p, tf, fmt, fd, raw);

        return 0;
}

/*
 * SQRT.fmt: compute the square root of fs and store it in fd.
 *
 * Returns SIGILL if the ft field is not zero or if the format is neither
 * single nor double; returns 0 otherwise.
 */
int
fpu_sqrt(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t operand, root;

        if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
                return SIGILL;

        operand = fpu_load(p, tf, fmt, fs);
        if (fmt == FMT_D)
                root = (uint64_t)float64_sqrt((float64)operand);
        else
                root = (uint64_t)float32_sqrt((float32)operand);
        fpu_store(p, tf, fmt, fd, root);

        return 0;
}

/*
 * SUB.fmt: subtract ft from fs and store the difference in fd.
 *
 * Only the single and double formats are accepted; any other format
 * yields SIGILL.  Returns 0 otherwise.
 */
int
fpu_sub(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        uint64_t minuend, subtrahend, difference;

        if (!(fmt == FMT_S || fmt == FMT_D))
                return SIGILL;

        minuend = fpu_load(p, tf, fmt, fs);
        subtrahend = fpu_load(p, tf, fmt, ft);
        if (fmt == FMT_D)
                difference = (uint64_t)float64_sub((float64)minuend,
                    (float64)subtrahend);
        else
                difference = (uint64_t)float32_sub((float32)minuend,
                    (float32)subtrahend);
        fpu_store(p, tf, fmt, fd, difference);

        return 0;
}

/*
 * TRUNC.L.fmt: thin wrapper around fpu_int_l() which forces the FP_RZ
 * rounding mode (round towards zero).  The return value of fpu_int_l() is
 * passed back unchanged.
 */
int
fpu_trunc_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        int sig;

        sig = fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RZ);
        return sig;
}

/*
 * TRUNC.W.fmt: thin wrapper around fpu_int_w() which forces the FP_RZ
 * rounding mode (round towards zero).  The return value of fpu_int_w() is
 * passed back unchanged.
 */
int
fpu_trunc_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
    uint fd)
{
        int sig;

        sig = fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RZ);
        return sig;
}

#ifdef FPUEMUL

/*
 * Emulate a COP1 non-FPU instruction.
 *
 * The rs field of the instruction selects the operation:
 *  - OP_MF, OP_DMF: copy a floating-point register to a general register;
 *  - OP_MT, OP_DMT: copy a general register to a floating-point register;
 *  - OP_CF, OP_CT: read or write a floating-point control register (only
 *    registers 0 and 31 are backed by anything);
 *  - OP_BC: branch on a condition bit of the fsr.
 *
 * Returns SIGILL if the instruction encoding is invalid or if the delay
 * slot of a taken branch can not be emulated, and SIGSEGV (with
 * sv->sival_ptr set to the delay slot address) if the delay slot of a
 * taken branch can not be read.  Returns 0 in all other cases, including
 * when the rs field matches none of the operations above.
 */
int
nofpu_emulate_cop1(struct proc *p, struct trapframe *tf, uint32_t insn,
    union sigval *sv)
{
        register_t *regs = (register_t *)tf;
        InstFmt inst;
        int32_t cval;

        inst = *(InstFmt *)&insn;

        switch (inst.RType.rs) {
        case OP_MF:
                if (inst.FRType.fd != 0 || inst.FRType.func != 0)
                        return SIGILL;
                /* the low 32 bits are sign-extended into the register */
                if (inst.FRType.ft != ZERO)
                        regs[inst.FRType.ft] = (int32_t)
                            ((uint64_t *)p->p_md.md_regs)
                              [FPBASE + inst.FRType.fs];
                break;
        case OP_DMF:
                if (inst.FRType.fd != 0 || inst.FRType.func != 0)
                        return SIGILL;
                /*
                 * Without SR_FR_32, only even-numbered registers are
                 * transferred; odd ones are silently ignored.
                 */
                if ((tf->sr & SR_FR_32) != 0 || (inst.FRType.fs & 1) == 0) {
                        if (inst.FRType.ft != ZERO)
                                regs[inst.FRType.ft] =
                                    fpu_load(p, tf, FMT_L, inst.FRType.fs);
                }
                break;
        case OP_CF:
                if (inst.FRType.fd != 0 || inst.FRType.func != 0)
                        return SIGILL;
                if (inst.FRType.ft != ZERO) {
                        switch (inst.FRType.fs) {
                        case 0: /* FPC_ID */
                                cval = MIPS_SOFT << 8;
                                break;
                        case 31: /* FPC_CSR */
                                cval = (int32_t)tf->fsr;
                                break;
                        default:
                                /* other control registers read as zero */
                                cval = 0;
                                break;
                        }
                        regs[inst.FRType.ft] = (int64_t)cval;
                }
                break;
        case OP_MT:
                if (inst.FRType.fd != 0 || inst.FRType.func != 0)
                        return SIGILL;
                ((uint64_t *)p->p_md.md_regs)[FPBASE + inst.FRType.fs] =
                    (int32_t)regs[inst.FRType.ft];
                break;
        case OP_DMT:
                if (inst.FRType.fd != 0 || inst.FRType.func != 0)
                        return SIGILL;
                /*
                 * Without SR_FR_32, only even-numbered registers are
                 * transferred; odd ones are silently ignored.
                 */
                if ((tf->sr & SR_FR_32) != 0 || (inst.FRType.fs & 1) == 0) {
                        fpu_store(p, tf, FMT_L, inst.FRType.fs,
                            regs[inst.FRType.ft]);
                }
                break;
        case OP_CT:
                if (inst.FRType.fd != 0 || inst.FRType.func != 0)
                        return SIGILL;
                cval = (int32_t)regs[inst.FRType.ft];
                switch (inst.FRType.fs) {
                case 31: /* FPC_CSR */
                        /* FPCSR_C_E is never allowed to be set this way */
                        cval &= ~FPCSR_C_E;
                        tf->fsr = cval;
                        break;
                case 0: /* FPC_ID */
                default:
                        /* writes to other control registers are dropped */
                        break;
                }
                break;
        case OP_BC:
            {
                uint cc, nd, istf;
                int condition;
                vaddr_t dest;
                uint32_t dinsn;

                cc = (inst.RType.rt & COPz_BC_CC_MASK) >> COPz_BC_CC_SHIFT;
                nd = inst.RType.rt & COPz_BCL_TF_MASK;
                istf = inst.RType.rt & COPz_BC_TF_MASK;
                condition = tf->fsr & FPCSR_CONDVAL(cc);
                if ((!condition && !istf) /*bc1f*/ ||
                    (condition && istf) /*bc1t*/) {
                        /*
                         * Branch taken: if the delay slot is not a nop,
                         * copy the delay slot instruction to the dedicated
                         * relocation page, in order to be able to have the
                         * cpu process it and give control back to the
                         * kernel, for us to redirect to the branch
                         * destination.
                         */
                        /* inline MipsEmulateBranch(tf, tf->pc, tf->fsr, insn)*/
                        /*
                         * The signed 16-bit offset counts instructions.
                         * It is scaled with a multiplication rather than
                         * a shift, since left-shifting a negative value
                         * (any backward branch) is undefined in C.
                         */
                        dest = tf->pc + 4 + ((short)inst.IType.imm * 4);
                        if (copyinsn(p, tf->pc + 4, &dinsn) != 0) {
                                sv->sival_ptr = (void *)(tf->pc + 4);
                                return SIGSEGV;
                        }
                        if (dinsn == 0x00000000 /* nop */ ||
                            dinsn == 0x00000040 /* ssnop */) {
                                tf->pc = dest;
                        } else {
                                if (fpe_branch_emulate(curproc, tf, dinsn,
                                    dest) != 0)
                                        return SIGILL;
                        }
                } else {
                        /*
                         * Branch not taken: skip the instruction, and
                         * skip the delay slot if it was a `branch likely'
                         * instruction.
                         */
                        tf->pc += 4;
                        if (nd)
                                tf->pc += 4;
                }
            }
                break;
        }

        return 0;
}

/*
 * Emulate a COP1X non-FPU instruction.
 *
 * These are the indexed floating-point loads and stores (OP_LDXC1,
 * OP_LWXC1, OP_SDXC1, OP_SWXC1) and OP_PREFX, which is a no-op here.  The
 * effective address is the sum of the general registers selected by the
 * fr and ft fields.
 *
 * Returns SIGILL on an invalid encoding, SIGBUS on a misaligned address
 * and SIGSEGV if the user memory access fails; in the last two cases
 * sv->sival_ptr is set to the faulting address.  Returns 0 otherwise,
 * including when the func field matches none of the operations above.
 */
int
nofpu_emulate_cop1x(struct proc *p, struct trapframe *tf, uint32_t insn,
    union sigval *sv)
{
        register_t *gpr = (register_t *)tf;
        InstFmt inst;
        vaddr_t va;
        uint64_t dword;
        uint32_t word;
        int sig;

        inst = *(InstFmt *)&insn;

        /* every indexed access uses the same base + index address */
        va = (vaddr_t)gpr[inst.FQType.fr] + (vaddr_t)gpr[inst.FQType.ft];

        switch (inst.FRType.func) {
        case OP_LDXC1:
                if (inst.FQType.fs != 0)
                        return SIGILL;
                if ((va & 0x07) != 0) {
                        sig = SIGBUS;
                        goto fault;
                }
                if (copyin((const void *)va, &dword, sizeof dword) != 0) {
                        sig = SIGSEGV;
                        goto fault;
                }
                /* without SR_FR_32, odd registers are left untouched */
                if ((tf->sr & SR_FR_32) != 0 || (inst.FQType.fd & 1) == 0)
                        fpu_store(p, tf, FMT_L, inst.FQType.fd, dword);
                break;
        case OP_LWXC1:
                if (inst.FQType.fs != 0)
                        return SIGILL;
                if ((va & 0x03) != 0) {
                        sig = SIGBUS;
                        goto fault;
                }
                if (copyin((const void *)va, &word, sizeof word) != 0) {
                        sig = SIGSEGV;
                        goto fault;
                }
                ((uint64_t *)p->p_md.md_regs)[FPBASE + inst.FQType.fd] = word;
                break;
        case OP_SDXC1:
                if (inst.FQType.fd != 0)
                        return SIGILL;
                if ((va & 0x07) != 0) {
                        sig = SIGBUS;
                        goto fault;
                }
                /*
                 * Storing an odd register without SR_FR_32 is undefined
                 * behaviour: write zeroes rather than exposing stack
                 * content.
                 */
                if ((tf->sr & SR_FR_32) != 0 || (inst.FQType.fs & 1) == 0)
                        dword = fpu_load(p, tf, FMT_L, inst.FQType.fs);
                else
                        dword = 0;
                if (copyout(&dword, (void *)va, sizeof dword) != 0) {
                        sig = SIGSEGV;
                        goto fault;
                }
                break;
        case OP_SWXC1:
                if (inst.FQType.fd != 0)
                        return SIGILL;
                if ((va & 0x03) != 0) {
                        sig = SIGBUS;
                        goto fault;
                }
                word = ((uint64_t *)p->p_md.md_regs)[FPBASE + inst.FQType.fs];
                if (copyout(&word, (void *)va, sizeof word) != 0) {
                        sig = SIGSEGV;
                        goto fault;
                }
                break;
        case OP_PREFX:
                /* nothing to do */
                break;
        }

        return 0;

fault:
        /* report the offending address along with the signal */
        sv->sival_ptr = (void *)va;
        return sig;
}

/*
 * Emulate a load/store instruction on FPU registers.
 *
 * Handles OP_LDC1, OP_LWC1, OP_SDC1 and OP_SWC1.  The effective address
 * is the general register selected by the rs field plus the sign-extended
 * 16-bit immediate; the rt field selects the floating-point register.
 *
 * Returns SIGBUS on a misaligned address and SIGSEGV if the user memory
 * access fails, with sv->sival_ptr set to the faulting address in both
 * cases.  Returns 0 otherwise, including for any other opcode.
 */
int
nofpu_emulate_loadstore(struct proc *p, struct trapframe *tf, uint32_t insn,
    union sigval *sv)
{
        register_t *gpr = (register_t *)tf;
        InstFmt inst;
        vaddr_t va, alignmask;
        uint64_t dword;
        uint32_t word;
        uint freg;
        int whole_reg;

        inst = *(InstFmt *)&insn;

        /* required alignment depends upon the access width */
        switch (inst.IType.op) {
        case OP_LDC1:
        case OP_SDC1:
                alignmask = 0x07;
                break;
        case OP_LWC1:
        case OP_SWC1:
                alignmask = 0x03;
                break;
        default:
                return 0;
        }

        va = (vaddr_t)gpr[inst.IType.rs] + (int16_t)inst.IType.imm;
        if ((va & alignmask) != 0) {
                sv->sival_ptr = (void *)va;
                return SIGBUS;
        }

        freg = inst.IType.rt;
        /* without SR_FR_32, 64-bit accesses are limited to even registers */
        whole_reg = (tf->sr & SR_FR_32) != 0 || (freg & 1) == 0;

        switch (inst.IType.op) {
        case OP_LDC1:
                if (copyin((const void *)va, &dword, sizeof dword) != 0) {
                        sv->sival_ptr = (void *)va;
                        return SIGSEGV;
                }
                if (whole_reg)
                        fpu_store(p, tf, FMT_L, freg, dword);
                break;
        case OP_LWC1:
                if (copyin((const void *)va, &word, sizeof word) != 0) {
                        sv->sival_ptr = (void *)va;
                        return SIGSEGV;
                }
                ((uint64_t *)p->p_md.md_regs)[FPBASE + freg] = word;
                break;
        case OP_SDC1:
                /*
                 * Storing an odd register without SR_FR_32 is undefined
                 * behaviour: write zeroes rather than exposing stack
                 * content.
                 */
                if (whole_reg)
                        dword = fpu_load(p, tf, FMT_L, freg);
                else
                        dword = 0;
                if (copyout(&dword, (void *)va, sizeof dword) != 0) {
                        sv->sival_ptr = (void *)va;
                        return SIGSEGV;
                }
                break;
        case OP_SWC1:
                word = ((uint64_t *)p->p_md.md_regs)[FPBASE + freg];
                if (copyout(&word, (void *)va, sizeof word) != 0) {
                        sv->sival_ptr = (void *)va;
                        return SIGSEGV;
                }
                break;
        }

        return 0;
}

/*
 * Emulate MOVF and MOVT.
 *
 * The rt field encodes the condition code number in its upper bits
 * (rt >> 2) and the movf/movt selector in COPz_BC_TF_MASK; bit 1 of rt
 * and the shamt field must be zero.  General register rs is copied to
 * general register rd when the state of the selected fsr condition bit
 * matches the selector (clear for movf, set for movt).  Writes to the
 * ZERO register are discarded.
 *
 * Returns SIGILL on an invalid encoding, 0 otherwise.
 */
int
nofpu_emulate_movci(struct trapframe *tf, uint32_t insn)
{
        register_t *gpr = (register_t *)tf;
        InstFmt inst;
        uint cc;
        int cond_set, want_set;

        inst = *(InstFmt *)&insn;
        if ((inst.RType.rt & 0x02) != 0)
                return SIGILL;
        if (inst.RType.shamt != 0)
                return SIGILL;

        cc = inst.RType.rt >> 2;
        want_set = (inst.RType.rt & COPz_BC_TF_MASK) != 0;
        cond_set = (tf->fsr & FPCSR_CONDVAL(cc)) != 0;
        if (cond_set == want_set && inst.RType.rd != ZERO)
                gpr[inst.RType.rd] = gpr[inst.RType.rs];

        return 0;
}

#endif  /* FPUEMUL */