root/usr/src/uts/sparc/v9/fpu/fpu.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/signal.h>
#include <sys/trap.h>
#include <sys/machtrap.h>
#include <sys/fault.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/core.h>
#include <sys/pcb.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/disp.h>
#include <sys/stack.h>
#include <sys/cmn_err.h>
#include <sys/privregs.h>
#include <sys/debug.h>

#include <sys/fpu/fpu_simulator.h>
#include <sys/fpu/globals.h>
#include <sys/fpu/fpusystm.h>

int fpdispr = 0;

/*
 * For use by procfs to save the floating point context of the thread.
 * Note the if (ttolwp(lwp) == curthread) in prstop, which calls
 * this function, ensures that it is safe to read the fprs here.
 */
void
fp_prsave(kfpu_t *fp)
{
        if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))  {
                kpreempt_disable();
                if (fpu_exists) {
                        fp->fpu_fprs = _fp_read_fprs();
                        if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
                                uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);

                                _fp_write_fprs(fprs);
                                fp->fpu_fprs = fprs;
#ifdef DEBUG
                                if (fpdispr)
                                        cmn_err(CE_NOTE,
                                            "fp_prsave with fp disabled!");
#endif
                        }
                        fp_fksave(fp);
                }
                kpreempt_enable();
        }
}

/*
 * Copy the floating point context of the forked thread.
 */
void
fp_fork(klwp_t *lwp, klwp_t *clwp)
{
        kfpu_t *cfp, *pfp;
        int i;

        cfp = lwptofpu(clwp);
        pfp = lwptofpu(lwp);

        /*
         * copy the parents fpq
         */
        cfp->fpu_qcnt = pfp->fpu_qcnt;
        for (i = 0; i < pfp->fpu_qcnt; i++)
                cfp->fpu_q[i] = pfp->fpu_q[i];

        /*
         * save the context of the parent into the childs fpu structure
         */
        cfp->fpu_fprs = pfp->fpu_fprs;
        if (ttolwp(curthread) == lwp && fpu_exists) {
                fp_fksave(cfp);
        } else {
                for (i = 0; i < 32; i++)
                        cfp->fpu_fr.fpu_regs[i] = pfp->fpu_fr.fpu_regs[i];
                for (i = 16; i < 32; i++)
                        cfp->fpu_fr.fpu_dregs[i] = pfp->fpu_fr.fpu_dregs[i];
        }
        cfp->fpu_en = 1;
}

/*
 * Free any state associated with floating point context.
 * Fp_free can be called in two cases:
 * 1) from reaper -> thread_free -> lwp_freeregs -> fp_free
 *      fp context belongs to a thread on deathrow
 *      nothing to do,  thread will never be resumed
 *      thread calling ctxfree is reaper
 *
 * 2) from exec -> lwp_freeregs -> fp_free
 *      fp context belongs to the current thread
 *      must disable fpu, thread calling ctxfree is curthread
 */
/*ARGSUSED1*/
void
fp_free(kfpu_t *fp, int isexec)
{
        int s;
        uint32_t fprs = 0;

        if (curthread->t_lwp != NULL && lwptofpu(curthread->t_lwp) == fp) {
                fp->fpu_en = 0;
                fp->fpu_fprs = fprs;
                s = splhigh();
                _fp_write_fprs(fprs);
                splx(s);
        }
}


#ifdef SF_ERRATA_30 /* call causes fp-disabled */
extern int spitfire_call_bug;
int ill_fpcalls;
#endif

void
fp_enable(void)
{
        klwp_id_t lwp;
        kfpu_t *fp;

        lwp = ttolwp(curthread);
        ASSERT(lwp != NULL);
        fp = lwptofpu(lwp);

        if (fpu_exists) {
                if (fp->fpu_en) {
#ifdef DEBUG
                        if (fpdispr)
                                cmn_err(CE_NOTE,
                                    "fpu disabled, but already enabled\n");
#endif
                        if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
                                fp->fpu_fprs = FPRS_FEF;
#ifdef DEBUG
                                if (fpdispr)
                                        cmn_err(CE_NOTE,
                                        "fpu disabled, saved fprs disabled\n");
#endif
                        }
                        _fp_write_fprs(FPRS_FEF);
                        fp_restore(fp);
                } else {
                        fp->fpu_en = 1;
                        fp->fpu_fsr = 0;
                        fp->fpu_fprs = FPRS_FEF;
                        _fp_write_fprs(FPRS_FEF);
                        fp_clearregs(fp);
                }
        } else {
                int i;

                if (!fp->fpu_en) {
                        fp->fpu_en = 1;
                        fp->fpu_fsr = 0;
                        for (i = 0; i < 32; i++)
                                fp->fpu_fr.fpu_regs[i] = (uint_t)-1; /* NaN */
                        for (i = 16; i < 32; i++)               /* NaN */
                                fp->fpu_fr.fpu_dregs[i] = (uint64_t)-1;
                }
        }
}

/*
 * fp_disabled normally occurs when the first floating point in a non-threaded
 * program causes an fp_disabled trap. For threaded programs, the ILP32 threads
 * library calls the .setpsr fasttrap, which has been modified to also set the
 * appropriate bits in fpu_en and fpu_fprs, as well as to enable the %fprs,
 * as before. The LP64 threads library will write to the %fprs directly,
 * so fpu_en will never get updated for LP64 threaded programs,
 * although fpu_fprs will, via resume.
 */
void
fp_disabled(struct regs *rp)
{
        klwp_id_t lwp;
        kfpu_t *fp;
        int ftt;

#ifdef SF_ERRATA_30 /* call causes fp-disabled */
        /*
         * This code is here because sometimes the call instruction
         * generates an fp_disabled trap when the call offset is large.
         */
        if (spitfire_call_bug) {
                uint_t instr = 0;
                extern void trap(struct regs *rp, caddr_t addr, uint32_t type,
                    uint32_t mmu_fsr);

                if (USERMODE(rp->r_tstate)) {
                        (void) fuword32((void *)rp->r_pc, &instr);
                } else {
                        instr = *(uint_t *)(rp->r_pc);
                }
                if ((instr & 0xc0000000) == 0x40000000) {
                        ill_fpcalls++;
                        trap(rp, NULL, T_UNIMP_INSTR, 0);
                        return;
                }
        }
#endif /* SF_ERRATA_30 - call causes fp-disabled */

#ifdef CHEETAH_ERRATUM_109 /* interrupts not taken during fpops */
        /*
         * UltraSPARC III will report spurious fp-disabled exceptions when
         * the pipe is full of fpops and an interrupt is triggered.  By the
         * time we get here the interrupt has been taken and we just need
         * to return to where we came from and try again.
         */
        if (fpu_exists && _fp_read_fprs() & FPRS_FEF)
                return;
#endif /* CHEETAH_ERRATUM_109 */

        lwp = ttolwp(curthread);
        ASSERT(lwp != NULL);
        fp = lwptofpu(lwp);
        if (fpu_exists) {
                kpreempt_disable();
                if (fp->fpu_en) {
#ifdef DEBUG
                        if (fpdispr)
                                cmn_err(CE_NOTE,
                                    "fpu disabled, but already enabled\n");
#endif
                        if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
                                fp->fpu_fprs = FPRS_FEF;
#ifdef DEBUG
                                if (fpdispr)
                                        cmn_err(CE_NOTE,
                                        "fpu disabled, saved fprs disabled\n");
#endif
                        }
                        _fp_write_fprs(FPRS_FEF);
                        fp_restore(fp);
                } else {
                        fp->fpu_en = 1;
                        fp->fpu_fsr = 0;
                        fp->fpu_fprs = FPRS_FEF;
                        _fp_write_fprs(FPRS_FEF);
                        fp_clearregs(fp);
                }
                kpreempt_enable();
        } else {
                fp_simd_type fpsd;
                int i;

                (void) flush_user_windows_to_stack(NULL);
                if (!fp->fpu_en) {
                        fp->fpu_en = 1;
                        fp->fpu_fsr = 0;
                        for (i = 0; i < 32; i++)
                                fp->fpu_fr.fpu_regs[i] = (uint_t)-1; /* NaN */
                        for (i = 16; i < 32; i++)               /* NaN */
                                fp->fpu_fr.fpu_dregs[i] = (uint64_t)-1;
                }
                if (ftt = fp_emulator(&fpsd, (fp_inst_type *)rp->r_pc,
                    rp, (ulong_t *)rp->r_sp, fp)) {
                        fp->fpu_q_entrysize = sizeof (struct _fpq);
                        fp_traps(&fpsd, ftt, rp);
                }
        }
}

/*
 * Process the floating point queue in lwp->lwp_pcb.
 *
 * Each entry in the floating point queue is processed in turn.
 * If processing an entry results in an exception fp_traps() is called to
 * handle the exception - this usually results in the generation of a signal
 * to be delivered to the user. There are 2 possible outcomes to this (note
 * that hardware generated signals cannot be held!):
 *
 *   1. If the signal is being ignored we continue to process the rest
 *      of the entries in the queue.
 *
 *   2. If arrangements have been made for return to a user signal handler,
 *      sendsig() will have copied the floating point queue onto the user's
 *      signal stack and zero'ed the queue count in the u_pcb. Note that
 *      this has the side effect of terminating fp_runq's processing loop.
 *      We will re-run the floating point queue on return from the user
 *      signal handler if necessary as part of normal setcontext processing.
 */
void
fp_runq(struct regs *rp)
{
        kfpu_t *fp = lwptofpu(curthread->t_lwp);
        struct _fq *fqp = fp->fpu_q;
        fp_simd_type fpsd;
        uint64_t gsr = get_gsr(fp);

        /*
         * don't preempt while manipulating the queue
         */
        kpreempt_disable();

        while (fp->fpu_qcnt) {
                int fptrap;

                fptrap = fpu_simulator((fp_simd_type *)&fpsd,
                    (fp_inst_type *)fqp->FQu.fpq.fpq_addr,
                    (fsr_type *)&fp->fpu_fsr, gsr,
                    fqp->FQu.fpq.fpq_instr);
                if (fptrap) {
                        /*
                         * Instruction could not be simulated so we will
                         * attempt to deliver a signal.
                         * We may be called again upon signal exit (setcontext)
                         * and can continue to process the queue then.
                         */
                        if (fqp != fp->fpu_q) {
                                int i;
                                struct _fq *fqdp;

                                /*
                                 * We need to normalize the floating queue so
                                 * the excepting instruction is at the head,
                                 * so that the queue may be copied onto the
                                 * user signal stack by sendsig().
                                 */
                                fqdp = fp->fpu_q;
                                for (i = fp->fpu_qcnt; i; i--) {
                                        *fqdp++ = *fqp++;
                                }
                                fqp = fp->fpu_q;
                        }
                        fp->fpu_q_entrysize = sizeof (struct _fpq);

                        /*
                         * fpu_simulator uses the fp registers directly but it
                         * uses the software copy of the fsr. We need to write
                         * that back to fpu so that fpu's state is current for
                         * ucontext.
                         */
                        if (fpu_exists)
                                _fp_write_pfsr(&fp->fpu_fsr);

                        /* post signal */
                        fp_traps(&fpsd, fptrap, rp);

                        /*
                         * Break from loop to allow signal to be sent.
                         * If there are other instructions in the fp queue
                         * they will be processed when/if the user retuns
                         * from the signal handler with a non-empty queue.
                         */
                        break;
                }
                fp->fpu_qcnt--;
                fqp++;
        }

        /*
         * fpu_simulator uses the fp registers directly, so we have
         * to update the pcb copies to keep current, but it uses the
         * software copy of the fsr, so we write that back to fpu
         */
        if (fpu_exists) {
                int i;

                for (i = 0; i < 32; i++)
                        _fp_read_pfreg(&fp->fpu_fr.fpu_regs[i], i);
                for (i = 16; i < 32; i++)
                        _fp_read_pdreg(&fp->fpu_fr.fpu_dregs[i], i);
                _fp_write_pfsr(&fp->fpu_fsr);
        }

        kpreempt_enable();
}

/*
 * Get the precise trapped V9 floating point instruction.
 * Fake up a queue to process. If getting the instruction results
 * in an exception fp_traps() is called to handle the exception - this
 * usually results in the generation of a signal to be delivered to the user.
 */

void
fp_precise(struct regs *rp)
{
        fp_simd_type    fpsd;
        int             inst_ftt;

        union {
                uint_t          i;
                fp_inst_type    inst;
        } kluge;

        klwp_t *lwp = ttolwp(curthread);
        kfpu_t *fp = lwptofpu(lwp);
        uint64_t gsr;
        int mstate;
        if (fpu_exists)
                save_gsr(fp);
        gsr = get_gsr(fp);

        /*
         * Get the instruction to be emulated from the pc saved by the trap.
         * Note that the kernel is NOT prepared to handle a kernel fp
         * exception if it can't pass successfully through the fp simulator.
         *
         * If the trap occurred in user mode, set lwp_state to LWP_SYS for the
         * purposes of clock accounting and switch to the LMS_TRAP microstate.
         */
        if (USERMODE(rp->r_tstate)) {
                inst_ftt = _fp_read_inst((uint32_t *)rp->r_pc, &kluge.i, &fpsd);
                mstate = new_mstate(curthread, LMS_TRAP);
                lwp->lwp_state = LWP_SYS;
        } else {
                kluge.i = *(uint_t *)rp->r_pc;
                inst_ftt = ftt_none;
        }

        if (inst_ftt != ftt_none) {
                /*
                 * Save the bad address and post the signal.
                 * It can only be an ftt_alignment or ftt_fault trap.
                 * XXX - How can this work w/mainsail and do_unaligned?
                 */
                fpsd.fp_trapaddr = (caddr_t)rp->r_pc;
                fp_traps(&fpsd, inst_ftt, rp);
        } else {
                /*
                 * Conjure up a floating point queue and advance the pc/npc
                 * to fake a deferred fp trap. We now run the fp simulator
                 * in fp_precise, while allowing setfpregs to call fp_runq,
                 * because this allows us to do the ugly machinations to
                 * inc/dec the pc depending on the trap type, as per
                 * bugid 1210159. fp_runq is still going to have the
                 * generic "how do I connect the "fp queue to the pc/npc"
                 * problem alluded to in bugid 1192883, which is only a
                 * problem for a restorecontext of a v8 fp queue on a
                 * v9 system, which seems like the .000000001% case (on v9)!
                 */
                struct _fpq *pfpq = &fp->fpu_q->FQu.fpq;
                fp_simd_type    fpsd;
                int fptrap;

                pfpq->fpq_addr = (uint_t *)rp->r_pc;
                pfpq->fpq_instr = kluge.i;
                fp->fpu_qcnt = 1;
                fp->fpu_q_entrysize = sizeof (struct _fpq);

                kpreempt_disable();
                (void) flush_user_windows_to_stack(NULL);
                fptrap = fpu_vis_sim((fp_simd_type *)&fpsd,
                    (fp_inst_type *)pfpq->fpq_addr, rp,
                    (fsr_type *)&fp->fpu_fsr, gsr, kluge.i);

                /* update the hardware fp fsr state for sake of ucontext */
                if (fpu_exists)
                        _fp_write_pfsr(&fp->fpu_fsr);

                if (fptrap) {
                        /* back up the pc if the signal needs to be precise */
                        if (fptrap != ftt_ieee) {
                                fp->fpu_qcnt = 0;
                        }
                        /* post signal */
                        fp_traps(&fpsd, fptrap, rp);

                        /* decrement queue count for ieee exceptions */
                        if (fptrap == ftt_ieee) {
                                fp->fpu_qcnt = 0;
                        }
                } else {
                        fp->fpu_qcnt = 0;
                }
                /* update the software pcb copies of hardware fp registers */
                if (fpu_exists) {
                        fp_save(fp);
                }
                kpreempt_enable();
        }

        /*
         * Reset lwp_state to LWP_USER for the purposes of clock accounting,
         * and restore the previously saved microstate.
         */
        if (USERMODE(rp->r_tstate)) {
                (void) new_mstate(curthread, mstate);
                lwp->lwp_state = LWP_USER;
        }
}

/*
 * Handle floating point traps generated by simulation/emulation.
 */
void
fp_traps(
        fp_simd_type *pfpsd,    /* Pointer to simulator data */
        enum ftt_type ftt,      /* trap type */
        struct regs *rp)        /* ptr to regs fro trap */
{
        /*
         * If we take a user's exception in kernel mode, we want to trap
         * with the user's registers.
         */
        switch (ftt) {
        case ftt_ieee:
                fpu_trap(rp, pfpsd->fp_trapaddr, T_FP_EXCEPTION_IEEE,
                    pfpsd->fp_trapcode);
                break;
        case ftt_fault:
                fpu_trap(rp, pfpsd->fp_trapaddr, T_DATA_EXCEPTION, 0);
                break;
        case ftt_alignment:
                fpu_trap(rp, pfpsd->fp_trapaddr, T_ALIGNMENT, 0);
                break;
        case ftt_unimplemented:
                fpu_trap(rp, pfpsd->fp_trapaddr, T_UNIMP_INSTR, 0);
                break;
        default:
                /*
                 * We don't expect any of the other types here.
                 */
                cmn_err(CE_PANIC, "fp_traps: bad ftt");
        }
}