usr/src/uts/sun4u/lw8/os/lw8_platmod.c

root/usr/src/uts/sun4u/lw8/os/lw8_platmod.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/time.h>
#include <sys/cpuvar.h>
#include <sys/dditypes.h>
#include <sys/ddipropdefs.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/sunndi.h>
#include <sys/platform_module.h>
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/promif.h>
#include <sys/promimpl.h>
#include <sys/prom_plat.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/mem_cage.h>
#include <sys/kobj.h>
#include <sys/utsname.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/atomic.h>
#include <sys/kdi_impl.h>

#include <sys/sgsbbc.h>
#include <sys/sgsbbc_iosram.h>
#include <sys/sgsbbc_iosram_priv.h>
#include <sys/sgsbbc_mailbox.h>
#include <sys/sgsgn.h>
#include <sys/serengeti.h>
#include <sys/sgfrutypes.h>
#include <sys/machsystm.h>
#include <sys/sbd_ioctl.h>
#include <sys/sbd.h>
#include <sys/sbdp_mem.h>
#include <sys/sgcn.h>

#include <sys/memnode.h>
#include <vm/vm_dep.h>
#include <vm/page.h>

#include <sys/cheetahregs.h>
#include <sys/plat_ecc_unum.h>
#include <sys/plat_ecc_dimm.h>

#include <sys/lgrp.h>
#include <sys/clock_impl.h>

/*
 * Debug message switch.  On DEBUG kernels, DCMNERR() output is produced
 * only while this is non-zero.
 */
static int sg_debug = 0;

/*
 * DCMNERR(level, fmt, ...) - conditional debug logging.
 *
 * DEBUG builds: expands to "if (sg_debug) cmn_err", so the cmn_err() call
 * is made only when sg_debug is set.  Because the expansion is a bare "if",
 * a DCMNERR() statement must never be directly followed by an "else".
 *
 * Non-DEBUG builds: expands to nothing, which leaves the parenthesized
 * argument list behind as a plain expression statement (the arguments are
 * still evaluated, but nothing is logged).
 */
#ifdef DEBUG
#define DCMNERR if (sg_debug) cmn_err
#else
#define DCMNERR
#endif

/*
 * Hook used by plat_get_mem_unum() to turn a main-memory fault address into
 * a unum string.  It is not assigned anywhere in this part of the file; while
 * it is NULL (and sg_use_prom_get_unum is clear) plat_get_mem_unum() returns
 * ENOTSUP for memory errors.
 * NOTE(review): presumably installed by the mc-us3 driver - confirm.
 */
int (*p2get_mem_unum)(int, uint64_t, char *, int, int *);

/* local functions */
static void cpu_sgn_update(ushort_t sgn, uchar_t state,
    uchar_t sub_state, int cpuid);


/*
 * Local data.
 *
 * iosram_write_ptr is a pointer to iosram_write().  Because of
 * kernel dynamic linking, we can't get to the function by name,
 * but we can look up its address, and store it in this variable
 * instead.
 *
 * We include the extern for iosram_write() here not because we call
 * it, but to force compilation errors if its prototype doesn't
 * match the prototype of iosram_write_ptr.
 *
 * The same issues apply to iosram_read() and iosram_read_ptr.
 */
/*CSTYLED*/
extern int   iosram_write     (int, uint32_t, caddr_t, uint32_t);
static int (*iosram_write_ptr)(int, uint32_t, caddr_t, uint32_t) = NULL;
/*CSTYLED*/
extern int   iosram_read     (int, uint32_t, caddr_t, uint32_t);
static int (*iosram_read_ptr)(int, uint32_t, caddr_t, uint32_t) = NULL;


/*
 * Variable to indicate if the date should be obtained from the SC or not.
 * It starts out FALSE and is switched to TRUE by load_platform_drivers().
 */
int todsg_use_sc = FALSE;       /* set to FALSE at the beginning */

/*
 * Preallocation of spare tsb's for DR
 *
 * We don't allocate spares for Wildcat since TSBs should come
 * out of memory local to the node.
 */
#define IOMMU_PER_SCHIZO        2
int serengeti_tsb_spares = (SG_MAX_IO_BDS * SG_SCHIZO_PER_IO_BD *
        IOMMU_PER_SCHIZO);

/*
 * sg_max_ncpus is the maximum number of CPUs supported on lw8.
 * sg_max_ncpus is set to be smaller than NCPU to reduce the amount of
 * memory the logs take up until we have a dynamic log memory allocation
 * solution.
 */
int sg_max_ncpus = (12 * 2);    /* (max # of processors * # of cores/proc) */

/*
 * variables to control mailbox message timeouts.
 * These can be patched via /etc/system or mdb.
 */
int     sbbc_mbox_default_timeout = MBOX_DEFAULT_TIMEOUT;
int     sbbc_mbox_min_timeout = MBOX_MIN_TIMEOUT;

/* cached 'chosen' node_id */
pnode_t chosen_nodeid = (pnode_t)0;

/*
 * Table that maps memory slices to a specific memnode.
 */
int slice_to_memnode[SG_MAX_SLICE];

/*
 * We define and use LW8_MAX_CPU_BDS here instead of SG_MAX_CPU_BDS
 * since a LW8 machine will never have a CPU/Mem board #5 (SB5).
 * A LW8 machine can only have a maximum of three CPU/Mem boards, but
 * the board numbers assigned are 0, 2, and 4.  LW8_MAX_CPU_BDS is
 * defined to be 5 since the entries in the domain_dimm_sids array
 * are keyed by board number.  Not perfect but some wasted space
 * is avoided.
 */
#define LW8_MAX_CPU_BDS         5

plat_dimm_sid_board_t   domain_dimm_sids[LW8_MAX_CPU_BDS];

/*
 * Return the number of spare TSBs to preallocate for DR: the configured
 * serengeti_tsb_spares value, capped at MAX_UPA.
 */
int
set_platform_tsb_spares(void)
{
        return (MIN(serengeti_tsb_spares, MAX_UPA));
}

#pragma weak mmu_init_large_pages

void
set_platform_defaults(void)
{
        extern int watchdog_enable;
        extern uint64_t xc_tick_limit_scale;
        extern void mmu_init_large_pages(size_t);

#ifdef DEBUG
        char *todsg_name = "todsg";
        ce_verbose_memory = 2;
        ce_verbose_other = 2;
#endif /* DEBUG */

        watchdog_enable = TRUE;
        watchdog_available = TRUE;

        cpu_sgn_func = cpu_sgn_update;

#ifdef DEBUG
        /* tod_module_name should be set to "todsg" from OBP property */
        if (tod_module_name && (strcmp(tod_module_name, todsg_name) == 0))
                prom_printf("Using todsg driver\n");
        else {
                prom_printf("Force using todsg driver\n");
                tod_module_name = todsg_name;
        }
#endif /* DEBUG */

        /* lw8 does not support forthdebug */
        forthdebug_supported = 0;


        /*
         * Some DR operations require the system to be sync paused.
         * Sync pause on Serengeti could potentially take up to 4
         * seconds to complete depending on the load on the SC.  To
         * avoid send_mond panics during such operations, we need to
         * increase xc_tick_limit to a larger value on Serengeti by
         * setting xc_tick_limit_scale to 5.
         */
        xc_tick_limit_scale = 5;

        if ((mmu_page_sizes == max_mmu_page_sizes) &&
            (mmu_ism_pagesize != DEFAULT_ISM_PAGESIZE)) {
                if (&mmu_init_large_pages)
                        mmu_init_large_pages(mmu_ism_pagesize);
        }
}

/*
 * Load the "pcihp" misc module; a failure is only noted, not fatal.
 */
void
load_platform_modules(void)
{
        int rv = modload("misc", "pcihp");

        if (rv < 0)
                cmn_err(CE_NOTE, "pcihp driver failed to load");
}

/*
 * Power on a CPU by delegating to sbdp_cpu_poweron(), which is looked up
 * by name at call time.  Returns ENOTSUP when the symbol cannot be found,
 * otherwise whatever sbdp_cpu_poweron() returns.
 */
/*ARGSUSED*/
int
plat_cpu_poweron(struct cpu *cp)
{
        int (*poweron_fn)(struct cpu *);

        poweron_fn = (int (*)(struct cpu *))
            modgetsymvalue("sbdp_cpu_poweron", 0);
        if (poweron_fn == NULL)
                return (ENOTSUP);

        return ((*poweron_fn)(cp));
}

/*
 * Power off a CPU by delegating to sbdp_cpu_poweroff(), which is looked up
 * by name at call time.  Returns ENOTSUP when the symbol cannot be found,
 * otherwise whatever sbdp_cpu_poweroff() returns.
 */
/*ARGSUSED*/
int
plat_cpu_poweroff(struct cpu *cp)
{
        int (*poweroff_fn)(struct cpu *);

        poweroff_fn = (int (*)(struct cpu *))
            modgetsymvalue("sbdp_cpu_poweroff", 0);
        if (poweroff_fn == NULL)
                return (ENOTSUP);

        return ((*poweroff_fn)(cp));
}

#ifdef DEBUG
/*
 * DEBUG-only override: when non-zero, set_platform_cage_params() uses this
 * value as the preferred cage size instead of the one it computes.
 */
pgcnt_t serengeti_cage_size_limit;
#endif

/* Preferred minimum cage size (expressed in pages)... for DR */
pgcnt_t serengeti_minimum_cage_size = 0;

/*
 * Configure the kernel cage.  When the cage is enabled, its preferred size
 * is the larger of serengeti_minimum_cage_size and 1/256th of total_pages
 * (overridable on DEBUG kernels through serengeti_cage_size_limit), and the
 * cage range is initialized to grow upwards over phys_avail.
 */
void
set_platform_cage_params(void)
{
        extern pgcnt_t total_pages;
        extern struct memlist *phys_avail;

        if (kernel_cage_enable) {
                pgcnt_t cage_pages = total_pages / 256;

                if (cage_pages < serengeti_minimum_cage_size)
                        cage_pages = serengeti_minimum_cage_size;
#ifdef DEBUG
                if (serengeti_cage_size_limit)
                        cage_pages = serengeti_cage_size_limit;
#endif
                /*
                 * POST places a copy of OBP in the lowest slice, so the
                 * cage has to grow upwards.
                 */
                kcage_range_init(phys_avail, KCAGE_UP, cage_pages);
        }

        kcage_startup_dir = KCAGE_UP;

        /* The cage is expected to be on; only mention it when it is off. */
        if (!kcage_on)
                cmn_err(CE_NOTE, "!DR Kernel Cage is DISABLED");
}

/*
 * ALIGN(x, a): round x up to a multiple of a, computed in 64 bits.
 * The mask arithmetic is only meaningful when a is a power of two;
 * an alignment of 0 returns x unchanged.  Both arguments are evaluated
 * more than once, so they must be free of side effects.
 */
#define ALIGN(x, a)     ((a) == 0 ? (uint64_t)(x) : \
        (((uint64_t)(x) + ((uint64_t)(a) - 1)) & ~((uint64_t)(a) - 1)))

/*
 * Record that the memory range [base, base + sz) belongs to board brd by
 * pointing every slice_to_memnode[] entry it covers at the board's memnode.
 *
 * brd  - board number, used as the lgroup platform handle
 * base - physical base address of the range
 * sz   - length of the range in bytes
 */
void
update_mem_bounds(int brd, uint64_t base, uint64_t sz)
{
        const uint64_t  slice_size = (1ul << PA_SLICE_SHIFT);
        uint64_t        last = base + sz - 1;
        uint64_t        addr;
        int             mnode;

        /*
         * Prefer a memnode already bound to this board; failing that,
         * reuse the memnode of the slice holding base; failing that,
         * allocate a new one.  This handles both a slice shared by
         * several boards (cross-board interleaving) and a board that
         * spans several slices (1+GB DIMMs).
         */
        mnode = plat_lgrphand_to_mem_node(brd);
        if (mnode == -1) {
                mnode = slice_to_memnode[PA_2_SLICE(base)];
                if (mnode == -1)
                        mnode = mem_node_alloc();
                plat_assign_lgrphand_to_mem_node(brd, mnode);
        }

        /* Walk the range one slice at a time from the aligned base. */
        for (addr = ALIGN(base, slice_size); addr < last; addr += slice_size)
                slice_to_memnode[PA_2_SLICE(addr)] = mnode;
}

/*
 * Discover the memory slices behind one memory controller at boot time by
 * decoding its memory decode registers, and hand every valid bank to
 * update_mem_bounds().
 *
 * nodeid - PROM node of the memory controller.  The node is silently
 *          skipped when its "portid" or "reg" property is missing or
 *          unusable ("reg" of the wrong length also prints a warning).
 */
void
plat_fill_mc(pnode_t nodeid)
{
        uint64_t        mc_addr;
        uint32_t        regs[4];
        int             is_local;
        int             portid;
        int             boardid;
        int             bank;

        if (prom_getprop(nodeid, "portid", (caddr_t)&portid) < 0)
                return;
        if (portid == -1)
                return;

        /* The board number is encoded in the MC portid. */
        boardid = SG_PORTID_TO_BOARD_NUM(portid);

        /*
         * "reg" holds four 32-bit cells: a 64-bit address followed by a
         * 64-bit size.  Only the address is of interest here.
         */
        if (prom_getproplen(nodeid, "reg") != (sizeof (uint32_t) * 4)) {
                prom_printf("Warning: malformed 'reg' property\n");
                return;
        }
        if (prom_getprop(nodeid, "reg", (caddr_t)regs) < 0)
                return;
        mc_addr = (((uint64_t)regs[0]) << 32) | (uint64_t)regs[1];

        /* Does this memory controller belong to the CPU we are running on? */
        is_local = (portid == cpunodes[CPU->cpu_id].portid);

        for (bank = 0; bank < SG_MAX_BANKS_PER_MC; bank++) {
                uint64_t        offset = SG_REG_2_OFFSET(bank);
                uint64_t        decode;
                uint64_t        base, size;

                /*
                 * A local controller is read through the special ASI;
                 * a remote one through its address in I/O space.
                 */
                if (is_local)
                        decode = lddmcdecode(offset & MC_OFFSET_MASK);
                else
                        decode = lddphysio((mc_addr | offset));

                if ((decode >> MC_VALID_SHIFT) == 0)
                        continue;

                /*
                 * The decode register is a bitmask field from which
                 * both the base and the span of the bank are derived.
                 */
                base = MC_BASE(decode) << PHYS2UM_SHIFT;
                size = MC_UK2SPAN(decode);
                update_mem_bounds(boardid, base, size);
        }
}

/*
 * This routine is run midway through the boot process.  By the time we get
 * here, we know about all the active CPU boards in the system, and we have
 * extracted information about each board's memory from the memory
 * controllers.  We have also figured out which ranges of memory will be
 * assigned to which memnodes, so we walk the slice table to build the table
 * of memnodes.
 */
/* ARGSUSED */
void
plat_build_mem_nodes(prom_memlist_t *list, size_t  nelems)
{
        const pgcnt_t   slice_pages = 1ull << PFN_SLICE_SHIFT;
        int             idx;

        mem_node_pfn_shift = PFN_SLICE_SHIFT;
        mem_node_physalign = (1ull << PA_SLICE_SHIFT);

        /* Register every slice that has been assigned a memnode. */
        for (idx = 0; idx < SG_MAX_SLICE; idx++) {
                if (slice_to_memnode[idx] != -1) {
                        pfn_t first = (uint64_t)idx << PFN_SLICE_SHIFT;

                        mem_node_add_slice(first, first + slice_pages - 1);
                }
        }
}

/*
 * Return the memnode recorded in slice_to_memnode[] for the slice that
 * contains page frame pfn (-1 if the slice has none assigned).
 */
int
plat_pfn_to_mem_node(pfn_t pfn)
{
        return (slice_to_memnode[PFN_2_SLICE(pfn)]);
}

/*
 * Serengeti support for lgroups.
 *
 * On Serengeti, an lgroup platform handle == board number.
 *
 * Mappings between lgroup handles and memnodes are managed
 * in addition to mappings between memory slices and memnodes
 * to support cross-board interleaving as well as multiple
 * slices per board (e.g. >1GB DIMMs). The initial mapping
 * of memnodes to lgroup handles is determined at boot time.
 * A DR addition of memory adds a new mapping. A DR copy-rename
 * swaps mappings.
 */

/*
 * Macro for extracting the board number from the CPU id
 */
#define CPUID_TO_BOARD(id)      (((id) >> 2) & 0x7)

/*
 * Map a CPU id to the platform handle of the lgroup that contains it.
 *
 * On Serengeti the lgroup platform handle is the board number, which
 * CPUID_TO_BOARD() extracts from the CPU id.
 */
lgrp_handle_t
plat_lgrp_cpu_to_hand(processorid_t id)
{
        lgrp_handle_t hand = CPUID_TO_BOARD(id);

        return (hand);
}

/*
 * Platform specific lgroup initialization: invalidate the slice-to-memnode
 * table and set the lgroup load-balancing tuneables for Serengeti.
 */
void
plat_lgrp_init(void)
{
        extern uint32_t lgrp_expand_proc_thresh;
        extern uint32_t lgrp_expand_proc_diff;
        int slice;

        /*
         * Mark every slice as unassigned (-1) so that any use of an
         * entry that was never filled in is detectable.
         */
        for (slice = 0; slice < SG_MAX_SLICE; slice++)
                slice_to_memnode[slice] = -1;

        /*
         * Tuneables for the Serengeti architecture:
         *
         * lgrp_expand_proc_thresh - minimum load on the lgroups a
         * process currently runs on before its threads are considered
         * for expansion to another lgroup.
         *
         * lgrp_expand_proc_diff - how much less loaded the remote
         * lgroup must be before expanding to it.
         *
         * Spreading load across the machine maximizes bandwidth on
         * Serengeti, and since remote latencies are relatively low the
         * cost to inter-thread communication is small.  Both values are
         * set to one CPU's worth of load, so load is spread over as many
         * lgroups as possible, one CPU at a time.
         */
        lgrp_expand_proc_thresh = LGRP_LOADAVG_THREAD_MAX;
        lgrp_expand_proc_diff = LGRP_LOADAVG_THREAD_MAX;
}

/*
 * Handle LGRP_CONFIG_MEM_RENAME for plat_lgrp_config().
 *
 * arg packs two lgroup handles: the source board in bits 0-15 and the
 * target board in bits 16-31.
 *
 * A DR copy-rename moves all of the memory on one board (the source) to
 * another board (the target) while addresses/pfns and memnodes stay the
 * same: the memory changes location without changing identity.  Once the
 * source memnode has been copied to the target, the target's physical
 * addresses are renamed to what the source had, so the target memnode can
 * be removed and the source memnode takes its place.
 *
 * That is modelled here by swapping the lgroup-handle-to-memnode mappings
 * of the two boards: the target lgroup ends up with the source memnode, and
 * the source lgroup with the (empty) target memnode, which is where any
 * memory added to it later will go.  The generic lgroup code is then told
 * to move the source memnode from the source lgroup to the target lgroup.
 */
static void
plat_lgrp_mem_rename(uintptr_t arg)
{
        lgrp_config_mem_rename_t rename;
        lgrp_handle_t           src_hand = arg & 0xffff;
        lgrp_handle_t           tgt_hand = (arg & 0xffff0000) >> 16;
        int                     src_node = plat_lgrphand_to_mem_node(src_hand);
        int                     tgt_node = plat_lgrphand_to_mem_node(tgt_hand);

        plat_assign_lgrphand_to_mem_node(tgt_hand, src_node);
        plat_assign_lgrphand_to_mem_node(src_hand, tgt_node);

        /* Move the source memnode out of its lgroup into the target's. */
        rename.lmem_rename_from = src_hand;
        rename.lmem_rename_to = tgt_hand;

        lgrp_config(LGRP_CONFIG_MEM_RENAME, (uintptr_t)src_node,
            (uintptr_t)&rename);
}

/*
 * Platform notification of lgroup (re)configuration changes.
 *
 * evt - the configuration event
 * arg - event specific: an update_membounds_t pointer for
 *       LGRP_CONFIG_MEM_ADD, packed source/target handles for
 *       LGRP_CONFIG_MEM_RENAME
 */
/*ARGSUSED*/
void
plat_lgrp_config(lgrp_config_flag_t evt, uintptr_t arg)
{
        if (evt == LGRP_CONFIG_MEM_ADD) {
                update_membounds_t *umb = (update_membounds_t *)arg;

                update_mem_bounds(umb->u_board, umb->u_base, umb->u_len);
        } else if (evt == LGRP_CONFIG_MEM_RENAME) {
                plat_lgrp_mem_rename(arg);
        }
        /* LGRP_CONFIG_MEM_DEL and all other events need no action here. */
}

/*
 * Return latency between "from" and "to" lgroups
 *
 * This latency number can only be used for relative comparison
 * between lgroups on the running system, cannot be used across platforms,
 * and may not reflect the actual latency.  It is platform and implementation
 * specific, so platform gets to decide its value.  It would be nice if the
 * number was at least proportional to make comparisons more meaningful though.
 * NOTE: The numbers below are supposed to be load latencies for uncached
 * memory divided by 10.
 */
int
plat_lgrp_latency(lgrp_handle_t from, lgrp_handle_t to)
{
        /* Without lgroup optimizations every access counts as local. */
        if (!lgrp_optimizations())
                return (23);

        /*
         * With more than two lgroups (root and child), report the
         * minimum remote latency whenever the two handles differ or
         * either of them is the root.
         */
        if (from != to || from == LGRP_DEFAULT_HANDLE ||
            to == LGRP_DEFAULT_HANDLE)
                return (28);

        return (23);
}

/*
 * Platform freelist-processing hook.  This platform takes no action and
 * ignores the memnode argument.
 */
/* ARGSUSED */
void
plat_freelist_process(int mnode)
{
}

/*
 * Find dip for chosen IOSRAM
 *
 * Reads the IOSRAM_CHOSEN_PROP property of the PROM /chosen node, converts
 * that phandle to a device path, attaches the sgsbbc driver and looks the
 * path up in the device tree.  Caches the /chosen node id in chosen_nodeid
 * as a side effect.
 *
 * Returns the dev_info node of the chosen SBBC.  The hold taken by the
 * lookup is dropped before returning, so the caller gets an unheld dip.
 * A failed lookup, or a node whose id does not match the property, panics
 * the system; the two SBBC_ERR*() calls are also issued at CE_PANIC level.
 */
dev_info_t *
find_chosen_dip(void)
{
        dev_info_t      *dip;
        char            master_sbbc[MAXNAMELEN];
        int             nodeid;
        uint_t          tunnel;         /* phandle of the chosen SBBC */

        /*
         * find the /chosen SBBC node, prom interface will handle errors
         */
        nodeid = prom_chosennode();
        /*
         * get the 'iosram' property from the /chosen node
         */
        if (prom_getprop(nodeid, IOSRAM_CHOSEN_PROP, (caddr_t)&tunnel) <= 0) {
                SBBC_ERR(CE_PANIC, "No iosram property found! \n");
        }

        /* turn the phandle into a device path usable for the lookup below */
        if (prom_phandle_to_path((phandle_t)tunnel, master_sbbc,
            sizeof (master_sbbc)) < 0) {
                SBBC_ERR1(CE_PANIC, "prom_phandle_to_path(%d) failed\n",
                    tunnel);
        }

        chosen_nodeid = nodeid;

        /*
         * load and attach the sgsbbc driver.
         * This will also attach all the sgsbbc driver instances
         */
        if (i_ddi_attach_hw_nodes("sgsbbc") != DDI_SUCCESS) {
                cmn_err(CE_WARN, "sgsbbc failed to load\n");
        }
        /* translate a path name to a dev_info_t */
        dip = e_ddi_hold_devi_by_path(master_sbbc, 0);
        /* the node found must be the very one named by the property */
        if ((dip == NULL) || (ddi_get_nodeid(dip) != tunnel)) {
                cmn_err(CE_PANIC,
                    "e_ddi_hold_devi_by_path(%x) failed for SBBC\n", tunnel);
        }

        /* make sure devi_ref is ZERO */
        ndi_rele_devi(dip);
        DCMNERR(CE_CONT, "Chosen IOSRAM is at %s \n", master_sbbc);

        return (dip);
}

/*
 * Attach the drivers this platform depends on and finish the setup that
 * needs them:
 *  - attach (and hold) the mc-us3 memory driver,
 *  - initialize the chosen IOSRAM via find_chosen_dip(),
 *  - attach the sgenv environmentals driver,
 *  - look up iosram_write()/iosram_read() for CPU signature updates,
 *  - switch the TOD over to the SC (todsg_use_sc),
 *  - arm the hardware watchdog when watchdog_enable is set,
 *  - initialize platform ECC handling.
 * Driver attach failures are reported with a warning and are not fatal.
 */
void
load_platform_drivers(void)
{
        /*
         * Must start at 0: ret is only assigned when the watchdog is
         * enabled, yet it is tested unconditionally further down.
         */
        int ret = 0;

        /*
         * Load the mc-us3 memory driver.
         */
        if (i_ddi_attach_hw_nodes("mc-us3") != DDI_SUCCESS)
                cmn_err(CE_WARN, "mc-us3 failed to load");
        else
                (void) ddi_hold_driver(ddi_name_to_major("mc-us3"));

        /*
         * Initialize the chosen IOSRAM before its clients
         * are loaded.
         */
        (void) find_chosen_dip();

        /*
         * Load the environmentals driver (sgenv)
         *
         * We need this driver to handle events from the SC when state
         * changes occur in the environmental data.
         */
        if (i_ddi_attach_hw_nodes("sgenv") != DDI_SUCCESS)
                cmn_err(CE_WARN, "sgenv failed to load");

        /*
         * Ideally, we'd do this in set_platform_defaults(), but
         * at that point it's too early to look up symbols.
         */
        iosram_write_ptr = (int (*)(int, uint32_t, caddr_t, uint32_t))
            modgetsymvalue("iosram_write", 0);

        if (iosram_write_ptr == NULL) {
                DCMNERR(CE_WARN, "load_platform_drivers: iosram_write()"
                    " not found; signatures will not be updated\n");
        } else {
                /*
                 * The iosram read ptr is only needed if we can actually
                 * write CPU signatures, so only bother setting it if we
                 * set a valid write pointer, above.
                 */
                iosram_read_ptr = (int (*)(int, uint32_t, caddr_t, uint32_t))
                    modgetsymvalue("iosram_read", 0);

                if (iosram_read_ptr == NULL) {
                        DCMNERR(CE_WARN, "load_platform_drivers: iosram_read()"
                            " not found\n");
                }
        }

        /*
         * Set todsg_use_sc to TRUE so that we will be getting date
         * from the SC.
         */
        todsg_use_sc = TRUE;

        /*
         * Now is a good time to activate hardware watchdog (if one exists).
         */
        mutex_enter(&tod_lock);
        if (watchdog_enable)
                ret = tod_ops.tod_set_watchdog_timer(watchdog_timeout_seconds);
        mutex_exit(&tod_lock);
        if (ret != 0)
                printf("Hardware watchdog enabled\n");

        plat_ecc_init();
}

/*
 * No platform drivers on this platform
 */
char *platform_module_list[] = {
        (char *)0
};

/*
 * Platform hook for TOD faults.  This platform takes no action and
 * ignores the fault type.
 */
/*ARGSUSED*/
void
plat_tod_fault(enum tod_fault_type tod_bad)
{
}

/*
 * Return the maximum number of boards (SG_MAX_BDS).
 */
int
plat_max_boards(void)
{
        return (SG_MAX_BDS);
}

/*
 * Return the maximum number of I/O units per board (SG_MAX_IO_PER_BD).
 */
int
plat_max_io_units_per_board(void)
{
        return (SG_MAX_IO_PER_BD);
}

/*
 * Return the maximum number of CMP units per board (SG_MAX_CMPS_PER_BD).
 */
int
plat_max_cmp_units_per_board(void)
{
        return (SG_MAX_CMPS_PER_BD);
}

/*
 * Return the maximum number of CPU units per board (SG_MAX_CPUS_PER_BD).
 */
int
plat_max_cpu_units_per_board(void)
{
        return (SG_MAX_CPUS_PER_BD);
}

/*
 * Return the maximum number of memory controllers per board.  This is the
 * same as the CMP count because each CPU die has a memory controller.
 */
int
plat_max_mc_units_per_board(void)
{
        return (SG_MAX_CMPS_PER_BD); /* each CPU die has a memory controller */
}

/*
 * Return the maximum number of memory units per board (SG_MAX_MEM_PER_BD).
 */
int
plat_max_mem_units_per_board(void)
{
        return (SG_MAX_MEM_PER_BD);
}

/*
 * Return LW8_MAX_CPU_BDS.  Note that this is the size needed for tables
 * keyed by board number (boards 0, 2 and 4), not the count of CPU/Mem
 * boards that can actually be present.
 */
int
plat_max_cpumem_boards(void)
{
        return (LW8_MAX_CPU_BDS);
}

/*
 * Return the maximum number of CPUs supported on this platform, taken
 * from the sg_max_ncpus variable.
 */
int
set_platform_max_ncpus(void)
{
        return (sg_max_ncpus);
}

/*
 * Report the platform's DMV interrupt parameters through the two output
 * pointers: *hwint is set to MAX_UPA and *swint to 0.
 * NOTE(review): presumably these are the hardware and software dispatch
 * entry counts - confirm against the caller.
 */
void
plat_dmv_params(uint_t *hwint, uint_t *swint)
{
        *hwint = MAX_UPA;
        *swint = 0;
}

/*
 * Cached address of sbbc_mbox_request_response(), resolved by name on the
 * first call to plat_nodename_set(); NULL until the lookup succeeds.
 */
static int (*sg_mbox)(sbbc_msg_t *, sbbc_msg_t *, time_t) = NULL;

/*
 * Our nodename has been set, pass it along to the SC.
 *
 * Sends an INFO_MBOX/INFO_MBOX_NODENAME mailbox request carrying the length
 * and text of utsname.nodename.  On success the ECC capability bitmap is
 * exchanged with the SC as well.  Failures are only logged; nothing is
 * returned to the caller.
 */
void
plat_nodename_set(void)
{
        sbbc_msg_t      req;    /* request */
        sbbc_msg_t      resp;   /* response */
        int             rv;     /* return value from call to mbox */
        struct nodename_info {
                int32_t namelen;
                char    nodename[_SYS_NMLN];
        } nni;

        /*
         * find the symbol for the mailbox routine
         */
        if (sg_mbox == NULL)
                sg_mbox = (int (*)(sbbc_msg_t *, sbbc_msg_t *, time_t))
                    modgetsymvalue("sbbc_mbox_request_response", 0);

        if (sg_mbox == NULL) {
                cmn_err(CE_NOTE, "!plat_nodename_set: sg_mbox not found\n");
                return;
        }

        /*
         * construct the message telling the SC our nodename
         */
        (void) strcpy(nni.nodename, utsname.nodename);
        nni.namelen = (int32_t)strlen(nni.nodename);

        req.msg_type.type = INFO_MBOX;
        req.msg_type.sub_type = INFO_MBOX_NODENAME;
        req.msg_status = 0;
        /* only the length field plus the name text itself is sent */
        req.msg_len = (int)(nni.namelen + sizeof (nni.namelen));
        req.msg_bytes = 0;
        req.msg_buf = (caddr_t)&nni;
        req.msg_data[0] = 0;
        req.msg_data[1] = 0;

        /*
         * initialize the response back from the SC
         */
        resp.msg_type.type = INFO_MBOX;
        resp.msg_type.sub_type = INFO_MBOX_NODENAME;
        resp.msg_status = 0;
        resp.msg_len = 0;
        resp.msg_bytes = 0;
        resp.msg_buf = (caddr_t)0;
        resp.msg_data[0] = 0;
        resp.msg_data[1] = 0;

        /*
         * ship it and check for success
         */
        rv = (sg_mbox)(&req, &resp, sbbc_mbox_default_timeout);

        if (rv != 0) {
                cmn_err(CE_NOTE, "!plat_nodename_set: sg_mbox retval %d\n", rv);
        } else if (resp.msg_status != 0) {
                cmn_err(CE_NOTE, "!plat_nodename_set: msg_status %d\n",
                    resp.msg_status);
        } else {
                DCMNERR(CE_NOTE, "!plat_nodename_set was successful\n");

                /*
                 * It is necessary to exchange the capability bitmap
                 * with the SC before sending any ECC error information
                 * or indictment.  We call plat_ecc_capability_send()
                 * here, just after sending the nodename successfully.
                 */
                rv = plat_ecc_capability_send();
                if (rv == 0) {
                        DCMNERR(CE_NOTE, "!plat_ecc_capability_send was "
                            "successful\n");
                }
        }
}

/*
 * flag to allow users to switch between using OBP's
 * prom_get_unum() and the mc-us3 driver's p2get_mem_unum()
 * (for main memory errors only).
 */
int sg_use_prom_get_unum = 0;

/*
 * Debugging flag: set to 1 to call into obp for get_unum, or set it to 0
 * to call into the unum cache system.  This is the E$ equivalent of
 * sg_use_prom_get_unum.
 */
int sg_use_prom_ecache_unum = 0;

/* used for logging ECC errors to the SC */
#define SG_MEMORY_ECC   1
#define SG_ECACHE_ECC   2
#define SG_UNKNOWN_ECC  (-1)

/*
 * plat_get_mem_unum() generates a string identifying either the
 * memory or E$ DIMM(s) during error logging. Depending on whether
 * the error is E$ or memory related, the appropriate support
 * routine is called to assist in the string generation.
 *
 * - For main memory errors we can use the mc-us3 driver's p2get_mem_unum()
 *   (or prom_get_unum() for debugging purposes).
 *
 * - For E$ errors we call sg_get_ecacheunum() to generate the unum (or
 *   prom_serengeti_get_ecacheunum() for debugging purposes).
 */

/*
 * Wrapper that asks OBP (prom_get_unum()) for the unum of a memory fault
 * and maps the outcome to an errno value.
 *
 * Returns EIO when the PROM call fails, EINVAL when the PROM reports a
 * length of one or less, and 0 otherwise.
 */
static int
sg_prom_get_unum(int synd_code, uint64_t paddr, char *buf, int buflen,
    int *lenp)
{
        if (prom_get_unum(synd_code, (unsigned long long)paddr,
            buf, buflen, lenp) != 0)
                return (EIO);

        if (*lenp <= 1)
                return (EINVAL);

        return (0);
}

/*
 * Generate the unum string for a memory or E$ fault.
 *
 * Main memory faults (flt_in_memory set) are resolved through OBP when
 * sg_use_prom_get_unum is set, otherwise through the p2get_mem_unum hook.
 * E$ faults (ECC_ECACHE set in flt_status) are resolved through OBP when
 * sg_use_prom_ecache_unum is set, otherwise through sg_get_ecacheunum().
 *
 * Returns 0 on success, ENOTSUP when the fault is neither kind or no memory
 * unum hook is installed, EIO when the OBP E$ lookup fails, or the error
 * returned by the routine that was called.
 */
/*ARGSUSED*/
int
plat_get_mem_unum(int synd_code, uint64_t flt_addr, int flt_bus_id,
    int flt_in_memory, ushort_t flt_status, char *buf, int buflen, int *lenp)
{
        /* Snapshot of the memory driver's hook, taken once on entry. */
        int (*mem_unum_fn)(int synd_code, uint64_t paddr, char *buf,
            int buflen, int *lenp) = p2get_mem_unum;

        if (flt_in_memory) {
                /*
                 * Main memory error.  For debugging, the user may switch
                 * from the memory driver's routine to OBP's get_unum by
                 * setting sg_use_prom_get_unum.
                 */
                if (sg_use_prom_get_unum) {
                        DCMNERR(CE_NOTE, "Using prom_get_unum from OBP");
                        return (sg_prom_get_unum(synd_code,
                            P2ALIGN(flt_addr, 8), buf, buflen, lenp));
                }
                if (mem_unum_fn == NULL)
                        return (ENOTSUP);

                return (mem_unum_fn(synd_code, P2ALIGN(flt_addr, 8),
                    buf, buflen, lenp));
        }

        /* Anything that is neither memory nor E$ is not supported. */
        if (!(flt_status & ECC_ECACHE))
                return (ENOTSUP);

        /* E$ error: use the unum cache unless OBP was asked for. */
        if (!sg_use_prom_ecache_unum) {
                return (sg_get_ecacheunum(flt_bus_id, flt_addr,
                    buf, buflen, lenp));
        }

        {
                DCMNERR(CE_NOTE,
                    "Using prom_serengeti_get_ecacheunum from OBP");
        }
        if (prom_serengeti_get_ecacheunum(flt_bus_id,
            P2ALIGN(flt_addr, 8), buf, buflen, lenp) != 0)
                return (EIO);

        return (0);
}

/*
 * This platform hook gets called from mc_add_mem_unum_label() in the mc-us3
 * driver giving each platform the opportunity to add platform
 * specific label information to the unum for ECC error logging purposes.
 */
void
plat_add_mem_unum_label(char *unum, int mcid, int bank, int dimm)
{
        char    new_unum[UNUM_NAMLEN] = "";
        int     node = SG_PORTID_TO_NODEID(mcid);
        int     board = SG_CPU_BD_PORTID_TO_BD_NUM(mcid);
        int     position = SG_PORTID_TO_CPU_POSN(mcid);

        /*
         * The mc-us3 driver deals with logical banks but for unum
         * purposes we need to use physical banks so that the correct
         * dimm can be physically located. Logical banks 0 and 2
         * make up physical bank 0. Logical banks 1 and 3 make up
         * physical bank 1. Here we do the necessary conversion.
         */
        bank = (bank % 2);

        if (dimm == -1) {
                SG_SET_FRU_NAME_NODE(new_unum, node);
                SG_SET_FRU_NAME_CPU_BOARD(new_unum, board);
                SG_SET_FRU_NAME_MODULE(new_unum, position);
                SG_SET_FRU_NAME_BANK(new_unum, bank);

        } else {
                SG_SET_FRU_NAME_NODE(new_unum, node);
                SG_SET_FRU_NAME_CPU_BOARD(new_unum, board);
                SG_SET_FRU_NAME_MODULE(new_unum, position);
                SG_SET_FRU_NAME_BANK(new_unum, bank);
                SG_SET_FRU_NAME_DIMM(new_unum, dimm);

                (void) strcat(new_unum, " ");
                (void) strcat(new_unum, unum);
        }

        (void) strcpy(unum, new_unum);
}

/*
 * Build the unum of the board holding the given CPU, in the form
 * "/N<node>/<cpu-board-id><board>".
 *
 * Returns 0 and stores the string length in *lenp on success, or ENOSPC
 * (leaving *lenp untouched) when buf is too small for the name.
 */
int
plat_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
        int     node = SG_PORTID_TO_NODEID(cpuid);
        int     board = SG_CPU_BD_PORTID_TO_BD_NUM(cpuid);

        if (snprintf(buf, buflen, "/N%d/%s%d", node,
            SG_HPU_TYPE_CPU_BOARD_ID, board) >= buflen)
                return (ENOSPC);

        *lenp = strlen(buf);
        return (0);
}

/*
 * Routines used to deliver ECC mailbox messages.  Both are looked up by
 * symbol name with modgetsymvalue() the first time
 * plat_send_ecc_mailbox_msg() needs them and are cached here afterwards.
 */
static void (*sg_ecc_taskq_func)(sbbc_ecc_mbox_t *) = NULL;
static int (*sg_ecc_mbox_func)(sbbc_ecc_mbox_t *) = NULL;

/*
 * We log all ECC errors to the SC so we send a mailbox
 * message to the SC passing it the relevant data.
 * ECC mailbox messages are sent via a taskq mechanism to
 * prevent impaired system performance during ECC floods.
 * Indictments have already passed through a taskq, so they
 * are not queued here.
 *
 * Returns ENODEV if either delivery routine cannot be found, EINVAL for an
 * unknown msg_type, ENOMEM if an allocation fails, 0 once an error message
 * has been queued, and otherwise whatever the mailbox output routine
 * returns.
 */
int
plat_send_ecc_mailbox_msg(plat_ecc_message_type_t msg_type, void *datap)
{
        sbbc_ecc_mbox_t *mbox;
        caddr_t         reqbuf;
        uint16_t        subtype;
        size_t          reqlen;
        size_t          resplen = 0;    /* 0: no response buffer wanted */
        int             kmflag;
        int             do_log = 0;
        int             use_taskq = 0;

        /* Resolve the two delivery routines on first use. */
        if (sg_ecc_taskq_func == NULL) {
                sg_ecc_taskq_func = (void (*)(sbbc_ecc_mbox_t *))
                    modgetsymvalue("sbbc_mbox_queue_ecc_event", 0);
        }
        if (sg_ecc_taskq_func == NULL) {
                cmn_err(CE_NOTE, "!plat_send_ecc_mailbox_msg: "
                    "sbbc_mbox_queue_ecc_event not found");
                return (ENODEV);
        }

        if (sg_ecc_mbox_func == NULL) {
                sg_ecc_mbox_func = (int (*)(sbbc_ecc_mbox_t *))
                    modgetsymvalue("sbbc_mbox_ecc_output", 0);
        }
        if (sg_ecc_mbox_func == NULL) {
                cmn_err(CE_NOTE, "!plat_send_ecc_mailbox_msg: "
                    "sbbc_mbox_ecc_output not found");
                return (ENODEV);
        }

        /*
         * Work out everything that depends on the message type: mailbox
         * sub-type, request size, allocation flag, whether errors are
         * logged, whether a response buffer is needed and whether the
         * message is queued on the taskq or sent directly.
         */
        switch (msg_type) {
        case PLAT_ECC_ERROR_MESSAGE:
                subtype = INFO_MBOX_ERROR_ECC;
                reqlen = sizeof (plat_ecc_error_data_t);
                kmflag = KM_NOSLEEP;
                do_log = 1;
                use_taskq = 1;
                break;

        case PLAT_ECC_ERROR2_MESSAGE:
                subtype = INFO_MBOX_ECC;
                reqlen = sizeof (plat_ecc_error2_data_t);
                kmflag = KM_NOSLEEP;
                do_log = 1;
                use_taskq = 1;
                break;

        case PLAT_ECC_INDICTMENT_MESSAGE:
                subtype = INFO_MBOX_ERROR_INDICT;
                reqlen = sizeof (plat_ecc_indictment_data_t);
                kmflag = KM_SLEEP;
                break;

        case PLAT_ECC_INDICTMENT2_MESSAGE:
                subtype = INFO_MBOX_ECC;
                reqlen = sizeof (plat_ecc_indictment2_data_t);
                kmflag = KM_SLEEP;
                break;

        case PLAT_ECC_CAPABILITY_MESSAGE:
                subtype = INFO_MBOX_ECC_CAP;
                reqlen = sizeof (plat_capability_data_t) +
                    strlen(utsname.release) + strlen(utsname.version) + 2;
                /* the response buffer is the same size as the request */
                resplen = reqlen;
                kmflag = KM_SLEEP;
                break;

        case PLAT_ECC_DIMM_SID_MESSAGE:
                subtype = INFO_MBOX_ECC;
                reqlen = sizeof (plat_dimm_sid_request_data_t);
                resplen = sizeof (plat_dimm_sid_board_data_t);
                kmflag = KM_SLEEP;
                break;

        default:
                return (EINVAL);
        }

        mbox = kmem_zalloc(sizeof (sbbc_ecc_mbox_t), kmflag);
        if (mbox == NULL) {
                cmn_err(CE_NOTE, "!plat_send_ecc_mailbox_msg: "
                    "unable to allocate sbbc_ecc_mbox");
                return (ENOMEM);
        }

        reqbuf = kmem_zalloc(reqlen, kmflag);
        if (reqbuf == NULL) {
                cmn_err(CE_NOTE, "!plat_send_ecc_mailbox_msg: "
                    "unable to allocate request msg_buf");
                kmem_free(mbox, sizeof (sbbc_ecc_mbox_t));
                return (ENOMEM);
        }
        bcopy(datap, reqbuf, reqlen);

        mbox->ecc_log_error = do_log;

        /*
         * Initialize the request to the SC
         */
        mbox->ecc_req.msg_type.type = INFO_MBOX;
        mbox->ecc_req.msg_type.sub_type = subtype;
        mbox->ecc_req.msg_status = 0;
        mbox->ecc_req.msg_len = (int)reqlen;
        mbox->ecc_req.msg_bytes = 0;
        mbox->ecc_req.msg_buf = reqbuf;
        mbox->ecc_req.msg_data[0] = 0;
        mbox->ecc_req.msg_data[1] = 0;

        /*
         * Initialize the response back from the SC
         */
        mbox->ecc_resp.msg_type.type = INFO_MBOX;
        mbox->ecc_resp.msg_type.sub_type = subtype;
        mbox->ecc_resp.msg_status = 0;
        mbox->ecc_resp.msg_len = 0;
        mbox->ecc_resp.msg_bytes = 0;
        mbox->ecc_resp.msg_buf = NULL;
        mbox->ecc_resp.msg_data[0] = 0;
        mbox->ecc_resp.msg_data[1] = 0;

        /*
         * For Error Messages, we go through a taskq.
         * Queue up message for processing
         */
        if (use_taskq) {
                (*sg_ecc_taskq_func)(mbox);
                return (0);
        }

        /*
         * Everything else has already gone through a taskq, so the mailbox
         * routine is called directly.  Capability and DIMM SID messages
         * first get a buffer for the response; these types always allocate
         * with KM_SLEEP.
         */
        if (resplen != 0) {
                mbox->ecc_resp.msg_len = (int)resplen;
                mbox->ecc_resp.msg_buf = kmem_zalloc(resplen, kmflag);
        }

        return ((*sg_ecc_mbox_func)(mbox));
}

/*
 * Build a cpuid from sb and proc using MAKE_CPUID().
 *
 * m is redundant on serengeti as the multiplier is always 4, so it is
 * accepted but not used (hence the ARGSUSED lint directive below).
 */
/*ARGSUSED*/
int
plat_make_fru_cpuid(int sb, int m, int proc)
{
        return (MAKE_CPUID(sb, proc));
}

/*
 * board number for a given proc
 *
 * The value is whatever SG_PORTID_TO_BOARD_NUM() yields for proc; proc is
 * therefore treated as a port id here.
 */
int
plat_make_fru_boardnum(int proc)
{
        return (SG_PORTID_TO_BOARD_NUM(proc));
}

/*
 * Build a signature from (sig, state, sub_state) and write it through
 * iosram_write_ptr to the signature block: to the slot of `cpuid', or, when
 * cpuid is negative, to the slot of every cpu[] entry that has CPU_EXISTS or
 * CPU_QUIESCED set.  Unless the new state is SIGST_OFFLINE or
 * SIGST_DETACHED, the domain signature is updated as well.
 *
 * Nothing is written when iosram_write_ptr has not been set up.
 */
static
void
cpu_sgn_update(ushort_t sig, uchar_t state, uchar_t sub_state, int cpuid)
{
        uint32_t signature = CPU_SIG_BLD(sig, state, sub_state);
        sig_state_t current_sgn;
        int i;

        if (iosram_write_ptr == NULL) {
                /*
                 * If the IOSRAM write pointer isn't set, we won't be able
                 * to write signatures to ANYTHING, so we may as well just
                 * write out an error message (if desired) and exit this
                 * routine now...
                 */
                DCMNERR(CE_WARN,
                    "cpu_sgn_update: iosram_write() not found;"
                    " cannot write signature 0x%x for CPU(s) or domain\n",
                    signature);
                return;
        }


        /*
         * Differentiate a panic reboot from a non-panic reboot in the
         * setting of the substate of the signature.
         *
         * If the new substate is REBOOT and we're rebooting due to a panic,
         * then set the new substate to a special value indicating a panic
         * reboot, SIGSUBST_PANIC_REBOOT.
         *
         * A panic reboot is detected by a current (previous) domain signature
         * state of SIGST_EXIT, and a new signature substate of SIGSUBST_REBOOT.
         * The domain signature state SIGST_EXIT is used as the panic flow
         * progresses.
         *
         * At the end of the panic flow the reboot occurs, but the signature
         * now records that it was an involuntary one, something that may be
         * quite useful to know at OBP level.
         */
        if (sub_state == SIGSUBST_REBOOT) {
                if (iosram_read_ptr == NULL) {
                        DCMNERR(CE_WARN,
                            "cpu_sgn_update: iosram_read() not found;"
                            " could not check current domain signature\n");
                } else if ((*iosram_read_ptr)(SBBC_SIGBLCK_KEY,
                    SG_SGNBLK_DOMAINSIG_OFFSET,
                    (char *)&current_sgn, sizeof (current_sgn)) != 0) {
                        /*
                         * The read failed, so current_sgn was never filled
                         * in.  Do not look at it; keep the plain REBOOT
                         * substate.
                         */
                        DCMNERR(CE_WARN,
                            "cpu_sgn_update: iosram_read() failed;"
                            " could not check current domain signature\n");
                } else if (current_sgn.state_t.state == SIGST_EXIT) {
                        signature = CPU_SIG_BLD(sig, state,
                            SIGSUBST_PANIC_REBOOT);
                }
        }

        /*
         * cpuid == -1 indicates that the operation applies to all cpus.
         */
        if (cpuid >= 0) {
                (void) (*iosram_write_ptr)(SBBC_SIGBLCK_KEY,
                    SG_SGNBLK_CPUSIG_OFFSET(cpuid), (char *)&signature,
                    sizeof (signature));
        } else {
                for (i = 0; i < NCPU; i++) {
                        /* skip empty slots and cpus with neither flag set */
                        if (cpu[i] == NULL || !(cpu[i]->cpu_flags &
                            (CPU_EXISTS|CPU_QUIESCED))) {
                                continue;
                        }
                        (void) (*iosram_write_ptr)(SBBC_SIGBLCK_KEY,
                            SG_SGNBLK_CPUSIG_OFFSET(i), (char *)&signature,
                            sizeof (signature));
                }
        }

        /* A cpu going offline or being detached leaves the domain as is. */
        if (state == SIGST_OFFLINE || state == SIGST_DETACHED) {
                return;
        }

        (void) (*iosram_write_ptr)(SBBC_SIGBLCK_KEY,
            SG_SGNBLK_DOMAINSIG_OFFSET, (char *)&signature,
            sizeof (signature));
}

/*
 * This implementation of startup_platform() does nothing.
 * NOTE(review): presumably a hook the common startup code expects every
 * platform module to provide -- confirm against the caller.
 */
void
startup_platform(void)
{
}

/*
 * Local character-class helpers used by strtoi() and by the PROM version
 * parsing code.  isdigit() accepts only '0'..'9'; isspace() accepts only
 * blank, tab and newline.  Note that isspace() is a macro and evaluates its
 * argument more than once.
 */
static int
isdigit(int ch)
{
        return (ch >= '0' && ch <= '9');
}

#define isspace(c)      ((c) == ' ' || (c) == '\t' || (c) == '\n')

/*
 * A routine to convert a number (represented as a string) to
 * the integer value it represents.
 *
 * Leading blanks, tabs and newlines are skipped and a single optional '+'
 * or '-' sign is honoured before the decimal digits.
 *
 * If pos is not NULL, *pos is set to the first character after the digits
 * that were consumed.  When no digits are found, 0 is returned and *pos is
 * set to the original p (not to wherever the scan stopped), so that callers
 * can detect "no conversion" by comparing *pos with the pointer they passed
 * in, exactly as with strtol().
 *
 * No overflow detection is done; the caller is expected to pass short
 * numbers.
 */
static int
strtoi(char *p, char **pos)
{
        char *start = p;
        int n;
        int c, neg = 0;

        if (!isdigit(c = *p)) {
                while (isspace(c))
                        c = *++p;
                switch (c) {
                case '-':
                        neg++;
                        /* FALLTHROUGH */
                case '+':
                        c = *++p;
                        break;
                default:
                        break;
                }
                if (!isdigit(c)) {
                        /* nothing converted: report the untouched start */
                        if (pos != NULL)
                                *pos = start;
                        return (0);
                }
        }
        for (n = '0' - c; isdigit(c = *++p); ) {
                n *= 10; /* two steps to avoid unnecessary overflow */
                n += '0' - c; /* accum neg to avoid surprises at MAX */
        }
        if (pos != NULL)
                *pos = p;
        return (neg ? n : -n);
}

/*
 * Get the three parts of the Serengeti PROM version.
 * Used for feature readiness tests.
 *
 * The "version" property of /openprom is expected to start with "OBP",
 * followed by one separator character and then three dot-separated decimal
 * numbers; the third number must be followed by the end of the string or
 * by white space.  The numbers are stored through sysp, intfp and bldp in
 * that order.
 *
 * Return 0 if version extracted successfully, -1 otherwise.  On failure
 * the output parameters may have been partially written and must not be
 * used.
 */

int
sg_get_prom_version(int *sysp, int *intfp, int *bldp)
{
        int plen;
        char vers[512];
        pnode_t node;
        static char version[] = "version";
        char *verp, *ep;

        node = prom_finddevice("/openprom");
        if (node == OBP_BADNODE)
                return (-1);

        /* the property must be present and fit, leaving room for the NUL */
        plen = prom_getproplen(node, version);
        if (plen <= 0 || plen >= sizeof (vers))
                return (-1);
        (void) prom_getprop(node, version, vers);
        vers[plen] = '\0';

        /*
         * Make sure it's an OBP flashprom: every one of the three leading
         * characters has to match.  Also insist that the string goes on
         * past "OBP", so that &vers[4] below cannot point beyond the
         * terminator into uninitialized stack.
         */
        if (vers[0] != 'O' || vers[1] != 'B' || vers[2] != 'P' ||
            vers[3] == '\0') {
                cmn_err(CE_WARN, "sg_get_prom_version: "
                    "unknown <version> string in </openprom>\n");
                return (-1);
        }
        /* skip "OBP" and the single separator that follows it */
        verp = &vers[4];

        *sysp = strtoi(verp, &ep);
        if (ep == verp || *ep != '.')
                return (-1);
        verp = ep + 1;

        *intfp = strtoi(verp, &ep);
        if (ep == verp || *ep != '.')
                return (-1);
        verp = ep + 1;

        *bldp = strtoi(verp, &ep);
        if (ep == verp || (*ep != '\0' && !isspace(*ep)))
                return (-1);
        return (0);
}

/*
 * Return 0 if system board Dynamic Reconfiguration
 * is supported by the firmware, -1 otherwise.
 *
 * The PROM version is examined on the first call only; the answer is
 * remembered and handed back on every later call.  Support requires a
 * version of 5.12.x or later within 5.x, or 5.11 with a build of at least
 * 200.
 */
int
sg_prom_sb_dr_check(void)
{
        static int cached = 1;          /* 1 means "not determined yet" */
        int v_sys, v_intf, v_bld;

        if (cached != 1)
                return (cached);

        if (sg_get_prom_version(&v_sys, &v_intf, &v_bld) != 0 ||
            v_sys != 5) {
                /* version unreadable, or not a 5.x PROM */
                cached = -1;
        } else if (v_intf >= 12 || (v_intf == 11 && v_bld >= 200)) {
                cached = 0;
        } else {
                cached = -1;
        }

        return (cached);
}

/*
 * Return 0 if cPCI Dynamic Reconfiguration
 * is supported by the firmware, -1 otherwise.
 *
 * The result is simply that of sg_prom_sb_dr_check().
 */
int
sg_prom_cpci_dr_check(void)
{
        /*
         * The version check is currently the same as for
         * system boards. Since the two DR sub-systems are
         * independent, this could change.
         */
        return (sg_prom_sb_dr_check());
}

/*
 * Our implementation of this KDI op updates the CPU signature in the system
 * controller.  Note that we set the signature to OBP_SIG, rather than DBG_SIG.
 * The Forth words we execute will, among other things, transform our OBP_SIG
 * into DBG_SIG.  They won't function properly if we try to use DBG_SIG.
 *
 * lbolt_debug_entry() is called before the signature is changed;
 * sg_system_release() makes the matching lbolt_debug_return() call.
 */
static void
sg_system_claim(void)
{
        lbolt_debug_entry();

        /* OBP_SIG is supplied once for each of the two Forth words */
        prom_interpret("sigb-sig! my-sigb-sig!", OBP_SIG, OBP_SIG, 0, 0, 0);
}

/*
 * Counterpart of sg_system_claim(): runs the same Forth words with OS_SIG
 * instead of OBP_SIG and then calls lbolt_debug_return(), i.e. it undoes
 * the claim steps in reverse order.
 */
static void
sg_system_release(void)
{
        prom_interpret("sigb-sig! my-sigb-sig!", OS_SIG, OS_SIG, 0, 0, 0);

        lbolt_debug_return();
}

/*
 * Console claim op installed by plat_kdi_init(): sets the console input to
 * SGCN_OBP_STR.  The result of the PROM call is deliberately ignored.
 */
static void
sg_console_claim(void)
{
        (void) prom_serengeti_set_console_input(SGCN_OBP_STR);
}

/*
 * Console release op installed by plat_kdi_init(): sets the console input
 * to SGCN_CLNT_STR.  The result of the PROM call is deliberately ignored.
 */
static void
sg_console_release(void)
{
        (void) prom_serengeti_set_console_input(SGCN_CLNT_STR);
}

/*
 * Fill in the platform-specific KDI ops of *kdi with this module's system
 * and console claim/release routines.  No other field of *kdi is touched.
 */
void
plat_kdi_init(kdi_t *kdi)
{
        kdi->pkdi_system_claim = sg_system_claim;
        kdi->pkdi_system_release = sg_system_release;
        kdi->pkdi_console_claim = sg_console_claim;
        kdi->pkdi_console_release = sg_console_release;
}
Illumos