/* root/usr/src/uts/sun4/sys/async.h */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef _SYS_ASYNC_H
#define _SYS_ASYNC_H

#pragma ident   "%Z%%M% %I%     %E% SMI"

#include <sys/privregs.h>

#ifdef  __cplusplus
extern "C" {
#endif

#ifndef _ASM

#include <sys/errorq.h>

/*
 * The async_flt structure is used to record all pertinent information about
 * an asynchronous CPU or bus-related memory error.  Typically, the structure
 * is initialized by a high-level interrupt or trap handler, and then enqueued
 * for later processing.  Separate queues are maintained for correctable and
 * uncorrectable errors.  The current CPU module determines the size of the
 * queue elements, so that it may declare a CPU-specific fault structure
 * which contains a struct async_flt as its first member.  Each async_flt also
 * contains a callback function (flt_func) that is invoked by the processing
 * code in order to actually log messages when the event is dequeued.  This
 * function may be called from a softint, from trap() as part of AST handling
 * before the victim thread returns to userland, or as part of panic().  As
 * such, the flt_func should basically only be calling cmn_err (but NOT with
 * the CE_PANIC flag).  It must not call panic(), acquire locks, or block.
 * The owner of the event is responsible for determining whether the event is
 * fatal; if so, the owner should set flt_panic and panic() after enqueuing
 * the event.  The event will then be dequeued and logged as part of panic
 * processing.  If flt_panic is not set, the queue function will schedule a
 * soft interrupt to process the event.
 */

/*
 * Forward declaration so that the callback type below can name the fault
 * structure before its full definition.
 */
struct async_flt;

/*
 * Type of the logging callback stored in async_flt.flt_func.  It is handed
 * the fault record plus a char * argument whose meaning is not defined in
 * this header.
 */
typedef void (*async_func_t)(struct async_flt *, char *);

/*
 * Generic asynchronous fault record.  A CPU module may embed this as the
 * first member of a larger CPU-specific fault structure.  Several of the
 * small integer fields take their values from constant families defined
 * later in this file: flt_status holds ECC_* flags, flt_class a *_FAULT
 * class, flt_prot an AFLT_PROT_* value, and flt_addr can be AFLT_INV_ADDR
 * when the physical address is invalid or unknown.
 */
struct async_flt {
        uint64_t        flt_id;         /* gethrtime() at time of fault */
        uint64_t        flt_stat;       /* async fault status register */
        uint64_t        flt_addr;       /* async fault address register */
        caddr_t         flt_pc;         /* program counter from error trap */
        async_func_t    flt_func;       /* logging function (see above) */
        uint_t          flt_bus_id;     /* hardware bus id# of cpu/sbus/pci */
        uint_t          flt_inst;       /* software instance of cpu/sbus/pci */
        ushort_t        flt_status;     /* error information (ECC_* flags) */
        ushort_t        flt_synd;       /* ECC syndrome */
        uchar_t         flt_in_memory;  /* fault occurred in memory if != 0 */
        uchar_t         flt_class;      /* fault class (*_FAULT value) */
        uchar_t         flt_prot;       /* fault protection (AFLT_PROT_*) */
        uchar_t         flt_priv;       /* fault occurred in kernel if != 0 */
        uchar_t         flt_panic;      /* fault caused owner to panic() */
        uchar_t         flt_tl;         /* fault occurred at TL > 0 */
        uchar_t         flt_core;       /* fault occurred during core() dump */
        uchar_t         flt_pad;        /* reserved for future use */
        uint64_t        flt_disp;       /* error disposition information */
        uint64_t        flt_payload;    /* ereport payload information */
        char            *flt_erpt_class; /* ereport class string */
};

/*
 * Bus nexus drivers can use the bus_func_register() interface to register
 * callback functions for error handling and panic handling.  The handler
 * functions should be registered and unregistered from driver attach and
 * detach context, where it is safe to perform a sleeping allocation.  The
 * callbacks themselves can be invoked from panic, or from the CPU module's
 * asynchronous trap handler at high PIL.  As such, these routines may only
 * test for errors and enqueue async_flt events.  They may not grab adaptive
 * locks, call panic(), or invoke bus_func_register() or bus_func_unregister().
 * Each callback function should return one of the BF_* return status values
 * below.  The bus_func_invoke() function calls all the registered handlers of
 * the specified type, and returns the maximum of their return values (e.g.
 * BF_FATAL if any callback returned BF_FATAL).  If any callback returns
 * BF_FATAL, the system will panic at the end of callback processing.
 */

/*
 * Bus error/panic callback.  Returns one of the BF_* status values; the
 * void * parameter is presumably the bf_arg recorded at registration time.
 */
typedef uint_t (*busfunc_t)(void *);

/*
 * Callback types: identify which class of registered handler is meant
 * (recorded in bus_func_desc.bf_type).
 */
#define BF_TYPE_UE              1       /* check for uncorrectable errors */
#define BF_TYPE_ERRDIS          2       /* disable error detection */
#define BF_TYPE_RESINTR         3       /* reset interrupts */

/*
 * Callback return status.  The values increase with severity, so taking the
 * maximum over all handlers yields the worst status that was reported.
 */
#define BF_NONE                 0       /* no errors were detected */
#define BF_NONFATAL             1       /* one or more non-fatal errors found */
#define BF_FATAL                2       /* one or more fatal errors found */

/*
 * Descriptor for one registered bus callback.  Descriptors are chained
 * through bf_next to form a singly linked list.
 */
typedef struct bus_func_desc {
        int bf_type;                    /* type of function (BF_TYPE_*) */
        busfunc_t bf_func;              /* function to call */
        void *bf_arg;                   /* function argument */
        struct bus_func_desc *bf_next;  /* pointer to next registered desc */
} bus_func_desc_t;

/*
 * Bus nexus error-handling interfaces.  bus_func_register() and
 * bus_func_unregister() add and remove callbacks; bus_func_invoke() calls
 * every registered handler of the given type and returns the maximum BF_*
 * status among them.
 */
extern void bus_func_register(int, busfunc_t, void *);
extern void bus_func_unregister(int, busfunc_t, void *);
extern void bus_async_log_err(struct async_flt *);
extern uint_t bus_func_invoke(int);

/* Error-handling entry points implemented outside this header. */
extern void ecc_cpu_call(struct async_flt *, char *, int);

extern void ce_scrub(struct async_flt *);
extern void ecc_page_zero(void *);

extern void error_init(void);

/*
 * Integer globals defined elsewhere.
 * NOTE(review): the names suggest verbosity/debug controls for correctable
 * (ce) and uncorrectable (ue) error reporting -- confirm at the definitions.
 */
extern  int     ce_verbose_memory;
extern  int     ce_verbose_other;
extern  int     ce_show_data;
extern  int     ce_debug;
extern  int     ue_debug;

/* NOTE(review): "aft" presumably stands for async fault -- confirm. */
extern  int     aft_verbose;
extern  int     aft_panic;
extern  int     aft_testfatal;

/* Fault record defined elsewhere; its use is not visible from this header. */
extern struct async_flt panic_aflt;

/*
 * Error queues; presumably the separate correctable (ce) and uncorrectable
 * (ue) queues mentioned in the async_flt description in this file.
 */
extern errorq_t *ce_queue;
extern errorq_t *ue_queue;

#endif  /* !_ASM */

/*
 * ECC or parity error status for async_flt.flt_status.  Each value is a
 * distinct single bit (flag-style encoding), and all of them fit in the
 * ushort_t flt_status field; bit 0x8000 is not assigned here.
 */
#define ECC_C_TRAP              0x0001  /* Trap 0x63 Corrected ECC Error */
#define ECC_I_TRAP              0x0002  /* Trap 0x0A Instr Access Error */
#define ECC_ECACHE              0x0004  /* Ecache ECC Error */
#define ECC_IOBUS               0x0008  /* PCI or sysio ECC Error */
#define ECC_INTERMITTENT        0x0010  /* Intermittent ECC Error */
#define ECC_PERSISTENT          0x0020  /* Persistent ECC Error */
#define ECC_STICKY              0x0040  /* Sticky ECC Error */
#define ECC_D_TRAP              0x0080  /* Trap 0x32 Data Access Error */
#define ECC_F_TRAP              0x0100  /* Cheetah Trap 0x70 Fast ECC Error */
#define ECC_DP_TRAP             0x0200  /* Cheetah+ Trap 0x71 D$ Parity Error */
#define ECC_IP_TRAP             0x0400  /* Cheetah+ Trap 0x72 I$ Parity Error */
#define ECC_ITLB_TRAP           0x0800  /* Panther ITLB Parity Error */
#define ECC_DTLB_TRAP           0x1000  /* Panther DTLB Parity Error */
#define ECC_IO_CE               0x2000  /* PCI or sysio CE */
#define ECC_IO_UE               0x4000  /* PCI or sysio UE */

/*
 * Trap type numbers corresponding to the ECC_*_TRAP fault types defined
 * above.  The trailing comment on each line names the matching status flag.
 */
#define TRAP_TYPE_ECC_I         0x0A    /* ECC_I_TRAP */
#define TRAP_TYPE_ECC_D         0x32    /* ECC_D_TRAP */
#define TRAP_TYPE_ECC_F         0x70    /* ECC_F_TRAP */
#define TRAP_TYPE_ECC_C         0x63    /* ECC_C_TRAP */
#define TRAP_TYPE_ECC_DP        0x71    /* ECC_DP_TRAP */
#define TRAP_TYPE_ECC_IP        0x72    /* ECC_IP_TRAP */
#define TRAP_TYPE_ECC_ITLB      0x08    /* ECC_ITLB_TRAP */
#define TRAP_TYPE_ECC_DTLB      0x30    /* ECC_DTLB_TRAP */
#define TRAP_TYPE_UNKNOWN       0       /* no trap type known */

/*
 * Fault classes for async_flt.flt_class.  Every value fits in that uchar_t
 * field.  The RECIRC_* classes are the bus and CPU variants used for
 * scheduled diagnostics.
 */
#define BUS_FAULT               0       /* originating from bus drivers */
#define CPU_FAULT               1       /* originating from CPUs */
#define RECIRC_BUS_FAULT        2       /* scheduled diagnostic */
#define RECIRC_CPU_FAULT        3       /* scheduled diagnostic */

/*
 * Invalid or unknown physical address for async_flt.flt_addr.  Negating the
 * unsigned constant 1ULL wraps around, so this is the all-ones value of
 * unsigned long long.
 */
#define AFLT_INV_ADDR   (-1ULL)

/*
 * Fault protection values for async_flt.flt_prot.  The async error handling
 * code may be able to recover from an error when the kernel code that took
 * it had explicitly protected itself using one of the mechanisms listed
 * here.  AFLT_PROT_NONE means that no such protection was active.
 */
#define AFLT_PROT_NONE          0       /* no protection active */
#define AFLT_PROT_ACCESS        1       /* on_trap OT_DATA_ACCESS protection */
#define AFLT_PROT_EC            2       /* on_trap OT_DATA_EC protection */
#define AFLT_PROT_COPY          3       /* t_lofault protection (ucopy, etc.) */

/*
 * These flags indicate the validity of certain captured data, based on the
 * overwrite priority features of the AFSR/AFAR.  They apply to the AFAR,
 * ESYND and MSYND, each of which has its own overwrite priorities.
 *
 * Given a specific afsr error bit and the entire afsr, there are three cases:
 *   INVALID:   The specified bit is lower overwrite priority than some other
 *              error bit which is on in the afsr (or IVU/IVC).
 *   VALID:     The specified bit is higher priority than all other error bits
 *              which are on in the afsr.
 *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
 *              bit is on in the afsr.
 *
 * NB: The domain-to-SC communications depend on these values. If they are
 * changed, plat_ecc_unum.[ch] must be updated to match.
 */
#define AFLT_STAT_INVALID       0       /* higher priority afsr bit is on */
#define AFLT_STAT_VALID         1       /* this is highest priority afsr bit */
#define AFLT_STAT_AMBIGUOUS     2       /* two afsr bits of equal priority */

/*
 * Maximum length of unum string.
 * NOTE(review): this header does not say whether the terminating null is
 * counted (contrast DIMM_SERIAL_ID_LEN) -- confirm before sizing buffers.
 */
#define UNUM_NAMLEN     60

/*
 * Maximum length of a DIMM serial id string, including the terminating
 * null byte.
 */
#define DIMM_SERIAL_ID_LEN      16

#ifdef  __cplusplus
}
#endif

#endif  /* _SYS_ASYNC_H */