root/usr/src/uts/common/os/retire_store.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi_implfuncs.h>
#include <sys/list.h>
#include <sys/reboot.h>
#include <sys/sysmacros.h>
#include <sys/console.h>
#include <sys/devcache.h>

/*
 * Names of the nvpairs stored in each per-device sub-nvlist of the
 * I/O retire store
 */
#define RIO_STORE_VERSION_STR   "rio-store-version"
#define RIO_STORE_MAGIC_STR     "rio-store-magic"
#define RIO_STORE_FLAGS_STR     "rio-store-flags"

#define RIO_STORE_VERSION_1     1
#define RIO_STORE_VERSION       RIO_STORE_VERSION_1

/*
 * Flag bits kept in rst_flags of each in-core retire list element.
 * They are combined and tested with bitwise operators.
 */

typedef enum rio_store_flags {
        RIO_STORE_F_INVAL = 0,          /* no flag bits set */
        RIO_STORE_F_RETIRED = 1,        /* device is recorded as retired */
        RIO_STORE_F_BYPASS = 2          /* in-core only: skip this boot */
} rio_store_flags_t;

/*
 * decoded retire list element
 */
typedef struct rio_store {
        char                    *rst_devpath;   /* kmem copy of device path */
        rio_store_flags_t       rst_flags;      /* RIO_STORE_F_* bits */
        list_node_t             rst_next;       /* linkage on the nvf list */
} rio_store_t;

#define RIO_STORE_MAGIC         0x601fcace      /* retire */

static int rio_store_decode(nvf_handle_t nvfh, nvlist_t *line_nvl, char *name);
static int rio_store_encode(nvf_handle_t nvfh, nvlist_t **ret_nvl);
static void retire_list_free(nvf_handle_t  nvfh);


/*
 * Retire I/O persistent store registration info
 *
 * This table is handed to nvf_register_file() by retire_store_init().
 * It is deliberately not const: on an interactive boot
 * retire_store_init() may repoint its nvfr_cache_path member at an
 * operator-supplied path before registering.
 */
static nvf_ops_t rio_store_ops = {
        "/etc/devices/retire_store",    /* path to store */
        rio_store_decode,               /* decode nvlist into retire_list */
        rio_store_encode,               /* encode retire_list into nvlist */
        retire_list_free,               /* free retire_list */
        NULL                            /* write complete callback */
};

/* handle returned by nvf_register_file(); used for every nvf_*() call here */
static nvf_handle_t     rio_store_handle;
/* receives the store path typed at the boot prompt (see retire_store_init) */
static char             store_path[MAXPATHLEN];
/*
 * Bitmask of STORE_DEBUG/STORE_TRACE tested by STORE_DBG()/STORE_TRC().
 * Nothing in this file sets it; presumably it is patched by hand when
 * debugging.
 */
static int              store_debug = 0;
/* one-shot latches: rio_store_decode() prints each message only once */
static int              bypass_msg = 0;
static int              retire_msg = 0;

/*
 * Bits in store_debug selecting which diagnostics are emitted.
 */
#define STORE_DEBUG     0x0001
#define STORE_TRACE     0x0002

/*
 * Conditional cmn_err() wrappers.  "args" is the complete, parenthesized
 * cmn_err() argument list, e.g.
 *
 *      STORE_DBG((CE_NOTE, "store: %s", path));
 *
 * Each body is wrapped in do { } while (0) so that an invocation is
 * exactly one statement.  A bare "if" here would silently capture the
 * "else" of any if/else statement the macro was used in.
 */
#define STORE_DBG(args)                                         \
        do {                                                    \
                if (store_debug & STORE_DEBUG)                  \
                        cmn_err args;                           \
        } while (0)

#define STORE_TRC(args)                                         \
        do {                                                    \
                if (store_debug & STORE_TRACE)                  \
                        cmn_err args;                           \
        } while (0)

/*
 * We don't use the simple read disable offered by the
 * caching framework (see devcache.c) as it will not
 * have the desired effect of bypassing the persistent
 * store. A simple read disable would
 *
 *      1. cause any additions to the cache to destroy the
 *         existing on-disk cache
 *
 *      2. prevent deletions from the existing on-disk
 *         cache which is needed for recovery from bad
 *         retire decisions.
 *
 * Use the following tunable instead.
 *
 * When it is non-zero, rio_store_decode() still loads every entry
 * into the in-core list but tags it with RIO_STORE_F_BYPASS, and
 * e_ddi_device_retired() ignores entries carrying that flag.
 * retire_store_init() sets the tunable when the operator answers
 * "/dev/null" at the boot-time store prompt.
 */
int     ddi_retire_store_bypass = 0;



/*
 * Initialize retire store data structures
 */
void
retire_store_init(void)
{
        /*
         * On an "ask me" boot, let the operator name an alternate
         * store file or bypass the store altogether.
         */
        if (boothowto & RB_ASKNAME) {
                printf("Retire store [%s] (/dev/null to bypass): ",
                    rio_store_ops.nvfr_cache_path);
                console_gets(store_path, sizeof (store_path) - 1);
                store_path[sizeof (store_path) - 1] = '\0';

                if (strcmp(store_path, "/dev/null") == 0) {
                        /* keep the default path, but bypass its contents */
                        ddi_retire_store_bypass = 1;
                } else if (store_path[0] == '/') {
                        /* absolute path: use it in place of the default */
                        rio_store_ops.nvfr_cache_path = store_path;
                } else if (store_path[0] != '\0') {
                        /* non-empty but not absolute: reject it */
                        printf("Invalid store path: %s. Using default"
                            "\n", store_path);
                }
                /* an empty answer silently keeps the default */
        }

        /* register with the caching framework, then set up its list */
        rio_store_handle = nvf_register_file(&rio_store_ops);

        list_create(nvf_list(rio_store_handle), sizeof (rio_store_t),
            offsetof(rio_store_t, rst_next));
}

/*
 * Read and populate the in-core retire store
 */
void
retire_store_read(void)
{
        /* the framework lock is held as writer for the whole load */
        rw_enter(nvf_lock(rio_store_handle), RW_WRITER);
        /* the in-core list is expected to still be empty at this point */
        ASSERT(list_head(nvf_list(rio_store_handle)) == NULL);
        /* the result is deliberately ignored */
        (void) nvf_read_file(rio_store_handle);
        rw_exit(nvf_lock(rio_store_handle));
        STORE_DBG((CE_NOTE, "Read on-disk retire store"));
}

/*
 * Free one retire list element together with its device path string.
 * The element must carry the RETIRED flag and no unknown flag bits.
 * This routine does not unlink the element; the callers in this file
 * remove it from the list first.
 */
static void
rio_store_free(rio_store_t *rsp)
{
        char    *path;

        ASSERT(rsp);
        ASSERT(rsp->rst_devpath);
        ASSERT(rsp->rst_flags & RIO_STORE_F_RETIRED);
        ASSERT(!(rsp->rst_flags &
            ~(RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS)));

        path = rsp->rst_devpath;

        STORE_TRC((CE_NOTE, "store: freed path: %s", path));

        /* the path was allocated as strlen + 1 bytes */
        kmem_free(path, strlen(path) + 1);
        kmem_free(rsp, sizeof (rio_store_t));
}

/*
 * "Free list" callback registered in rio_store_ops: empty the in-core
 * retire list, freeing every element.  The caller must hold the
 * framework lock as writer.
 */
static void
retire_list_free(nvf_handle_t nvfh)
{
        list_t          *retire_list;
        rio_store_t     *entry;

        ASSERT(nvfh == rio_store_handle);
        ASSERT(RW_WRITE_HELD(nvf_lock(nvfh)));

        retire_list = nvf_list(nvfh);

        /* keep taking the head until the list is empty */
        while ((entry = list_head(retire_list)) != NULL) {
                list_remove(retire_list, entry);
                rio_store_free(entry);
        }

        STORE_DBG((CE_NOTE, "store: freed retire list"));
}

/*
 * Decode callback registered in rio_store_ops: validate one per-device
 * sub-nvlist and append a matching element to the in-core retire list.
 *
 *      nvfh            must be rio_store_handle
 *      line_nvl        sub-nvlist holding the version, magic and flags
 *      name            device path; copied into the new element
 *
 * Returns 0 on success.  Returns EINVAL, adding nothing, when any of the
 * three nvpairs is missing or does not hold the expected value.  The
 * caller must hold the framework lock as writer.
 */
static int
rio_store_decode(nvf_handle_t nvfh, nvlist_t *line_nvl, char *name)
{
        rio_store_t     *rsp;
        int32_t         version;
        int32_t         magic;
        int32_t         flags;
        int             rval;

        ASSERT(nvfh == rio_store_handle);
        ASSERT(RW_WRITE_HELD(nvf_lock(nvfh)));
        ASSERT(name);

        version = 0;
        rval = nvlist_lookup_int32(line_nvl, RIO_STORE_VERSION_STR, &version);
        if (rval != 0 || version != RIO_STORE_VERSION) {
                return (EINVAL);
        }

        magic = 0;
        rval = nvlist_lookup_int32(line_nvl, RIO_STORE_MAGIC_STR, &magic);
        if (rval != 0 || magic != RIO_STORE_MAGIC) {
                return (EINVAL);
        }

        /* only the RETIRED flag is ever stored; anything else is invalid */
        flags = 0;
        rval = nvlist_lookup_int32(line_nvl, RIO_STORE_FLAGS_STR, &flags);
        if (rval != 0 || flags != RIO_STORE_F_RETIRED) {
                return (EINVAL);
        }

        /*
         * When bypassing, the entry is still kept in core but is tagged
         * so that e_ddi_device_retired() ignores it.
         */
        if (ddi_retire_store_bypass) {
                flags |= RIO_STORE_F_BYPASS;
                if (!bypass_msg) {
                        bypass_msg = 1;
                        /*
                         * Report the configured path rather than a fixed
                         * string: retire_store_init() may have replaced
                         * the default with an operator-supplied path.
                         */
                        cmn_err(CE_WARN, "Bypassing retire store %s",
                            rio_store_ops.nvfr_cache_path);
                }
        }

        rsp = kmem_zalloc(sizeof (rio_store_t), KM_SLEEP);
        rsp->rst_devpath = i_ddi_strdup(name, KM_SLEEP);
        rsp->rst_flags = flags;
        list_insert_tail(nvf_list(nvfh), rsp);

        STORE_TRC((CE_NOTE, "store: added to retire list: %s", name));
        /* announce once that the store contained at least one entry */
        if (!retire_msg) {
                retire_msg = 1;
                cmn_err(CE_NOTE, "One or more I/O devices have been retired");
        }

        return (0);
}

/*
 * Encode callback registered in rio_store_ops: pack the in-core retire
 * list into a newly allocated nvlist.
 *
 * The result holds one sub-nvlist per list element, named by the
 * element's device path and carrying the store version, magic and
 * flags.  On success the nvlist is returned through *ret_nvl and the
 * function returns DDI_SUCCESS.  On failure *ret_nvl is left NULL,
 * everything allocated here is freed and DDI_FAILURE is returned.
 * The caller must hold the framework lock as writer.
 */
static int
rio_store_encode(nvf_handle_t nvfh, nvlist_t **ret_nvl)
{
        nvlist_t        *store_nvl = NULL;
        nvlist_t        *dev_nvl = NULL;
        list_t          *listp;
        rio_store_t     *rsp;

        ASSERT(nvfh == rio_store_handle);
        ASSERT(RW_WRITE_HELD(nvf_lock(nvfh)));

        *ret_nvl = NULL;

        if (nvlist_alloc(&store_nvl, NV_UNIQUE_NAME, KM_SLEEP) != 0)
                return (DDI_FAILURE);

        listp = nvf_list(nvfh);
        rsp = list_head(listp);
        while (rsp != NULL) {
                ASSERT(rsp->rst_devpath);
                ASSERT(!(rsp->rst_flags &
                    ~(RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS)));

                dev_nvl = NULL;
                if (nvlist_alloc(&dev_nvl, NV_UNIQUE_NAME, KM_SLEEP) != 0) {
                        dev_nvl = NULL;
                        goto fail;
                }

                /*
                 * Fill in the per-device sub-nvlist and attach it under
                 * the device path.  Evaluation stops at the first call
                 * that fails.  The bypass flag is never saved: only
                 * RIO_STORE_F_RETIRED is written out.
                 */
                if (nvlist_add_int32(dev_nvl, RIO_STORE_VERSION_STR,
                    RIO_STORE_VERSION) != 0 ||
                    nvlist_add_int32(dev_nvl, RIO_STORE_MAGIC_STR,
                    RIO_STORE_MAGIC) != 0 ||
                    nvlist_add_int32(dev_nvl, RIO_STORE_FLAGS_STR,
                    RIO_STORE_F_RETIRED) != 0 ||
                    nvlist_add_nvlist(store_nvl, rsp->rst_devpath,
                    dev_nvl) != 0) {
                        goto fail;
                }

                /* the sub-nvlist is no longer needed once it is attached */
                nvlist_free(dev_nvl);
                dev_nvl = NULL;

                rsp = list_next(listp, rsp);
        }

        *ret_nvl = store_nvl;
        STORE_DBG((CE_NOTE, "packed retire list into nvlist"));
        return (DDI_SUCCESS);

fail:
        nvlist_free(dev_nvl);
        ASSERT(store_nvl);
        nvlist_free(store_nvl);
        return (DDI_FAILURE);
}

/*
 * Record devpath in the retire store.
 *
 * If the path is already on the in-core list, its bypass flag (if any)
 * is cleared and nothing else changes.  Otherwise a new element is
 * appended, the store is marked dirty and the flush daemon is woken.
 * Always returns 0.
 */
int
e_ddi_retire_persist(char *devpath)
{
        rio_store_t     *rsp;
        rio_store_t     *new_rsp;
        list_t          *listp;

        STORE_DBG((CE_NOTE, "e_ddi_retire_persist: entered: %s", devpath));

        /* build the candidate element before the lock is taken */
        new_rsp = kmem_zalloc(sizeof (*new_rsp), KM_SLEEP);
        new_rsp->rst_devpath = i_ddi_strdup(devpath, KM_SLEEP);
        new_rsp->rst_flags = RIO_STORE_F_RETIRED;

        rw_enter(nvf_lock(rio_store_handle), RW_WRITER);

        /* look for an existing element with the same path */
        listp = nvf_list(rio_store_handle);
        for (rsp = list_head(listp); rsp != NULL;
            rsp = list_next(listp, rsp)) {
                ASSERT(!(rsp->rst_flags &
                    ~(RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS)));

                if (strcmp(devpath, rsp->rst_devpath) == 0)
                        break;
        }

        if (rsp != NULL) {
                /* already there: explicit retire, clear bypass (if any) */
                rsp->rst_flags &= ~RIO_STORE_F_BYPASS;
                ASSERT(rsp->rst_flags == RIO_STORE_F_RETIRED);
                rw_exit(nvf_lock(rio_store_handle));

                /* the candidate built above is not needed */
                kmem_free(new_rsp->rst_devpath,
                    strlen(new_rsp->rst_devpath) + 1);
                kmem_free(new_rsp, sizeof (*new_rsp));
                STORE_DBG((CE_NOTE, "store: already in. Clear bypass "
                    ": %s", devpath));
                return (0);
        }

        /* not found: append the new element and schedule a flush */
        list_insert_tail(listp, new_rsp);

        nvf_mark_dirty(rio_store_handle);

        rw_exit(nvf_lock(rio_store_handle));

        nvf_wake_daemon();

        STORE_DBG((CE_NOTE, "store: New, added to list, dirty: %s", devpath));

        return (0);
}

/*
 * Remove every element whose path equals devpath from the retire store.
 *
 * Returns 1 if at least one element was removed, in which case the
 * store has been marked dirty and the flush daemon woken.  Returns 0 if
 * the path was not on the list.
 */
int
e_ddi_retire_unpersist(char *devpath)
{
        rio_store_t     *rsp;
        list_t          *listp;
        int             is_dirty = 0;

        STORE_DBG((CE_NOTE, "e_ddi_retire_unpersist: entered: %s", devpath));

        rw_enter(nvf_lock(rio_store_handle), RW_WRITER);

        listp = nvf_list(rio_store_handle);
        rsp = list_head(listp);
        while (rsp != NULL) {
                rio_store_t     *victim = rsp;

                /* step forward first: victim may be freed below */
                rsp = list_next(listp, victim);

                if (strcmp(devpath, victim->rst_devpath) == 0) {
                        list_remove(listp, victim);
                        rio_store_free(victim);

                        STORE_DBG((CE_NOTE, "store: found in list. Freed: %s",
                            devpath));

                        nvf_mark_dirty(rio_store_handle);
                        is_dirty = 1;
                }
        }

        rw_exit(nvf_lock(rio_store_handle));

        if (is_dirty)
                nvf_wake_daemon();

        return (is_dirty);
}

/*
 * Report whether devpath is retired for the current boot.
 *
 * Returns 1 if the in-core list holds a non-bypassed element whose path
 * is devpath itself or one of its ancestors, and 0 otherwise.
 */
int
e_ddi_device_retired(char *devpath)
{
        list_t          *listp;
        rio_store_t     *rsp;
        size_t          plen;
        int             found = 0;

        rw_enter(nvf_lock(rio_store_handle), RW_READER);

        listp = nvf_list(rio_store_handle);
        for (rsp = list_head(listp); rsp != NULL;
            rsp = list_next(listp, rsp)) {
                ASSERT(!(rsp->rst_flags &
                    ~(RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS)));

                /*
                 * An element carrying the "bypass" flag was read from
                 * the store but is not to be honoured during this boot,
                 * so it cannot make a device retired.  See the comments
                 * for the tunable ddi_retire_store_bypass for why such
                 * elements are kept in core at all.
                 */
                if (rsp->rst_flags & RIO_STORE_F_BYPASS) {
                        STORE_TRC((CE_NOTE, "store: found & bypassed: %s",
                            rsp->rst_devpath));
                        continue;
                }

                /*
                 * The element covers devpath when its path is a prefix
                 * of devpath that ends on a component boundary: either
                 * the end of devpath (exact match) or a '/' (devpath
                 * is a child).
                 */
                plen = strlen(rsp->rst_devpath);
                if (strncmp(devpath, rsp->rst_devpath, plen) == 0 &&
                    (devpath[plen] == '\0' || devpath[plen] == '/')) {
                        found = 1;
                        STORE_TRC((CE_NOTE, "store: found & !bypassed: %s",
                            devpath));
                        break;
                }
        }
        rw_exit(nvf_lock(rio_store_handle));

        return (found);
}