root/cddl/usr.sbin/zfsd/zfsd_event.cc
/*-
 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
 */

/**
 * \file zfsd_event.cc
 */
#include <sys/cdefs.h>
#include <sys/byteorder.h>
#include <sys/time.h>
#include <sys/fs/zfs.h>
#include <sys/vdev_impl.h>

#include <syslog.h>

#include <libzfs.h>
#include <libzutil.h>
/* 
 * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with
 * C++ flush methods
 */
#undef   flush
#undef  __init
#include <list>
#include <map>
#include <sstream>
#include <string>

#include <devdctl/guid.h>
#include <devdctl/event.h>
#include <devdctl/event_factory.h>
#include <devdctl/exception.h>
#include <devdctl/consumer.h>

#include "callout.h"
#include "vdev_iterator.h"
#include "zfsd_event.h"
#include "case_file.h"
#include "vdev.h"
#include "zfsd.h"
#include "zfsd_exception.h"
#include "zpool_list.h"
/*============================ Namespace Control =============================*/
using DevdCtl::Event;
using DevdCtl::Guid;
using DevdCtl::NVPairMap;
using std::stringstream;

/*=========================== Class Implementations ==========================*/

/*-------------------------------- GeomEvent --------------------------------*/

//- GeomEvent Static Public Methods -------------------------------------------
Event *
GeomEvent::Builder(Event::Type type,
                   NVPairMap &nvPairs,
                   const string &eventString)
{
        return (new GeomEvent(type, nvPairs, eventString));
}

//- GeomEvent Virtual Public Methods ------------------------------------------
Event *
GeomEvent::DeepCopy() const
{
        return (new GeomEvent(*this));
}
 
bool
GeomEvent::Process() const
{
        /*
         * We only use GEOM events to repair damaged pools.  So return early if
         * there are no damaged pools
         */
        if (CaseFile::Empty())
                return (false);

        /*
         * We are only concerned with arrivals and physical path changes,
         * because those can be used to satisfy online and autoreplace
         * operations
         */
        if (Value("type") != "GEOM::physpath" && Value("type") != "CREATE")
                return (false);

        /* Log the event since it is of interest. */
        Log(LOG_INFO);

        string devPath;
        if (!DevPath(devPath))
                return (false);

        int devFd(open(devPath.c_str(), O_RDONLY));
        if (devFd == -1)
                return (false);

        bool inUse;
        bool degraded;
        nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded));

        string physPath;
        bool havePhysPath(PhysicalPath(physPath));

        string devName;
        DevName(devName);
        close(devFd);

        if (inUse && devLabel != NULL) {
                OnlineByLabel(devPath, physPath, devLabel);
        } else if (degraded) {
                syslog(LOG_INFO, "%s is marked degraded.  Ignoring "
                       "as a replace by physical path candidate.\n",
                       devName.c_str());
        } else if (havePhysPath) {
                /* 
                 * TODO: attempt to resolve events using every casefile
                 * that matches this physpath
                 */
                CaseFile *caseFile(CaseFile::Find(physPath));
                if (caseFile != NULL) {
                        syslog(LOG_INFO,
                               "Found CaseFile(%s:%s:%s) - ReEvaluating\n",
                               caseFile->PoolGUIDString().c_str(),
                               caseFile->VdevGUIDString().c_str(),
                               zpool_state_to_name(caseFile->VdevState(),
                                                   VDEV_AUX_NONE));
                        caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL);
                }
        }
        return (false);
}

//- GeomEvent Protected Methods -----------------------------------------------
GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs,
                               const string &eventString)
 : DevdCtl::GeomEvent(type, nvpairs, eventString)
{
}

GeomEvent::GeomEvent(const GeomEvent &src)
 : DevdCtl::GeomEvent::GeomEvent(src)
{
}

nvlist_t *
GeomEvent::ReadLabel(int devFd, bool &inUse, bool &degraded)
{
        pool_state_t poolState;
        char        *poolName;
        boolean_t    b_inuse;
        int          nlabels;

        inUse    = false;
        degraded = false;
        poolName = NULL;
        if (zpool_in_use(g_zfsHandle, devFd, &poolState,
                         &poolName, &b_inuse) == 0) {
                nvlist_t *devLabel = NULL;

                inUse = b_inuse == B_TRUE;
                if (poolName != NULL)
                        free(poolName);

                if (zpool_read_label(devFd, &devLabel, &nlabels) != 0)
                        return (NULL);
                /*
                 * If we find a disk with fewer than the maximum number of
                 * labels, it might be the whole disk of a partitioned disk
                 * where ZFS resides on a partition.  In that case, we should do
                 * nothing and wait for the partition to appear.  Or, the disk
                 * might be damaged.  In that case, zfsd should do nothing and
                 * wait for the sysadmin to decide.
                 */
                if (nlabels != VDEV_LABELS || devLabel == NULL) {
                        nvlist_free(devLabel);
                        return (NULL);
                }

                try {
                        Vdev vdev(devLabel);
                        degraded = vdev.State() != VDEV_STATE_HEALTHY;
                        return (devLabel);
                } catch (ZfsdException &exp) {
                        string devName = fdevname(devFd);
                        string devPath = _PATH_DEV + devName;
                        string context("GeomEvent::ReadLabel: "
                                     + devPath + ": ");

                        exp.GetString().insert(0, context);
                        exp.Log();
                        nvlist_free(devLabel);
                }
        }
        return (NULL);
}

bool
GeomEvent::OnlineByLabel(const string &devPath, const string& physPath,
                              nvlist_t *devConfig)
{
        bool ret = false;
        try {
                CaseFileList case_list;
                /*
                 * A device with ZFS label information has been
                 * inserted.  If it matches a device for which we
                 * have a case, see if we can solve that case.
                 */
                syslog(LOG_INFO, "Interrogating VDEV label for %s\n",
                       devPath.c_str());
                Vdev vdev(devConfig);
                CaseFile::Find(vdev.PoolGUID(),vdev.GUID(), case_list);
                for (CaseFileList::iterator curr = case_list.begin();
                    curr != case_list.end(); curr++) {
                        ret |= (*curr)->ReEvaluate(devPath, physPath, &vdev);
                }
                return (ret);

        } catch (ZfsdException &exp) {
                string context("GeomEvent::OnlineByLabel: " + devPath + ": ");

                exp.GetString().insert(0, context);
                exp.Log();
        }
        return (ret);
}


/*--------------------------------- ZfsEvent ---------------------------------*/
//- ZfsEvent Static Public Methods ---------------------------------------------
DevdCtl::Event *
ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs,
                  const string &eventString)
{
        return (new ZfsEvent(type, nvpairs, eventString));
}

//- ZfsEvent Virtual Public Methods --------------------------------------------
Event *
ZfsEvent::DeepCopy() const
{
        return (new ZfsEvent(*this));
}

bool
ZfsEvent::Process() const
{
        string logstr("");

        if (!Contains("class") && !Contains("type")) {
                syslog(LOG_ERR,
                       "ZfsEvent::Process: Missing class or type data.");
                return (false);
        }

        /* On config syncs, replay any queued events first. */
        if (Value("type").find("sysevent.fs.zfs.config_sync") == 0) {
                /*
                 * Even if saved events are unconsumed the second time
                 * around, drop them.  Any events that still can't be
                 * consumed are probably referring to vdevs or pools that
                 * no longer exist.
                 */
                ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true);
                CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
        }

        if (Value("type").find("sysevent.fs.zfs.") == 0) {
                /* Configuration changes, resilver events, etc. */
                ProcessPoolEvent();
                return (false);
        }

        if (!Contains("pool_guid") || !Contains("vdev_guid")) {
                /* Only currently interested in Vdev related events. */
                return (false);
        }

        CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
        if (caseFile != NULL) {
                Log(LOG_INFO);
                syslog(LOG_INFO, "Evaluating existing case file\n");
                caseFile->ReEvaluate(*this);
                return (false);
        }

        /* Skip events that can't be handled. */
        Guid poolGUID(PoolGUID());
        /* If there are no replicas for a pool, then it's not manageable. */
        if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) {
                stringstream msg;
                msg << "No replicas available for pool "  << poolGUID;
                msg << ", ignoring";
                Log(LOG_INFO);
                syslog(LOG_INFO, "%s", msg.str().c_str());
                return (false);
        }

        /*
         * Create a case file for this vdev, and have it
         * evaluate the event.
         */
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
        if (zpl.empty()) {
                stringstream msg;
                int priority = LOG_INFO;
                msg << "ZfsEvent::Process: Event for unknown pool ";
                msg << poolGUID << " ";
                msg << "queued";
                Log(LOG_INFO);
                syslog(priority, "%s", msg.str().c_str());
                return (true);
        }

        nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID());
        if (vdevConfig == NULL) {
                stringstream msg;
                int priority = LOG_INFO;
                msg << "ZfsEvent::Process: Event for unknown vdev ";
                msg << VdevGUID() << " ";
                msg << "queued";
                Log(LOG_INFO);
                syslog(priority, "%s", msg.str().c_str());
                return (true);
        }

        Vdev vdev(zpl.front(), vdevConfig);
        caseFile = &CaseFile::Create(vdev);
        if (caseFile->VdevState() == VDEV_STATE_OFFLINE) {
                /*
                 * An administrator did this deliberately.  It's not considered
                 * an error that zfsd must fix.
                 */
                return (false);
        }
        if (caseFile->ReEvaluate(*this) == false) {
                stringstream msg;
                int priority = LOG_INFO;
                msg << "ZfsEvent::Process: Unconsumed event for vdev(";
                msg << zpool_get_name(zpl.front()) << ",";
                msg << vdev.GUID() << ") ";
                msg << "queued";
                Log(LOG_INFO);
                syslog(priority, "%s", msg.str().c_str());
                return (true);
        }
        return (false);
}

//- ZfsEvent Protected Methods -------------------------------------------------
ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs,
                           const string &eventString)
 : DevdCtl::ZfsEvent(type, nvpairs, eventString)
{
}

ZfsEvent::ZfsEvent(const ZfsEvent &src)
 : DevdCtl::ZfsEvent(src)
{
}

/*
 * Sometimes the kernel won't detach a spare when it is no longer needed.  This
 * can happen for example if a drive is removed, then either the pool is
 * exported or the machine is powered off, then the drive is reinserted, then
 * the machine is powered on or the pool is imported.  ZFSD must detach these
 * spares itself.
 */
void
ZfsEvent::CleanupSpares() const
{
        Guid poolGUID(PoolGUID());
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
        if (!zpl.empty()) {
                zpool_handle_t* hdl;

                hdl = zpl.front();
                VdevIterator(hdl).Each(TryDetach, (void*)hdl);
        }
}

void
ZfsEvent::ProcessPoolEvent() const
{
        bool degradedDevice(false);

        /* The pool is destroyed.  Discard any open cases */
        if (Value("type") == "sysevent.fs.zfs.pool_destroy") {
                Log(LOG_INFO);
                CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
                return;
        }

        CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
        if (caseFile != NULL) {
                if (caseFile->VdevState() != VDEV_STATE_UNKNOWN
                 && caseFile->VdevState() < VDEV_STATE_HEALTHY)
                        degradedDevice = true;

                Log(LOG_INFO);
                caseFile->ReEvaluate(*this);
        }
        else if (Value("type") == "sysevent.fs.zfs.resilver_finish")
        {
                /*
                 * It's possible to get a resilver_finish event with no
                 * corresponding casefile.  For example, if a damaged pool were
                 * exported, repaired, then reimported.
                 */
                Log(LOG_INFO);
                CleanupSpares();
        }

        if (Value("type") == "sysevent.fs.zfs.vdev_remove"
         && degradedDevice == false) {

                /* See if any other cases can make use of this device. */
                Log(LOG_INFO);
                ZfsDaemon::RequestSystemRescan();
        }
}

bool
ZfsEvent::TryDetach(Vdev &vdev, void *cbArg)
{
        /*
         * Outline:
         * if this device is a spare, and its parent includes one healthy,
         * non-spare child, then detach this device.
         */
        zpool_handle_t *hdl(static_cast<zpool_handle_t*>(cbArg));

        if (vdev.IsSpare()) {
                std::list<Vdev> siblings;
                std::list<Vdev>::iterator siblings_it;
                boolean_t cleanup = B_FALSE;

                Vdev parent = vdev.Parent();
                siblings = parent.Children();

                /* Determine whether the parent should be cleaned up */
                for (siblings_it = siblings.begin();
                     siblings_it != siblings.end();
                     siblings_it++) {
                        Vdev sibling = *siblings_it;

                        if (!sibling.IsSpare() &&
                             sibling.State() == VDEV_STATE_HEALTHY) {
                                cleanup = B_TRUE;
                                break;
                        }
                }

                if (cleanup) {
                        syslog(LOG_INFO, "Detaching spare vdev %s from pool %s",
                               vdev.Path().c_str(), zpool_get_name(hdl));
                        zpool_vdev_detach(hdl, vdev.Path().c_str());
                }

        }

        /* Always return false, because there may be other spares to detach */
        return (false);
}