root/usr/src/cmd/cmd-inet/usr.lib/vrrpd/vrrpd.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysevent/vrrp.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/varargs.h>
#include <auth_attr.h>
#include <ctype.h>
#include <fcntl.h>
#include <stdlib.h>
#include <strings.h>
#include <errno.h>
#include <unistd.h>
#include <zone.h>
#include <libsysevent.h>
#include <limits.h>
#include <locale.h>
#include <arpa/inet.h>
#include <signal.h>
#include <assert.h>
#include <ucred.h>
#include <bsm/adt.h>
#include <bsm/adt_event.h>
#include <priv_utils.h>
#include <libdllink.h>
#include <libdlvnic.h>
#include <libipadm.h>
#include <pwd.h>
#include <libvrrpadm.h>
#include <net/route.h>
#include "vrrpd_impl.h"

/*
 * A VRRP router can only start participating in the VRRP protocol of a virtual
 * router when all the following conditions are met:
 *
 * - The VRRP router is enabled (vr->vvr_conf.vvc_enabled is _B_TRUE)
 * - The RX socket is successfully created over the physical interface to
 *   receive the VRRP multicast advertisement. Note that one RX socket can
 *   be shared by several VRRP routers configured over the same physical
 *   interface. (See vrrpd_init_rxsock())
 * - The TX socket is successfully created over the VNIC interface to send
 *   the VRRP advertisement. (See vrrpd_init_txsock())
 * - The primary IP address has been successfully selected over the physical
 *   interface. (See vrrpd_select_primary())
 *
 * If a VRRP router is enabled but the other conditions haven't been satisfied,
 * the router will stay in the VRRP_STATE_INIT state. If all the above
 * conditions are met, the VRRP router will transition to either
 * the VRRP_STATE_MASTER or the VRRP_STATE_BACKUP state, depending on the VRRP
 * protocol.
 */

/*
 * Advance the character pointer p past any leading whitespace.
 *
 * p is evaluated more than once, so it must be a plain lvalue without side
 * effects. The character is converted to unsigned char before it is handed
 * to isspace(): passing a negative char value (other than EOF) to the
 * <ctype.h> functions is undefined behavior.
 */
#define skip_whitespace(p)      while (isspace((unsigned char)*(p))) ++(p)

/*
 * Buffer size constant.
 * NOTE(review): the users of BUFFSIZE are not visible in this part of the
 * file -- confirm what it bounds before changing it.
 */
#define BUFFSIZE        65536

/*
 * Default path of the VRRP configuration file. main() copies it into
 * vrrpd_conffile; the -f option replaces it.
 */
#define VRRPCONF        "/etc/inet/vrrp.conf"

/*
 * State of one routing socket. vrrpd_rtsocks[] holds one entry per address
 * family (AF_INET and AF_INET6); vrt_fd and vrt_eid start out as -1 there.
 */
typedef struct vrrpd_rtsock_s {
        int             vrt_af;         /* address family */
        int             vrt_fd;         /* socket for the PF_ROUTE msg */
        iu_event_id_t   vrt_eid;        /* event ID */
} vrrpd_rtsock_t;

/*
 * Daemon-wide state. The handles, the event handler and the timer queue are
 * set up by vrrpd_init() and released by vrrpd_fini(); descriptors and event
 * or timer IDs use -1 as their initial value.
 */
static ipadm_handle_t   vrrp_ipadm_handle = NULL;       /* libipadm handle */
/* Set to 1 by daemon_init() right after openlog(). */
static int              vrrp_logflag = 0;
/*
 * Debug level taken from the -d option. A non-zero value makes main() skip
 * daemon_init(), so vrrpd stays in the foreground.
 * NOTE(review): declared boolean_t although main() stores atoi() output here.
 */
boolean_t               vrrp_debug_level = 0;
/* Event handler and timer queue driven by iu_handle_events() in main(). */
iu_eh_t                 *vrrpd_eh = NULL;
iu_tq_t                 *vrrpd_timerq = NULL;
/* Handle obtained from vrrp_open() in vrrpd_init(). */
static vrrp_handle_t    vrrpd_vh = NULL;
static int              vrrpd_cmdsock_fd = -1;  /* socket to communicate */
                                                /* between vrrpd/libvrrpadm */
/* Event ID under which vrrpd_cmdsock_fd is registered with vrrpd_eh. */
static iu_event_id_t    vrrpd_cmdsock_eid = -1;
static int              vrrpd_ctlsock_fd = -1;  /* socket to bring up/down */
                                                /* the virtual IP addresses */
/* Presumably the IPv6 counterpart of vrrpd_ctlsock_fd -- TODO confirm. */
static int              vrrpd_ctlsock6_fd = -1;
/* One routing socket per address family. */
static vrrpd_rtsock_t   vrrpd_rtsocks[2] = {
        {AF_INET, -1, -1},
        {AF_INET6, -1, -1}
};
/* ID of the periodic scan timer scheduled by vrrpd_init(). */
static iu_timer_id_t    vrrp_scan_timer_id = -1;

/*
 * List heads for the VRRP routers and for the interfaces known to the
 * daemon. Both lists are initialized in vrrpd_init() and emptied in
 * vrrpd_cleanup().
 */
TAILQ_HEAD(vrrp_vr_list_s, vrrp_vr_s);
TAILQ_HEAD(vrrp_intf_list_s, vrrp_intf_s);
static struct vrrp_vr_list_s    vrrp_vr_list;
static struct vrrp_intf_list_s  vrrp_intf_list;
/* Path of the configuration file; VRRPCONF unless -f is given (see main()). */
static char             vrrpd_conffile[MAXPATHLEN];

/*
 * Multicast address of VRRP advertisement in network byte order
 * (224.0.0.18 and ff02::12, filled in by vrrpd_init()).
 */
static vrrp_addr_t      vrrp_muladdr4;
static vrrp_addr_t      vrrp_muladdr6;

/* Interval passed to iu_schedule_timer_ms() for the scan timer. */
static int              vrrpd_scan_interval = 20000;    /* ms */
/*
 * Pipe created by daemon_init(): the child writes its startup status to
 * pfds[1] (vrrpd_inform_parent_exit()) and the parent reads it from pfds[0].
 */
static int              pfds[2];

/*
 * macros to calculate skew_time and master_down_timer
 *
 *   SKEW_TIME            = intv * (256 - pri) / 256
 *   MASTER_DOWN_INTERVAL = 3 * intv + SKEW_TIME
 *
 * Note that the input is in centisecs and output are in msecs
 *
 * NOTE(review): the macros themselves perform no unit conversion, so the
 * result is in whatever unit intv is passed in -- confirm the unit of
 * vvr_master_adver_int against the statement above. The arguments are
 * expanded more than once; do not pass expressions with side effects.
 */
#define SKEW_TIME(pri, intv)    ((intv) * (256 - (pri)) / 256)
#define MASTER_DOWN_INTERVAL(pri, intv) (3 * (intv) + SKEW_TIME((pri), (intv)))

/* Convenience forms that take the values from a vrrp_vr_t. */
#define SKEW_TIME_VR(vr)        \
        SKEW_TIME((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)
#define MASTER_DOWN_INTERVAL_VR(vr)     \
        MASTER_DOWN_INTERVAL((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)

/*
 * Operation flags; presumably the uint_t argument of vrrpd_updateconf()
 * -- TODO confirm against its definition.
 */
#define VRRP_CONF_UPDATE        0x01
#define VRRP_CONF_DELETE        0x02

static char *af_str(int);

static iu_tq_callback_t vrrp_adv_timeout;
static iu_tq_callback_t vrrp_b2m_timeout;
static iu_eh_callback_t vrrpd_sock_handler;
static iu_eh_callback_t vrrpd_rtsock_handler;
static iu_eh_callback_t vrrpd_cmdsock_handler;

static int daemon_init();

static vrrp_err_t vrrpd_init();
static void vrrpd_fini();
static vrrp_err_t vrrpd_cmdsock_create();
static void vrrpd_cmdsock_destroy();
static vrrp_err_t vrrpd_rtsock_create();
static void vrrpd_rtsock_destroy();
static vrrp_err_t vrrpd_ctlsock_create();
static void vrrpd_ctlsock_destroy();

static void vrrpd_scan_timer(iu_tq_t *, void *);
static void vrrpd_scan(int);
static vrrp_err_t vrrpd_init_rxsock(vrrp_vr_t *);
static void vrrpd_fini_rxsock(vrrp_vr_t *);
static vrrp_err_t vrrpd_init_txsock(vrrp_vr_t *);
static vrrp_err_t vrrpd_init_txsock_v4(vrrp_vr_t *);
static vrrp_err_t vrrpd_init_txsock_v6(vrrp_vr_t *);
static void vrrpd_fini_txsock(vrrp_vr_t *);

static vrrp_err_t vrrpd_create_vr(vrrp_vr_conf_t *);
static vrrp_err_t vrrpd_enable_vr(vrrp_vr_t *);
static void vrrpd_disable_vr(vrrp_vr_t *, vrrp_intf_t *, boolean_t);
static void vrrpd_delete_vr(vrrp_vr_t *);

static vrrp_err_t vrrpd_create(vrrp_vr_conf_t *, boolean_t);
static vrrp_err_t vrrpd_delete(const char *);
static vrrp_err_t vrrpd_enable(const char *, boolean_t);
static vrrp_err_t vrrpd_disable(const char *);
static vrrp_err_t vrrpd_modify(vrrp_vr_conf_t *, uint32_t);
static void vrrpd_list(vrid_t, char *, int, vrrp_ret_list_t *, size_t *);
static void vrrpd_query(const char *, vrrp_ret_query_t *, size_t *);

static boolean_t vrrp_rd_prop_name(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_vrid(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_af(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_pri(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_adver_int(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_preempt(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_accept(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_ifname(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_enabled(vrrp_vr_conf_t *, const char *);
static int vrrp_wt_prop_name(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_vrid(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_af(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_pri(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_adver_int(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_preempt(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_accept(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_ifname(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_enabled(vrrp_vr_conf_t *, char *, size_t);

static void vrrpd_cmd_create(void *, void *, size_t *);
static void vrrpd_cmd_delete(void *, void *, size_t *);
static void vrrpd_cmd_enable(void *, void *, size_t *);
static void vrrpd_cmd_disable(void *, void *, size_t *);
static void vrrpd_cmd_modify(void *, void *, size_t *);
static void vrrpd_cmd_list(void *, void *, size_t *);
static void vrrpd_cmd_query(void *, void *, size_t *);

/* Arguments: interface (link) name, VRID, address family. */
static vrrp_vr_t *vrrpd_lookup_vr_by_vrid(char *, vrid_t, int);
static vrrp_vr_t *vrrpd_lookup_vr_by_name(const char *);
static vrrp_intf_t *vrrpd_lookup_if(const char *, int);
static vrrp_err_t vrrpd_create_if(const char *, int, uint32_t, vrrp_intf_t **);
static void vrrpd_delete_if(vrrp_intf_t *, boolean_t);
static vrrp_err_t vrrpd_create_ip(vrrp_intf_t *, const char *, vrrp_addr_t *,
    uint64_t flags);
static void vrrpd_delete_ip(vrrp_intf_t *, vrrp_ip_t *);

static void vrrpd_init_ipcache(int);
static void vrrpd_update_ipcache(int);
static ipadm_status_t vrrpd_walk_addr_info(int);
static vrrp_err_t vrrpd_add_ipaddr(char *, int, vrrp_addr_t *,
    int, uint64_t);
static vrrp_ip_t *vrrpd_select_primary(vrrp_intf_t *);
static void vrrpd_reselect_primary(vrrp_intf_t *);
static void vrrpd_reenable_all_vr();
static void vrrpd_remove_if(vrrp_intf_t *, boolean_t);

static uint16_t in_cksum(int, uint16_t, void *);
static uint16_t vrrp_cksum4(struct in_addr *, struct in_addr *,
    uint16_t, vrrp_pkt_t *);
static uint16_t vrrp_cksum6(struct in6_addr *, struct in6_addr *,
    uint16_t, vrrp_pkt_t *);
static size_t vrrpd_build_vrrp(vrrp_vr_t *, uchar_t *, int, boolean_t);

static void vrrpd_process_adv(vrrp_vr_t *, vrrp_addr_t *, vrrp_pkt_t *);
static vrrp_err_t vrrpd_send_adv(vrrp_vr_t *, boolean_t);

/* state transition functions */
static vrrp_err_t vrrpd_state_i2m(vrrp_vr_t *);
static vrrp_err_t vrrpd_state_i2b(vrrp_vr_t *);
static void vrrpd_state_m2i(vrrp_vr_t *);
static void vrrpd_state_b2i(vrrp_vr_t *);
static vrrp_err_t vrrpd_state_b2m(vrrp_vr_t *);
static vrrp_err_t vrrpd_state_m2b(vrrp_vr_t *);
static void vrrpd_state_trans(vrrp_state_t, vrrp_state_t, vrrp_vr_t *);

static vrrp_err_t vrrpd_set_noaccept(vrrp_vr_t *, boolean_t);
static vrrp_err_t vrrpd_virtualip_update(vrrp_vr_t *, boolean_t);
static vrrp_err_t vrrpd_virtualip_updateone(vrrp_intf_t *, vrrp_ip_t *,
    boolean_t);
static int vrrpd_post_event(const char *, vrrp_state_t, vrrp_state_t);

static void vrrpd_initconf();
static vrrp_err_t vrrpd_updateconf(vrrp_vr_conf_t *, uint_t);
static vrrp_err_t vrrpd_write_vrconf(char *, size_t, vrrp_vr_conf_t *);
static vrrp_err_t vrrpd_read_vrconf(char *, vrrp_vr_conf_t *);
static vrrp_err_t vrrpd_readprop(const char *, vrrp_vr_conf_t *);
static void vrrpd_cleanup();

static void vrrp_log(int, char *, ...);
static int timeval_to_milli(struct timeval);
static struct timeval timeval_delta(struct timeval, struct timeval);

/*
 * Descriptor of one persistent property: its name plus the pair of
 * functions used to read it into, and write it out of, a vrrp_vr_conf_t.
 */
typedef struct vrrpd_prop_s {
        char            *vs_propname;   /* property name */
        /* reader: takes the configuration and a string */
        boolean_t       (*vs_propread)(vrrp_vr_conf_t *, const char *);
        /* writer: takes the configuration, a buffer and its size */
        int             (*vs_propwrite)(vrrp_vr_conf_t *, char *, size_t);
} vrrp_prop_t;

/*
 * persistent VRRP properties array
 */
static vrrp_prop_t vrrp_prop_info_tbl[] = {
        {"name", vrrp_rd_prop_name, vrrp_wt_prop_name},
        {"vrid", vrrp_rd_prop_vrid, vrrp_wt_prop_vrid},
        {"priority", vrrp_rd_prop_pri, vrrp_wt_prop_pri},
        {"adv_intval", vrrp_rd_prop_adver_int, vrrp_wt_prop_adver_int},
        {"preempt_mode", vrrp_rd_prop_preempt, vrrp_wt_prop_preempt},
        {"accept_mode", vrrp_rd_prop_accept, vrrp_wt_prop_accept},
        {"interface", vrrp_rd_prop_ifname, vrrp_wt_prop_ifname},
        {"af", vrrp_rd_prop_af, vrrp_wt_prop_af},
        {"enabled", vrrp_rd_prop_enabled, vrrp_wt_prop_enabled}
};

/* Number of entries in vrrp_prop_info_tbl. */
#define VRRP_PROP_INFO_TABSIZE  \
        (sizeof (vrrp_prop_info_tbl) / sizeof (vrrp_prop_t))

/*
 * Signature shared by all vrrpd_cmd_*() handlers.
 * NOTE(review): the arguments look like (request, reply buffer, reply size)
 * -- confirm against the handler definitions.
 */
typedef void vrrp_cmd_func_t(void *, void *, size_t *);

/*
 * Description of one administrative command: its type, the size of its
 * request and reply structures, whether it is a set operation, and the
 * function that handles it.
 */
typedef struct vrrp_cmd_info_s {
        vrrp_cmd_type_t vi_cmd;
        size_t          vi_reqsize;
        size_t          vi_acksize;     /* 0 if the size is variable */
        boolean_t       vi_setop;       /* Set operation? Check credentials */
        vrrp_cmd_func_t *vi_cmdfunc;
} vrrp_cmd_info_t;

/*
 * Table of supported commands. QUERY and LIST have a variable-sized reply
 * (vi_acksize of 0) and are the only entries that are not set operations.
 */
static vrrp_cmd_info_t vrrp_cmd_info_tbl[] = {
        {VRRP_CMD_CREATE, sizeof (vrrp_cmd_create_t),
            sizeof (vrrp_ret_create_t), _B_TRUE, vrrpd_cmd_create},
        {VRRP_CMD_DELETE, sizeof (vrrp_cmd_delete_t),
            sizeof (vrrp_ret_delete_t), _B_TRUE, vrrpd_cmd_delete},
        {VRRP_CMD_ENABLE, sizeof (vrrp_cmd_enable_t),
            sizeof (vrrp_ret_enable_t), _B_TRUE, vrrpd_cmd_enable},
        {VRRP_CMD_DISABLE, sizeof (vrrp_cmd_disable_t),
            sizeof (vrrp_ret_disable_t), _B_TRUE, vrrpd_cmd_disable},
        {VRRP_CMD_MODIFY, sizeof (vrrp_cmd_modify_t),
            sizeof (vrrp_ret_modify_t), _B_TRUE, vrrpd_cmd_modify},
        {VRRP_CMD_QUERY, sizeof (vrrp_cmd_query_t), 0,
            _B_FALSE, vrrpd_cmd_query},
        {VRRP_CMD_LIST, sizeof (vrrp_cmd_list_t), 0,
            _B_FALSE, vrrpd_cmd_list}
};

/* Number of entries in vrrp_cmd_info_tbl. */
#define VRRP_DOOR_INFO_TABLE_SIZE       \
        (sizeof (vrrp_cmd_info_tbl) / sizeof (vrrp_cmd_info_t))

/*
 * Compare the IP address part of two vrrp_addr_t values.
 *
 * af selects the member that is compared: AF_INET compares the in4.sin_addr
 * fields, any other value compares the in6.sin6_addr fields. The result is
 * that of memcmp(): zero when the addresses are identical, non-zero otherwise.
 */
static int
ipaddr_cmp(int af, vrrp_addr_t *addr1, vrrp_addr_t *addr2)
{
        const void      *lhs;
        const void      *rhs;
        size_t          len;

        if (af == AF_INET) {
                lhs = &addr1->in4.sin_addr;
                rhs = &addr2->in4.sin_addr;
                len = sizeof (struct in_addr);
        } else {
                lhs = &addr1->in6.sin6_addr;
                rhs = &addr2->in6.sin6_addr;
                len = sizeof (struct in6_addr);
        }

        return (memcmp(lhs, rhs, len));
}

/*
 * Find the VRRP router configured over link ifname with the given VRID and
 * address family. Returns the matching entry of vrrp_vr_list, or NULL when
 * no router matches all three.
 */
static vrrp_vr_t *
vrrpd_lookup_vr_by_vrid(char *ifname, vrid_t vrid, int af)
{
        vrrp_vr_t *cur;

        TAILQ_FOREACH(cur, &vrrp_vr_list, vvr_next) {
                if (cur->vvr_conf.vvc_vrid != vrid)
                        continue;
                if (cur->vvr_conf.vvc_af != af)
                        continue;
                if (strcmp(cur->vvr_conf.vvc_link, ifname) == 0)
                        return (cur);
        }

        return (NULL);
}

/*
 * Find the VRRP router whose configured name equals name. Returns the
 * matching entry of vrrp_vr_list, or NULL if there is none.
 */
static vrrp_vr_t *
vrrpd_lookup_vr_by_name(const char *name)
{
        vrrp_vr_t *cur;

        TAILQ_FOREACH(cur, &vrrp_vr_list, vvr_next) {
                if (strcmp(cur->vvr_conf.vvc_name, name) == 0)
                        return (cur);
        }

        return (NULL);
}

/*
 * Find the interface entry with the given name and address family in
 * vrrp_intf_list. Returns NULL when no such interface is known.
 */
static vrrp_intf_t *
vrrpd_lookup_if(const char *ifname, int af)
{
        vrrp_intf_t     *cur;

        TAILQ_FOREACH(cur, &vrrp_intf_list, vvi_next) {
                if (cur->vvi_af != af)
                        continue;
                if (strcmp(ifname, cur->vvi_ifname) == 0)
                        return (cur);
        }

        return (NULL);
}

/*
 * Allocate a new interface entry for ifname/af, initialize it and append it
 * to vrrp_intf_list.
 *
 * The new entry starts with no socket, no event ID, no primary IP address,
 * no VRRP routers, node state NODE_STATE_NEW and router state
 * VRRP_STATE_INIT.
 *
 * On success *intfp points to the new entry and VRRP_SUCCESS is returned.
 * If the allocation fails, *intfp is NULL and VRRP_ENOMEM is returned.
 */
static vrrp_err_t
vrrpd_create_if(const char *ifname, int af, uint32_t ifindex,
    vrrp_intf_t **intfp)
{
        vrrp_intf_t     *newif;

        vrrp_log(VRRP_DBG0, "vrrpd_create_if(%s, %s, %d)",
            ifname, af_str(af), ifindex);

        newif = malloc(sizeof (vrrp_intf_t));
        *intfp = newif;
        if (newif == NULL) {
                vrrp_log(VRRP_ERR, "vrrpd_create_if(): failed to "
                    "allocate %s/%s interface", ifname, af_str(af));
                return (VRRP_ENOMEM);
        }

        /* Identity of the interface. */
        (void) strlcpy(newif->vvi_ifname, ifname, sizeof (newif->vvi_ifname));
        newif->vvi_af = af;
        newif->vvi_ifindex = ifindex;

        /* Nothing is attached to a freshly created interface. */
        TAILQ_INIT(&newif->vvi_iplist);
        newif->vvi_pip = NULL;
        newif->vvi_nvr = 0;
        newif->vvi_sockfd = -1;
        newif->vvi_eid = -1;

        newif->vvi_state = NODE_STATE_NEW;
        newif->vvi_vr_state = VRRP_STATE_INIT;

        TAILQ_INSERT_TAIL(&vrrp_intf_list, newif, vvi_next);
        return (VRRP_SUCCESS);
}

/*
 * Delete an interface: free every IP address on it, unlink it from
 * vrrp_intf_list and free it.
 *
 * When update_vr is true, vrrpd_remove_if() is called first so that VRRP
 * routers using this interface (as their primary or their VNIC interface)
 * can have their state updated. When it is false, the interface is simply
 * discarded without touching any VRRP router.
 */
static void
vrrpd_delete_if(vrrp_intf_t *intf, boolean_t update_vr)
{
        vrrp_ip_t       *addr;

        vrrp_log(VRRP_DBG0, "vrrpd_delete_if(%s, %s, %supdate_vr)",
            intf->vvi_ifname, af_str(intf->vvi_af), update_vr ? "" : "no_");

        /*
         * Losing the interface (and with it all of its IP addresses) may
         * force a state transition of the routers configured over it.
         */
        if (update_vr)
                vrrpd_remove_if(intf, _B_FALSE);

        /* Drain the address list; vrrpd_delete_ip() unlinks each entry. */
        while ((addr = TAILQ_FIRST(&intf->vvi_iplist)) != NULL)
                vrrpd_delete_ip(intf, addr);

        /* Finally drop the interface itself. */
        TAILQ_REMOVE(&vrrp_intf_list, intf, vvi_next);
        free(intf);
}

/*
 * Allocate a new IP address entry and add it to the address list of intf.
 *
 * intf     interface the address belongs to
 * lifname  logical interface name; copied into the entry, truncated if it
 *          does not fit, and always NUL-terminated
 * addr     the address; copied into the entry
 * flags    interface flags stored with the address
 *
 * The entry starts in NODE_STATE_NEW. IPv6 link-local addresses are put at
 * the head of the list, everything else is appended at the tail.
 *
 * Returns VRRP_SUCCESS, or VRRP_ENOMEM if the allocation fails.
 */
static vrrp_err_t
vrrpd_create_ip(vrrp_intf_t *intf, const char *lifname, vrrp_addr_t *addr,
    uint64_t flags)
{
        vrrp_ip_t       *ip;
        char            abuf[INET6_ADDRSTRLEN];

        /* LINTED E_CONSTANT_CONDITION */
        VRRPADDR2STR(intf->vvi_af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
        /* flags is 64 bits wide; print it with a matching conversion. */
        vrrp_log(VRRP_DBG0, "vrrpd_create_ip(%s, %s, %s, 0x%llx)",
            intf->vvi_ifname, lifname, abuf, (unsigned long long)flags);

        if ((ip = malloc(sizeof (vrrp_ip_t))) == NULL) {
                vrrp_log(VRRP_ERR, "vrrpd_create_ip(%s, %s):"
                    "failed to allocate IP", lifname, abuf);
                return (VRRP_ENOMEM);
        }

        /*
         * strlcpy() guarantees NUL-termination; strncpy() would leave
         * vip_lifname unterminated when lifname fills the whole buffer.
         */
        (void) strlcpy(ip->vip_lifname, lifname, sizeof (ip->vip_lifname));
        ip->vip_state = NODE_STATE_NEW;
        ip->vip_flags = flags;
        (void) memcpy(&ip->vip_addr, addr, sizeof (ip->vip_addr));

        /*
         * Make sure link-local IPv6 IP addresses are at the head of the list
         */
        if (intf->vvi_af == AF_INET6 &&
            IN6_IS_ADDR_LINKLOCAL(&addr->in6.sin6_addr)) {
                TAILQ_INSERT_HEAD(&intf->vvi_iplist, ip, vip_next);
        } else {
                TAILQ_INSERT_TAIL(&intf->vvi_iplist, ip, vip_next);
        }
        return (VRRP_SUCCESS);
}

/*
 * Remove an IP address entry from the address list of intf and free it.
 * If the address is the interface's current primary address, the primary
 * pointer (vvi_pip) is cleared first.
 */
static void
vrrpd_delete_ip(vrrp_intf_t *intf, vrrp_ip_t *ip)
{
        char    addrstr[INET6_ADDRSTRLEN];
        int     family = intf->vvi_af;
        int     was_primary;

        /* LINTED E_CONSTANT_CONDITION */
        VRRPADDR2STR(family, &ip->vip_addr, addrstr, sizeof (addrstr),
            _B_FALSE);

        was_primary = (intf->vvi_pip == ip);
        vrrp_log(VRRP_DBG0, "vrrpd_delete_ip(%s, %s, %s) is %sprimary",
            intf->vvi_ifname, ip->vip_lifname, addrstr,
            was_primary ? "" : "not ");

        /* Do not leave the interface pointing at freed memory. */
        if (was_primary)
                intf->vvi_pip = NULL;

        TAILQ_REMOVE(&intf->vvi_iplist, ip, vip_next);
        free(ip);
}

/*
 * Map a routing socket message type to its symbolic name, for logging.
 * Types that are not listed here are reported as "RTM_OTHER".
 */
static char *
rtm_event2str(uchar_t event)
{
        char *name;

        switch (event) {
        case RTM_NEWADDR:
                name = "RTM_NEWADDR";
                break;
        case RTM_DELADDR:
                name = "RTM_DELADDR";
                break;
        case RTM_IFINFO:
                name = "RTM_IFINFO";
                break;
        case RTM_ADD:
                name = "RTM_ADD";
                break;
        case RTM_DELETE:
                name = "RTM_DELETE";
                break;
        case RTM_CHANGE:
                name = "RTM_CHANGE";
                break;
        case RTM_OLDADD:
                name = "RTM_OLDADD";
                break;
        case RTM_OLDDEL:
                name = "RTM_OLDDEL";
                break;
        case RTM_CHGADDR:
                name = "RTM_CHGADDR";
                break;
        case RTM_FREEADDR:
                name = "RTM_FREEADDR";
                break;
        default:
                name = "RTM_OTHER";
                break;
        }

        return (name);
}

/*
 * Called by the child (daemon) process to tell the waiting parent process
 * which value to exit with. The child does this either when something went
 * wrong or once all of its initialization is complete.
 *
 * rv is written to the write end of the startup pipe, which is closed
 * afterwards in every case. Returns 0 on success, or the errno value saved
 * right after a write() that did not transfer a whole int.
 */
static int
vrrpd_inform_parent_exit(int rv)
{
        int saved_errno = 0;

        /*
         * A non-zero vrrp_debug_level means vrrpd was not daemonized, so
         * there is no parent waiting on the pipe.
         */
        if (vrrp_debug_level != 0)
                return (0);

        if (write(pfds[1], &rv, sizeof (int)) != sizeof (int))
                saved_errno = errno;

        (void) close(pfds[1]);
        return (saved_errno);
}

/*
 * Entry point of the VRRP daemon.
 *
 * Options:
 *   -d level   debug level; a non-zero level keeps vrrpd in the foreground
 *              (daemon_init() is skipped)
 *   -f file    configuration file to use instead of VRRPCONF
 * Unknown options are ignored.
 *
 * Startup sequence: set up the daemon privileges, unblock all signals and
 * install vrrpd_cleanup() for SIGINT/SIGQUIT/SIGTERM, parse the options,
 * daemonize (unless debugging), raise the file descriptor limit, run
 * vrrpd_init(), drop the privileges that are no longer needed, load the
 * configuration, report success to the parent and finally enter the event
 * loop.
 *
 * Note that vrrpd_cleanup() does not return: it ends with exit(1). The
 * return statements that follow a call to it are therefore never reached.
 */
int
main(int argc, char *argv[])
{
        int c, err;
        struct sigaction sa;
        sigset_t mask;
        struct rlimit rl;

        (void) setlocale(LC_ALL, "");
        (void) textdomain(TEXT_DOMAIN);

        /*
         * We need PRIV_SYS_CONFIG to post VRRP sysevent, PRIV_NET_RAWACCESS
         * and PRIV_NET_ICMPACCESS to open the raw socket, PRIV_SYS_IP_CONFIG
         * to bring up/down the virtual IP addresses, and PRIV_SYS_RESOURCE to
         * setrlimit().
         *
         * Note that sysevent is not supported in non-global zones.
         */
        if (getzoneid() == GLOBAL_ZONEID) {
                err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
                    PRIV_SYS_CONFIG, PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
                    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
        } else {
                err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
                    PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
                    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
        }

        if (err == -1) {
                vrrp_log(VRRP_ERR, "main(): init_daemon_priv() failed");
                return (EXIT_FAILURE);
        }

        /*
         * If vrrpd is started by other process, it will inherit the
         * signal block mask. We unblock all signals to make sure the
         * signal handling will work normally.
         */
        (void) sigfillset(&mask);
        (void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
        sa.sa_handler = vrrpd_cleanup;
        sa.sa_flags = 0;
        (void) sigemptyset(&sa.sa_mask);
        (void) sigaction(SIGINT, &sa, NULL);
        (void) sigaction(SIGQUIT, &sa, NULL);
        (void) sigaction(SIGTERM, &sa, NULL);

        vrrp_debug_level = 0;
        (void) strlcpy(vrrpd_conffile, VRRPCONF, sizeof (vrrpd_conffile));
        while ((c = getopt(argc, argv, "d:f:")) != EOF) {
                switch (c) {
                case 'd':
                        vrrp_debug_level = atoi(optarg);
                        break;
                case 'f':
                        (void) strlcpy(vrrpd_conffile, optarg,
                            sizeof (vrrpd_conffile));
                        break;
                default:
                        break;
                }
        }

        /* Keep only stdin/stdout/stderr before (possibly) daemonizing. */
        closefrom(3);
        if (vrrp_debug_level == 0 && (daemon_init() != 0)) {
                vrrp_log(VRRP_ERR, "main(): daemon_init() failed");
                return (EXIT_FAILURE);
        }

        rl.rlim_cur = RLIM_INFINITY;
        rl.rlim_max = RLIM_INFINITY;
        if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
                vrrp_log(VRRP_ERR, "main(): setrlimit() failed");
                goto child_out;
        }

        if (vrrpd_init() != VRRP_SUCCESS) {
                vrrp_log(VRRP_ERR, "main(): vrrpd_init() failed");
                goto child_out;
        }

        /*
         * Get rid of unneeded privileges.
         */
        __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
            PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, PRIV_SYS_RESOURCE, NULL);

        /*
         * Read the configuration and initialize the existing VRRP
         * configuration
         */
        vrrpd_initconf();

        /*
         * Inform the parent process that it can successfully exit.
         */
        if ((err = vrrpd_inform_parent_exit(EXIT_SUCCESS)) != 0) {
                /*
                 * Log before cleaning up: vrrpd_cleanup() calls closelog()
                 * and exit(), so a message issued after it would never be
                 * emitted.
                 */
                vrrp_log(VRRP_WARNING, "vrrpd_inform_parent_exit() failed: %s",
                    strerror(err));
                vrrpd_cleanup();
                /* NOTREACHED */
                return (EXIT_FAILURE);
        }

        /*
         * Start the loop to handle the timer and the IO events.
         */
        switch (iu_handle_events(vrrpd_eh, vrrpd_timerq)) {
        case -1:
                vrrp_log(VRRP_ERR, "main(): iu_handle_events() failed "
                    "abnormally");
                break;
        default:
                break;
        }

        /* vrrpd_cleanup() terminates the process itself via exit(1). */
        vrrpd_cleanup();
        return (EXIT_SUCCESS);

child_out:
        /* Startup failed after daemonizing: let the parent exit as well. */
        (void) vrrpd_inform_parent_exit(EXIT_FAILURE);
        return (EXIT_FAILURE);
}

/*
 * Turn the process into a daemon.
 *
 * Refuses to run (returns -1) unless SMF_FMRI is set in the environment.
 * Otherwise a pipe is created and the process forks:
 *
 * - The parent waits for the child to report its startup status through
 *   the pipe and exits with that value. If a complete int cannot be read,
 *   the parent sends SIGTERM to the child and exits with EXIT_FAILURE.
 *   The parent never returns from this function.
 * - The child changes to "/", starts a new session, redirects
 *   stdin/stdout/stderr to /dev/null, opens syslog, sets vrrp_logflag and
 *   returns 0 to continue in main().
 *
 * Returns -1 if the pipe or the fork fails.
 */
static int
daemon_init()
{
        pid_t   child;
        int     status;

        vrrp_log(VRRP_DBG0, "daemon_init()");

        if (getenv("SMF_FMRI") == NULL) {
                vrrp_log(VRRP_ERR, "daemon_init(): vrrpd is an smf(7) managed "
                    "service and should not be run from the command line.");
                return (-1);
        }

        /*
         * The pipe lets the child tell the parent when (and with which
         * value) to exit once initialization has finished.
         */
        if (pipe(pfds) < 0) {
                vrrp_log(VRRP_ERR, "daemon_init(): pipe() failed: %s",
                    strerror(errno));
                return (-1);
        }

        child = fork();
        if (child < 0) {
                vrrp_log(VRRP_ERR, "daemon_init(): fork() failed: %s",
                    strerror(errno));
                (void) close(pfds[0]);
                (void) close(pfds[1]);
                return (-1);
        }

        if (child == 0) {
                /*
                 * Child: detach from the caller's environment and go back
                 * to main(). Descriptor 0 is reopened on /dev/null and then
                 * duplicated onto 1 and 2.
                 */
                (void) close(pfds[0]);
                (void) chdir("/");
                (void) setsid();
                (void) close(0);
                (void) close(1);
                (void) close(2);
                (void) open("/dev/null", O_RDWR, 0);
                (void) dup2(0, 1);
                (void) dup2(0, 2);
                openlog("vrrpd", LOG_PID, LOG_DAEMON);
                vrrp_logflag = 1;
                return (0);
        }

        /*
         * Parent: wait for the child's verdict. Anything other than a
         * complete int means the child went away before reporting.
         */
        (void) close(pfds[1]);
        if (read(pfds[0], &status, sizeof (int)) != sizeof (int)) {
                vrrp_log(VRRP_ERR, "daemon_init(): child process "
                    "exited unexpectedly %s", strerror(errno));
                (void) kill(child, SIGTERM);
                status = EXIT_FAILURE;
        }
        (void) close(pfds[0]);
        exit(status);
        /* NOTREACHED */
}

/*
 * One-time initialization of the daemon.
 *
 * In order: initializes the router and interface lists, opens the
 * libvrrpadm handle, creates the timer queue and the event handler, creates
 * the command, control and routing sockets, opens the libipadm handle,
 * scans the IPv4 and IPv6 interfaces/addresses, starts the periodic scan
 * timer and sets up the VRRP multicast addresses.
 *
 * Returns VRRP_SUCCESS, or VRRP_ESYS after calling vrrpd_fini() if any
 * step fails.
 */
static vrrp_err_t
vrrpd_init()
{
        vrrp_log(VRRP_DBG0, "vrrpd_init()");

        TAILQ_INIT(&vrrp_vr_list);
        TAILQ_INIT(&vrrp_intf_list);

        if (vrrp_open(&vrrpd_vh) != VRRP_SUCCESS) {
                vrrp_log(VRRP_ERR, "vrrpd_init(): vrrp_open() failed");
                goto fail;
        }

        vrrpd_timerq = iu_tq_create();
        if (vrrpd_timerq == NULL) {
                vrrp_log(VRRP_ERR, "vrrpd_init(): iu_tq_create() failed");
                goto fail;
        }

        vrrpd_eh = iu_eh_create();
        if (vrrpd_eh == NULL) {
                vrrp_log(VRRP_ERR, "vrrpd_init(): iu_eh_create() failed");
                goto fail;
        }

        /*
         * AF_UNIX socket shared with libvrrpadm: administrative requests
         * come in on it and the results are sent back over it.
         */
        if (vrrpd_cmdsock_create() != VRRP_SUCCESS) {
                vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_cmdsock_create() "
                    "failed");
                goto fail;
        }

        /*
         * Control socket: used to bring the virtual IP addresses up/down
         * and to set their IFF_NOACCEPT flag.
         */
        if (vrrpd_ctlsock_create() != VRRP_SUCCESS) {
                vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_ctlsock_create() "
                    "failed");
                goto fail;
        }

        /*
         * Routing socket: its messages are used to build and maintain the
         * interface/IP address list.
         */
        if (vrrpd_rtsock_create() != VRRP_SUCCESS) {
                vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_rtsock_create() "
                    "failed");
                goto fail;
        }

        /* libipadm handle */
        if (ipadm_open(&vrrp_ipadm_handle, 0) != IPADM_SUCCESS) {
                vrrp_log(VRRP_ERR, "vrrpd_init(): ipadm_open() failed");
                goto fail;
        }

        /*
         * Take the initial snapshot of interfaces and IP addresses, then
         * arm the timer that rescans them periodically.
         */
        vrrpd_scan(AF_INET);
        vrrpd_scan(AF_INET6);
        vrrp_scan_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
            vrrpd_scan_interval, vrrpd_scan_timer, NULL);
        if (vrrp_scan_timer_id == -1) {
                vrrp_log(VRRP_ERR, "vrrpd_init(): start scan_timer failed");
                goto fail;
        }

        /* VRRP multicast group, IPv4. */
        bzero(&vrrp_muladdr4, sizeof (vrrp_addr_t));
        vrrp_muladdr4.in4.sin_family = AF_INET;
        (void) inet_pton(AF_INET, "224.0.0.18", &vrrp_muladdr4.in4.sin_addr);

        /* VRRP multicast group, IPv6. */
        bzero(&vrrp_muladdr6, sizeof (vrrp_addr_t));
        vrrp_muladdr6.in6.sin6_family = AF_INET6;
        (void) inet_pton(AF_INET6, "ff02::12", &vrrp_muladdr6.in6.sin6_addr);

        return (VRRP_SUCCESS);

fail:
        vrrpd_fini();
        return (VRRP_ESYS);
}

/*
 * Release everything set up by vrrpd_init(): the scan timer, the routing,
 * control and command sockets, the event handler, the timer queue, the
 * libvrrpadm handle and the libipadm handle.
 *
 * This is also the failure path of vrrpd_init(), so any of the handles may
 * still be NULL when it runs. Each handle is therefore checked before it is
 * released and reset to NULL afterwards, which also makes a second call
 * harmless.
 *
 * The router and interface lists must already be empty; vrrpd_cleanup()
 * drains them before calling this function.
 * NOTE(review): when vrrpd_init() fails after vrrpd_scan() has run, the
 * interface list may not be empty and the assertion below would fire --
 * confirm whether vrrpd_scan() can populate it at that point.
 */
static void
vrrpd_fini()
{
        vrrp_log(VRRP_DBG0, "vrrpd_fini()");

        /* The timer queue does not exist if vrrpd_init() failed early. */
        if (vrrpd_timerq != NULL) {
                (void) iu_cancel_timer(vrrpd_timerq, vrrp_scan_timer_id,
                    NULL);
        }
        vrrp_scan_timer_id = -1;

        vrrpd_rtsock_destroy();
        vrrpd_ctlsock_destroy();
        vrrpd_cmdsock_destroy();

        if (vrrpd_eh != NULL) {
                iu_eh_destroy(vrrpd_eh);
                vrrpd_eh = NULL;
        }

        if (vrrpd_timerq != NULL) {
                iu_tq_destroy(vrrpd_timerq);
                vrrpd_timerq = NULL;
        }

        if (vrrpd_vh != NULL) {
                vrrp_close(vrrpd_vh);
                vrrpd_vh = NULL;
        }
        assert(TAILQ_EMPTY(&vrrp_vr_list));
        assert(TAILQ_EMPTY(&vrrp_intf_list));

        /* Reset the handle so that it is never closed twice. */
        if (vrrp_ipadm_handle != NULL) {
                ipadm_close(vrrp_ipadm_handle);
                vrrp_ipadm_handle = NULL;
        }
}

/*
 * Tear the daemon down and terminate the process.
 *
 * Deletes every VRRP router, then every interface (without updating any
 * router state), releases the daemon-wide resources through vrrpd_fini(),
 * closes the log and calls exit(1). This function does not return. main()
 * installs it as the SIGINT/SIGQUIT/SIGTERM handler and also calls it
 * directly.
 */
static void
vrrpd_cleanup(void)
{
        vrrp_vr_t       *router;
        vrrp_intf_t     *ifp;

        vrrp_log(VRRP_DBG0, "vrrpd_cleanup()");

        /* Each delete call unlinks the entry it is given. */
        while ((router = TAILQ_FIRST(&vrrp_vr_list)) != NULL)
                vrrpd_delete_vr(router);

        while ((ifp = TAILQ_FIRST(&vrrp_intf_list)) != NULL)
                vrrpd_delete_if(ifp, _B_FALSE);

        vrrpd_fini();
        closelog();
        exit(1);
}

/*
 * Load the configuration file (vrrpd_conffile) and bring up the VRRP
 * routers it describes.
 *
 * The file is processed line by line. A line that cannot be parsed is
 * logged and skipped. A line that leaves the VRID at VRRP_VRID_NONE is
 * treated as blank or comment. For every other line the router is created
 * and, if it is marked enabled, enabled. Failures are logged and do not
 * stop the processing of the remaining lines.
 *
 * If the file cannot be opened, an error is logged and nothing is created.
 */
static void
vrrpd_initconf()
{
        FILE *conf_fp;
        char buf[LINE_MAX];
        int lineno = 0;
        vrrp_vr_conf_t conf;
        vrrp_err_t err;

        vrrp_log(VRRP_DBG0, "vrrpd_initconf()");

        conf_fp = fopen(vrrpd_conffile, "rF");
        if (conf_fp == NULL) {
                vrrp_log(VRRP_ERR, "failed to open the configuration file %s",
                    vrrpd_conffile);
                return;
        }

        while (fgets(buf, sizeof (buf), conf_fp) != NULL) {
                lineno++;

                /* Lets us recognize lines that define no router. */
                conf.vvc_vrid = VRRP_VRID_NONE;

                err = vrrpd_read_vrconf(buf, &conf);
                if (err != VRRP_SUCCESS) {
                        vrrp_log(VRRP_ERR, "failed to parse %d line %s",
                            lineno, buf);
                        continue;
                }

                /* Blank or comment line. */
                if (conf.vvc_vrid == VRRP_VRID_NONE)
                        continue;

                /*
                 * The router comes from the persistent configuration, so
                 * the configuration must not be rewritten (_B_FALSE).
                 */
                err = vrrpd_create(&conf, _B_FALSE);
                if (err != VRRP_SUCCESS) {
                        vrrp_log(VRRP_ERR, "VRRP router %s creation failed: "
                            "%s", conf.vvc_name, vrrp_err2str(err));
                        continue;
                }

                if (!conf.vvc_enabled)
                        continue;

                err = vrrpd_enable(conf.vvc_name, _B_FALSE);
                if (err != VRRP_SUCCESS) {
                        vrrp_log(VRRP_ERR, "VRRP router %s enable failed: %s",
                            conf.vvc_name, vrrp_err2str(err));
                }
        }

        (void) fclose(conf_fp);
}

/*
 * Create the AF_UNIX command socket shared with libvrrpadm. The daemon
 * receives administrative requests on it and sends the results back.
 *
 * The socket is made non-blocking, bound to VRRPD_SOCKET (any stale path
 * is unlinked first), put into listening state and registered for POLLIN
 * with vrrpd_cmdsock_handler. On success the descriptor and the event ID
 * are stored in vrrpd_cmdsock_fd and vrrpd_cmdsock_eid.
 *
 * Returns VRRP_SUCCESS, or VRRP_ESYS if any step fails; a socket that was
 * already opened is closed again in that case.
 */
static vrrp_err_t
vrrpd_cmdsock_create()
{
        iu_event_id_t           evid;
        struct sockaddr_un      sun;
        int                     fd, fl;

        vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_create()");

        fd = socket(AF_UNIX, SOCK_STREAM, 0);
        if (fd < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): socket(AF_UNIX) "
                    "failed: %s", strerror(errno));
                return (VRRP_ESYS);
        }

        /* Switch the socket to non-blocking mode. */
        fl = fcntl(fd, F_GETFL, 0);
        (void) fcntl(fd, F_SETFL, (fl | O_NONBLOCK));

        /*
         * A previous daemon instance may have died without removing the
         * socket path; get rid of it before binding.
         */
        (void) unlink(VRRPD_SOCKET);

        bzero(&sun, sizeof (sun));
        sun.sun_family = AF_UNIX;
        (void) strlcpy(sun.sun_path, VRRPD_SOCKET, sizeof (sun.sun_path));
        if (bind(fd, (struct sockaddr *)&sun, sizeof (sun)) < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): bind() failed: %s",
                    strerror(errno));
                goto fail;
        }

        if (listen(fd, 30) < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): listen() "
                    "failed: %s", strerror(errno));
                goto fail;
        }

        evid = iu_register_event(vrrpd_eh, fd, POLLIN,
            vrrpd_cmdsock_handler, NULL);
        if (evid == -1) {
                vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): iu_register_event()"
                    " failed");
                goto fail;
        }

        vrrpd_cmdsock_fd = fd;
        vrrpd_cmdsock_eid = evid;
        return (VRRP_SUCCESS);

fail:
        (void) close(fd);
        return (VRRP_ESYS);
}

/*
 * Tear down the command socket: unregister its event from vrrpd_eh,
 * close the descriptor, and reset both globals to -1.
 */
static void
vrrpd_cmdsock_destroy()
{
        vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_destroy()");

        /* Unregister the event before the descriptor goes away. */
        (void) iu_unregister_event(vrrpd_eh, vrrpd_cmdsock_eid, NULL);
        vrrpd_cmdsock_eid = -1;

        (void) close(vrrpd_cmdsock_fd);
        vrrpd_cmdsock_fd = -1;
}

/*
 * Create the PF_ROUTE sockets used to listen to the routing socket
 * messages and build the interface/IP address list. Create one for
 * each address family (IPv4 and IPv6).
 *
 * Each socket is made non-blocking and registered with the vrrpd_eh
 * event handler, with vrrpd_rtsock_handler() as the POLLIN callback and
 * a pointer to the entry's address family as its argument.
 *
 * Returns VRRP_SUCCESS when both sockets are set up. On any failure the
 * socket being set up is closed, vrrpd_rtsock_destroy() is called to
 * release whatever was already recorded in vrrpd_rtsocks[], and VRRP_ESYS
 * is returned.
 */
static vrrp_err_t
vrrpd_rtsock_create()
{
        int             i, flags, sock;
        iu_event_id_t   eid;

        vrrp_log(VRRP_DBG0, "vrrpd_rtsock_create()");

        for (i = 0; i < 2; i++) {
                sock = socket(PF_ROUTE, SOCK_RAW, vrrpd_rtsocks[i].vrt_af);
                if (sock == -1) {
                        /* Nothing to close: the socket was never created. */
                        vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): socket() "
                            "failed: %s", strerror(errno));
                        break;
                }

                /*
                 * Set it to be non-blocking.
                 */
                if ((flags = fcntl(sock, F_GETFL, 0)) < 0) {
                        vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
                            "fcntl(F_GETFL) failed: %s", strerror(errno));
                        (void) close(sock);
                        break;
                }

                if ((fcntl(sock, F_SETFL, flags | O_NONBLOCK)) < 0) {
                        vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
                            "fcntl(F_SETFL) failed: %s", strerror(errno));
                        (void) close(sock);
                        break;
                }

                if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
                    vrrpd_rtsock_handler, &(vrrpd_rtsocks[i].vrt_af))) == -1) {
                        vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): register "
                            "rtsock %d(%s) failed", sock,
                            af_str(vrrpd_rtsocks[i].vrt_af));
                        (void) close(sock);
                        break;
                }

                vrrpd_rtsocks[i].vrt_fd = sock;
                vrrpd_rtsocks[i].vrt_eid = eid;
        }

        /*
         * The loop was left early: the socket of the failed iteration has
         * already been closed above, so only the recorded ones remain.
         */
        if (i != 2) {
                vrrpd_rtsock_destroy();
                return (VRRP_ESYS);
        }

        return (VRRP_SUCCESS);
}

/*
 * Release both routing sockets: unregister each entry's event from
 * vrrpd_eh, close its descriptor, and reset the entry's event id and
 * descriptor to -1.
 */
static void
vrrpd_rtsock_destroy()
{
        int     idx = 0;

        vrrp_log(VRRP_DBG0, "vrrpd_rtsock_destroy()");

        while (idx < 2) {
                (void) iu_unregister_event(vrrpd_eh,
                    vrrpd_rtsocks[idx].vrt_eid, NULL);
                (void) close(vrrpd_rtsocks[idx].vrt_fd);

                vrrpd_rtsocks[idx].vrt_fd = -1;
                vrrpd_rtsocks[idx].vrt_eid = -1;
                idx++;
        }
}

/*
 * Create the VRRP control sockets (one AF_INET, one AF_INET6) used to
 * bring up/down the virtual IP addresses and to set their IFF_NOACCEPT
 * flag. The SO_VRRP socket option is turned on for both sockets.
 *
 * On success the descriptors are stored in vrrpd_ctlsock_fd and
 * vrrpd_ctlsock6_fd and VRRP_SUCCESS is returned. On failure every socket
 * opened so far is closed and VRRP_ESYS is returned.
 */
static vrrp_err_t
vrrpd_ctlsock_create()
{
        int     enable = _B_TRUE;
        int     fd4;
        int     fd6 = -1;

        fd4 = socket(AF_INET, SOCK_DGRAM, 0);
        if (fd4 < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET) "
                    "failed: %s", strerror(errno));
                return (VRRP_ESYS);
        }

        if (setsockopt(fd4, SOL_SOCKET, SO_VRRP, &enable,
            sizeof (enable)) < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
                    "setsockopt(INET, SO_VRRP) failed: %s", strerror(errno));
                goto fail;
        }

        fd6 = socket(AF_INET6, SOCK_DGRAM, 0);
        if (fd6 < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET6) "
                    "failed: %s", strerror(errno));
                goto fail;
        }

        if (setsockopt(fd6, SOL_SOCKET, SO_VRRP, &enable,
            sizeof (enable)) < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
                    "setsockopt(INET6, SO_VRRP) failed: %s", strerror(errno));
                goto fail;
        }

        vrrpd_ctlsock_fd = fd4;
        vrrpd_ctlsock6_fd = fd6;
        return (VRRP_SUCCESS);

fail:
        /* The IPv4 socket always exists here; the IPv6 one may not. */
        (void) close(fd4);
        if (fd6 >= 0)
                (void) close(fd6);
        return (VRRP_ESYS);
}

/*
 * Close the IPv4 and IPv6 control sockets and reset their descriptors
 * to -1.
 */
static void
vrrpd_ctlsock_destroy()
{
        (void) close(vrrpd_ctlsock_fd);
        (void) close(vrrpd_ctlsock6_fd);

        vrrpd_ctlsock_fd = -1;
        vrrpd_ctlsock6_fd = -1;
}

/*
 * Command callback for a create request. arg1 is read as a
 * vrrp_cmd_create_t and arg2 as a vrrp_ret_create_t; the result is stored
 * in the vrc_err field of the latter.
 *
 * The router is created with its configuration updated. If the request
 * also asks for the router to be enabled and enabling fails, the router
 * that was just created is deleted again.
 */
/*ARGSUSED*/
static void
vrrpd_cmd_create(void *arg1, void *arg2, size_t *arg2_sz)
{
        vrrp_cmd_create_t       *req = arg1;
        vrrp_ret_create_t       *reply = arg2;
        vrrp_err_t              rc;

        rc = vrrpd_create(&req->vcc_conf, _B_TRUE);
        if (rc != VRRP_SUCCESS || !req->vcc_conf.vvc_enabled) {
                reply->vrc_err = rc;
                return;
        }

        /*
         * The configuration was already written by vrrpd_create() above,
         * so the enable step is told not to update it again.
         */
        rc = vrrpd_enable(req->vcc_conf.vvc_name, _B_FALSE);
        if (rc != VRRP_SUCCESS)
                (void) vrrpd_delete(req->vcc_conf.vvc_name);

        reply->vrc_err = rc;
}

/*
 * Command callback for a delete request. arg1 is read as a
 * vrrp_cmd_delete_t and arg2 as a vrrp_ret_delete_t; the result of
 * vrrpd_delete() on the requested name is stored in vrd_err.
 */
/*ARGSUSED*/
static void
vrrpd_cmd_delete(void *arg1, void *arg2, size_t *arg2_sz)
{
        vrrp_cmd_delete_t       *req = arg1;
        vrrp_ret_delete_t       *reply = arg2;

        reply->vrd_err = vrrpd_delete(req->vcd_name);
}

/*
 * Command callback for an enable request. arg1 is read as a
 * vrrp_cmd_enable_t and arg2 as a vrrp_ret_enable_t; the named router is
 * enabled with its configuration updated (_B_TRUE) and the result is
 * stored in vrs_err.
 */
/*ARGSUSED*/
static void
vrrpd_cmd_enable(void *arg1, void *arg2, size_t *arg2_sz)
{
        vrrp_cmd_enable_t       *req = arg1;
        vrrp_ret_enable_t       *reply = arg2;

        reply->vrs_err = vrrpd_enable(req->vcs_name, _B_TRUE);
}

/*
 * Command callback for a disable request. arg1 is read as a
 * vrrp_cmd_disable_t and arg2 as a vrrp_ret_disable_t; the result of
 * vrrpd_disable() on the requested name is stored in vrx_err.
 */
/*ARGSUSED*/
static void
vrrpd_cmd_disable(void *arg1, void *arg2, size_t *arg2_sz)
{
        vrrp_cmd_disable_t      *req = arg1;
        vrrp_ret_disable_t      *reply = arg2;

        reply->vrx_err = vrrpd_disable(req->vcx_name);
}

/*
 * Command callback for a modify request. arg1 is read as a
 * vrrp_cmd_modify_t and arg2 as a vrrp_ret_modify_t; the requested
 * configuration and mask are passed to vrrpd_modify() and its result is
 * stored in vrm_err.
 */
/*ARGSUSED*/
static void
vrrpd_cmd_modify(void *arg1, void *arg2, size_t *arg2_sz)
{
        vrrp_cmd_modify_t       *req = arg1;
        vrrp_ret_modify_t       *reply = arg2;

        reply->vrm_err = vrrpd_modify(&req->vcm_conf, req->vcm_mask);
}

/*
 * Command callback for a query request. arg1 is read as a
 * vrrp_cmd_query_t; the reply buffer (arg2) and its size (arg2_sz) are
 * handed to vrrpd_query() unchanged.
 */
static void
vrrpd_cmd_query(void *arg1, void *arg2, size_t *arg2_sz)
{
        vrrp_cmd_query_t        *req = arg1;

        vrrpd_query(req->vcq_name, arg2, arg2_sz);
}

/*
 * Command callback for a list request. arg1 is read as a vrrp_cmd_list_t;
 * its VRID, interface name and address family are passed to vrrpd_list()
 * together with the reply buffer (arg2) and its size (arg2_sz).
 */
static void
vrrpd_cmd_list(void *arg1, void *arg2, size_t *arg2_sz)
{
        vrrp_cmd_list_t *req = arg1;

        vrrpd_list(req->vcl_vrid, req->vcl_ifname, req->vcl_af, arg2,
            arg2_sz);
}

/*
 * Write-type requests must have the solaris.network.vrrp authorization.
 *
 * Returns _B_TRUE if the request described by cinfo is not a set
 * operation (vi_setop is false), or if the user behind the peer of connfd
 * holds the solaris.network.vrrp authorization. Returns _B_FALSE if the
 * authorization is missing, or if the peer's credential, real uid or
 * passwd entry cannot be obtained.
 */
static boolean_t
vrrp_auth_check(int connfd, vrrp_cmd_info_t *cinfo)
{
        ucred_t         *cred = NULL;
        uid_t           uid;
        struct passwd   *pw;
        boolean_t       success = _B_FALSE;

        vrrp_log(VRRP_DBG0, "vrrp_auth_check()");

        if (!cinfo->vi_setop)
                return (_B_TRUE);

        /*
         * Validate the credential. getpeerucred() reports failure with a
         * non-zero int, so its result is not compared against a uid_t.
         */
        if (getpeerucred(connfd, &cred) != 0) {
                vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpeerucred() "
                    "failed: %s", strerror(errno));
                return (_B_FALSE);
        }

        if ((uid = ucred_getruid((const ucred_t *)cred)) == (uid_t)-1) {
                vrrp_log(VRRP_ERR, "vrrp_auth_check(): ucred_getruid() "
                    "failed: %s", strerror(errno));
                goto done;
        }

        /* chkauthattr() takes a user name, so map the uid to one. */
        if ((pw = getpwuid(uid)) == NULL) {
                vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpwuid() failed");
                goto done;
        }

        success = (chkauthattr("solaris.network.vrrp", pw->pw_name) == 1);

done:
        ucred_free(cred);
        return (success);
}

/*
 * Process the administrative request from libvrrpadm.
 *
 * Called by the event handler when the listening command socket (s) is
 * readable. One connection is accepted and handled synchronously:
 *
 * 1. read the leading uint32_t command type and look it up in
 *    vrrp_cmd_info_tbl;
 * 2. read the remainder of the request (vi_reqsize bytes in total);
 * 3. check the peer's authorization for the command;
 * 4. on success run the command's vi_cmdfunc callback, which fills in the
 *    ack; on failure build a vrrp_ret_t carrying the error;
 * 5. send the ack and close the connection.
 *
 * If the command type itself cannot be read, the connection is closed
 * without an ack.
 */
/* ARGSUSED */
static void
vrrpd_cmdsock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
    void *arg)
{
        vrrp_cmd_info_t         *cinfo = NULL;
        vrrp_err_t              err = VRRP_SUCCESS;
        uchar_t                 buf[BUFFSIZE], ackbuf[BUFFSIZE];
        size_t                  cursize, acksize;
        /*
         * Signed on purpose: read() and sendto() return -1 on error.
         * Holding that in a size_t would make the "len > 0" tests below
         * succeed and corrupt cursize.
         */
        ssize_t                 len;
        uint32_t                cmd;
        int                     connfd, i;
        /*
         * NOTE(review): the peer address is kept in a sockaddr_in although
         * vrrpd_cmdsock_create() registers this handler on an AF_UNIX
         * socket; it is only passed back to sendto() on the connected
         * socket. Confirm whether it is needed at all.
         */
        struct sockaddr_in      from;
        socklen_t               fromlen;

        vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_handler()");

        fromlen = (socklen_t)sizeof (from);
        if ((connfd = accept(s, (struct sockaddr *)&from, &fromlen)) < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() accept(): %s",
                    strerror(errno));
                return;
        }

        /*
         * First get the type of the request
         */
        cursize = 0;
        while (cursize < sizeof (uint32_t)) {
                len = read(connfd, buf + cursize,
                    sizeof (uint32_t) - cursize);
                if (len > 0) {
                        cursize += (size_t)len;
                        continue;
                }
                if (len == -1 && (errno == EAGAIN || errno == EINTR))
                        continue;

                /* EOF (0) or a hard read error. */
                vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
                    "length");
                (void) close(connfd);
                return;
        }

        /* LINTED E_BAD_PTR_CAST_ALIGN */
        cmd = ((vrrp_cmd_t *)buf)->vc_cmd;
        for (i = 0; i < VRRP_DOOR_INFO_TABLE_SIZE; i++) {
                if (vrrp_cmd_info_tbl[i].vi_cmd == cmd) {
                        cinfo = vrrp_cmd_info_tbl + i;
                        break;
                }
        }

        if (cinfo == NULL) {
                vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid request "
                    "type %d", cmd);
                err = VRRP_EINVAL;
                goto done;
        }

        /*
         * Get the rest of the request.
         */
        assert(cursize == sizeof (uint32_t));
        while (cursize < cinfo->vi_reqsize) {
                len = read(connfd, buf + cursize,
                    cinfo->vi_reqsize - cursize);
                if (len > 0) {
                        cursize += (size_t)len;
                        continue;
                }
                if (len == -1 && (errno == EAGAIN || errno == EINTR))
                        continue;

                /* EOF (0) or a hard read error. */
                vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
                    "length");
                err = VRRP_EINVAL;
                goto done;
        }

        /*
         * Validate the authorization
         */
        if (!vrrp_auth_check(connfd, cinfo)) {
                vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): "
                    "not sufficient authorization");
                err = VRRP_EPERM;
        }

done:
        /*
         * Ack the request
         */
        if (err != 0) {
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                ((vrrp_ret_t *)ackbuf)->vr_err = err;
                acksize = sizeof (vrrp_ret_t);
        } else {
                /*
                 * If the size of ack is varied, the cmdfunc callback
                 * will set the right size.
                 */
                if ((acksize = cinfo->vi_acksize) == 0)
                        acksize = sizeof (ackbuf);

                /* LINTED E_BAD_PTR_CAST_ALIGN */
                cinfo->vi_cmdfunc((vrrp_cmd_t *)buf, ackbuf, &acksize);
        }

        /*
         * Send the ack back.
         */
        cursize = 0;
        while (cursize < acksize) {
                len = sendto(connfd, ackbuf + cursize, acksize - cursize,
                    0, (struct sockaddr *)&from, fromlen);
                if (len > 0) {
                        cursize += (size_t)len;
                        continue;
                }
                if (len == -1 && (errno == EAGAIN || errno == EINTR))
                        continue;

                vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() failed to "
                    "ack: %s", strerror(errno));
                break;
        }

        (void) shutdown(connfd, SHUT_RDWR);
        (void) close(connfd);
}

/*
 * Drain the routing socket and refresh the interface/IP address list if
 * any address-related message was seen.
 *
 * arg points to the address family associated with this routing socket.
 * Messages are read until read() returns no more data or a message with an
 * unexpected version is found; a single vrrpd_scan() of that address
 * family is performed afterwards if needed.
 */
/* ARGSUSED */
static void
vrrpd_rtsock_handler(iu_eh_t *eh, int s, short events,
    iu_event_id_t id, void *arg)
{
        int                     af = *(int *)arg;
        boolean_t               rescan = _B_FALSE;
        struct ifa_msghdr       *hdr;
        char                    msgbuf[BUFFSIZE];
        int                     nread;

        /* A non-positive result means there are no more messages. */
        while ((nread = read(s, msgbuf, sizeof (msgbuf))) > 0) {
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                hdr = (struct ifa_msghdr *)msgbuf;
                if (hdr->ifam_version != RTM_VERSION) {
                        vrrp_log(VRRP_ERR, "vrrpd_rtsock_handler(): version %d "
                            "not understood", hdr->ifam_version);
                        break;
                }

                vrrp_log(VRRP_DBG0, "vrrpd_rtsock_handler(): recv %s event",
                    rtm_event2str(hdr->ifam_type));

                /*
                 * An IP address has been created/updated/deleted or
                 * brought up/down: the interface/IP address list needs to
                 * be re-initialized. Every other message type is ignored.
                 */
                if (hdr->ifam_type == RTM_FREEADDR ||
                    hdr->ifam_type == RTM_CHGADDR ||
                    hdr->ifam_type == RTM_NEWADDR ||
                    hdr->ifam_type == RTM_DELADDR) {
                        rescan = _B_TRUE;
                }
        }

        if (rescan)
                vrrpd_scan(af);
}

/*
 * Timer callback: rescan the interface/IP addresses on the system, first
 * for IPv4 and then for IPv6.
 */
/* ARGSUSED */
static void
vrrpd_scan_timer(iu_tq_t *tq, void *arg)
{
        static const int        families[] = { AF_INET, AF_INET6 };
        size_t                  i;

        vrrp_log(VRRP_DBG0, "vrrpd_scan_timer()");

        for (i = 0; i < sizeof (families) / sizeof (families[0]); i++)
                vrrpd_scan(families[i]);
}

/*
 * Rebuild the list of the interface/IP addresses of the specified address
 * family: mark the cached entries, walk the current addresses, then apply
 * the result to the cache.
 */
static void
vrrpd_scan(int af)
{
        vrrp_log(VRRP_DBG0, "vrrpd_scan(%s)", af_str(af));

        /*
         * If an interface index changed during the walk, the walk reports
         * a failure; start over from a freshly initialized cache.
         */
        do {
                vrrpd_init_ipcache(af);
        } while (vrrpd_walk_addr_info(af) != IPADM_SUCCESS);

        vrrpd_update_ipcache(af);
}

/*
 * First mark all IP addresses of the specific address family to be removed.
 * This flag will then be cleared when we walk up all the IP addresses.
 */
static void
vrrpd_init_ipcache(int af)
{
        vrrp_intf_t     *intf, *next_intf;
        vrrp_ip_t       *ip, *nextip;
        char            abuf[INET6_ADDRSTRLEN];

        vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s)", af_str(af));

        next_intf = TAILQ_FIRST(&vrrp_intf_list);
        while ((intf = next_intf) != NULL) {
                next_intf = TAILQ_NEXT(intf, vvi_next);
                if (intf->vvi_af != af)
                        continue;

                /*
                 * If the interface is still marked as new, it means that this
                 * vrrpd_init_ipcache() call is a result of ifindex change,
                 * which causes the re-walk of all the interfaces (see
                 * vrrpd_add_ipaddr()), and some interfaces are still marked
                 * as new during the last walk. In this case, delete this
                 * interface with the "update_vr" argument to be _B_FALSE,
                 * since no VRRP router has been assoicated with this
                 * interface yet (the association is done in
                 * vrrpd_update_ipcache()).
                 *
                 * This interface will be re-added later if it still exists.
                 */
                if (intf->vvi_state == NODE_STATE_NEW) {
                        vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove %s "
                            "(%d), may be added later", intf->vvi_ifname,
                            intf->vvi_ifindex);
                        vrrpd_delete_if(intf, _B_FALSE);
                        continue;
                }

                for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
                    ip = nextip) {
                        nextip = TAILQ_NEXT(ip, vip_next);
                        /* LINTED E_CONSTANT_CONDITION */
                        VRRPADDR2STR(af, &ip->vip_addr, abuf,
                            INET6_ADDRSTRLEN, _B_FALSE);

                        if (ip->vip_state != NODE_STATE_NEW) {
                                vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s/%d, "
                                    "%s(%s/0x%x))", intf->vvi_ifname,
                                    intf->vvi_ifindex, ip->vip_lifname,
                                    abuf, ip->vip_flags);
                                ip->vip_state = NODE_STATE_STALE;
                                continue;
                        }

                        /*
                         * If the IP is still marked as new, it means that
                         * this vrrpd_init_ipcache() call is a result of
                         * ifindex change, which causes the re-walk of all
                         * the IP addresses (see vrrpd_add_ipaddr()).
                         * Delete this IP.
                         *
                         * This IP will be readded later if it still exists.
                         */
                        vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove "
                            "%s/%d , %s(%s)", intf->vvi_ifname,
                            intf->vvi_ifindex, ip->vip_lifname, abuf);
                        vrrpd_delete_ip(intf, ip);
                }
        }
}

/*
 * Walk all the IP addresses of the given family and feed each one to
 * vrrpd_add_ipaddr() to update the address list.
 *
 * Returns IPADM_FAILURE only when vrrpd_add_ipaddr() reports VRRP_EAGAIN,
 * i.e. an interface index changed in between and all the interfaces need
 * to be walked again. Every other outcome returns IPADM_SUCCESS, including
 * a failure of ipadm_addr_info() itself and a walk that stops early
 * because if_nametoindex() failed.
 */
static ipadm_status_t
vrrpd_walk_addr_info(int af)
{
        struct sockaddr_storage ss;
        vrrp_addr_t             *vaddr = (vrrp_addr_t *)&ss;
        ipadm_addr_info_t       *head, *cur;
        ipadm_status_t          status;
        uint64_t                ifflags;
        char                    *name;
        int                     idx;

        vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s)", af_str(af));

        status = ipadm_addr_info(vrrp_ipadm_handle, NULL, &head, 0, 0);
        if (status != IPADM_SUCCESS) {
                vrrp_log(VRRP_ERR, "vrrpd_walk_addr_info(%s): "
                    "ipadm_addr_info() failed: %s",
                    af_str(af), ipadm_status2str(status));
                /* Not an ifindex change, so no re-walk is requested. */
                return (IPADM_SUCCESS);
        }

        for (cur = head; cur != NULL; cur = IA_NEXT(cur)) {
                if (cur->ia_ifa.ifa_addr->sa_family != af)
                        continue;

                /* Work on a private copy of the address. */
                (void) memcpy(&ss, cur->ia_ifa.ifa_addr, sizeof (ss));
                ifflags = cur->ia_ifa.ifa_flags;
                name = cur->ia_ifa.ifa_name;

                vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s): %s",
                    af_str(af), name);

                /* Skip virtual/IPMP/P2P interfaces */
                if ((ifflags &
                    (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT)) != 0) {
                        vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s): "
                            "skipped %s", af_str(af), name);
                        continue;
                }

                /* Filter out the all-zero IP address */
                if (VRRPADDR_UNSPECIFIED(af, vaddr))
                        continue;

                idx = if_nametoindex(name);
                if (idx == 0) {
                        /* ENXIO and ENOENT are not worth an error log. */
                        if (!(errno == ENXIO || errno == ENOENT)) {
                                vrrp_log(VRRP_ERR, "vrrpd_walk_addr_info(%s): "
                                    "if_nametoindex() failed for %s: %s",
                                    af_str(af), name, strerror(errno));
                        }
                        break;
                }

                /*
                 * The interface is unplumbed/replumbed during the walk.  Try
                 * to walk the IP addresses one more time.
                 */
                if (vrrpd_add_ipaddr(name, af, vaddr, idx, ifflags) ==
                    VRRP_EAGAIN) {
                        status = IPADM_FAILURE;
                        break;
                }
        }

        ipadm_free_addr_info(head);
        return (status);
}

/*
 * Given the information of each IP address, update the interface and
 * IP addresses list.
 *
 * lifname is the logical interface name (the part before any ':' is the
 * physical interface name), addr/flags are the address and its interface
 * flags, and ifindex is the current index of the interface.
 *
 * Returns VRRP_SUCCESS when the address is recorded or refreshed.
 * Returns VRRP_EAGAIN when the ifindex of an interface that is already
 * used as a primary or virtual interface has changed; the interface is
 * deleted and the caller is expected to walk all addresses again.
 * Any error from vrrpd_create_if() or vrrpd_create_ip() is returned as is.
 */
static vrrp_err_t
vrrpd_add_ipaddr(char *lifname, int af, vrrp_addr_t *addr, int ifindex,
    uint64_t flags)
{
        char            ifname[LIFNAMSIZ], *c;
        vrrp_intf_t     *intf;
        vrrp_ip_t       *ip;
        char            abuf[INET6_ADDRSTRLEN];
        vrrp_err_t      err;

        /* LINTED E_CONSTANT_CONDITION */
        VRRPADDR2STR(af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
        /* flags is 64 bits wide, so it needs a matching conversion. */
        vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s, %d, 0x%llx)", lifname,
            abuf, ifindex, (unsigned long long)flags);

        /*
         * Get the physical interface name from the logical interface name.
         */
        (void) strlcpy(ifname, lifname, sizeof (ifname));
        if ((c = strchr(ifname, ':')) != NULL)
                *c = '\0';

        if ((intf = vrrpd_lookup_if(ifname, af)) == NULL) {
                vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(): %s is new", ifname);
                err = vrrpd_create_if(ifname, af, ifindex, &intf);
                if (err != VRRP_SUCCESS)
                        return (err);
        } else if (intf->vvi_ifindex != ifindex) {
                /*
                 * If index changes, it means that this interface is
                 * unplumbed/replumbed since we last checked. If this
                 * interface is not used by any VRRP router, just
                 * update its ifindex, and the IP addresses list will
                 * be updated later. Otherwise, return EAGAIN to rewalk
                 * all the IP addresses from the beginning.
                 */
                vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s) ifindex changed "
                    "from %d to %d", ifname, intf->vvi_ifindex, ifindex);
                if (!IS_PRIMARY_INTF(intf) && !IS_VIRTUAL_INTF(intf)) {
                        intf->vvi_ifindex = ifindex;
                } else {
                        /*
                         * delete this interface from the list if this
                         * interface has already been associated with
                         * any VRRP routers.
                         */
                        vrrpd_delete_if(intf, _B_TRUE);
                        return (VRRP_EAGAIN);
                }
        }

        /*
         * Does this IP address already exist? Entries are matched by
         * logical interface name, not by address.
         */
        TAILQ_FOREACH(ip, &intf->vvi_iplist, vip_next) {
                if (strcmp(ip->vip_lifname, lifname) == 0)
                        break;
        }

        if (ip != NULL) {
                vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP exists",
                    lifname, abuf);
                /* Seen again in this walk: no longer stale. */
                ip->vip_state = NODE_STATE_NONE;
                ip->vip_flags = flags;
                if (ipaddr_cmp(af, addr, &ip->vip_addr) != 0) {
                        /*
                         * Address has been changed, mark it as new
                         * If this address is already selected as the
                         * primary IP address, the new IP will be checked
                         * to see whether it is still qualified as the
                         * primary IP address. If not, the primary IP
                         * address will be reselected.
                         */
                        (void) memcpy(&ip->vip_addr, addr,
                            sizeof (vrrp_addr_t));

                        ip->vip_state = NODE_STATE_NEW;
                }
        } else {
                vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP is new",
                    lifname, abuf);

                err = vrrpd_create_ip(intf, lifname, addr, flags);
                if (err != VRRP_SUCCESS)
                        return (err);
        }
        return (VRRP_SUCCESS);
}

/*
 * Update the interface and IP addresses list of the given address family
 * after an address walk. Remove the entries that have gone stale since the
 * last time we walked the IP addresses and update the ones that have been
 * changed.
 *
 * For each interface of the family this:
 * - deletes the IP addresses still marked stale, and the interface itself
 *   if no address is left;
 * - reselects the primary IP address of a primary interface;
 * - brings the addresses of a virtual interface in line with the state of
 *   the VRRP router;
 * - clears the "new" mark on the addresses and the interface.
 *
 * If a primary IP address became available or a new interface was found,
 * vrrpd_reenable_all_vr() is called once at the end.
 */
static void
vrrpd_update_ipcache(int af)
{
        vrrp_intf_t     *intf, *nextif;
        vrrp_ip_t       *ip, *nextip;
        char            abuf[INET6_ADDRSTRLEN];
        boolean_t       primary_selected;
        boolean_t       primary_now_selected;
        boolean_t       need_reenable = _B_FALSE;

        vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(%s)", af_str(af));

        /*
         * The interface may be deleted inside the loop, so the next
         * element is fetched before the current one is processed.
         */
        nextif = TAILQ_FIRST(&vrrp_intf_list);
        while ((intf = nextif) != NULL) {
                nextif = TAILQ_NEXT(intf, vvi_next);
                if (intf->vvi_af != af)
                        continue;

                /*
                 * Has the interface already selected its primary IP address?
                 */
                primary_selected = (intf->vvi_pip != NULL);
                assert(!primary_selected || IS_PRIMARY_INTF(intf));

                /*
                 * Remove the IP addresses that have been unconfigured.
                 */
                for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
                    ip = nextip) {
                        nextip = TAILQ_NEXT(ip, vip_next);
                        if (ip->vip_state != NODE_STATE_STALE)
                                continue;

                        /* LINTED E_CONSTANT_CONDITION */
                        VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
                            _B_FALSE);
                        vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): IP %s "
                            "is removed over %s", abuf, intf->vvi_ifname);
                        vrrpd_delete_ip(intf, ip);
                }

                /*
                 * No IP addresses left, delete this interface.
                 */
                if (TAILQ_EMPTY(&intf->vvi_iplist)) {
                        vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
                            "no IP left over %s", intf->vvi_ifname);
                        vrrpd_delete_if(intf, _B_TRUE);
                        continue;
                }

                /*
                 * If this is selected as the physical interface for any
                 * VRRP router, reselect the primary address if needed.
                 */
                if (IS_PRIMARY_INTF(intf)) {
                        vrrpd_reselect_primary(intf);
                        primary_now_selected = (intf->vvi_pip != NULL);

                        /*
                         * Cannot find the new primary IP address.
                         *
                         * NOTE(review): intf is still used after
                         * vrrpd_remove_if(); presumably that call does not
                         * free the interface - confirm.
                         */
                        if (primary_selected && !primary_now_selected) {
                                vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache() "
                                    "reselect primary IP on %s failed",
                                    intf->vvi_ifname);
                                vrrpd_remove_if(intf, _B_TRUE);
                        } else if (!primary_selected && primary_now_selected) {
                                /*
                                 * The primary IP address is successfully
                                 * selected on the physical interface; we
                                 * need to walk through all the VRRP routers
                                 * that are created on this physical interface
                                 * and see whether they can now be enabled.
                                 */
                                need_reenable = _B_TRUE;
                        }
                }

                /*
                 * For every new virtual IP address, bring it up/down based
                 * on the state of VRRP router.
                 *
                 * Note that it is fine to not update the IP's vip_flags field
                 * even if vrrpd_virtualip_updateone() changed the address's
                 * up/down state, since the vip_flags field is only used for
                 * selecting the primary IP address over a physical interface,
                 * and vrrpd_virtualip_updateone() only affects the virtual IP
                 * address's status.
                 */
                for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
                    ip = nextip) {
                        nextip = TAILQ_NEXT(ip, vip_next);
                        /* LINTED E_CONSTANT_CONDITION */
                        VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
                            _B_FALSE);
                        vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
                            "IP %s over %s%s", abuf, intf->vvi_ifname,
                            ip->vip_state == NODE_STATE_NEW ? " is new" : "");

                        if (IS_VIRTUAL_INTF(intf)) {
                                /*
                                 * If this IP is new, update its up/down state
                                 * based on the virtual interface's state
                                 * (which is determined by the VRRP router's
                                 * state). Otherwise, check only and prompt
                                 * warnings if its up/down state has been
                                 * changed.
                                 */
                                if (vrrpd_virtualip_updateone(intf, ip,
                                    ip->vip_state == NODE_STATE_NONE) !=
                                    VRRP_SUCCESS) {
                                        vrrp_log(VRRP_DBG0,
                                            "vrrpd_update_ipcache(): "
                                            "IP %s over %s update failed", abuf,
                                            intf->vvi_ifname);
                                        vrrpd_delete_ip(intf, ip);
                                        continue;
                                }
                        }
                        /* The address has been handled: clear the new mark. */
                        ip->vip_state = NODE_STATE_NONE;
                }

                /*
                 * The IP address is deleted when it fails to be brought
                 * up. If no IP addresses are left, delete this interface.
                 */
                if (TAILQ_EMPTY(&intf->vvi_iplist)) {
                        vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
                            "no IP left over %s", intf->vvi_ifname);
                        vrrpd_delete_if(intf, _B_TRUE);
                        continue;
                }

                if (intf->vvi_state == NODE_STATE_NEW) {
                        /*
                         * A new interface is found. This interface can be
                         * the primary interface or the virtual VNIC
                         * interface.  Again, we need to walk through all
                         * the VRRP routers to see whether some of them can
                         * now be enabled because of the new primary IP
                         * address or the new virtual IP addresses.
                         */
                        intf->vvi_state = NODE_STATE_NONE;
                        need_reenable = _B_TRUE;
                }
        }

        if (need_reenable)
                vrrpd_reenable_all_vr();
}

/*
 * Make sure the given primary interface has a usable primary IP address.
 *
 * A new primary IP is selected if:
 * - The existing primary IP is no longer qualified (removed or it is down
 *   or not a link-local IP for IPv6 VRRP router);
 * - This is a physical interface but no primary IP is chosen.
 *
 * intf->vvi_pip is left untouched when the current primary IP still
 * qualifies; otherwise it is set to the result of vrrpd_select_primary(),
 * which may be NULL.
 */
static void
vrrpd_reselect_primary(vrrp_intf_t *intf)
{
        vrrp_ip_t       *oldpip, *newpip;
        char            addrstr[INET6_ADDRSTRLEN];

        assert(IS_PRIMARY_INTF(intf));

        oldpip = intf->vvi_pip;
        if (oldpip != NULL) {
                /*
                 * If the interface's old primary IP address is still
                 * valid, there is nothing to do.
                 */
                if (QUALIFY_PRIMARY_ADDR(intf, oldpip))
                        return;

                /* LINTED E_CONSTANT_CONDITION */
                VRRPADDR2STR(intf->vvi_af, &oldpip->vip_addr, addrstr,
                    sizeof (addrstr), _B_FALSE);
                vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
                    "is no longer qualified", intf->vvi_ifname, addrstr);
        }

        newpip = vrrpd_select_primary(intf);
        intf->vvi_pip = newpip;
        if (newpip == NULL)
                return;

        /* LINTED E_CONSTANT_CONDITION */
        VRRPADDR2STR(intf->vvi_af, &newpip->vip_addr, addrstr,
            sizeof (addrstr), _B_FALSE);
        vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
            "is selected", intf->vvi_ifname, addrstr);
}

/*
 * Pick the primary IP address of the given interface.
 *
 * The address list is walked from the head and the walk stops at the first
 * address that is IFF_UP (the link-local address, if any, is expected to be
 * at the head of the list). That candidate is returned only if it also
 * passes QUALIFY_PRIMARY_ADDR(); otherwise NULL is returned.
 */
static vrrp_ip_t *
vrrpd_select_primary(vrrp_intf_t *pif)
{
        vrrp_ip_t       *cand;
        char            addrstr[INET6_ADDRSTRLEN];

        vrrp_log(VRRP_DBG1, "vrrpd_select_primary(%s)", pif->vvi_ifname);

        TAILQ_FOREACH(cand, &pif->vvi_iplist, vip_next) {
                boolean_t       is_up;

                assert(cand->vip_state != NODE_STATE_STALE);
                is_up = (cand->vip_flags & IFF_UP) ? _B_TRUE : _B_FALSE;

                /* LINTED E_CONSTANT_CONDITION */
                VRRPADDR2STR(pif->vvi_af, &cand->vip_addr, addrstr,
                    sizeof (addrstr), _B_FALSE);
                vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s): %s is %s",
                    pif->vvi_ifname, addrstr, is_up ? "up" : "down");

                if (is_up)
                        break;
        }

        /*
         * cand is NULL here when the list held no UP address at all.
         */
        if (cand != NULL && QUALIFY_PRIMARY_ADDR(pif, cand))
                return (cand);

        vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s/%s) failed",
            pif->vvi_ifname, af_str(pif->vvi_af));
        return (NULL);
}

/*
 * A new interface showed up: give every administratively enabled VRRP
 * router another chance to start. The result of each attempt is ignored.
 */
static void
vrrpd_reenable_all_vr()
{
        vrrp_vr_t *router;

        vrrp_log(VRRP_DBG0, "vrrpd_reenable_all_vr()");

        TAILQ_FOREACH(router, &vrrp_vr_list, vvr_next) {
                if (!router->vvr_conf.vvc_enabled)
                        continue;
                (void) vrrpd_enable_vr(router);
        }
}

/*
 * Tell every enabled VRRP router that the given interface can no longer be
 * used. primary_addr_gone distinguishes the two reasons: _B_TRUE means no
 * primary IP address could be selected on this (physical) interface,
 * _B_FALSE means the interface itself is no longer available.
 * vrrpd_disable_vr() decides whether a particular router is affected.
 */
static void
vrrpd_remove_if(vrrp_intf_t *intf, boolean_t primary_addr_gone)
{
        vrrp_vr_t       *router;
        const char      *reason;

        reason = primary_addr_gone ? "primary address gone" :
            "interface deleted";
        vrrp_log(VRRP_DBG0, "vrrpd_remove_if(%s): %s", intf->vvi_ifname,
            reason);

        TAILQ_FOREACH(router, &vrrp_vr_list, vvr_next) {
                if (!router->vvr_conf.vvc_enabled)
                        continue;
                vrrpd_disable_vr(router, intf, primary_addr_gone);
        }
}

/*
 * Update the VRRP configuration file based on the given configuration.
 * op is either VRRP_CONF_UPDATE or VRRP_CONF_DELETE.
 *
 * The current file is copied line by line into "<conffile>.new". The first
 * line whose router name matches newconf->vvc_name is rewritten (update) or
 * dropped (delete); every other line is copied unchanged. For an update
 * that matched no line, the new configuration is appended. The new file is
 * finally renamed over the original one.
 *
 * Returns VRRP_SUCCESS on success, VRRP_ENOTFOUND when a delete matched no
 * line, VRRP_EDB on file I/O failures, or the error returned by
 * vrrpd_read_vrconf()/vrrpd_write_vrconf().
 */
static vrrp_err_t
vrrpd_updateconf(vrrp_vr_conf_t *newconf, uint_t op)
{
        vrrp_vr_conf_t  conf;
        FILE            *fp, *nfp = NULL;
        int             nfd;
        char            line[LINE_MAX];
        char            newfile[MAXPATHLEN];
        boolean_t       found = _B_FALSE;
        vrrp_err_t      err = VRRP_SUCCESS;

        vrrp_log(VRRP_DBG0, "vrrpd_updateconf(%s, %s)", newconf->vvc_name,
            op == VRRP_CONF_UPDATE ? "update" : "delete");

        if ((fp = fopen(vrrpd_conffile, "r+F")) == NULL) {
                if (errno != ENOENT) {
                        vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s for "
                            "update failed: %s", vrrpd_conffile,
                            strerror(errno));
                        return (VRRP_EDB);
                }

                /* No configuration file yet: start from an empty one. */
                if ((fp = fopen(vrrpd_conffile, "w+F")) == NULL) {
                        vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s for "
                            "write failed: %s", vrrpd_conffile,
                            strerror(errno));
                        return (VRRP_EDB);
                }
        }

        (void) snprintf(newfile, MAXPATHLEN, "%s.new", vrrpd_conffile);
        if ((nfd = open(newfile, O_WRONLY | O_CREAT | O_TRUNC,
            S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
                    newfile, strerror(errno));
                (void) fclose(fp);
                return (VRRP_EDB);
        }

        if ((nfp = fdopen(nfd, "wF")) == NULL) {
                vrrp_log(VRRP_ERR, "vrrpd_updateconf(): fdopen(%s) failed: %s",
                    newfile, strerror(errno));
                /*
                 * No stream owns the descriptor, so it has to be closed
                 * by hand, and the failure must be reported to the caller.
                 */
                (void) close(nfd);
                err = VRRP_EDB;
                goto done;
        }

        while (fgets(line, sizeof (line), fp) != NULL) {
                /*
                 * Start every line from a clean slate so that a line which
                 * lacks some property is never compared using stale or
                 * uninitialized data.
                 */
                bzero(&conf, sizeof (conf));
                conf.vvc_vrid = VRRP_VRID_NONE;
                if (!found && (err = vrrpd_read_vrconf(line, &conf)) !=
                    VRRP_SUCCESS) {
                        vrrp_log(VRRP_ERR, "vrrpd_updateconf(): invalid "
                            "configuration format: %s", line);
                        goto done;
                }

                /*
                 * Write this line out if:
                 * - this is a comment line; or
                 * - we've done updating/deleting the given VR; or
                 * - if the name of the VR read from this line does not match
                 *   the VR name that we are about to update/delete;
                 */
                if (found || conf.vvc_vrid == VRRP_VRID_NONE ||
                    strcmp(conf.vvc_name, newconf->vvc_name) != 0) {
                        if (fputs(line, nfp) != EOF)
                                continue;

                        vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
                            "write line %s", line);
                        err = VRRP_EDB;
                        goto done;
                }

                /*
                 * Otherwise, update/skip the line.
                 */
                found = _B_TRUE;
                if (op == VRRP_CONF_DELETE)
                        continue;

                assert(op == VRRP_CONF_UPDATE);
                if ((err = vrrpd_write_vrconf(line, sizeof (line),
                    newconf)) != VRRP_SUCCESS) {
                        vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
                            "update configuration for %s", newconf->vvc_name);
                        goto done;
                }
                if (fputs(line, nfp) == EOF) {
                        vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
                            "write line %s", line);
                        err = VRRP_EDB;
                        goto done;
                }
        }

        /*
         * If we get to the end of the file and have not seen the router that
         * we are about to update, write it out.
         */
        if (!found && op == VRRP_CONF_UPDATE) {
                if ((err = vrrpd_write_vrconf(line, sizeof (line),
                    newconf)) == VRRP_SUCCESS && fputs(line, nfp) == EOF) {
                        vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
                            "write line %s", line);
                        err = VRRP_EDB;
                }
        } else if (!found && op == VRRP_CONF_DELETE) {
                vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to find "
                    "configuation for %s", newconf->vvc_name);
                err = VRRP_ENOTFOUND;
        }

        if (err != VRRP_SUCCESS)
                goto done;

        /* Push buffered data to the file before it replaces the original. */
        if (fflush(nfp) == EOF || rename(newfile, vrrpd_conffile) < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
                    "rename file %s", newfile);
                err = VRRP_EDB;
        }

done:
        (void) fclose(fp);
        /* nfp is still NULL when fdopen() failed; fclose(NULL) is invalid. */
        if (nfp != NULL)
                (void) fclose(nfp);
        /*
         * Remove the temporary file. After a successful rename() it no
         * longer exists under this name and the call simply fails.
         */
        (void) unlink(newfile);
        return (err);
}

/*
 * Format the given configuration as one line of the configuration file.
 *
 * For every entry of vrrp_prop_info_tbl a "name=value" pair is appended to
 * line (pairs are separated by a single space), followed by a terminating
 * newline. len is the size of the line buffer.
 *
 * Returns VRRP_SUCCESS, or VRRP_EDB if the buffer cannot hold the result.
 */
static vrrp_err_t
vrrpd_write_vrconf(char *line, size_t len, vrrp_vr_conf_t *conf)
{
        vrrp_prop_t     *prop;
        int             n, i;

        vrrp_log(VRRP_DBG0, "vrrpd_write_vrconf(%s)", conf->vvc_name);

        for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
                prop = &vrrp_prop_info_tbl[i];

                /* The property name; n >= len means it was truncated. */
                n = snprintf(line, len, i == 0 ? "%s=" : " %s=",
                    prop->vs_propname);
                if (n < 0 || (size_t)n >= len)
                        break;
                len -= n;
                line += n;

                /* The property value, written right after the '='. */
                n = prop->vs_propwrite(conf, line, len);
                if (n < 0 || (size_t)n >= len)
                        break;
                len -= n;
                line += n;
        }

        /* Leaving the loop early means some property did not fit. */
        if (i != VRRP_PROP_INFO_TABSIZE) {
                vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too "
                    "small", conf->vvc_name);
                return (VRRP_EDB);
        }

        n = snprintf(line, len, "\n");
        if (n < 0 || (size_t)n >= len) {
                vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too "
                    "small", conf->vvc_name);
                return (VRRP_EDB);
        }
        return (VRRP_SUCCESS);
}

/*
 * Parse one line of the configuration file into conf.
 *
 * Blank lines and lines whose first non-blank character is '#' are accepted
 * and reported by setting conf->vvc_vrid to VRRP_VRID_NONE. Any other line
 * is split on blanks, tabs and newlines and every token is handed to
 * vrrpd_readprop(). The line itself is not modified; a private copy is
 * tokenized.
 *
 * Returns VRRP_SUCCESS, the error of the first property that failed to
 * parse, or VRRP_EINVAL if all properties parsed but no VRID was set.
 */
static vrrp_err_t
vrrpd_read_vrconf(char *line, vrrp_vr_conf_t *conf)
{
        char            copy[MAXLINELEN];
        char            *cursor = copy;
        char            *tok, *state;
        vrrp_err_t      rc = VRRP_SUCCESS;

        (void) strlcpy(copy, line, sizeof (copy));

        /*
         * Skip leading spaces, blank lines, and comments.
         */
        skip_whitespace(cursor);
        if ((cursor - copy == strlen(copy)) || (*cursor == '#')) {
                conf->vvc_vrid = VRRP_VRID_NONE;
                return (VRRP_SUCCESS);
        }

        /*
         * Read each VR property; stop at the first one that is rejected.
         */
        tok = strtok_r(cursor, " \n\t", &state);
        while (tok != NULL) {
                rc = vrrpd_readprop(tok, conf);
                if (rc != VRRP_SUCCESS)
                        break;
                tok = strtok_r(NULL, " \n\t", &state);
        }
        if (rc != VRRP_SUCCESS)
                return (rc);

        /* All properties read but no VRID defined */
        if (conf->vvc_vrid == VRRP_VRID_NONE)
                return (VRRP_EINVAL);

        return (VRRP_SUCCESS);
}

/*
 * Parse a single "name=value" token and store the value in conf.
 *
 * The token is split in place at the first '=' (the '=' is overwritten
 * with a NUL byte). The name is matched case-insensitively against
 * vrrp_prop_info_tbl and the value is handed to the matching entry's
 * vs_propread callback.
 *
 * Returns VRRP_SUCCESS if an entry matched and accepted the value,
 * VRRP_EINVAL otherwise (no '=', unknown name, or rejected value).
 */
static vrrp_err_t
vrrpd_readprop(const char *str, vrrp_vr_conf_t *conf)
{
        vrrp_prop_t     *entry;
        char            *value;
        int             idx;

        value = strchr(str, '=');
        if (value == NULL) {
                vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
                return (VRRP_EINVAL);
        }

        /* Terminate the name and step over to the value. */
        *value = '\0';
        value++;

        for (idx = 0; idx < VRRP_PROP_INFO_TABSIZE; idx++) {
                entry = &vrrp_prop_info_tbl[idx];
                if (strcasecmp(str, entry->vs_propname) != 0)
                        continue;
                if (entry->vs_propread(conf, value))
                        return (VRRP_SUCCESS);
        }

        /* Only the property name is logged: str now ends at the old '='. */
        vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
        return (VRRP_EINVAL);
}

/*
 * Property reader for the router name: copy str into conf->vvc_name.
 * Returns _B_FALSE when the name does not fit (it is truncated then).
 */
static boolean_t
vrrp_rd_prop_name(vrrp_vr_conf_t *conf, const char *str)
{
        size_t needed;

        needed = strlcpy(conf->vvc_name, str, sizeof (conf->vvc_name));
        return (needed < sizeof (conf->vvc_name));
}

/*
 * Property reader for the VRID.
 *
 * str must consist of exactly one number (any base accepted by strtol()
 * with base 0) in the range [VRRP_VRID_MIN, VRRP_VRID_MAX]. The value is
 * stored in conf->vvc_vrid only when it is valid.
 *
 * Returns _B_TRUE on success, _B_FALSE otherwise.
 */
static boolean_t
vrrp_rd_prop_vrid(vrrp_vr_conf_t *conf, const char *str)
{
        char    *end;
        long    val;

        /* strtol() never clears errno, so it must be reset beforehand. */
        errno = 0;
        val = strtol(str, &end, 0);
        if (errno != 0 || end == str || *end != '\0')
                return (_B_FALSE);

        /*
         * Check the range on the full long value, before it is narrowed
         * into the field, so that an out-of-range number can never wrap
         * around into the valid range.
         */
        if (val < VRRP_VRID_MIN || val > VRRP_VRID_MAX)
                return (_B_FALSE);

        conf->vvc_vrid = val;
        return (_B_TRUE);
}

/*
 * Property reader for the address family: accepts "AF_INET" or "AF_INET6"
 * (case-insensitive). conf->vvc_af is left untouched for anything else and
 * _B_FALSE is returned.
 */
static boolean_t
vrrp_rd_prop_af(vrrp_vr_conf_t *conf, const char *str)
{
        if (strcasecmp(str, "AF_INET") == 0) {
                conf->vvc_af = AF_INET;
                return (_B_TRUE);
        }
        if (strcasecmp(str, "AF_INET6") == 0) {
                conf->vvc_af = AF_INET6;
                return (_B_TRUE);
        }
        return (_B_FALSE);
}

/*
 * Property reader for the priority.
 *
 * str must consist of exactly one number (any base accepted by strtol()
 * with base 0) in the range [VRRP_PRI_MIN, VRRP_PRI_OWNER]. The value is
 * stored in conf->vvc_pri only when it is valid.
 *
 * Returns _B_TRUE on success, _B_FALSE otherwise.
 */
static boolean_t
vrrp_rd_prop_pri(vrrp_vr_conf_t *conf, const char *str)
{
        char    *end;
        long    val;

        /* strtol() never clears errno, so it must be reset beforehand. */
        errno = 0;
        val = strtol(str, &end, 0);
        if (errno != 0 || end == str || *end != '\0')
                return (_B_FALSE);

        /*
         * Check the range on the full long value, before it is narrowed
         * into the field, so that an out-of-range number can never wrap
         * around into the valid range.
         */
        if (val < VRRP_PRI_MIN || val > VRRP_PRI_OWNER)
                return (_B_FALSE);

        conf->vvc_pri = val;
        return (_B_TRUE);
}

/*
 * Property reader for the advertisement interval.
 *
 * str must consist of exactly one number (any base accepted by strtol()
 * with base 0) in the range [VRRP_MAX_ADVER_INT_MIN,
 * VRRP_MAX_ADVER_INT_MAX]. The value is stored in conf->vvc_adver_int only
 * when it is valid.
 *
 * Returns _B_TRUE on success, _B_FALSE otherwise.
 */
static boolean_t
vrrp_rd_prop_adver_int(vrrp_vr_conf_t *conf, const char *str)
{
        char    *end;
        long    val;

        /* strtol() never clears errno, so it must be reset beforehand. */
        errno = 0;
        val = strtol(str, &end, 0);
        if (errno != 0 || end == str || *end != '\0')
                return (_B_FALSE);

        /*
         * Check the range on the full long value, before it is narrowed
         * into the field, so that an out-of-range number can never wrap
         * around into the valid range.
         */
        if (val < VRRP_MAX_ADVER_INT_MIN || val > VRRP_MAX_ADVER_INT_MAX)
                return (_B_FALSE);

        conf->vvc_adver_int = val;
        return (_B_TRUE);
}

/*
 * Property reader for the preempt mode: accepts "true" or "false"
 * (case-insensitive). conf->vvc_preempt is left untouched for anything
 * else and _B_FALSE is returned.
 */
static boolean_t
vrrp_rd_prop_preempt(vrrp_vr_conf_t *conf, const char *str)
{
        if (strcasecmp(str, "true") == 0) {
                conf->vvc_preempt = _B_TRUE;
                return (_B_TRUE);
        }
        if (strcasecmp(str, "false") == 0) {
                conf->vvc_preempt = _B_FALSE;
                return (_B_TRUE);
        }
        return (_B_FALSE);
}

/*
 * Property reader for the accept mode: accepts "true" or "false"
 * (case-insensitive). conf->vvc_accept is left untouched for anything else
 * and _B_FALSE is returned.
 */
static boolean_t
vrrp_rd_prop_accept(vrrp_vr_conf_t *conf, const char *str)
{
        if (strcasecmp(str, "true") == 0) {
                conf->vvc_accept = _B_TRUE;
                return (_B_TRUE);
        }
        if (strcasecmp(str, "false") == 0) {
                conf->vvc_accept = _B_FALSE;
                return (_B_TRUE);
        }
        return (_B_FALSE);
}

/*
 * Property reader for the administrative state: accepts "enabled" or
 * "disabled" (case-insensitive). conf->vvc_enabled is left untouched for
 * anything else and _B_FALSE is returned.
 */
static boolean_t
vrrp_rd_prop_enabled(vrrp_vr_conf_t *conf, const char *str)
{
        if (strcasecmp(str, "enabled") == 0) {
                conf->vvc_enabled = _B_TRUE;
                return (_B_TRUE);
        }
        if (strcasecmp(str, "disabled") == 0) {
                conf->vvc_enabled = _B_FALSE;
                return (_B_TRUE);
        }
        return (_B_FALSE);
}

/*
 * Property reader for the link name: copy str into conf->vvc_link.
 * Returns _B_FALSE when the name does not fit (it is truncated then).
 */
static boolean_t
vrrp_rd_prop_ifname(vrrp_vr_conf_t *conf, const char *str)
{
        size_t needed;

        needed = strlcpy(conf->vvc_link, str, sizeof (conf->vvc_link));
        return (needed < sizeof (conf->vvc_link));
}

/*
 * Property writer for the router name. Writes conf->vvc_name into str
 * (at most size bytes) and returns what snprintf() returns.
 */
static int
vrrp_wt_prop_name(vrrp_vr_conf_t *conf, char *str, size_t size)
{
        int written;

        written = snprintf(str, size, "%s", conf->vvc_name);
        return (written);
}

/*
 * Property writer for the priority. Writes conf->vvc_pri in decimal into
 * str (at most size bytes) and returns what snprintf() returns.
 */
static int
vrrp_wt_prop_pri(vrrp_vr_conf_t *conf, char *str, size_t size)
{
        int written;

        written = snprintf(str, size, "%d", conf->vvc_pri);
        return (written);
}

/*
 * Property writer for the advertisement interval. Writes
 * conf->vvc_adver_int in decimal into str (at most size bytes) and returns
 * what snprintf() returns.
 */
static int
vrrp_wt_prop_adver_int(vrrp_vr_conf_t *conf, char *str, size_t size)
{
        int written;

        written = snprintf(str, size, "%d", conf->vvc_adver_int);
        return (written);
}

/*
 * Property writer for the preempt mode. Writes "true" or "false" into str
 * (at most size bytes) and returns what snprintf() returns.
 */
static int
vrrp_wt_prop_preempt(vrrp_vr_conf_t *conf, char *str, size_t size)
{
        const char *text = "false";

        if (conf->vvc_preempt)
                text = "true";
        return (snprintf(str, size, "%s", text));
}

/*
 * Property writer for the accept mode. Writes "true" or "false" into str
 * (at most size bytes) and returns what snprintf() returns.
 */
static int
vrrp_wt_prop_accept(vrrp_vr_conf_t *conf, char *str, size_t size)
{
        const char *text = "false";

        if (conf->vvc_accept)
                text = "true";
        return (snprintf(str, size, "%s", text));
}

/*
 * Property writer for the administrative state. Writes "enabled" or
 * "disabled" into str (at most size bytes) and returns what snprintf()
 * returns.
 */
static int
vrrp_wt_prop_enabled(vrrp_vr_conf_t *conf, char *str, size_t size)
{
        const char *text = "disabled";

        if (conf->vvc_enabled)
                text = "enabled";
        return (snprintf(str, size, "%s", text));
}

/*
 * Property writer for the VRID. Writes conf->vvc_vrid in decimal into str
 * (at most size bytes) and returns what snprintf() returns.
 */
static int
vrrp_wt_prop_vrid(vrrp_vr_conf_t *conf, char *str, size_t size)
{
        int written;

        written = snprintf(str, size, "%d", conf->vvc_vrid);
        return (written);
}

/*
 * Property writer for the address family. Writes "AF_INET" when
 * conf->vvc_af is AF_INET and "AF_INET6" for any other value, into str (at
 * most size bytes), and returns what snprintf() returns.
 */
static int
vrrp_wt_prop_af(vrrp_vr_conf_t *conf, char *str, size_t size)
{
        const char *text = "AF_INET6";

        if (conf->vvc_af == AF_INET)
                text = "AF_INET";
        return (snprintf(str, size, "%s", text));
}

/*
 * Property writer for the link name. Writes conf->vvc_link into str (at
 * most size bytes) and returns what snprintf() returns.
 */
static int
vrrp_wt_prop_ifname(vrrp_vr_conf_t *conf, char *str, size_t size)
{
        int written;

        written = snprintf(str, size, "%s", conf->vvc_link);
        return (written);
}

/*
 * Return a printable name for an address family. Besides the AF_*
 * constants, the plain numbers 4 and 6 are accepted for IPv4 and IPv6.
 * Unknown values yield "AF_error". The returned string is a constant and
 * must not be modified or freed.
 */
static char *
af_str(int af)
{
        char *name = "AF_error";

        if (af == AF_INET || af == 4)
                name = "AF_INET";
        else if (af == AF_INET6 || af == 6)
                name = "AF_INET6";
        else if (af == AF_UNSPEC)
                name = "AF_UNSPEC";

        return (name);
}

/*
 * Allocate a new VRRP router for the given configuration and put it at the
 * head of vrrp_vr_list.
 *
 * The router is moved from VRRP_STATE_NONE to VRRP_STATE_INIT and is always
 * created disabled, whatever conf->vvc_enabled says.
 *
 * NOTE(review): the state transition runs before the configuration is
 * copied in, i.e. while vvr_conf is still all zeroes; confirm that
 * vrrpd_state_trans() does not depend on the router name.
 *
 * Returns VRRP_SUCCESS, or VRRP_ENOMEM if the allocation fails.
 */
static vrrp_err_t
vrrpd_create_vr(vrrp_vr_conf_t *conf)
{
        vrrp_vr_t       *newvr;

        vrrp_log(VRRP_DBG0, "vrrpd_create_vr(%s)", conf->vvc_name);

        /* calloc() returns zero-filled memory, so nothing else to clear. */
        newvr = calloc(1, sizeof (*newvr));
        if (newvr == NULL) {
                vrrp_log(VRRP_ERR, "vrrpd_create_vr(): memory allocation for %s"
                    " failed", conf->vvc_name);
                return (VRRP_ENOMEM);
        }

        newvr->vvr_state = VRRP_STATE_NONE;
        newvr->vvr_timer_id = -1;
        vrrpd_state_trans(VRRP_STATE_NONE, VRRP_STATE_INIT, newvr);

        (void) memcpy(&newvr->vvr_conf, conf, sizeof (*conf));
        newvr->vvr_conf.vvc_enabled = _B_FALSE;

        TAILQ_INSERT_HEAD(&vrrp_vr_list, newvr, vvr_next);
        return (VRRP_SUCCESS);
}

/*
 * Destroy a VRRP router: disable it first if it is still enabled (as an
 * administrative request), move it from VRRP_STATE_INIT to VRRP_STATE_NONE,
 * unlink it from vrrp_vr_list and free it. vr must not be used afterwards.
 */
static void
vrrpd_delete_vr(vrrp_vr_t *vr)
{
        vrrp_log(VRRP_DBG0, "vrrpd_delete_vr(%s)", vr->vvr_conf.vvc_name);

        if (vr->vvr_conf.vvc_enabled) {
                /* A NULL interface marks the request as coming from admin. */
                vrrpd_disable_vr(vr, NULL, _B_FALSE);
        }

        assert(vr->vvr_state == VRRP_STATE_INIT);
        vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_NONE, vr);

        TAILQ_REMOVE(&vrrp_vr_list, vr, vvr_next);
        free(vr);
}

/*
 * Try to make an (administratively enabled) VRRP router start
 * participating in the protocol.
 *
 * A router that already left VRRP_STATE_INIT is running; VRRP_SUCCESS is
 * returned right away. Otherwise the RX socket is set up and a primary IP
 * address is selected, then the TX socket is set up. If either side fails,
 * the first failure (RX before TX) is recorded in vr->vvr_err and
 * VRRP_EINVAL is returned. If both succeed, the router becomes MASTER when
 * its priority is 255 and BACKUP otherwise; when that transition fails its
 * error is recorded, the primary IP is forgotten and both sockets are torn
 * down again.
 */
static vrrp_err_t
vrrpd_enable_vr(vrrp_vr_t *vr)
{
        vrrp_err_t      rx_rc, tx_rc, rc;

        vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s)", vr->vvr_conf.vvc_name);

        assert(vr->vvr_conf.vvc_enabled);

        /*
         * This VRRP router has been successfully enabled and start
         * participating.
         */
        if (vr->vvr_state != VRRP_STATE_INIT)
                return (VRRP_SUCCESS);

        rx_rc = vrrpd_init_rxsock(vr);
        if (rx_rc == VRRP_SUCCESS) {
                /*
                 * Select the primary IP address. Even if the selection
                 * fails this time, it is retried when a new IP address
                 * comes up.
                 */
                vrrpd_reselect_primary(vr->vvr_pif);
                if (vr->vvr_pif->vvi_pip == NULL) {
                        vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s): "
                            "select_primary over %s failed",
                            vr->vvr_conf.vvc_name, vr->vvr_pif->vvi_ifname);
                        rx_rc = VRRP_ENOPRIM;
                }
        }

        /*
         * The TX socket, used to send the multicast packets, is set up
         * whether or not the RX side succeeded.
         */
        tx_rc = vrrpd_init_txsock(vr);

        /*
         * Only start the state transition if both sides are ready. The
         * failure is recorded for diagnostic purposes, RX taking
         * precedence over TX.
         */
        if (rx_rc != VRRP_SUCCESS) {
                vr->vvr_err = rx_rc;
                return (VRRP_EINVAL);
        }
        if (tx_rc != VRRP_SUCCESS) {
                vr->vvr_err = tx_rc;
                return (VRRP_EINVAL);
        }

        rc = (vr->vvr_conf.vvc_pri == 255) ?
            vrrpd_state_i2m(vr) : vrrpd_state_i2b(vr);
        if (rc == VRRP_SUCCESS)
                return (rc);

        /* The transition failed: undo the setup done above. */
        vr->vvr_err = rc;
        vr->vvr_pif->vvi_pip = NULL;
        vrrpd_fini_txsock(vr);
        vrrpd_fini_rxsock(vr);
        return (rc);
}

/*
 * Stop the given VRRP router from participating in the VRRP protocol, if
 * it is affected by the event described by intf and primary_addr_gone:
 *
 * - intf == NULL: the request comes from the administrator; the router is
 *   always stopped and both of its sockets are torn down.
 * - primary_addr_gone: no primary IP address is left on intf; the router
 *   is only affected if intf is its primary interface. Its sockets are
 *   kept so that it can restart once a primary IP address is reselected.
 * - otherwise intf has been removed; the router is only affected if intf
 *   is its primary interface (RX socket torn down) or its VNIC (TX socket
 *   torn down).
 *
 * An affected router in the MASTER or BACKUP state is moved back to the
 * INIT state, and the reason is recorded in vr->vvr_err.
 */
static void
vrrpd_disable_vr(vrrp_vr_t *vr, vrrp_intf_t *intf, boolean_t primary_addr_gone)
{
        /* Keep the interface name and the reason apart in the log. */
        vrrp_log(VRRP_DBG0, "vrrpd_disable_vr(%s): %s%s%s",
            vr->vvr_conf.vvc_name,
            intf == NULL ? "requested by admin" : intf->vvi_ifname,
            intf == NULL ? "" : ": ",
            intf == NULL ? "" : (primary_addr_gone ? "primary address gone" :
            "interface deleted"));

        /*
         * An interface is deleted, see whether this interface is the
         * physical interface or the VNIC of the given VRRP router.
         * If not, this router is not affected.
         */
        if (!primary_addr_gone && (intf != NULL) && (intf != vr->vvr_pif) &&
            (intf != vr->vvr_vif)) {
                return;
        }

        /*
         * If this is the case that the primary IP address is gone,
         * and we failed to reselect another primary IP address, only
         * routers using this interface as primary interface are affected.
         */
        if (primary_addr_gone && intf != vr->vvr_pif)
                return;

        vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabling",
            vr->vvr_conf.vvc_name);

        if (vr->vvr_state == VRRP_STATE_MASTER) {
                /*
                 * If this router is disabled by the administrator, send
                 * the zero-priority advertisement to indicate the Master
                 * stops participating VRRP.
                 */
                if (intf == NULL)
                        (void) vrrpd_send_adv(vr, _B_TRUE);

                vrrpd_state_m2i(vr);
        } else  if (vr->vvr_state == VRRP_STATE_BACKUP) {
                vrrpd_state_b2i(vr);
        }

        /*
         * If no primary IP address can be selected, the VRRP router
         * stays at the INIT state and will become BACKUP and MASTER when
         * a primary IP address is reselected.
         */
        if (primary_addr_gone) {
                vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): primary IP "
                    "is removed", vr->vvr_conf.vvc_name);
                vr->vvr_err = VRRP_ENOPRIM;
        } else if (intf == NULL) {
                /*
                 * The VRRP router is disabled by the administrator
                 */
                vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabled by admin",
                    vr->vvr_conf.vvc_name);
                vr->vvr_err = VRRP_SUCCESS;
                vrrpd_fini_txsock(vr);
                vrrpd_fini_rxsock(vr);
        } else if (intf == vr->vvr_pif) {
                vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): physical interface "
                    "%s removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
                vr->vvr_err = VRRP_ENOPRIM;
                vrrpd_fini_rxsock(vr);
        } else if (intf == vr->vvr_vif) {
                vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): VNIC interface %s"
                    " removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
                vr->vvr_err = VRRP_ENOVIRT;
                vrrpd_fini_txsock(vr);
        }
}

/*
 * Create a new VRRP router from the given configuration.
 *
 * The configuration is validated (non-empty name and link, VRID, priority,
 * advertisement interval and address family within range, accept mode set
 * for an address owner, valid router name) and must not clash with an
 * existing router name or with an existing VRID/address family over the
 * same link. When updateconf is set, the router is also written to the
 * configuration file; that entry is removed again if the in-memory router
 * cannot be created.
 *
 * Returns VRRP_SUCCESS, VRRP_EINVAL, VRRP_EINVALVRNAME, VRRP_EINSTEXIST,
 * VRRP_EVREXIST, or the error of vrrpd_updateconf()/vrrpd_create_vr().
 */
vrrp_err_t
vrrpd_create(vrrp_vr_conf_t *conf, boolean_t updateconf)
{
        vrrp_err_t      err = VRRP_SUCCESS;

        /* Must come first: the log call below already dereferences conf. */
        assert(conf != NULL);

        vrrp_log(VRRP_DBG0, "vrrpd_create(%s, %s, %d)", conf->vvc_name,
            conf->vvc_link, conf->vvc_vrid);

        /*
         * Sanity check
         */
        if ((strlen(conf->vvc_name) == 0) ||
            (strlen(conf->vvc_link) == 0) ||
            (conf->vvc_vrid < VRRP_VRID_MIN ||
            conf->vvc_vrid > VRRP_VRID_MAX) ||
            (conf->vvc_pri < VRRP_PRI_MIN ||
            conf->vvc_pri > VRRP_PRI_OWNER) ||
            (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
            conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) ||
            (conf->vvc_af != AF_INET && conf->vvc_af != AF_INET6) ||
            (conf->vvc_pri == VRRP_PRI_OWNER && !conf->vvc_accept)) {
                vrrp_log(VRRP_DBG1, "vrrpd_create(%s): invalid argument",
                    conf->vvc_name);
                return (VRRP_EINVAL);
        }

        if (!vrrp_valid_name(conf->vvc_name)) {
                vrrp_log(VRRP_DBG1, "vrrpd_create(): %s is not a valid router "
                    "name", conf->vvc_name);
                return (VRRP_EINVALVRNAME);
        }

        if (vrrpd_lookup_vr_by_name(conf->vvc_name) != NULL) {
                vrrp_log(VRRP_DBG1, "vrrpd_create(): %s already exists",
                    conf->vvc_name);
                return (VRRP_EINSTEXIST);
        }

        if (vrrpd_lookup_vr_by_vrid(conf->vvc_link, conf->vvc_vrid,
            conf->vvc_af) != NULL) {
                vrrp_log(VRRP_DBG1, "vrrpd_create(): VRID %d/%s over %s "
                    "already exists", conf->vvc_vrid, af_str(conf->vvc_af),
                    conf->vvc_link);
                return (VRRP_EVREXIST);
        }

        /* Persist first, so that a failure leaves nothing to undo. */
        if (updateconf && (err = vrrpd_updateconf(conf,
            VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
                vrrp_log(VRRP_ERR, "vrrpd_create(): failed to update "
                    "configuration for %s", conf->vvc_name);
                return (err);
        }

        /* Take the entry back out of the file if the router is not created. */
        err = vrrpd_create_vr(conf);
        if (err != VRRP_SUCCESS && updateconf)
                (void) vrrpd_updateconf(conf, VRRP_CONF_DELETE);

        return (err);
}

/*
 * Delete the VRRP router with the given name, both from the configuration
 * file and from memory. The in-memory router is only destroyed once the
 * configuration file has been updated successfully.
 *
 * Returns VRRP_SUCCESS, VRRP_ENOTFOUND if no such router exists, or the
 * error of vrrpd_updateconf().
 */
static vrrp_err_t
vrrpd_delete(const char *vn)
{
        vrrp_vr_t       *router;
        vrrp_err_t      rc;

        vrrp_log(VRRP_DBG0, "vrrpd_delete(%s)", vn);

        router = vrrpd_lookup_vr_by_name(vn);
        if (router == NULL) {
                vrrp_log(VRRP_DBG1, "vrrpd_delete(): %s not exists", vn);
                return (VRRP_ENOTFOUND);
        }

        rc = vrrpd_updateconf(&router->vvr_conf, VRRP_CONF_DELETE);
        if (rc == VRRP_SUCCESS) {
                vrrpd_delete_vr(router);
                return (VRRP_SUCCESS);
        }

        vrrp_log(VRRP_ERR, "vrrpd_delete(): failed to delete "
            "configuration for %s", router->vvr_conf.vvc_name);
        return (rc);
}

/*
 * Administratively enable the VRRP router with the given name.
 *
 * The router's link must exist, be active and be a physical link, VLAN,
 * aggregation or VNIC, and a VNIC must exist for the router's
 * VRID/address family over that link. The router is then marked enabled
 * (and, if updateconf is set, the configuration file is updated) and an
 * attempt is made to start it. Failing to start is not an error here: the
 * router stays in the initialized state until it can run.
 *
 * Returns VRRP_SUCCESS, VRRP_ENOTFOUND, VRRP_EALREADY, VRRP_EINVALLINK,
 * VRRP_ENOVNIC, or the error of vrrpd_updateconf().
 */
static vrrp_err_t
vrrpd_enable(const char *vn, boolean_t updateconf)
{
        vrrp_vr_t               *vr;
        vrrp_vr_conf_t          *conf;
        uint32_t                flags;
        datalink_class_t        class;
        boolean_t               link_ok = _B_FALSE;
        vrrp_err_t              err = VRRP_SUCCESS;

        vrrp_log(VRRP_DBG0, "vrrpd_enable(%s)", vn);

        vr = vrrpd_lookup_vr_by_name(vn);
        if (vr == NULL) {
                vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s does not exist", vn);
                return (VRRP_ENOTFOUND);
        }
        conf = &vr->vvr_conf;

        /*
         * The VR is already enabled.
         */
        if (conf->vvc_enabled) {
                vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s is already "
                    "enabled", vn);
                return (VRRP_EALREADY);
        }

        /*
         * Check whether the link exists, is active and is of a class a
         * VRRP router can run over.
         */
        if ((strlen(conf->vvc_link) != 0) &&
            dladm_name2info(vrrpd_vh->vh_dh, conf->vvc_link, NULL, &flags,
            &class, NULL) == DLADM_STATUS_OK && (flags & DLADM_OPT_ACTIVE)) {
                switch (class) {
                case DATALINK_CLASS_PHYS:
                case DATALINK_CLASS_VLAN:
                case DATALINK_CLASS_AGGR:
                case DATALINK_CLASS_VNIC:
                        link_ok = _B_TRUE;
                        break;
                default:
                        break;
                }
        }
        if (!link_ok) {
                vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): invalid link %s",
                    vn, conf->vvc_link);
                return (VRRP_EINVALLINK);
        }

        /*
         * Get the associated VNIC name by the given interface/vrid/
         * address family.
         */
        err = vrrp_get_vnicname(vrrpd_vh, conf->vvc_vrid,
            conf->vvc_af, conf->vvc_link, NULL, NULL, vr->vvr_vnic,
            sizeof (vr->vvr_vnic));
        if (err != VRRP_SUCCESS) {
                vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): no VNIC for VRID %d/%s "
                    "over %s", vn, conf->vvc_vrid, af_str(conf->vvc_af),
                    conf->vvc_link);
                err = VRRP_ENOVNIC;
                goto fail;
        }

        /*
         * Mark the router as "enabled" before the configuration is
         * written, so that the file records the new state. This is done
         * even if no IP addresses are configured over its interfaces yet.
         */
        vr->vvr_conf.vvc_enabled = _B_TRUE;
        if (updateconf) {
                err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE);
                if (err != VRRP_SUCCESS) {
                        vrrp_log(VRRP_ERR, "vrrpd_enable(): failed to update "
                            "configuration for %s", vr->vvr_conf.vvc_name);
                        goto fail;
                }
        }

        /*
         * The router may not be able to start right now, e.g. because
         * there is no IP address over either the primary interface or the
         * VNIC yet. Success is returned anyway; the router stays in the
         * initialized state and starts to work when the IP address is
         * configured.
         */
        (void) vrrpd_enable_vr(vr);
        return (VRRP_SUCCESS);

fail:
        /* Leave the router disabled and without an associated VNIC. */
        vr->vvr_conf.vvc_enabled = _B_FALSE;
        vr->vvr_vnic[0] = '\0';
        return (err);
}

/*
 * Administratively disable the VRRP router with the given name.
 *
 * The new state is written to the configuration file first; if that
 * fails, the router is left enabled and untouched. Otherwise the router
 * is stopped and its VNIC name is cleared.
 *
 * Returns VRRP_SUCCESS, VRRP_ENOTFOUND, VRRP_EALREADY if the router is not
 * enabled, or the error of vrrpd_updateconf().
 */
static vrrp_err_t
vrrpd_disable(const char *vn)
{
        vrrp_vr_t       *router;
        vrrp_err_t      rc;

        vrrp_log(VRRP_DBG0, "vrrpd_disable(%s)", vn);

        router = vrrpd_lookup_vr_by_name(vn);
        if (router == NULL) {
                vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s does not exist", vn);
                return (VRRP_ENOTFOUND);
        }

        /*
         * The VR is already disabled.
         */
        if (!router->vvr_conf.vvc_enabled) {
                vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s was not enabled", vn);
                return (VRRP_EALREADY);
        }

        /* Flip the flag first so that the file records the new state. */
        router->vvr_conf.vvc_enabled = _B_FALSE;
        rc = vrrpd_updateconf(&router->vvr_conf, VRRP_CONF_UPDATE);
        if (rc == VRRP_SUCCESS) {
                vrrpd_disable_vr(router, NULL, _B_FALSE);
                router->vvr_vnic[0] = '\0';
                return (VRRP_SUCCESS);
        }

        /* The file could not be updated: take the change back. */
        router->vvr_conf.vvc_enabled = _B_TRUE;
        vrrp_log(VRRP_ERR, "vrrpd_disable(): failed to update "
            "configuration for %s", router->vvr_conf.vvc_name);
        return (rc);
}

/*
 * Modify the properties of the VRRP router named conf->vvc_name.
 *
 * mask selects which fields of conf are applied (VRRP_CONF_INTERVAL,
 * VRRP_CONF_PRIORITY, VRRP_CONF_ACCEPT, VRRP_CONF_PREEMPT); a mask of 0
 * is a no-op. The resulting configuration is validated first: the interval
 * and priority must be in range, and an address owner (priority
 * VRRP_PRI_OWNER) must have accept mode on. A change of accept mode is
 * pushed with vrrpd_set_noaccept() before the configuration file is
 * updated; if the file update fails, both the accept mode and the
 * in-memory configuration are restored.
 *
 * Returns VRRP_SUCCESS, VRRP_ENOTFOUND, VRRP_EINVAL, or the error of
 * vrrpd_set_noaccept()/vrrpd_updateconf().
 */
static vrrp_err_t
vrrpd_modify(vrrp_vr_conf_t *conf, uint32_t mask)
{
        vrrp_vr_t       *vr;
        vrrp_vr_conf_t  backup;
        int             new_pri;
        boolean_t       new_accept, accept_changed = _B_FALSE;
        vrrp_err_t      err;

        vrrp_log(VRRP_DBG0, "vrrpd_modify(%s)", conf->vvc_name);

        if (mask == 0)
                return (VRRP_SUCCESS);

        vr = vrrpd_lookup_vr_by_name(conf->vvc_name);
        if (vr == NULL) {
                vrrp_log(VRRP_DBG1, "vrrpd_modify(): cannot find the given "
                    "VR instance: %s", conf->vvc_name);
                return (VRRP_ENOTFOUND);
        }

        if ((mask & VRRP_CONF_INTERVAL) &&
            (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
            conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX)) {
                vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
                    "adver_interval %d", conf->vvc_name,
                    conf->vvc_adver_int);
                return (VRRP_EINVAL);
        }

        /* Work out the priority the router would end up with. */
        if (mask & VRRP_CONF_PRIORITY) {
                if (conf->vvc_pri < VRRP_PRI_MIN ||
                    conf->vvc_pri > VRRP_PRI_OWNER) {
                        vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
                            "priority %d", conf->vvc_name, conf->vvc_pri);
                        return (VRRP_EINVAL);
                }
                new_pri = conf->vvc_pri;
        } else {
                new_pri = vr->vvr_conf.vvc_pri;
        }

        /* Likewise for the accept mode. */
        if (mask & VRRP_CONF_ACCEPT)
                new_accept = conf->vvc_accept;
        else
                new_accept = vr->vvr_conf.vvc_accept;

        if (new_pri == VRRP_PRI_OWNER && !new_accept) {
                vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): accept mode must be "
                    "true for VRRP address owner", conf->vvc_name);
                return (VRRP_EINVAL);
        }

        if ((mask & VRRP_CONF_ACCEPT) &&
            (vr->vvr_conf.vvc_accept != new_accept)) {
                err = vrrpd_set_noaccept(vr, !new_accept);
                if (err != VRRP_SUCCESS) {
                        vrrp_log(VRRP_ERR, "vrrpd_modify(%s): access mode "
                            "updating failed: %s", conf->vvc_name,
                            vrrp_err2str(err));
                        return (err);
                }
                accept_changed = _B_TRUE;
        }

        /*
         * Keep a copy of the current configuration, so it can be restored
         * if the configuration file cannot be updated.
         */
        (void) memcpy(&backup, &vr->vvr_conf, sizeof (vrrp_vr_conf_t));

        if (mask & VRRP_CONF_PREEMPT)
                vr->vvr_conf.vvc_preempt = conf->vvc_preempt;
        if (mask & VRRP_CONF_ACCEPT)
                vr->vvr_conf.vvc_accept = new_accept;
        if (mask & VRRP_CONF_PRIORITY)
                vr->vvr_conf.vvc_pri = new_pri;
        if (mask & VRRP_CONF_INTERVAL)
                vr->vvr_conf.vvc_adver_int = conf->vvc_adver_int;

        err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE);
        if (err != VRRP_SUCCESS) {
                vrrp_log(VRRP_ERR, "vrrpd_modify(%s): configuration update "
                    "failed: %s", conf->vvc_name, vrrp_err2str(err));
                /*
                 * Undo the accept mode change: the previous "noaccept"
                 * value is the opposite of the one set above.
                 */
                if (accept_changed)
                        (void) vrrpd_set_noaccept(vr, new_accept);
                (void) memcpy(&vr->vvr_conf, &backup, sizeof (vrrp_vr_conf_t));
                return (err);
        }

        /* Adjust the running timeout to the new settings. */
        if ((mask & VRRP_CONF_PRIORITY) && (vr->vvr_state == VRRP_STATE_BACKUP))
                vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);

        if ((mask & VRRP_CONF_INTERVAL) && (vr->vvr_state == VRRP_STATE_MASTER))
                vr->vvr_timeout = conf->vvc_adver_int;

        return (VRRP_SUCCESS);
}

/*
 * Return the names of the configured VRRP routers that match the given
 * filters. A filter is ignored when it is VRRP_VRID_NONE (vrid), an empty
 * string (ifname), or neither AF_INET nor AF_INET6 (af).
 *
 * The names are written as consecutive NUL-terminated strings right after
 * the vrrp_ret_list_t header in "ret". On input, *sizep is the size of the
 * buffer at "ret"; each name is budgeted VRRP_NAME_MAX bytes of it. On
 * output, *sizep is the size of the reply, computed with the same per-name
 * budget. If the buffer cannot hold every match, ret->vrl_err is set to
 * VRRP_ETOOSMALL, ret->vrl_cnt holds the number of names stored so far and
 * *sizep is reduced to the size of the header.
 *
 * The caller is expected to pass a buffer that is at least as large as the
 * vrrp_ret_list_t header; that is not verified here.
 */
static void
vrrpd_list(vrid_t vrid, char *ifname, int af, vrrp_ret_list_t *ret,
    size_t *sizep)
{
        vrrp_vr_t       *vr;
        char            *p = (char *)ret + sizeof (vrrp_ret_list_t);
        size_t          size = (*sizep) - sizeof (vrrp_ret_list_t);

        vrrp_log(VRRP_DBG0, "vrrpd_list(%d_%s_%s)", vrid, ifname, af_str(af));

        ret->vrl_cnt = 0;
        TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
                /* Skip routers with a different VRID. */
                if (vrid != VRRP_VRID_NONE && vr->vvr_conf.vvc_vrid != vrid)
                        continue;

                /*
                 * Skip routers configured over a different link. (This
                 * used to test for equality, which dropped exactly the
                 * routers the caller asked for.)
                 */
                if (ifname[0] != '\0' &&
                    strcmp(ifname, vr->vvr_conf.vvc_link) != 0) {
                        continue;
                }

                /* Skip routers of a different address family. */
                if ((af == AF_INET || af == AF_INET6) &&
                    vr->vvr_conf.vvc_af != af)
                        continue;

                if (size < VRRP_NAME_MAX) {
                        vrrp_log(VRRP_DBG1, "vrrpd_list(): buffer size too "
                            "small to hold %d router names", ret->vrl_cnt);
                        *sizep = sizeof (vrrp_ret_list_t);
                        ret->vrl_err = VRRP_ETOOSMALL;
                        return;
                }

                /*
                 * The names are packed back to back, but the space is
                 * accounted for in VRRP_NAME_MAX units.
                 */
                (void) strlcpy(p, vr->vvr_conf.vvc_name, VRRP_NAME_MAX);
                p += (strlen(vr->vvr_conf.vvc_name) + 1);
                ret->vrl_cnt++;
                size -= VRRP_NAME_MAX;
        }

        *sizep = sizeof (vrrp_ret_list_t) + ret->vrl_cnt * VRRP_NAME_MAX;
        vrrp_log(VRRP_DBG1, "vrrpd_list() return %d", ret->vrl_cnt);
        ret->vrl_err = VRRP_SUCCESS;
}

/*
 * Report the configuration, state, timing and address information of the
 * VRRP router named "vn" in "ret".
 *
 * On input, *sizep is the size of the buffer at "ret". On output, it is the
 * size of the reply: sizeof (vrrp_ret_query_t), plus one vrrp_addr_t for
 * every virtual IP address beyond the first. The result of the request is
 * stored in ret->vrq_err:
 *
 *      VRRP_ENOTFOUND  no router is named "vn"
 *      VRRP_ETOOSMALL  the buffer cannot hold all the virtual IP addresses
 *      VRRP_SUCCESS    the reply has been filled in
 *
 * For a router in the INIT state only the configuration, the state
 * information, the VNIC name and the time since the last state transition
 * are reported.
 */
static void
vrrpd_query(const char *vn, vrrp_ret_query_t *ret, size_t *sizep)
{
        const size_t            avail = *sizep;
        vrrp_vr_t               *vr;
        vrrp_intf_t             *vif = NULL;
        vrrp_ip_t               *ip;
        vrrp_queryinfo_t        *qi;
        struct timeval          now;
        uint32_t                nvips = 0;
        size_t                  need;

        vrrp_log(VRRP_DBG1, "vrrpd_query(%s)", vn);

        vr = vrrpd_lookup_vr_by_name(vn);
        if (vr == NULL) {
                vrrp_log(VRRP_DBG1, "vrrpd_query(): %s does not exist", vn);
                *sizep = sizeof (vrrp_ret_query_t);
                ret->vrq_err = VRRP_ENOTFOUND;
                return;
        }

        /*
         * The virtual IP list is only looked at once the router has left
         * the INIT state.
         */
        if (vr->vvr_state != VRRP_STATE_INIT) {
                vif = vr->vvr_vif;
                TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next)
                        nvips++;
        }

        /*
         * Room for the first virtual IP is already part of the fixed size
         * reply; only the additional ones need extra space.
         */
        need = sizeof (vrrp_ret_query_t);
        if (nvips != 0)
                need += (nvips - 1) * sizeof (vrrp_addr_t);

        if (need > avail) {
                vrrp_log(VRRP_ERR, "vrrpd_query(): not enough space to hold "
                    "%d virtual IPs", nvips);
                *sizep = sizeof (vrrp_ret_query_t);
                ret->vrq_err = VRRP_ETOOSMALL;
                return;
        }
        *sizep = need;

        (void) gettimeofday(&now, NULL);

        /* Start from a clean reply, then fill in the common part. */
        bzero(ret, need);
        qi = &ret->vrq_qinfo;
        (void) memcpy(&qi->show_vi, &vr->vvr_conf, sizeof (vrrp_vr_conf_t));
        (void) memcpy(&qi->show_vs, &vr->vvr_sinfo,
            sizeof (vrrp_stateinfo_t));
        (void) strlcpy(qi->show_va.va_vnic, vr->vvr_vnic, MAXLINKNAMELEN);
        qi->show_vt.vt_since_last_tran = timeval_to_milli(
            timeval_delta(now, vr->vvr_sinfo.vs_st_time));

        if (vr->vvr_state == VRRP_STATE_INIT) {
                ret->vrq_err = VRRP_SUCCESS;
                return;
        }

        /* Virtual IP addresses, in list order. */
        nvips = 0;
        TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
                (void) memcpy(&qi->show_va.va_vips[nvips], &ip->vip_addr,
                    sizeof (vrrp_addr_t));
                nvips++;
        }
        qi->show_va.va_vipcnt = nvips;

        /* Primary IP address and the peer information. */
        (void) memcpy(&qi->show_va.va_primary,
            &vr->vvr_pif->vvi_pip->vip_addr, sizeof (vrrp_addr_t));
        (void) memcpy(&qi->show_vp, &vr->vvr_peer, sizeof (vrrp_peer_t));

        /*
         * The time since the last advertisement is only meaningful when a
         * peer has been recorded.
         */
        if (!VRRPADDR_UNSPECIFIED(vr->vvr_conf.vvc_af,
            &(vr->vvr_peer.vp_addr))) {
                qi->show_vt.vt_since_last_adv = timeval_to_milli(
                    timeval_delta(now, vr->vvr_peer.vp_time));
        }

        if (vr->vvr_state == VRRP_STATE_BACKUP)
                qi->show_vt.vt_master_down_intv = MASTER_DOWN_INTERVAL_VR(vr);

        ret->vrq_err = VRRP_SUCCESS;
}

/*
 * Build the VRRP packet (not including the IP header) in "buf", which can
 * hold "buflen" bytes. Return the payload length, or 0 if the buffer is too
 * small or the router has no virtual IP address.
 *
 * The packet is the vrrp_pkt_t header immediately followed by the virtual
 * IP addresses found on the router's virtual interface. The checksum is
 * computed with the primary IP address as the source and the VRRP multicast
 * address as the destination.
 *
 * If zero_pri is set to be B_TRUE, then this is the special zero-priority
 * advertisement which is sent by the Master to indicate that it has
 * stopped participating in VRRP.
 */
static size_t
vrrpd_build_vrrp(vrrp_vr_t *vr, uchar_t *buf, int buflen, boolean_t zero_pri)
{
        /* LINTED E_BAD_PTR_CAST_ALIGN */
        vrrp_pkt_t      *vp = (vrrp_pkt_t *)buf;
        /* LINTED E_BAD_PTR_CAST_ALIGN */
        struct in_addr  *a4 = (struct in_addr *)(vp + 1);
        /* LINTED E_BAD_PTR_CAST_ALIGN */
        struct in6_addr *a6 = (struct in6_addr *)(vp + 1);
        vrrp_intf_t     *vif = vr->vvr_vif;
        vrrp_ip_t       *vip;
        int             af = vif->vvi_af;
        size_t          size = sizeof (vrrp_pkt_t);
        size_t          addrlen = (af == AF_INET) ? sizeof (struct in_addr) :
            sizeof (struct in6_addr);
        /*
         * A negative buflen must not be converted to a huge unsigned value
         * by the comparison below; treat it as "no space at all".
         */
        size_t          space = (buflen > 0) ? (size_t)buflen : 0;
        uint16_t        rsvd_adver_int;
        int             nip = 0;

        vrrp_log(VRRP_DBG1, "vrrpd_build_vrrp(%s, %s_priority): intv %d",
            vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non-zero",
            vr->vvr_conf.vvc_adver_int);

        /*
         * Append the virtual IP addresses after the header. The space is
         * checked before each address is written, so nothing is stored
         * beyond the buffer.
         */
        TAILQ_FOREACH(vip, &vif->vvi_iplist, vip_next) {
                size += addrlen;
                if (size > space) {
                        vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): buffer size "
                            "not big enough %d", vr->vvr_conf.vvc_name,
                            (int)size);
                        return (0);
                }

                if (af == AF_INET)
                        a4[nip++] = vip->vip_addr.in4.sin_addr;
                else
                        a6[nip++] = vip->vip_addr.in6.sin6_addr;
        }

        if (nip == 0) {
                vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): no virtual IP "
                    "address", vr->vvr_conf.vvc_name);
                return (0);
        }

        vp->vp_vers_type = (VRRP_VERSION << 4) | VRRP_PKT_ADVERT;
        vp->vp_vrid = vr->vvr_conf.vvc_vrid;
        vp->vp_prio = zero_pri ? VRRP_PRIO_ZERO : vr->vvr_conf.vvc_pri;

        /*
         * The advertisement interval is sent in centiseconds in the lower
         * 12 bits; the upper 4 bits are left as zero.
         */
        rsvd_adver_int = MSEC2CENTISEC(vr->vvr_conf.vvc_adver_int) & 0x0fff;
        vp->vp_rsvd_adver_int = htons(rsvd_adver_int);
        vp->vp_ipnum = nip;

        /*
         * Set the checksum to 0 first, then calculate it.
         */
        vp->vp_chksum = 0;
        if (af == AF_INET) {
                vp->vp_chksum = vrrp_cksum4(
                    &vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr,
                    &vrrp_muladdr4.in4.sin_addr, size, vp);
        } else {
                vp->vp_chksum = vrrp_cksum6(
                    &vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
                    &vrrp_muladdr6.in6.sin6_addr, size, vp);
        }

        return (size);
}

/*
 * Send an IPv4 advertisement for the given router. "buf" (of "len" bytes)
 * is used as scratch space for the packet.
 *
 * The IPv4 header is built here, in front of the VRRP payload; only the
 * header checksum and the identification are left to the kernel.
 *
 * Returns VRRP_ETOOSMALL if the VRRP payload could not be built, VRRP_ESYS
 * if the packet could not be sent in full, VRRP_SUCCESS otherwise.
 */
static vrrp_err_t
vrrpd_send_adv_v4(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
{
        /* LINTED E_BAD_PTR_CAST_ALIGN */
        struct ip       *iph = (struct ip *)buf;
        size_t          paylen;
        size_t          total;

        vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s)", vr->vvr_conf.vvc_name);

        /* The VRRP payload goes right behind the (option-less) IP header. */
        paylen = vrrpd_build_vrrp(vr, buf + sizeof (struct ip),
            len - sizeof (struct ip), zero_pri);
        if (paylen == 0)
                return (VRRP_ETOOSMALL);

        total = paylen + sizeof (struct ip);

        iph->ip_v = IPV4_VERSION;
        iph->ip_hl = sizeof (struct ip) >> 2;
        iph->ip_tos = 0;
        iph->ip_len = htons(total);
        iph->ip_off = 0;
        iph->ip_ttl = VRRP_IP_TTL;
        iph->ip_p = IPPROTO_VRRP;
        iph->ip_src = vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr;
        iph->ip_dst = vrrp_muladdr4.in4.sin_addr;

        /*
         * Both the IPv4 identification and the header checksum are filled
         * in by the kernel.
         */
        iph->ip_sum = 0;
        iph->ip_id = 0;

        len = sendto(vr->vvr_vif->vvi_sockfd, buf, total, 0,
            (const struct sockaddr *)&vrrp_muladdr4,
            sizeof (struct sockaddr_in));
        if (len == total) {
                vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s) succeed",
                    vr->vvr_conf.vvc_name);
                return (VRRP_SUCCESS);
        }

        vrrp_log(VRRP_ERR, "vrrpd_send_adv_v4(): sendto() on "
            "(vrid:%d, %s, %s) failed: %s sent:%d expect:%d",
            vr->vvr_conf.vvc_vrid, vr->vvr_vif->vvi_ifname,
            af_str(vr->vvr_conf.vvc_af), strerror(errno), len, total);
        return (VRRP_ESYS);
}

/*
 * Send an IPv6 advertisement for the given router. "buf" (of "len" bytes)
 * is used as scratch space for the VRRP payload.
 *
 * No IP header is built here. Instead, the hop limit (VRRP_IP_TTL) and the
 * source address/outgoing interface (the primary IP address and the index
 * of the virtual interface) are handed to sendmsg() as IPV6_HOPLIMIT and
 * IPV6_PKTINFO ancillary data.
 *
 * Returns VRRP_ETOOSMALL if the VRRP payload could not be built,
 * VRRP_ENOMEM if the ancillary data buffer could not be allocated,
 * VRRP_ESYS if the packet could not be sent in full, VRRP_SUCCESS
 * otherwise.
 */
static vrrp_err_t
vrrpd_send_adv_v6(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
{
        struct msghdr msg6;
        size_t hoplimit_space = 0;
        size_t pktinfo_space = 0;
        size_t bufspace = 0;
        struct in6_pktinfo *pktinfop;
        struct cmsghdr *cmsgp;
        uchar_t *cmsg_datap;
        struct iovec iov;
        size_t plen;

        vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s)", vr->vvr_conf.vvc_name);

        if ((plen = vrrpd_build_vrrp(vr, buf, len, zero_pri)) == 0)
                return (VRRP_ETOOSMALL);

        msg6.msg_control = NULL;
        msg6.msg_controllen = 0;

        /*
         * Work out a generous upper bound for the ancillary data: for each
         * of the two options, a header and its data, each with room for
         * alignment padding.
         */
        hoplimit_space = sizeof (int);
        bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
            hoplimit_space + _MAX_ALIGNMENT;

        pktinfo_space = sizeof (struct in6_pktinfo);
        bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
            pktinfo_space + _MAX_ALIGNMENT;

        /*
         * We need to temporarily set the msg6.msg_controllen to bufspace
         * (we will later trim it to actual length used). This is needed because
         * CMSG_NXTHDR() uses it to check we have not exceeded the bounds.
         */
        bufspace += sizeof (struct cmsghdr);
        msg6.msg_controllen = bufspace;

        msg6.msg_control = (struct cmsghdr *)malloc(bufspace);
        if (msg6.msg_control == NULL) {
                vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): memory allocation "
                    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
                return (VRRP_ENOMEM);
        }

        /* First option: the hop limit. */
        cmsgp = CMSG_FIRSTHDR(&msg6);

        cmsgp->cmsg_level = IPPROTO_IPV6;
        cmsgp->cmsg_type = IPV6_HOPLIMIT;
        cmsg_datap = CMSG_DATA(cmsgp);
        /* LINTED */
        *(int *)cmsg_datap = VRRP_IP_TTL;
        /* The length covers the header, any padding and the data. */
        cmsgp->cmsg_len = cmsg_datap + hoplimit_space - (uchar_t *)cmsgp;
        cmsgp = CMSG_NXTHDR(&msg6, cmsgp);

        /* Second option: the source address and the outgoing interface. */
        cmsgp->cmsg_level = IPPROTO_IPV6;
        cmsgp->cmsg_type = IPV6_PKTINFO;
        cmsg_datap = CMSG_DATA(cmsgp);

        /* LINTED */
        pktinfop = (struct in6_pktinfo *)cmsg_datap;
        /*
         * We don't know if pktinfop->ipi6_addr is aligned properly,
         * therefore let's use bcopy, instead of assignment.
         */
        (void) bcopy(&vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
            &pktinfop->ipi6_addr, sizeof (struct in6_addr));

        /*
         *  We can assume pktinfop->ipi6_ifindex is 32 bit aligned.
         */
        pktinfop->ipi6_ifindex = vr->vvr_vif->vvi_ifindex;
        cmsgp->cmsg_len = cmsg_datap + pktinfo_space - (uchar_t *)cmsgp;
        /*
         * Where the next header would start is where the ancillary data
         * ends: trim msg_controllen to the length actually used.
         */
        cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
        msg6.msg_controllen = (char *)cmsgp - (char *)msg6.msg_control;

        /* The destination is the IPv6 VRRP multicast address. */
        msg6.msg_name = &vrrp_muladdr6;
        msg6.msg_namelen = sizeof (struct sockaddr_in6);

        iov.iov_base = buf;
        iov.iov_len = plen;
        msg6.msg_iov = &iov;
        msg6.msg_iovlen = 1;

        /*
         * "len" is reused to hold the result of sendmsg(); anything but a
         * complete send of the payload is treated as a failure.
         */
        if ((len = sendmsg(vr->vvr_vif->vvi_sockfd,
            (const struct msghdr *)&msg6, 0)) != plen) {
                vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): sendmsg() failed: "
                    "%s expect %d sent %d", vr->vvr_conf.vvc_name,
                    strerror(errno), plen, len);
                (void) free(msg6.msg_control);
                return (VRRP_ESYS);
        }

        vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s) succeed",
            vr->vvr_conf.vvc_name);
        (void) free(msg6.msg_control);
        return (VRRP_SUCCESS);
}

/*
 * Send a VRRP advertisement for the given router, using the IPv4 or the
 * IPv6 sender depending on the router's address family. When zero_pri is
 * set, the zero-priority advertisement is sent.
 *
 * Returns VRRP_EINVAL if the primary interface has no primary IP address,
 * otherwise whatever the address family specific sender returns.
 */
static vrrp_err_t
vrrpd_send_adv(vrrp_vr_t *vr, boolean_t zero_pri)
{
        /* Scratch space for the packet, declared as uint64_t for alignment */
        uint64_t        pktbuf[(IP_MAXPACKET + 1)/8];
        uchar_t         *raw = (uchar_t *)pktbuf;

        vrrp_log(VRRP_DBG1, "vrrpd_send_adv(%s, %s_priority)",
            vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non_zero");

        /*
         * A missing primary IP address is not expected here; when the
         * assertion is compiled out, fail the request instead.
         */
        assert(vr->vvr_pif->vvi_pip != NULL);

        if (vr->vvr_pif->vvi_pip == NULL) {
                vrrp_log(VRRP_DBG0, "vrrpd_send_adv(%s): no primary IP "
                    "address", vr->vvr_conf.vvc_name);
                return (VRRP_EINVAL);
        }

        if (vr->vvr_conf.vvc_af != AF_INET) {
                return (vrrpd_send_adv_v6(vr, raw, sizeof (pktbuf),
                    zero_pri));
        }

        return (vrrpd_send_adv_v4(vr, raw, sizeof (pktbuf), zero_pri));
}

/*
 * Process an advertisement "vp", received from "from", that is addressed to
 * the virtual router "vr".
 *
 * Advertisements are ignored when the router is not past the INIT state,
 * when they were sent from our own primary IP address, or when this router
 * is the virtual address owner (priority 255). Otherwise the peer
 * information is recorded and, depending on the current state:
 *
 * - BACKUP: the timer that triggers the transition to MASTER is restarted
 *   if the peer announces priority zero, if preemption is disabled, or if
 *   the peer's priority is not lower than ours.
 * - MASTER: a priority-zero advertisement is answered by sending our own
 *   advertisement and restarting the advertisement timer; an advertisement
 *   from a peer with a higher priority (or the same priority and a greater
 *   IP address) makes this router go back to BACKUP.
 */
static void
vrrpd_process_adv(vrrp_vr_t *vr, vrrp_addr_t *from, vrrp_pkt_t *vp)
{
        vrrp_vr_conf_t *conf = &vr->vvr_conf;
        char            peer[INET6_ADDRSTRLEN];
        char            local[INET6_ADDRSTRLEN];
        int             addr_cmp;
        uint16_t        peer_adver_int;

        /* LINTED E_CONSTANT_CONDITION */
        VRRPADDR2STR(vr->vvr_conf.vvc_af, from, peer, INET6_ADDRSTRLEN,
            _B_FALSE);
        vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s) from %s", conf->vvc_name,
            peer);

        if (vr->vvr_state <= VRRP_STATE_INIT) {
                vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): state: %s, not "
                    "ready", conf->vvc_name, vrrp_state2str(vr->vvr_state));
                return;
        }

        /*
         * The peer's advertisement interval is carried in the lower 12 bits
         * of the field, in centiseconds.
         */
        peer_adver_int = CENTISEC2MSEC(ntohs(vp->vp_rsvd_adver_int) & 0x0fff);

        /* LINTED E_CONSTANT_CONDITION */
        VRRPADDR2STR(vr->vvr_pif->vvi_af, &vr->vvr_pif->vvi_pip->vip_addr,
            local, INET6_ADDRSTRLEN, _B_FALSE);
        vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local/state/pri"
            "(%s/%s/%d) peer/pri/intv(%s/%d/%d)", conf->vvc_name, local,
            vrrp_state2str(vr->vvr_state), conf->vvc_pri, peer,
            vp->vp_prio, peer_adver_int);

        /*
         * Compare the sender's address with our primary IP address: zero
         * means that this is one of our own advertisements; the sign is
         * used below to break a priority tie.
         */
        addr_cmp = ipaddr_cmp(vr->vvr_pif->vvi_af, from,
            &vr->vvr_pif->vvi_pip->vip_addr);
        if (addr_cmp == 0) {
                vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local message",
                    conf->vvc_name);
                return;
        } else if (conf->vvc_pri == vp->vp_prio) {
                vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): peer IP %s is %s"
                    " than the local IP %s", conf->vvc_name, peer,
                    addr_cmp > 0 ? "greater" : "less", local);
        }

        /*
         * Priority 255 means that we are the virtual address owner; an
         * advertisement from somebody else is logged and otherwise ignored.
         */
        if (conf->vvc_pri == 255) {
                vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): virtual address "
                    "owner received advertisement from %s", conf->vvc_name,
                    peer);
                return;
        }

        /* Remember who we heard from, when, and what was announced. */
        (void) gettimeofday(&vr->vvr_peer_time, NULL);
        (void) memcpy(&vr->vvr_peer_addr, from, sizeof (vrrp_addr_t));
        vr->vvr_peer_prio = vp->vp_prio;
        vr->vvr_peer_adver_int = peer_adver_int;

        if (vr->vvr_state == VRRP_STATE_BACKUP) {
                vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
                /*
                 * Restart the backup-to-master timer, unless preemption is
                 * enabled and the peer's non-zero priority is lower than
                 * ours; in that case the running timer is left alone.
                 */
                if ((vp->vp_prio == VRRP_PRIO_ZERO) ||
                    (conf->vvc_preempt == _B_FALSE ||
                    vp->vp_prio >= conf->vvc_pri)) {
                        (void) iu_cancel_timer(vrrpd_timerq,
                            vr->vvr_timer_id, NULL);
                        if (vp->vp_prio == VRRP_PRIO_ZERO) {
                                /* the master stops participating in VRRP */
                                vr->vvr_timeout = SKEW_TIME_VR(vr);
                        } else {
                                vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
                        }
                        if ((vr->vvr_timer_id = iu_schedule_timer_ms(
                            vrrpd_timerq, vr->vvr_timeout, vrrp_b2m_timeout,
                            vr)) == -1) {
                                vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
                                    "start vrrp_b2m_timeout(%d) failed",
                                    conf->vvc_name, vr->vvr_timeout);
                        } else {
                                vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
                                    "start vrrp_b2m_timeout(%d)",
                                    conf->vvc_name, vr->vvr_timeout);
                        }
                }
        } else if (vr->vvr_state == VRRP_STATE_MASTER) {
                if (vp->vp_prio == VRRP_PRIO_ZERO) {
                        /*
                         * Answer with our own advertisement right away,
                         * then restart the advertisement timer with the
                         * current timeout.
                         */
                        (void) vrrpd_send_adv(vr, _B_FALSE);
                        (void) iu_cancel_timer(vrrpd_timerq,
                            vr->vvr_timer_id, NULL);
                        if ((vr->vvr_timer_id = iu_schedule_timer_ms(
                            vrrpd_timerq, vr->vvr_timeout, vrrp_adv_timeout,
                            vr)) == -1) {
                                vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
                                    "start vrrp_adv_timeout(%d) failed",
                                    conf->vvc_name, vr->vvr_timeout);
                        } else {
                                vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
                                    "start vrrp_adv_timeout(%d)",
                                    conf->vvc_name, vr->vvr_timeout);
                        }
                } else if (vp->vp_prio > conf->vvc_pri ||
                    (vp->vp_prio == conf->vvc_pri && addr_cmp > 0)) {
                        /*
                         * The peer wins: higher priority, or the same
                         * priority and a greater IP address.
                         */
                        (void) vrrpd_state_m2b(vr);
                }
        } else {
                /* Only BACKUP and MASTER are expected past the INIT check. */
                assert(_B_FALSE);
        }
}

/*
 * Validate the VRRP message "vp" of "len" bytes, which was received from
 * "from" over the physical interface "pif", and hand it to the virtual
 * router it is addressed to.
 *
 * The message is rejected with VRRP_EINVAL if it is shorter than the VRRP
 * header, if the version is not VRRP_VERSION, if it carries no address, if
 * its length does not match the advertised number of addresses, or if the
 * checksum is wrong. Note that vp->vp_chksum is cleared as a side effect of
 * verifying the checksum.
 *
 * A valid message that is not an advertisement, or whose VRID is not
 * configured over this interface, is logged and dropped; VRRP_SUCCESS is
 * returned in that case too.
 */
static vrrp_err_t
vrrpd_process_vrrp(vrrp_intf_t *pif, vrrp_pkt_t *vp, size_t len,
    vrrp_addr_t *from)
{
        vrrp_vr_t       *vr;
        uint8_t         vers_type;
        uint16_t        saved_cksum, cksum;
        char            peer[INET6_ADDRSTRLEN];

        /* LINTED E_CONSTANT_CONDITION */
        VRRPADDR2STR(pif->vvi_af, from, peer, INET6_ADDRSTRLEN, _B_FALSE);
        vrrp_log(VRRP_DBG0, "vrrpd_process_vrrp(%s) from %s", pif->vvi_ifname,
            peer);

        if (len < sizeof (vrrp_pkt_t)) {
                /*
                 * The interface name used to be missing from the argument
                 * list, leaving "%s" to consume the length.
                 */
                vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid message "
                    "length %d", pif->vvi_ifname, (int)len);
                return (VRRP_EINVAL);
        }

        /*
         * Verify: VRRP version number and packet type.
         */
        vers_type = ((vp->vp_vers_type & VRRP_VER_MASK) >> 4);
        if (vers_type != VRRP_VERSION) {
                vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) unsupported "
                    "version %d", pif->vvi_ifname, vers_type);
                return (VRRP_EINVAL);
        }

        if (vp->vp_ipnum == 0) {
                vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): zero IPvX count",
                    pif->vvi_ifname);
                return (VRRP_EINVAL);
        }

        /*
         * What follows the header must be exactly the advertised number of
         * addresses of this interface's address family.
         */
        if (len - sizeof (vrrp_pkt_t) !=
            vp->vp_ipnum * (pif->vvi_af == AF_INET ? sizeof (struct in_addr) :
            sizeof (struct in6_addr))) {
                vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid IPvX count"
                    " %d", pif->vvi_ifname, vp->vp_ipnum);
                return (VRRP_EINVAL);
        }

        /* From here on, vers_type holds the packet type. */
        vers_type = (vp->vp_vers_type & VRRP_TYPE_MASK);

        /*
         * verify: VRRP checksum. Note that vrrp_cksum returns network byte
         * order checksum value;
         */
        saved_cksum = vp->vp_chksum;
        vp->vp_chksum = 0;
        if (pif->vvi_af == AF_INET) {
                cksum = vrrp_cksum4(&from->in4.sin_addr,
                    &vrrp_muladdr4.in4.sin_addr, len, vp);
        } else {
                cksum = vrrp_cksum6(&from->in6.sin6_addr,
                    &vrrp_muladdr6.in6.sin6_addr, len, vp);
        }

        if (cksum != saved_cksum) {
                vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) invalid "
                    "checksum: expected/real(0x%x/0x%x)", pif->vvi_ifname,
                    cksum, saved_cksum);
                return (VRRP_EINVAL);
        }

        if ((vr = vrrpd_lookup_vr_by_vrid(pif->vvi_ifname, vp->vp_vrid,
            pif->vvi_af)) != NULL && vers_type == VRRP_PKT_ADVERT) {
                vrrpd_process_adv(vr, from, vp);
        } else {
                vrrp_log(VRRP_DBG1, "vrrpd_process_vrrp(%s) VRID(%d/%s) "
                    "not configured", pif->vvi_ifname, vp->vp_vrid,
                    af_str(pif->vvi_af));
        }
        return (VRRP_SUCCESS);
}

/*
 * Process a message received over the IPv4 socket. The "len" bytes that
 * were read start with the IPv4 header.
 *
 * The message is rejected with VRRP_EINVAL if it is shorter than an IPv4
 * header or than the length found in that header, if it was not sent to
 * the VRRP multicast address, or if its TTL is not VRRP_IP_TTL. Otherwise
 * the VRRP message that follows the IPv4 header is processed.
 */
static vrrp_err_t
vrrpd_process_adv_v4(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
{
        struct ip       *iph;
        vrrp_pkt_t      *vp;
        char            dst[INET6_ADDRSTRLEN];

        vrrp_log(VRRP_DBG0, "vrrpd_process_adv_v4(%s, %d)",
            pif->vvi_ifname, len);

        iph = (struct ip *)msgp->msg_iov->iov_base;

        /*
         * Sanity check. The header is only looked at once we know that a
         * complete one has been received.
         */
        if (len < sizeof (struct ip) || len < ntohs(iph->ip_len)) {
                vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid length "
                    "%d", pif->vvi_ifname, len);
                return (VRRP_EINVAL);
        }

        assert(iph->ip_v == IPV4_VERSION);
        assert(iph->ip_p == IPPROTO_VRRP);
        assert(msgp->msg_namelen == sizeof (struct sockaddr_in));

        /* Only messages sent to the VRRP multicast group are accepted. */
        if (iph->ip_dst.s_addr != vrrp_muladdr4.in4.sin_addr.s_addr) {
                vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
                    "destination %s", pif->vvi_ifname,
                    inet_ntop(pif->vvi_af, &(iph->ip_dst), dst, sizeof (dst)));
                return (VRRP_EINVAL);
        }

        if (iph->ip_ttl != VRRP_IP_TTL) {
                vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
                    "ttl %d", pif->vvi_ifname, iph->ip_ttl);
                return (VRRP_EINVAL);
        }

        /*
         * The VRRP message starts right after the IPv4 header (and its
         * options, if any). Note that the ip_len contains only the IP
         * payload length, so it is used as the VRRP message length.
         */
        /* LINTED E_BAD_PTR_CAST_ALIGN */
        vp = (vrrp_pkt_t *)((char *)iph + iph->ip_hl * 4);

        return (vrrpd_process_vrrp(pif, vp, ntohs(iph->ip_len),
            (vrrp_addr_t *)msgp->msg_name));
}

/*
 * Process a message received over the IPv6 socket. The "len" bytes that
 * were read hold the VRRP message only; what would otherwise be found in
 * the IP header is taken from the ancillary data.
 *
 * The message is rejected with VRRP_EINVAL if it is shorter than the VRRP
 * header, if its hop limit is not VRRP_IP_TTL, or if it was not sent to the
 * VRRP multicast address. Otherwise the VRRP message is processed.
 */
static vrrp_err_t
vrrpd_process_adv_v6(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
{
        struct cmsghdr          *cmsgp;
        uchar_t                 *cmsg_datap;
        struct in6_pktinfo      *pktinfop;
        char                    abuf[INET6_ADDRSTRLEN];
        int                     ttl;

        vrrp_log(VRRP_DBG1, "vrrpd_process_adv_v6(%s, %d)",
            pif->vvi_ifname, (int)len);

        /* Sanity check */
        if (len < sizeof (vrrp_pkt_t)) {
                vrrp_log(VRRP_ERR, "vrrpd_process_adv_v6(%s): invalid length "
                    "%d", pif->vvi_ifname, (int)len);
                return (VRRP_EINVAL);
        }

        assert(msgp->msg_namelen == sizeof (struct sockaddr_in6));

        for (cmsgp = CMSG_FIRSTHDR(msgp); cmsgp != NULL;
            cmsgp = CMSG_NXTHDR(msgp, cmsgp)) {
                assert(cmsgp->cmsg_level == IPPROTO_IPV6);
                cmsg_datap = CMSG_DATA(cmsgp);

                switch (cmsgp->cmsg_type) {
                case IPV6_HOPLIMIT:
                        /* LINTED E_BAD_PTR_CAST_ALIGN */
                        if ((ttl = *(int *)cmsg_datap) == VRRP_IP_TTL)
                                break;

                        vrrp_log(VRRP_ERR, "vrrpd_process_adv_v6(%s): invalid "
                            "ttl %d", pif->vvi_ifname, ttl);
                        return (VRRP_EINVAL);
                case IPV6_PKTINFO:
                        /*
                         * Only messages sent to the VRRP multicast group
                         * are accepted.
                         */
                        /* LINTED E_BAD_PTR_CAST_ALIGN */
                        pktinfop = (struct in6_pktinfo *)cmsg_datap;
                        if (IN6_ARE_ADDR_EQUAL(&pktinfop->ipi6_addr,
                            &vrrp_muladdr6.in6.sin6_addr)) {
                                break;
                        }

                        vrrp_log(VRRP_ERR, "vrrpd_process_adv_v6(%s): invalid "
                            "destination %s", pif->vvi_ifname,
                            inet_ntop(pif->vvi_af, &pktinfop->ipi6_addr, abuf,
                            sizeof (abuf)));
                        return (VRRP_EINVAL);
                default:
                        /* Other ancillary data is of no interest here. */
                        break;
                }
        }

        return (vrrpd_process_vrrp(pif, msgp->msg_iov->iov_base, len,
            msgp->msg_name));
}

/*
 * Event handler of the sockets used to receive VRRP packets: read one
 * message from socket "s" and process it according to the address family
 * of the physical interface passed in "arg". Messages that cannot be read,
 * or that do not fit into the buffers, are logged and dropped.
 */
/* ARGSUSED */
static void
vrrpd_sock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
    void *arg)
{
        vrrp_intf_t             *pif = arg;
        const int               af = pif->vvi_af;
        uint64_t                pkt[(IP_MAXPACKET + 1)/8];
        uint64_t                ctrl[(IP_MAXPACKET + 1)/8];
        vrrp_addr_t             src;
        struct iovec            iov;
        struct msghdr           msg;
        int                     nread;

        vrrp_log(VRRP_DBG1, "vrrpd_sock_handler(%s)", pif->vvi_ifname);

        /* Where the packet itself goes ... */
        iov.iov_base = (char *)pkt;
        iov.iov_len = sizeof (pkt);

        /* ... and where the sender's address and the ancillary data go. */
        msg.msg_name = (struct sockaddr *)&src;
        if (af == AF_INET)
                msg.msg_namelen = sizeof (struct sockaddr_in);
        else
                msg.msg_namelen = sizeof (struct sockaddr_in6);
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = ctrl;
        msg.msg_controllen = sizeof (ctrl);

        nread = recvmsg(s, &msg, 0);
        if (nread == -1) {
                vrrp_log(VRRP_ERR, "vrrpd_sock_handler() recvmsg(%s) "
                    "failed: %s", pif->vvi_ifname, strerror(errno));
                return;
        }

        /*
         * Ignore packets whose data or control information did not fit
         * into the buffers.
         */
        if ((msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) != 0) {
                vrrp_log(VRRP_ERR, "vrrpd_sock_handler() %s buffer not "
                    "big enough", pif->vvi_ifname);
                return;
        }

        if (af != AF_INET) {
                (void) vrrpd_process_adv_v6(pif, &msg, nread);
                return;
        }
        (void) vrrpd_process_adv_v4(pif, &msg, nread);
}

/*
 * Create the socket which is used to receive VRRP packets. Virtual routers
 * that are configured on the same physical interface share the same socket:
 * it is created, joined to the VRRP multicast group and registered with the
 * event handler when the first router over the interface gets here; later
 * routers only add themselves to the interface's router count (vvi_nvr).
 *
 * Returns VRRP_SUCCESS if the socket is ready (vr->vvr_pif is set in that
 * case), VRRP_ENOPRIM if no interface is found for the router's link and
 * address family, or VRRP_ESYS if the socket cannot be set up.
 */
static vrrp_err_t
vrrpd_init_rxsock(vrrp_vr_t *vr)
{
        vrrp_intf_t *pif;       /* Physical interface used to recv packets */
        struct group_req greq;
        struct sockaddr_storage *muladdr;
        int af, proto;
        int on = 1;
        vrrp_err_t err = VRRP_SUCCESS;

        vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s)", vr->vvr_conf.vvc_name);

        /*
         * The RX socket may already have been initialized.
         */
        if ((pif = vr->vvr_pif) != NULL) {
                vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) already done on %s",
                    vr->vvr_conf.vvc_name, pif->vvi_ifname);
                assert(pif->vvi_sockfd != -1);
                return (VRRP_SUCCESS);
        }

        /*
         * If no IP addresses configured on the primary interface,
         * return failure.
         */
        af = vr->vvr_conf.vvc_af;
        pif = vrrpd_lookup_if(vr->vvr_conf.vvc_link, af);
        if (pif == NULL) {
                vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): no IP address "
                    "over %s/%s", vr->vvr_conf.vvc_name,
                    vr->vvr_conf.vvc_link, af_str(af));
                return (VRRP_ENOPRIM);
        }

        /* Level of the socket options that are set below. */
        proto = (af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6);
        /*
         * The router is counted right away; on failure, the count is taken
         * back by vrrpd_fini_rxsock() at the "done" label.
         */
        if (pif->vvi_nvr++ == 0) {
                assert(pif->vvi_sockfd < 0);
                pif->vvi_sockfd = socket(af, SOCK_RAW, IPPROTO_VRRP);
                if (pif->vvi_sockfd < 0) {
                        vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): socket() "
                            "failed %s", vr->vvr_conf.vvc_name,
                            strerror(errno));
                        err = VRRP_ESYS;
                        goto done;
                }

                /*
                 * Join the multicast group to receive VRRP packets.
                 */
                if (af == AF_INET) {
                        muladdr = (struct sockaddr_storage *)
                            (void *)&vrrp_muladdr4;
                } else {
                        muladdr = (struct sockaddr_storage *)
                            (void *)&vrrp_muladdr6;
                }

                /*
                 * NOTE(review): sizeof (struct sockaddr_storage) bytes are
                 * copied out of vrrp_muladdr4/vrrp_muladdr6 -- confirm that
                 * these objects are at least that large.
                 */
                greq.gr_interface = pif->vvi_ifindex;
                (void) memcpy(&greq.gr_group, muladdr,
                    sizeof (struct sockaddr_storage));
                if (setsockopt(pif->vvi_sockfd, proto, MCAST_JOIN_GROUP, &greq,
                    sizeof (struct group_req)) < 0) {
                        vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
                            "join_group(%d) failed: %s", vr->vvr_conf.vvc_name,
                            pif->vvi_ifindex, strerror(errno));
                        err = VRRP_ESYS;
                        goto done;
                } else {
                        vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): "
                            "join_group(%d) succeeded", vr->vvr_conf.vvc_name,
                            pif->vvi_ifindex);
                }

                /*
                 * Unlike IPv4, the IPv6 raw socket does not pass the IP header
                 * when a packet is received. Call setsockopt() to receive such
                 * information.
                 */
                if (af == AF_INET6) {
                        /*
                         * Enable receipt of destination address info
                         */
                        if (setsockopt(pif->vvi_sockfd, proto, IPV6_RECVPKTINFO,
                            (char *)&on, sizeof (on)) < 0) {
                                vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
                                    "enable recvpktinfo failed: %s",
                                    vr->vvr_conf.vvc_name, strerror(errno));
                                err = VRRP_ESYS;
                                goto done;
                        }

                        /*
                         * Enable receipt of hoplimit info
                         */
                        if (setsockopt(pif->vvi_sockfd, proto,
                            IPV6_RECVHOPLIMIT, (char *)&on, sizeof (on)) < 0) {
                                vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
                                    "enable recvhoplimit failed: %s",
                                    vr->vvr_conf.vvc_name, strerror(errno));
                                err = VRRP_ESYS;
                                goto done;
                        }
                }

                /* Have vrrpd_sock_handler() called for incoming packets. */
                if ((pif->vvi_eid = iu_register_event(vrrpd_eh,
                    pif->vvi_sockfd, POLLIN, vrrpd_sock_handler, pif)) == -1) {
                        vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
                            "iu_register_event() failed",
                            vr->vvr_conf.vvc_name);
                        err = VRRP_ESYS;
                        goto done;
                }
        } else {
                vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) over %s already "
                    "done %d", vr->vvr_conf.vvc_name, pif->vvi_ifname,
                    pif->vvi_nvr);
                assert(IS_PRIMARY_INTF(pif));
        }

done:
        /*
         * The interface is attached to the router even on failure, so that
         * vrrpd_fini_rxsock() can undo what has been done so far; it also
         * resets vr->vvr_pif to NULL.
         */
        vr->vvr_pif = pif;
        if (err != VRRP_SUCCESS)
                vrrpd_fini_rxsock(vr);

        return (err);
}

/*
 * Release the given router's use of the socket which receives VRRP packets
 * over its physical interface. All the virtual routers configured over the
 * same physical interface share that socket, so it is only unregistered
 * from the event handler and closed when the last router using it lets go
 * of it. Nothing is done if the router has no physical interface attached.
 */
static void
vrrpd_fini_rxsock(vrrp_vr_t *vr)
{
        vrrp_intf_t     *pif = vr->vvr_pif;

        vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s)", vr->vvr_conf.vvc_name);

        if (pif == NULL)
                return;

        pif->vvi_nvr--;
        if (pif->vvi_nvr != 0) {
                /* Other routers still receive over this interface. */
                vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s %d",
                    vr->vvr_conf.vvc_name, pif->vvi_ifname, pif->vvi_nvr);
        } else {
                /* Last user: tear the socket down and reset the interface. */
                vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s",
                    vr->vvr_conf.vvc_name, pif->vvi_ifname);
                (void) iu_unregister_event(vrrpd_eh, pif->vvi_eid, NULL);
                (void) close(pif->vvi_sockfd);
                pif->vvi_pip = NULL;
                pif->vvi_sockfd = -1;
                pif->vvi_eid = -1;
        }

        vr->vvr_pif = NULL;
}

/*
 * Create the socket which is used to send VRRP packets over the router's
 * VNIC and, when the router is not in accept mode, set IFF_NOACCEPT on that
 * interface.
 *
 * Returns VRRP_SUCCESS if the socket is (or already was) set up,
 * VRRP_ENOVIRT if no interface is found for the VNIC in the router's address
 * family, or the error of the step that failed. On failure the socket is
 * closed and the router is detached from the VNIC interface again.
 */
static vrrp_err_t
vrrpd_init_txsock(vrrp_vr_t *vr)
{
        vrrp_intf_t     *txif;
        vrrp_err_t      rc;
        int             family;

        vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s)", vr->vvr_conf.vvc_name);

        if (vr->vvr_vif != NULL) {
                vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) already done on %s",
                    vr->vvr_conf.vvc_name, vr->vvr_vif->vvi_ifname);
                return (VRRP_SUCCESS);
        }

        family = vr->vvr_conf.vvc_af;
        txif = vrrpd_lookup_if(vr->vvr_vnic, family);
        if (txif == NULL) {
                vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) no IP address over "
                    "%s/%s", vr->vvr_conf.vvc_name, vr->vvr_vnic,
                    af_str(family));
                return (VRRP_ENOVIRT);
        }

        /* The per-family helpers pick the interface up from vr->vvr_vif. */
        vr->vvr_vif = txif;
        if (vr->vvr_conf.vvc_af == AF_INET)
                rc = vrrpd_init_txsock_v4(vr);
        else
                rc = vrrpd_init_txsock_v6(vr);

        /*
         * The interface should start with IFF_NOACCEPT flag not set, only
         * set it when the VRRP router requires IFF_NOACCEPT.
         */
        if (rc == VRRP_SUCCESS && !vr->vvr_conf.vvc_accept)
                rc = vrrpd_set_noaccept(vr, _B_TRUE);

        if (rc == VRRP_SUCCESS)
                return (rc);

        /* Undo: close the socket and detach the router from the VNIC. */
        (void) close(txif->vvi_sockfd);
        txif->vvi_sockfd = -1;
        vr->vvr_vif = NULL;
        return (rc);
}

/*
 * Create the IPv4 raw socket which is used to send VRRP packets. The
 * destination MAC address of a VRRP advertisement must be the virtual MAC
 * address, so the first IP address over the VNIC is selected as the
 * multicast output interface.
 *
 * Returns VRRP_SUCCESS when the socket is ready (or already existed) and
 * VRRP_ESYS when any step fails; on failure the socket is closed and
 * vvi_sockfd is reset to -1.
 */
static vrrp_err_t
vrrpd_init_txsock_v4(vrrp_vr_t *vr)
{
        vrrp_intf_t     *txif = vr->vvr_vif;    /* VNIC used to send packets */
        vrrp_ip_t       *first_ip;              /* first IP over the VNIC */
        char            abuf[INET6_ADDRSTRLEN];
        int             hdrincl = 1;
        char            mloop = 0;

        assert(vr->vvr_conf.vvc_af == AF_INET);
        assert(txif != NULL);

        vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) over %s",
            vr->vvr_conf.vvc_name, txif->vvi_ifname);

        if (txif->vvi_sockfd != -1) {
                vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) already done "
                    "over %s", vr->vvr_conf.vvc_name, txif->vvi_ifname);
                return (VRRP_SUCCESS);
        }

        txif->vvi_sockfd = socket(txif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
        if (txif->vvi_sockfd < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): socket() "
                    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
                goto fail;
        }

        /*
         * Include the IP header, so that we can specify the IP address/ttl.
         */
        if (setsockopt(txif->vvi_sockfd, IPPROTO_IP, IP_HDRINCL,
            (char *)&hdrincl, sizeof (hdrincl)) < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): ip_hdrincl "
                    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
                goto fail;
        }

        /*
         * Disable multicast loopback.
         */
        if (setsockopt(txif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_LOOP,
            &mloop, sizeof (mloop)) == -1) {
                vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): disable "
                    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
                    strerror(errno));
                goto fail;
        }

        /* The address string is only needed for the messages below. */
        first_ip = TAILQ_FIRST(&txif->vvi_iplist);
        /* LINTED E_CONSTANT_CONDITION */
        VRRPADDR2STR(txif->vvi_af, &first_ip->vip_addr, abuf,
            INET6_ADDRSTRLEN, _B_FALSE);

        /*
         * Set the output interface to send the VRRP packet.
         */
        if (setsockopt(txif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_IF,
            &first_ip->vip_addr.in4.sin_addr, sizeof (struct in_addr)) < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
                    "failed: %s", vr->vvr_conf.vvc_name, abuf, strerror(errno));
                goto fail;
        }

        vrrp_log(VRRP_DBG0, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
            "succeed", vr->vvr_conf.vvc_name, abuf);
        return (VRRP_SUCCESS);

fail:
        (void) close(txif->vvi_sockfd);
        txif->vvi_sockfd = -1;
        return (VRRP_ESYS);
}

/*
 * Create the IPv6 raw socket which is used to send VRRP packets. The
 * destination must be the virtual MAC address, so the VNIC's interface index
 * is selected as the multicast output interface.
 *
 * Returns VRRP_SUCCESS when the socket is ready (or already existed) and
 * VRRP_ESYS when any step fails; on failure the socket is closed and
 * vvi_sockfd is reset to -1.
 */
static vrrp_err_t
vrrpd_init_txsock_v6(vrrp_vr_t *vr)
{
        vrrp_intf_t     *txif = vr->vvr_vif;    /* VNIC used to send packets */
        int             mloop = 0;
        int             hops = VRRP_IP_TTL;

        assert(vr->vvr_conf.vvc_af == AF_INET6);
        assert(txif != NULL);

        vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) over %s",
            vr->vvr_conf.vvc_name, txif->vvi_ifname);

        if (txif->vvi_sockfd != -1) {
                vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) already done "
                    "over %s", vr->vvr_conf.vvc_name, txif->vvi_ifname);
                return (VRRP_SUCCESS);
        }

        txif->vvi_sockfd = socket(txif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
        if (txif->vvi_sockfd < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): socket() "
                    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
                goto fail;
        }

        /*
         * Disable multicast loopback.
         */
        if (setsockopt(txif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
            &mloop, sizeof (int)) == -1) {
                vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): disable "
                    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
                    strerror(errno));
                goto fail;
        }

        /*
         * Set the multicast TTL.
         */
        if (setsockopt(txif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
            &hops, sizeof (int)) == -1) {
                vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): enable "
                    "multicast_hops %d failed: %s", vr->vvr_conf.vvc_name,
                    hops, strerror(errno));
                goto fail;
        }

        /*
         * Set the output interface to send the VRRP packet.
         */
        if (setsockopt(txif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_IF,
            &txif->vvi_ifindex, sizeof (uint32_t)) < 0) {
                vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): multicast_if(%d) "
                    "failed: %s", vr->vvr_conf.vvc_name, txif->vvi_ifindex,
                    strerror(errno));
                goto fail;
        }

        vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s): multicast_if(%d)"
            " succeed", vr->vvr_conf.vvc_name, txif->vvi_ifindex);
        return (VRRP_SUCCESS);

fail:
        (void) close(txif->vvi_sockfd);
        txif->vvi_sockfd = -1;
        return (VRRP_ESYS);
}

/*
 * Delete the socket which is used to send VRRP packets and detach the
 * router from its VNIC interface. If the router is not in accept mode, the
 * IFF_NOACCEPT flag is cleared from the interface first. Nothing is done
 * when the router has no VNIC interface attached.
 */
static void
vrrpd_fini_txsock(vrrp_vr_t *vr)
{
        vrrp_intf_t     *txif = vr->vvr_vif;

        vrrp_log(VRRP_DBG1, "vrrpd_fini_txsock(%s)", vr->vvr_conf.vvc_name);

        if (txif == NULL)
                return;

        /* Must run while vr->vvr_vif is still set; the helper reads it. */
        if (!vr->vvr_conf.vvc_accept)
                (void) vrrpd_set_noaccept(vr, _B_FALSE);

        (void) close(txif->vvi_sockfd);
        txif->vvi_sockfd = -1;
        vr->vvr_vif = NULL;
}

/*
 * Given the pseudo header checksum value (sum), calculate the checksum with
 * the rest of the VRRP packet.
 *
 * sum  - partial sum of the pseudo header's 16-bit words
 * plen - number of bytes at p to add to the sum
 * p    - start of the data; no particular alignment is required
 *
 * The 16-bit words are added exactly as they are laid out in memory. A
 * trailing odd byte is treated as the first byte of a word whose second byte
 * is zero. The carries are then folded back into the low 16 bits and the
 * result is complemented. A computed value of zero is returned as 0xffff.
 *
 * The accumulator is unsigned: even with the maximum plen the running total
 * stays below 2^32, whereas a signed int could overflow.
 */
static uint16_t
in_cksum(int sum, uint16_t plen, void *p)
{
        const unsigned char     *cp = p;
        uint32_t                acc = (uint32_t)sum;
        unsigned int            nleft = plen;
        uint16_t                word;
        uint16_t                answer;

        while (nleft > 1) {
                /* memcpy avoids a misaligned or type-punned 16-bit load */
                (void) memcpy(&word, cp, sizeof (word));
                acc += word;
                cp += sizeof (word);
                nleft -= sizeof (word);
        }

        /* mop up an odd byte, if necessary */
        if (nleft == 1) {
                word = 0;
                (void) memcpy(&word, cp, 1);
                acc += word;
        }

        /*
         * add back carry outs from top 16 bits to low 16 bits
         */
        acc = (acc >> 16) + (acc & 0xffff);     /* add hi 16 to low 16 */
        acc += (acc >> 16);                     /* add carry */
        answer = (uint16_t)~acc;                /* truncate to 16 bits */
        return (answer == 0 ? 0xffff : answer);
}

/*
 * Pseudo header for v4. vrrp_cksum4() fills one of these in and adds its
 * 16-bit words to the checksum before the VRRP packet itself is summed.
 */
struct pshv4 {
        struct in_addr  ph4_src;        /* source address, as passed in */
        struct in_addr  ph4_dst;        /* destination address, as passed in */
        uint8_t         ph4_zero;       /* always zero */
        uint8_t         ph4_protocol;   /* protocol used, IPPROTO_VRRP */
        uint16_t        ph4_len;        /* VRRP payload len */
};

/*
 * Compute the VRRP checksum for an IPv4 packet.
 *
 * src, dst - IP source and destination addresses, in network byte order
 * plen     - upper-layer packet length, in host byte order
 * vp       - the VRRP packet to be summed
 *
 * A pseudo header is built from the addresses, the protocol and the length,
 * its 16-bit words are added up, and that partial sum is handed to
 * in_cksum(), which adds the packet itself and folds the result.
 */
static uint16_t
vrrp_cksum4(struct in_addr *src, struct in_addr *dst, uint16_t plen,
    vrrp_pkt_t *vp)
{
        struct pshv4    pseudo;
        const uint16_t  *words = (const uint16_t *)&pseudo;
        unsigned int    i;
        int             partial = 0;

        pseudo.ph4_src = *src;
        pseudo.ph4_dst = *dst;
        pseudo.ph4_zero = 0;
        pseudo.ph4_protocol = IPPROTO_VRRP;
        pseudo.ph4_len = htons(plen);

        /* Sum the pseudo header one 16-bit word at a time. */
        for (i = 0; i < sizeof (struct pshv4) / sizeof (uint16_t); i++)
                partial += words[i];

        return (in_cksum(partial, plen, vp));
}

/*
 * Pseudo header for v6. vrrp_cksum6() fills one of these in and adds its
 * 16-bit words to the checksum before the VRRP packet itself is summed.
 */
struct pshv6 {
        struct in6_addr ph6_src;        /* source address, as passed in */
        struct in6_addr ph6_dst;        /* destination address, as passed in */
        uint32_t        ph6_len;        /* VRRP payload len */
        uint32_t        ph6_zero : 24,  /* set to zero by vrrp_cksum6() */
                        ph6_protocol : 8; /* protocol used, IPPROTO_VRRP */
};

/*
 * Compute the VRRP checksum for an IPv6 packet.
 *
 * src, dst - IP source and destination addresses, in network byte order
 * plen     - upper-layer packet length, in host byte order
 * vp       - the VRRP packet to be summed
 *
 * A pseudo header is built from the addresses, the length and the protocol,
 * its 16-bit words are added up, and that partial sum is handed to
 * in_cksum(), which adds the packet itself and folds the result.
 */
static uint16_t
vrrp_cksum6(struct in6_addr *src, struct in6_addr *dst, uint16_t plen,
    vrrp_pkt_t *vp)
{
        struct pshv6    pseudo;
        const uint16_t  *words = (const uint16_t *)&pseudo;
        unsigned int    i;
        int             partial = 0;

        pseudo.ph6_src = *src;
        pseudo.ph6_dst = *dst;
        pseudo.ph6_zero = 0;
        pseudo.ph6_protocol = IPPROTO_VRRP;
        pseudo.ph6_len = htonl((uint32_t)plen);

        /* Sum the pseudo header one 16-bit word at a time. */
        for (i = 0; i < sizeof (struct pshv6) / sizeof (uint16_t); i++)
                partial += words[i];

        return (in_cksum(partial, plen, vp));
}

/*
 * INIT --> MASTER transition: record the new state, update the virtual IP
 * addresses accordingly, send an advertisement and start the advertisement
 * timer with the configured advertisement interval.
 *
 * Returns the vrrpd_virtualip_update() error if that step fails, VRRP_ESYS
 * if the timer cannot be scheduled, and VRRP_SUCCESS otherwise.
 */
vrrp_err_t
vrrpd_state_i2m(vrrp_vr_t *vr)
{
        vrrp_err_t      rc;

        vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s)", vr->vvr_conf.vvc_name);

        vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_MASTER, vr);
        rc = vrrpd_virtualip_update(vr, _B_FALSE);
        if (rc != VRRP_SUCCESS)
                return (rc);

        (void) vrrpd_send_adv(vr, _B_FALSE);

        vr->vvr_err = VRRP_SUCCESS;
        vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
        vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq, vr->vvr_timeout,
            vrrp_adv_timeout, vr);
        if (vr->vvr_timer_id == -1) {
                vrrp_log(VRRP_ERR, "vrrpd_state_i2m(): unable to start timer");
                return (VRRP_ESYS);
        }

        vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s): start "
            "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name, vr->vvr_timeout);
        return (VRRP_SUCCESS);
}

/*
 * INIT --> BACKUP transition: record the new state, update the virtual IP
 * addresses accordingly and start the vrrp_b2m_timeout() timer with the
 * master-down interval.
 *
 * Returns the vrrpd_virtualip_update() error if that step fails, VRRP_ESYS
 * if the timer cannot be scheduled, and VRRP_SUCCESS otherwise.
 */
vrrp_err_t
vrrpd_state_i2b(vrrp_vr_t *vr)
{
        vrrp_err_t      rc;

        vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s)", vr->vvr_conf.vvc_name);

        vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_BACKUP, vr);
        rc = vrrpd_virtualip_update(vr, _B_FALSE);
        if (rc != VRRP_SUCCESS)
                return (rc);

        vr->vvr_err = VRRP_SUCCESS;

        /*
         * Reinitialize the Master advertisement interval to be the configured
         * value; the master-down interval below is derived from the router.
         */
        vr->vvr_master_adver_int = vr->vvr_conf.vvc_adver_int;
        vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
        vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq, vr->vvr_timeout,
            vrrp_b2m_timeout, vr);
        if (vr->vvr_timer_id == -1) {
                vrrp_log(VRRP_ERR, "vrrpd_state_i2b(): unable to set timer");
                return (VRRP_ESYS);
        }

        vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s): start "
            "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name, vr->vvr_timeout);
        return (VRRP_SUCCESS);
}

/*
 * MASTER --> INIT transition: record the new state, update the virtual IP
 * addresses accordingly (failures are ignored), cancel the router's pending
 * timer and forget the peer information.
 */
void
vrrpd_state_m2i(vrrp_vr_t *vr)
{
        vrrp_log(VRRP_DBG1, "vrrpd_state_m2i(%s)", vr->vvr_conf.vvc_name);

        vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_INIT, vr);
        (void) vrrpd_virtualip_update(vr, _B_TRUE);

        (void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
        (void) memset(&vr->vvr_peer, 0, sizeof (vrrp_peer_t));
}

/*
 * BACKUP --> INIT transition: cancel the router's pending timer, forget the
 * peer information, record the new state and update the virtual IP
 * addresses accordingly (failures are ignored).
 */
void
vrrpd_state_b2i(vrrp_vr_t *vr)
{
        vrrp_log(VRRP_DBG1, "vrrpd_state_b2i(%s)", vr->vvr_conf.vvc_name);

        (void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
        (void) memset(&vr->vvr_peer, 0, sizeof (vrrp_peer_t));

        vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_INIT, vr);
        (void) vrrpd_virtualip_update(vr, _B_TRUE);
}

/*
 * Timer callback scheduled while a router is in the BACKUP state. When it
 * fires, the router (passed as arg) is moved from BACKUP to MASTER; the
 * result of that transition is ignored.
 */
/* ARGSUSED */
static void
vrrp_b2m_timeout(iu_tq_t *tq, void *arg)
{
        vrrp_vr_t       *router = arg;

        vrrp_log(VRRP_DBG1, "vrrp_b2m_timeout(%s)",
            router->vvr_conf.vvc_name);
        (void) vrrpd_state_b2m(router);
}

/*
 * Advertisement timer callback. Sends an advertisement for the router
 * (passed as arg) and re-arms itself with the router's current timeout.
 * A failure to re-arm the timer is only logged.
 */
/* ARGSUSED */
static void
vrrp_adv_timeout(iu_tq_t *tq, void *arg)
{
        vrrp_vr_t       *router = arg;

        vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s)",
            router->vvr_conf.vvc_name);

        (void) vrrpd_send_adv(router, _B_FALSE);

        router->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
            router->vvr_timeout, vrrp_adv_timeout, router);
        if (router->vvr_timer_id == -1) {
                vrrp_log(VRRP_ERR, "vrrp_adv_timeout(%s): start timer failed",
                    router->vvr_conf.vvc_name);
                return;
        }

        vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s): start "
            "vrrp_adv_timeout(%d)", router->vvr_conf.vvc_name,
            router->vvr_timeout);
}

/*
 * BACKUP --> MASTER transition: record the new state, update the virtual IP
 * addresses accordingly, send an advertisement and start the advertisement
 * timer with the configured advertisement interval.
 *
 * Returns the vrrpd_virtualip_update() error if that step fails, VRRP_ESYS
 * if the timer cannot be scheduled, and VRRP_SUCCESS otherwise.
 */
vrrp_err_t
vrrpd_state_b2m(vrrp_vr_t *vr)
{
        vrrp_err_t      rc;

        vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s)", vr->vvr_conf.vvc_name);

        vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_MASTER, vr);
        rc = vrrpd_virtualip_update(vr, _B_FALSE);
        if (rc != VRRP_SUCCESS)
                return (rc);

        (void) vrrpd_send_adv(vr, _B_FALSE);

        vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
        vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq, vr->vvr_timeout,
            vrrp_adv_timeout, vr);
        if (vr->vvr_timer_id == -1) {
                vrrp_log(VRRP_ERR, "vrrpd_state_b2m(%s): start timer failed",
                    vr->vvr_conf.vvc_name);
                return (VRRP_ESYS);
        }

        vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s): start "
            "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name, vr->vvr_timeout);
        return (VRRP_SUCCESS);
}

/*
 * MASTER --> BACKUP transition: record the new state, update the virtual IP
 * addresses accordingly, cancel the advertisement timer and start the
 * vrrp_b2m_timeout() timer with the master-down interval, which is derived
 * from the peer's advertisement interval.
 *
 * Returns the vrrpd_virtualip_update() error if that step fails, VRRP_ESYS
 * if the timer cannot be scheduled, and VRRP_SUCCESS otherwise. Reporting
 * the timer failure matches the other state transition routines; without
 * the timer nothing would move this router out of the BACKUP state when the
 * master goes away.
 */
vrrp_err_t
vrrpd_state_m2b(vrrp_vr_t *vr)
{
        vrrp_err_t      err;

        vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s)", vr->vvr_conf.vvc_name);

        vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_BACKUP, vr);
        if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
                return (err);

        /*
         * Cancel the adver_timer.
         */
        vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
        (void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
        vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
        if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
            vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
                vrrp_log(VRRP_ERR, "vrrpd_state_m2b(%s): start timer failed",
                    vr->vvr_conf.vvc_name);
                return (VRRP_ESYS);
        }

        vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s) start "
            "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
            vr->vvr_timeout);
        return (VRRP_SUCCESS);
}

/*
 * Set (on == _B_TRUE) or clear (on == _B_FALSE) the IFF_NOACCEPT flag on
 * the VNIC interface of the VRRP router. The current flags are read first
 * and written back only if the flag actually changes.
 *
 * Returns VRRP_SUCCESS when the router has no VNIC interface yet or the
 * flag is in the requested state afterwards, and VRRP_ESYS when either
 * ioctl fails. ENXIO and ENOENT failures are not logged.
 */
static vrrp_err_t
vrrpd_set_noaccept(vrrp_vr_t *vr, boolean_t on)
{
        vrrp_intf_t     *vif = vr->vvr_vif;
        struct lifreq   req;
        uint64_t        old_flags;
        int             ctlfd;

        vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
            vr->vvr_conf.vvc_name, on ? "on" : "off");

        /*
         * Possibly no virtual address exists on this VRRP router yet.
         */
        if (vif == NULL)
                return (VRRP_SUCCESS);

        vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
            vif->vvi_ifname, vrrp_state2str(vr->vvr_state));

        /* Use the control socket that matches the interface's family. */
        if (vif->vvi_af == AF_INET)
                ctlfd = vrrpd_ctlsock_fd;
        else
                ctlfd = vrrpd_ctlsock6_fd;

        (void) strncpy(req.lifr_name, vif->vvi_ifname,
            sizeof (req.lifr_name));
        if (ioctl(ctlfd, SIOCGLIFFLAGS, (caddr_t)&req) < 0) {
                if (errno != ENXIO && errno != ENOENT) {
                        vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(): "
                            "SIOCGLIFFLAGS on %s failed: %s",
                            vif->vvi_ifname, strerror(errno));
                }
                return (VRRP_ESYS);
        }

        old_flags = req.lifr_flags;
        if (on)
                req.lifr_flags |= IFF_NOACCEPT;
        else
                req.lifr_flags &= ~IFF_NOACCEPT;

        /* Nothing to write back when the flag already has that value. */
        if (req.lifr_flags == old_flags)
                return (VRRP_SUCCESS);

        if (ioctl(ctlfd, SIOCSLIFFLAGS, (caddr_t)&req) < 0) {
                if (errno != ENXIO && errno != ENOENT) {
                        vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(%s): "
                            "SIOCSLIFFLAGS 0x%llx on %s failed: %s",
                            on ? "no_accept" : "accept",
                            req.lifr_flags, vif->vvi_ifname,
                            strerror(errno));
                }
                return (VRRP_ESYS);
        }
        return (VRRP_SUCCESS);
}

/*
 * Make the IFF_UP flag of one virtual IP address match the VRRP state
 * recorded on its interface: up when the state is VRRP_STATE_MASTER, down
 * otherwise.
 *
 * With checkonly set nothing is changed; a mismatch is logged and reported
 * as VRRP_ESYS instead. Returns VRRP_SUCCESS when the flag already matches
 * or was updated, and VRRP_ESYS when either ioctl fails. ENXIO and ENOENT
 * failures are not logged.
 */
static vrrp_err_t
vrrpd_virtualip_updateone(vrrp_intf_t *vif, vrrp_ip_t *ip, boolean_t checkonly)
{
        vrrp_state_t    state = vif->vvi_vr_state;
        int             af = vif->vvi_af;
        boolean_t       master = (state == VRRP_STATE_MASTER);
        struct lifreq   req;
        uint64_t        old_flags;
        char            abuf[INET6_ADDRSTRLEN];
        int             ctlfd;

        assert(IS_VIRTUAL_INTF(vif));

        /* LINTED E_CONSTANT_CONDITION */
        VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
        vrrp_log(VRRP_DBG1, "vrrpd_virtualip_updateone(%s, %s%s)",
            vif->vvi_ifname, abuf, checkonly ? ", checkonly" : "");

        /* Use the control socket that matches the interface's family. */
        if (af == AF_INET)
                ctlfd = vrrpd_ctlsock_fd;
        else
                ctlfd = vrrpd_ctlsock6_fd;

        (void) strncpy(req.lifr_name, ip->vip_lifname,
            sizeof (req.lifr_name));
        if (ioctl(ctlfd, SIOCGLIFFLAGS, (caddr_t)&req) < 0) {
                if (errno != ENXIO && errno != ENOENT) {
                        vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s): "
                            "SIOCGLIFFLAGS on %s/%s failed: %s",
                            vif->vvi_ifname, req.lifr_name, abuf,
                            strerror(errno));
                }
                return (VRRP_ESYS);
        }

        old_flags = req.lifr_flags;
        if (master)
                req.lifr_flags |= IFF_UP;
        else
                req.lifr_flags &= ~IFF_UP;

        /* Already in the wanted state. */
        if (req.lifr_flags == old_flags)
                return (VRRP_SUCCESS);

        if (checkonly) {
                /* Report the state the address was found in. */
                vrrp_log(VRRP_ERR, "VRRP virtual IP %s/%s was brought %s",
                    ip->vip_lifname, abuf, master ? "down" : "up");
                return (VRRP_ESYS);
        }

        if (ioctl(ctlfd, SIOCSLIFFLAGS, (caddr_t)&req) < 0) {
                if (errno != ENXIO && errno != ENOENT) {
                        vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s, %s): "
                            "bring %s %s/%s failed: %s",
                            vif->vvi_ifname, vrrp_state2str(state),
                            master ? "up" : "down",
                            ip->vip_lifname, abuf, strerror(errno));
                }
                return (VRRP_ESYS);
        }
        return (VRRP_SUCCESS);
}

/*
 * Propagate the router's current VRRP state to its VNIC interface and to
 * every virtual IP address configured over it.
 *
 * Each address is handed to vrrpd_virtualip_updateone() with its own
 * checkonly argument set to _B_FALSE, so the address flags are always
 * actually updated here. The checkonly argument of this function only
 * controls what happens on failure: when it is _B_FALSE, an address that
 * cannot be updated is deleted, and the interface itself is deleted once no
 * address is left. When it is _B_TRUE, failures are ignored and nothing is
 * deleted.
 *
 * Returns VRRP_ENOVIRT if the interface was deleted because no IP address
 * is left, VRRP_SUCCESS otherwise.
 */
static vrrp_err_t
vrrpd_virtualip_update(vrrp_vr_t *vr, boolean_t checkonly)
{
        vrrp_state_t            state;
        vrrp_intf_t             *vif = vr->vvr_vif;
        vrrp_ip_t               *ip, *nextip;
        char                    abuf[INET6_ADDRSTRLEN];
        vrrp_err_t              err;

        /* Validate vif before the debug message below dereferences it. */
        assert(vif != NULL);
        assert(IS_VIRTUAL_INTF(vif));

        vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update(%s, %s, %s)%s",
            vr->vvr_conf.vvc_name, vrrp_state2str(vr->vvr_state),
            vif->vvi_ifname, checkonly ? " checkonly" : "");

        state = vr->vvr_state;
        assert(vif->vvi_vr_state != state);
        vif->vvi_vr_state = state;

        /* Fetch the next entry first; the current one may be deleted. */
        for (ip = TAILQ_FIRST(&vif->vvi_iplist); ip != NULL; ip = nextip) {
                nextip = TAILQ_NEXT(ip, vip_next);
                err = vrrpd_virtualip_updateone(vif, ip, _B_FALSE);
                if (!checkonly && err != VRRP_SUCCESS) {
                        /* LINTED E_CONSTANT_CONDITION */
                        VRRPADDR2STR(vif->vvi_af, &ip->vip_addr, abuf,
                            INET6_ADDRSTRLEN, _B_FALSE);
                        vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update() update "
                            "%s over %s failed", abuf, vif->vvi_ifname);
                        vrrpd_delete_ip(vif, ip);
                }
        }

        /*
         * The IP address is deleted when it is failed to be brought
         * up. If no IP addresses are left, delete this interface.
         */
        if (!checkonly && TAILQ_EMPTY(&vif->vvi_iplist)) {
                vrrp_log(VRRP_DBG0, "vrrpd_virtualip_update(): "
                    "no IP left over %s", vif->vvi_ifname);
                vrrpd_delete_if(vif, _B_TRUE);
                return (VRRP_ENOVIRT);
        }
        return (VRRP_SUCCESS);
}

/*
 * Record a state change of the given VRRP router from prev_s to s: store
 * both states, timestamp the transition and post the state change event.
 * The router is asserted to currently be in prev_s; a failure to post the
 * event is ignored here.
 */
void
vrrpd_state_trans(vrrp_state_t prev_s, vrrp_state_t s, vrrp_vr_t *vr)
{
        const char      *vrname = vr->vvr_conf.vvc_name;

        vrrp_log(VRRP_DBG1, "vrrpd_state_trans(%s): %s --> %s",
            vrname, vrrp_state2str(prev_s), vrrp_state2str(s));

        assert(vr->vvr_state == prev_s);

        vr->vvr_prev_state = prev_s;
        vr->vvr_state = s;
        (void) gettimeofday(&vr->vvr_st_time, NULL);

        (void) vrrpd_post_event(vrname, prev_s, s);
}

/*
 * Post a VRRP state change sysevent for the router called name, carrying
 * the event version, the router name and its new and previous states.
 *
 * Returns 0 on success, or when running outside the global zone (where
 * nothing is posted), and -1 on failure. On failure the error is logged.
 * The nvlist routines report their error through the return value rather
 * than errno, so that value is what gets logged; errno is only consulted
 * when sysevent_post_event() itself fails.
 */
static int
vrrpd_post_event(const char *name, vrrp_state_t prev_st, vrrp_state_t st)
{
        sysevent_id_t   eid;
        nvlist_t        *nvl = NULL;
        int             err;

        /*
         * sysevent is not supported in the non-global zone
         */
        if (getzoneid() != GLOBAL_ZONEID)
                return (0);

        if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
                goto failed;

        if ((err = nvlist_add_uint8(nvl, VRRP_EVENT_VERSION,
            VRRP_EVENT_CUR_VERSION)) != 0)
                goto failed;

        if ((err = nvlist_add_string(nvl, VRRP_EVENT_ROUTER_NAME, name)) != 0)
                goto failed;

        if ((err = nvlist_add_uint8(nvl, VRRP_EVENT_STATE, st)) != 0)
                goto failed;

        if ((err = nvlist_add_uint8(nvl, VRRP_EVENT_PREV_STATE, prev_st)) != 0)
                goto failed;

        if (sysevent_post_event(EC_VRRP, ESC_VRRP_STATE_CHANGE,
            SUNW_VENDOR, VRRP_EVENT_PUBLISHER, nvl, &eid) == 0) {
                nvlist_free(nvl);
                return (0);
        }
        /* Save errno before anything else can overwrite it. */
        err = errno;

failed:
        vrrp_log(VRRP_ERR, "vrrpd_post_event(): `state change (%s --> %s)' "
            "sysevent posting failed: %s", vrrp_state2str(prev_st),
            vrrp_state2str(st), strerror(err));

        nvlist_free(nvl);
        return (-1);
}

/*
 * timeval processing functions
 */

/*
 * Convert a timeval to milliseconds, rounding the microsecond part to the
 * nearest millisecond (half a millisecond rounds up). The rounding is done
 * in integer arithmetic: dividing tv_usec by 1000 first and adding 0.5
 * afterwards would never round up, because the integer division has already
 * discarded the fraction. tv_usec is assumed to be non-negative.
 */
static int
timeval_to_milli(struct timeval tv)
{
        return ((int)(tv.tv_sec * 1000 + (tv.tv_usec + 500) / 1000));
}

/*
 * Return t1 - t2 as a timeval. When the microsecond difference is negative,
 * one second is borrowed so that the result's tv_usec is not negative; the
 * tv_sec of the result is negative when t1 is earlier than t2.
 */
static struct timeval
timeval_delta(struct timeval t1, struct timeval t2)
{
        struct timeval  diff;

        diff.tv_sec = t1.tv_sec - t2.tv_sec;
        if (t1.tv_usec >= t2.tv_usec) {
                diff.tv_usec = t1.tv_usec - t2.tv_usec;
        } else {
                /* Borrow one second to make up the microseconds. */
                diff.tv_usec = t1.tv_usec - t2.tv_usec + 1000000;
                diff.tv_sec--;
        }
        return (diff);
}

/*
 * Print a printf-style message to the terminal or to syslog.
 *
 * When vrrp_logflag is zero the message goes to the terminal, and only if
 * level does not exceed vrrp_debug_level: VRRP_ERR goes to stderr, others
 * go to stdout. Each message is prefixed with "vrrpd: " and followed by a
 * newline. Otherwise the message is passed to syslog with the VRRP_* level
 * translated to the matching LOG_* priority.
 */
static void
vrrp_log(int level, char *message, ...)
{
        va_list args;
        int     priority;

        va_start(args, message);

        if (vrrp_logflag != 0) {
                /*
                 * translate VRRP_* to LOG_*
                 */
                switch (level) {
                case VRRP_ERR:
                        priority = LOG_ERR;
                        break;
                case VRRP_WARNING:
                        priority = LOG_WARNING;
                        break;
                case VRRP_NOTICE:
                        priority = LOG_NOTICE;
                        break;
                case VRRP_DBG0:
                        priority = LOG_INFO;
                        break;
                default:
                        priority = LOG_DEBUG;
                        break;
                }

                /* LINTED: E_SEC_PRINTF_VAR_FMT */
                (void) vsyslog(priority, message, args);
        } else if (level <= vrrp_debug_level) {
                FILE    *stream = stdout;

                if (level <= VRRP_ERR)
                        stream = stderr;

                (void) fprintf(stream, "vrrpd: ");
                /* LINTED: E_SEC_PRINTF_VAR_FMT */
                (void) vfprintf(stream, message, args);
                (void) fprintf(stream, "\n");
                (void) fflush(stream);
        }

        va_end(args);
}