#include "mpd_defs.h"
#include "mpd_tables.h"
/* Bitmask of D_* debug categories; set by the -d and -D options in main(). */
int debug = 0;
/* Number of slots in pollfds[]; poll_add() grows it 32 slots at a time. */
static int pollfd_num = 0;
/* Descriptors polled by main(); a slot with fd == -1 is unused. */
static struct pollfd *pollfds = NULL;
/* Failure detection time; main() reports it in ms and defaults it. */
int user_failure_detection_time;
/* Set in main() to user_failure_detection_time / (NUM_PROBE_FAILS + 2). */
int user_probe_interval;
/* One node of the linked list passed to mibwalk() callbacks. */
typedef struct mib_item_s {
/* Next node, or NULL at the end of the list. */
struct mib_item_s *mi_next;
/* Header whose level/name/len fields select and size mi_valp. */
struct opthdr mi_opthdr;
/* Payload; update_router_list() casts it to a route entry array. */
void *mi_valp;
} mib_item_t;
/* Routing sockets created by setup_rtsock() and drained by process_rtsock(). */
static int rtsock_v4;
static int rtsock_v6;
/* Datagram sockets opened in main() and used for interface ioctls. */
int ifsock_v4 = -1;
int ifsock_v6 = -1;
/* Loopback listeners created by setup_listener(); negative if unavailable. */
static int lsock_v4;
static int lsock_v6;
/* Closed in cleanup(); presumably opened by mibwalk() -- confirm there. */
static int mibfd = -1;
/* Set by -m; when set, init_router_targets() returns without scanning. */
static boolean_t force_mcast = _B_FALSE;
/* getcurrenttime() value recorded at the start of the last initifs(). */
static uint_t last_initifs_time;
/* Original argv, kept so in_signal() can re-exec the daemon on SIGHUP. */
static char **argv0;
/* Cleared by the -l option in main(). */
boolean_t handle_link_notifications = _B_TRUE;
/*
 * Route entry sizes used by ire_process_v4()/ire_process_v6() to validate
 * and count MIB entries; presumably filled in by mib_get_constants().
 */
static int ipRouteEntrySize;
static int ipv6RouteEntrySize;
/* Forward declarations for the file-local functions. */
static void initlog(void);
static void run_timeouts(void);
static void initifs(void);
static void check_if_removed(struct phyint_instance *pii);
static void select_test_ifs(void);
static void update_router_list(mib_item_t *item);
static void mib_get_constants(mib_item_t *item);
static int mibwalk(void (*proc)(mib_item_t *));
static void ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len);
static void ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len);
static void router_add_common(int af, char *ifname,
struct in6_addr nexthop);
static void init_router_targets();
static void cleanup(void);
static int setup_listener(int af);
static void check_config(void);
static void check_testconfig(void);
static void check_addr_unique(struct phyint_instance *,
struct sockaddr_storage *);
static void init_host_targets(void);
static void dup_host_targets(struct phyint_instance *desired_pii);
static void loopback_cmd(int sock, int family);
static boolean_t daemonize(void);
static int closefunc(void *, int);
static unsigned int process_cmd(int newfd, union mi_commands *mpi);
static unsigned int process_query(int fd, mi_query_t *miq);
static unsigned int send_addrinfo(int fd, ipmp_addrinfo_t *adinfop);
static unsigned int send_groupinfo(int fd, ipmp_groupinfo_t *grinfop);
static unsigned int send_grouplist(int fd, ipmp_grouplist_t *grlistp);
static unsigned int send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop);
static unsigned int send_result(int fd, unsigned int error, int syserror);
/* List of IFF_UP addresses; freed and rebuilt on every initifs() pass. */
addrlist_t *localaddrs;
/*
 * Return the high-resolution timer reading scaled down by 10^6 (i.e. from
 * nanoseconds to milliseconds) and truncated to a uint_t.
 */
uint_t
getcurrenttime(void)
{
	return ((uint_t)(gethrtime() / 1000000LL));
}
/*
 * Return the high-resolution timer reading divided by NANOSEC, i.e. in
 * whole seconds.
 */
uint64_t
getcurrentsec(void)
{
	uint64_t secs = gethrtime() / NANOSEC;

	return (secs);
}
/*
 * Register `fd' for POLLIN in the pollfds[] array.
 *
 * If `fd' is already present nothing changes.  Otherwise the first unused
 * slot (fd == -1) is taken; when there is none the array is grown by 32
 * slots and the search is repeated.
 *
 * Returns 0 on success, or -1 (after logging) if the array cannot be grown.
 */
int
poll_add(int fd)
{
	struct pollfd *grown;
	int grown_num;
	int free_slot;
	int slot;

	for (;;) {
		/* One pass: detect a duplicate and remember the first hole. */
		free_slot = -1;
		for (slot = 0; slot < pollfd_num; slot++) {
			if (pollfds[slot].fd == fd)
				return (0);
			if (free_slot == -1 && pollfds[slot].fd == -1)
				free_slot = slot;
		}
		if (free_slot != -1) {
			pollfds[free_slot].fd = fd;
			return (0);
		}

		/* No room: add 32 unused slots and scan again. */
		grown_num = pollfd_num + 32;
		grown = realloc(pollfds, grown_num * sizeof (struct pollfd));
		if (grown == NULL) {
			logperror("poll_add: realloc");
			return (-1);
		}
		for (slot = pollfd_num; slot < grown_num; slot++) {
			grown[slot].fd = -1;
			grown[slot].events = POLLIN;
		}
		pollfds = grown;
		pollfd_num = grown_num;
	}
}
/*
 * Stop polling `fd' by marking its pollfds[] slot unused (fd = -1).
 * Returns 0 if the descriptor was found, -1 otherwise.
 */
int
poll_remove(int fd)
{
	int slot = 0;

	while (slot < pollfd_num && pollfds[slot].fd != fd)
		slot++;

	if (slot >= pollfd_num)
		return (-1);

	pollfds[slot].fd = -1;
	return (0);
}
/*
 * Create or refresh the phyint instance for (`af', `name') from kernel
 * state and hand it back through `pii_p' (which may be set to NULL).
 *
 * An unknown instance is created with phyint_inst_init_from_k().  A known
 * one is refreshed with phyint_inst_update_from_k(); if its group or
 * ifindex changed, it and its sibling instance are deleted and the instance
 * is recreated.
 *
 * Returns _B_TRUE when the resulting instance exists and is marked in use.
 */
static boolean_t
pii_process(int af, char *name, struct phyint_instance **pii_p)
{
	struct phyint_instance *inst;
	struct phyint_instance *sibling;
	int status;

	if (debug & D_PHYINT)
		logdebug("pii_process(%s %s)\n", AF_STR(af), name);

	inst = phyint_inst_lookup(af, name);
	if (inst == NULL) {
		inst = phyint_inst_init_from_k(af, name);
	} else {
		status = phyint_inst_update_from_k(inst);
		if (status == PI_IOCTL_ERROR || status == PI_OK) {
			/* Either all is well or we could not tell: keep it. */
			inst->pii_in_use = 1;
		} else if (status == PI_GROUP_CHANGED ||
		    status == PI_IFINDEX_CHANGED) {
			/* Throw away both instances and start over. */
			sibling = phyint_inst_other(inst);
			if (sibling != NULL)
				phyint_inst_delete(sibling);
			phyint_inst_delete(inst);
			inst = phyint_inst_init_from_k(af, name);
		} else if (status == PI_DELETED) {
			inst->pii_in_use = 0;
		} else {
			logerr("pii_process: Unknown status %d\n", status);
		}
	}

	*pii_p = inst;
	if (inst == NULL)
		return (_B_FALSE);
	return (inst->pii_in_use ? _B_TRUE : _B_FALSE);
}
/*
 * Rescan the interface configuration and bring the daemon's groups, phyint
 * instances, logints and address lists in line with it.
 *
 * Everything is first marked "not in use"; the SIOCGLIFCONF walk then marks
 * what still exists (creating what is new), and whatever remains unmarked
 * afterwards is deleted.  Finishes by selecting test addresses and
 * processing link state changes.  Returns early, after logging, if the
 * interface count/configuration ioctls or the buffer allocation fail.
 */
static void
initifs()
{
int i, nlifr;
int af;
char *cp;
char *buf;
int sockfd;
uint64_t flags;
struct lifnum lifn;
struct lifconf lifc;
struct lifreq lifreq;
struct lifreq *lifr;
struct logint *li;
struct phyint_instance *pii;
struct phyint_instance *next_pii;
struct phyint_group *pg, *next_pg;
char pi_name[LIFNAMSIZ + 1];
if (debug & D_PHYINT)
logdebug("initifs: Scanning interfaces\n");
/* run_timeouts() uses this timestamp to decide when to rescan. */
last_initifs_time = getcurrenttime();
/* The local address list is rebuilt from scratch in the walk below. */
addrlist_free(&localaddrs);
/* Mark every instance and logint as unseen; reset probe dup state. */
for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
pii->pii_in_use = 0;
for (li = pii->pii_logint; li != NULL; li = li->li_next) {
li->li_in_use = 0;
if (pii->pii_probe_logint == li)
li->li_dupaddr = 0;
}
}
/* Likewise mark groups unseen and drop their address lists. */
for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
pg->pg_in_use = _B_FALSE;
addrlist_free(&pg->pg_addrs);
}
lifn.lifn_family = AF_UNSPEC;
lifn.lifn_flags = LIFC_ALLZONES | LIFC_UNDER_IPMP;
again:
if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) {
logperror("initifs: ioctl (get interface count)");
return;
}
/*
 * Pad the count so that a completely filled buffer (checked below)
 * reveals that more interfaces may exist than were counted.
 */
lifn.lifn_count += 4;
if ((buf = calloc(lifn.lifn_count, sizeof (struct lifreq))) == NULL) {
logperror("initifs: calloc");
return;
}
lifc.lifc_family = AF_UNSPEC;
lifc.lifc_flags = LIFC_ALLZONES | LIFC_UNDER_IPMP;
lifc.lifc_len = lifn.lifn_count * sizeof (struct lifreq);
lifc.lifc_buf = buf;
if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) {
logperror("initifs: ioctl (get interface configuration)");
free(buf);
return;
}
nlifr = lifc.lifc_len / sizeof (struct lifreq);
/* Buffer came back full: recount and fetch again. */
if (nlifr >= lifn.lifn_count) {
free(buf);
goto again;
}
for (lifr = lifc.lifc_req, i = 0; i < nlifr; i++, lifr++) {
af = lifr->lifr_addr.ss_family;
/* Anything that is not AF_INET is queried on the IPv6 socket. */
sockfd = (af == AF_INET) ? ifsock_v4 : ifsock_v6;
(void) strlcpy(lifreq.lifr_name, lifr->lifr_name, LIFNAMSIZ);
if (ioctl(sockfd, SIOCGLIFFLAGS, &lifreq) == -1) {
/* ENXIO is skipped silently; other errors are logged. */
if (errno != ENXIO)
logperror("initifs: ioctl (SIOCGLIFFLAGS)");
continue;
}
flags = lifreq.lifr_flags;
/* Only IFF_UP addresses are recorded as local addresses. */
if (flags & IFF_UP) {
(void) addrlist_add(&localaddrs, lifr->lifr_name, flags,
&lifr->lifr_addr);
}
/*
 * IFF_IPMP addresses only update group state: find or create the
 * group, mark it seen, and record the address in the group's list.
 */
if (flags & IFF_IPMP) {
if (ioctl(sockfd, SIOCGLIFGROUPNAME, &lifreq) == -1) {
if (errno != ENXIO)
logperror("initifs: ioctl "
"(SIOCGLIFGROUPNAME)");
continue;
}
pg = phyint_group_lookup(lifreq.lifr_groupname);
if (pg == NULL) {
pg = phyint_group_create(lifreq.lifr_groupname);
if (pg == NULL) {
logerr("initifs: cannot create group "
"%s\n", lifreq.lifr_groupname);
continue;
}
phyint_group_insert(pg);
}
pg->pg_in_use = _B_TRUE;
if (!addrlist_add(&pg->pg_addrs, lifr->lifr_name, flags,
&lifr->lifr_addr)) {
logerr("initifs: insufficient memory to track "
"data address information for %s\n",
lifr->lifr_name);
}
continue;
}
/*
 * Any other address: cut the name at IF_SEPARATOR to obtain the
 * phyint name, then refresh the phyint instance and the logint.
 */
(void) strlcpy(pi_name, lifr->lifr_name, sizeof (pi_name));
if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL)
*cp = '\0';
if (pii_process(af, pi_name, &pii)) {
logint_init_from_k(pii, lifr->lifr_name);
check_addr_unique(pii, &lifr->lifr_addr);
}
}
free(buf);
/* The next pointer is saved first: check_if_removed() may delete pii. */
for (pii = phyint_instances; pii != NULL; pii = next_pii) {
next_pii = pii->pii_next;
check_if_removed(pii);
}
/* Delete groups that were not seen; refresh the state of the rest. */
for (pg = phyint_groups; pg != NULL; pg = next_pg) {
next_pg = pg->pg_next;
if (!pg->pg_in_use) {
phyint_group_delete(pg);
continue;
}
phyint_group_refresh_state(pg);
}
select_test_ifs();
process_link_state_changes();
}
static void
check_addr_unique(struct phyint_instance *ourpii, struct sockaddr_storage *ss)
{
struct phyint *pi;
struct phyint_group *pg;
struct in6_addr addr;
struct phyint_instance *pii;
struct sockaddr_in *sin;
if (ss->ss_family == AF_INET) {
sin = (struct sockaddr_in *)ss;
IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &addr);
} else {
assert(ss->ss_family == AF_INET6);
addr = ((struct sockaddr_in6 *)ss)->sin6_addr;
}
pg = ourpii->pii_phyint->pi_group;
if (pg == phyint_anongroup)
return;
for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
pii = PHYINT_INSTANCE(pi, ss->ss_family);
if (pii == NULL || pii == ourpii ||
pii->pii_probe_logint == NULL)
continue;
if (IN6_ARE_ADDR_EQUAL(&addr, &pii->pii_probe_logint->li_addr))
pii->pii_probe_logint->li_dupaddr = 1;
}
}
/*
 * Stop probing on `pi': for each of its IPv4 and IPv6 instances that
 * exists, close the probe socket (if open) and forget the probe logint;
 * then move the phyint to PI_OFFLINE.
 */
void
stop_probing(struct phyint *pi)
{
	struct phyint_instance *inst;
	int which;

	/* Pass 0 handles the IPv4 instance, pass 1 the IPv6 instance. */
	for (which = 0; which < 2; which++) {
		inst = (which == 0) ? pi->pi_v4 : pi->pi_v6;
		if (inst == NULL)
			continue;
		if (inst->pii_probe_sock != -1)
			close_probe_socket(inst, _B_TRUE);
		inst->pii_probe_logint = NULL;
	}

	phyint_chstate(pi, PI_OFFLINE);
}
/* Ratings returned by rate_testflags(), from worst to best. */
enum { BAD_TESTFLAGS, OK_TESTFLAGS, BEST_TESTFLAGS };

/*
 * Rate how suitable an address with the given interface `flags' is as a
 * test address:
 *
 *   BAD_TESTFLAGS   unless both IFF_NOFAILOVER and IFF_UP are set, or if
 *                   IFF_NOXMIT or IFF_NOLOCAL is set.
 *   BEST_TESTFLAGS  if exactly one of IFF_IPV6 and IFF_DEPRECATED is set.
 *   OK_TESTFLAGS    otherwise.
 */
static int
rate_testflags(uint64_t flags)
{
	const uint64_t required = IFF_NOFAILOVER | IFF_UP;
	const uint64_t forbidden = IFF_NOXMIT | IFF_NOLOCAL;
	uint64_t v6_dep = flags & (IFF_IPV6 | IFF_DEPRECATED);

	if ((flags & required) != required || (flags & forbidden) != 0)
		return (BAD_TESTFLAGS);

	if (v6_dep == IFF_DEPRECATED || v6_dep == IFF_IPV6)
		return (BEST_TESTFLAGS);

	return (OK_TESTFLAGS);
}
/*
 * Choose (or re-choose) the test address used for probing on every phyint
 * instance, open probe sockets for newly chosen addresses, adjust phyint
 * state accordingly, and trigger a target scan when an instance with a new
 * test address has no targets.
 */
static void
select_test_ifs(void)
{
struct phyint *pi;
struct phyint_instance *pii;
struct phyint_instance *next_pii;
struct logint *li;
struct logint *probe_logint;
boolean_t target_scan_reqd = _B_FALSE;
int rating;
if (debug & D_PHYINT)
logdebug("select_test_ifs\n");
/* next_pii is saved up front because pii may be deleted below. */
for (pii = phyint_instances; pii != NULL; pii = next_pii) {
next_pii = pii->pii_next;
probe_logint = NULL;
/*
 * Offline interfaces are never probed.  If the flag says offline
 * but the state does not, complain and stop probing.
 */
if (pii->pii_phyint->pi_flags & IFF_OFFLINE) {
if (pii->pii_phyint->pi_state != PI_OFFLINE) {
logerr("shouldn't be probing offline"
" interface %s (state is: %u)."
" Stopping probes.\n",
pii->pii_phyint->pi_name,
pii->pii_phyint->pi_state);
stop_probing(pii->pii_phyint);
}
continue;
} else {
/* Flag is clear but state is still PI_OFFLINE: skip it too. */
if (pii->pii_phyint->pi_state == PI_OFFLINE)
continue;
}
/* An existing test address rated BEST needs no reselection. */
li = pii->pii_probe_logint;
if (li != NULL) {
if (rate_testflags(li->li_flags) == BEST_TESTFLAGS)
continue;
}
/*
 * Walk the logints: stop at the first BEST candidate, otherwise
 * remember the last acceptable one.
 */
for (li = pii->pii_logint; li != NULL; li = li->li_next) {
/* IPv4: skip the v4-mapped unspecified address. */
if (pii->pii_af == AF_INET &&
IN6_IS_ADDR_V4MAPPED_ANY(&li->li_addr))
continue;
/* IPv6: only link-local addresses are considered. */
if (pii->pii_af == AF_INET6 &&
!IN6_IS_ADDR_LINKLOCAL(&li->li_addr))
continue;
rating = rate_testflags(li->li_flags);
if (rating == BAD_TESTFLAGS)
continue;
probe_logint = li;
if (rating == BEST_TESTFLAGS)
break;
}
/* The choice changed (or vanished): drop the old test address. */
if (pii->pii_probe_logint != NULL &&
pii->pii_probe_logint != probe_logint) {
if (pii->pii_probe_sock != -1)
close_probe_socket(pii, _B_TRUE);
pii->pii_probe_logint = NULL;
}
if (probe_logint == NULL) {
/*
 * No test address: clear the probe statistics, unless the most
 * recent probe slot is already PR_UNUSED.
 */
int pr_ndx;
pr_ndx = PROBE_INDEX_PREV(pii->pii_probe_next);
if (pii->pii_probes[pr_ndx].pr_status != PR_UNUSED) {
clear_pii_probe_stats(pii);
reset_crtt_all(pii->pii_phyint);
}
continue;
} else if (probe_logint == pii->pii_probe_logint) {
/* Same test address as before: nothing to do. */
continue;
}
/* New test address: record it and set up the probe socket. */
pii->pii_probe_logint = probe_logint;
if (!phyint_inst_sockinit(pii)) {
if (debug & D_PHYINT) {
logdebug("select_test_ifs: "
"phyint_sockinit failed\n");
}
phyint_inst_delete(pii);
continue;
}
/*
 * With targets available, PI_NOTARGETS becomes PI_RUNNING; if
 * neither instance has targets, PI_RUNNING becomes PI_NOTARGETS.
 */
if (PROBE_CAPABLE(pii)) {
if (pii->pii_phyint->pi_state == PI_NOTARGETS)
phyint_chstate(pii->pii_phyint, PI_RUNNING);
} else if (!PROBE_CAPABLE(phyint_inst_other(pii))) {
if (pii->pii_phyint->pi_state == PI_RUNNING)
phyint_chstate(pii->pii_phyint, PI_NOTARGETS);
}
/* Target scanning covers all instances, so it runs after the loop. */
if (pii->pii_targets == NULL)
target_scan_reqd = _B_TRUE;
/* Start the probe timer once per instance. */
if (!pii->pii_basetime_inited && PROBE_ENABLED(pii)) {
start_timer(pii);
pii->pii_basetime_inited = 1;
}
}
/*
 * Phyints that are PI_FAILED or PI_NOTARGETS but have probing enabled
 * on neither instance are checked for repair.
 */
for (pi = phyints; pi != NULL; pi = pi->pi_next) {
if ((!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) &&
(pi->pi_state == PI_FAILED ||
pi->pi_state == PI_NOTARGETS)) {
phyint_check_for_repair(pi);
}
}
check_testconfig();
/* Router targets first, then host targets copied from group peers. */
if (target_scan_reqd) {
init_router_targets();
init_host_targets();
}
}
/*
 * Report, once per transition, whether each non-offline phyint has a usable
 * test address:
 *
 *  - probing enabled again after a complaint: log that the test address is
 *    now configured and clear both "printed" flags;
 *  - probe logint marked duplicate: log the non-unique address once;
 *  - otherwise, once pi_taddrthresh has passed: log once that no test
 *    address is configured.
 */
static void
check_testconfig(void)
{
	char addrstr[INET6_ADDRSTRLEN];
	struct phyint *pi;
	struct logint *dup_li;
	struct logint *cand;

	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
		if (pi->pi_flags & IFF_OFFLINE)
			continue;

		if (PROBE_ENABLED(pi->pi_v4) || PROBE_ENABLED(pi->pi_v6)) {
			if (pi->pi_taddrmsg_printed ||
			    pi->pi_duptaddrmsg_printed) {
				/* Recovery from a duplicate is logged louder. */
				logmsg(pi->pi_duptaddrmsg_printed ?
				    LOG_ERR : LOG_INFO,
				    "Test address now configured on "
				    "interface %s; enabling probe-based "
				    "failure detection on it\n", pi->pi_name);
				pi->pi_taddrmsg_printed = 0;
				pi->pi_duptaddrmsg_printed = 0;
			}
			continue;
		}

		/* Find a duplicate probe logint; IPv6 wins over IPv4. */
		dup_li = NULL;
		if (pi->pi_v4 != NULL) {
			cand = pi->pi_v4->pii_probe_logint;
			if (cand != NULL && cand->li_dupaddr)
				dup_li = cand;
		}
		if (pi->pi_v6 != NULL) {
			cand = pi->pi_v6->pii_probe_logint;
			if (cand != NULL && cand->li_dupaddr)
				dup_li = cand;
		}

		if (dup_li != NULL) {
			if (!pi->pi_duptaddrmsg_printed) {
				logerr("Test address %s is not unique in group; "
				    "disabling probe-based failure detection on %s\n",
				    pr_addr(dup_li->li_phyint_inst->pii_af,
				    dup_li->li_addr, addrstr,
				    sizeof (addrstr)), pi->pi_name);
				pi->pi_duptaddrmsg_printed = 1;
			}
			continue;
		}

		/* Stay quiet until the threshold time has been reached. */
		if (getcurrentsec() >= pi->pi_taddrthresh &&
		    !pi->pi_taddrmsg_printed) {
			logtrace("No test address configured on interface %s; "
			    "disabling probe-based failure detection on it\n",
			    pi->pi_name);
			pi->pi_taddrmsg_printed = 1;
		}
	}
}
/*
 * Check every named group for members that lack an address family which
 * some other member of the group has.  A non-offline member missing IPv4
 * (checked first) or IPv6 is reported once; when it becomes consistent
 * again that is reported once too.  pi_cfgmsg_printed tracks the state.
 */
static void
check_config(void)
{
	struct phyint_group *group;
	struct phyint *member;
	boolean_t group_has_v4;
	boolean_t group_has_v6;
	boolean_t lacks_v4;
	boolean_t lacks_v6;

	for (group = phyint_groups; group != NULL; group = group->pg_next) {
		if (group == phyint_anongroup)
			continue;

		/* First pass: which families are present in the group? */
		group_has_v4 = _B_FALSE;
		group_has_v6 = _B_FALSE;
		member = group->pg_phyint;
		while (member != NULL) {
			if (member->pi_v4 != NULL)
				group_has_v4 = _B_TRUE;
			if (member->pi_v6 != NULL)
				group_has_v6 = _B_TRUE;
			member = member->pi_pgnext;
		}

		/* Second pass: compare each member against the group. */
		for (member = group->pg_phyint; member != NULL;
		    member = member->pi_pgnext) {
			if (member->pi_flags & IFF_OFFLINE)
				continue;

			lacks_v4 = (group_has_v4 == _B_TRUE &&
			    member->pi_v4 == NULL) ? _B_TRUE : _B_FALSE;
			lacks_v6 = (group_has_v6 == _B_TRUE &&
			    member->pi_v6 == NULL) ? _B_TRUE : _B_FALSE;

			if (!lacks_v4 && !lacks_v6) {
				if (member->pi_cfgmsg_printed) {
					logerr("IP interface %s is now"
					    " consistent with group %s "
					    " and connectivity is restored\n",
					    member->pi_name,
					    member->pi_group->pg_name);
					member->pi_cfgmsg_printed = 0;
				}
				continue;
			}

			if (member->pi_cfgmsg_printed)
				continue;

			if (lacks_v4) {
				logerr("IP interface %s in group %s is"
				    " not plumbed for IPv4, affecting"
				    " IPv4 connectivity\n",
				    member->pi_name,
				    member->pi_group->pg_name);
			} else {
				logerr("IP interface %s in group %s is"
				    " not plumbed for IPv6, affecting"
				    " IPv6 connectivity\n",
				    member->pi_name,
				    member->pi_group->pg_name);
			}
			member->pi_cfgmsg_printed = 1;
		}
	}
}
static uint_t timer_next;
static boolean_t timer_active = _B_FALSE;
static void
timer_init(void)
{
timer_next = getcurrenttime() + TIMER_INFINITY;
run_timeouts();
}
/*
 * Arm the one-shot ITIMER_REAL timer to fire `delay' milliseconds from now
 * (a delay of 0 is treated as 1).  If a timer is already pending and would
 * fire no later than the requested time, nothing is changed.  Exits the
 * process if setitimer() fails.
 */
void
timer_schedule(uint_t delay)
{
	struct itimerval when;
	uint_t now;

	if (debug & D_TIMER)
		logdebug("timer_schedule(%u)\n", delay);

	assert(delay <= TIMER_INFINITY);

	now = getcurrenttime();
	if (delay == 0)
		delay = 1;

	/* An earlier (or equal) expiry is already pending: keep it. */
	if (timer_active && TIME_GE(now + delay, timer_next)) {
		if (debug & D_TIMER) {
			logdebug("timer_schedule(%u) - no action: "
			    "now %u next %u\n", delay, now, timer_next);
		}
		return;
	}

	timer_next = now + delay;

	/* One-shot timer: zero interval, value split into sec/usec. */
	when.it_interval.tv_sec = 0;
	when.it_interval.tv_usec = 0;
	when.it_value.tv_sec = delay / 1000;
	when.it_value.tv_usec = (delay % 1000) * 1000;

	if (debug & D_TIMER) {
		logdebug("timer_schedule(%u): sec %ld usec %ld\n",
		    delay, when.it_value.tv_sec,
		    when.it_value.tv_usec);
	}

	timer_active = _B_TRUE;
	if (setitimer(ITIMER_REAL, &when, NULL) >= 0)
		return;

	logperror("timer_schedule: setitimer");
	exit(2);
}
/*
 * Disarm the ITIMER_REAL timer by setting an all-zero itimerval.  A
 * setitimer() failure is logged but otherwise ignored.
 */
static void
timer_cancel(void)
{
	struct itimerval disarm;
	int rc;

	if (debug & D_TIMER)
		logdebug("timer_cancel()\n");

	bzero(&disarm, sizeof (disarm));
	rc = setitimer(ITIMER_REAL, &disarm, NULL);
	if (rc < 0)
		logperror("timer_cancel: setitimer");
}
/*
 * Run the per-instance timers and schedule the next timeout.
 *
 * If more than IF_SCAN_INTERVAL has elapsed since the last initifs(), the
 * interfaces are rescanned and the configuration rechecked first.  The next
 * timeout is the smallest value returned by phyint_inst_timer(), capped at
 * IF_SCAN_INTERVAL.  Must not be re-entered (asserted).
 */
static void
run_timeouts(void)
{
	static boolean_t timeout_running;
	struct phyint_instance *inst;
	struct phyint_instance *following;
	uint_t inst_next;
	uint_t soonest;

	assert(!timeout_running);
	timeout_running = _B_TRUE;

	if (debug & D_TIMER)
		logdebug("run_timeouts()\n");

	if ((getcurrenttime() - last_initifs_time) > IF_SCAN_INTERVAL) {
		initifs();
		check_config();
	}

	soonest = TIMER_INFINITY;
	inst = phyint_instances;
	while (inst != NULL) {
		/* Save the successor before running this instance's timer. */
		following = inst->pii_next;
		inst_next = phyint_inst_timer(inst);
		if (inst_next != TIMER_INFINITY && inst_next < soonest)
			soonest = inst_next;
		if (debug & D_TIMER) {
			logdebug("run_timeouts(%s %s): next scheduled for"
			    " this phyint inst %u, next scheduled global"
			    " %u ms\n",
			    AF_STR(inst->pii_af), inst->pii_phyint->pi_name,
			    inst_next, soonest);
		}
		inst = following;
	}

	/* Never wait longer than one interface scan interval. */
	if (soonest > IF_SCAN_INTERVAL)
		soonest = IF_SCAN_INTERVAL;

	if (debug & D_TIMER)
		logdebug("run_timeouts: %u ms\n", soonest);

	timer_schedule(soonest);
	timeout_running = _B_FALSE;
}
/* Pipe used to turn signals into poll()able events; see setup_eventpipe(). */
static int eventpipe_read = -1;
static int eventpipe_write = -1;
/* Set by cleanup(); once set, sig_handler() ignores further signals. */
boolean_t cleanup_started = _B_FALSE;

/*
 * Signal handler: forward the signal number, as a single byte, through the
 * event pipe so that in_signal() can handle it from the main poll loop.
 *
 * Nothing is written once cleanup has started, or if the pipe was never
 * created.  errno is saved and restored so that a failing write() or the
 * logging calls cannot disturb the errno value of whatever code the signal
 * interrupted.
 *
 * NOTE(review): logerr()/logperror() are probably not async-signal-safe;
 * they are kept here to preserve the existing diagnostics.
 */
static void
sig_handler(int signo)
{
	uchar_t buf = (uchar_t)signo;
	int saved_errno = errno;

	if (cleanup_started)
		return;

	if (eventpipe_write == -1) {
		logerr("sig_handler: no pipe found\n");
	} else if (write(eventpipe_write, &buf, sizeof (buf)) < 0) {
		logperror("sig_handler: write");
	}

	errno = saved_errno;
}
/* Counters of probes that could not be sent; reported on SIGUSR1. */
extern struct probes_missed probes_missed;
/*
 * Handle one signal that sig_handler() forwarded through the event pipe.
 * `fd' is the read end of the pipe; exactly one byte (the signal number) is
 * read.  Any read result other than one byte is fatal.
 *
 *   SIGALRM               run the timeouts
 *   SIGUSR1               log the configuration and probe statistics
 *   SIGHUP                clean up and re-exec the daemon
 *   SIGINT/SIGTERM/SIGQUIT  clean up and exit(0)
 */
static void
in_signal(int fd)
{
uchar_t buf;
uint64_t sent, acked, lost, unacked, unknown;
struct phyint_instance *pii;
int pr_ndx;
switch (read(fd, &buf, sizeof (buf))) {
case -1:
logperror("in_signal: read");
exit(1);
case 1:
break;
case 0:
logerr("in_signal: read end of file\n");
exit(1);
default:
logerr("in_signal: read > 1\n");
exit(1);
}
if (debug & D_TIMER)
logdebug("in_signal() got %d\n", buf);
switch (buf) {
case SIGALRM:
if (debug & D_TIMER) {
uint_t now = getcurrenttime();
logdebug("in_signal(SIGALRM) delta %u\n",
now - timer_next);
}
/* The timer has fired, so timer_schedule() may arm a new one. */
timer_active = _B_FALSE;
run_timeouts();
break;
case SIGUSR1:
logdebug("Printing configuration:\n");
phyint_inst_print_all();
logerr("Missed sending total of %d probes spread over"
" %d occurrences\n", probes_missed.pm_nprobes,
probes_missed.pm_ntimes);
/*
 * Per instance: start from the cumulative counters and add the
 * status of each entry in the pii_probes[] array.
 */
for (pii = phyint_instances; pii != NULL;
pii = pii->pii_next) {
unacked = 0;
acked = pii->pii_cum_stats.acked;
lost = pii->pii_cum_stats.lost;
sent = pii->pii_cum_stats.sent;
unknown = pii->pii_cum_stats.unknown;
for (pr_ndx = 0; pr_ndx < PROBE_STATS_COUNT; pr_ndx++) {
switch (pii->pii_probes[pr_ndx].pr_status) {
case PR_ACKED:
acked++;
break;
case PR_LOST:
lost++;
break;
case PR_UNACKED:
unacked++;
break;
}
}
logerr("\nProbe stats on (%s %s)\n"
"Number of probes sent %lld\n"
"Number of probe acks received %lld\n"
"Number of probes/acks lost %lld\n"
"Number of valid unacknowledged probes %lld\n"
"Number of ambiguous probe acks received %lld\n",
AF_STR(pii->pii_af), pii->pii_name,
sent, acked, lost, unacked, unknown);
}
break;
case SIGHUP:
logerr("SIGHUP: restart and reread config file\n");
timer_cancel();
cleanup();
/* Re-exec with the argv saved in main(). */
(void) execv(argv0[0], argv0);
/* Only reached if execv() failed. */
_exit(0177);
case SIGINT:
case SIGTERM:
case SIGQUIT:
cleanup();
exit(0);
default:
logerr("in_signal: unknown signal: %d\n", buf);
}
}
/*
 * Tear the daemon down before exit or re-exec: mark cleanup as started (so
 * sig_handler() stops writing to the event pipe), delete every phyint
 * instance, and close the daemon's sockets, stdin/stdout/stderr, the MIB
 * descriptor and both ends of the event pipe.
 */
static void
cleanup(void)
{
	struct phyint_instance *inst;
	struct phyint_instance *following;
	int to_close[12];
	int count = 0;
	int k;

	cleanup_started = _B_TRUE;

	inst = phyint_instances;
	while (inst != NULL) {
		following = inst->pii_next;
		phyint_inst_delete(inst);
		inst = following;
	}

	/* Descriptors are closed in exactly this order. */
	to_close[count++] = ifsock_v4;
	to_close[count++] = ifsock_v6;
	to_close[count++] = rtsock_v4;
	to_close[count++] = rtsock_v6;
	to_close[count++] = lsock_v4;
	to_close[count++] = lsock_v6;
	to_close[count++] = 0;
	to_close[count++] = 1;
	to_close[count++] = 2;
	to_close[count++] = mibfd;
	to_close[count++] = eventpipe_read;
	to_close[count++] = eventpipe_write;

	for (k = 0; k < count; k++)
		(void) close(to_close[k]);
}
/*
 * Create the event pipe and install the signal handlers.
 *
 * The read end of the pipe is added to the poll set, so signals forwarded
 * by sig_handler() are handled from the main loop.  SIGALRM is installed
 * through sigaction() with SA_RESTART; SIGHUP, SIGUSR1, SIGTERM, SIGINT and
 * SIGQUIT are installed with sigset().  Exits the process if the pipe
 * cannot be created or added to the poll set.
 */
static void
setup_eventpipe(void)
{
	int fds[2];
	struct sigaction act;

	if ((pipe(fds)) < 0) {
		logperror("setup_eventpipe: pipe");
		exit(1);
	}
	eventpipe_read = fds[0];
	eventpipe_write = fds[1];
	if (poll_add(eventpipe_read) == -1) {
		exit(1);
	}

	/*
	 * Fully initialize the sigaction: sa_mask in particular must not be
	 * left as stack garbage, or arbitrary signals would be blocked while
	 * the handler runs.
	 */
	bzero(&act, sizeof (act));
	act.sa_handler = sig_handler;
	act.sa_flags = SA_RESTART;
	(void) sigemptyset(&act.sa_mask);
	(void) sigaction(SIGALRM, &act, NULL);

	(void) sigset(SIGHUP, sig_handler);
	(void) sigset(SIGUSR1, sig_handler);
	(void) sigset(SIGTERM, sig_handler);
	(void) sigset(SIGINT, sig_handler);
	(void) sigset(SIGQUIT, sig_handler);
}
/*
 * Open a routing socket for address family `af', set the RT_AWARE option to
 * RTAW_UNDER_IPMP, make the socket non-blocking and add it to the poll set.
 * Returns the descriptor; any failure terminates the process.
 */
static int
setup_rtsock(int af)
{
	char *failed_step = NULL;
	int aware = RTAW_UNDER_IPMP;
	int fl;
	int rts;

	rts = socket(PF_ROUTE, SOCK_RAW, af);
	if (rts == -1) {
		logperror("setup_rtsock: socket PF_ROUTE");
		exit(1);
	}

	/* Run the set-up steps in order; stop at the first failure. */
	if (setsockopt(rts, SOL_ROUTE, RT_AWARE, &aware,
	    sizeof (aware)) == -1) {
		failed_step = "setup_rtsock: setsockopt RT_AWARE";
	} else if ((fl = fcntl(rts, F_GETFL, 0)) < 0) {
		failed_step = "setup_rtsock: fcntl F_GETFL";
	} else if ((fcntl(rts, F_SETFL, fl | O_NONBLOCK)) < 0) {
		failed_step = "setup_rtsock: fcntl F_SETFL";
	} else if (poll_add(rts) != -1) {
		return (rts);
	}

	/* poll_add() does its own logging, so it has no message here. */
	if (failed_step != NULL)
		logperror(failed_step);
	(void) close(rts);
	exit(1);
}
/*
 * Process an RTM_IFINFO routing socket message `ifm' that was read from the
 * routing socket for address family `type'.
 *
 * Returns _B_TRUE when the caller should perform a full interface scan
 * (unusable message, unknown phyint or instance, or a flag change that is
 * not exactly IFF_RUNNING), and _B_FALSE when the only difference between
 * the old and new flags is IFF_RUNNING.
 */
static boolean_t
process_rtm_ifinfo(if_msghdr_t *ifm, int type)
{
struct sockaddr_dl *sdl;
struct phyint *pi;
uint64_t old_flags;
struct phyint_instance *pii;
assert(ifm->ifm_type == RTM_IFINFO && ifm->ifm_addrs == RTA_IFP);
/*
 * The sockaddr_dl is located by working backwards from the end of the
 * message (ifm_msglen) rather than forwards from the header.
 */
sdl = (struct sockaddr_dl *)((char *)ifm + ifm->ifm_msglen -
sizeof (struct sockaddr_dl));
assert(sdl->sdl_family == AF_LINK);
/* The name is terminated in place, so there must be room for the NUL. */
if (sdl->sdl_nlen >= sizeof (sdl->sdl_data)) {
if (debug & D_LINKNOTE)
logdebug("process_rtm_ifinfo: phyint name too long\n");
return (_B_TRUE);
}
sdl->sdl_data[sdl->sdl_nlen] = 0;
pi = phyint_lookup(sdl->sdl_data);
if (pi == NULL) {
if (debug & D_LINKNOTE)
logdebug("process_rtm_ifinfo: phyint lookup failed"
" for %s\n", sdl->sdl_data);
return (_B_TRUE);
}
/* Any type other than AF_INET selects the IPv6 instance. */
pii = (type == AF_INET) ? pi->pi_v4 : pi->pi_v6;
if (pii == NULL) {
if (debug & D_LINKNOTE)
logdebug("process_rtm_ifinfo: no instance of address "
"family %s for %s\n", AF_STR(type), pi->pi_name);
return (_B_TRUE);
}
/*
 * Each instance keeps its own copy of the flags, so the change is
 * judged against what this instance last saw; the phyint's copy is
 * updated as well.
 */
old_flags = pii->pii_flags;
pii->pii_flags = PHYINT_FLAGS(ifm->ifm_flags);
pi->pi_flags = pii->pii_flags;
if (debug & D_LINKNOTE) {
logdebug("process_rtm_ifinfo: %s address family: %s, "
"old flags: %llx, new flags: %llx\n", pi->pi_name,
AF_STR(type), old_flags, pi->pi_flags);
}
/* IFF_STANDBY toggled: note the change, refresh inactive if now set. */
if ((old_flags ^ pii->pii_flags) & IFF_STANDBY) {
phyint_changed(pi);
if (pii->pii_flags & IFF_STANDBY)
phyint_standby_refresh_inactive(pi);
}
/*
 * Unless exactly IFF_RUNNING changed, copy the flags to the other
 * instance and request a full scan.
 */
if ((old_flags ^ pii->pii_flags) != IFF_RUNNING) {
struct phyint_instance *pii_other;
pii_other = phyint_inst_other(pii);
if (pii_other != NULL)
pii_other->pii_flags = pii->pii_flags;
return (_B_TRUE);
}
return (_B_FALSE);
}
/*
 * Drain both routing sockets (IPv4 first, then IPv6) and act on what was
 * seen:
 *
 *  - address changes, or an RTM_IFINFO that process_rtm_ifinfo() could not
 *    handle as a pure link change, trigger a full initifs() scan;
 *  - otherwise, if any RTM_IFINFO was seen, link state changes are
 *    processed;
 *  - route changes trigger a rescan of the router targets.
 *
 * Reading from a socket stops when read() returns <= 0 or a message has an
 * unexpected version.
 */
static void
process_rtsock(int rtsock_v4, int rtsock_v6)
{
	int64_t msg[2048 / 8];
	struct rt_msghdr *hdr;
	boolean_t scan_ifs = _B_FALSE;
	boolean_t scan_routes = _B_FALSE;
	boolean_t saw_ifinfo = _B_FALSE;
	int families[2];
	int nbytes;
	int type;
	int idx;
	int fd;

	families[0] = AF_INET;
	families[1] = AF_INET6;

	for (idx = 0; idx < 2; idx++) {
		type = families[idx];
		fd = (type == AF_INET) ? rtsock_v4 : rtsock_v6;

		for (;;) {
			nbytes = read(fd, msg, sizeof (msg));
			if (nbytes <= 0)
				break;

			hdr = (struct rt_msghdr *)msg;
			if (hdr->rtm_version != RTM_VERSION) {
				logerr("process_rtsock: version %d "
				    "not understood\n", hdr->rtm_version);
				break;
			}

			if (debug & D_PHYINT) {
				logdebug("process_rtsock: message %d\n",
				    hdr->rtm_type);
			}

			switch (hdr->rtm_type) {
			case RTM_NEWADDR:
			case RTM_DELADDR:
				scan_ifs = _B_TRUE;
				break;
			case RTM_IFINFO:
				saw_ifinfo = _B_TRUE;
				if (process_rtm_ifinfo((if_msghdr_t *)hdr,
				    type))
					scan_ifs = _B_TRUE;
				break;
			case RTM_ADD:
			case RTM_DELETE:
			case RTM_CHANGE:
			case RTM_OLDADD:
			case RTM_OLDDEL:
				scan_routes = _B_TRUE;
				break;
			default:
				break;
			}
		}
	}

	if (scan_ifs) {
		if (debug & D_LINKNOTE && saw_ifinfo)
			logdebug("process_rtsock: synchronizing with kernel\n");
		initifs();
	} else if (saw_ifinfo) {
		if (debug & D_LINKNOTE)
			logdebug("process_rtsock: "
			    "link up/down notification(s) seen\n");
		process_link_state_changes();
	}

	if (scan_routes)
		init_router_targets();
}
/*
 * Delete state that initifs() did not see in the kernel: the whole phyint
 * instance if it is no longer in use, otherwise any of its logints that are
 * no longer in use.
 */
static void
check_if_removed(struct phyint_instance *pii)
{
	struct logint *cur;
	struct logint *following;

	if (!pii->pii_in_use) {
		logtrace("%s %s has been removed from kernel\n",
		    AF_STR(pii->pii_af), pii->pii_phyint->pi_name);
		phyint_inst_delete(pii);
		return;
	}

	cur = pii->pii_logint;
	while (cur != NULL) {
		/* Fetch the successor first: cur may be deleted. */
		following = cur->li_next;
		if (!cur->li_in_use)
			logint_delete(cur);
		cur = following;
	}
}
/*
 * mibwalk() callback: walk the MIB item list and pass the IPv4 and IPv6
 * routing tables to ire_process_v4() / ire_process_v6().  Items whose
 * option name is 0 are skipped, as is everything that is not a route table.
 */
static void
update_router_list(mib_item_t *item)
{
	mib_item_t *cur;
	boolean_t v4_routes;
	boolean_t v6_routes;

	for (cur = item; cur != NULL; cur = cur->mi_next) {
		if (cur->mi_opthdr.name == 0)
			continue;

		v4_routes = (cur->mi_opthdr.level == MIB2_IP &&
		    cur->mi_opthdr.name == MIB2_IP_ROUTE) ?
		    _B_TRUE : _B_FALSE;
		v6_routes = (cur->mi_opthdr.level == MIB2_IP6 &&
		    cur->mi_opthdr.name == MIB2_IP6_ROUTE) ?
		    _B_TRUE : _B_FALSE;

		if (v4_routes) {
			ire_process_v4((mib2_ipRouteEntry_t *)cur->mi_valp,
			    cur->mi_opthdr.len);
		} else if (v6_routes) {
			ire_process_v6((mib2_ipv6RouteEntry_t *)cur->mi_valp,
			    cur->mi_opthdr.len);
		}
	}
}
/*
 * Copy the interface name held in the octet string `octp' into `ifname'
 * (a buffer of `ifsize' bytes), NUL-terminate it, and cut it at the first
 * IF_SEPARATOR so that only the part before the separator remains.
 */
static void
oct2ifname(const Octet_t *octp, char *ifname, size_t ifsize)
{
	size_t ncopy = MIN(octp->o_length, ifsize - 1);
	char *scan;

	(void) strncpy(ifname, octp->o_bytes, ncopy);
	ifname[ncopy] = '\0';

	for (scan = ifname; *scan != '\0'; scan++) {
		if (*scan == IF_SEPARATOR) {
			*scan = '\0';
			break;
		}
	}
}
/*
 * Scan the IPv4 routing table in `buf' (`len' bytes) for possible router
 * probe targets.
 *
 * For every IRE_OFFSUBNET route, its next hop is offered (through
 * router_add_common()) on each IRE_INTERFACE route that names an interface
 * and whose masked destination matches the masked next hop.
 *
 * `len' must be a multiple of ipRouteEntrySize (asserted); an empty table
 * is ignored.
 */
static void
ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len)
{
	char ifname[LIFNAMSIZ];
	mib2_ipRouteEntry_t *rp, *rp1, *endp;
	struct in_addr nexthop_v4;
	struct in6_addr nexthop;

	/* len is a size_t, so it must be printed with %zu, not %d. */
	if (debug & D_TARGET)
		logdebug("ire_process_v4(len %zu)\n", len);

	if (len == 0)
		return;

	assert((len % ipRouteEntrySize) == 0);
	endp = buf + (len / ipRouteEntrySize);

	for (rp = buf; rp < endp; rp++) {
		if (!(rp->ipRouteInfo.re_ire_type & IRE_OFFSUBNET))
			continue;

		/* The next hop is fixed for the whole inner scan. */
		nexthop_v4.s_addr = rp->ipRouteNextHop;
		IN6_INADDR_TO_V4MAPPED(&nexthop_v4, &nexthop);

		/* Find the interface routes that cover this next hop. */
		for (rp1 = buf; rp1 < endp; rp1++) {
			if (!(rp1->ipRouteInfo.re_ire_type & IRE_INTERFACE) ||
			    rp1->ipRouteIfIndex.o_length == 0)
				continue;

			if ((rp1->ipRouteDest & rp1->ipRouteMask) !=
			    (nexthop_v4.s_addr & rp1->ipRouteMask))
				continue;

			oct2ifname(&rp1->ipRouteIfIndex, ifname, LIFNAMSIZ);
			router_add_common(AF_INET, ifname, nexthop);
		}
	}
}
void
router_add_common(int af, char *ifname, struct in6_addr nexthop)
{
struct phyint_instance *pii;
struct phyint *pi;
if (debug & D_TARGET)
logdebug("router_add_common(%s %s)\n", AF_STR(af), ifname);
pii = phyint_inst_lookup(af, ifname);
if (pii == NULL)
return;
if (own_address(nexthop))
return;
pi = pii->pii_phyint;
if (pi->pi_group == phyint_anongroup) {
target_add(pii, nexthop, _B_TRUE);
} else {
pi = pi->pi_group->pg_phyint;
for (; pi != NULL; pi = pi->pi_pgnext)
target_add(PHYINT_INSTANCE(pi, af), nexthop, _B_TRUE);
}
}
/*
 * Scan the IPv6 routing table in `buf' (`len' bytes) for possible router
 * probe targets.
 *
 * For every IRE_OFFSUBNET route with a link-local next hop and a named
 * interface, the interface's group name is looked up; the next hop is then
 * offered (through router_add_common()) on every IRE_INTERFACE route whose
 * interface reports the same group name.
 *
 * `len' must be a multiple of ipv6RouteEntrySize (asserted); an empty table
 * is ignored.
 */
static void
ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len)
{
	struct lifreq lifr;
	char ifname[LIFNAMSIZ];
	char grname[LIFGRNAMSIZ];
	mib2_ipv6RouteEntry_t *rp, *rp1, *endp;
	struct in6_addr nexthop_v6;

	/* len is a size_t, so it must be printed with %zu, not %d. */
	if (debug & D_TARGET)
		logdebug("ire_process_v6(len %zu)\n", len);

	if (len == 0)
		return;

	assert((len % ipv6RouteEntrySize) == 0);
	endp = buf + (len / ipv6RouteEntrySize);

	for (rp = buf; rp < endp; rp++) {
		if (!(rp->ipv6RouteInfo.re_ire_type & IRE_OFFSUBNET) ||
		    !IN6_IS_ADDR_LINKLOCAL(&rp->ipv6RouteNextHop))
			continue;

		nexthop_v6 = rp->ipv6RouteNextHop;

		/* Without an interface name no group can be determined. */
		if (rp->ipv6RouteIfIndex.o_length == 0)
			continue;

		/* Skip the route if the lookup fails or the name is empty. */
		oct2ifname(&rp->ipv6RouteIfIndex, lifr.lifr_name, LIFNAMSIZ);
		if (ioctl(ifsock_v6, SIOCGLIFGROUPNAME, &lifr) == -1 ||
		    strlcpy(grname, lifr.lifr_groupname, LIFGRNAMSIZ) == 0) {
			continue;
		}

		/* Offer the next hop on interface routes in the same group. */
		for (rp1 = buf; rp1 < endp; rp1++) {
			if (!(rp1->ipv6RouteInfo.re_ire_type & IRE_INTERFACE) ||
			    rp1->ipv6RouteIfIndex.o_length == 0) {
				continue;
			}
			oct2ifname(&rp1->ipv6RouteIfIndex, ifname, LIFNAMSIZ);
			(void) strlcpy(lifr.lifr_name, ifname, LIFNAMSIZ);

			if (ioctl(ifsock_v6, SIOCGLIFGROUPNAME, &lifr) != -1 &&
			    strcmp(lifr.lifr_groupname, grname) == 0) {
				router_add_common(AF_INET6, ifname, nexthop_v6);
			}
		}
	}
}
/*
 * Refresh the router targets from the kernel routing tables.
 *
 * Router targets are first marked unused, then mibwalk() runs
 * update_router_list() over the routing tables, and finally targets that
 * are still unused are deleted -- unless the instance's group has failed,
 * in which case they are kept.  Does nothing when force_mcast is set;
 * exits the process if mibwalk() fails.
 */
static void
init_router_targets(void)
{
	struct phyint_instance *inst;
	struct phyint *owner;
	struct target *cur;
	struct target *following;

	if (force_mcast)
		return;

	/* Only instances whose targets are routers take part. */
	for (inst = phyint_instances; inst != NULL; inst = inst->pii_next) {
		if (inst->pii_targets_are_routers) {
			for (cur = inst->pii_targets; cur != NULL;
			    cur = cur->tg_next)
				cur->tg_in_use = 0;
		}
	}

	if (mibwalk(update_router_list) == -1)
		exit(1);

	for (inst = phyint_instances; inst != NULL; inst = inst->pii_next) {
		owner = inst->pii_phyint;
		if (!inst->pii_targets_are_routers)
			continue;

		cur = inst->pii_targets;
		while (cur != NULL) {
			/* Fetch the successor first: cur may be deleted. */
			following = cur->tg_next;
			if (!cur->tg_in_use && !GROUP_FAILED(owner->pi_group))
				target_delete(cur);
			cur = following;
		}
	}
}
static void
init_host_targets(void)
{
struct phyint_instance *pii;
struct phyint_group *pg;
for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
pg = pii->pii_phyint->pi_group;
if (pg != phyint_anongroup && pii->pii_targets == NULL)
dup_host_targets(pii);
}
}
/*
 * Give `desired_pii' a copy of the host (non-router) targets of other phyint
 * instances of the same address family, found by following pi_pgnext from
 * desired_pii's own phyint.  `desired_pii' must be in a named group.
 *
 * NOTE(review): the walk starts at desired_pii's phyint rather than at the
 * head of the group's phyint list, so phyints ahead of it in that list are
 * not visited -- confirm whether that is intended.
 */
static void
dup_host_targets(struct phyint_instance *desired_pii)
{
	struct phyint_instance *donor;
	struct phyint *member;
	struct target *cur;
	int family;

	assert(desired_pii->pii_phyint->pi_group != phyint_anongroup);

	family = desired_pii->pii_af;
	member = desired_pii->pii_phyint;
	while (member != NULL) {
		donor = PHYINT_INSTANCE(member, family);
		if (donor != NULL && donor != desired_pii &&
		    !donor->pii_targets_are_routers) {
			for (cur = donor->pii_targets; cur != NULL;
			    cur = cur->tg_next) {
				target_create(desired_pii, cur->tg_address,
				    _B_FALSE);
			}
		}
		member = member->pi_pgnext;
	}
}
/*
 * Print a one-line usage message naming the program `cmd' on stderr.
 */
static void
usage(char *cmd)
{
	FILE *out = stderr;

	(void) fprintf(out, "usage: %s\n", cmd);
}
/* Defaults file read by getdefault(). */
#define MPATHD_DEFAULT_FILE "/etc/default/mpathd"

/*
 * Look up `name' in MPATHD_DEFAULT_FILE, with the DC_CASE flag turned off.
 *
 * Returns a strdup()'d copy of the value, which the caller must free(), or
 * NULL if the file cannot be opened, the name is not present, the name does
 * not fit in the lookup buffer, or memory is exhausted.
 */
static char *
getdefault(char *name)
{
	char namebuf[BUFSIZ];
	char *value = NULL;

	if (defopen(MPATHD_DEFAULT_FILE) == 0) {
		char *cp;
		int flags;
		int n;

		flags = defcntl(DC_GETFLAGS, 0);
		TURNOFF(flags, DC_CASE);
		(void) defcntl(DC_SETFLAGS, flags);

		/*
		 * Build the "NAME=" key.  snprintf() always terminates the
		 * buffer; a key that would be truncated cannot match
		 * anything, so it is not looked up at all.
		 */
		n = snprintf(namebuf, sizeof (namebuf), "%s=", name);
		if (n >= 0 && (size_t)n < sizeof (namebuf) &&
		    (cp = defread(namebuf)) != NULL)
			value = strdup(cp);

		/* A NULL argument closes the defaults file. */
		(void) defopen((char *)NULL);
	}
	return (value);
}
/* From the FAILBACK setting ("yes"/"no"); defaults to enabled. */
boolean_t failback_enabled = _B_TRUE;
/* Inverse of the TRACK_INTERFACES_ONLY_WITH_GROUPS setting. */
boolean_t track_all_phyints = _B_FALSE;
/* Set by -a: exit after start-up if there are no phyint instances. */
static boolean_t adopt = _B_FALSE;
/* Set by -d and -D: do not daemonize. */
static boolean_t foreground = _B_FALSE;
/*
 * Daemon entry point: read the defaults file, parse the command line, set
 * up the listeners, sockets, event pipe and timers, scan the interfaces and
 * then service events from the poll loop forever.
 *
 * Options: -a adopt mode, -m force multicast (no router targets), -d all
 * debugging in the foreground, -D <flags> selected debugging in the
 * foreground, -l disable link notification handling.
 */
int
main(int argc, char *argv[])
{
int i;
int c;
struct phyint *pi;
struct phyint_instance *pii;
char *value;
/* Kept for the re-exec done on SIGHUP. */
argv0 = argv;
srandom(gethostid());
(void) setlocale(LC_ALL, "");
/*
 * Failure detection time: non-positive values fall back to the default,
 * values below the minimum are raised to the minimum.
 */
value = getdefault("FAILURE_DETECTION_TIME");
if (value != NULL) {
user_failure_detection_time =
(int)strtol((char *)value, NULL, 0);
if (user_failure_detection_time <= 0) {
user_failure_detection_time = FAILURE_DETECTION_TIME;
logerr("Invalid failure detection time %s, assuming "
"default of %d ms\n", value,
user_failure_detection_time);
} else if (user_failure_detection_time <
MIN_FAILURE_DETECTION_TIME) {
user_failure_detection_time =
MIN_FAILURE_DETECTION_TIME;
logerr("Too small failure detection time of %s, "
"assuming minimum of %d ms\n", value,
user_failure_detection_time);
}
free(value);
} else {
user_failure_detection_time = FAILURE_DETECTION_TIME;
}
/* The probe interval is derived from the failure detection time. */
user_probe_interval = user_failure_detection_time /
(NUM_PROBE_FAILS + 2);
/* An unrecognized FAILBACK value leaves the setting unchanged. */
value = getdefault("FAILBACK");
if (value != NULL) {
if (strcasecmp(value, "yes") == 0)
failback_enabled = _B_TRUE;
else if (strcasecmp(value, "no") == 0)
failback_enabled = _B_FALSE;
else
logerr("Invalid value for FAILBACK %s\n", value);
free(value);
} else {
failback_enabled = _B_TRUE;
}
/* Note the reversed sense: "yes" means do NOT track all phyints. */
value = getdefault("TRACK_INTERFACES_ONLY_WITH_GROUPS");
if (value != NULL) {
if (strcasecmp(value, "yes") == 0)
track_all_phyints = _B_FALSE;
else if (strcasecmp(value, "no") == 0)
track_all_phyints = _B_TRUE;
else
logerr("Invalid value for "
"TRACK_INTERFACES_ONLY_WITH_GROUPS %s\n", value);
free(value);
} else {
track_all_phyints = _B_FALSE;
}
while ((c = getopt(argc, argv, "adD:ml")) != EOF) {
switch (c) {
case 'a':
adopt = _B_TRUE;
break;
case 'm':
force_mcast = _B_TRUE;
break;
case 'd':
debug = D_ALL;
foreground = _B_TRUE;
break;
case 'D':
/* A value of 0 (including unparsable input) is rejected. */
i = (int)strtol(optarg, NULL, 0);
if (i == 0) {
(void) fprintf(stderr, "Bad debug flags: %s\n",
optarg);
exit(1);
}
debug |= i;
foreground = _B_TRUE;
break;
case 'l':
handle_link_notifications = _B_FALSE;
break;
default:
usage(argv[0]);
exit(1);
}
}
/*
 * The loopback listeners are created before daemonizing; at least one
 * of the two must be available.
 */
lsock_v4 = setup_listener(AF_INET);
lsock_v6 = setup_listener(AF_INET6);
if (lsock_v4 < 0 && lsock_v6 < 0) {
logerr("main: setup_listener failed for both IPv4 and IPv6\n");
exit(1);
}
if (!foreground) {
if (!daemonize()) {
logerr("cannot daemonize\n");
exit(EXIT_FAILURE);
}
initlog();
}
/* Sockets used for the interface ioctls. */
ifsock_v4 = socket(AF_INET, SOCK_DGRAM, 0);
if (ifsock_v4 < 0) {
logperror("main: IPv4 socket open");
exit(1);
}
ifsock_v6 = socket(AF_INET6, SOCK_DGRAM, 0);
if (ifsock_v6 < 0) {
logperror("main: IPv6 socket open");
exit(1);
}
setup_eventpipe();
rtsock_v4 = setup_rtsock(AF_INET);
rtsock_v6 = setup_rtsock(AF_INET6);
if (phyint_init() == -1) {
logerr("cannot initialize physical interface structures");
exit(1);
}
if (mibwalk(mib_get_constants) == -1)
exit(1);
timer_init();
initifs();
/* Adopt mode with nothing to track: exit successfully. */
if (adopt && phyint_instances == NULL)
exit(0);
/* Event loop: wait for input and dispatch on the descriptor. */
for (;;) {
if (poll(pollfds, pollfd_num, -1) < 0) {
if (errno == EINTR)
continue;
logperror("main: poll");
exit(1);
}
for (i = 0; i < pollfd_num; i++) {
if ((pollfds[i].fd == -1) ||
!(pollfds[i].revents & POLLIN))
continue;
/*
 * After a signal or routing socket event the rest of this
 * poll result is abandoned and poll() is called again.
 */
if (pollfds[i].fd == eventpipe_read) {
in_signal(eventpipe_read);
break;
}
if (pollfds[i].fd == rtsock_v4 ||
pollfds[i].fd == rtsock_v6) {
process_rtsock(rtsock_v4, rtsock_v6);
break;
}
/* Probe traffic for one of the phyint instances. */
for (pii = phyint_instances; pii != NULL;
pii = pii->pii_next) {
if (pollfds[i].fd == pii->pii_probe_sock) {
if (pii->pii_af == AF_INET)
in_data(pii);
else
in6_data(pii);
break;
}
}
/* Input on a phyint's DLPI handle. */
for (pi = phyints; pi != NULL; pi = pi->pi_next) {
if (pi->pi_notes != 0 &&
pollfds[i].fd == dlpi_fd(pi->pi_dh)) {
(void) dlpi_recv(pi->pi_dh, NULL, NULL,
NULL, NULL, 0, NULL);
break;
}
}
/* A new connection on one of the loopback listeners. */
if (pollfds[i].fd == lsock_v4)
loopback_cmd(lsock_v4, AF_INET);
else if (pollfds[i].fd == lsock_v6)
loopback_cmd(lsock_v6, AF_INET6);
}
}
/* Not reached: the loop above never terminates. */
return (EXIT_SUCCESS);
}
/*
 * Create a TCP listening socket for address family `af' (AF_INET or
 * AF_INET6), bound to that family's loopback address on MPATHD_PORT, and
 * register it with poll_add().
 *
 * Returns the listening descriptor on success, or -1 if bind() fails for
 * any reason other than EADDRINUSE (the socket is closed first).  Every
 * other failure -- socket(), setsockopt(), bind() with EADDRINUSE,
 * listen(), poll_add() -- terminates the process with exit(1).
 */
static int
setup_listener(int af)
{
    int listen_fd;
    int reuse = 1;
    int addrlen;
    struct sockaddr_storage bindaddr;
    struct in6_addr v6_loopback = IN6ADDR_LOOPBACK_INIT;

    assert(af == AF_INET || af == AF_INET6);

    if ((listen_fd = socket(af, SOCK_STREAM, 0)) < 0) {
        logperror("setup_listener: socket");
        exit(1);
    }

    if (setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, (char *)&reuse,
        sizeof (reuse)) < 0) {
        logperror("setup_listener: setsockopt (SO_REUSEADDR)");
        exit(1);
    }

    /* Build the loopback address to bind to for the requested family. */
    bzero(&bindaddr, sizeof (bindaddr));
    bindaddr.ss_family = af;
    if (af != AF_INET) {
        struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)&bindaddr;

        v6->sin6_port = htons(MPATHD_PORT);
        v6->sin6_addr = v6_loopback;
        addrlen = sizeof (struct sockaddr_in6);
    } else {
        struct sockaddr_in *v4 = (struct sockaddr_in *)&bindaddr;

        v4->sin_port = htons(MPATHD_PORT);
        v4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
        addrlen = sizeof (struct sockaddr_in);
    }

    if (bind(listen_fd, (struct sockaddr *)&bindaddr, addrlen) < 0) {
        if (errno != EADDRINUSE) {
            /* Non-fatal: let the caller decide what to do. */
            (void) close(listen_fd);
            return (-1);
        }
        /* The port is taken, so bail out rather than run twice. */
        logerr("main: is another instance of in.mpathd "
            "already active?\n");
        exit(1);
    }

    if (listen(listen_fd, 30) < 0) {
        logperror("main: listen");
        exit(1);
    }

    if (poll_add(listen_fd) == -1) {
        (void) close(listen_fd);
        exit(1);
    }

    return (listen_fd);
}
/*
 * Per-command metadata, indexed by command id (loopback_cmd() indexes it
 * with ids it has already checked to be below MI_NCMD).  The order of the
 * entries therefore has to follow the numeric order of the command ids.
 */
static struct {
const char *name;	/* printable command name, used in log messages */
unsigned int size;	/* minimum number of request bytes for the command */
} commands[] = {
{ "MI_PING", sizeof (uint32_t) },
{ "MI_OFFLINE", sizeof (mi_offline_t) },
{ "MI_UNDO_OFFLINE", sizeof (mi_undo_offline_t) },
{ "MI_QUERY", sizeof (mi_query_t) }
};
/*
 * Accept one connection on the loopback listener `sock' (whose address
 * family is `family'), read a single command from it, validate it and hand
 * it to process_cmd().  The accepted connection is closed on every path
 * before returning.
 *
 * Validation, in order:
 *  - the peer address must have the size expected for `family' and must be
 *    the loopback address;
 *  - the read must succeed and deliver at least the 32-bit command id;
 *  - the command id must be below MI_NCMD;
 *  - peers connecting from a port >= IPPORT_RESERVED may only issue
 *    MI_QUERY and MI_PING;
 *  - at least commands[cmd].size bytes must have been read.
 */
static void
loopback_cmd(int sock, int family)
{
    int newfd;
    ssize_t len;
    boolean_t is_priv = _B_FALSE;
    struct sockaddr_storage peer;
    struct sockaddr_in *peer_sin;
    struct sockaddr_in6 *peer_sin6;
    socklen_t peerlen;
    union mi_commands mpi;
    char abuf[INET6_ADDRSTRLEN];
    uint_t cmd;
    int retval;

    peerlen = sizeof (peer);
    newfd = accept(sock, (struct sockaddr *)&peer, &peerlen);
    if (newfd < 0) {
        logperror("loopback_cmd: accept");
        return;
    }

    switch (family) {
    case AF_INET:
        if (peerlen != sizeof (struct sockaddr_in)) {
            logerr("loopback_cmd: AF_INET peerlen %d\n", peerlen);
            (void) close(newfd);
            return;
        }
        peer_sin = (struct sockaddr_in *)&peer;
        /* A reserved source port marks the peer as privileged. */
        is_priv = ntohs(peer_sin->sin_port) < IPPORT_RESERVED;
        (void) inet_ntop(AF_INET, &peer_sin->sin_addr.s_addr,
            abuf, sizeof (abuf));
        if (ntohl(peer_sin->sin_addr.s_addr) != INADDR_LOOPBACK) {
            logerr("Attempt to connect from addr %s port %d\n",
                abuf, ntohs(peer_sin->sin_port));
            (void) close(newfd);
            return;
        }
        break;

    case AF_INET6:
        if (peerlen != sizeof (struct sockaddr_in6)) {
            logerr("loopback_cmd: AF_INET6 peerlen %d\n", peerlen);
            (void) close(newfd);
            return;
        }
        peer_sin6 = (struct sockaddr_in6 *)&peer;
        /* A reserved source port marks the peer as privileged. */
        is_priv = ntohs(peer_sin6->sin6_port) < IPPORT_RESERVED;
        (void) inet_ntop(AF_INET6, &peer_sin6->sin6_addr, abuf,
            sizeof (abuf));
        if (!IN6_IS_ADDR_LOOPBACK(&peer_sin6->sin6_addr)) {
            logerr("Attempt to connect from addr %s port %d\n",
                abuf, ntohs(peer_sin6->sin6_port));
            (void) close(newfd);
            return;
        }
        break;

    default:
        logdebug("loopback_cmd: family %d\n", family);
        (void) close(newfd);
        return;
    }

    /* `mpi' is a union of all command layouts, so one read fits any. */
    len = read(newfd, &mpi, sizeof (mpi));

    /*
     * read() returns -1 on failure.  `len' is signed while sizeof yields
     * an unsigned value, so a bare `len < sizeof (...)' would convert -1
     * to a huge unsigned number and let the failure through with `mpi'
     * uninitialized.  Test for a negative result explicitly.
     */
    if (len < 0 || (size_t)len < sizeof (uint32_t)) {
        logerr("loopback_cmd: bad command format or read returns "
            "partial data %d\n", (int)len);
        (void) close(newfd);
        return;
    }

    cmd = mpi.mi_command;
    if (cmd >= MI_NCMD) {
        logerr("loopback_cmd: unknown command id `%d'\n", cmd);
        (void) close(newfd);
        return;
    }

    /* Only MI_PING and MI_QUERY are open to unprivileged peers. */
    if (!is_priv && (cmd != MI_QUERY && cmd != MI_PING)) {
        logerr("Unprivileged request from %s for privileged "
            "command %s\n", abuf, commands[cmd].name);
        (void) close(newfd);
        return;
    }

    /* `len' is known to be non-negative here, so the cast is safe. */
    if ((size_t)len < commands[cmd].size) {
        logerr("loopback_cmd: short %s command (expected %d, got %d)\n",
            commands[cmd].name, commands[cmd].size, (int)len);
        (void) close(newfd);
        return;
    }

    retval = process_cmd(newfd, &mpi);
    if (retval != IPMP_SUCCESS) {
        logerr("failed processing %s: %s\n", commands[cmd].name,
            ipmp_errmsg(retval));
    }
    (void) close(newfd);
}
/*
 * Execute the command held in `mpi' and send the reply on `newfd'.
 *
 *  MI_PING          replies IPMP_SUCCESS.
 *  MI_OFFLINE       looks up the named interface and calls
 *                   phyint_offline() with the requested minimum
 *                   redundancy.
 *  MI_UNDO_OFFLINE  looks up the named interface and calls
 *                   phyint_undo_offline().
 *  MI_QUERY         is delegated to process_query().
 *  anything else    replies IPMP_EPROTO.
 *
 * An unknown interface name is answered with IPMP_EUNKIF.  When the
 * underlying operation returns IPMP_FAILURE, errno is sent along with it.
 *
 * Returns the result of sending the reply (or of process_query()).
 */
static unsigned int
process_cmd(int newfd, union mi_commands *mpi)
{
    struct phyint *pi;
    struct mi_offline *mio;
    struct mi_undo_offline *miu;
    unsigned int retval;

    switch (mpi->mi_command) {
    case MI_PING:
        return (send_result(newfd, IPMP_SUCCESS, 0));

    case MI_OFFLINE:
        mio = &mpi->mi_ocmd;

        /*
         * The name was read straight off the socket; force
         * NUL-termination before it is used as a C string, as
         * process_query() does for the names it receives.
         */
        mio->mio_ifname[sizeof (mio->mio_ifname) - 1] = '\0';

        pi = phyint_lookup(mio->mio_ifname);
        if (pi == NULL)
            return (send_result(newfd, IPMP_EUNKIF, 0));

        retval = phyint_offline(pi, mio->mio_min_redundancy);
        if (retval == IPMP_FAILURE)
            return (send_result(newfd, IPMP_FAILURE, errno));

        return (send_result(newfd, retval, 0));

    case MI_UNDO_OFFLINE:
        miu = &mpi->mi_ucmd;

        /* Same defensive termination as for MI_OFFLINE. */
        miu->miu_ifname[sizeof (miu->miu_ifname) - 1] = '\0';

        pi = phyint_lookup(miu->miu_ifname);
        if (pi == NULL)
            return (send_result(newfd, IPMP_EUNKIF, 0));

        retval = phyint_undo_offline(pi);
        if (retval == IPMP_FAILURE)
            return (send_result(newfd, IPMP_FAILURE, errno));

        return (send_result(newfd, retval, 0));

    case MI_QUERY:
        return (process_query(newfd, &mpi->mi_qcmd));

    default:
        break;
    }

    return (send_result(newfd, IPMP_EPROTO, 0));
}
/*
 * Answer the MI_QUERY request `miq' on `fd'.
 *
 * For every request type the requested information is looked up first.
 * If the lookup fails, the error (plus errno) is sent with send_result()
 * and that call's result is returned.  Otherwise an IPMP_SUCCESS result is
 * sent, followed by the information itself encoded as TLVs, and the
 * looked-up data is freed before returning.
 *
 * An IPMP_SNAP reply is a sequence: the group list, the interface count
 * followed by each interface's info, the group count followed by each
 * group's info, and the address count followed by each address's info.
 *
 * Unknown request types are answered with IPMP_EPROTO.
 *
 * Returns IPMP_SUCCESS or the first error hit while replying.
 */
static unsigned int
process_query(int fd, mi_query_t *miq)
{
    ipmp_addrinfo_t *adinfop;
    ipmp_addrinfolist_t *adlp;
    ipmp_groupinfo_t *grinfop;
    ipmp_groupinfolist_t *grlp;
    ipmp_grouplist_t *grlistp;
    ipmp_ifinfo_t *ifinfop;
    ipmp_ifinfolist_t *iflp;
    ipmp_snap_t *snap;
    unsigned int retval;

    switch (miq->miq_inforeq) {
    case IPMP_ADDRINFO:
        /*
         * The group name was read off the socket; terminate it before
         * use, exactly as the IPMP_GROUPINFO case below does.
         */
        miq->miq_grname[LIFGRNAMSIZ - 1] = '\0';
        retval = getgraddrinfo(miq->miq_grname, &miq->miq_addr,
            &adinfop);
        if (retval != IPMP_SUCCESS)
            return (send_result(fd, retval, errno));

        retval = send_result(fd, IPMP_SUCCESS, 0);
        if (retval == IPMP_SUCCESS)
            retval = send_addrinfo(fd, adinfop);

        ipmp_freeaddrinfo(adinfop);
        return (retval);

    case IPMP_GROUPLIST:
        retval = getgrouplist(&grlistp);
        if (retval != IPMP_SUCCESS)
            return (send_result(fd, retval, errno));

        retval = send_result(fd, IPMP_SUCCESS, 0);
        if (retval == IPMP_SUCCESS)
            retval = send_grouplist(fd, grlistp);

        ipmp_freegrouplist(grlistp);
        return (retval);

    case IPMP_GROUPINFO:
        /* Defend against a group name that is not NUL-terminated. */
        miq->miq_grname[LIFGRNAMSIZ - 1] = '\0';
        retval = getgroupinfo(miq->miq_grname, &grinfop);
        if (retval != IPMP_SUCCESS)
            return (send_result(fd, retval, errno));

        retval = send_result(fd, IPMP_SUCCESS, 0);
        if (retval == IPMP_SUCCESS)
            retval = send_groupinfo(fd, grinfop);

        ipmp_freegroupinfo(grinfop);
        return (retval);

    case IPMP_IFINFO:
        /* Defend against an interface name that is not NUL-terminated. */
        miq->miq_ifname[LIFNAMSIZ - 1] = '\0';
        retval = getifinfo(miq->miq_ifname, &ifinfop);
        if (retval != IPMP_SUCCESS)
            return (send_result(fd, retval, errno));

        retval = send_result(fd, IPMP_SUCCESS, 0);
        if (retval == IPMP_SUCCESS)
            retval = send_ifinfo(fd, ifinfop);

        ipmp_freeifinfo(ifinfop);
        return (retval);

    case IPMP_SNAP:
        /*
         * Re-run initifs() before snapshotting.
         * NOTE(review): presumably this resynchronizes the daemon's
         * interface state with the kernel -- confirm.
         */
        initifs();

        retval = getsnap(&snap);
        if (retval != IPMP_SUCCESS)
            return (send_result(fd, retval, errno));

        retval = send_result(fd, IPMP_SUCCESS, 0);
        if (retval != IPMP_SUCCESS)
            goto out;

        retval = send_grouplist(fd, snap->sn_grlistp);
        if (retval != IPMP_SUCCESS)
            goto out;

        /* Interface count, then one info record per interface. */
        retval = ipmp_writetlv(fd, IPMP_IFCNT, sizeof (uint32_t),
            &snap->sn_nif);
        if (retval != IPMP_SUCCESS)
            goto out;

        iflp = snap->sn_ifinfolistp;
        for (; iflp != NULL; iflp = iflp->ifl_next) {
            retval = send_ifinfo(fd, iflp->ifl_ifinfop);
            if (retval != IPMP_SUCCESS)
                goto out;
        }

        /* Group count, then one info record per group. */
        retval = ipmp_writetlv(fd, IPMP_GROUPCNT, sizeof (uint32_t),
            &snap->sn_ngroup);
        if (retval != IPMP_SUCCESS)
            goto out;

        grlp = snap->sn_grinfolistp;
        for (; grlp != NULL; grlp = grlp->grl_next) {
            retval = send_groupinfo(fd, grlp->grl_grinfop);
            if (retval != IPMP_SUCCESS)
                goto out;
        }

        /* Address count, then one info record per address. */
        retval = ipmp_writetlv(fd, IPMP_ADDRCNT, sizeof (uint32_t),
            &snap->sn_naddr);
        if (retval != IPMP_SUCCESS)
            goto out;

        adlp = snap->sn_adinfolistp;
        for (; adlp != NULL; adlp = adlp->adl_next) {
            retval = send_addrinfo(fd, adlp->adl_adinfop);
            if (retval != IPMP_SUCCESS)
                goto out;
        }
    out:
        /* The snapshot is released on success and on every error. */
        ipmp_snap_free(snap);
        return (retval);

    default:
        break;
    }

    return (send_result(fd, IPMP_EPROTO, 0));
}
/*
 * Write the group described by `grinfop' to `fd' as three consecutive
 * TLVs: an IPMP_GROUPINFO record holding the scalar fields and names
 * copied into a zeroed ipmp_groupinfo_xfer_t, then the group's interface
 * list (IPMP_IFLIST) and its address list (IPMP_ADDRLIST).
 *
 * Returns IPMP_SUCCESS, or the error from the first write that failed;
 * nothing further is written after a failure.
 */
static unsigned int
send_groupinfo(int fd, ipmp_groupinfo_t *grinfop)
{
    ipmp_groupinfo_xfer_t wire;
    ipmp_iflist_t *ifs = grinfop->gr_iflistp;
    ipmp_addrlist_t *addrs = grinfop->gr_adlistp;
    unsigned int err;

    /* Start from all-zeroes so unset bytes never carry stack garbage. */
    (void) memset(&wire, 0, sizeof (wire));

    /* Names: fixed-size copies bounded by the destination arrays. */
    (void) memcpy(wire.grx_name, grinfop->gr_name,
        sizeof (wire.grx_name));
    (void) memcpy(wire.grx_ifname, grinfop->gr_ifname,
        sizeof (wire.grx_ifname));
    (void) memcpy(wire.grx_m4ifname, grinfop->gr_m4ifname,
        sizeof (wire.grx_m4ifname));
    (void) memcpy(wire.grx_m6ifname, grinfop->gr_m6ifname,
        sizeof (wire.grx_m6ifname));
    (void) memcpy(wire.grx_bcifname, grinfop->gr_bcifname,
        sizeof (wire.grx_bcifname));

    /* Scalars. */
    wire.grx_sig = grinfop->gr_sig;
    wire.grx_state = grinfop->gr_state;
    wire.grx_fdt = grinfop->gr_fdt;

    err = ipmp_writetlv(fd, IPMP_GROUPINFO, sizeof (wire), &wire);
    if (err == IPMP_SUCCESS) {
        err = ipmp_writetlv(fd, IPMP_IFLIST,
            IPMP_IFLIST_SIZE(ifs->il_nif), ifs);
    }
    if (err == IPMP_SUCCESS) {
        err = ipmp_writetlv(fd, IPMP_ADDRLIST,
            IPMP_ADDRLIST_SIZE(addrs->al_naddr), addrs);
    }
    return (err);
}
/*
 * Write the interface described by `ifinfop' to `fd' as three consecutive
 * TLVs: an IPMP_IFINFO record holding the scalar fields, names and
 * per-family test address / target mode copied into a zeroed
 * ipmp_ifinfo_xfer_t, followed by the IPv4 target list and then the IPv6
 * target list (both sent as IPMP_ADDRLIST).
 *
 * Returns IPMP_SUCCESS, or the error from the first write that failed;
 * nothing further is written after a failure.
 */
static unsigned int
send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop)
{
    ipmp_ifinfo_xfer_t wire;
    ipmp_addrlist_t *targs4 = ifinfop->if_targinfo4.it_targlistp;
    ipmp_addrlist_t *targs6 = ifinfop->if_targinfo6.it_targlistp;
    unsigned int err;

    /* Start from all-zeroes so unset bytes never carry stack garbage. */
    (void) memset(&wire, 0, sizeof (wire));

    /* Interface identity and state. */
    (void) memcpy(wire.ifx_name, ifinfop->if_name,
        sizeof (wire.ifx_name));
    (void) memcpy(wire.ifx_group, ifinfop->if_group,
        sizeof (wire.ifx_group));
    wire.ifx_state = ifinfop->if_state;
    wire.ifx_type = ifinfop->if_type;
    wire.ifx_linkstate = ifinfop->if_linkstate;
    wire.ifx_probestate = ifinfop->if_probestate;
    wire.ifx_flags = ifinfop->if_flags;

    /* IPv4 probe-target summary. */
    (void) memcpy(wire.ifx_targinfo4.itx_name,
        ifinfop->if_targinfo4.it_name,
        sizeof (wire.ifx_targinfo4.itx_name));
    wire.ifx_targinfo4.itx_testaddr = ifinfop->if_targinfo4.it_testaddr;
    wire.ifx_targinfo4.itx_targmode = ifinfop->if_targinfo4.it_targmode;

    /* IPv6 probe-target summary. */
    (void) memcpy(wire.ifx_targinfo6.itx_name,
        ifinfop->if_targinfo6.it_name,
        sizeof (wire.ifx_targinfo6.itx_name));
    wire.ifx_targinfo6.itx_testaddr = ifinfop->if_targinfo6.it_testaddr;
    wire.ifx_targinfo6.itx_targmode = ifinfop->if_targinfo6.it_targmode;

    err = ipmp_writetlv(fd, IPMP_IFINFO, sizeof (wire), &wire);
    if (err == IPMP_SUCCESS) {
        err = ipmp_writetlv(fd, IPMP_ADDRLIST,
            IPMP_ADDRLIST_SIZE(targs4->al_naddr), targs4);
    }
    if (err == IPMP_SUCCESS) {
        err = ipmp_writetlv(fd, IPMP_ADDRLIST,
            IPMP_ADDRLIST_SIZE(targs6->al_naddr), targs6);
    }
    return (err);
}
/*
 * Write the address information `adinfop' to `fd' as a single
 * IPMP_ADDRINFO TLV whose payload is the structure itself.
 * Returns the result of ipmp_writetlv().
 */
static unsigned int
send_addrinfo(int fd, ipmp_addrinfo_t *adinfop)
{
    unsigned int err;

    err = ipmp_writetlv(fd, IPMP_ADDRINFO, sizeof (*adinfop), adinfop);
    return (err);
}
/*
 * Write the group list `grlistp' to `fd' as a single IPMP_GROUPLIST TLV.
 * The payload length is computed from the list's group count with
 * IPMP_GROUPLIST_SIZE().  Returns the result of ipmp_writetlv().
 */
static unsigned int
send_grouplist(int fd, ipmp_grouplist_t *grlistp)
{
    unsigned int err;

    err = ipmp_writetlv(fd, IPMP_GROUPLIST,
        IPMP_GROUPLIST_SIZE(grlistp->gl_ngroup), grlistp);
    return (err);
}
static unsigned int
send_result(int fd, unsigned int error, int syserror)
{
mi_result_t me;
me.me_mpathd_error = error;
if (error == IPMP_FAILURE)
me.me_sys_error = syserror;
else
me.me_sys_error = 0;
return (ipmp_write(fd, &me, sizeof (me)));
}
/*
 * Detach from the invoking process using two forks with a setsid() in
 * between.  Both intermediate parents leave via _exit(EXIT_SUCCESS); only
 * the final child returns from this function.  That child changes
 * directory to "/", sets its umask to 022 and walks its open descriptors
 * with closefunc().
 *
 * Returns _B_TRUE in the surviving child, or _B_FALSE if either fork() or
 * setsid() failed (in whichever process observed the failure).
 */
static boolean_t
daemonize(void)
{
    pid_t pid;

    pid = fork();
    if (pid == -1)
        return (_B_FALSE);
    if (pid != 0)
        _exit(EXIT_SUCCESS);

    /* First child: start a new session, dropping the controlling tty. */
    if (setsid() == -1)
        return (_B_FALSE);

    /*
     * Fork once more so the survivor is not a session leader; a session
     * leader could otherwise pick up a controlling terminal again.
     */
    pid = fork();
    if (pid == -1)
        return (_B_FALSE);
    if (pid != 0)
        _exit(EXIT_SUCCESS);

    /* Second child: this is the process that keeps running. */
    (void) chdir("/");
    (void) umask(022);
    (void) fdwalk(closefunc, NULL);

    return (_B_TRUE);
}
/*
 * Descriptor-walk callback: close `fd' unless it is one of the two
 * loopback listener sockets (lsock_v4 / lsock_v6).  `not_used' is
 * ignored.  Always returns 0.
 */
static int
closefunc(void *not_used, int fd)
{
    if (fd == lsock_v4 || fd == lsock_v6)
        return (0);

    (void) close(fd);
    return (0);
}
#include <syslog.h>
/*
 * Non-zero once initlog() has run.  The log routines in this file send
 * output to syslog when it is set and to stderr otherwise.
 */
static int logging = 0;

/*
 * Switch the log routines over to syslog: bump `logging' and open the log
 * with the "in.mpathd" identity, PID tagging and the daemon facility.
 */
static void
initlog(void)
{
    logging += 1;
    openlog("in.mpathd", LOG_PID, LOG_DAEMON);
}
/*
 * printf-style logging.  The formatted message goes to syslog at priority
 * `pri' when `logging' is set, and to stderr (where `pri' is ignored)
 * otherwise.
 */
void
logmsg(int pri, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    if (!logging)
        (void) vfprintf(stderr, fmt, args);
    else
        vsyslog(pri, fmt, args);
    va_end(args);
}
/*
 * perror()-style logging: emit `str' followed by the text for the current
 * errno.  Goes to syslog at LOG_ERR (using %m) when `logging' is set, and
 * to stderr otherwise.
 */
void
logperror(const char *str)
{
    if (!logging) {
        (void) fprintf(stderr, "%s: %s\n", str, strerror(errno));
        return;
    }

    syslog(LOG_ERR, "%s: %m\n", str);
}
/*
 * Like logperror(), but the message is additionally tagged with the
 * address family of `pii' and the name of its physical interface.
 */
void
logperror_pii(struct phyint_instance *pii, const char *str)
{
    const char *ifname = pii->pii_phyint->pi_name;

    if (!logging) {
        (void) fprintf(stderr, "%s (%s %s): %s\n",
            str, AF_STR(pii->pii_af), ifname, strerror(errno));
        return;
    }

    syslog(LOG_ERR, "%s (%s %s): %m\n",
        str, AF_STR(pii->pii_af), ifname);
}
/*
 * Like logperror(), but the message is additionally tagged with the
 * logical interface name of `li' and the address family of the phyint
 * instance it belongs to.
 */
void
logperror_li(struct logint *li, const char *str)
{
    struct phyint_instance *owner = li->li_phyint_inst;
    const char *liname = li->li_name;

    if (!logging) {
        (void) fprintf(stderr, "%s (%s %s): %s\n",
            str, AF_STR(owner->pii_af), liname, strerror(errno));
        return;
    }

    syslog(LOG_ERR, "%s (%s %s): %m\n",
        str, AF_STR(owner->pii_af), liname);
}
/*
 * Close the probe socket of `pii'.  When `polled' is set, the descriptor
 * is first removed from the poll set with poll_remove().  Afterwards the
 * instance's socket is marked invalid (-1) and its basetime-initialized
 * flag is cleared.
 */
void
close_probe_socket(struct phyint_instance *pii, boolean_t polled)
{
    int probe_fd = pii->pii_probe_sock;

    if (polled)
        (void) poll_remove(probe_fd);
    (void) close(probe_fd);

    pii->pii_basetime_inited = 0;
    pii->pii_probe_sock = -1;
}
/*
 * Allocate a new address-list node holding `name' (truncated to fit
 * LIFNAMSIZ), `flags' and a copy of `*ssp', and push it onto the front of
 * the list headed by `*addrsp'.
 *
 * Returns _B_TRUE on success, or _B_FALSE if memory could not be
 * allocated (the list is left untouched in that case).
 */
boolean_t
addrlist_add(addrlist_t **addrsp, const char *name, uint64_t flags,
    struct sockaddr_storage *ssp)
{
    addrlist_t *node = malloc(sizeof (addrlist_t));

    if (node == NULL)
        return (_B_FALSE);

    (void) strlcpy(node->al_name, name, LIFNAMSIZ);
    node->al_addr = *ssp;
    node->al_flags = flags;

    /* Link in at the head. */
    node->al_next = *addrsp;
    *addrsp = node;

    return (_B_TRUE);
}
/*
 * Free every node of the list headed by `*addrsp' and reset the head to
 * NULL.  An empty list is handled (the head is simply set to NULL again).
 */
void
addrlist_free(addrlist_t **addrsp)
{
    addrlist_t *cur = *addrsp;

    while (cur != NULL) {
        addrlist_t *doomed = cur;

        /* Grab the successor before the node goes away. */
        cur = cur->al_next;
        free(doomed);
    }

    *addrsp = NULL;
}
/*
 * Fetch the MIB tables from /dev/ip and pass them to `proc'.
 *
 * A T_SVR4_OPTMGMT_REQ is sent on `mibfd' (opened on first use and kept
 * open) and the stream of replies is read back.  Every reply that carries
 * a data part is stored in a freshly allocated mib_item_t (option header
 * plus a buffer with the data) appended to a list.  Replies with no data
 * part are skipped, except for the one whose option header has len, name
 * and level all zero, which ends the walk.
 *
 * After the walk completes, `proc' is called once with the head of the
 * list.  The list is owned by this function and is freed before
 * returning, so `proc' must not keep pointers into it.
 *
 * Returns 0 on success.  Returns -1 on any failure, in which case `proc'
 * is not called and any items collected so far are freed.
 */
static int
mibwalk(void (*proc)(mib_item_t *))
{
    mib_item_t *head_item = NULL;
    mib_item_t *last_item = NULL;
    mib_item_t *tmp;
    struct strbuf ctlbuf, databuf;
    int flags;
    int rval;
    /* uintptr_t elements keep the buffer aligned for the TPI structs. */
    uintptr_t buf[512 / sizeof (uintptr_t)];
    struct T_optmgmt_req *tor = (struct T_optmgmt_req *)buf;
    struct T_optmgmt_ack *toa = (struct T_optmgmt_ack *)buf;
    struct T_error_ack *tea = (struct T_error_ack *)buf;
    struct opthdr *req, *optp;
    int status = -1;

    if (mibfd == -1) {
        if ((mibfd = open("/dev/ip", O_RDWR)) < 0) {
            logperror("mibwalk(): ip open");
            return (status);
        }
    }

    /* Build the request: a T_optmgmt_req followed by one opthdr. */
    tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
    tor->OPT_offset = sizeof (struct T_optmgmt_req);
    tor->OPT_length = sizeof (struct opthdr);
    tor->MGMT_flags = T_CURRENT;

    req = (struct opthdr *)&tor[1];
    req->level = EXPER_IP_AND_ALL_IRES;
    req->name = 0;
    req->len = 0;

    ctlbuf.buf = (char *)&buf;
    ctlbuf.len = tor->OPT_length + tor->OPT_offset;

    if (putmsg(mibfd, &ctlbuf, NULL, 0) == -1) {
        logperror("mibwalk(): putmsg(ctl)");
        return (status);
    }

    /* One iteration per reply message; ends at the end-of-data reply. */
    for (;;) {
        /* Fetch only the control part; the data part is read below. */
        errno = flags = 0;
        ctlbuf.maxlen = sizeof (buf);
        rval = getmsg(mibfd, &ctlbuf, NULL, &flags);
        if (rval & MORECTL || rval < 0) {
            if (errno == EINTR)
                continue;
            logerr("mibwalk(): getmsg(ctl) ret: %d err: %d\n",
                rval, errno);
            goto error;
        }

        /*
         * ctlbuf.len is a signed int and is -1 when the message has no
         * control part.  Comparing it directly against sizeof would
         * convert -1 to a huge unsigned value, skip this check, and
         * leave us parsing whatever the previous message left in
         * `buf'.  Reject negative lengths explicitly.
         */
        if (ctlbuf.len < 0 ||
            (size_t)ctlbuf.len < sizeof (t_scalar_t)) {
            logerr("mibwalk(): ctlbuf.len %d\n", ctlbuf.len);
            goto error;
        }

        switch (toa->PRIM_type) {
        case T_ERROR_ACK:
            if ((size_t)ctlbuf.len < sizeof (struct T_error_ack)) {
                logerr("mibwalk(): T_ERROR_ACK ctlbuf "
                    "too short: %d\n", ctlbuf.len);
                goto error;
            }
            logerr("mibwalk(): T_ERROR_ACK: TLI_err = 0x%lx: %s\n"
                " UNIX_err = 0x%lx\n", tea->TLI_error,
                t_strerror(tea->TLI_error), tea->UNIX_error);
            goto error;

        case T_OPTMGMT_ACK:
            /* The option header directly follows the ack. */
            optp = (struct opthdr *)&toa[1];
            if ((size_t)ctlbuf.len < (sizeof (struct T_optmgmt_ack) +
                sizeof (struct opthdr))) {
                logerr("mibwalk(): T_OPTMGMT_ACK ctlbuf too "
                    "short: %d\n", ctlbuf.len);
                goto error;
            }
            if (toa->MGMT_flags != T_SUCCESS) {
                logerr("mibwalk(): MGMT_flags != T_SUCCESS: "
                    "0x%lx\n", toa->MGMT_flags);
                goto error;
            }
            break;

        default:
            goto error;
        }
        /* Only a successful T_OPTMGMT_ACK can get this far. */
        assert(toa->PRIM_type == T_OPTMGMT_ACK);

        /*
         * rval == 0 means nothing is left of this message, i.e. it has
         * no data part.  An all-zero option header marks the end of the
         * walk; any other data-less reply is simply skipped.
         */
        if (rval == 0) {
            if (optp->len == 0 && optp->name == 0 &&
                optp->level == 0) {
                break;
            }
            continue;
        }

        /*
         * A data part is pending.  Append a new item to the list right
         * away so the cleanup code frees it even if a later step fails.
         */
        if ((tmp = malloc(sizeof (*tmp))) == NULL) {
            logperror("mibwalk(): malloc() failed.");
            goto error;
        }
        if (last_item != NULL)
            last_item->mi_next = tmp;
        else
            head_item = tmp;
        last_item = tmp;
        last_item->mi_next = NULL;
        last_item->mi_opthdr = *optp;
        last_item->mi_valp = malloc(optp->len);
        if (last_item->mi_valp == NULL) {
            logperror("mibwalk(): malloc() failed.");
            goto error;
        }

        databuf.maxlen = last_item->mi_opthdr.len;
        databuf.buf = (char *)last_item->mi_valp;
        databuf.len = 0;

        /* Read the data part, retrying if interrupted by a signal. */
        for (;;) {
            flags = 0;
            if ((rval = getmsg(mibfd, NULL, &databuf,
                &flags)) != 0) {
                if (rval < 0 && errno == EINTR)
                    continue;
                /* Any other non-zero return is an error. */
                logperror("mibwalk(): getmsg(data)");
                goto error;
            }
            break;
        }
    }
    status = 0;
    (*proc)(head_item);

    /* Reached on success as well: the item list is always released. */
error:
    while (head_item != NULL) {
        tmp = head_item;
        head_item = tmp->mi_next;
        free(tmp->mi_valp);
        free(tmp);
    }
    return (status);
}
/*
 * mibwalk() callback: scan the MIB item list for the entries with option
 * name 0 at levels MIB2_IP and MIB2_IP6, and record the route entry sizes
 * they report in ipRouteEntrySize and ipv6RouteEntrySize respectively.
 * All other items are ignored.
 */
static void
mib_get_constants(mib_item_t *item)
{
    mib_item_t *cur;

    for (cur = item; cur != NULL; cur = cur->mi_next) {
        if (cur->mi_opthdr.name == 0) {
            if (cur->mi_opthdr.level == MIB2_IP) {
                mib2_ip_t *ip4 = (mib2_ip_t *)cur->mi_valp;

                ipRouteEntrySize = ip4->ipRouteEntrySize;
            } else if (cur->mi_opthdr.level == MIB2_IP6) {
                mib2_ipv6IfStatsEntry_t *ip6 =
                    (mib2_ipv6IfStatsEntry_t *)cur->mi_valp;

                ipv6RouteEntrySize = ip6->ipv6RouteEntrySize;
            }
        }
    }
}