#include "mpd_defs.h"
#include "mpd_tables.h"
#define PROBE_UNI 0x1234
#define PROBE_MULTI 0x5678
#define PROBE_RTT 0x9abc
#define MSEC_PERMIN (60 * MILLISEC)
/*
 * Layout of the echo request/reply used for probing, as filled in by
 * probe(): an ICMP echo header (type, code, checksum, id, sequence)
 * followed by two daemon-private fields (timestamp and probe type).
 */
struct pr_icmp
{
/* ICMP_ECHO_REQUEST or ICMP6_ECHO_REQUEST when sent by probe() */
uint8_t pr_icmp_type;
/* set to 0 by probe(); replies with a non-zero code are rejected */
uint8_t pr_icmp_code;
/* IPv4: computed by in_cksum() in probe(); left 0 by probe() for IPv6 */
uint16_t pr_icmp_cksum;
/* copy of pii_icmpid; compared without conversion against replies */
uint16_t pr_icmp_id;
/* probe sequence number (pii_snxt), stored with htons() */
uint16_t pr_icmp_seq;
/* hrtime at which the probe was started, stored with htonll() */
uint64_t pr_icmp_timestamp;
/* PROBE_UNI, PROBE_MULTI or PROBE_RTT, stored with htonl() */
uint32_t pr_icmp_mtype;
};
/*
 * Destination of PROBE_MULTI probes on IPv6 instances: ff02::1.
 */
static struct in6_addr all_nodes_mcast_v6 = { { 0xff, 0x2, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x1 } };
/*
 * Destination of PROBE_MULTI probes on IPv4 instances: 224.0.0.1.
 */
static struct in_addr all_nodes_mcast_v4 = { { { 0xe0, 0x0, 0x0, 0x1 } } };
/*
 * hrtime of the most recent increase of a group's failure detection time
 * (set in incoming_echo_reply()).  The detection time is not lowered again
 * until MIN_SETTLING_TIME has passed since this instant.
 */
static hrtime_t last_fdt_bumpup_time;
/*
 * Forward declarations of file-local helpers.
 */
/* Ancillary-data lookup in a received message. */
static void *find_ancillary(struct msghdr *msg, int cmsg_level,
int cmsg_type);
/* Round trip time bookkeeping for a target. */
static void pi_set_crtt(struct target *tg, int64_t m,
boolean_t is_probe_uni);
/* Per-probe-type reply handlers, called from in_data() / in6_data(). */
static void incoming_echo_reply(struct phyint_instance *pii,
struct pr_icmp *reply, struct in6_addr fromaddr, struct timeval *recv_tvp);
static void incoming_rtt_reply(struct phyint_instance *pii,
struct pr_icmp *reply, struct in6_addr fromaddr);
static void incoming_mcast_reply(struct phyint_instance *pii,
struct pr_icmp *reply, struct in6_addr fromaddr);
/* Predicates used when adjusting the failure detection time. */
static boolean_t check_pg_crtt_improved(struct phyint_group *pg);
static boolean_t check_pii_crtt_improved(struct phyint_instance *pii);
static boolean_t check_exception_target(struct phyint_instance *pii,
struct target *target);
/* Summaries of the probe statistics of a phyint instance. */
static void probe_fail_info(struct phyint_instance *pii,
struct target *cur_tg, struct probe_fail_count *pfinfo);
static void probe_success_info(struct phyint_instance *pii,
struct target *cur_tg, struct probe_success_count *psinfo);
static boolean_t phyint_repaired(struct phyint *pi);
static boolean_t highest_ack_tg(uint16_t seq, struct target *tg);
/* Miscellaneous helpers: checksum, timer base reset, unit conversion. */
static int in_cksum(ushort_t *addr, int len);
static void reset_snxt_basetimes(void);
static int ns2ms(int64_t ns);
static int64_t tv2ns(struct timeval *);
/*
 * Counters updated by phyint_inst_timer() whenever it finds that one or
 * more probe send times have already passed.
 */
struct probes_missed probes_missed;
/*
 * Build and send one echo request on the probe socket of `pii'.
 *
 * probe_type selects the destination and the bookkeeping done afterwards:
 *	PROBE_MULTI	sent to the all-nodes multicast address; no probe
 *			state is updated.
 *	PROBE_UNI	sent to pii_target_next; if the send succeeded (or
 *			would have blocked) a probe statistics slot is filled
 *			in and the sequence number and both target cursors
 *			are advanced.
 *	PROBE_RTT	sent to pii_rtt_target_next; only that cursor is
 *			advanced.
 *
 * start_hrtime is placed in the packet (via htonll) and, for PROBE_UNI,
 * recorded as the start time of the probe.
 */
static void
probe(struct phyint_instance *pii, uint_t probe_type, hrtime_t start_hrtime)
{
	hrtime_t sent_hrtime;
	struct timeval sent_tv;
	struct pr_icmp probe_pkt;
	struct sockaddr_storage targ;
	uint_t targaddrlen;
	int pr_ndx;
	boolean_t sent = _B_FALSE;
	int rval;

	if (debug & D_TARGET) {
		logdebug("probe(%s %s %d %lld)\n", AF_STR(pii->pii_af),
		    pii->pii_name, probe_type, start_hrtime);
	}

	assert(pii->pii_probe_sock != -1);
	assert(probe_type == PROBE_UNI || probe_type == PROBE_MULTI ||
	    probe_type == PROBE_RTT);

	/*
	 * The whole of probe_pkt (sizeof bytes) is checksummed and sent.
	 * Where uint64_t is 8-byte aligned the structure has padding after
	 * its last member, so clear it first to keep uninitialized stack
	 * bytes off the wire.
	 */
	bzero(&probe_pkt, sizeof (probe_pkt));

	probe_pkt.pr_icmp_type = (pii->pii_af == AF_INET) ?
	    ICMP_ECHO_REQUEST : ICMP6_ECHO_REQUEST;
	probe_pkt.pr_icmp_code = 0;
	probe_pkt.pr_icmp_cksum = 0;
	probe_pkt.pr_icmp_seq = htons(pii->pii_snxt);
	/* The id is copied as is; replies are compared the same way. */
	probe_pkt.pr_icmp_id = pii->pii_icmpid;
	probe_pkt.pr_icmp_timestamp = htonll(start_hrtime);
	probe_pkt.pr_icmp_mtype = htonl(probe_type);

	/* Unicast and rtt probes need a target to be sent to. */
	assert(probe_type == PROBE_MULTI || ((pii->pii_target_next != NULL) &&
	    pii->pii_rtt_target_next != NULL));

	bzero(&targ, sizeof (targ));
	targ.ss_family = pii->pii_af;

	if (pii->pii_af == AF_INET6) {
		struct in6_addr *addr6;

		addr6 = &((struct sockaddr_in6 *)&targ)->sin6_addr;
		targaddrlen = sizeof (struct sockaddr_in6);
		if (probe_type == PROBE_MULTI) {
			*addr6 = all_nodes_mcast_v6;
		} else if (probe_type == PROBE_UNI) {
			*addr6 = pii->pii_target_next->tg_address;
		} else {
			*addr6 = pii->pii_rtt_target_next->tg_address;
		}
	} else {
		struct in_addr *addr4;

		addr4 = &((struct sockaddr_in *)&targ)->sin_addr;
		targaddrlen = sizeof (struct sockaddr_in);
		if (probe_type == PROBE_MULTI) {
			*addr4 = all_nodes_mcast_v4;
		} else if (probe_type == PROBE_UNI) {
			IN6_V4MAPPED_TO_INADDR(
			    &pii->pii_target_next->tg_address, addr4);
		} else {
			IN6_V4MAPPED_TO_INADDR(
			    &pii->pii_rtt_target_next->tg_address, addr4);
		}

		/* Only the IPv4 path computes the ICMP checksum here. */
		probe_pkt.pr_icmp_cksum =
		    in_cksum((ushort_t *)&probe_pkt, (int)sizeof (probe_pkt));
	}

	/* Record both clocks just before the send. */
	sent_hrtime = gethrtime();
	(void) gettimeofday(&sent_tv, NULL);
	rval = sendto(pii->pii_probe_sock, &probe_pkt, sizeof (probe_pkt), 0,
	    (struct sockaddr *)&targ, targaddrlen);

	/*
	 * A send that would block is treated as sent; the probe is then
	 * accounted for like any other probe awaiting a reply.
	 */
	if (rval == sizeof (probe_pkt) ||
	    (rval == -1 && errno == EWOULDBLOCK)) {
		sent = _B_TRUE;
	} else {
		logperror_pii(pii, "probe: probe sendto");
	}

	if (probe_type == PROBE_UNI && sent) {
		pr_ndx = pii->pii_probe_next;
		assert(pr_ndx >= 0 && pr_ndx < PROBE_STATS_COUNT);

		/* Fold the slot's old outcome into the totals before reuse. */
		if (pii->pii_probes[pr_ndx].pr_status == PR_LOST)
			pii->pii_cum_stats.lost++;
		else if (pii->pii_probes[pr_ndx].pr_status == PR_ACKED)
			pii->pii_cum_stats.acked++;
		pii->pii_cum_stats.sent++;

		pii->pii_probes[pr_ndx].pr_id = pii->pii_snxt;
		pii->pii_probes[pr_ndx].pr_tv_sent = sent_tv;
		pii->pii_probes[pr_ndx].pr_hrtime_sent = sent_hrtime;
		pii->pii_probes[pr_ndx].pr_hrtime_start = start_hrtime;
		pii->pii_probes[pr_ndx].pr_target = pii->pii_target_next;
		probe_chstate(&pii->pii_probes[pr_ndx], pii, PR_UNACKED);

		pii->pii_probe_next = PROBE_INDEX_NEXT(pii->pii_probe_next);
		pii->pii_target_next = target_next(pii->pii_target_next);
		assert(pii->pii_target_next != NULL);

		/* Every unicast probe resynchronizes the rtt cursor. */
		pii->pii_rtt_target_next = pii->pii_target_next;
		pii->pii_snxt++;
	} else if (probe_type == PROBE_RTT) {
		/* The rtt cursor advances whether or not the send worked. */
		pii->pii_rtt_target_next =
		    target_next(pii->pii_rtt_target_next);
		assert(pii->pii_rtt_target_next != NULL);
	}
}
void
in_data(struct phyint_instance *pii)
{
struct sockaddr_in from;
struct in6_addr fromaddr;
static uint64_t in_packet[(IP_MAXPACKET + 1)/8];
static uint64_t ancillary_data[(IP_MAXPACKET + 1)/8];
struct ip *ip;
int iphlen;
int len;
char abuf[INET_ADDRSTRLEN];
struct msghdr msg;
struct iovec iov;
struct pr_icmp *reply;
struct timeval *recv_tvp;
if (debug & D_PROBE) {
logdebug("in_data(%s %s)\n",
AF_STR(pii->pii_af), pii->pii_name);
}
iov.iov_base = (char *)in_packet;
iov.iov_len = sizeof (in_packet);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_name = (struct sockaddr *)&from;
msg.msg_namelen = sizeof (from);
msg.msg_control = ancillary_data;
msg.msg_controllen = sizeof (ancillary_data);
if ((len = recvmsg(pii->pii_probe_sock, &msg, 0)) < 0) {
logperror_pii(pii, "in_data: recvmsg");
return;
}
if (LINK_DOWN(pii->pii_phyint))
return;
(void) inet_ntop(AF_INET, &from.sin_addr, abuf, sizeof (abuf));
if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
if (debug & D_PKTBAD) {
logdebug("Truncated message: msg_flags 0x%x from %s\n",
msg.msg_flags, abuf);
}
return;
}
ip = (struct ip *)in_packet;
iphlen = ip->ip_hl << 2;
if (len < iphlen + ICMP_MINLEN) {
if (debug & D_PKTBAD) {
logdebug("in_data: packet too short (%d bytes)"
" from %s\n", len, abuf);
}
return;
}
len -= iphlen;
reply = (struct pr_icmp *)((char *)in_packet + iphlen);
if (reply->pr_icmp_type != ICMP_ECHO_REPLY)
return;
if (reply->pr_icmp_id != pii->pii_icmpid) {
return;
}
if (reply->pr_icmp_code != 0) {
logtrace("probe reply code %d from %s on %s\n",
reply->pr_icmp_code, abuf, pii->pii_name);
return;
}
if (len < sizeof (struct pr_icmp)) {
logtrace("probe reply too short: %d bytes from %s on %s\n",
len, abuf, pii->pii_name);
return;
}
recv_tvp = find_ancillary(&msg, SOL_SOCKET, SCM_TIMESTAMP);
if (recv_tvp == NULL) {
logtrace("message without timestamp from %s on %s\n",
abuf, pii->pii_name);
return;
}
IN6_INADDR_TO_V4MAPPED(&from.sin_addr, &fromaddr);
if (reply->pr_icmp_mtype == htonl(PROBE_UNI))
incoming_echo_reply(pii, reply, fromaddr, recv_tvp);
else if (reply->pr_icmp_mtype == htonl(PROBE_MULTI)) {
incoming_mcast_reply(pii, reply, fromaddr);
} else if (reply->pr_icmp_mtype == htonl(PROBE_RTT)) {
incoming_rtt_reply(pii, reply, fromaddr);
} else {
logtrace("probe reply type: %d from %s on %s\n",
reply->pr_icmp_mtype, abuf, pii->pii_name);
return;
}
}
/*
 * Read one message from the IPv6 probe socket of `pii' and, if it is an
 * echo reply to one of our probes, hand it to the handler that matches
 * the probe type stored in the reply.
 *
 * Unlike the IPv4 path, the received data starts at the ICMPv6 header.
 * The message is dropped when the link is down, when it is too short or
 * truncated, when type/id/code do not match, when the sender is not a
 * link-local address, when a routing header was received with it, or when
 * no SCM_TIMESTAMP ancillary item is present.
 */
void
in6_data(struct phyint_instance *pii)
{
	struct sockaddr_in6 from;
	static uint64_t in_packet[(IP_MAXPACKET + 1)/8];
	static uint64_t ancillary_data[(IP_MAXPACKET + 1)/8];
	int len;
	char abuf[INET6_ADDRSTRLEN];
	struct msghdr msg;
	struct iovec iov;
	void *opt;
	struct pr_icmp *reply;
	struct timeval *recv_tvp;

	if (debug & D_PROBE) {
		logdebug("in6_data(%s %s)\n",
		    AF_STR(pii->pii_af), pii->pii_name);
	}

	iov.iov_base = (char *)in_packet;
	iov.iov_len = sizeof (in_packet);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_name = (struct sockaddr *)&from;
	msg.msg_namelen = sizeof (from);
	msg.msg_control = ancillary_data;
	msg.msg_controllen = sizeof (ancillary_data);

	if ((len = recvmsg(pii->pii_probe_sock, &msg, 0)) < 0) {
		logperror_pii(pii, "in6_data: recvmsg");
		return;
	}

	/* The message has been consumed; ignore it if the link is down. */
	if (LINK_DOWN(pii->pii_phyint))
		return;

	/* Printable sender address for the log messages below. */
	(void) inet_ntop(AF_INET6, &from.sin6_addr, abuf, sizeof (abuf));

	/* Too short to hold even a minimal ICMP header. */
	if (len < ICMP_MINLEN) {
		if (debug & D_PKTBAD) {
			logdebug("in6_data: packet too short (%d bytes)"
			    " from %s\n", len, abuf);
		}
		return;
	}

	/* Data or control information did not fit in our buffers. */
	if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
		if (debug & D_PKTBAD) {
			logdebug("Truncated message: msg_flags 0x%x from %s\n",
			    msg.msg_flags, abuf);
		}
		return;
	}

	reply = (struct pr_icmp *)in_packet;

	/* Silently skip anything that is not an echo reply carrying our id. */
	if (reply->pr_icmp_type != ICMP6_ECHO_REPLY)
		return;
	if (reply->pr_icmp_id != pii->pii_icmpid) {
		return;
	}

	/* Only replies from link-local senders are accepted. */
	if (!IN6_IS_ADDR_LINKLOCAL(&from.sin6_addr)) {
		logtrace("ICMPv6 echo reply source address not linklocal from "
		    "%s on %s\n", abuf, pii->pii_name);
		return;
	}

	/* Replies that arrived with a routing header are rejected. */
	opt = find_ancillary(&msg, IPPROTO_IPV6, IPV6_RTHDR);
	if (opt != NULL) {
		logtrace("message with routing header from %s on %s\n",
		    abuf, pii->pii_name);
		return;
	}

	if (reply->pr_icmp_code != 0) {
		logtrace("probe reply code: %d from %s on %s\n",
		    reply->pr_icmp_code, abuf, pii->pii_name);
		return;
	}

	if (len < (sizeof (struct pr_icmp))) {
		logtrace("probe reply too short: %d bytes from %s on %s\n",
		    len, abuf, pii->pii_name);
		return;
	}

	recv_tvp = find_ancillary(&msg, SOL_SOCKET, SCM_TIMESTAMP);
	if (recv_tvp == NULL) {
		logtrace("message without timestamp from %s on %s\n",
		    abuf, pii->pii_name);
		return;
	}

	/* Dispatch on the probe type we stored in the request. */
	if (reply->pr_icmp_mtype == htonl(PROBE_UNI)) {
		incoming_echo_reply(pii, reply, from.sin6_addr, recv_tvp);
	} else if (reply->pr_icmp_mtype == htonl(PROBE_MULTI)) {
		incoming_mcast_reply(pii, reply, from.sin6_addr);
	} else if (reply->pr_icmp_mtype == htonl(PROBE_RTT)) {
		incoming_rtt_reply(pii, reply, from.sin6_addr);
	} else {
		logtrace("probe reply type: %d from %s on %s\n",
		    reply->pr_icmp_mtype, abuf, pii->pii_name);
	}
}
/*
 * Handle the reply to a PROBE_RTT probe received on `pii' from `fromaddr'.
 *
 * The round trip time is taken from the timestamp echoed in the reply and
 * is passed to pi_set_crtt() with is_probe_uni == _B_FALSE.  If the
 * target's crtt is then low enough, the group's failure detection time is
 * above the user's setting and the settling time since the last increase
 * has passed, the failure detection time and probe interval of the group
 * are lowered, provided check_pg_crtt_improved() agrees.
 */
static void
incoming_rtt_reply(struct phyint_instance *pii, struct pr_icmp *reply,
    struct in6_addr fromaddr)
{
	char abuf[INET6_ADDRSTRLEN];
	struct phyint_group *pg;
	struct target *target;
	int64_t rtt;

	(void) pr_addr(pii->pii_af, fromaddr, abuf, sizeof (abuf));
	if (debug & D_PROBE) {
		logdebug("incoming_rtt_reply: %s %s %s\n",
		    AF_STR(pii->pii_af), pii->pii_name, abuf);
	}

	/* Replies from addresses that are not known targets are ignored. */
	target = target_lookup(pii, fromaddr);
	if (target == NULL)
		return;

	/* A negative value means the timestamp cannot be trusted. */
	rtt = (int64_t)(gethrtime() - ntohll(reply->pr_icmp_timestamp));
	if (rtt < 0)
		return;

	pg = pii->pii_phyint->pi_group;
	if (pii->pii_state != PI_RUNNING)
		return;
	if (GROUP_FAILED(pg))
		return;

	pi_set_crtt(target, rtt, _B_FALSE);

	/* All of the following must hold before the fdt may be lowered. */
	if (!(target->tg_crtt < (pg->pg_probeint / LOWER_FDT_TRIGGER)))
		return;
	if (!(user_failure_detection_time < pg->pg_fdt))
		return;
	if (!(last_fdt_bumpup_time + MIN_SETTLING_TIME < gethrtime()))
		return;
	if (!check_pg_crtt_improved(pg))
		return;

	pg->pg_fdt = MAX(pg->pg_fdt / NEXT_FDT_MULTIPLE,
	    user_failure_detection_time);
	pg->pg_probeint = pg->pg_fdt / (NUM_PROBE_FAILS + 2);

	if (pg != phyint_anongroup) {
		logerr("Improved failure detection time %d ms "
		    "on (%s %s) for group \"%s\"\n",
		    pg->pg_fdt, AF_STR(pii->pii_af),
		    pii->pii_name, pg->pg_name);
	}

	if (pg->pg_fdt == user_failure_detection_time) {
		/* Back at the user's setting: use the exact interval. */
		pg->pg_probeint = user_probe_interval;
		reset_snxt_basetimes();
	}
}
/*
 * Handle the reply to a PROBE_UNI probe received on `pii' from `fromaddr'.
 * `recv_tvp' is the receive timestamp taken from the message's ancillary
 * data.
 *
 * The reply is first validated against the window of outstanding sequence
 * numbers and against the probe statistics slot it maps to.  A valid reply
 * may update the target's round trip estimate and, depending on the result,
 * retire a slow target or raise/lower the group's failure detection time.
 * Every validated reply ends at `out', where the slot is marked PR_ACKED
 * and pii_rack is updated.
 */
static void
incoming_echo_reply(struct phyint_instance *pii, struct pr_icmp *reply,
struct in6_addr fromaddr, struct timeval *recv_tvp)
{
int64_t m;
hrtime_t cur_hrtime;
char abuf[INET6_ADDRSTRLEN];
int pr_ndx;
struct target *target;
boolean_t exception;
uint64_t pr_icmp_timestamp;
uint16_t pr_icmp_seq;
struct probe_stats *pr_statp;
struct phyint_group *pg = pii->pii_phyint->pi_group;
(void) pr_addr(pii->pii_af, fromaddr, abuf, sizeof (abuf));
if (debug & D_PROBE) {
logdebug("incoming_echo_reply: %s %s %s seq %u recv_tvp %lld\n",
AF_STR(pii->pii_af), pii->pii_name, abuf,
ntohs(reply->pr_icmp_seq), tv2ns(recv_tvp));
}
pr_icmp_timestamp = ntohll(reply->pr_icmp_timestamp);
pr_icmp_seq = ntohs(reply->pr_icmp_seq);
/*
 * Accept only sequence numbers in
 * [pii_snxt - PROBE_STATS_COUNT, pii_snxt).
 */
if (SEQ_GE(pr_icmp_seq, pii->pii_snxt) ||
SEQ_LT(pr_icmp_seq, pii->pii_snxt - PROBE_STATS_COUNT)) {
logtrace("out of window probe seq %u snxt %u on %s from %s\n",
pr_icmp_seq, pii->pii_snxt, pii->pii_name, abuf);
pii->pii_cum_stats.unknown++;
return;
}
cur_hrtime = gethrtime();
m = (int64_t)(cur_hrtime - pr_icmp_timestamp);
/*
 * A negative rtt is only logged here; it is excluded from the rtt
 * statistics further below.
 */
if (m < 0) {
logerr("incoming_echo_reply: rtt wraparound cur_hrtime %lld "
"reply timestamp %lld\n", cur_hrtime, pr_icmp_timestamp);
}
/*
 * Map the sequence number to its probe statistics slot: pii_snxt
 * corresponds to index pii_probe_next.
 */
pr_ndx = MOD_SUB(pii->pii_probe_next,
(uint16_t)(pii->pii_snxt - pr_icmp_seq), PROBE_STATS_COUNT);
assert(PR_STATUS_VALID(pii->pii_probes[pr_ndx].pr_status));
target = pii->pii_probes[pr_ndx].pr_target;
if (target != NULL) {
/*
 * The sender must be the target the probe was sent to, and the
 * probe must not have been acknowledged already.
 */
if (!IN6_ARE_ADDR_EQUAL(&target->tg_address, &fromaddr)) {
logtrace("probe status %d Fake probe reply seq %u "
"snxt %u on %s from %s\n",
pii->pii_probes[pr_ndx].pr_status,
pr_icmp_seq, pii->pii_snxt, pii->pii_name, abuf);
pii->pii_cum_stats.unknown++;
return;
} else if (pii->pii_probes[pr_ndx].pr_status == PR_ACKED) {
logtrace("probe status %d Duplicate probe reply seq %u "
"snxt %u on %s from %s\n",
pii->pii_probes[pr_ndx].pr_status,
pr_icmp_seq, pii->pii_snxt, pii->pii_name, abuf);
pii->pii_cum_stats.unknown++;
return;
}
} else {
/*
 * No target is recorded for this slot; the reply is counted as
 * unknown.  An unacknowledged probe always has a target.
 */
assert(pii->pii_probes[pr_ndx].pr_status != PR_UNACKED);
if (pii->pii_probes[pr_ndx].pr_status == PR_UNUSED) {
logtrace("probe status %d Fake probe reply seq %u "
"snxt %u on %s from %s\n",
pii->pii_probes[pr_ndx].pr_status,
pr_icmp_seq, pii->pii_snxt, pii->pii_name, abuf);
}
pii->pii_cum_stats.unknown++;
return;
}
/*
 * Skip the rtt update (but still acknowledge the probe) when the
 * measured rtt is negative or longer than the time spanned by the
 * whole probe statistics window.
 */
if ((m < 0) || (ns2ms(m) > PROBE_STATS_COUNT * pg->pg_probeint)) {
goto out;
}
/* No rtt updates unless the instance is running and the group is up. */
if ((pii->pii_state != PI_RUNNING) || GROUP_FAILED(pg))
goto out;
/*
 * No rtt update if a later probe to this target has already been
 * acknowledged.
 */
if (!highest_ack_tg(pr_icmp_seq, target))
goto out;
pi_set_crtt(target, m, _B_TRUE);
if (target->tg_crtt > pg->pg_probeint) {
/*
 * This target's crtt exceeds the probe interval.  Either the
 * target alone is slow and is taken out of use, or the group's
 * failure detection time has to be raised.
 */
exception = check_exception_target(pii, target);
if (exception) {
if (pii->pii_targets_are_routers) {
/*
 * Router targets are kept but marked TG_SLOW with
 * their rtt statistics reset.
 */
if (target->tg_status == TG_ACTIVE)
pii->pii_ntargets--;
target->tg_status = TG_SLOW;
target->tg_latime = gethrtime();
target->tg_rtt_sa = -1;
target->tg_crtt = 0;
target->tg_rtt_sd = 0;
if (pii->pii_target_next == target) {
pii->pii_target_next =
target_next(target);
}
} else {
/*
 * Host targets are deleted and a multicast probe is
 * sent to look for targets again.
 */
target_delete(target);
probe(pii, PROBE_MULTI, cur_hrtime);
}
} else {
/*
 * Raise the probe interval and failure detection time to
 * what this crtt allows, and remember when that happened.
 */
pg->pg_probeint = target->tg_crtt * NEXT_FDT_MULTIPLE;
pg->pg_fdt = pg->pg_probeint * (NUM_PROBE_FAILS + 2);
last_fdt_bumpup_time = gethrtime();
if (pg != phyint_anongroup) {
logtrace("Cannot meet requested failure"
" detection time of %d ms on (%s %s) new"
" failure detection time for group \"%s\""
" is %d ms\n", user_failure_detection_time,
AF_STR(pii->pii_af), pii->pii_name,
pg->pg_name, pg->pg_fdt);
}
}
} else if ((target->tg_crtt < (pg->pg_probeint / LOWER_FDT_TRIGGER)) &&
(user_failure_detection_time < pg->pg_fdt) &&
(last_fdt_bumpup_time + MIN_SETTLING_TIME < gethrtime())) {
/*
 * The crtt has dropped well below the probe interval, the group is
 * running with a raised failure detection time and the settling
 * time has passed: try to lower the failure detection time.
 */
if (check_pg_crtt_improved(pg)) {
pg->pg_fdt = MAX(pg->pg_fdt / NEXT_FDT_MULTIPLE,
user_failure_detection_time);
pg->pg_probeint = pg->pg_fdt / (NUM_PROBE_FAILS + 2);
if (pg != phyint_anongroup) {
logtrace("Improved failure detection time %d ms"
" on (%s %s) for group \"%s\"\n",
pg->pg_fdt, AF_STR(pii->pii_af),
pii->pii_name, pg->pg_name);
}
if (user_failure_detection_time == pg->pg_fdt) {
/* Back at the user's setting: use the exact interval. */
pg->pg_probeint = user_probe_interval;
reset_snxt_basetimes();
}
}
}
out:
/*
 * Record when the reply was processed and when it was received.  The
 * receive time is the hrtime of the send plus the wall clock
 * difference between send and receive.
 */
pr_statp = &pii->pii_probes[pr_ndx];
pr_statp->pr_hrtime_ackproc = cur_hrtime;
pr_statp->pr_hrtime_ackrecv = pr_statp->pr_hrtime_sent +
(tv2ns(recv_tvp) - tv2ns(&pr_statp->pr_tv_sent));
probe_chstate(pr_statp, pii, PR_ACKED);
/*
 * Update pii_rack if it lies outside the current window or if this
 * reply carries a higher sequence number.
 */
if (SEQ_GE(pii->pii_rack, pii->pii_snxt) ||
SEQ_LT(pii->pii_rack, pii->pii_snxt - PROBE_STATS_COUNT) ||
SEQ_GT(pr_icmp_seq, pii->pii_rack)) {
pii->pii_rack = pr_icmp_seq;
}
}
static boolean_t
highest_ack_tg(uint16_t seq, struct target *tg)
{
struct phyint_instance *pii;
int pr_ndx;
uint16_t pr_seq;
pii = tg->tg_phyint_inst;
pr_ndx = PROBE_INDEX_PREV(pii->pii_probe_next);
pr_seq = pii->pii_snxt;
pr_seq--;
for (; pr_ndx != pii->pii_probe_next;
pr_ndx = PROBE_INDEX_PREV(pr_ndx), pr_seq--) {
if (pii->pii_probes[pr_ndx].pr_target == tg &&
pii->pii_probes[pr_ndx].pr_status == PR_ACKED) {
if (SEQ_GT(pr_seq, seq))
return (_B_FALSE);
}
}
return (_B_TRUE);
}
/*
 * Return _B_TRUE if check_pii_crtt_improved() holds for the IPv4 and the
 * IPv6 instance of every phyint in group `pg', _B_FALSE as soon as one of
 * them does not.
 */
static boolean_t
check_pg_crtt_improved(struct phyint_group *pg)
{
	struct phyint *member;

	if (debug & D_PROBE)
		logdebug("check_pg_crtt_improved()\n");

	member = pg->pg_phyint;
	while (member != NULL) {
		if (!check_pii_crtt_improved(member->pi_v4))
			return (_B_FALSE);
		if (!check_pii_crtt_improved(member->pi_v6))
			return (_B_FALSE);
		member = member->pi_pgnext;
	}

	return (_B_TRUE);
}
/*
 * Return _B_FALSE if any active target of `pii' has a crtt above the
 * group's probe interval divided by LOWER_FDT_TRIGGER.  Instances that are
 * NULL, not probe capable, or whose phyint is PI_FAILED never stand in the
 * way and yield _B_TRUE.
 */
boolean_t
check_pii_crtt_improved(struct phyint_instance *pii) {
	struct target *cur;

	if (pii == NULL)
		return (_B_TRUE);

	if (!PROBE_CAPABLE(pii))
		return (_B_TRUE);
	if (pii->pii_phyint->pi_state == PI_FAILED)
		return (_B_TRUE);

	cur = pii->pii_targets;
	while (cur != NULL) {
		if (cur->tg_status == TG_ACTIVE &&
		    cur->tg_crtt > (pii->pii_phyint->pi_group->pg_probeint /
		    LOWER_FDT_TRIGGER)) {
			return (_B_FALSE);
		}
		cur = cur->tg_next;
	}

	return (_B_TRUE);
}
/*
 * Decide whether `target' is the only slow target of `pii'.
 *
 * Returns _B_FALSE when pii has fewer than MIN_PROBE_TARGETS + 1 targets
 * counted in pii_ntargets, or when some other active target has a crtt
 * above the group's probe interval divided by EXCEPTION_FACTOR.
 * Otherwise returns _B_TRUE.
 */
static boolean_t
check_exception_target(struct phyint_instance *pii, struct target *target)
{
	char abuf[INET6_ADDRSTRLEN];
	struct target *other;

	if (debug & D_PROBE) {
		logdebug("check_exception_target(%s %s target %s)\n",
		    AF_STR(pii->pii_af), pii->pii_name,
		    pr_addr(pii->pii_af, target->tg_address,
		    abuf, sizeof (abuf)));
	}

	/* Too few targets to single one out. */
	if (pii->pii_ntargets < MIN_PROBE_TARGETS + 1)
		return (_B_FALSE);

	other = pii->pii_targets;
	while (other != NULL) {
		if (other != target && other->tg_status == TG_ACTIVE &&
		    other->tg_crtt >
		    pii->pii_phyint->pi_group->pg_probeint /
		    EXCEPTION_FACTOR) {
			return (_B_FALSE);
		}
		other = other->tg_next;
	}

	return (_B_TRUE);
}
/*
 * Handle the reply to a PROBE_MULTI probe: consider the sender `fromaddr'
 * as a new probe target.
 *
 * Nothing is added when pii already has targets and either they are
 * routers or MAX_PROBE_TARGETS has been reached, when the sender is the
 * unspecified address (:: or the IPv4-mapped form of 0.0.0.0), or when
 * the sender is one of our own addresses.  For a phyint in a named group
 * the target is added to the matching instance of every phyint of the
 * group.  The contents of `reply' are not examined.
 */
static void
incoming_mcast_reply(struct phyint_instance *pii, struct pr_icmp *reply,
    struct in6_addr fromaddr)
{
	char abuf[INET6_ADDRSTRLEN];
	struct phyint *member;
	int af;

	if (debug & D_PROBE) {
		logdebug("incoming_mcast_reply(%s %s %s)\n",
		    AF_STR(pii->pii_af), pii->pii_name,
		    pr_addr(pii->pii_af, fromaddr, abuf, sizeof (abuf)));
	}

	assert(pii->pii_ntargets <= MAX_PROBE_TARGETS);
	if (pii->pii_targets != NULL &&
	    (pii->pii_targets_are_routers ||
	    (pii->pii_ntargets == MAX_PROBE_TARGETS))) {
		return;
	}

	if (IN6_IS_ADDR_UNSPECIFIED(&fromaddr) ||
	    IN6_IS_ADDR_V4MAPPED_ANY(&fromaddr)) {
		logtrace("probe response from %s on %s\n",
		    pr_addr(pii->pii_af, fromaddr, abuf, sizeof (abuf)),
		    pii->pii_name);
		return;
	}

	af = pii->pii_af;
	if (own_address(fromaddr))
		return;

	member = pii->pii_phyint;
	if (member->pi_group == phyint_anongroup) {
		target_add(pii, fromaddr, _B_FALSE);
		return;
	}

	/* Named group: every member gets the new target. */
	member = member->pi_group->pg_phyint;
	while (member != NULL) {
		target_add(PHYINT_INSTANCE(member, af), fromaddr, _B_FALSE);
		member = member->pi_pgnext;
	}
}
/*
 * Fold the round trip sample `m' into the smoothed average *sap (kept
 * scaled by 8) and the deviation *svp, and return the resulting
 * conservative round trip time (*sap >> 3) + *svp.
 *
 * *sap == -1 means there are no statistics yet; they are then initialized
 * from the sample alone.  Both *sap and *svp are updated in place.
 */
static int64_t
compute_crtt(int64_t *sap, int64_t *svp, int64_t m)
{
	const int64_t sample = m;
	int64_t avg = *sap;
	int64_t dev = *svp;
	int64_t err;
	int64_t crtt;

	assert(*sap >= -1);
	assert(*svp >= 0);

	if (avg == -1) {
		/* First sample. */
		avg = (sample << 3);
		dev = (sample << 1);
	} else {
		/* err is the difference between sample and average. */
		err = sample - (avg >> 3);
		avg += err;
		if (avg < 0)
			avg = 0;

		/* The deviation is updated with the magnitude of err. */
		if (err < 0)
			err = -err;
		err -= dev >> 2;
		dev += err;
	}

	crtt = (avg >> 3) + dev;

	if (debug & D_PROBE) {
		logerr("compute_crtt: m = %lld sa = %lld, sv = %lld -> "
		    "crtt = %lld\n", sample, avg, dev, crtt);
	}

	*sap = avg;
	*svp = dev;
	return (crtt);
}
/*
 * Account for a new round trip sample `m' on target `tg'.
 *
 * The sample is first evaluated on copies of the target's statistics.
 * For a unicast probe (is_probe_uni) whose resulting crtt would exceed the
 * group's probe interval, the sample is only queued in tg_deferred; once
 * MAXDEFERREDRTT samples are already queued, all queued samples including
 * this one are folded into the target's statistics.  Otherwise the new
 * statistics are stored if this is a unicast probe or if the crtt went
 * down, and a unicast probe also discards any queued samples.
 */
static void
pi_set_crtt(struct target *tg, int64_t m, boolean_t is_probe_uni)
{
	struct phyint_instance *pii = tg->tg_phyint_inst;
	int probe_interval = pii->pii_phyint->pi_group->pg_probeint;
	int64_t trial_sa = tg->tg_rtt_sa;
	int64_t trial_sd = tg->tg_rtt_sd;
	int trial_crtt;
	int n;

	if (debug & D_PROBE)
		logdebug("pi_set_crtt: target - m %lld\n", m);

	/* Keep the sample in case it ends up being deferred. */
	tg->tg_deferred[tg->tg_num_deferred] = m;

	trial_crtt = ns2ms(compute_crtt(&trial_sa, &trial_sd, m));

	if (is_probe_uni && (trial_crtt > probe_interval)) {
		if (debug & D_PROBE) {
			logdebug("Received a suspect probe on %s, new_crtt ="
			    " %d, probe_interval = %d, num_deferred = %d\n",
			    pii->pii_probe_logint->li_name, trial_crtt,
			    probe_interval, tg->tg_num_deferred);
		}

		if (tg->tg_num_deferred != MAXDEFERREDRTT) {
			tg->tg_num_deferred++;
			return;
		}

		if (debug & D_PROBE) {
			logdebug("Received MAXDEFERREDRTT probes which "
			    "would cause an increased probe_interval. "
			    "Integrating queued rtt data points.\n");
		}

		/* Apply every queued sample, the current one included. */
		n = 0;
		while (n <= tg->tg_num_deferred) {
			tg->tg_crtt = ns2ms(compute_crtt(&tg->tg_rtt_sa,
			    &tg->tg_rtt_sd, tg->tg_deferred[n]));
			n++;
		}
		tg->tg_num_deferred = 0;
		return;
	}

	if (!is_probe_uni && !(trial_crtt < tg->tg_crtt))
		return;

	tg->tg_rtt_sa = trial_sa;
	tg->tg_rtt_sd = trial_sd;
	tg->tg_crtt = trial_crtt;
	if (is_probe_uni)
		tg->tg_num_deferred = 0;
}
/*
 * Search the control data of `msg' for the first ancillary item whose
 * level and type equal `cmsg_level' and `cmsg_type'.  Returns a pointer to
 * the item's data, or NULL if there is no such item.
 */
static void *
find_ancillary(struct msghdr *msg, int cmsg_level, int cmsg_type)
{
	struct cmsghdr *hdr = CMSG_FIRSTHDR(msg);

	while (hdr != NULL) {
		if (hdr->cmsg_level == cmsg_level &&
		    hdr->cmsg_type == cmsg_type)
			return (CMSG_DATA(hdr));
		hdr = CMSG_NXTHDR(msg, hdr);
	}

	return (NULL);
}
/*
 * Clear IFF_INACTIVE on one other functioning, currently inactive phyint
 * in pi's group.  The scan stops at the first such phyint that is a
 * standby; if none is a standby, the last candidate found is used.
 * Does nothing for the anonymous group or when there is no candidate.
 */
void
phyint_activate_another(struct phyint *pi)
{
	struct phyint *cand;
	struct phyint *chosen = NULL;

	if (pi->pi_group == phyint_anongroup)
		return;

	cand = pi->pi_group->pg_phyint;
	while (cand != NULL) {
		if (cand != pi && phyint_is_functioning(cand) &&
		    (cand->pi_flags & IFF_INACTIVE)) {
			chosen = cand;
			if (cand->pi_flags & IFF_STANDBY)
				break;
		}
		cand = cand->pi_pgnext;
	}

	if (chosen == NULL)
		return;

	(void) change_pif_flags(chosen, 0, IFF_INACTIVE);
}
/*
 * Move `pi' to PI_RUNNING and adjust interface flags accordingly.
 *
 * IFF_OFFLINE is cleared if pi was PI_OFFLINE, IFF_FAILED otherwise.
 * IFF_INACTIVE handling depends on the group's membership:
 *   - a standby becomes inactive if the group already has at least as
 *     many active phyints as non-standby ones, and active otherwise;
 *   - a non-standby that is being onlined, or when failback is enabled,
 *     pushes an active standby back to inactive under the same condition;
 *   - otherwise pi is marked inactive unless this is its initial
 *     transition or the group has failed.
 * Finally the state of the group is refreshed.
 */
void
phyint_transition_to_running(struct phyint *pi)
{
	const boolean_t onlining = (pi->pi_state == PI_OFFLINE);
	const boolean_t initial = (pi->pi_state == PI_INIT);
	struct phyint *peer;
	struct phyint *active_standby = NULL;
	uint_t n_active = 0;
	uint_t n_nonstandby = 0;
	uint64_t set = 0;
	uint64_t clear;

	/* Take stock of the group (nothing to count for the anongroup). */
	if (pi->pi_group != phyint_anongroup) {
		for (peer = pi->pi_group->pg_phyint; peer != NULL;
		    peer = peer->pi_pgnext) {
			if ((peer->pi_flags & IFF_STANDBY) == 0)
				n_nonstandby++;

			if (!phyint_is_functioning(peer) ||
			    (peer->pi_flags & IFF_INACTIVE))
				continue;

			n_active++;
			if (peer->pi_flags & IFF_STANDBY)
				active_standby = peer;
		}
	}

	clear = (onlining ? IFF_OFFLINE : IFF_FAILED);

	if (pi->pi_flags & IFF_STANDBY) {
		if (n_active < n_nonstandby)
			clear |= IFF_INACTIVE;
		else
			set |= IFF_INACTIVE;
	} else if (onlining || failback_enabled) {
		if (active_standby != NULL && n_active >= n_nonstandby) {
			(void) change_pif_flags(active_standby,
			    IFF_INACTIVE, 0);
		}
	} else if (!initial && !GROUP_FAILED(pi->pi_group)) {
		set |= IFF_INACTIVE;
	}

	(void) change_pif_flags(pi, set, clear);
	phyint_chstate(pi, PI_RUNNING);
	phyint_group_refresh_state(pi->pi_group);
}
/*
 * Re-evaluate IFF_INACTIVE on `pi' from the current make-up of its group.
 *
 * In the anonymous group the flag is simply cleared.  A phyint that is not
 * functioning is left alone.  Otherwise the flag is cleared when the group
 * has no active phyint or fewer active than non-standby phyints, set when
 * it has more active than non-standby phyints, and left unchanged when the
 * two counts are equal.
 */
void
phyint_standby_refresh_inactive(struct phyint *pi)
{
	struct phyint *peer;
	uint_t n_active = 0;
	uint_t n_nonstandby = 0;

	if (pi->pi_group == phyint_anongroup) {
		(void) change_pif_flags(pi, 0, IFF_INACTIVE);
		return;
	}

	if (!phyint_is_functioning(pi))
		return;

	peer = pi->pi_group->pg_phyint;
	while (peer != NULL) {
		if ((peer->pi_flags & IFF_STANDBY) == 0)
			n_nonstandby++;
		if (phyint_is_functioning(peer) &&
		    (peer->pi_flags & IFF_INACTIVE) == 0)
			n_active++;
		peer = peer->pi_pgnext;
	}

	if (n_active == 0 || n_active < n_nonstandby) {
		(void) change_pif_flags(pi, 0, IFF_INACTIVE);
	} else if (n_active > n_nonstandby) {
		(void) change_pif_flags(pi, IFF_INACTIVE, 0);
	}
}
/*
 * If phyint_repaired() reports `pi' as repaired, log the repair and bring
 * the phyint back: an offline phyint only has IFF_FAILED cleared, any
 * other phyint is transitioned to running.
 */
void
phyint_check_for_repair(struct phyint *pi)
{
	if (!phyint_repaired(pi))
		return;

	if (pi->pi_group != phyint_anongroup) {
		logerr("IP interface repair detected on %s of group %s\n",
		    pi->pi_name, pi->pi_group->pg_name);
	} else {
		logerr("IP interface repair detected on %s\n", pi->pi_name);
	}

	if (pi->pi_state != PI_OFFLINE) {
		phyint_transition_to_running(pi);
		return;
	}

	/* Offline phyints stay offline; only the failure flag is dropped. */
	(void) change_pif_flags(pi, 0, IFF_FAILED);
}
/*
 * Ask failure_state() about `pii' and act on the verdict.
 *
 * PHYINT_FAILURE: mark the phyint failed (IFF_FAILED set, IFF_INACTIVE
 * cleared), log it, activate another phyint if this one was active, and
 * unless the phyint is offline change its state to PI_FAILED and reset the
 * rtt statistics of its targets.
 *
 * GROUP_FAILURE: mark every phyint of the group failed; for those that are
 * not offline reset the rtt statistics, and change running ones to
 * PI_FAILED.
 *
 * Any other verdict: nothing to do.
 */
static void
phyint_inst_check_for_failure(struct phyint_instance *pii)
{
	struct phyint *pi = pii->pii_phyint;
	struct phyint *member;
	boolean_t was_active;
	int verdict;

	verdict = failure_state(pii);

	if (verdict == PHYINT_FAILURE) {
		/* Sample the flag before change_pif_flags() is called. */
		was_active = ((pi->pi_flags & IFF_INACTIVE) == 0);
		(void) change_pif_flags(pi, IFF_FAILED, IFF_INACTIVE);

		if (pi->pi_group != phyint_anongroup) {
			logerr("IP interface failure detected on %s of group"
			    " %s\n", pii->pii_name, pi->pi_group->pg_name);
		} else {
			logerr("IP interface failure detected on %s\n",
			    pii->pii_name);
		}

		if (was_active)
			phyint_activate_another(pi);

		if (pi->pi_state != PI_OFFLINE) {
			phyint_chstate(pi, PI_FAILED);
			reset_crtt_all(pi);
		}
		return;
	}

	if (verdict != GROUP_FAILURE)
		return;

	for (member = pi->pi_group->pg_phyint; member != NULL;
	    member = member->pi_pgnext) {
		(void) change_pif_flags(member, IFF_FAILED, IFF_INACTIVE);
		if (member->pi_state == PI_OFFLINE)
			continue;

		reset_crtt_all(member);
		if (member->pi_state == PI_RUNNING)
			phyint_chstate(member, PI_FAILED);
	}
}
/*
 * Timer handler for phyint instance `pii'.
 *
 * Decides whether a probe is due, brings the probe send times up to date,
 * ages unacknowledged probes, runs failure or repair detection, and sends
 * the next probe.  Returns the number of milliseconds until the handler
 * wants to run again, or TIMER_INFINITY if it has nothing to schedule.
 */
uint_t
phyint_inst_timer(struct phyint_instance *pii)
{
	int pr_ndx;
	uint_t timeout;
	struct target *cur_tg;
	struct probe_stats *pr_statp;
	struct phyint_instance *pii_other;
	struct phyint *pi;
	int valid_unack_count;
	int i;
	int interval;
	uint_t check_time;
	uint_t cur_time;
	hrtime_t cur_hrtime;
	int probe_interval = pii->pii_phyint->pi_group->pg_probeint;

	/* cur_hrtime is in nanoseconds, cur_time the same instant in ms. */
	cur_hrtime = gethrtime();
	cur_time = ns2ms(cur_hrtime);

	if (debug & D_TIMER) {
		logdebug("phyint_inst_timer(%s %s)\n",
		    AF_STR(pii->pii_af), pii->pii_name);
	}

	/*
	 * Without probing on either instance, a failed phyint whose link is
	 * up is checked for repair once MSEC_PERMIN has passed since the
	 * link-up time recorded in the current pi_whenup slot.
	 */
	pii_other = phyint_inst_other(pii);
	if (!PROBE_ENABLED(pii) && !PROBE_ENABLED(pii_other)) {
		pi = pii->pii_phyint;
		if (pi->pi_state == PI_FAILED && LINK_UP(pi)) {
			check_time = pi->pi_whenup[pi->pi_whendx] + MSEC_PERMIN;
			if (check_time > cur_time)
				return (check_time - cur_time);
			phyint_check_for_repair(pi);
		}
	}

	if (!PROBE_ENABLED(pii))
		return (TIMER_INFINITY);

	/* Not yet time for the next probe: report the remaining time. */
	if (TIME_LT(cur_time, pii->pii_snxt_time))
		return (pii->pii_snxt_time - cur_time);

	if (LINK_DOWN(pii->pii_phyint))
		return (TIMER_INFINITY);

	/* The next send time is the base time plus a random offset. */
	interval = GET_RANDOM(
	    (int)(MIN_RANDOM_FACTOR * user_probe_interval),
	    (int)(MAX_RANDOM_FACTOR * user_probe_interval));
	pii->pii_snxt_time = pii->pii_snxt_basetime + interval;

	/*
	 * If that time has already passed, skip ahead by whole probe
	 * intervals and account for the probes that were not sent.
	 */
	if (TIME_GT(cur_time, pii->pii_snxt_time)) {
		int n;

		n = (cur_time - pii->pii_snxt_time) / user_probe_interval;
		pii->pii_snxt_time += (n + 1) * user_probe_interval;
		pii->pii_snxt_basetime += (n + 1) * user_probe_interval;
		logtrace("missed sending %d probes cur_time %u snxt_time %u"
		    " snxt_basetime %u\n", n + 1, cur_time, pii->pii_snxt_time,
		    pii->pii_snxt_basetime);
		probes_missed.pm_nprobes += n + 1;
		probes_missed.pm_ntimes++;
	}
	pii->pii_snxt_basetime += user_probe_interval;
	interval = pii->pii_snxt_time - cur_time;

	if (debug & D_TARGET) {
		logdebug("cur_time %u snxt_time %u snxt_basetime %u"
		    " interval %u\n", cur_time, pii->pii_snxt_time,
		    pii->pii_snxt_basetime, interval);
	}

	/*
	 * No target to probe: send a multicast probe to find some.
	 * probe() expects an hrtime (nanoseconds), so pass cur_hrtime and
	 * not the millisecond value cur_time.
	 */
	if (pii->pii_target_next == NULL) {
		assert(pii->pii_ntargets == 0);
		pii->pii_fd_snxt_basetime = pii->pii_snxt_basetime;
		probe(pii, PROBE_MULTI, cur_hrtime);
		return (interval);
	}

	/*
	 * The group's probe interval differs from the user's and the next
	 * failure detection probe is not due yet: send an rtt probe only.
	 */
	if ((user_probe_interval != probe_interval) &&
	    TIME_LT(pii->pii_snxt_time, pii->pii_fd_snxt_basetime)) {
		probe(pii, PROBE_RTT, cur_hrtime);
		return (interval);
	}

	/* A failure detection probe is due; advance its base time. */
	if (user_probe_interval == probe_interval) {
		pii->pii_fd_snxt_basetime = pii->pii_snxt_basetime;
	} else {
		pii->pii_fd_snxt_basetime += probe_interval;
		if (TIME_GT(cur_time, pii->pii_fd_snxt_basetime)) {
			int n;

			n = (cur_time - pii->pii_fd_snxt_basetime) /
			    probe_interval;
			pii->pii_fd_snxt_basetime += (n + 1) * probe_interval;
		}
	}

	/*
	 * Examine the two most recently sent probes.  An unacknowledged
	 * probe is marked lost if its deadline (start time plus the
	 * target's crtt, or plus the probe interval if the crtt is 0) has
	 * passed; the older of the two is marked lost regardless.
	 */
	pr_ndx = PROBE_INDEX_PREV(pii->pii_probe_next);
	valid_unack_count = 0;
	for (i = 0; i < 2; i++) {
		pr_statp = &pii->pii_probes[pr_ndx];
		cur_tg = pii->pii_probes[pr_ndx].pr_target;
		switch (pr_statp->pr_status) {
		case PR_ACKED:
			/* An acked probe ends a pending quiet period. */
			if (pii->pii_fd_hrtime > gethrtime()) {
				if (debug & D_PROBE) {
					logdebug("successful probe on %s; "
					    "ending quiet period\n",
					    pii->pii_phyint->pi_name);
				}
				pii->pii_fd_hrtime = gethrtime();
			}
			break;

		case PR_UNACKED:
			assert(cur_tg != NULL);
			timeout = ns2ms(pr_statp->pr_hrtime_start);
			if (cur_tg->tg_crtt != 0) {
				timeout += cur_tg->tg_crtt;
			} else {
				timeout += probe_interval;
			}
			if (TIME_LT(timeout, cur_time)) {
				pr_statp->pr_time_lost = timeout;
				probe_chstate(pr_statp, pii, PR_LOST);
			} else if (i == 1) {
				pr_statp->pr_time_lost = cur_time;
				probe_chstate(pr_statp, pii, PR_LOST);
			} else {
				valid_unack_count++;
			}
			break;
		}
		pr_ndx = PROBE_INDEX_PREV(pr_ndx);
	}

	/* At most the most recent probe may still be outstanding. */
	assert(valid_unack_count == 0 || valid_unack_count == 1);

	/*
	 * "pii_rack + valid_unack_count + 1 == pii_snxt" holds when every
	 * probe sent so far, other than one still outstanding, has been
	 * acknowledged up to the most recent one.
	 */
	switch (pii->pii_phyint->pi_state) {
	case PI_FAILED:
		if (pii->pii_rack + valid_unack_count + 1 == pii->pii_snxt) {
			phyint_check_for_repair(pii->pii_phyint);
		}
		break;

	case PI_RUNNING:
		/* No failure detection while pii_fd_hrtime is ahead of us. */
		if (pii->pii_fd_hrtime - cur_hrtime > 0)
			break;
		if (pii->pii_rack + valid_unack_count + 1 != pii->pii_snxt) {
			phyint_inst_check_for_failure(pii);
		}
		break;

	default:
		logerr("phyint_inst_timer: invalid state %d\n",
		    pii->pii_phyint->pi_state);
		abort();
	}

	/*
	 * Send the next probe.  The target list is re-checked because the
	 * failure handling above may have changed it.  With a single
	 * non-router target a multicast probe is sent as well.
	 */
	if (pii->pii_target_next != NULL) {
		probe(pii, PROBE_UNI, cur_hrtime);
		if (!pii->pii_targets_are_routers && pii->pii_ntargets == 1)
			probe(pii, PROBE_MULTI, cur_hrtime);
	} else {
		probe(pii, PROBE_MULTI, cur_hrtime);
	}
	return (interval);
}
/*
 * Start probing on `pii': choose a random delay between 0 and the group's
 * probe interval, set all three probe send times of the instance to the
 * current time plus that delay, and schedule the timer for the delay.
 */
void
start_timer(struct phyint_instance *pii)
{
	uint32_t delay;

	delay = GET_RANDOM(0,
	    (int)pii->pii_phyint->pi_group->pg_probeint);

	pii->pii_snxt_basetime = getcurrenttime() + delay;
	pii->pii_snxt_time = pii->pii_snxt_basetime;
	pii->pii_fd_snxt_basetime = pii->pii_snxt_basetime;

	timer_schedule(delay);
}
/*
 * Restart the probe timer of `pii', but only if pii_basetime_inited is
 * non-zero.
 */
static void
restart_timer(struct phyint_instance *pii)
{
	if (pii->pii_basetime_inited == 0)
		return;

	if (debug & D_LINKNOTE) {
		logdebug("restart timer: restarting timer on %s, "
		    "address family %s\n", pii->pii_phyint->pi_name,
		    AF_STR(pii->pii_af));
	}
	start_timer(pii);
}
/*
 * React to the link of `pi' going down: log it, clear the probe
 * statistics of its probe capable instances, and run failure detection on
 * one instance (IPv4 if present, else IPv6).  Failure detection is skipped
 * when the phyint is not running and is either already failed or in a
 * failed group.
 */
static void
process_link_state_down(struct phyint *pi)
{
	struct phyint_instance *inst;

	logerr("The link has gone down on %s\n", pi->pi_name);

	if (PROBE_CAPABLE(pi->pi_v4))
		clear_pii_probe_stats(pi->pi_v4);
	if (PROBE_CAPABLE(pi->pi_v6))
		clear_pii_probe_stats(pi->pi_v6);

	if (pi->pi_state != PI_RUNNING &&
	    (pi->pi_state == PI_FAILED || GROUP_FAILED(pi->pi_group)))
		return;

	if (debug & D_LINKNOTE) {
		logdebug("process_link_state_down:"
		    " checking for failure on %s\n", pi->pi_name);
	}

	inst = (pi->pi_v4 != NULL) ? pi->pi_v4 : pi->pi_v6;
	if (inst != NULL)
		phyint_inst_check_for_failure(inst);
}
/*
 * React to the link of `pi' coming up: log it, restart the timers of its
 * instances, check for repair, and record the current time in the
 * circular pi_whenup array (LINK_UP_PERMIN entries).
 */
static void
process_link_state_up(struct phyint *pi)
{
	logerr("The link has come up on %s\n", pi->pi_name);

	if (pi->pi_v4)
		restart_timer(pi->pi_v4);
	if (pi->pi_v6)
		restart_timer(pi->pi_v6);

	phyint_check_for_repair(pi);

	/* Store the time, then advance the index with wrap-around. */
	pi->pi_whenup[pi->pi_whendx] = getcurrenttime();
	pi->pi_whendx++;
	if (pi->pi_whendx == LINK_UP_PERMIN)
		pi->pi_whendx = 0;
}
/*
 * Walk all phyints, refresh the link state of each, and call the link
 * down / link up handler for every phyint whose link state has changed.
 */
void
process_link_state_changes(void)
{
	struct phyint *pi = phyints;

	while (pi != NULL) {
		/* Remember the state before it is refreshed. */
		boolean_t was_up = LINK_UP(pi);

		UPDATE_LINK_STATE(pi);

		if (LINK_DOWN(pi)) {
			if (was_up)
				process_link_state_down(pi);
		} else if (!was_up) {
			process_link_state_up(pi);
		}

		pi = pi->pi_next;
	}
}
/*
 * Reset the round trip statistics (crtt, smoothed average, deviation) of
 * every target of both instances of `pi' to their initial values.
 */
void
reset_crtt_all(struct phyint *pi)
{
	struct phyint_instance *inst[2];
	struct target *tg;
	int n;

	inst[0] = pi->pi_v4;
	inst[1] = pi->pi_v6;

	for (n = 0; n < 2; n++) {
		if (inst[n] == NULL)
			continue;

		tg = inst[n]->pii_targets;
		while (tg != NULL) {
			tg->tg_crtt = 0;
			tg->tg_rtt_sa = -1;
			tg->tg_rtt_sd = 0;
			tg = tg->tg_next;
		}
	}
}
/*
 * Judge from the probe statistics whether `pii' looks failed.
 *
 * Returns PHYINT_OK when fewer than NUM_PROBE_FAILS probes have failed on
 * the instance; if in that case the most recently probed target alone has
 * NUM_PROBE_FAILS or more failures, that target is taken out of use first
 * (routers are marked TG_DEAD, hosts are deleted and a multicast probe is
 * sent).  Also returns PHYINT_OK when the other instance of the phyint is
 * probe capable and has fewer than NUM_PROBE_FAILS failures.
 *
 * Otherwise returns PHYINT_FAILURE and stores the earlier of the first
 * failure times of the instances involved in *tff.  *tff is written only
 * in that case.
 */
static int
phyint_inst_probe_failure_state(struct phyint_instance *pii, uint_t *tff)
{
	struct probe_fail_count pfinfo;
	struct phyint_instance *pii_other;
	struct target *last_tg;
	uint_t first_fail;

	/* Statistics for the instance and for the last probed target. */
	last_tg = pii->pii_probes[PROBE_INDEX_PREV(pii->pii_probe_next)].
	    pr_target;
	probe_fail_info(pii, last_tg, &pfinfo);
	first_fail = pfinfo.pf_tff;

	if (pfinfo.pf_nfail < NUM_PROBE_FAILS) {
		if (pfinfo.pf_nfail_tg >= NUM_PROBE_FAILS) {
			/* Only this target is failing; stop using it. */
			if (!pii->pii_targets_are_routers) {
				target_delete(last_tg);
				probe(pii, PROBE_MULTI, gethrtime());
			} else {
				if (last_tg->tg_status == TG_ACTIVE)
					pii->pii_ntargets--;
				last_tg->tg_status = TG_DEAD;
				last_tg->tg_crtt = 0;
				last_tg->tg_rtt_sa = -1;
				last_tg->tg_rtt_sd = 0;
				if (pii->pii_target_next == last_tg) {
					pii->pii_target_next =
					    target_next(last_tg);
				}
			}
		}
		return (PHYINT_OK);
	}

	/* The other instance, if it can probe, must be failing as well. */
	pii_other = phyint_inst_other(pii);
	if (PROBE_CAPABLE(pii_other)) {
		probe_fail_info(pii_other, NULL, &pfinfo);
		if (pfinfo.pf_nfail < NUM_PROBE_FAILS)
			return (PHYINT_OK);
		if (TIME_LT(pfinfo.pf_tff, first_fail))
			first_fail = pfinfo.pf_tff;
	}

	*tff = first_fail;
	return (PHYINT_FAILURE);
}
/*
 * Classify the condition of the phyint that `pii' belongs to.
 *
 * Returns PHYINT_OK if the link is up and the probe statistics do not
 * indicate a failure.  Otherwise returns PHYINT_FAILURE if the phyint is
 * in the anonymous group or if another phyint of its group appears to be
 * working, and GROUP_FAILURE if no such phyint is found.  The state of a
 * named group is refreshed before returning in the latter two cases.
 */
int
failure_state(struct phyint_instance *pii)
{
struct probe_success_count psinfo;
uint_t pi2_tls;
uint_t pi_tff;
struct phyint *pi2;
struct phyint *pi;
struct phyint_instance *pii2;
struct phyint_group *pg;
int retval;
if (debug & D_FAILREP)
logdebug("phyint_failed(%s)\n", pii->pii_name);
pi = pii->pii_phyint;
pg = pi->pi_group;
/*
 * pi_tff (time of first failure) is set only when the link is up and
 * the probe check reports a failure; with the link down it stays unset
 * and the loop below does not read it.
 */
if (LINK_UP(pi) && phyint_inst_probe_failure_state(pii, &pi_tff) ==
PHYINT_OK)
return (PHYINT_OK);
/* Nothing to compare against outside of a named group. */
if (pg == phyint_anongroup)
return (PHYINT_FAILURE);
/*
 * Assume a group failure until some other phyint of the group is
 * found that still appears to be working.
 */
retval = GROUP_FAILURE;
for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
if (pi2 == pi)
continue;
if (LINK_DOWN(pi)) {
/*
 * Our link is down: another phyint counts as working if it is
 * running, the group has not failed, and its flags say the
 * link is up.
 */
if ((pi2->pi_state == PI_RUNNING) &&
!GROUP_FAILED(pg) && FLAGS_TO_LINK_STATE(pi2)) {
retval = PHYINT_FAILURE;
break;
}
continue;
}
if (LINK_DOWN(pi2))
continue;
/*
 * The other phyint's link is up and it does no probing on either
 * instance: treat it as working.
 */
if (!PROBE_ENABLED(pi2->pi_v4) && !PROBE_ENABLED(pi2->pi_v6)) {
retval = PHYINT_FAILURE;
break;
}
/*
 * Otherwise compare the time of last success (tls) of each of its
 * instances with our time of first failure: a success after our
 * first failure means only this phyint has failed.
 */
pii2 = pi2->pi_v4;
if (pii2 != NULL) {
probe_success_info(pii2, NULL, &psinfo);
if (psinfo.ps_tls_valid) {
pi2_tls = psinfo.ps_tls;
if (TIME_GT(pi2_tls, pi_tff) &&
(pi2->pi_state == PI_RUNNING) &&
!GROUP_FAILED(pg) &&
FLAGS_TO_LINK_STATE(pi2)) {
retval = PHYINT_FAILURE;
break;
}
}
}
/* Same check for the IPv6 instance. */
pii2 = pi2->pi_v6;
if (pii2 != NULL) {
probe_success_info(pii2, NULL, &psinfo);
if (psinfo.ps_tls_valid) {
pi2_tls = psinfo.ps_tls;
if (TIME_GT(pi2_tls, pi_tff) &&
(pi2->pi_state == PI_RUNNING) &&
!GROUP_FAILED(pg) &&
FLAGS_TO_LINK_STATE(pi2)) {
retval = PHYINT_FAILURE;
break;
}
}
}
}
phyint_group_refresh_state(pg);
return (retval);
}
/*
 * Walk the probe ring of `pii' backwards, starting at the most recently
 * sent probe, and summarise recent successes in `psinfo' (which is zeroed
 * first):
 *
 *	ps_nsucc	PR_ACKED entries seen before any lost probe
 *	ps_nsucc_tg	the same, counting only entries whose target is
 *			`cur_tg' (left at 0 when cur_tg is NULL)
 *	ps_tls,
 *	ps_tls_valid	ns2ms() of pr_hrtime_ackproc of the first PR_ACKED
 *			entry met, i.e. the most recent one
 *
 * A PR_UNACKED entry whose deadline has passed (TIME_LT(deadline, now)) is
 * moved to PR_LOST through probe_chstate() and treated as a loss.  The
 * deadline is the send time plus the target's tg_crtt, or plus the group's
 * pg_probeint when tg_crtt is 0.  The walk ends when it wraps round to
 * pii_probe_next, or as soon as an entry with any other status is met.
 */
static void
probe_success_info(struct phyint_instance *pii, struct target *cur_tg,
    struct probe_success_count *psinfo)
{
	boolean_t seen_loss = _B_FALSE;		/* any probe lost so far */
	boolean_t seen_tg_loss = _B_FALSE;	/* a probe to cur_tg lost */
	struct probe_stats *entry;
	struct target *tg;
	uint_t most_recent;
	uint_t second_most_recent;
	uint_t curtime;
	uint_t deadline;
	uint_t i;

	if (debug & D_FAILREP)
		logdebug("probe_success_info(%s)\n", pii->pii_name);

	bzero(psinfo, sizeof (*psinfo));
	curtime = getcurrenttime();

	most_recent = PROBE_INDEX_PREV(pii->pii_probe_next);
	second_most_recent = PROBE_INDEX_PREV(most_recent);

	i = most_recent;
	while (i != pii->pii_probe_next) {
		entry = &pii->pii_probes[i];

		if (entry->pr_status == PR_UNACKED) {
			/* Only the two newest probes may still be pending. */
			assert(i == most_recent || i == second_most_recent);
			tg = entry->pr_target;
			assert(tg != NULL);

			deadline = ns2ms(entry->pr_hrtime_start);
			if (tg->tg_crtt != 0) {
				deadline += tg->tg_crtt;
			} else {
				deadline +=
				    pii->pii_phyint->pi_group->pg_probeint;
			}

			if (TIME_LT(deadline, curtime)) {
				/* Timed out: from now on it is a lost probe. */
				entry->pr_time_lost = deadline;
				probe_chstate(entry, pii, PR_LOST);
				seen_loss = _B_TRUE;
				if (cur_tg != NULL && tg == cur_tg)
					seen_tg_loss = _B_TRUE;
			}
		} else if (entry->pr_status == PR_ACKED) {
			if (!seen_loss)
				psinfo->ps_nsucc++;

			if (cur_tg != NULL && entry->pr_target == cur_tg &&
			    !seen_tg_loss) {
				psinfo->ps_nsucc_tg++;
			}

			/* Only the first (newest) success sets ps_tls. */
			if (!psinfo->ps_tls_valid) {
				psinfo->ps_tls =
				    ns2ms(entry->pr_hrtime_ackproc);
				psinfo->ps_tls_valid = _B_TRUE;
			}
		} else if (entry->pr_status == PR_LOST) {
			seen_loss = _B_TRUE;
			if (cur_tg != NULL && entry->pr_target == cur_tg)
				seen_tg_loss = _B_TRUE;
		} else {
			/* Any other status ends the scan. */
			return;
		}

		i = PROBE_INDEX_PREV(i);
	}
}
/*
 * Walk the probe ring of `pii' backwards, starting at the most recently
 * sent probe, and summarise recent failures in `pfinfo' (which is zeroed
 * first):
 *
 *	pf_nfail	lost probes seen before any entry counted as a success
 *	pf_tff		pr_time_lost of the last lost probe counted in
 *			pf_nfail, i.e. the one furthest back in the walk
 *	pf_nfail_tg	lost probes to `cur_tg' seen before any success on
 *			`cur_tg' (left at 0 when cur_tg is NULL)
 *
 * Unlike probe_success_info(), the walk never stops early: it visits every
 * slot until it wraps round to pii_probe_next.
 */
static void
probe_fail_info(struct phyint_instance *pii, struct target *cur_tg,
struct probe_fail_count *pfinfo)
{
int i;
struct probe_stats *pr_statp;
boolean_t tg_found_success = _B_FALSE;
boolean_t pi_found_success = _B_FALSE;
int most_recent;
int second_most_recent;
uint_t now;
uint_t timeout;
struct target *tg;
if (debug & D_FAILREP)
logdebug("probe_fail_info(%s)\n", pii->pii_name);
bzero(pfinfo, sizeof (*pfinfo));
now = getcurrenttime();
most_recent = PROBE_INDEX_PREV(pii->pii_probe_next);
second_most_recent = PROBE_INDEX_PREV(most_recent);
for (i = most_recent; i != pii->pii_probe_next;
i = PROBE_INDEX_PREV(i)) {
pr_statp = &pii->pii_probes[i];
assert(PR_STATUS_VALID(pr_statp->pr_status));
switch (pr_statp->pr_status) {
case PR_UNACKED:
/* Only the two most recent probes may still be unacknowledged. */
assert(i == most_recent || i == second_most_recent);
tg = pr_statp->pr_target;
assert(tg != NULL);
/*
 * Deadline for the reply: the send time plus the target's tg_crtt,
 * or plus the group's pg_probeint when tg_crtt is 0.
 */
timeout = ns2ms(pr_statp->pr_hrtime_start);
if (tg->tg_crtt != 0) {
timeout += tg->tg_crtt;
} else {
timeout +=
pii->pii_phyint->pi_group->pg_probeint;
}
/*
 * Deadline still in the future: the probe is neither a failure nor a
 * success yet, so it is skipped without touching any counter or flag.
 */
if (TIME_GT(timeout, now))
break;
/* Deadline passed: record when the probe was lost and mark it PR_LOST. */
pr_statp->pr_time_lost = timeout;
probe_chstate(pr_statp, pii, PR_LOST);
/* FALLTHRU -- the probe is now counted like any other lost probe */
case PR_LOST:
/*
 * pf_tff is overwritten on every counted failure; because the walk
 * goes back in time, it ends up holding the oldest one.
 */
if (!pi_found_success) {
pfinfo->pf_nfail++;
pfinfo->pf_tff = pr_statp->pr_time_lost;
}
if (cur_tg != NULL && pr_statp->pr_target == cur_tg &&
!tg_found_success) {
pfinfo->pf_nfail_tg++;
}
break;
default:
/*
 * Every other status is treated as a success: it latches the failure
 * counts, so later (older) lost probes are no longer added.
 */
pi_found_success = _B_TRUE;
if (cur_tg != NULL && pr_statp->pr_target == cur_tg) {
tg_found_success = _B_TRUE;
}
}
}
}
/*
 * Move the probe `pr' to status `state'.  When the status actually
 * changes, it is stored in pr_status and probe_state_event() is invoked
 * for the probe (its return value is ignored).  Setting a probe to the
 * status it already has does nothing.
 */
void
probe_chstate(struct probe_stats *pr, struct phyint_instance *pii, int state)
{
	if (pr->pr_status != state) {
		pr->pr_status = state;
		(void) probe_state_event(pr, pii);
	}
}
static boolean_t
phyint_repaired(struct phyint *pi)
{
struct probe_success_count psinfo;
struct phyint_instance *pii;
struct target *cur_tg;
int pr_ndx;
uint_t cur_time;
if (debug & D_FAILREP)
logdebug("phyint_repaired(%s)\n", pi->pi_name);
if (LINK_DOWN(pi))
return (_B_FALSE);
if (!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) {
cur_time = getcurrenttime();
if ((pi->pi_whenup[pi->pi_whendx] == 0 ||
(cur_time - pi->pi_whenup[pi->pi_whendx]) > MSEC_PERMIN)) {
pi->pi_lfmsg_printed = 0;
return (_B_TRUE);
}
if (!pi->pi_lfmsg_printed) {
logerr("The link has come up on %s more than %d times "
"in the last minute; disabling repair until it "
"stabilizes\n", pi->pi_name, LINK_UP_PERMIN);
pi->pi_lfmsg_printed = 1;
}
return (_B_FALSE);
}
pii = pi->pi_v4;
if (PROBE_CAPABLE(pii)) {
pr_ndx = PROBE_INDEX_PREV(pii->pii_probe_next);
cur_tg = pii->pii_probes[pr_ndx].pr_target;
probe_success_info(pii, cur_tg, &psinfo);
if (psinfo.ps_nsucc >= NUM_PROBE_REPAIRS ||
psinfo.ps_nsucc_tg >= NUM_PROBE_REPAIRS)
return (_B_TRUE);
}
pii = pi->pi_v6;
if (PROBE_CAPABLE(pii)) {
pr_ndx = PROBE_INDEX_PREV(pii->pii_probe_next);
cur_tg = pii->pii_probes[pr_ndx].pr_target;
probe_success_info(pii, cur_tg, &psinfo);
if (psinfo.ps_nsucc >= NUM_PROBE_REPAIRS ||
psinfo.ps_nsucc_tg >= NUM_PROBE_REPAIRS)
return (_B_TRUE);
}
return (_B_FALSE);
}
boolean_t
change_pif_flags(struct phyint *pi, uint64_t set, uint64_t clear)
{
int ifsock;
struct lifreq lifr;
uint64_t old_flags;
if (debug & D_FAILREP) {
logdebug("change_pif_flags(%s): set %llx clear %llx\n",
pi->pi_name, set, clear);
}
if (pi->pi_v4 != NULL)
ifsock = ifsock_v4;
else
ifsock = ifsock_v6;
(void) strlcpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) {
if (errno != ENXIO)
logperror("change_pif_flags: ioctl (get flags)");
return (_B_FALSE);
}
old_flags = lifr.lifr_flags;
lifr.lifr_flags |= set;
lifr.lifr_flags &= ~clear;
if (old_flags == lifr.lifr_flags) {
return (_B_TRUE);
}
if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) {
if (errno != ENXIO)
logperror("change_pif_flags: ioctl (set flags)");
return (_B_FALSE);
}
pi->pi_flags |= set;
pi->pi_flags &= ~clear;
if (pi->pi_v4 != NULL)
pi->pi_v4->pii_flags = pi->pi_flags;
if (pi->pi_v6 != NULL)
pi->pi_v6->pii_flags = pi->pi_flags;
return (_B_TRUE);
}
/*
 * Compute a 16-bit one's-complement checksum over `len' bytes starting at
 * `addr' (the scheme described in RFC 1071).
 *
 * The buffer is summed as 16-bit words in an int accumulator.  When `len'
 * is odd, the final byte is placed in the first byte of an otherwise-zero
 * word and added as well.  The carries above bit 15 are then folded back
 * into the low 16 bits and the result is complemented.
 *
 * Returns the checksum as a value in the range 0..0xffff.
 */
static int
in_cksum(ushort_t *addr, int len)
{
	ushort_t *word = addr;
	ushort_t tail = 0;
	int remaining;
	int sum = 0;

	for (remaining = len; remaining > 1; remaining -= 2)
		sum += *word++;

	/* Pick up the trailing byte of an odd-length buffer. */
	if (remaining == 1) {
		*(uchar_t *)&tail = *(uchar_t *)word;
		sum += tail;
	}

	/* Fold the high 16 bits into the low 16, then add the carry. */
	sum = (sum & 0xffff) + (sum >> 16);
	sum += sum >> 16;

	/* The cast truncates the complement to 16 bits. */
	return ((ushort_t)~sum);
}
/*
 * For every entry on the phyint_instances list, copy pii_snxt_basetime
 * into pii_fd_snxt_basetime.
 */
static void
reset_snxt_basetimes(void)
{
	struct phyint_instance *inst = phyint_instances;

	while (inst != NULL) {
		inst->pii_fd_snxt_basetime = inst->pii_snxt_basetime;
		inst = inst->pii_next;
	}
}
/*
 * Return _B_TRUE when `addr' matches an entry on the localaddrs list and
 * _B_FALSE otherwise.
 *
 * The address is converted with addr2storage() into a sockaddr_storage,
 * as AF_INET when it is an IPv4-mapped IPv6 address and as AF_INET6
 * otherwise, and then compared against each list entry with sockaddrcmp().
 */
boolean_t
own_address(struct in6_addr addr)
{
	struct sockaddr_storage ss;
	addrlist_t *entry;
	int af;

	if (IN6_IS_ADDR_V4MAPPED(&addr))
		af = AF_INET;
	else
		af = AF_INET6;

	addr2storage(af, &addr, &ss);

	entry = localaddrs;
	while (entry != NULL) {
		if (sockaddrcmp(&ss, &entry->al_addr))
			return (_B_TRUE);
		entry = entry->al_next;
	}

	return (_B_FALSE);
}
/*
 * Convert the nanosecond count `ns' to milliseconds with NSEC2MSEC() and
 * return the result as an int.  The conversion to int narrows the value.
 */
static int
ns2ms(int64_t ns)
{
	int ms = NSEC2MSEC(ns);

	return (ms);
}
/*
 * Convert the timeval `tvp' to nanoseconds: tv_sec scaled by NANOSEC plus
 * tv_usec scaled by 1000.
 *
 * NOTE(review): both products are evaluated in the types of their
 * operands before the conversion to int64_t -- confirm that NANOSEC is a
 * 64-bit constant so tv_sec * NANOSEC cannot overflow where time_t is
 * 32 bits wide.
 */
static int64_t
tv2ns(struct timeval *tvp)
{
	return (tvp->tv_usec * 1000 + tvp->tv_sec * NANOSEC);
}