#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/dlpi.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/policy.h>
#include <sys/systm.h>
#include <sys/strsun.h>
#include <sys/kmem.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/strsubr.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <net/if_dl.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/ipsec_impl.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/mib2.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <inet/ip_ndp.h>
#include <inet/arp.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ip_ftable.h>
#include <inet/ip_rts.h>
#include <inet/nd.h>
#include <net/pfkeyv2.h>
#include <inet/sadb.h>
#include <inet/tcp.h>
#include <inet/ipclassifier.h>
#include <sys/zone.h>
#include <net/radix.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
/*
 * True when the IRE is a default route: either it carries IRE_DEFAULT, or
 * it is an IRE_INTERFACE whose ire_addr is 0.
 */
#define	IS_DEFAULT_ROUTE(ire) \
	(((ire)->ire_type & IRE_DEFAULT) || \
	(((ire)->ire_type & IRE_INTERFACE) && ((ire)->ire_addr == 0)))

/*
 * Select the strict-source-multihoming tunable for the address family:
 * the IPv6 one when isv6 is non-zero, otherwise the IPv4 one.
 * Both parameters are parenthesized so that expression arguments (for
 * example "a ? b : c" or "ptr + 1") expand with the intended precedence.
 */
#define	IP_SRC_MULTIHOMING(isv6, ipst) \
	((isv6) ? (ipst)->ips_ipv6_strict_src_multihoming : \
	(ipst)->ips_ip_strict_src_multihoming)
/*
 * Forward declarations of file-local (static) helpers that are defined
 * further down in this file.
 */
static ire_t *route_to_dst(const struct sockaddr *, zoneid_t, ip_stack_t *);
static void ire_del_host_redir(ire_t *, char *);
static boolean_t ire_find_best_route(struct radix_node *, void *);
/*
 * Look up the best matching IPv4 forwarding-table IRE for addr.
 *
 * The radix tree is searched with ire_find_best_route() as the per-leaf
 * callback, so the result is the first leaf entry that also satisfies
 * ire_match_args() for the given mask/gateway/type/ill/zoneid/tsl/flags.
 *
 * Returns NULL when flags request an ill match but ill is NULL, or when
 * nothing matches.  Otherwise returns a held IRE and, if generationp is
 * not NULL, stores that IRE's generation (read while the tree lock is
 * still held).
 */
ire_t *
ire_ftable_lookup_v4(ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway,
    int type, const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl,
    int flags, uint32_t xmit_hint, ip_stack_t *ipst, uint_t *generationp)
{
	struct rt_sockaddr dst_key, mask_key;
	ire_ftable_args_t margs;
	struct rt_entry *rt;
	ire_t *ire;

	ASSERT(ill == NULL || !ill->ill_isv6);

	/* An ill-based match cannot be evaluated without an ill. */
	if (ill == NULL && (flags & (MATCH_IRE_ILL|MATCH_IRE_SRC_ILL)))
		return (NULL);

	/* Radix search key for the destination. */
	bzero(&dst_key, sizeof (dst_key));
	dst_key.rt_sin_family = AF_INET;
	dst_key.rt_sin_len = sizeof (dst_key);
	dst_key.rt_sin_addr.s_addr = addr;

	bzero(&mask_key, sizeof (mask_key));
	mask_key.rt_sin_family = AF_INET;
	mask_key.rt_sin_len = sizeof (mask_key);
	mask_key.rt_sin_addr.s_addr = mask;

	/* Arguments consumed by the ire_find_best_route() callback. */
	bzero(&margs, sizeof (margs));
	margs.ift_flags = flags;
	margs.ift_tsl = tsl;
	margs.ift_zoneid = zoneid;
	margs.ift_ill = ill;
	margs.ift_type = type;
	margs.ift_gateway = gateway;
	margs.ift_mask = mask;
	margs.ift_addr = addr;

	RADIX_NODE_HEAD_RLOCK(ipst->ips_ip_ftable);
	rt = (struct rt_entry *)ipst->ips_ip_ftable->rnh_matchaddr_args(
	    &dst_key, ipst->ips_ip_ftable, ire_find_best_route, &margs);
	if (rt == NULL) {
		RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable);
		return (NULL);
	}
	/* The callback stored (and held) the matching IRE. */
	ire = margs.ift_best_ire;
	ASSERT(ire != NULL);
	DTRACE_PROBE2(ire__found, ire_ftable_args_t *, &margs, ire_t *, ire);

	/*
	 * Consider ECMP only when the bucket has more than one entry and
	 * the caller did not ask for a specific gateway.
	 * ips_ip_ecmp_behavior: 2 enables it for every route, 1 only for
	 * default routes; any other value leaves the first match in place.
	 */
	if (ire->ire_bucket->irb_ire_cnt > 1 && !(flags & MATCH_IRE_GW) &&
	    (ipst->ips_ip_ecmp_behavior == 2 ||
	    (ipst->ips_ip_ecmp_behavior == 1 && IS_DEFAULT_ROUTE(ire)))) {
		ire_t *alt;

		margs.ift_best_ire = NULL;
		alt = ire_round_robin(ire->ire_bucket, &margs, xmit_hint,
		    ire, ipst);
		/* Keep the original IRE when no alternative was found. */
		if (alt != NULL) {
			ire_refrele(ire);
			ire = alt;
		}
	}

	/* Report the generation before the tree lock is dropped. */
	if (generationp != NULL)
		*generationp = ire->ire_generation;
	RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable);

	/*
	 * An IRE_LOCAL that belongs to a different specific zone is
	 * replaced through ire_alt_local() when
	 * ips_ip_restrict_interzone_loopback is set.
	 */
	if ((ire->ire_type & IRE_LOCAL) && zoneid != ALL_ZONES &&
	    ire->ire_zoneid != zoneid && ire->ire_zoneid != ALL_ZONES &&
	    ipst->ips_ip_restrict_interzone_loopback) {
		ire = ire_alt_local(ire, zoneid, tsl, ill, generationp);
		ASSERT(ire != NULL);
	}
	return (ire);
}
/*
 * Destination-only IPv4 forwarding-table lookup.
 *
 * Finds the radix leaf for addr without a match callback and takes the
 * first non-condemned IRE in its bucket, optionally replaced by an ECMP
 * alternative from ire_round_robin().
 *
 * Never returns NULL as written: when nothing usable is found the result
 * of ire_reject() is returned and *generationp (if not NULL) is set to
 * IRE_GENERATION_VERIFY.  On success the returned IRE is held and
 * *generationp receives its generation.
 */
ire_t *
ire_ftable_lookup_simple_v4(ipaddr_t addr, uint32_t xmit_hint, ip_stack_t *ipst,
    uint_t *generationp)
{
	struct rt_sockaddr dst_key;
	struct rt_entry *rt;
	irb_t *bucket;
	ire_t *ire;

	dst_key.rt_sin_family = AF_INET;
	dst_key.rt_sin_len = sizeof (dst_key);
	dst_key.rt_sin_addr.s_addr = addr;

	RADIX_NODE_HEAD_RLOCK(ipst->ips_ip_ftable);
	rt = (struct rt_entry *)ipst->ips_ip_ftable->rnh_matchaddr_args(
	    &dst_key, ipst->ips_ip_ftable, NULL, NULL);
	if (rt == NULL)
		goto bad;

	bucket = &rt->rt_irb;
	if (bucket->irb_ire_cnt == 0)
		goto bad;

	/* Take the first entry in the bucket that is not condemned. */
	rw_enter(&bucket->irb_lock, RW_READER);
	for (ire = bucket->irb_ire; ire != NULL; ire = ire->ire_next) {
		if (!IRE_IS_CONDEMNED(ire))
			break;
	}
	if (ire == NULL) {
		rw_exit(&bucket->irb_lock);
		goto bad;
	}
	ire_refhold(ire);
	rw_exit(&bucket->irb_lock);

	/*
	 * With more than one entry in the bucket, ips_ip_ecmp_behavior
	 * decides whether to spread: 2 for every route, 1 only for default
	 * routes.
	 */
	if (ire->ire_bucket->irb_ire_cnt > 1 &&
	    (ipst->ips_ip_ecmp_behavior == 2 ||
	    (ipst->ips_ip_ecmp_behavior == 1 && IS_DEFAULT_ROUTE(ire)))) {
		ire_ftable_args_t margs;
		ire_t *alt;

		bzero(&margs, sizeof (margs));
		margs.ift_zoneid = ALL_ZONES;
		margs.ift_addr = addr;

		alt = ire_round_robin(ire->ire_bucket, &margs, xmit_hint,
		    ire, ipst);
		/* Keep the first match when there is no alternative. */
		if (alt != NULL) {
			ire_refrele(ire);
			ire = alt;
		}
	}

	/* Report the generation before the tree lock is dropped. */
	if (generationp != NULL)
		*generationp = ire->ire_generation;
	RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable);
	return (ire);

bad:
	if (generationp != NULL)
		*generationp = IRE_GENERATION_VERIFY;
	RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable);
	return (ire_reject(ipst, B_FALSE));
}
/*
 * Find the ill to use for the IPv4 multicast group by doing a
 * destination-only recursive route lookup on the group address.
 *
 * Returns NULL when the route found is a reject or blackhole route;
 * otherwise returns whatever ire_nexthop_ill() yields for that route.
 * If multirtp is not NULL it is set to whether the route has RTF_MULTIRT.
 * setsrcp is handed through to ire_route_recursive_v4().
 */
ill_t *
ire_lookup_multi_ill_v4(ipaddr_t group, zoneid_t zoneid, ip_stack_t *ipst,
    boolean_t *multirtp, ipaddr_t *setsrcp)
{
	ill_t *nexthop_ill = NULL;
	ire_t *route;

	route = ire_route_recursive_v4(group, 0, NULL, zoneid, NULL,
	    MATCH_IRE_DSTONLY, IRR_NONE, 0, ipst, setsrcp, NULL, NULL);
	ASSERT(route != NULL);

	if (!(route->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
		if (multirtp != NULL)
			*multirtp = (route->ire_flags & RTF_MULTIRT) != 0;
		nexthop_ill = ire_nexthop_ill(route);
	}

	/* The route itself is no longer needed in either case. */
	ire_refrele(route);
	return (nexthop_ill);
}
/*
 * Tree-walk callback: delete ire if it is a dynamic (RTF_DYNAMIC) route
 * whose gateway equals the IPv4 address that gateway points at.
 */
void
ire_del_host_redir(ire_t *ire, char *gateway)
{
	ipaddr_t gw_addr;

	if (!(ire->ire_flags & RTF_DYNAMIC))
		return;

	gw_addr = *(ipaddr_t *)gateway;
	if (ire->ire_gateway_addr == gw_addr)
		ire_delete(ire);
}
/*
 * Walk the IPv4 forwarding table and apply ire_del_host_redir() with the
 * given gateway address, for all zones.  Each visited node is bracketed by
 * irb_refhold_rn()/irb_refrele_rn().
 */
void
ire_delete_host_redirects(ipaddr_t gateway, ip_stack_t *ipst)
{
	struct rtfuncarg walk_arg;

	bzero(&walk_arg, sizeof (walk_arg));
	walk_arg.rt_ipst = ipst;
	walk_arg.rt_zoneid = ALL_ZONES;
	walk_arg.rt_arg = (void *)&gateway;
	walk_arg.rt_func = ire_del_host_redir;

	(void) ipst->ips_ip_ftable->rnh_walktree_mt(ipst->ips_ip_ftable,
	    rtfunc, &walk_arg, irb_refhold_rn, irb_refrele_rn);
}
/*
 * Return a held bucket (irb_t) in the IPv4 forwarding table for the
 * destination/mask of ire, creating the radix entry if needed.
 *
 * A new rt_entry is allocated and inserted under the tree write lock.  If
 * the insert fails, the new entry is discarded and the tree is searched
 * (under the read lock) for an existing non-root entry with the same
 * destination and mask.
 *
 * Returns NULL if the allocation fails or no entry could be added or
 * found; otherwise returns the bucket with irb_refhold() applied.
 */
irb_t *
ire_get_bucket(ire_t *ire)
{
	struct radix_node *rn;
	struct rt_entry *rt;
	struct rt_sockaddr rmask, rdst;
	irb_t *irb = NULL;
	ip_stack_t *ipst = ire->ire_ipst;

	ASSERT(ipst->ips_ip_ftable != NULL);

	/* Radix keys for the destination and its mask. */
	bzero(&rdst, sizeof (rdst));
	rdst.rt_sin_len = sizeof (rdst);
	rdst.rt_sin_family = AF_INET;
	rdst.rt_sin_addr.s_addr = ire->ire_addr;

	bzero(&rmask, sizeof (rmask));
	rmask.rt_sin_len = sizeof (rmask);
	rmask.rt_sin_family = AF_INET;
	rmask.rt_sin_addr.s_addr = ire->ire_mask;

	R_Malloc(rt, rt_entry_cache, sizeof (*rt));
	if (rt == NULL)
		return (NULL);

	/* Prepare the new entry; its key points at its own rt_dst copy. */
	bzero(rt, sizeof (*rt));
	rt->rt_nodes->rn_key = (char *)&rt->rt_dst;
	rt->rt_dst = rdst;
	irb = &rt->rt_irb;
	irb->irb_marks |= IRB_MARK_DYNAMIC;
	irb->irb_ipst = ipst;
	rw_init(&irb->irb_lock, NULL, RW_DEFAULT, NULL);

	RADIX_NODE_HEAD_WLOCK(ipst->ips_ip_ftable);
	rn = ipst->ips_ip_ftable->rnh_addaddr(&rt->rt_dst, &rmask,
	    ipst->ips_ip_ftable, (struct radix_node *)rt);
	if (rn == NULL) {
		RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable);
		/*
		 * The entry never became visible to anyone else; undo the
		 * rw_init() above before the memory is returned.
		 */
		rw_destroy(&irb->irb_lock);
		Free(rt, rt_entry_cache);
		rt = NULL;
		irb = NULL;

		/* The insert failed; look for an existing entry instead. */
		RADIX_NODE_HEAD_RLOCK(ipst->ips_ip_ftable);
		rn = ipst->ips_ip_ftable->rnh_lookup(&rdst, &rmask,
		    ipst->ips_ip_ftable);
		if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
			rt = (struct rt_entry *)rn;
		}
	}
	/* Hold the bucket while the tree lock (read or write) is held. */
	if (rt != NULL) {
		irb = &rt->rt_irb;
		irb_refhold(irb);
	}
	RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable);
	return (irb);
}
/*
 * Return the interface index of the next-hop ill on the route towards
 * ipaddr (AF_INET or AF_INET6), or 0 when route_to_dst() finds no usable
 * route or the route has no next-hop ill.
 *
 * ALL_ZONES is looked up as GLOBAL_ZONEID.  When the netstack found is not
 * the global one, the lookup itself is done with GLOBAL_ZONEID.
 */
uint_t
ifindex_lookup(const struct sockaddr *ipaddr, zoneid_t zoneid)
{
	netstack_t *ns;
	ip_stack_t *ipst;
	ire_t *route;
	uint_t index = 0;

	ns = netstack_find_by_zoneid(zoneid == ALL_ZONES ?
	    GLOBAL_ZONEID : zoneid);
	ASSERT(ns != NULL);

	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
		zoneid = GLOBAL_ZONEID;
	ipst = ns->netstack_ip;

	ASSERT(ipaddr->sa_family == AF_INET || ipaddr->sa_family == AF_INET6);

	route = route_to_dst(ipaddr, zoneid, ipst);
	if (route != NULL) {
		ill_t *out_ill = ire_nexthop_ill(route);

		if (out_ill != NULL) {
			index = out_ill->ill_phyint->phyint_ifindex;
			ill_refrele(out_ill);
		}
		ire_refrele(route);
	}

	netstack_rele(ns);
	return (index);
}
/*
 * Do a destination-only recursive route lookup (with IRR_ALLOCATE) for
 * dst_addr, using the IPv4 lookup for AF_INET and the IPv6 lookup for any
 * other family.
 *
 * Returns the held IRE, or NULL (after releasing it) when the result is a
 * reject or blackhole route.
 */
static ire_t *
route_to_dst(const struct sockaddr *dst_addr, zoneid_t zoneid, ip_stack_t *ipst)
{
	int match_flags = MATCH_IRE_DSTONLY;
	ire_t *route;

	if (dst_addr->sa_family != AF_INET) {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)dst_addr;

		route = ire_route_recursive_v6(&sin6->sin6_addr, 0, NULL,
		    zoneid, NULL, match_flags, IRR_ALLOCATE, 0, ipst, NULL,
		    NULL, NULL);
	} else {
		struct sockaddr_in *sin = (struct sockaddr_in *)dst_addr;

		route = ire_route_recursive_v4(sin->sin_addr.s_addr, 0, NULL,
		    zoneid, NULL, match_flags, IRR_ALLOCATE, 0, ipst, NULL,
		    NULL, NULL);
	}
	ASSERT(route != NULL);

	if (!(route->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)))
		return (route);

	ire_refrele(route);
	return (NULL);
}
/*
 * Transmit the packet in mp through ip_output_simple() using a locally
 * built transmit-attribute structure.
 *
 * dst_addr - AF_INET or AF_INET6 address; when it differs from the
 *            destination in the packet header it is used as the next hop
 * mp       - packet; b_rptr is read as the IPv4 or IPv6 header
 * ifindex  - stored in ixa_ifindex
 * zoneid   - selects the netstack (ALL_ZONES is looked up as
 *            GLOBAL_ZONEID)
 *
 * The attributes disable IPsec, fragmentation and packet-filter hooks.
 * Returns 0 on success, ENONET for EHOSTUNREACH/ENETUNREACH, and ECOMM for
 * any other error from ip_output_simple().
 */
int
ipfil_sendpkt(const struct sockaddr *dst_addr, mblk_t *mp, uint_t ifindex,
    zoneid_t zoneid)
{
	ip_xmit_attr_t ixas;
	netstack_t *ns;
	ip_stack_t *ipst;
	int error;

	ASSERT(mp != NULL);

	ns = netstack_find_by_zoneid(zoneid == ALL_ZONES ?
	    GLOBAL_ZONEID : zoneid);
	ASSERT(ns != NULL);

	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
		zoneid = GLOBAL_ZONEID;
	ipst = ns->netstack_ip;

	ASSERT(dst_addr->sa_family == AF_INET ||
	    dst_addr->sa_family == AF_INET6);

	bzero(&ixas, sizeof (ixas));
	ixas.ixa_flags = IXAF_NO_IPSEC | IXAF_DONTFRAG | IXAF_NO_PFHOOK;
	ixas.ixa_ifindex = ifindex;
	ixas.ixa_ipst = ipst;
	ixas.ixa_tsl = NULL;
	ixas.ixa_cpid = NOPID;
	ixas.ixa_cred = kcred;

	if (dst_addr->sa_family != AF_INET) {
		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
		in6_addr_t *v6nexthop;

		v6nexthop = &((struct sockaddr_in6 *)dst_addr)->sin6_addr;
		/* Only force a next hop when it is not the destination. */
		if (!IN6_ARE_ADDR_EQUAL(v6nexthop, &ip6h->ip6_dst)) {
			ixas.ixa_flags |= IXAF_NEXTHOP_SET;
			ixas.ixa_nexthop_v6 = *v6nexthop;
		}
		ixas.ixa_multicast_ttl = ip6h->ip6_hops;
	} else {
		ipha_t *ipha = (ipha_t *)mp->b_rptr;
		ipaddr_t v4nexthop;

		ixas.ixa_flags |= IXAF_IS_IPV4;
		v4nexthop = ((struct sockaddr_in *)dst_addr)->sin_addr.s_addr;
		/* Only force a next hop when it is not the destination. */
		if (v4nexthop != ipha->ipha_dst) {
			ixas.ixa_flags |= IXAF_NEXTHOP_SET;
			ixas.ixa_nexthop_v4 = v4nexthop;
		}
		ixas.ixa_multicast_ttl = ipha->ipha_ttl;
	}

	error = ip_output_simple(mp, &ixas);
	ixa_cleanup(&ixas);
	netstack_rele(ns);

	/* Collapse the result into the three codes callers see. */
	if (error == 0)
		return (0);
	if (error == EHOSTUNREACH || error == ENETUNREACH)
		return (ENONET);
	return (ECOMM);
}
/*
 * Radix match callback: scan the bucket of leaf rn for the first
 * non-condemned IRE that satisfies ire_match_args() with the criteria in
 * arg (an ire_ftable_args_t).
 *
 * The comparison mask is ift_mask when MATCH_IRE_MASK is requested,
 * otherwise the candidate's own ire_mask.
 *
 * On a match the IRE is held, stored in ift_best_ire, and B_TRUE is
 * returned; otherwise B_FALSE.
 */
boolean_t
ire_find_best_route(struct radix_node *rn, void *arg)
{
	struct rt_entry *rt = (struct rt_entry *)rn;
	ire_ftable_args_t *margs = arg;
	boolean_t found = B_FALSE;
	irb_t *bucket;
	ire_t *cand;

	ASSERT(rt != NULL);

	bucket = &rt->rt_irb;
	if (bucket->irb_ire_cnt == 0)
		return (B_FALSE);

	rw_enter(&bucket->irb_lock, RW_READER);
	for (cand = bucket->irb_ire; cand != NULL; cand = cand->ire_next) {
		ipaddr_t match_mask;

		if (IRE_IS_CONDEMNED(cand))
			continue;

		ASSERT((margs->ift_flags & MATCH_IRE_SHORTERMASK) == 0);
		match_mask = (margs->ift_flags & MATCH_IRE_MASK) ?
		    margs->ift_mask : cand->ire_mask;

		if (!ire_match_args(cand, margs->ift_addr, match_mask,
		    margs->ift_gateway, margs->ift_type, margs->ift_ill,
		    margs->ift_zoneid, margs->ift_tsl,
		    margs->ift_flags))
			continue;

		/* Hold the winner before the bucket lock is released. */
		ire_refhold(cand);
		margs->ift_best_ire = cand;
		found = B_TRUE;
		break;
	}
	rw_exit(&bucket->irb_lock);
	return (found);
}
/*
 * Drop one reference on a forwarding-table bucket.
 *
 * If this is not the last reference, or IREs are still counted in
 * irb_nire, the count is simply decremented.  When it is the last
 * reference and the bucket is marked IRB_MARK_CONDEMNED, the condemned
 * entries are unlinked and cleaned up first, and the state is re-evaluated
 * from the top because the bucket lock was dropped.  When nothing remains,
 * irb_inactive() is called; the loop retries if that returns false.
 */
void
irb_refrele_ftable(irb_t *irb)
{
	for (;;) {
		rw_enter(&irb->irb_lock, RW_WRITER);
		ASSERT(irb->irb_refcnt != 0);

		/* Other references exist: just drop ours. */
		if (irb->irb_refcnt != 1) {
			irb->irb_refcnt--;
			rw_exit(&irb->irb_lock);
			return;
		}

		/* Last reference: process condemned entries first. */
		if (irb->irb_marks & IRB_MARK_CONDEMNED) {
			ire_t *condemned = ire_unlink(irb);

			rw_exit(&irb->irb_lock);
			if (condemned != NULL)
				ire_cleanup(condemned);
			continue;
		}

		/* IREs still counted in this bucket: drop our ref only. */
		if (irb->irb_nire != 0) {
			irb->irb_refcnt--;
			rw_exit(&irb->irb_lock);
			return;
		}

		/* Nothing left; let irb_inactive() try to retire it. */
		rw_exit(&irb->irb_lock);
		if (irb_inactive(irb))
			return;
	}
}
/*
 * Pick an alternative IRE from bucket irb_ptr for ECMP / round-robin use.
 *
 * irb_ptr  - bucket to walk
 * margs    - match arguments passed to ire_match_args() or
 *            ire_match_args_v6()
 * hash     - transmit hint; after folding and reduction modulo the bucket
 *            count it is the number of live entries skipped before
 *            matching starts
 * orig_ire - the IRE the caller already found; candidates must have the
 *            same ire_type and the same RTF_MULTIRT setting (and, for
 *            IPv6, the same ire_addr_v6)
 * ipst     - IP stack instance
 *
 * Returns a held matching IRE with ire_badcnt == 0 if one is found,
 * otherwise the held matching IRE with the lowest ire_badcnt seen, or NULL
 * if the bucket count is 0 or nothing matched.
 *
 * Except on the empty-bucket return, the IPv4 radix head lock (or the IPv6
 * ire head lock) is released around irb_refrele() and re-taken as reader
 * before returning, so the caller is expected to hold it on entry.
 */
ire_t *
ire_round_robin(irb_t *irb_ptr, ire_ftable_args_t *margs, uint_t hash,
ire_t *orig_ire, ip_stack_t *ipst)
{
ire_t *ire, *maybe_ire = NULL;
uint_t maybe_badcnt = 0;
uint_t maxwalk;
/* Fold higher-order bits of the hint into the low bits. */
hash = hash ^ (hash >> 8) ^ (hash >> 16);
/* Snapshot the entry count and take a bucket hold under the lock. */
rw_enter(&irb_ptr->irb_lock, RW_WRITER);
maxwalk = irb_ptr->irb_ire_cnt;
if (maxwalk == 0) {
rw_exit(&irb_ptr->irb_lock);
return (NULL);
}
hash %= maxwalk;
irb_refhold_locked(irb_ptr);
rw_exit(&irb_ptr->irb_lock);
/*
 * Walk the bucket as a circular list.  Condemned entries and the first
 * "hash" live entries are skipped without consuming maxwalk; every entry
 * that is actually examined consumes one unit of maxwalk.
 */
ire = irb_ptr->irb_ire;
while (maxwalk > 0) {
if (IRE_IS_CONDEMNED(ire))
goto next_ire_skip;
if (hash != 0) {
hash--;
goto next_ire_skip;
}
/* Candidate must be the same kind of route as orig_ire. */
if (ire->ire_type != orig_ire->ire_type ||
((ire->ire_flags ^ orig_ire->ire_flags) & RTF_MULTIRT) != 0)
goto next_ire;
/* For IPv6 the destination address is compared explicitly. */
if (ire->ire_ipversion == IPV6_VERSION) {
if (!IN6_ARE_ADDR_EQUAL(&orig_ire->ire_addr_v6,
&ire->ire_addr_v6))
goto next_ire;
}
/* Apply the caller's match criteria, using the candidate's own mask. */
if (ire->ire_ipversion == IPV4_VERSION ?
!ire_match_args(ire, margs->ift_addr,
ire->ire_mask, margs->ift_gateway,
margs->ift_type, margs->ift_ill, margs->ift_zoneid,
margs->ift_tsl, margs->ift_flags) :
!ire_match_args_v6(ire, &margs->ift_addr_v6,
&ire->ire_mask_v6, &margs->ift_gateway_v6,
margs->ift_type, margs->ift_ill, margs->ift_zoneid,
margs->ift_tsl, margs->ift_flags))
goto next_ire;
/*
 * For a specific zone, an off-link route is only acceptable if its
 * gateway passes the per-zone gateway check.
 */
if (margs->ift_zoneid != ALL_ZONES &&
(ire->ire_type & IRE_OFFLINK)) {
if (ire->ire_ipversion == IPV4_VERSION) {
if (!ire_gateway_ok_zone_v4(
ire->ire_gateway_addr, margs->ift_zoneid,
ire->ire_ill, margs->ift_tsl, ipst,
B_TRUE))
goto next_ire;
} else {
if (!ire_gateway_ok_zone_v6(
&ire->ire_gateway_addr_v6,
margs->ift_zoneid, ire->ire_ill,
margs->ift_tsl, ipst, B_TRUE))
goto next_ire;
}
}
/* Clear a bad count that is older than ips_ip_ire_badcnt_lifetime. */
mutex_enter(&ire->ire_lock);
if (ire->ire_badcnt != 0 &&
(TICK_TO_SEC(ddi_get_lbolt64()) - ire->ire_last_badcnt >
ipst->ips_ip_ire_badcnt_lifetime))
ire->ire_badcnt = 0;
mutex_exit(&ire->ire_lock);
/* A candidate with no bad count is returned immediately. */
if (ire->ire_badcnt == 0) {
ire_refhold(ire);
/* Drop the tree lock around irb_refrele(), then re-take it. */
if (ire->ire_ipversion == IPV4_VERSION) {
RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable);
irb_refrele(irb_ptr);
RADIX_NODE_HEAD_RLOCK(ipst->ips_ip_ftable);
} else {
rw_exit(&ipst->ips_ip6_ire_head_lock);
irb_refrele(irb_ptr);
rw_enter(&ipst->ips_ip6_ire_head_lock,
RW_READER);
}
return (ire);
}
/* Remember the matching candidate with the lowest bad count so far. */
if (maybe_ire == NULL) {
maybe_ire = ire;
maybe_badcnt = ire->ire_badcnt;
} else if (ire->ire_badcnt < maybe_badcnt) {
maybe_ire = ire;
maybe_badcnt = ire->ire_badcnt;
}
next_ire:
maxwalk--;
next_ire_skip:
/* Advance, wrapping to the head at the end of the list. */
ire = ire->ire_next;
if (ire == NULL)
ire = irb_ptr->irb_ire;
}
/* No zero-badcnt match; fall back to the best one seen, if any. */
if (maybe_ire != NULL)
ire_refhold(maybe_ire);
/*
 * "ire" is whichever entry the walk stopped on; it is used here only to
 * determine the IP version and therefore which tree lock to cycle.
 */
if (ire->ire_ipversion == IPV4_VERSION) {
RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable);
irb_refrele(irb_ptr);
RADIX_NODE_HEAD_RLOCK(ipst->ips_ip_ftable);
} else {
rw_exit(&ipst->ips_ip6_ire_head_lock);
irb_refrele(irb_ptr);
rw_enter(&ipst->ips_ip6_ire_head_lock, RW_READER);
}
return (maybe_ire);
}
void
irb_refhold_rn(struct radix_node *rn)
{
if ((rn->rn_flags & RNF_ROOT) == 0)
irb_refhold(&((rt_t *)(rn))->rt_irb);
}
void
irb_refrele_rn(struct radix_node *rn)
{
if ((rn->rn_flags & RNF_ROOT) == 0)
irb_refrele_ftable(&((rt_t *)(rn))->rt_irb);
}
/*
 * Find the ill that hosts source address v6src in the given zone.
 * An IPv4-mapped address is looked up as IPv4, anything else as IPv6.
 *
 * Returns the ill with a hold taken on it, or NULL if the address lookup
 * finds no ipif.
 */
static ill_t *
ip_select_src_ill(const in6_addr_t *v6src, zoneid_t zoneid, ip_stack_t *ipst)
{
	ipif_t *ipif;
	ill_t *src_ill;

	if (IN6_IS_ADDR_V4MAPPED(v6src)) {
		ipaddr_t v4src;

		IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
		ipif = ipif_lookup_addr_nondup(v4src, NULL, zoneid, ipst);
	} else {
		ipif = ipif_lookup_addr_nondup_v6(v6src, NULL, zoneid, ipst);
	}
	if (ipif == NULL)
		return (NULL);

	/* Swap the ipif reference for a hold on its ill. */
	src_ill = ipif->ipif_ill;
	ill_refhold(src_ill);
	ipif_refrele(ipif);
	return (src_ill);
}
/*
 * Check whether source address v6src is configured on ill in the given
 * zone.  The address family used for the lookup follows ill->ill_isv6.
 *
 * Returns B_FALSE when ill is NULL or no matching ipif is found, B_TRUE
 * otherwise.  No reference is retained.
 */
static boolean_t
ip_verify_src_on_ill(const in6_addr_t v6src, ill_t *ill, zoneid_t zoneid)
{
	ipif_t *ipif;

	if (ill == NULL)
		return (B_FALSE);

	if (!ill->ill_isv6) {
		ipaddr_t v4src;

		IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
		ipif = ipif_lookup_addr_nondup(v4src, ill, zoneid,
		    ill->ill_ipst);
	} else {
		ipif = ipif_lookup_addr_nondup_v6(&v6src, ill, zoneid,
		    ill->ill_ipst);
	}

	if (ipif == NULL)
		return (B_FALSE);

	ipif_refrele(ipif);
	return (B_TRUE);
}
/*
 * Select the IRE to use for transmitting to v6dst.
 *
 * v6dst, v6src - destination and source addresses; for IPv4
 *                (IXAF_IS_IPV4 set in ixa) the IPv4 address is extracted
 *                from them with IN6_V4MAPPED_TO_IPADDR
 * ixa          - transmit attributes; the flags, ifindex, scopeid,
 *                multicast ifindex, zoneid, tsl, xmit hint, next hop and
 *                ip stack fields are consulted
 * generationp  - if not NULL, receives a generation value for the result
 * setsrcp      - if not NULL, receives a source address picked up by the
 *                route/group lookup; must be unspecified on entry
 * errorp       - if not NULL, set to ENXIO, EHOSTUNREACH or EADDRNOTAVAIL
 *                on the failure paths below; must be 0 on entry
 * multirtp     - passed to the multicast group lookup
 *
 * Failure paths return the IRE produced by ire_reject() rather than NULL.
 */
ire_t *
ip_select_route(const in6_addr_t *v6dst, const in6_addr_t v6src,
ip_xmit_attr_t *ixa, uint_t *generationp, in6_addr_t *setsrcp,
int *errorp, boolean_t *multirtp)
{
uint_t match_args;
uint_t ire_type;
ill_t *ill = NULL;
ire_t *ire;
ip_stack_t *ipst = ixa->ixa_ipst;
ipaddr_t v4dst;
in6_addr_t v6nexthop;
iaflags_t ixaflags = ixa->ixa_flags;
nce_t *nce;
boolean_t preferred_src_aware = B_FALSE;
boolean_t verify_src;
boolean_t isv6 = !(ixa->ixa_flags & IXAF_IS_IPV4);
int src_multihoming = IP_SRC_MULTIHOMING(isv6, ipst);
/* Source verification applies only to a specified source address. */
verify_src = (!V6_OR_V4_INADDR_ANY(v6src) &&
(ixa->ixa_flags & IXAF_VERIFY_SOURCE));
match_args = MATCH_IRE_SECATTR;
IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
if (setsrcp != NULL)
ASSERT(IN6_IS_ADDR_UNSPECIFIED(setsrcp));
if (errorp != NULL)
ASSERT(*errorp == 0);
/* Multicast destinations: pick an ill, then use its multicast IRE. */
if (isv6 ? IN6_IS_ADDR_MULTICAST(v6dst) : CLASSD(v4dst)) {
/*
 * ill selection precedence: explicit multicast ifindex, then scope id,
 * then ixa_ifindex, then the source address (when source verification
 * and strict multihoming are on), and finally the group lookup.
 */
if (ixa->ixa_multicast_ifindex != 0) {
ill = ill_lookup_on_ifindex(ixa->ixa_multicast_ifindex,
isv6, ipst);
} else if (ixaflags & IXAF_SCOPEID_SET) {
ASSERT(ixa->ixa_scopeid != 0);
ill = ill_lookup_on_ifindex(ixa->ixa_scopeid,
isv6, ipst);
} else if (ixa->ixa_ifindex != 0) {
ill = ill_lookup_on_ifindex(ixa->ixa_ifindex,
isv6, ipst);
} else if (src_multihoming != 0 && verify_src) {
ill = ip_select_src_ill(&v6src, ixa->ixa_zoneid, ipst);
/* The ill came from the source, so no later source check is needed. */
verify_src = B_FALSE;
/* For a VNI ill, switch to the ill returned by ill_lookup_usesrc(). */
if (ill != NULL && IS_VNI(ill)) {
ill_t *usesrc = ill;
ill = ill_lookup_usesrc(usesrc);
ill_refrele(usesrc);
}
} else if (!isv6) {
ipaddr_t v4setsrc = INADDR_ANY;
ill = ill_lookup_group_v4(v4dst, ixa->ixa_zoneid,
ipst, multirtp, &v4setsrc);
if (setsrcp != NULL)
IN6_IPADDR_TO_V4MAPPED(v4setsrc, setsrcp);
} else {
ill = ill_lookup_group_v6(v6dst, ixa->ixa_zoneid,
ipst, multirtp, setsrcp);
}
/* A VNI ill is not used for multicast output. */
if (ill != NULL && IS_VNI(ill)) {
ill_refrele(ill);
ill = NULL;
}
if (ill == NULL) {
if (errorp != NULL)
*errorp = ENXIO;
ire = ire_reject(ipst, isv6);
return (ire);
}
/* The ill must have ILLF_MULTICAST set. */
if (!(ill->ill_flags & ILLF_MULTICAST)) {
ill_refrele(ill);
if (errorp != NULL)
*errorp = EHOSTUNREACH;
ire = ire_reject(ipst, isv6);
return (ire);
}
/* With src_multihoming == 2 the source must live on the chosen ill. */
if (verify_src && src_multihoming == 2 &&
!ip_verify_src_on_ill(v6src, ill, ixa->ixa_zoneid)) {
if (errorp != NULL)
*errorp = EADDRNOTAVAIL;
ill_refrele(ill);
ire = ire_reject(ipst, isv6);
return (ire);
}
ire = ire_multicast(ill);
ill_refrele(ill);
if (generationp != NULL)
*generationp = ire->ire_generation;
if (errorp != NULL &&
(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
*errorp = EHOSTUNREACH;
}
return (ire);
}
/* Non-multicast destinations from here on. */
if (ixa->ixa_ifindex != 0 || (ixaflags & IXAF_SCOPEID_SET)) {
/* An explicit interface was given; the scope id wins over ixa_ifindex. */
if (ixaflags & IXAF_SCOPEID_SET) {
ASSERT(ixa->ixa_scopeid != 0);
ill = ill_lookup_on_ifindex(ixa->ixa_scopeid,
isv6, ipst);
} else {
ASSERT(ixa->ixa_ifindex != 0);
ill = ill_lookup_on_ifindex(ixa->ixa_ifindex,
isv6, ipst);
}
if (ill != NULL && IS_VNI(ill)) {
ill_refrele(ill);
ill = NULL;
}
if (ill == NULL) {
if (errorp != NULL)
*errorp = ENXIO;
ire = ire_reject(ipst, isv6);
return (ire);
}
/* Restrict the route lookup to this ill. */
match_args |= MATCH_IRE_ILL;
if (IS_UNDER_IPMP(ill))
match_args |= MATCH_IRE_TESTHIDDEN;
/* With src_multihoming == 2 the source must live on the chosen ill. */
if (verify_src && src_multihoming == 2 &&
!ip_verify_src_on_ill(v6src, ill, ixa->ixa_zoneid)) {
if (errorp != NULL)
*errorp = EADDRNOTAVAIL;
ill_refrele(ill);
ire = ire_reject(ipst, isv6);
return (ire);
}
} else if (src_multihoming != 0 && verify_src) {
/* No interface given: derive the ill from the source address. */
ill = ip_select_src_ill(&v6src, ixa->ixa_zoneid, ipst);
if (ill == NULL) {
char addrbuf[INET6_ADDRSTRLEN];
ip3dbg(("%s not a valid src for unicast",
inet_ntop(AF_INET6, &v6src, addrbuf,
sizeof (addrbuf))));
if (errorp != NULL)
*errorp = EADDRNOTAVAIL;
ire = ire_reject(ipst, isv6);
return (ire);
}
match_args |= MATCH_IRE_SRC_ILL;
/* With src_multihoming == 1 a failed lookup is retried without the ill. */
preferred_src_aware = (src_multihoming == 1);
}
/* Look up the explicit next hop if one was set, else the destination. */
if (ixaflags & IXAF_NEXTHOP_SET) {
v6nexthop = ixa->ixa_nexthop_v6;
} else {
v6nexthop = *v6dst;
}
ire_type = 0;
/* IXAF_DONTROUTE or an explicit next hop limits the match to on-link IREs. */
if (ixaflags & (IXAF_DONTROUTE|IXAF_NEXTHOP_SET)) {
match_args |= MATCH_IRE_TYPE;
ire_type = IRE_ONLINK;
}
retry:
if (!isv6) {
ipaddr_t v4nexthop;
ipaddr_t v4setsrc = INADDR_ANY;
IN6_V4MAPPED_TO_IPADDR(&v6nexthop, v4nexthop);
ire = ire_route_recursive_v4(v4nexthop, ire_type, ill,
ixa->ixa_zoneid, ixa->ixa_tsl, match_args, IRR_ALLOCATE,
ixa->ixa_xmit_hint, ipst, &v4setsrc, NULL, generationp);
if (setsrcp != NULL)
IN6_IPADDR_TO_V4MAPPED(v4setsrc, setsrcp);
} else {
ire = ire_route_recursive_v6(&v6nexthop, ire_type, ill,
ixa->ixa_zoneid, ixa->ixa_tsl, match_args, IRR_ALLOCATE,
ixa->ixa_xmit_hint, ipst, setsrcp, NULL, generationp);
}
#ifdef DEBUG
if (match_args & MATCH_IRE_TESTHIDDEN) {
ip3dbg(("looking for hidden; dst %x ire %p\n",
v4dst, (void *)ire));
}
#endif
/* The ill hold is dropped here; a retry below therefore runs with ill NULL. */
if (ill != NULL) {
ill_refrele(ill);
ill = NULL;
}
if ((ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
(ire->ire_type & IRE_MULTICAST)) {
/* Retry once without the source-ill constraint if permitted. */
if (preferred_src_aware) {
ire_refrele(ire);
preferred_src_aware = B_FALSE;
match_args &= ~MATCH_IRE_SRC_ILL;
goto retry;
}
return (ire);
}
/* Revalidate the cached nce if it is missing or condemned. */
mutex_enter(&ire->ire_lock);
nce = ire->ire_nce_cache;
if (nce == NULL || nce->nce_is_condemned) {
mutex_exit(&ire->ire_lock);
(void) ire_revalidate_nce(ire);
} else {
mutex_exit(&ire->ire_lock);
}
return (ire);
}
/*
 * Select a route for the packet in mp, taking the destination and source
 * addresses from the IPv4 or IPv6 header at mp->b_rptr according to
 * IXAF_IS_IPV4 in ixa.  IPv4 addresses are converted to IPv4-mapped form
 * first.  No setsrc address is requested from ip_select_route().
 */
ire_t *
ip_select_route_pkt(mblk_t *mp, ip_xmit_attr_t *ixa, uint_t *generationp,
    int *errorp, boolean_t *multirtp)
{
	in6_addr_t v6dst, v6src;
	ipha_t *ipha;

	if (!(ixa->ixa_flags & IXAF_IS_IPV4)) {
		ip6_t *ip6h = (ip6_t *)mp->b_rptr;

		return (ip_select_route(&ip6h->ip6_dst, ip6h->ip6_src,
		    ixa, generationp, NULL, errorp, multirtp));
	}

	ipha = (ipha_t *)mp->b_rptr;
	IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &v6dst);
	IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &v6src);
	return (ip_select_route(&v6dst, v6src, ixa, generationp,
	    NULL, errorp, multirtp));
}
/*
 * IPv4 front end for ip_select_route(): converts dst and src to
 * IPv4-mapped addresses, runs the selection, and converts the setsrc
 * result back to an IPv4 address in *v4setsrcp when that pointer is not
 * NULL.  ixa must have IXAF_IS_IPV4 set.
 */
ire_t *
ip_select_route_v4(ipaddr_t dst, ipaddr_t src, ip_xmit_attr_t *ixa,
    uint_t *generationp, ipaddr_t *v4setsrcp, int *errorp, boolean_t *multirtp)
{
	in6_addr_t mapped_dst, mapped_src;
	in6_addr_t mapped_setsrc = ipv6_all_zeros;
	ire_t *route;

	ASSERT(ixa->ixa_flags & IXAF_IS_IPV4);

	IN6_IPADDR_TO_V4MAPPED(src, &mapped_src);
	IN6_IPADDR_TO_V4MAPPED(dst, &mapped_dst);

	route = ip_select_route(&mapped_dst, mapped_src, ixa, generationp,
	    &mapped_setsrc, errorp, multirtp);

	if (v4setsrcp != NULL)
		IN6_V4MAPPED_TO_IPADDR(&mapped_setsrc, *v4setsrcp);
	return (route);
}
/*
 * Resolve nexthop through the IPv4 forwarding table, following gateway
 * addresses until an IRE is reached that ends the search (one with a
 * current dependency parent, one that is ire_nce_capable, or an
 * IRE_INTERFACE), and link the IREs found into a dependency chain.
 *
 * ire         - optional IRE to use for the first iteration instead of a
 *               lookup (an extra hold is taken on it); NULL to look up
 * nexthop     - address to resolve
 * ire_type    - type restriction for the first lookup
 * ill_arg     - ill used for lookups until one is picked up from an IRE
 * zoneid, tsl - passed to ire_ftable_lookup_v4()
 * match_args  - MATCH_IRE_* flags for the first lookup; narrowed on
 *               later iterations
 * irr_flags   - IRR_ALLOCATE permits creating an interface clone;
 *               IRR_INCOMPLETE permits returning a partial result
 * xmit_hint   - passed to ire_ftable_lookup_v4()
 * setsrcp     - if not NULL, receives the first RTF_SETSRC address seen;
 *               must be INADDR_ANY on entry
 * gwattrp     - if not NULL, receives the first ire_gw_secattr seen; must
 *               be NULL on entry
 * generationp - in: generation for a caller-supplied ire; out: generation
 *               for the result (IRE_GENERATION_VERIFY on failure)
 *
 * On success returns ires[0], the first IRE in the chain, still held.  On
 * failure returns a reject or blackhole IRE, or, when IRR_INCOMPLETE is
 * set and a later lookup fails, ires[0].
 */
ire_t *
ire_route_recursive_impl_v4(ire_t *ire,
ipaddr_t nexthop, uint_t ire_type, const ill_t *ill_arg,
zoneid_t zoneid, const ts_label_t *tsl, uint_t match_args,
uint_t irr_flags, uint32_t xmit_hint, ip_stack_t *ipst, ipaddr_t *setsrcp,
tsol_ire_gw_secattr_t **gwattrp, uint_t *generationp)
{
int i, j;
ire_t *ires[MAX_IRE_RECURSION];
uint_t generation;
uint_t generations[MAX_IRE_RECURSION];
boolean_t need_refrele = B_FALSE;
boolean_t invalidate = B_FALSE;
ill_t *ill = NULL;
uint_t maskoff = (IRE_LOCAL|IRE_LOOPBACK|IRE_BROADCAST);
if (setsrcp != NULL)
ASSERT(*setsrcp == INADDR_ANY);
if (gwattrp != NULL)
ASSERT(*gwattrp == NULL);
i = 0;
/*
 * The bound leaves one free slot in ires[] for the interface-clone case
 * below, which stores two entries in a single iteration.
 */
while (i < MAX_IRE_RECURSION - 1) {
if (ire == NULL) {
ire = ire_ftable_lookup_v4(nexthop, 0, 0, ire_type,
(ill != NULL? ill : ill_arg), zoneid, tsl,
match_args, xmit_hint, ipst, &generation);
} else {
/* Caller-supplied IRE: take our own hold on it. */
ire_refhold(ire);
if (generationp != NULL)
generation = *generationp;
else
generation = IRE_GENERATION_VERIFY;
}
if (ire == NULL) {
/* Lookup failed: return the partial result if allowed, else reject. */
if (i > 0 && (irr_flags & IRR_INCOMPLETE)) {
ire = ires[0];
ire_refhold(ire);
} else {
ire = ire_reject(ipst, B_FALSE);
}
goto error;
}
/* A reject or blackhole IRE is returned to the caller as is. */
if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))
goto error;
ASSERT(!(ire->ire_type & IRE_MULTICAST));
/* A clone that fails verification is dropped and the lookup is redone. */
if ((ire->ire_type & IRE_IF_CLONE) && !ire_clone_verify(ire)) {
ire_refrele(ire);
ire = NULL;
continue;
}
/*
 * Narrow later lookups: MATCH_IRE_DIRECT is always added; after a
 * non-RTF_INDIRECT off-link IRE only IRE_IF_ALL types are accepted,
 * otherwise local, loopback and broadcast types are masked off.
 */
match_args |= MATCH_IRE_DIRECT;
if ((ire->ire_type & IRE_OFFLINK) &&
!(ire->ire_flags & RTF_INDIRECT)) {
ire_type = IRE_IF_ALL;
} else {
if (!(match_args & MATCH_IRE_TYPE))
ire_type = (IRE_OFFLINK|IRE_ONLINK);
ire_type &= ~maskoff;
}
match_args |= MATCH_IRE_TYPE;
/* Record this IRE and its generation in the chain. */
ires[i] = ire;
generations[i] = generation;
i++;
/* Only the first RTF_SETSRC address found is reported. */
if ((ire->ire_flags & RTF_SETSRC) &&
setsrcp != NULL && *setsrcp == INADDR_ANY) {
ASSERT(ire->ire_setsrc_addr != INADDR_ANY);
*setsrcp = ire->ire_setsrc_addr;
}
/* Only the first gateway security attribute found is reported. */
if (ire->ire_gw_secattr != NULL &&
gwattrp != NULL && *gwattrp == NULL)
*gwattrp = ire->ire_gw_secattr;
/* A parent whose generation still matches ends the search here. */
mutex_enter(&ire->ire_lock);
if (ire->ire_dep_parent != NULL &&
ire->ire_dep_parent->ire_generation ==
ire->ire_dep_parent_generation) {
mutex_exit(&ire->ire_lock);
ire = NULL;
goto done;
}
mutex_exit(&ire->ire_lock);
/* An nce-capable IRE also ends the search. */
if (ire->ire_nce_capable) {
ire = NULL;
goto done;
}
ASSERT(!(ire->ire_type & IRE_IF_CLONE));
if (ire->ire_type & IRE_INTERFACE) {
in6_addr_t v6nexthop;
ire_t *clone;
ASSERT(ire->ire_masklen != IPV4_ABITS);
/*
 * Without IRR_ALLOCATE no clone is created; the chain ends at the
 * interface IRE and its generations are invalidated at "done".
 */
if (!(irr_flags & IRR_ALLOCATE)) {
invalidate = B_TRUE;
ire = NULL;
goto done;
}
IN6_IPADDR_TO_V4MAPPED(nexthop, &v6nexthop);
clone = ire_create_if_clone(ire, &v6nexthop,
&generation);
if (clone == NULL) {
/* Clone creation failed: return a blackhole IRE via the error path. */
invalidate = B_TRUE;
ire = ire_blackhole(ipst, B_FALSE);
goto error;
}
ASSERT(i >= 1);
ASSERT(i < MAX_IRE_RECURSION);
/* Insert the clone before the interface IRE it was created from. */
ires[i] = ires[i-1];
generations[i] = generations[i-1];
ires[i-1] = clone;
generations[i-1] = generation;
i++;
ire = NULL;
goto done;
}
/*
 * Recurse on the gateway address, keeping only the type and direct
 * match flags; once an IRE supplies an ill, later lookups are bound to it.
 */
match_args &= (MATCH_IRE_TYPE | MATCH_IRE_DIRECT);
nexthop = ire->ire_gateway_addr;
if (ill == NULL && ire->ire_ill != NULL) {
ill = ire->ire_ill;
need_refrele = B_TRUE;
ill_refhold(ill);
match_args |= MATCH_IRE_ILL;
}
ire = NULL;
}
/* Recursion limit reached without resolving. */
ASSERT(ire == NULL);
ire = ire_reject(ipst, B_FALSE);
error:
ASSERT(ire != NULL);
if (need_refrele)
ill_refrele(ill);
/* For a multirt first hop, ire_no_good() is called on it. */
if (i > 0 && (ires[0]->ire_flags & RTF_MULTIRT))
(void) ire_no_good(ires[0]);
cleanup:
/* Tear down any dependencies and release every IRE collected. */
ire_dep_unbuild(ires, i);
for (j = 0; j < i; j++)
ire_refrele(ires[j]);
ASSERT((ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
(irr_flags & IRR_INCOMPLETE));
if (generationp != NULL)
*generationp = IRE_GENERATION_VERIFY;
return (ire);
done:
ASSERT(ire == NULL);
if (need_refrele) {
ill_refrele(ill);
ill = NULL;
}
/* Link the chain; on failure return a reject IRE via cleanup. */
if (i > 1 && !ire_dep_build(ires, generations, i)) {
ire = ire_reject(ipst, B_FALSE);
goto cleanup;
}
/* Keep only the hold on ires[0], which is returned to the caller. */
for (j = 1; j < i; j++)
ire_refrele(ires[j]);
if (invalidate) {
ire_dep_invalidate_generations(ires[0]);
generation = IRE_GENERATION_VERIFY;
} else {
if (ires[0]->ire_dep_parent == NULL)
generation = ires[0]->ire_generation;
else
generation = ire_dep_validate_generations(ires[0]);
/* The first IRE changed since it was recorded: report VERIFY. */
if (generations[0] != ires[0]->ire_generation) {
generation = IRE_GENERATION_VERIFY;
}
}
if (generationp != NULL)
*generationp = generation;
return (ires[0]);
}
/*
 * Recursive IPv4 route lookup with no starting IRE: forwards every
 * argument to ire_route_recursive_impl_v4(), which then performs the first
 * table lookup itself.
 */
ire_t *
ire_route_recursive_v4(ipaddr_t nexthop, uint_t ire_type, const ill_t *ill,
    zoneid_t zoneid, const ts_label_t *tsl, uint_t match_args,
    uint_t irr_flags, uint32_t xmit_hint, ip_stack_t *ipst, ipaddr_t *setsrcp,
    tsol_ire_gw_secattr_t **gwattrp, uint_t *generationp)
{
	ire_t *result;

	result = ire_route_recursive_impl_v4(NULL, nexthop, ire_type, ill,
	    zoneid, tsl, match_args, irr_flags, xmit_hint, ipst, setsrcp,
	    gwattrp, generationp);
	return (result);
}
/*
 * Destination-only recursive IPv4 route lookup.
 *
 * Starts with ire_ftable_lookup_simple_v4().  That IRE is returned
 * directly when its dependency parent's generation still matches the
 * recorded one, or when it has no parent and is ire_nce_capable.
 * Otherwise the full recursive lookup is run starting from that IRE, the
 * initial hold is released, and the recursive result is returned.
 */
ire_t *
ire_route_recursive_dstonly_v4(ipaddr_t nexthop, uint_t irr_flags,
    uint32_t xmit_hint, ip_stack_t *ipst)
{
	boolean_t has_parent, parent_current;
	uint_t generation;
	ire_t *first;
	ire_t *resolved;

	first = ire_ftable_lookup_simple_v4(nexthop, xmit_hint, ipst,
	    &generation);
	ASSERT(first != NULL);

	/* Sample the dependency state under the IRE lock. */
	mutex_enter(&first->ire_lock);
	has_parent = (first->ire_dep_parent != NULL);
	parent_current = has_parent &&
	    first->ire_dep_parent->ire_generation ==
	    first->ire_dep_parent_generation;
	mutex_exit(&first->ire_lock);

	if (parent_current)
		return (first);
	if (!has_parent && first->ire_nce_capable)
		return (first);

	resolved = ire_route_recursive_impl_v4(first, nexthop, 0, NULL,
	    ALL_ZONES, NULL, MATCH_IRE_DSTONLY, irr_flags, xmit_hint, ipst,
	    NULL, NULL, &generation);
	ire_refrele(first);
	return (resolved);
}
/*
 * Verify an IRE_IF_CLONE against its dependency parent.
 *
 * If the clone has a parent whose generation no longer equals the recorded
 * ire_dep_parent_generation, the clone is deleted and B_FALSE is returned.
 * Otherwise B_TRUE is returned.
 */
boolean_t
ire_clone_verify(ire_t *ire)
{
	boolean_t stale;

	ASSERT((ire->ire_type & IRE_IF_CLONE) != 0);

	mutex_enter(&ire->ire_lock);
	stale = (ire->ire_dep_parent != NULL &&
	    ire->ire_dep_parent->ire_generation !=
	    ire->ire_dep_parent_generation);
	mutex_exit(&ire->ire_lock);

	if (!stale)
		return (B_TRUE);

	/* Deletion happens after the IRE lock has been dropped. */
	ire_delete(ire);
	return (B_FALSE);
}