#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/policy.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <net/if_dl.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_ndp.h>
#include <inet/arp.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ip_ftable.h>
#include <inet/ip_rts.h>
#include <inet/nd.h>
#include <inet/tunables.h>
#include <inet/tcp.h>
#include <inet/ipclassifier.h>
#include <sys/zone.h>
#include <sys/cpuvar.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
/* kmem cache for struct rt_entry objects; created in ip_ire_g_init(). */
struct kmem_cache *rt_entry_cache;
/*
 * Pairs an IPv4 address with a "found" flag.
 * NOTE(review): no user of this type is visible in this part of the file.
 */
typedef struct nce_clookup_s {
ipaddr_t ncecl_addr;
boolean_t ncecl_found;
} nce_clookup_t;
/*
 * Requested IPv6 forwarding table hash size. ip_ire_init() copies it into
 * each ip_stack_t and rounds the copy up to a power of two.
 */
uint32_t ip6_ftable_hash_size = IP6_FTABLE_HASH_SIZE;
/* kmem caches for ire_t, ncec_t and nce_t; created in ip_ire_g_init(). */
struct kmem_cache *ire_cache;
struct kmem_cache *ncec_cache;
struct kmem_cache *nce_cache;
/* Template assigned to every freshly allocated ire_t before initialization. */
static ire_t ire_null;
/* Forward declarations of static functions used in this file. */
static ire_t *ire_add_v4(ire_t *ire);
static void ire_delete_v4(ire_t *ire);
static void ire_dep_invalidate_children(ire_t *child);
static void ire_walk_ipvers(pfv_t func, void *arg, uchar_t vers,
zoneid_t zoneid, ip_stack_t *);
static void ire_walk_ill_ipvers(uint_t match_flags, uint_t ire_type,
pfv_t func, void *arg, uchar_t vers, ill_t *ill);
#ifdef DEBUG
/* Only needed when reference tracing is compiled in. */
static void ire_trace_cleanup(const ire_t *);
#endif
static void ire_dep_incr_generation_locked(ire_t *);
/*
 * Take a reference on an IRE bucket.
 * The count is modified under irb_lock held as writer.
 */
void
irb_refhold(irb_t *irb)
{
	rw_enter(&irb->irb_lock, RW_WRITER);
	++irb->irb_refcnt;
	/* A zero count after the increment would mean the counter wrapped. */
	ASSERT(irb->irb_refcnt != 0);
	rw_exit(&irb->irb_lock);
}
/*
 * Take a reference on an IRE bucket when the caller already holds
 * irb_lock as writer (asserted).
 */
void
irb_refhold_locked(irb_t *irb)
{
	ASSERT(RW_WRITE_HELD(&irb->irb_lock));
	++irb->irb_refcnt;
	/* A zero count after the increment would mean the counter wrapped. */
	ASSERT(irb->irb_refcnt != 0);
}
/*
 * Drop a reference on an IRE bucket.
 *
 * Buckets marked IRB_MARK_DYNAMIC are handed to irb_refrele_ftable().
 * For all other buckets the count is decremented under irb_lock; when it
 * reaches zero and the bucket is marked IRB_MARK_CONDEMNED, the condemned
 * entries are unlinked under the lock and cleaned up after it is dropped.
 */
void
irb_refrele(irb_t *irb)
{
	ire_t	*condemned;

	if (irb->irb_marks & IRB_MARK_DYNAMIC) {
		irb_refrele_ftable(irb);
		return;
	}

	rw_enter(&irb->irb_lock, RW_WRITER);
	ASSERT(irb->irb_refcnt != 0);
	irb->irb_refcnt--;
	if (irb->irb_refcnt != 0 || !(irb->irb_marks & IRB_MARK_CONDEMNED)) {
		/* Still referenced, or nothing is waiting to be unlinked. */
		rw_exit(&irb->irb_lock);
		return;
	}

	/* Last reference on a condemned bucket: detach the dead entries. */
	condemned = ire_unlink(irb);
	rw_exit(&irb->irb_lock);
	ASSERT(condemned != NULL);
	ire_cleanup(condemned);
}
/*
 * Atomically take a reference on an IRE. On DEBUG builds the hold is
 * also recorded through ire_trace_ref().
 */
void
ire_refhold(ire_t *ire)
{
	atomic_inc_32(&ire->ire_refcnt);
	/* Zero here would mean the 32-bit counter wrapped. */
	ASSERT(ire->ire_refcnt != 0);
#ifdef DEBUG
	ire_trace_ref(ire);
#endif
}
/*
 * Atomically take a reference on an IRE without recording a trace entry.
 */
void
ire_refhold_notr(ire_t *ire)
{
	atomic_inc_32(&ire->ire_refcnt);
	/* Zero here would mean the 32-bit counter wrapped. */
	ASSERT(ire->ire_refcnt != 0);
}
/*
 * Take a reference on an IRE using a plain (non-atomic) increment.
 * NOTE(review): the caller is presumably expected to guarantee exclusive
 * access to ire_refcnt; nothing is asserted here.
 */
void
ire_refhold_locked(ire_t *ire)
{
#ifdef DEBUG
	ire_trace_ref(ire);
#endif
	ire->ire_refcnt += 1;
}
/*
 * Drop a reference on an IRE; the IRE is passed to ire_inactive() when
 * the last reference goes away. On DEBUG builds the matching trace
 * record is removed first.
 */
void
ire_refrele(ire_t *ire)
{
#ifdef DEBUG
	ire_untrace_ref(ire);
#endif
	ASSERT(ire->ire_refcnt != 0);
	/* Barrier is issued before the count is decremented. */
	membar_exit();
	if (atomic_dec_32_nv(&ire->ire_refcnt) != 0)
		return;
	ire_inactive(ire);
}
/*
 * Drop a reference on an IRE without touching the trace records; the IRE
 * is passed to ire_inactive() when the last reference goes away.
 */
void
ire_refrele_notr(ire_t *ire)
{
	ASSERT(ire->ire_refcnt != 0);
	/* Barrier is issued before the count is decremented. */
	membar_exit();
	if (atomic_dec_32_nv(&ire->ire_refcnt) != 0)
		return;
	ire_inactive(ire);
}
/*
 * ioctl-style handler that marks the route to a given destination as
 * "no good".
 *
 * q       - queue the request arrived on; must have no q_next.
 * mp      - message whose data starts with an ipid_t describing where the
 *           sockaddr (sin_t or sin6_t) lives inside the message.
 * ioc_cr  - caller credentials; when non-NULL they must pass
 *           secpolicy_ip_config().
 *
 * Returns EPERM when the privilege check fails, EINVAL when the address
 * is missing, misaligned or of an unexpected length, and 0 otherwise
 * (including when no matching IRE exists).
 */
int
ip_ire_delete(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
{
	ip_stack_t	*ipst;
	zoneid_t	zoneid;
	ipid_t		*ipid;
	uchar_t		*sockaddrp;
	boolean_t	isv4;
	ire_t		*ire;

	ASSERT(q->q_next == NULL);
	zoneid = IPCL_ZONEID(Q_TO_CONN(q));
	ipst = CONNQ_TO_IPST(q);

	if (ioc_cr != NULL && secpolicy_ip_config(ioc_cr, B_FALSE) != 0)
		return (EPERM);

	ipid = (ipid_t *)mp->b_rptr;

	/* Locate the sockaddr inside the message and check its alignment. */
	sockaddrp = mi_offset_param(mp, ipid->ipid_addr_offset,
	    ipid->ipid_addr_length);
	if (sockaddrp == NULL || !OK_32PTR(sockaddrp))
		return (EINVAL);

	/* The sockaddr length tells us which address family was supplied. */
	if (ipid->ipid_addr_length == sizeof (sin_t))
		isv4 = B_TRUE;
	else if (ipid->ipid_addr_length == sizeof (sin6_t))
		isv4 = B_FALSE;
	else
		return (EINVAL);

	if (isv4) {
		ipaddr_t	dst;

		bcopy(&((sin_t *)sockaddrp)->sin_addr.s_addr, &dst,
		    IP_ADDR_LEN);
		ire = ire_ftable_lookup_v4(dst, 0, 0, 0, NULL,
		    zoneid, NULL, MATCH_IRE_DSTONLY, 0, ipst, NULL);
	} else {
		in6_addr_t	dst6;

		bcopy(&((sin6_t *)sockaddrp)->sin6_addr, &dst6,
		    IPV6_ADDR_LEN);
		ire = ire_ftable_lookup_v6(&dst6, NULL, NULL, 0, NULL,
		    zoneid, NULL, MATCH_IRE_DSTONLY, 0, ipst, NULL);
	}

	if (ire == NULL)
		return (0);

	/* Only IPv4 generates an RTM_LOSING routing socket message. */
	if (isv4) {
		ip_rts_change(RTM_LOSING, ire->ire_addr,
		    ire->ire_gateway_addr, ire->ire_mask,
		    (Q_TO_CONN(q))->conn_laddr_v4, 0, 0, 0,
		    (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_IFA),
		    ire->ire_ipst);
	}
	(void) ire_no_good(ire);
	ire_refrele(ire);
	return (0);
}
/*
 * Initialize an already allocated (and ire_null'ed) ire_t as an IPv4 IRE.
 *
 * addr, mask, gateway - optional pointers to IP_ADDR_LEN bytes; NULL
 *                       leaves the corresponding field untouched.
 * type                - IRE_* type; selects the mask handling and the
 *                       send/receive function pointers.
 * gc                  - gateway credentials; only accepted on labeled
 *                       systems.
 *
 * Returns 0 on success, EINVAL when gc is supplied on an unlabeled
 * system or the type is unknown, or the error from ire_init_common().
 */
int
ire_init_v4(ire_t *ire, uchar_t *addr, uchar_t *mask, uchar_t *gateway,
    ushort_t type, ill_t *ill, zoneid_t zoneid, uint_t flags,
    tsol_gc_t *gc, ip_stack_t *ipst)
{
	int error;

	if (gc != NULL && !is_system_labeled())
		return (EINVAL);

	BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_alloced);

	if (addr != NULL)
		bcopy(addr, &ire->ire_addr, IP_ADDR_LEN);
	if (gateway != NULL)
		bcopy(gateway, &ire->ire_gateway_addr, IP_ADDR_LEN);

	switch (type) {
	case IRE_LOOPBACK:
	case IRE_HOST:
	case IRE_BROADCAST:
	case IRE_LOCAL:
	case IRE_IF_CLONE:
		/* These types always describe a single host. */
		ire->ire_mask = IP_HOST_MASK;
		ire->ire_masklen = IPV4_ABITS;
		break;
	case IRE_PREFIX:
	case IRE_DEFAULT:
	case IRE_IF_RESOLVER:
	case IRE_IF_NORESOLVER:
		/* Caller-supplied mask; the zeroed default stays otherwise. */
		if (mask != NULL) {
			bcopy(mask, &ire->ire_mask, IP_ADDR_LEN);
			ire->ire_masklen = ip_mask_to_plen(ire->ire_mask);
		}
		break;
	case IRE_MULTICAST:
	case IRE_NOROUTE:
		ASSERT(mask == NULL);
		break;
	default:
		ASSERT(0);
		return (EINVAL);
	}

	error = ire_init_common(ire, type, ill, zoneid, flags, IPV4_VERSION,
	    gc, ipst);
	if (error != 0)
		return (error);

	/* Pick the function pointers; ip_xmit is the default postfrag. */
	ire->ire_postfragfn = ip_xmit;

	switch (ire->ire_type) {
	case IRE_LOCAL:
		ire->ire_sendfn = ire_send_local_v4;
		ire->ire_recvfn = ire_recv_local_v4;
		ASSERT(ire->ire_ill != NULL);
		/* This is an IPv4 IRE, so use the IPv4 noaccept handler. */
		if (ire->ire_ill->ill_flags & ILLF_NOACCEPT)
			ire->ire_recvfn = ire_recv_noaccept_v4;
		break;
	case IRE_LOOPBACK:
		ire->ire_sendfn = ire_send_local_v4;
		ire->ire_recvfn = ire_recv_loopback_v4;
		break;
	case IRE_BROADCAST:
		ire->ire_postfragfn = ip_postfrag_loopcheck;
		ire->ire_sendfn = ire_send_broadcast_v4;
		ire->ire_recvfn = ire_recv_broadcast_v4;
		break;
	case IRE_MULTICAST:
		ire->ire_postfragfn = ip_postfrag_loopcheck;
		ire->ire_sendfn = ire_send_multicast_v4;
		ire->ire_recvfn = ire_recv_multicast_v4;
		break;
	default:
		/* Everything else sends on the wire and forwards on receive. */
		ire->ire_sendfn = ire_send_wire_v4;
		ire->ire_recvfn = ire_recv_forward_v4;
		break;
	}

	/* Route flags override the per-type choices made above. */
	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
		ire->ire_sendfn = ire_send_noroute_v4;
		ire->ire_recvfn = ire_recv_noroute_v4;
	} else if (ire->ire_flags & RTF_MULTIRT) {
		ire->ire_postfragfn = ip_postfrag_multirt_v4;
		ire->ire_sendfn = ire_send_multirt_v4;
		/* Broadcast IREs keep their broadcast receive function. */
		if (ire->ire_type != IRE_BROADCAST)
			ire->ire_recvfn = ire_recv_multirt_v4;
	}
	ire->ire_nce_capable = ire_determine_nce_capable(ire);
	return (0);
}
/*
 * Compute the value stored in ire_nce_capable.
 *
 * Returns B_TRUE for reject/blackhole routes, for multicast IREs, and for
 * on-link IREs whose prefix length is the full address width for the
 * IRE's IP version; B_FALSE otherwise.
 */
boolean_t
ire_determine_nce_capable(ire_t *ire)
{
	int	host_plen;

	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))
		return (B_TRUE);
	if (ire->ire_type & IRE_MULTICAST)
		return (B_TRUE);

	host_plen = (ire->ire_ipversion == IPV4_VERSION) ?
	    IPV4_ABITS : IPV6_ABITS;

	if (!(ire->ire_type & IRE_ONLINK))
		return (B_FALSE);
	if (ire->ire_masklen != host_plen)
		return (B_FALSE);
	return (B_TRUE);
}
/*
 * Allocate and initialize an IPv4 IRE.
 *
 * The allocation does not sleep (KM_NOSLEEP). Returns the new IRE, or
 * NULL when either the allocation or ire_init_v4() fails; a DTrace probe
 * fires in both failure cases.
 */
ire_t *
ire_create(uchar_t *addr, uchar_t *mask, uchar_t *gateway,
    ushort_t type, ill_t *ill, zoneid_t zoneid, uint_t flags, tsol_gc_t *gc,
    ip_stack_t *ipst)
{
	ire_t	*new_ire;
	int	err;

	new_ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP);
	if (new_ire == NULL) {
		DTRACE_PROBE(kmem__cache__alloc);
		return (NULL);
	}

	/* Start from the zeroed template before filling in the fields. */
	*new_ire = ire_null;

	err = ire_init_v4(new_ire, addr, mask, gateway, type, ill, zoneid,
	    flags, gc, ipst);
	if (err == 0)
		return (new_ire);

	DTRACE_PROBE2(ire__init, ire_t *, new_ire, int, err);
	kmem_cache_free(ire_cache, new_ire);
	return (NULL);
}
/*
 * Initialization shared by the IPv4 and IPv6 IRE init routines.
 *
 * On labeled systems the gateway credentials are either released (for
 * local, loopback, broadcast, interface, multicast and noroute types) or
 * attached through tsol_ire_init_gwattr(). The IRE starts with a
 * reference count of 1 and an identical-reference count of 1.
 *
 * Returns 0, or the error from tsol_ire_init_gwattr().
 */
int
ire_init_common(ire_t *ire, ushort_t type, ill_t *ill, zoneid_t zoneid,
    uint_t flags, uchar_t ipversion, tsol_gc_t *gc, ip_stack_t *ipst)
{
#ifdef DEBUG
	/* The ill, when given, must agree with the requested IP version. */
	if (ill != NULL) {
		ASSERT(ipversion ==
		    (ill->ill_isv6 ? IPV6_VERSION : IPV4_VERSION));
	}
#endif

	if (is_system_labeled()) {
		if ((type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST |
		    IRE_IF_ALL | IRE_MULTICAST | IRE_NOROUTE)) == 0) {
			int	err;

			err = tsol_ire_init_gwattr(ire, ipversion, gc);
			if (err != 0)
				return (err);
		} else if (gc != NULL) {
			/* These types carry no gateway attributes. */
			GC_REFRELE(gc);
		}
	}

	/* Identity of the entry. */
	ire->ire_type = type;
	ire->ire_ipversion = ipversion;
	ire->ire_zoneid = zoneid;
	ire->ire_ill = ill;
	ire->ire_ipst = ipst;

	/* State. */
	ire->ire_flags = RTF_UP | flags;
	ire->ire_create_time = (uint32_t)gethrestime_sec();
	ire->ire_generation = IRE_GENERATION_INITIAL;
	ire->ire_trace_disable = B_FALSE;

	/* Locking and reference counts. */
	mutex_init(&ire->ire_lock, NULL, MUTEX_DEFAULT, NULL);
	ire->ire_refcnt = 1;
	ire->ire_identical_ref = 1;

	return (0);
}
/*
 * Create an IRE_BROADCAST for addr on ill and store it in *irep.
 *
 * The caller must be the writer on the ill (asserted). The slot receives
 * whatever ire_create() returns, which may be NULL. Returns the address
 * of the slot following the one just filled.
 */
ire_t **
ire_create_bcast(ill_t *ill, ipaddr_t addr, zoneid_t zoneid, ire_t **irep)
{
	ip_stack_t	*ipst = ill->ill_ipst;
	ire_t		*bcast;

	ASSERT(IAM_WRITER_ILL(ill));

	bcast = ire_create((uchar_t *)&addr, (uchar_t *)&ip_g_all_ones, NULL,
	    IRE_BROADCAST, ill, zoneid, RTF_KERNEL, NULL, ipst);
	*irep = bcast;
	return (irep + 1);
}
/*
 * Look up the IRE_BROADCAST for addr on ill in the given zone.
 *
 * The lookup matches on type, ill, gateway (0), mask (all ones) and zone
 * only; MATCH_IRE_TESTHIDDEN is added when the ill is under IPMP.
 * Returns whatever ire_ftable_lookup_v4() returns.
 */
ire_t *
ire_lookup_bcast(ill_t *ill, ipaddr_t addr, zoneid_t zoneid)
{
	int	flags = MATCH_IRE_TYPE | MATCH_IRE_ILL | MATCH_IRE_GW |
	    MATCH_IRE_MASK | MATCH_IRE_ZONEONLY;

	if (IS_UNDER_IPMP(ill))
		flags |= MATCH_IRE_TESTHIDDEN;

	return (ire_ftable_lookup_v4(addr, ip_g_all_ones, 0, IRE_BROADCAST,
	    ill, zoneid, NULL, flags, 0, ill->ill_ipst, NULL));
}
/*
 * Apply func(ire, arg) to every IPv4 and IPv6 IRE of the stack, across
 * all zones. A version of 0 selects both address families.
 */
void
ire_walk(pfv_t func, void *arg, ip_stack_t *ipst)
{
	const uchar_t	both_versions = 0;

	ire_walk_ipvers(func, arg, both_versions, ALL_ZONES, ipst);
}
/*
 * Apply func(ire, arg) to the IPv4 IREs of the stack, restricted to
 * zoneid.
 */
void
ire_walk_v4(pfv_t func, void *arg, zoneid_t zoneid, ip_stack_t *ipst)
{
	/* Delegate to the version-aware walker, selecting IPv4 only. */
	ire_walk_ipvers(func, arg, IPV4_VERSION, zoneid, ipst);
}
/*
 * Apply func(ire, arg) to the IPv6 IREs of the stack, restricted to
 * zoneid.
 */
void
ire_walk_v6(pfv_t func, void *arg, zoneid_t zoneid, ip_stack_t *ipst)
{
	/* Delegate to the version-aware walker, selecting IPv6 only. */
	ire_walk_ipvers(func, arg, IPV6_VERSION, zoneid, ipst);
}
/*
 * Walk the IREs of one or both IP versions.
 *
 * vers == IPV4_VERSION walks IPv4 only, vers == IPV6_VERSION walks IPv6
 * only, and any other value walks both (IPv4 first).
 */
static void
ire_walk_ipvers(pfv_t func, void *arg, uchar_t vers, zoneid_t zoneid,
    ip_stack_t *ipst)
{
	boolean_t	want_v4 = (vers != IPV6_VERSION);
	boolean_t	want_v6 = (vers != IPV4_VERSION);

	if (want_v4) {
		/* No hash table is passed for the IPv4 walk. */
		ire_walk_ill_tables(0, 0, func, arg, IP_MASK_TABLE_SIZE,
		    0, NULL, NULL, zoneid, ipst);
	}
	if (want_v6) {
		ire_walk_ill_tables(0, 0, func, arg, IP6_MASK_TABLE_SIZE,
		    ipst->ips_ip6_ftable_hash_size,
		    ipst->ips_ip_forwarding_table_v6, NULL, zoneid, ipst);
	}
}
/*
 * Walk the IREs associated with ill, using the IP version of the ill
 * itself, and apply func(ire, arg) to those selected by match_flags and
 * ire_type.
 */
void
ire_walk_ill(uint_t match_flags, uint_t ire_type, pfv_t func, void *arg,
    ill_t *ill)
{
	uchar_t	vers;

	if (ill->ill_isv6)
		vers = IPV6_VERSION;
	else
		vers = IPV4_VERSION;

	ire_walk_ill_ipvers(match_flags, ire_type, func, arg, vers, ill);
}
/*
 * Walk the forwarding table of the given IP version on behalf of ill,
 * across all zones. Any version other than IPV4_VERSION selects the IPv6
 * table.
 */
static void
ire_walk_ill_ipvers(uint_t match_flags, uint_t ire_type, pfv_t func,
    void *arg, uchar_t vers, ill_t *ill)
{
	ip_stack_t	*ipst = ill->ill_ipst;

	if (vers == IPV4_VERSION) {
		/* No hash table is passed for the IPv4 walk. */
		ire_walk_ill_tables(match_flags, ire_type, func, arg,
		    IP_MASK_TABLE_SIZE, 0, NULL, ill, ALL_ZONES, ipst);
	} else {
		ire_walk_ill_tables(match_flags, ire_type, func, arg,
		    IP6_MASK_TABLE_SIZE, ipst->ips_ip6_ftable_hash_size,
		    ipst->ips_ip_forwarding_table_v6, ill, ALL_ZONES, ipst);
	}
}
/*
 * Decide whether a walker should visit ire.
 *
 * match_flags/ire_type/ill select by IRE type (MATCH_IRE_TYPE) and by ill
 * or ill group (MATCH_IRE_ILL); zoneid restricts the walk to IREs usable
 * from that zone. Callers must pass either non-zero match_flags or a
 * specific zoneid (asserted).
 *
 * Returns B_TRUE when the IRE passes all zone checks and the requested
 * type/ill matching, B_FALSE otherwise.
 */
boolean_t
ire_walk_ill_match(uint_t match_flags, uint_t ire_type, ire_t *ire,
ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst)
{
ill_t *dst_ill = ire->ire_ill;
ASSERT(match_flags != 0 || zoneid != ALL_ZONES);
/*
 * The walk is for a specific zone and the IRE belongs to a different
 * specific zone: it is only visible if the checks below succeed.
 */
if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid &&
ire->ire_zoneid != ALL_ZONES) {
if (ire->ire_type & IRE_ONLINK) {
uint_t ifindex;
/*
 * NOTE(review): dst_ill is dereferenced here without a NULL check,
 * unlike the IRE_OFFLINK case below - confirm on-link IREs reaching
 * this point always have an ill.
 */
ifindex = dst_ill->ill_usesrc_ifindex;
if (ifindex == 0)
return (B_FALSE);
/* The usesrc ill must be available to the zone. */
if (!ipif_zone_avail(ifindex, dst_ill->ill_isv6,
zoneid, ipst)) {
return (B_FALSE);
}
}
if (dst_ill != NULL && (ire->ire_type & IRE_OFFLINK)) {
ipif_t *tipif;
/*
 * Require an ipif on the ill that is up, not condemned, and that
 * belongs to the zone (or to all zones).
 */
mutex_enter(&dst_ill->ill_lock);
for (tipif = dst_ill->ill_ipif;
tipif != NULL; tipif = tipif->ipif_next) {
if (!IPIF_IS_CONDEMNED(tipif) &&
(tipif->ipif_flags & IPIF_UP) &&
(tipif->ipif_zoneid == zoneid ||
tipif->ipif_zoneid == ALL_ZONES))
break;
}
mutex_exit(&dst_ill->ill_lock);
if (tipif == NULL) {
return (B_FALSE);
}
}
}
/*
 * For a zone-specific walk, off-link routes are further filtered on
 * whether their gateway passes ire_gateway_ok_zone_v4/v6 for the zone.
 */
if ((ire->ire_type & IRE_OFFLINK) && zoneid != ALL_ZONES) {
in6_addr_t gw_addr_v6;
boolean_t reach;
if (ire->ire_ipversion == IPV4_VERSION) {
reach = ire_gateway_ok_zone_v4(ire->ire_gateway_addr,
zoneid, dst_ill, NULL, ipst, B_FALSE);
} else {
ASSERT(ire->ire_ipversion == IPV6_VERSION);
/* Copy the IPv6 gateway address while holding ire_lock. */
mutex_enter(&ire->ire_lock);
gw_addr_v6 = ire->ire_gateway_addr_v6;
mutex_exit(&ire->ire_lock);
reach = ire_gateway_ok_zone_v6(&gw_addr_v6, zoneid,
dst_ill, NULL, ipst, B_FALSE);
}
if (!reach) {
/* Only the global zone may still see such a route. */
if (zoneid != GLOBAL_ZONEID)
return (B_FALSE);
/* Repeat the gateway check with ALL_ZONES. */
if (ire->ire_ipversion == IPV4_VERSION) {
reach = ire_gateway_ok_zone_v4(
ire->ire_gateway_addr, ALL_ZONES,
dst_ill, NULL, ipst, B_FALSE);
} else {
reach = ire_gateway_ok_zone_v6(&gw_addr_v6,
ALL_ZONES, dst_ill, NULL, ipst, B_FALSE);
}
/* If the ALL_ZONES check succeeds, hide it from the global zone. */
if (reach) {
return (B_FALSE);
}
}
}
/* Finally apply the optional type and ill (or ill group) matching. */
if (((!(match_flags & MATCH_IRE_TYPE)) ||
(ire->ire_type & ire_type)) &&
((!(match_flags & MATCH_IRE_ILL)) ||
(dst_ill == ill ||
dst_ill != NULL && IS_IN_SAME_ILLGRP(dst_ill, ill)))) {
return (B_TRUE);
}
return (B_FALSE);
}
/*
 * Per-node callback for the radix tree walk.
 *
 * rn is treated as a struct rt_entry and arg as a struct rtfuncarg. Every
 * IRE in the node's bucket that passes the walker's filter is handed to
 * rt_func(ire, rt_arg); with no match flags and ALL_ZONES every IRE
 * passes. Always returns 0.
 */
int
rtfunc(struct radix_node *rn, void *arg)
{
	struct rt_entry		*rt = (struct rt_entry *)rn;
	struct rtfuncarg	*rtf = arg;
	ire_t			*ire;

	ASSERT(rt != NULL);

	for (ire = rt->rt_irb.irb_ire; ire != NULL; ire = ire->ire_next) {
		boolean_t	unfiltered;

		unfiltered = (rtf->rt_match_flags == 0 &&
		    rtf->rt_zoneid == ALL_ZONES);
		if (unfiltered || ire_walk_ill_match(rtf->rt_match_flags,
		    rtf->rt_ire_type, ire, rtf->rt_ill, rtf->rt_zoneid,
		    rtf->rt_ipst)) {
			(*rtf->rt_func)(ire, rtf->rt_arg);
		}
	}
	return (0);
}
/*
 * Walk a forwarding table and call func(ire, arg) on each IRE accepted
 * by ire_walk_ill_match() (or on every IRE when match_flags is 0 and
 * zoneid is ALL_ZONES).
 *
 * When ipftbl is the stack's IPv6 forwarding table, the ftbl_sz rows of
 * htbl_sz buckets are scanned from the last row to the first, holding
 * each non-empty bucket for the duration of its scan. Otherwise the
 * stack's radix tree (ips_ip_ftable) is walked with rtfunc().
 */
void
ire_walk_ill_tables(uint_t match_flags, uint_t ire_type, pfv_t func,
    void *arg, size_t ftbl_sz, size_t htbl_sz, irb_t **ipftbl,
    ill_t *ill, zoneid_t zoneid,
    ip_stack_t *ipst)
{
	struct rtfuncarg	walk_args;
	irb_t			*row;
	irb_t			*bucket;
	ire_t			*ire;
	int			i, j;

	ASSERT((!(match_flags & MATCH_IRE_ILL)) || (ill != NULL));
	ASSERT(!(match_flags & MATCH_IRE_TYPE) || (ire_type != 0));

	if (ipftbl != ipst->ips_ip_forwarding_table_v6) {
		/* Package the walk parameters for rtfunc(). */
		bzero(&walk_args, sizeof (walk_args));
		walk_args.rt_func = func;
		walk_args.rt_arg = arg;
		walk_args.rt_match_flags = match_flags;
		walk_args.rt_ire_type = ire_type;
		walk_args.rt_ill = ill;
		walk_args.rt_zoneid = zoneid;
		walk_args.rt_ipst = ipst;
		(void) ipst->ips_ip_ftable->rnh_walktree_mt(
		    ipst->ips_ip_ftable,
		    rtfunc, &walk_args, irb_refhold_rn, irb_refrele_rn);
		return;
	}

	for (i = (ftbl_sz - 1); i >= 0; i--) {
		row = ipftbl[i];
		if (row == NULL)
			continue;
		for (j = 0; j < htbl_sz; j++) {
			bucket = &row[j];
			if (bucket->irb_ire == NULL)
				continue;

			/* Hold the bucket while its chain is traversed. */
			irb_refhold(bucket);
			for (ire = bucket->irb_ire; ire != NULL;
			    ire = ire->ire_next) {
				if ((match_flags == 0 &&
				    zoneid == ALL_ZONES) ||
				    ire_walk_ill_match(match_flags,
				    ire_type, ire, ill, zoneid, ipst)) {
					(*func)(ire, arg);
				}
			}
			irb_refrele(bucket);
		}
	}
}
/*
 * Convert an IPv4 netmask (network byte order) to a prefix length.
 * A zero mask yields 0; otherwise the length is derived from the
 * position of the lowest set bit of the host-order mask.
 */
int
ip_mask_to_plen(ipaddr_t mask)
{
	int	lowest_bit;

	if (mask == 0)
		return (0);

	/* ffs() is 1-based, hence the adjustment. */
	lowest_bit = ffs(ntohl(mask)) - 1;
	return (IP_ABITS - lowest_bit);
}
/*
 * Convert a prefix length to an IPv4 netmask in network byte order.
 * A length of 0 yields a zero mask.
 */
ipaddr_t
ip_plen_to_mask(uint_t masklen)
{
	return (masklen == 0 ? 0 :
	    htonl(IP_HOST_MASK << (IP_ABITS - masklen)));
}
/*
 * Release the locks taken by ire_atomic_start(): the ill lock (when the
 * IRE has an ill) followed by the bucket lock.
 */
void
ire_atomic_end(irb_t *irb_ptr, ire_t *ire)
{
	if (ire->ire_ill != NULL)
		mutex_exit(&ire->ire_ill->ill_lock);
	rw_exit(&irb_ptr->irb_lock);
}
/*
 * Acquire the locks needed to insert ire into irb_ptr: the bucket lock
 * as writer and, when the IRE has an ill, the ill lock.
 *
 * Returns 0 with the locks held. On failure both locks are released via
 * ire_atomic_end() and an error is returned:
 *   ENXIO  - the ill is condemned or going down;
 *   EINVAL - the ill is under IPMP and inactive, the IRE type is a
 *            hidden type, and the IRE is not marked ire_testhidden.
 */
int
ire_atomic_start(irb_t *irb_ptr, ire_t *ire)
{
	ill_t	*ill = ire->ire_ill;
	int	err = 0;

	rw_enter(&irb_ptr->irb_lock, RW_WRITER);
	if (ill == NULL)
		return (0);

	mutex_enter(&ill->ill_lock);
	if ((ill->ill_state_flags &
	    (ILL_CONDEMNED|ILL_DOWN_IN_PROGRESS)) != 0) {
		ire_atomic_end(irb_ptr, ire);
		DTRACE_PROBE1(ire__add__on__dying__ill, ire_t *, ire);
		return (ENXIO);
	}

	if (!IS_UNDER_IPMP(ill))
		return (0);

	/* The activity check is made under the phyint lock. */
	mutex_enter(&ill->ill_phyint->phyint_lock);
	if (!ipmp_ill_is_active(ill) &&
	    IRE_HIDDEN_TYPE(ire->ire_type) &&
	    !ire->ire_testhidden) {
		err = EINVAL;
	}
	mutex_exit(&ill->ill_phyint->phyint_lock);

	if (err != 0)
		ire_atomic_end(irb_ptr, ire);
	return (err);
}
/*
 * Add an IRE to the forwarding table for its IP version.
 *
 * IREs of a hidden type on an ill that is under IPMP are marked
 * ire_testhidden first. Returns the result of ire_add_v6()/ire_add_v4().
 */
ire_t *
ire_add(ire_t *ire)
{
	ill_t	*ill = ire->ire_ill;

	if (IRE_HIDDEN_TYPE(ire->ire_type) && ill != NULL &&
	    IS_UNDER_IPMP(ill)) {
		ire->ire_testhidden = B_TRUE;
	}

	if (ire->ire_ipversion != IPV6_VERSION)
		return (ire_add_v4(ire));
	return (ire_add_v6(ire));
}
/*
 * Insert an IPv4 IRE into its forwarding table bucket.
 *
 * Returns the IRE that ends up in the table with an extra reference taken
 * for the caller: either ire itself, or an already present equivalent IRE
 * (in which case ire is deleted). Returns NULL, after deleting ire, when no
 * bucket can be found or ire_atomic_start() fails.
 */
static ire_t *
ire_add_v4(ire_t *ire)
{
ire_t *ire1;
irb_t *irb_ptr;
ire_t **irep;
int match_flags;
int error;
ip_stack_t *ipst = ire->ire_ipst;
if (ire->ire_ill != NULL)
ASSERT(!MUTEX_HELD(&ire->ire_ill->ill_lock));
ASSERT(ire->ire_ipversion == IPV4_VERSION);
/* Make sure the stored address has no bits outside the mask. */
ire->ire_addr &= ire->ire_mask;
/* Duplicates are detected on mask, type and gateway (plus ill if set). */
match_flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
if (ire->ire_ill != NULL) {
match_flags |= MATCH_IRE_ILL;
}
/*
 * NOTE(review): the bucket is released with irb_refrele() on every later
 * exit path, so ire_get_bucket() presumably returns it held - confirm.
 */
irb_ptr = ire_get_bucket(ire);
if (irb_ptr == NULL) {
printf("no bucket for %p\n", (void *)ire);
ire_delete(ire);
return (NULL);
}
/* Take the bucket lock (and ill lock); fails for dying ills. */
error = ire_atomic_start(irb_ptr, ire);
if (error != 0) {
printf("no ire_atomic_start for %p\n", (void *)ire);
ire_delete(ire);
irb_refrele(irb_ptr);
return (NULL);
}
/* A hidden IRE must only be compared against other hidden IREs. */
if (ire->ire_testhidden)
match_flags |= MATCH_IRE_TESTHIDDEN;
/* Look for an existing live entry with the same zone, type and key. */
for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
if (IRE_IS_CONDEMNED(ire1))
continue;
if (ire1->ire_zoneid != ire->ire_zoneid)
continue;
if (ire1->ire_type != ire->ire_type)
continue;
if (ire_match_args(ire1, ire->ire_addr, ire->ire_mask,
ire->ire_gateway_addr, ire->ire_type, ire->ire_ill,
ire->ire_zoneid, NULL, match_flags)) {
/*
 * Duplicate found: count one more identical user (except for
 * IRE_IF_CLONE), hand back the existing entry held, and drop the
 * new one.
 */
if (ire->ire_type != IRE_IF_CLONE) {
atomic_inc_32(&ire1->ire_identical_ref);
DTRACE_PROBE2(ire__add__exist, ire_t *, ire1,
ire_t *, ire);
}
ire_refhold(ire1);
ire_atomic_end(irb_ptr, ire);
ire_delete(ire);
irb_refrele(irb_ptr);
return (ire1);
}
}
/*
 * Insert at the head by default; IRE_IF_CLONE and non-multirt
 * IRE_BROADCAST entries are appended at the tail instead.
 */
irep = (ire_t **)irb_ptr;
if ((ire->ire_type & IRE_IF_CLONE) ||
((ire->ire_type & IRE_BROADCAST) &&
!(ire->ire_flags & RTF_MULTIRT))) {
while ((ire1 = *irep) != NULL)
irep = &ire1->ire_next;
}
/* Link the new entry in front of whatever *irep points at. */
ire1 = *irep;
if (ire1 != NULL)
ire1->ire_ptpn = &ire->ire_next;
ire->ire_next = ire1;
ire->ire_ptpn = irep;
/* Barrier before the store below that makes the entry reachable. */
membar_producer();
*irep = ire;
ire->ire_bucket = irb_ptr;
/* Take another reference; the entry is returned to the caller below. */
ire_refhold_locked(ire);
BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_inserted);
irb_ptr->irb_ire_cnt++;
if (irb_ptr->irb_marks & IRB_MARK_DYNAMIC)
irb_ptr->irb_nire++;
/* The ill counts the IREs that point at it. */
if (ire->ire_ill != NULL) {
ire->ire_ill->ill_ire_cnt++;
ASSERT(ire->ire_ill->ill_ire_cnt != 0);
}
ire_atomic_end(irb_ptr, ire);
/* Update generation numbers now that the locks are dropped. */
ire_flush_cache_v4(ire, IRE_FLUSH_ADD);
if (ire->ire_ill != NULL)
ASSERT(!MUTEX_HELD(&ire->ire_ill->ill_lock));
irb_refrele(irb_ptr);
return (ire);
}
/*
 * Finish deleting a list of IREs that have already been unlinked from
 * their bucket (linked through ire_next).
 *
 * Each entry gets the version-specific delete processing, has its
 * ire_next cleared, and then has one reference dropped. The deletion
 * statistics are charged to the IP stack of the first entry.
 */
void
ire_cleanup(ire_t *ire)
{
	ire_t		*next;
	ip_stack_t	*ipst;

	/* Validate the argument before it is dereferenced. */
	ASSERT(ire != NULL);
	if (ire == NULL)
		return;

	ipst = ire->ire_ipst;
	while (ire != NULL) {
		/* Remember the successor; the entry may be freed below. */
		next = ire->ire_next;
		if (ire->ire_ipversion == IPV4_VERSION) {
			ire_delete_v4(ire);
			BUMP_IRE_STATS(ipst->ips_ire_stats_v4,
			    ire_stats_deleted);
		} else {
			ASSERT(ire->ire_ipversion == IPV6_VERSION);
			ire_delete_v6(ire);
			BUMP_IRE_STATS(ipst->ips_ire_stats_v6,
			    ire_stats_deleted);
		}
		/* ire_inactive() asserts that ire_next is NULL. */
		ire->ire_next = NULL;
		ire_refrele_notr(ire);
		ire = next;
	}
}
/*
 * Remove every condemned IRE from the bucket's chain and return them as a
 * separate list linked through ire_next (NULL if there were none).
 *
 * The caller must hold irb_lock as writer, the bucket must be marked
 * IRB_MARK_CONDEMNED and be non-empty, and its reference count must be 0
 * (or 1 for IRB_MARK_DYNAMIC buckets); all of this is asserted. The
 * bucket's condemned mark is cleared before returning.
 */
ire_t *
ire_unlink(irb_t *irb)
{
ire_t *ire;
ire_t *ire1;
ire_t **ptpn;
ire_t *ire_list = NULL;
ASSERT(RW_WRITE_HELD(&irb->irb_lock));
ASSERT(((irb->irb_marks & IRB_MARK_DYNAMIC) && irb->irb_refcnt == 1) ||
(irb->irb_refcnt == 0));
ASSERT(irb->irb_marks & IRB_MARK_CONDEMNED);
ASSERT(irb->irb_ire != NULL);
/* ire1 holds the successor because ire->ire_next is rewritten below. */
for (ire = irb->irb_ire; ire != NULL; ire = ire1) {
ire1 = ire->ire_next;
if (IRE_IS_CONDEMNED(ire)) {
/* Splice the entry out of the bucket chain. */
ptpn = ire->ire_ptpn;
ire1 = ire->ire_next;
if (ire1)
ire1->ire_ptpn = ptpn;
*ptpn = ire1;
ire->ire_ptpn = NULL;
ire->ire_next = NULL;
/* Push it onto the list handed back to the caller. */
ire->ire_next = ire_list;
ire_list = ire;
}
}
irb->irb_marks &= ~IRB_MARK_CONDEMNED;
return (ire_list);
}
/*
 * Try to free the radix tree entry that embeds irb.
 *
 * With the radix head write lock and the bucket lock held, the entry is
 * removed from the tree and freed only if the bucket's reference count
 * is exactly 1 and it has no IREs left (irb_nire == 0).
 *
 * Returns B_TRUE when the entry was freed (the bucket lock is not
 * released in that case since it lives in the freed memory), B_FALSE
 * otherwise.
 */
boolean_t
irb_inactive(irb_t *irb)
{
	ip_stack_t		*ipst = irb->irb_ipst;
	struct rt_entry		*rt = IRB2RT(irb);
	struct radix_node	*rn = (struct radix_node *)rt;

	ASSERT(irb->irb_ipst != NULL);

	RADIX_NODE_HEAD_WLOCK(ipst->ips_ip_ftable);
	rw_enter(&irb->irb_lock, RW_WRITER);

	if (irb->irb_refcnt != 1 || irb->irb_nire != 0) {
		/* Still in use; leave the entry in the tree. */
		rw_exit(&irb->irb_lock);
		RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable);
		return (B_FALSE);
	}

	rn = ipst->ips_ip_ftable->rnh_deladdr(rn->rn_key, rn->rn_mask,
	    ipst->ips_ip_ftable);
	DTRACE_PROBE1(irb__free, rt_t *, rt);
	ASSERT((void *)rn == (void *)rt);
	Free(rt, rt_entry_cache);
	RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable);
	return (B_TRUE);
}
/*
 * Delete an IRE.
 *
 * An IRE that was never inserted in a bucket is simply condemned and
 * released. Otherwise one "identical" user is removed; only when the last
 * one goes away is the IRE condemned and, if nobody holds the bucket,
 * unlinked and released here. If the bucket is held, the bucket is marked
 * condemned and the unlink is left to whoever drops the last bucket
 * reference.
 */
void
ire_delete(ire_t *ire)
{
ire_t *ire1;
ire_t **ptpn;
irb_t *irb;
ip_stack_t *ipst = ire->ire_ipst;
/* Never added to a table: condemn it and drop a reference. */
if ((irb = ire->ire_bucket) == NULL) {
ire_make_condemned(ire);
ire_refrele_notr(ire);
return;
}
/*
 * Fold the packet counters of an IRE_IF_CLONE into its parent. This is
 * done under ips_ire_dep_lock, before irb_lock is taken.
 */
if (ire->ire_type & IRE_IF_CLONE) {
ire_t *parent;
rw_enter(&ipst->ips_ire_dep_lock, RW_READER);
if ((parent = ire->ire_dep_parent) != NULL) {
parent->ire_ob_pkt_count += ire->ire_ob_pkt_count;
parent->ire_ib_pkt_count += ire->ire_ib_pkt_count;
ire->ire_ob_pkt_count = 0;
ire->ire_ib_pkt_count = 0;
}
rw_exit(&ipst->ips_ire_dep_lock);
}
rw_enter(&irb->irb_lock, RW_WRITER);
/* Already unlinked from the bucket chain: nothing left to do here. */
if (ire->ire_ptpn == NULL) {
rw_exit(&irb->irb_lock);
return;
}
if (!IRE_IS_CONDEMNED(ire)) {
ASSERT(ire->ire_identical_ref >= 1);
/* Other identical users remain; keep the entry alive. */
if (atomic_dec_32_nv(&ire->ire_identical_ref) != 0) {
rw_exit(&irb->irb_lock);
return;
}
irb->irb_ire_cnt--;
ire_make_condemned(ire);
}
/*
 * The bucket is held by someone: leave the entry on the chain and mark
 * the bucket so the condemned entries are unlinked when it is released.
 */
if (irb->irb_refcnt != 0) {
irb->irb_marks |= IRB_MARK_CONDEMNED;
rw_exit(&irb->irb_lock);
return;
}
/* Nobody holds the bucket: unlink the entry right away. */
ptpn = ire->ire_ptpn;
ire1 = ire->ire_next;
if (ire1 != NULL)
ire1->ire_ptpn = ptpn;
ASSERT(ptpn != NULL);
*ptpn = ire1;
ire->ire_ptpn = NULL;
ire->ire_next = NULL;
if (ire->ire_ipversion == IPV6_VERSION) {
BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_deleted);
} else {
BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_deleted);
}
rw_exit(&irb->irb_lock);
/* Version-specific cleanup runs after the bucket lock is dropped. */
if (ire->ire_ipversion == IPV6_VERSION) {
ire_delete_v6(ire);
} else {
ire_delete_v4(ire);
}
/* Drop a reference now that the entry is off the chain. */
ire_refrele_notr(ire);
}
/*
 * IPv4-specific part of deleting an IRE: update generation numbers,
 * remove host redirects that point at a deleted default gateway, delete
 * the clones of an interface IRE, and detach the IRE from its dependency
 * parent and children.
 */
static void
ire_delete_v4(ire_t *ire)
{
	ip_stack_t	*ipst = ire->ire_ipst;
	ire_t		*child;

	ASSERT(ire->ire_refcnt >= 1);
	ASSERT(ire->ire_ipversion == IPV4_VERSION);

	ire_flush_cache_v4(ire, IRE_FLUSH_DELETE);

	if (ire->ire_type == IRE_DEFAULT)
		ire_delete_host_redirects(ire->ire_gateway_addr, ipst);

	if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != NULL)
		ire_dep_delete_if_clone(ire);

	/* Dependency links are changed under the dep lock as writer. */
	rw_enter(&ipst->ips_ire_dep_lock, RW_WRITER);
	if (ire->ire_dep_parent != NULL)
		ire_dep_remove(ire);
	while ((child = ire->ire_dep_children) != NULL)
		ire_dep_remove(child);
	rw_exit(&ipst->ips_ire_dep_lock);
}
/*
 * Free an IRE whose reference count has dropped to zero.
 *
 * The IRE must already be condemned, unlinked from its bucket chain and
 * detached from all dependency lists (asserted). Releases the gateway
 * security attributes, gives back the count the IRE held on its ill and
 * on a dynamic bucket, and returns the memory to ire_cache.
 */
void
ire_inactive(ire_t *ire)
{
ill_t *ill;
irb_t *irb;
ip_stack_t *ipst = ire->ire_ipst;
ASSERT(ire->ire_refcnt == 0);
ASSERT(ire->ire_ptpn == NULL);
ASSERT(ire->ire_next == NULL);
ASSERT(IRE_IS_CONDEMNED(ire));
/* One less condemned IRE outstanding in this stack. */
atomic_add_32(&ipst->ips_num_ire_condemned, -1);
if (ire->ire_gw_secattr != NULL) {
ire_gw_secattr_free(ire->ire_gw_secattr);
ire->ire_gw_secattr = NULL;
}
ASSERT(ire->ire_nce_cache == NULL);
ASSERT(ire->ire_dep_parent == NULL);
ASSERT(ire->ire_dep_sib_next == NULL);
ASSERT(ire->ire_dep_sib_ptpn == NULL);
ASSERT(ire->ire_dep_children == NULL);
irb = ire->ire_bucket;
ill = ire->ire_ill;
/*
 * ill_ire_cnt is only decremented for IREs that have a bucket, i.e. ones
 * that were inserted (ire_add_v4() increments it at insertion).
 */
if (irb != NULL && ill != NULL) {
mutex_enter(&ill->ill_lock);
ASSERT(ill->ill_ire_cnt != 0);
DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
(char *), "ire", (void *), ire);
ill->ill_ire_cnt--;
/*
 * NOTE(review): ill_lock is not released on this branch, so
 * ipif_ill_refrele_tail() presumably drops it - confirm.
 */
if (ILL_DOWN_OK(ill)) {
ipif_ill_refrele_tail(ill);
} else {
mutex_exit(&ill->ill_lock);
}
}
ire->ire_ill = NULL;
/*
 * For dynamic buckets, drop this IRE from irb_nire and then cycle a
 * hold/release pair so that irb_refrele() can act on the updated bucket.
 */
if (irb != NULL && (irb->irb_marks & IRB_MARK_DYNAMIC)) {
rw_enter(&irb->irb_lock, RW_WRITER);
irb->irb_nire--;
irb_refhold_locked(irb);
rw_exit(&irb->irb_lock);
irb_refrele(irb);
}
#ifdef DEBUG
ire_trace_cleanup(ire);
#endif
mutex_destroy(&ire->ire_lock);
if (ire->ire_ipversion == IPV6_VERSION) {
BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_freed);
} else {
BUMP_IRE_STATS(ipst->ips_ire_stats_v4, ire_stats_freed);
}
kmem_cache_free(ire_cache, ire);
}
static boolean_t
ire_update_generation(struct radix_node *rn, void *arg)
{
struct rt_entry *rt = (struct rt_entry *)rn;
irb_increment_generation(&rt->rt_irb);
return (B_FALSE);
}
/*
 * Bump the generation number of every non-condemned IRE in the bucket
 * and of the dependants of every IRE in the bucket.
 *
 * Does nothing for a NULL or empty bucket. The dependency lock is taken
 * as reader before the bucket lock is taken as writer.
 */
void
irb_increment_generation(irb_t *irb)
{
	ip_stack_t	*ipst;
	ire_t		*cur;

	if (irb == NULL)
		return;
	if (irb->irb_ire_cnt == 0)
		return;

	ipst = irb->irb_ipst;
	rw_enter(&ipst->ips_ire_dep_lock, RW_READER);
	rw_enter(&irb->irb_lock, RW_WRITER);

	cur = irb->irb_ire;
	while (cur != NULL) {
		if (!IRE_IS_CONDEMNED(cur))
			ire_increment_generation(cur);
		ire_dep_incr_generation_locked(cur);
		cur = cur->ire_next;
	}

	rw_exit(&irb->irb_lock);
	rw_exit(&ipst->ips_ire_dep_lock);
}
/*
 * Update generation numbers after an IPv4 IRE has been added, deleted or
 * had its gateway changed, so that cached routing decisions are
 * re-evaluated. IRE_IF_CLONE entries are ignored.
 *
 * flag is one of:
 *   IRE_FLUSH_ADD      - bump the stack's reject and blackhole IREs and,
 *                        unless the new IRE is an IRE_DEFAULT, the
 *                        buckets found by the radix match for this
 *                        IRE's destination;
 *   IRE_FLUSH_DELETE,
 *   IRE_FLUSH_GWCHANGE - bump this IRE's dependants.
 *
 * All updates are made under the radix head write lock.
 */
void
ire_flush_cache_v4(ire_t *ire, int flag)
{
	irb_t		*irb = ire->ire_bucket;
	struct rt_entry	*rt = IRB2RT(irb);
	ip_stack_t	*ipst = ire->ire_ipst;

	if (ire->ire_type & IRE_IF_CLONE)
		return;

	RADIX_NODE_HEAD_WLOCK(ipst->ips_ip_ftable);

	if (flag == IRE_FLUSH_ADD) {
		ire_increment_generation(ipst->ips_ire_reject_v4);
		ire_increment_generation(ipst->ips_ire_blackhole_v4);

		/* Adding an IRE_DEFAULT skips the radix match below. */
		if (ire->ire_type != IRE_DEFAULT) {
			(void) ipst->ips_ip_ftable->rnh_matchaddr_args(
			    &rt->rt_dst, ipst->ips_ip_ftable,
			    ire_update_generation, NULL);
		}
	} else if (flag == IRE_FLUSH_DELETE || flag == IRE_FLUSH_GWCHANGE) {
		ire_dep_incr_generation(ire);
	}

	RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable);
}
/*
 * Check whether an IPv4 IRE matches the given lookup arguments.
 *
 * addr/mask/gateway/type/ill/zoneid/tsl are compared against the IRE only
 * as far as the corresponding MATCH_IRE_* bit in match_flags requests it;
 * the destination (addr & mask) is always compared. ill must be a non-NULL
 * IPv4 ill when MATCH_IRE_ILL or MATCH_IRE_SRC_ILL is set (asserted).
 *
 * Returns B_TRUE on a match, B_FALSE otherwise.
 */
boolean_t
ire_match_args(ire_t *ire, ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway,
int type, const ill_t *ill, zoneid_t zoneid,
const ts_label_t *tsl, int match_flags)
{
ill_t *ire_ill = NULL, *dst_ill;
ip_stack_t *ipst = ire->ire_ipst;
ASSERT(ire->ire_ipversion == IPV4_VERSION);
ASSERT((ire->ire_addr & ~ire->ire_mask) == 0);
ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_SRC_ILL))) ||
(ill != NULL && !ill->ill_isv6));
/* Hidden IREs are only returned to callers that ask for them. */
if (ire->ire_testhidden) {
if (!(match_flags & MATCH_IRE_TESTHIDDEN))
return (B_FALSE);
}
/*
 * The caller asked for a specific zone and the IRE belongs to a different
 * specific zone: apply the cross-zone rules below.
 */
if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid &&
ire->ire_zoneid != ALL_ZONES) {
/* Strict zone matching requested. */
if (match_flags & MATCH_IRE_ZONEONLY)
return (B_FALSE);
/* Loopback IREs never match across zones. */
if (ire->ire_type & IRE_LOOPBACK)
return (B_FALSE);
/* IRE_LOCAL skips the remaining zone checks. */
if (ire->ire_type & IRE_LOCAL)
goto matchit;
dst_ill = ire->ire_ill;
if (ire->ire_type & IRE_ONLINK) {
uint_t ifindex;
/*
 * NOTE(review): dst_ill is dereferenced here without a NULL check,
 * unlike the IRE_OFFLINK case below - confirm on-link IREs reaching
 * this point always have an ill.
 */
ifindex = dst_ill->ill_usesrc_ifindex;
if (ifindex == 0)
return (B_FALSE);
/* The usesrc ill must be available to the zone. */
if (!ipif_zone_avail(ifindex, dst_ill->ill_isv6,
zoneid, ipst)) {
ip3dbg(("ire_match_args: no usrsrc for zone"
" dst_ill %p\n", (void *)dst_ill));
return (B_FALSE);
}
}
if (dst_ill != NULL && (ire->ire_type & IRE_OFFLINK)) {
ipif_t *tipif;
/*
 * Require an ipif on the ill that is up, not condemned, and that
 * belongs to the zone (or to all zones).
 */
mutex_enter(&dst_ill->ill_lock);
for (tipif = dst_ill->ill_ipif;
tipif != NULL; tipif = tipif->ipif_next) {
if (!IPIF_IS_CONDEMNED(tipif) &&
(tipif->ipif_flags & IPIF_UP) &&
(tipif->ipif_zoneid == zoneid ||
tipif->ipif_zoneid == ALL_ZONES))
break;
}
mutex_exit(&dst_ill->ill_lock);
if (tipif == NULL) {
return (B_FALSE);
}
}
}
matchit:
ire_ill = ire->ire_ill;
if (match_flags & MATCH_IRE_ILL) {
/*
 * When hidden IREs are requested (and this is not an IRE_LOCAL) the ill
 * must match exactly; otherwise IS_ON_SAME_LAN() is enough and the
 * TESTHIDDEN requirement is dropped for the final comparison.
 */
if ((match_flags & MATCH_IRE_TESTHIDDEN) &&
!(ire->ire_type & IRE_LOCAL)) {
if (ire->ire_ill != ill)
return (B_FALSE);
} else {
match_flags &= ~MATCH_IRE_TESTHIDDEN;
if (ire_ill == NULL || !IS_ON_SAME_LAN(ill, ire_ill))
return (B_FALSE);
}
}
if (match_flags & MATCH_IRE_SRC_ILL) {
if (ire_ill == NULL)
return (B_FALSE);
/* Off-LAN is acceptable only if the IRE's ill uses ill as its usesrc. */
if (!IS_ON_SAME_LAN(ill, ire_ill)) {
if (ire_ill->ill_usesrc_ifindex == 0 ||
(ire_ill->ill_usesrc_ifindex !=
ill->ill_phyint->phyint_ifindex))
return (B_FALSE);
}
}
/* Destination always; every other field only when its flag is set. */
if ((ire->ire_addr == (addr & mask)) &&
((!(match_flags & MATCH_IRE_GW)) ||
(ire->ire_gateway_addr == gateway)) &&
((!(match_flags & MATCH_IRE_DIRECT)) ||
!(ire->ire_flags & RTF_INDIRECT)) &&
((!(match_flags & MATCH_IRE_TYPE)) || (ire->ire_type & type)) &&
((!(match_flags & MATCH_IRE_TESTHIDDEN)) || ire->ire_testhidden) &&
((!(match_flags & MATCH_IRE_MASK)) || (ire->ire_mask == mask)) &&
((!(match_flags & MATCH_IRE_SECATTR)) ||
(!is_system_labeled()) ||
(tsol_ire_match_gwattr(ire, tsl) == 0))) {
return (B_TRUE);
}
return (B_FALSE);
}
/*
 * Given a held IRE_LOCAL, look up the route that would be used for the
 * same address if IRE_LOCAL and IRE_LOOPBACK entries were ignored.
 *
 * If that alternate route uses the same ill as the IRE_LOCAL, the
 * alternate is released and the original IRE is returned. Otherwise the
 * original is released, the alternate is returned, and *generationp
 * (when non-NULL) receives the alternate's generation number.
 */
ire_t *
ire_alt_local(ire_t *ire, zoneid_t zoneid, const ts_label_t *tsl,
    const ill_t *ill, uint_t *generationp)
{
	ip_stack_t	*ipst = ire->ire_ipst;
	uint_t		want_types;
	uint_t		flags;
	uint_t		alt_gen;
	ire_t		*alt;

	ASSERT(ire->ire_type & IRE_LOCAL);
	ASSERT(ire->ire_ill != NULL);

	/* Everything except local and loopback entries. */
	want_types = (IRE_ONLINK | IRE_OFFLINK) & ~(IRE_LOCAL|IRE_LOOPBACK);

	flags = MATCH_IRE_TYPE | MATCH_IRE_SECATTR;
	if (ill != NULL)
		flags |= MATCH_IRE_ILL;

	if (ire->ire_ipversion == IPV4_VERSION) {
		alt = ire_route_recursive_v4(ire->ire_addr, want_types,
		    ill, zoneid, tsl, flags, IRR_ALLOCATE, 0, ipst, NULL,
		    NULL, &alt_gen);
	} else {
		alt = ire_route_recursive_v6(&ire->ire_addr_v6, want_types,
		    ill, zoneid, tsl, flags, IRR_ALLOCATE, 0, ipst, NULL,
		    NULL, &alt_gen);
	}
	ASSERT(alt != NULL);

	if (alt->ire_ill == ire->ire_ill) {
		/* Same outgoing ill: keep using the IRE_LOCAL. */
		ire_refrele(alt);
		return (ire);
	}

	/* Different ill: hand back the alternate instead. */
	ire_refrele(ire);
	if (generationp != NULL)
		*generationp = alt_gen;
	return (alt);
}
/*
 * Radix match callback used for gateway reachability checks.
 *
 * arg is an ire_ftable_args_t of which only ift_zoneid, ift_ill and
 * ift_tsl are read here. Returns B_TRUE when the node's bucket holds a
 * live interface IRE that belongs to the requested zone (or to all
 * zones), is on the requested ill (when one is given) and, on labeled
 * systems, passes tsol_ire_match_gwattr(). The bucket is scanned under
 * irb_lock as reader.
 */
boolean_t
ire_find_zoneid(struct radix_node *rn, void *arg)
{
	struct rt_entry		*rt = (struct rt_entry *)rn;
	ire_ftable_args_t	*margs = arg;
	boolean_t		found = B_FALSE;
	irb_t			*irb;
	ire_t			*ire;

	ASSERT(rt != NULL);

	irb = &rt->rt_irb;
	if (irb->irb_ire_cnt == 0)
		return (B_FALSE);

	rw_enter(&irb->irb_lock, RW_READER);
	for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
		if (IRE_IS_CONDEMNED(ire) ||
		    !(ire->ire_type & IRE_INTERFACE))
			continue;

		if (ire->ire_zoneid != ALL_ZONES &&
		    ire->ire_zoneid != margs->ift_zoneid)
			continue;

		if (margs->ift_ill != NULL && margs->ift_ill != ire->ire_ill)
			continue;

		if (is_system_labeled() &&
		    tsol_ire_match_gwattr(ire, margs->ift_tsl) != 0)
			continue;

		found = B_TRUE;
		break;
	}
	rw_exit(&irb->irb_lock);
	return (found);
}
/*
 * Check whether an IPv4 gateway address is usable from a zone: the radix
 * tree is searched for the gateway address with ire_find_zoneid() as the
 * acceptance callback.
 *
 * lock_held tells whether the caller already holds the radix head lock
 * as reader (asserted); if not, it is taken and released here.
 * Returns B_TRUE when a matching entry is found.
 */
boolean_t
ire_gateway_ok_zone_v4(ipaddr_t gateway, zoneid_t zoneid, ill_t *ill,
    const ts_label_t *tsl, ip_stack_t *ipst, boolean_t lock_held)
{
	struct rt_sockaddr	key;
	ire_ftable_args_t	filter;
	struct rt_entry		*hit;

	ASSERT(ill == NULL || !ill->ill_isv6);

	/* Radix key for the gateway address. */
	bzero(&key, sizeof (key));
	key.rt_sin_len = sizeof (key);
	key.rt_sin_family = AF_INET;
	key.rt_sin_addr.s_addr = gateway;

	/* Only the ill, zone and label are consulted by the callback. */
	bzero(&filter, sizeof (filter));
	filter.ift_ill = ill;
	filter.ift_zoneid = zoneid;
	filter.ift_tsl = tsl;

	if (lock_held)
		ASSERT(RW_READ_HELD(&ipst->ips_ip_ftable->rnh_lock));
	else
		RADIX_NODE_HEAD_RLOCK(ipst->ips_ip_ftable);

	hit = (struct rt_entry *)ipst->ips_ip_ftable->rnh_matchaddr_args(&key,
	    ipst->ips_ip_ftable, ire_find_zoneid, (void *)&filter);

	if (!lock_held)
		RADIX_NODE_HEAD_UNLOCK(ipst->ips_ip_ftable);

	return (hit != NULL);
}
/*
 * Walker callback used when memory is being reclaimed.
 *
 * arg points at a uint_t "fraction". Only RTF_DYNAMIC routes and
 * IRE_IF_CLONE entries are candidates; a candidate is deleted when a
 * value built from the current lbolt and a hash of the IRE's address is
 * an exact multiple of the fraction.
 * NOTE(review): a fraction of 0 would divide by zero - confirm the
 * tunable cannot be set to 0.
 */
static void
ire_delete_reclaim(ire_t *ire, char *arg)
{
	ip_stack_t	*ipst = ire->ire_ipst;
	uint_t		denom = *(uint_t *)arg;
	uint_t		pick;

	if (!(ire->ire_flags & RTF_DYNAMIC) &&
	    !(ire->ire_type & IRE_IF_CLONE))
		return;

	pick = (uint_t)ddi_get_lbolt() +
	    IRE_ADDR_HASH_V6(ire->ire_addr_v6, 256);

	if (pick % denom != 0)
		return;

	IP_STAT(ipst, ip_ire_reclaim_deleted);
	ire_delete(ire);
}
/*
 * Reclaim IREs in one IP stack: walk every IRE with
 * ire_delete_reclaim() using the stack's reclaim fraction, then run
 * conn_ixa_cleanup() over the stack's connections.
 */
static void
ip_ire_reclaim_stack(ip_stack_t *ipst)
{
	uint_t	reclaim_fraction;

	IP_STAT(ipst, ip_ire_reclaim_calls);

	reclaim_fraction = ipst->ips_ip_ire_reclaim_fraction;
	ire_walk(ire_delete_reclaim, &reclaim_fraction, ipst);

	ipcl_walk(conn_ixa_cleanup, (void *)B_FALSE, ipst);
}
/*
 * Reclaim callback registered for ire_cache in ip_ire_g_init().
 * Iterates over every netstack and reclaims IREs in each one that has an
 * IP stack instance; args is unused.
 */
void
ip_ire_reclaim(void *args)
{
	netstack_handle_t	nh;
	netstack_t		*ns;

	netstack_next_init(&nh);
	for (ns = netstack_next(&nh); ns != NULL; ns = netstack_next(&nh)) {
		ip_stack_t	*ipst = ns->netstack_ip;

		/* Skip netstacks that have no IP stack instance. */
		if (ipst != NULL)
			ip_ire_reclaim_stack(ipst);
		netstack_rele(ns);
	}
	netstack_next_fini(&nh);
}
/*
 * Round *value up to a power of two, in place.
 *
 * The result is the smallest power of two in the range [2, 2^31] that is
 * greater than or equal to the input; inputs of 0 and 1 yield 2 and
 * inputs above 2^31 yield 2^31.
 *
 * All shifts are done on an unsigned 32-bit operand so that producing
 * 2^31 does not shift into the sign bit of an int.
 */
static void
power2_roundup(uint32_t *value)
{
	unsigned int	shift;

	for (shift = 1; shift < 31; shift++) {
		if (*value <= ((uint32_t)1 << shift))
			break;
	}
	*value = (uint32_t)1 << shift;
}
/*
 * One-time global initialization: create the kmem caches used by this
 * file (ire_t, ncec_t, nce_t and struct rt_entry) and initialize the
 * radix code.
 *
 * ire_cache and ncec_cache register ip_ire_reclaim() and
 * ip_nce_reclaim() respectively as their reclaim callbacks.
 */
void
ip_ire_g_init(void)
{
	ire_cache = kmem_cache_create("ire_cache",
	    sizeof (ire_t), 0, NULL, NULL,
	    ip_ire_reclaim, NULL, NULL, 0);

	ncec_cache = kmem_cache_create("ncec_cache",
	    sizeof (ncec_t), 0, NULL, NULL,
	    ip_nce_reclaim, NULL, NULL, 0);

	nce_cache = kmem_cache_create("nce_cache",
	    sizeof (nce_t), 0, NULL, NULL,
	    NULL, NULL, NULL, 0);

	rt_entry_cache = kmem_cache_create("rt_entry",
	    sizeof (struct rt_entry), 0, NULL, NULL, NULL, NULL, NULL, 0);

	rn_init();
}
/*
 * Per-stack initialization of the IRE subsystem.
 *
 * Sets up the IPv4 radix tree head, derives the per-stack IPv6 hash size
 * (rounded up to a power of two), and creates the four IRE_NOROUTE
 * entries kept in the stack: reject and blackhole for each of IPv4 and
 * IPv6. Allocations use KM_SLEEP.
 */
void
ip_ire_init(ip_stack_t *ipst)
{
	ire_t	*noroute;
	int	err;

	mutex_init(&ipst->ips_ire_ft_init_lock, NULL, MUTEX_DEFAULT, NULL);

	(void) rn_inithead((void **)&ipst->ips_ip_ftable, 32);

	ipst->ips_ip6_ftable_hash_size = ip6_ftable_hash_size;
	power2_roundup(&ipst->ips_ip6_ftable_hash_size);

	/* IPv4 reject route. */
	noroute = kmem_cache_alloc(ire_cache, KM_SLEEP);
	*noroute = ire_null;
	err = ire_init_v4(noroute, NULL, NULL, NULL, IRE_NOROUTE, NULL,
	    ALL_ZONES, RTF_REJECT|RTF_UP, NULL, ipst);
	ASSERT(err == 0);
	ipst->ips_ire_reject_v4 = noroute;

	/* IPv6 reject route. */
	noroute = kmem_cache_alloc(ire_cache, KM_SLEEP);
	*noroute = ire_null;
	err = ire_init_v6(noroute, NULL, NULL, NULL, IRE_NOROUTE, NULL,
	    ALL_ZONES, RTF_REJECT|RTF_UP, NULL, ipst);
	ASSERT(err == 0);
	ipst->ips_ire_reject_v6 = noroute;

	/* IPv4 blackhole route. */
	noroute = kmem_cache_alloc(ire_cache, KM_SLEEP);
	*noroute = ire_null;
	err = ire_init_v4(noroute, NULL, NULL, NULL, IRE_NOROUTE, NULL,
	    ALL_ZONES, RTF_BLACKHOLE|RTF_UP, NULL, ipst);
	ASSERT(err == 0);
	ipst->ips_ire_blackhole_v4 = noroute;

	/* IPv6 blackhole route. */
	noroute = kmem_cache_alloc(ire_cache, KM_SLEEP);
	*noroute = ire_null;
	err = ire_init_v6(noroute, NULL, NULL, NULL, IRE_NOROUTE, NULL,
	    ALL_ZONES, RTF_BLACKHOLE|RTF_UP, NULL, ipst);
	ASSERT(err == 0);
	ipst->ips_ire_blackhole_v6 = noroute;

	rw_init(&ipst->ips_ip6_ire_head_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&ipst->ips_ire_dep_lock, NULL, RW_DEFAULT, NULL);
}
/*
 * Global teardown, the counterpart of ip_ire_g_init(): destroy the four
 * kmem caches and then call rn_fini() for the radix code.
 */
void
ip_ire_g_fini(void)
{
	/* Caches are destroyed in the order they were created. */
	kmem_cache_destroy(ire_cache);
	kmem_cache_destroy(ncec_cache);
	kmem_cache_destroy(nce_cache);
	kmem_cache_destroy(rt_entry_cache);

	rn_fini();
}
/*
 * Per-stack IRE teardown.
 *
 * Condemns and releases the four IRE_NOROUTE entries, deletes every
 * remaining IRE, frees the IPv4 radix head, destroys the per-stack locks
 * and finally frees each allocated IPv6 forwarding-table bucket array.
 */
void
ip_ire_fini(ip_stack_t *ipst)
{
	int	tbl;

	/* Drop the special reject/blackhole IREs. */
	ire_make_condemned(ipst->ips_ire_reject_v6);
	ire_refrele_notr(ipst->ips_ire_reject_v6);
	ipst->ips_ire_reject_v6 = NULL;

	ire_make_condemned(ipst->ips_ire_reject_v4);
	ire_refrele_notr(ipst->ips_ire_reject_v4);
	ipst->ips_ire_reject_v4 = NULL;

	ire_make_condemned(ipst->ips_ire_blackhole_v6);
	ire_refrele_notr(ipst->ips_ire_blackhole_v6);
	ipst->ips_ire_blackhole_v6 = NULL;

	ire_make_condemned(ipst->ips_ire_blackhole_v4);
	ire_refrele_notr(ipst->ips_ire_blackhole_v4);
	ipst->ips_ire_blackhole_v4 = NULL;

	/* Remove whatever is still in the tables. */
	ire_walk(ire_delete, NULL, ipst);

	rn_freehead(ipst->ips_ip_ftable);
	ipst->ips_ip_ftable = NULL;

	rw_destroy(&ipst->ips_ire_dep_lock);
	rw_destroy(&ipst->ips_ip6_ire_head_lock);

	mutex_destroy(&ipst->ips_ire_ft_init_lock);

	/* Free each populated IPv6 bucket array, one per mask length slot. */
	for (tbl = 0; tbl < IP6_MASK_TABLE_SIZE; tbl++) {
		irb_t	*buckets = ipst->ips_ip_forwarding_table_v6[tbl];
		int	b;

		if (buckets != NULL) {
			for (b = 0; b < ipst->ips_ip6_ftable_hash_size; b++) {
				/* Every bucket must already be empty. */
				ASSERT(buckets[b].irb_ire == NULL);
				rw_destroy(&buckets[b].irb_lock);
			}
			mi_free(buckets);
			ipst->ips_ip_forwarding_table_v6[tbl] = NULL;
		}
	}
}
#ifdef DEBUG
/*
 * DEBUG only: record a traced reference on `ire' via th_trace_ref().
 *
 * Does nothing when tracing is already disabled for this IRE.  If
 * th_trace_ref() reports failure, tracing is disabled for the IRE and the
 * trace state is cleaned up after ire_lock has been dropped.
 */
void
ire_trace_ref(ire_t *ire)
{
	mutex_enter(&ire->ire_lock);
	if (!ire->ire_trace_disable && !th_trace_ref(ire, ire->ire_ipst)) {
		/* Recording failed: turn tracing off for good. */
		ire->ire_trace_disable = B_TRUE;
		mutex_exit(&ire->ire_lock);
		ire_trace_cleanup(ire);
		return;
	}
	mutex_exit(&ire->ire_lock);
}
/*
 * DEBUG only: drop a traced reference on `ire' via th_trace_unref(), unless
 * tracing has been disabled for this IRE.  The check and the call are both
 * made with ire_lock held.
 */
void
ire_untrace_ref(ire_t *ire)
{
	mutex_enter(&ire->ire_lock);
	if (!ire->ire_trace_disable) {
		th_trace_unref(ire);
	}
	mutex_exit(&ire->ire_lock);
}
/*
 * DEBUG only: hand the IRE and its current ire_trace_disable setting to
 * th_trace_cleanup().
 */
static void
ire_trace_cleanup(const ire_t *ire)
{
	th_trace_cleanup(ire,
	    ire->ire_trace_disable);
}
#endif
/*
 * Look up, or create if absent, the nce for `addr' on `ill'.
 *
 * ill      - interface the caller wants to send on.  If it is an IPMP ill,
 *            or the request is a broadcast on an ill under IPMP, the ill
 *            actually used is the one picked by ipmp_ill_hold_xmit_ill().
 * addr     - address to resolve; passed through to the v4 or v6
 *            nce_lookup_then_add routine depending on ill_isv6.
 * ire_type - IRE type; IRE_BROADCAST and IRE_MULTICAST select extra
 *            NCE_F_* flags and mark the request as non-unicast.
 *
 * Returns the nce, or NULL if no transmit ill could be obtained or the
 * lookup/add failed with an error other than EEXIST.
 * NOTE(review): the returned nce is presumably held for the caller -
 * confirm against nce_lookup_then_add_v4/v6.
 */
static nce_t *
ire_nce_init(ill_t *ill, const void *addr, int ire_type)
{
int err;
nce_t *nce = NULL;
uint16_t ncec_flags;
uchar_t *hwaddr;
/* Set once we hold a reference on an ill picked by IPMP. */
boolean_t need_refrele = B_FALSE;
/* The caller's ill, kept for re-selecting a transmit ill on retry. */
ill_t *in_ill = ill;
boolean_t is_unicast;
uint_t hwaddr_len;
is_unicast = ((ire_type & (IRE_MULTICAST|IRE_BROADCAST)) == 0);
if (IS_IPMP(ill) ||
((ire_type & IRE_BROADCAST) && IS_UNDER_IPMP(ill))) {
if ((ill = ipmp_ill_hold_xmit_ill(ill, is_unicast)) == NULL)
return (NULL);
need_refrele = B_TRUE;
}
ncec_flags = (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0;
switch (ire_type) {
case IRE_BROADCAST:
ASSERT(!ill->ill_isv6);
ncec_flags |= (NCE_F_BCAST|NCE_F_NONUD);
break;
case IRE_MULTICAST:
ncec_flags |= (NCE_F_MCAST|NCE_F_NONUD);
break;
}
/* Unicast on a no-resolver ill uses the ill's fixed destination address. */
if (ill->ill_net_type == IRE_IF_NORESOLVER && is_unicast) {
hwaddr = ill->ill_dest_addr;
} else {
hwaddr = NULL;
}
hwaddr_len = ill->ill_phys_addr_length;
retry:
if (!ill->ill_isv6) {
err = nce_lookup_then_add_v4(ill, hwaddr, hwaddr_len, addr,
ncec_flags, ND_UNCHANGED, &nce);
} else {
err = nce_lookup_then_add_v6(ill, hwaddr, hwaddr_len, addr,
ncec_flags, ND_UNCHANGED, &nce);
}
switch (err) {
case 0:
break;
case EEXIST:
/*
 * An entry already exists.  If it disagrees with this request about
 * being a broadcast entry, delete it and start over; otherwise use it.
 */
if (((ncec_flags ^ nce->nce_common->ncec_flags) &
NCE_F_BCAST) != 0) {
ASSERT(!ill->ill_isv6);
ncec_delete(nce->nce_common);
nce_refrele(nce);
goto retry;
}
break;
default:
DTRACE_PROBE2(nce__init__fail, ill_t *, ill, int, err);
if (need_refrele)
ill_refrele(ill);
return (NULL);
}
/*
 * With ill_g_lock, ill_lock and phyint_lock held, check whether an ill
 * that was picked by IPMP is still active.  If it is not, discard the nce
 * just obtained, pick a new transmit ill from the caller's ill and retry.
 */
rw_enter(&ill->ill_ipst->ips_ill_g_lock, RW_READER);
mutex_enter(&ill->ill_lock);
mutex_enter(&ill->ill_phyint->phyint_lock);
if (need_refrele && IS_UNDER_IPMP(ill) && !ipmp_ill_is_active(ill)) {
mutex_exit(&ill->ill_phyint->phyint_lock);
nce_delete(nce);
mutex_exit(&ill->ill_lock);
rw_exit(&ill->ill_ipst->ips_ill_g_lock);
nce_refrele(nce);
ill_refrele(ill);
if ((ill = ipmp_ill_hold_xmit_ill(in_ill, is_unicast)) == NULL)
return (NULL);
goto retry;
} else {
mutex_exit(&ill->ill_phyint->phyint_lock);
mutex_exit(&ill->ill_lock);
rw_exit(&ill->ill_ipst->ips_ill_g_lock);
}
ASSERT(nce->nce_ill == ill);
/* Drop the reference taken by ipmp_ill_hold_xmit_ill(), if any. */
if (need_refrele)
ill_refrele(ill);
return (nce);
}
/*
 * IPv4 front end to ire_nce_init(): passes the address of the by-value
 * IPv4 address argument as the generic address pointer.
 */
nce_t *
arp_nce_init(ill_t *ill, in_addr_t addr4, int ire_type)
{
	nce_t	*nce;

	nce = ire_nce_init(ill, &addr4, ire_type);
	return (nce);
}
/*
 * IPv6 front end to ire_nce_init().  IRE_BROADCAST must not be set in
 * ire_type (asserted).
 */
nce_t *
ndp_nce_init(ill_t *ill, const in6_addr_t *addr6, int ire_type)
{
	nce_t	*nce;

	ASSERT((ire_type & IRE_BROADCAST) == 0);

	nce = ire_nce_init(ill, addr6, ire_type);
	return (nce);
}
/*
 * Mark `ire' condemned by setting its generation to
 * IRE_GENERATION_CONDEMNED, count it in ips_num_ire_condemned, and release
 * any nce cached in ire_nce_cache.
 *
 * The IRE must not already be condemned.  If it sits in a bucket, the
 * bucket lock must be held as writer (both asserted).
 */
void
ire_make_condemned(ire_t *ire)
{
	ip_stack_t	*ipst = ire->ire_ipst;
	nce_t		*cached;

	mutex_enter(&ire->ire_lock);
	ASSERT(ire->ire_bucket == NULL ||
	    RW_WRITE_HELD(&ire->ire_bucket->irb_lock));
	ASSERT(!IRE_IS_CONDEMNED(ire));

	ire->ire_generation = IRE_GENERATION_CONDEMNED;
	atomic_inc_32(&ipst->ips_num_ire_condemned);

	/* Detach the cached nce under the lock; release it afterwards. */
	cached = ire->ire_nce_cache;
	ire->ire_nce_cache = NULL;
	mutex_exit(&ire->ire_lock);

	if (cached == NULL)
		return;
	nce_refrele(cached);
}
/*
 * Advance ire_generation by one under ire_lock.  A condemned IRE is left
 * untouched.  The counter skips IRE_GENERATION_CONDEMNED by wrapping to
 * IRE_GENERATION_INITIAL.
 */
void
ire_increment_generation(ire_t *ire)
{
	uint_t	next_gen;

	mutex_enter(&ire->ire_lock);
	if (IRE_IS_CONDEMNED(ire)) {
		mutex_exit(&ire->ire_lock);
		return;
	}

	next_gen = ire->ire_generation + 1;
	if (next_gen == IRE_GENERATION_CONDEMNED)
		next_gen = IRE_GENERATION_INITIAL;
	ASSERT(next_gen != IRE_GENERATION_VERIFY);
	ire->ire_generation = next_gen;

	mutex_exit(&ire->ire_lock);
}
/*
 * Bump the generation of the multicast IRE of every non-condemned ill of
 * the given IP version in this stack.  The ill list is walked with
 * ips_ill_g_lock held as reader.
 */
void
ire_increment_multicast_generation(ip_stack_t *ipst, boolean_t isv6)
{
	ill_walk_context_t	ctx;
	ill_t			*ill;

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);

	if (isv6) {
		ill = ILL_START_WALK_V6(&ctx, ipst);
	} else {
		ill = ILL_START_WALK_V4(&ctx, ipst);
	}

	while (ill != NULL) {
		/* Skip ills going away and ills without a multicast IRE. */
		if (!ILL_IS_CONDEMNED(ill) && ill->ill_ire_multicast != NULL)
			ire_increment_generation(ill->ill_ire_multicast);
		ill = ill_next(&ctx, ill);
	}

	rw_exit(&ipst->ips_ill_g_lock);
}
/*
 * Return the stack's RTF_REJECT IRE for the requested IP version with an
 * additional reference held on it.
 */
ire_t *
ire_reject(ip_stack_t *ipst, boolean_t isv6)
{
	ire_t	*reject;

	reject = isv6 ? ipst->ips_ire_reject_v6 : ipst->ips_ire_reject_v4;

	ASSERT(reject->ire_generation != IRE_GENERATION_CONDEMNED);
	ire_refhold(reject);
	return (reject);
}
/*
 * Return the stack's RTF_BLACKHOLE IRE for the requested IP version with an
 * additional reference held on it.
 */
ire_t *
ire_blackhole(ip_stack_t *ipst, boolean_t isv6)
{
	ire_t	*blackhole;

	blackhole = isv6 ? ipst->ips_ire_blackhole_v6 :
	    ipst->ips_ire_blackhole_v4;

	ASSERT(blackhole->ire_generation != IRE_GENERATION_CONDEMNED);
	ire_refhold(blackhole);
	return (blackhole);
}
/*
 * Return a held IRE to use for multicast on `ill': the ill's own multicast
 * IRE when it has one, otherwise the stack's blackhole IRE for the ill's IP
 * version.
 */
ire_t *
ire_multicast(ill_t *ill)
{
	ire_t	*mcast = ill->ill_ire_multicast;

	ASSERT(mcast == NULL ||
	    mcast->ire_generation != IRE_GENERATION_CONDEMNED);

	/* ire_blackhole() already returns its IRE held. */
	if (mcast == NULL)
		return (ire_blackhole(ill->ill_ipst, ill->ill_isv6));

	ire_refhold(mcast);
	return (mcast);
}
/*
 * Walk the ire_dep_parent chain starting at `ire' itself and return, with a
 * reference held, the first IRE that either has RTF_REJECT or RTF_BLACKHOLE
 * set or is of an IRE_ONLINK type.  Returns NULL when the chain ends
 * without such an entry.
 *
 * The walk and the refhold are done with ips_ire_dep_lock held as reader.
 */
ire_t *
ire_nexthop(ire_t *ire)
{
	ip_stack_t	*ipst = ire->ire_ipst;

	rw_enter(&ipst->ips_ire_dep_lock, RW_READER);
	for (; ire != NULL; ire = ire->ire_dep_parent) {
		if ((ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
		    (ire->ire_type & IRE_ONLINK))
			break;
	}
	/* Take the hold before the dependency lock is dropped. */
	if (ire != NULL)
		ire_refhold(ire);
	rw_exit(&ipst->ips_ire_dep_lock);

	return (ire);
}
/*
 * Return the ill of the nexthop IRE found by ire_nexthop(), with a
 * reference held on the ill.  Returns NULL when there is no nexthop IRE or
 * that IRE has no ill.
 */
ill_t *
ire_nexthop_ill(ire_t *ire)
{
	ire_t	*nexthop;
	ill_t	*out_ill;

	nexthop = ire_nexthop(ire);
	if (nexthop == NULL)
		return (NULL);

	/* Hold the ill before giving up the hold on the nexthop IRE. */
	out_ill = nexthop->ire_ill;
	if (out_ill != NULL)
		ill_refhold(out_ill);
	ire_refrele(nexthop);

	return (out_ill);
}
#ifdef DEBUG
/*
 * DEBUG only: return B_TRUE if `child' is on `parent's ire_dep_children
 * sibling list.  While walking, assert that each visited entry's
 * ire_dep_sib_ptpn points back at the link that references it.
 */
static boolean_t
parent_has_child(ire_t *parent, ire_t *child)
{
	ire_t	*cur;
	ire_t	**backlink;

	/* The first entry must point back at the parent's list head. */
	backlink = &(parent->ire_dep_children);
	for (cur = parent->ire_dep_children; cur != NULL;
	    cur = cur->ire_dep_sib_next) {
		ASSERT(cur->ire_dep_sib_ptpn == backlink);
		if (cur == child)
			return (B_TRUE);
		/* Later entries point back at the previous sibling's next. */
		backlink = &(cur->ire_dep_sib_next);
	}
	return (B_FALSE);
}
/*
 * DEBUG only: assert basic consistency of the dependency links of `ire':
 * valid IP versions, a parent that is referenced and lists `ire' among its
 * children, and a first child that points back at `ire' and is properly
 * linked.
 */
static void
ire_dep_verify(ire_t *ire)
{
	ire_t	*up = ire->ire_dep_parent;
	ire_t	*down = ire->ire_dep_children;

	ASSERT(ire->ire_ipversion == IPV4_VERSION ||
	    ire->ire_ipversion == IPV6_VERSION);

	if (up != NULL) {
		ASSERT(up->ire_ipversion == IPV4_VERSION ||
		    up->ire_ipversion == IPV6_VERSION);
		ASSERT(up->ire_refcnt >= 1);
		ASSERT(parent_has_child(up, ire));
	}

	if (down != NULL) {
		ASSERT(down->ire_ipversion == IPV4_VERSION ||
		    down->ire_ipversion == IPV6_VERSION);
		ASSERT(down->ire_dep_parent == ire);
		ASSERT(down->ire_dep_sib_ptpn != NULL);
		ASSERT(parent_has_child(ire, down));
	}
}
#endif
/*
 * Unlink `ire' from its dependency parent.
 *
 * The caller must hold ips_ire_dep_lock as writer, and `ire' must currently
 * have a parent (both asserted).  The IRE is taken off the parent's sibling
 * list, its ire_dep_parent is cleared, its children are marked as needing
 * verification, its cached nce is released, and finally one reference each
 * on the parent and on `ire' is dropped with ire_refrele_notr().
 */
void
ire_dep_remove(ire_t *ire)
{
ip_stack_t *ipst = ire->ire_ipst;
ire_t *parent = ire->ire_dep_parent;
ire_t *next;
nce_t *nce;
ASSERT(RW_WRITE_HELD(&ipst->ips_ire_dep_lock));
ASSERT(ire->ire_dep_parent != NULL);
ASSERT(ire->ire_dep_sib_ptpn != NULL);
#ifdef DEBUG
ire_dep_verify(ire);
ire_dep_verify(parent);
#endif
/* Splice ire out of the sibling list. */
next = ire->ire_dep_sib_next;
if (next != NULL)
next->ire_dep_sib_ptpn = ire->ire_dep_sib_ptpn;
ASSERT(*(ire->ire_dep_sib_ptpn) == ire);
*(ire->ire_dep_sib_ptpn) = ire->ire_dep_sib_next;
ire->ire_dep_sib_ptpn = NULL;
ire->ire_dep_sib_next = NULL;
/* The parent pointer itself is cleared under ire_lock. */
mutex_enter(&ire->ire_lock);
parent = ire->ire_dep_parent;
ire->ire_dep_parent = NULL;
mutex_exit(&ire->ire_lock);
/* Mark the children's ire_dep_parent_generation for re-verification. */
if (ire->ire_dep_children != NULL)
ire_dep_invalidate_children(ire->ire_dep_children);
/* Detach the cached nce under ire_lock and release it afterwards. */
mutex_enter(&ire->ire_lock);
nce = ire->ire_nce_cache;
ire->ire_nce_cache = NULL;
mutex_exit(&ire->ire_lock);
if (nce != NULL)
nce_refrele(nce);
#ifdef DEBUG
ire_dep_verify(ire);
ire_dep_verify(parent);
#endif
/* These match the two holds taken in ire_dep_parent_insert(). */
ire_refrele_notr(parent);
ire_refrele_notr(ire);
}
/*
 * Make `parent' the dependency parent of `child'.
 *
 * The caller must hold ips_ire_dep_lock as writer and `child' must not have
 * a parent or any sibling links yet (asserted).  One reference each is taken
 * on parent and child, the child is inserted at the head of the parent's
 * children list, and child->ire_dep_parent is set under the child's
 * ire_lock.
 */
static void
ire_dep_parent_insert(ire_t *child, ire_t *parent)
{
	ip_stack_t	*ipst = child->ire_ipst;
	ire_t		*old_head;

	ASSERT(RW_WRITE_HELD(&ipst->ips_ire_dep_lock));
	ASSERT(child->ire_dep_parent == NULL);

#ifdef DEBUG
	ire_dep_verify(child);
	ire_dep_verify(parent);
#endif

	ASSERT(child->ire_dep_sib_ptpn == NULL);
	ASSERT(child->ire_dep_sib_next == NULL);

	ire_refhold_notr(parent);
	ire_refhold_notr(child);

	/* Head insertion: the former first child becomes our next sibling. */
	old_head = parent->ire_dep_children;
	if (old_head != NULL) {
		ASSERT(old_head->ire_dep_sib_ptpn ==
		    &(parent->ire_dep_children));
		child->ire_dep_sib_next = old_head;
		old_head->ire_dep_sib_ptpn = &(child->ire_dep_sib_next);
	}
	parent->ire_dep_children = child;
	child->ire_dep_sib_ptpn = &(parent->ire_dep_children);

	mutex_enter(&child->ire_lock);
	child->ire_dep_parent = parent;
	mutex_exit(&child->ire_lock);

#ifdef DEBUG
	ire_dep_verify(child);
	ire_dep_verify(parent);
#endif
}
/*
 * Link the IREs in ires[0..count-1] into a dependency chain in which
 * ires[i+1] is the parent of ires[i], and record generations[i+1] as
 * ires[i]'s ire_dep_parent_generation.
 *
 * ires        - array of `count' IREs, child first.
 * generations - array of `count' generation numbers matching ires[].
 * count       - number of entries; must be greater than zero (asserted).
 *
 * Returns B_TRUE on success (trivially so when count is 1).  Returns
 * B_FALSE as soon as a parent in the chain is found to be condemned; links
 * made up to that point are left in place.
 */
boolean_t
ire_dep_build(ire_t *ires[], uint_t generations[], uint_t count)
{
ire_t *ire = ires[0];
ip_stack_t *ipst;
uint_t i;
ASSERT(count > 0);
if (count == 1) {
return (B_TRUE);
}
ipst = ire->ire_ipst;
rw_enter(&ipst->ips_ire_dep_lock, RW_WRITER);
/* First pass: detach any entry whose current parent is the wrong one. */
for (i = 0; i < count-1; i++) {
if (ires[i]->ire_dep_parent != NULL &&
ires[i]->ire_dep_parent != ires[i+1])
ire_dep_remove(ires[i]);
}
/* Second pass: attach missing links and record parent generations. */
for (i = 0; i < count - 1; i++) {
ASSERT(ires[i]->ire_ipversion == IPV4_VERSION ||
ires[i]->ire_ipversion == IPV6_VERSION);
if (ires[i]->ire_dep_parent != ires[i+1])
ire_dep_parent_insert(ires[i], ires[i+1]);
/* The parent's condemned state is checked only after the insert. */
mutex_enter(&ires[i+1]->ire_lock);
if (IRE_IS_CONDEMNED(ires[i+1])) {
mutex_exit(&ires[i+1]->ire_lock);
rw_exit(&ipst->ips_ire_dep_lock);
return (B_FALSE);
}
mutex_exit(&ires[i+1]->ire_lock);
mutex_enter(&ires[i]->ire_lock);
ires[i]->ire_dep_parent_generation = generations[i+1];
mutex_exit(&ires[i]->ire_lock);
}
rw_exit(&ipst->ips_ire_dep_lock);
return (B_TRUE);
}
/*
 * Undo dependency links for ires[0..count-1]: every entry that has a parent
 * is removed from it, and every entry's ire_dep_parent_generation is set to
 * IRE_GENERATION_VERIFY.  Does nothing when count is zero.
 */
void
ire_dep_unbuild(ire_t *ires[], uint_t count)
{
	ip_stack_t	*ipst;
	uint_t		idx;

	if (count == 0)
		return;

	ipst = ires[0]->ire_ipst;
	rw_enter(&ipst->ips_ire_dep_lock, RW_WRITER);
	for (idx = 0; idx < count; idx++) {
		ire_t	*cur = ires[idx];

		ASSERT(cur->ire_ipversion == IPV4_VERSION ||
		    cur->ire_ipversion == IPV6_VERSION);

		if (cur->ire_dep_parent != NULL)
			ire_dep_remove(cur);

		mutex_enter(&cur->ire_lock);
		cur->ire_dep_parent_generation = IRE_GENERATION_VERIFY;
		mutex_exit(&cur->ire_lock);
	}
	rw_exit(&ipst->ips_ire_dep_lock);
}
/*
 * Try to obtain a replacement nce for `ire' when `nce' can no longer be
 * used.
 *
 * nce            - the current nce; only its ncec_state is examined here.
 * ire            - the IRE whose nce is being refreshed.
 * ipha / ip6h    - packet header supplying the destination; ipha is used
 *                  for IPv4 IREs, ip6h for IPv6 IREs.
 * fail_if_better - when the old nce is ND_UNREACHABLE and ire_no_good()
 *                  returns true, give up and return NULL.
 *
 * Returns the nce produced by ire_to_nce(), or NULL when revalidation
 * reports ENETUNREACH, no nce is available, or the new nce is itself
 * condemned.
 */
nce_t *
ire_handle_condemned_nce(nce_t *nce, ire_t *ire, ipha_t *ipha, ip6_t *ip6h,
    boolean_t fail_if_better)
{
	nce_t	*fresh;

	/* ire_no_good() is only invoked for an unreachable neighbor. */
	if (nce->nce_common->ncec_state == ND_UNREACHABLE &&
	    ire_no_good(ire) && fail_if_better)
		return (NULL);

	if (ire_revalidate_nce(ire) == ENETUNREACH) {
		(void) ire_no_good(ire);
		return (NULL);
	}

	if (ire->ire_ipversion != IPV4_VERSION) {
		ASSERT(ip6h != NULL);
		fresh = ire_to_nce(ire, INADDR_ANY, &ip6h->ip6_dst);
	} else {
		ASSERT(ipha != NULL);
		fresh = ire_to_nce(ire, ipha->ipha_dst, NULL);
	}

	if (fresh != NULL && fresh->nce_is_condemned) {
		/* Still unusable; drop the reference and report failure. */
		nce_refrele(fresh);
		fresh = NULL;
	}
	return (fresh);
}
/*
 * React to `ire' having turned out to be unusable.
 *
 * - An RTF_DYNAMIC IRE is deleted; returns B_TRUE.
 * - An RTF_INDIRECT IRE whose dependency parent is RTF_DYNAMIC has that
 *   parent deleted; returns B_TRUE.
 * - Otherwise the IRE's bad count and last-bad timestamp are updated, a
 *   cached nce that is both condemned and ND_UNREACHABLE is dropped, and the
 *   generation of the IRE and of its dependents is incremented.  The return
 *   value is then whether the IRE's bucket holds more than one IRE.
 *
 * NOTE(review): the final return dereferences ire->ire_bucket without a NULL
 * check - confirm callers never pass an IRE that is not in a bucket.
 */
boolean_t
ire_no_good(ire_t *ire)
{
ip_stack_t *ipst = ire->ire_ipst;
ire_t *ire2;
nce_t *nce;
if (ire->ire_flags & RTF_DYNAMIC) {
ire_delete(ire);
return (B_TRUE);
}
if (ire->ire_flags & RTF_INDIRECT) {
/* Look at the parent under the dependency lock and hold it if dynamic. */
rw_enter(&ipst->ips_ire_dep_lock, RW_READER);
if (ire->ire_dep_parent != NULL &&
(ire->ire_dep_parent->ire_flags & RTF_DYNAMIC)) {
ire2 = ire->ire_dep_parent;
ire_refhold(ire2);
} else {
ire2 = NULL;
}
rw_exit(&ipst->ips_ire_dep_lock);
if (ire2 != NULL) {
ire_delete(ire2);
ire_refrele(ire2);
return (B_TRUE);
}
}
mutex_enter(&ire->ire_lock);
ire->ire_badcnt++;
ire->ire_last_badcnt = TICK_TO_SEC(ddi_get_lbolt64());
/* Only a condemned, unreachable nce is removed from the cache. */
nce = ire->ire_nce_cache;
if (nce != NULL && nce->nce_is_condemned &&
nce->nce_common->ncec_state == ND_UNREACHABLE)
ire->ire_nce_cache = NULL;
else
nce = NULL;
mutex_exit(&ire->ire_lock);
/* Release the detached nce outside of ire_lock. */
if (nce != NULL)
nce_refrele(nce);
ire_increment_generation(ire);
ire_dep_incr_generation(ire);
return (ire->ire_bucket->irb_ire_cnt > 1);
}
/*
 * Check that every link in the ire_dep_parent chain starting at `ire' still
 * records its parent's current generation.
 *
 * Returns ire->ire_generation when every ire_dep_parent_generation on the
 * chain equals the corresponding parent's ire_generation.  On the first
 * mismatch, the IREs from `ire' up to (but not including) the one where the
 * mismatch was found get ire_dep_parent_generation set to
 * IRE_GENERATION_VERIFY, and IRE_GENERATION_VERIFY is returned.
 *
 * The walk is done with ips_ire_dep_lock held as reader.
 */
uint_t
ire_dep_validate_generations(ire_t *ire)
{
ip_stack_t *ipst = ire->ire_ipst;
uint_t generation;
ire_t *ire1;
rw_enter(&ipst->ips_ire_dep_lock, RW_READER);
/* Value returned if the whole chain turns out to be consistent. */
generation = ire->ire_generation;
for (ire1 = ire; ire1 != NULL; ire1 = ire1->ire_dep_parent) {
ASSERT(ire1->ire_ipversion == IPV4_VERSION ||
ire1->ire_ipversion == IPV6_VERSION);
/* Top of the chain reached without a mismatch. */
if (ire1->ire_dep_parent == NULL)
break;
if (ire1->ire_dep_parent_generation !=
ire1->ire_dep_parent->ire_generation)
goto mismatch;
}
rw_exit(&ipst->ips_ire_dep_lock);
return (generation);
mismatch:
generation = IRE_GENERATION_VERIFY;
/* ire1 is the entry whose recorded parent generation was stale. */
while (ire != ire1) {
ASSERT(ire->ire_ipversion == IPV4_VERSION ||
ire->ire_ipversion == IPV6_VERSION);
mutex_enter(&ire->ire_lock);
ire->ire_dep_parent_generation = IRE_GENERATION_VERIFY;
mutex_exit(&ire->ire_lock);
ire = ire->ire_dep_parent;
}
rw_exit(&ipst->ips_ire_dep_lock);
return (generation);
}
/*
 * Set ire_dep_parent_generation to IRE_GENERATION_VERIFY on `ire' and on
 * every IRE above it in the ire_dep_parent chain.  The chain is walked with
 * ips_ire_dep_lock held as reader; each update is made under that IRE's
 * ire_lock.
 */
void
ire_dep_invalidate_generations(ire_t *ire)
{
	ip_stack_t	*ipst = ire->ire_ipst;
	ire_t		*cur;

	rw_enter(&ipst->ips_ire_dep_lock, RW_READER);
	for (cur = ire; cur != NULL; cur = cur->ire_dep_parent) {
		ASSERT(cur->ire_ipversion == IPV4_VERSION ||
		    cur->ire_ipversion == IPV6_VERSION);

		mutex_enter(&cur->ire_lock);
		cur->ire_dep_parent_generation = IRE_GENERATION_VERIFY;
		mutex_exit(&cur->ire_lock);
	}
	rw_exit(&ipst->ips_ire_dep_lock);
}
/*
 * Set ire_dep_parent_generation to IRE_GENERATION_VERIFY on `child' and on
 * each of its following siblings.  Before doing so, recurse into the
 * children list of `child' itself (the first entry passed in).
 *
 * The caller must hold ips_ire_dep_lock as writer (asserted).
 */
static void
ire_dep_invalidate_children(ire_t *child)
{
	ip_stack_t	*ipst = child->ire_ipst;
	ire_t		*sib;

	ASSERT(RW_WRITE_HELD(&ipst->ips_ire_dep_lock));

	/* Go down one level first. */
	if (child->ire_dep_children != NULL)
		ire_dep_invalidate_children(child->ire_dep_children);

	for (sib = child; sib != NULL; sib = sib->ire_dep_sib_next) {
		mutex_enter(&sib->ire_lock);
		sib->ire_dep_parent_generation = IRE_GENERATION_VERIFY;
		mutex_exit(&sib->ire_lock);
	}
}
/*
 * Increment the generation of `child' and of each of its following
 * siblings, skipping condemned IREs.  Before doing so, recurse into the
 * children list of `child' itself (the first entry passed in).
 *
 * The caller must hold ips_ire_dep_lock as reader (asserted).
 */
static void
ire_dep_increment_children(ire_t *child)
{
	ip_stack_t	*ipst = child->ire_ipst;
	ire_t		*sib;

	ASSERT(RW_READ_HELD(&ipst->ips_ire_dep_lock));

	/* Go down one level first. */
	if (child->ire_dep_children != NULL)
		ire_dep_increment_children(child->ire_dep_children);

	for (sib = child; sib != NULL; sib = sib->ire_dep_sib_next) {
		if (IRE_IS_CONDEMNED(sib))
			continue;
		ire_increment_generation(sib);
	}
}
/*
 * Increment the generation of the IREs that depend on `parent'.  The caller
 * must already hold ips_ire_dep_lock as reader (asserted).  Does nothing
 * when `parent' has no children.
 */
static void
ire_dep_incr_generation_locked(ire_t *parent)
{
	ire_t	*first;

	ASSERT(RW_READ_HELD(&parent->ire_ipst->ips_ire_dep_lock));

	first = parent->ire_dep_children;
	if (first == NULL)
		return;
	ire_dep_increment_children(first);
}
/*
 * Increment the generation of the IREs that depend on `parent', taking
 * ips_ire_dep_lock as reader around the operation.
 */
void
ire_dep_incr_generation(ire_t *parent)
{
	krwlock_t	*dep_lock = &parent->ire_ipst->ips_ire_dep_lock;

	rw_enter(dep_lock, RW_READER);
	ire_dep_incr_generation_locked(parent);
	rw_exit(dep_lock);
}
/*
 * Refresh the nce cached in `ire' (and in its nexthop IRE, when that is a
 * different IRE).
 *
 * Returns:
 *   0           - IRE is multicast (nothing is cached for it here), or the
 *                 caches were updated.
 *   ENETUNREACH - ire_nexthop() found no nexthop; ire_no_good() has been
 *                 called on the IRE.
 *   ENOMEM      - no nce could be looked up or created; the existing caches
 *                 are left unchanged.
 *
 * An IRE that is found condemned while its cache is being updated gets its
 * ire_nce_cache cleared rather than set.
 */
int
ire_revalidate_nce(ire_t *ire)
{
nce_t *nce, *old_nce;
ire_t *nexthop;
if (ire->ire_type & IRE_MULTICAST)
return (0);
ASSERT(!ire->ire_testhidden || !IS_IPMP(ire->ire_ill));
/* nexthop is returned held and is released below on every path. */
nexthop = ire_nexthop(ire);
if (nexthop == NULL) {
(void) ire_no_good(ire);
return (ENETUNREACH);
}
if (ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) {
/* Local and loopback IREs look up the nce for their own address. */
ASSERT(ire->ire_ill != NULL);
if (ire->ire_ipversion == IPV4_VERSION)
nce = nce_lookup_v4(ire->ire_ill, &ire->ire_addr);
else
nce = nce_lookup_v6(ire->ire_ill, &ire->ire_addr_v6);
} else {
/* Everything else resolves the nexthop's address on the nexthop's ill. */
ASSERT(nexthop->ire_type & IRE_ONLINK);
if (ire->ire_ipversion == IPV4_VERSION) {
nce = arp_nce_init(nexthop->ire_ill, nexthop->ire_addr,
nexthop->ire_type);
} else {
nce = ndp_nce_init(nexthop->ire_ill,
&nexthop->ire_addr_v6, nexthop->ire_type);
}
}
if (nce == NULL) {
ire_refrele(nexthop);
return (ENOMEM);
}
if (nexthop != ire) {
/* Install the nce in the nexthop IRE, with its own reference. */
mutex_enter(&nexthop->ire_lock);
old_nce = nexthop->ire_nce_cache;
if (!IRE_IS_CONDEMNED(nexthop)) {
nce_refhold(nce);
nexthop->ire_nce_cache = nce;
} else {
nexthop->ire_nce_cache = NULL;
}
mutex_exit(&nexthop->ire_lock);
if (old_nce != NULL)
nce_refrele(old_nce);
}
ire_refrele(nexthop);
/* Install the nce in ire itself, again with its own reference. */
mutex_enter(&ire->ire_lock);
old_nce = ire->ire_nce_cache;
if (!IRE_IS_CONDEMNED(ire)) {
nce_refhold(nce);
ire->ire_nce_cache = nce;
} else {
ire->ire_nce_cache = NULL;
}
mutex_exit(&ire->ire_lock);
if (old_nce != NULL)
nce_refrele(old_nce);
/* Drop the reference obtained from the lookup/init call above. */
nce_refrele(nce);
return (0);
}
/*
 * Return an nce for sending via `ire'.
 *
 * - RTF_REJECT / RTF_BLACKHOLE IREs never yield an nce (NULL).
 * - If the IRE has a cached nce, it is returned with a new reference.
 * - Otherwise, for IRE_MULTICAST only, an nce is set up for the supplied
 *   nexthop: v4nexthop for IPv4 IREs (v6nexthop must be NULL), v6nexthop
 *   for IPv6 IREs (v4nexthop must be 0).
 * - In all other cases NULL is returned.
 */
nce_t *
ire_to_nce(ire_t *ire, ipaddr_t v4nexthop, const in6_addr_t *v6nexthop)
{
	nce_t	*cached;

	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))
		return (NULL);

	ASSERT(!ire->ire_testhidden || !IS_IPMP(ire->ire_ill));

	/* Take the hold while ire_lock still protects the cache pointer. */
	mutex_enter(&ire->ire_lock);
	cached = ire->ire_nce_cache;
	if (cached != NULL)
		nce_refhold(cached);
	mutex_exit(&ire->ire_lock);

	if (cached != NULL)
		return (cached);

	if (!(ire->ire_type & IRE_MULTICAST))
		return (NULL);

	ASSERT(ire->ire_ill != NULL);

	if (ire->ire_ipversion != IPV4_VERSION) {
		ASSERT(v6nexthop != NULL);
		ASSERT(v4nexthop == 0);
		return (ndp_nce_init(ire->ire_ill, v6nexthop,
		    ire->ire_type));
	}

	ASSERT(v6nexthop == NULL);
	return (arp_nce_init(ire->ire_ill, v4nexthop, ire->ire_type));
}
/*
 * Convenience wrapper around ire_to_nce() that takes the nexthop from the
 * destination address of the IP header at mp->b_rptr.  The header version
 * decides whether it is read as an IPv4 or an IPv6 header.
 */
nce_t *
ire_to_nce_pkt(ire_t *ire, mblk_t *mp)
{
	if (IPH_HDR_VERSION(mp->b_rptr) != IPV4_VERSION) {
		ip6_t	*ip6h = (ip6_t *)mp->b_rptr;

		return (ire_to_nce(ire, INADDR_ANY, &ip6h->ip6_dst));
	} else {
		ipha_t	*ipha = (ipha_t *)mp->b_rptr;

		return (ire_to_nce(ire, ipha->ipha_dst, NULL));
	}
}
/*
 * Create and add a host-specific IRE_IF_CLONE for `addr', derived from the
 * interface IRE `ire_if'.
 *
 * ire_if      - IRE supplying the gateway, ill, zoneid, flags, metrics and
 *               IP stack for the clone.
 * addr        - destination; must be IPv4-mapped when ire_if is IPv4 and
 *               must not be when it is IPv6 (asserted).
 * generationp - if non-NULL, receives the generation of the added IRE.
 *
 * Returns the IRE returned by ire_add(), or NULL if creation or the add
 * failed.
 */
ire_t *
ire_create_if_clone(ire_t *ire_if, const in6_addr_t *addr, uint_t *generationp)
{
	ire_t	*clone;
	ire_t	*added;

	if (ire_if->ire_ipversion != IPV4_VERSION) {
		ASSERT(!IN6_IS_ADDR_V4MAPPED(addr));

		/* IPv6: all-ones mask makes this a host route. */
		clone = ire_create_v6(
		    addr,
		    &ipv6_all_ones,
		    &ire_if->ire_gateway_addr_v6,
		    IRE_IF_CLONE,
		    ire_if->ire_ill,
		    ire_if->ire_zoneid,
		    ire_if->ire_flags | RTF_HOST,
		    NULL,
		    ire_if->ire_ipst);
	} else {
		ipaddr_t	dst4;
		ipaddr_t	hostmask = IP_HOST_MASK;

		ASSERT(IN6_IS_ADDR_V4MAPPED(addr));
		IN6_V4MAPPED_TO_IPADDR(addr, dst4);

		clone = ire_create(
		    (uchar_t *)&dst4,
		    (uchar_t *)&hostmask,
		    (uchar_t *)&ire_if->ire_gateway_addr,
		    IRE_IF_CLONE,
		    ire_if->ire_ill,
		    ire_if->ire_zoneid,
		    ire_if->ire_flags | RTF_HOST,
		    NULL,
		    ire_if->ire_ipst);
	}
	if (clone == NULL)
		return (NULL);

	/* The clone inherits the interface IRE's metrics. */
	clone->ire_metrics = ire_if->ire_metrics;

	added = ire_add(clone);
	if (added == NULL)
		return (NULL);

	if (generationp != NULL)
		*generationp = added->ire_generation;

	return (added);
}
/*
 * Delete every non-condemned IRE_IF_CLONE that is a direct dependency child
 * of `parent'.
 *
 * The children list is scanned with ips_ire_dep_lock held as reader.  The
 * lock is dropped before each ire_delete() call, so the scan starts over
 * from the head of the list after every deletion.
 */
void
ire_dep_delete_if_clone(ire_t *parent)
{
	ip_stack_t	*ipst = parent->ire_ipst;
	ire_t		*child;
	ire_t		*victim;

	for (;;) {
		victim = NULL;

		rw_enter(&ipst->ips_ire_dep_lock, RW_READER);
		for (child = parent->ire_dep_children; child != NULL;
		    child = child->ire_dep_sib_next) {
			if ((child->ire_type & IRE_IF_CLONE) &&
			    !IRE_IS_CONDEMNED(child)) {
				/* Hold it so it survives the lock drop. */
				ire_refhold(child);
				victim = child;
				break;
			}
		}
		rw_exit(&ipst->ips_ire_dep_lock);

		/* A full pass without a candidate means we are done. */
		if (victim == NULL)
			return;

		ire_delete(victim);
		ASSERT(IRE_IS_CONDEMNED(victim));
		ire_refrele(victim);
	}
}
/*
 * Attempt to bind an unbound IRE to an interface.
 *
 * Looks up an IRE_INTERFACE route to the IRE's gateway, first without and
 * then with MATCH_IRE_TESTHIDDEN.  If one is found, a new IRE with the same
 * address, mask, gateway, type, zoneid and flags is created on that route's
 * ill, marked ire_unbound and added.  Returns silently when no interface
 * route exists or the new IRE cannot be created.
 */
void
ire_rebind(ire_t *ire)
{
	ip_stack_t	*ipst = ire->ire_ipst;
	boolean_t	isv6 = (ire->ire_ipversion == IPV6_VERSION);
	int		match_flags = MATCH_IRE_TYPE;
	ire_t		*gw_ire;
	ire_t		*new_ire;
	ill_t		*gw_ill;

	ASSERT(ire->ire_unbound);

	for (;;) {
		if (isv6) {
			gw_ire = ire_ftable_lookup_v6(
			    &ire->ire_gateway_addr_v6, 0, 0,
			    IRE_INTERFACE, NULL, ALL_ZONES, NULL,
			    match_flags, 0, ipst, NULL);
		} else {
			gw_ire = ire_ftable_lookup_v4(
			    ire->ire_gateway_addr, 0, 0,
			    IRE_INTERFACE, NULL, ALL_ZONES, NULL,
			    match_flags, 0, ipst, NULL);
		}
		if (gw_ire != NULL)
			break;

		/* Second miss (test-hidden routes included): give up. */
		if (match_flags & MATCH_IRE_TESTHIDDEN)
			return;
		match_flags |= MATCH_IRE_TESTHIDDEN;
	}

	gw_ill = gw_ire->ire_ill;
	if (isv6) {
		new_ire = ire_create_v6(&ire->ire_addr_v6, &ire->ire_mask_v6,
		    &ire->ire_gateway_addr_v6, ire->ire_type, gw_ill,
		    ire->ire_zoneid, ire->ire_flags, NULL, ipst);
	} else {
		new_ire = ire_create((uchar_t *)&ire->ire_addr,
		    (uchar_t *)&ire->ire_mask,
		    (uchar_t *)&ire->ire_gateway_addr, ire->ire_type, gw_ill,
		    ire->ire_zoneid, ire->ire_flags, NULL, ipst);
	}
	ire_refrele(gw_ire);

	if (new_ire == NULL)
		return;

	new_ire->ire_unbound = B_TRUE;
	new_ire = ire_add(new_ire);
	if (new_ire != NULL)
		ire_refrele(new_ire);
}