#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/zone.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <net/if.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <net/if_dl.h>
#include <inet/ipsec_impl.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/tunables.h>
#include <inet/mib2.h>
#include <netinet/ip6.h>
#include <inet/ip.h>
#include <inet/snmpcom.h>
#include <netinet/igmp.h>
#include <netinet/igmp_var.h>
#include <netinet/udp.h>
#include <netinet/ip_mroute.h>
#include <inet/ip_multi.h>
#include <inet/ip_ire.h>
#include <inet/ip_ndp.h>
#include <inet/ip_if.h>
#include <inet/ipclassifier.h>
#include <netinet/pim.h>
/*
 * Sentinel vif index. It equals MAXVIFS, i.e. one past the largest index
 * add_vif() accepts, and add_mfc() treats it as "no parent vif".
 */
#define NO_VIF MAXVIFS
/*
 * Base interval for upcall expiry, expressed as a fraction of hz
 * (presumably the kernel's clock ticks per second -- confirm).
 */
#define EXPIRE_TIMEOUT (hz/4)
/*
 * Multiplier applied to EXPIRE_TIMEOUT: ip_mforward() arms the
 * expire_upcalls() timer with EXPIRE_TIMEOUT * UPCALL_EXPIRE.
 */
#define UPCALL_EXPIRE 6
/*
 * Hash an (origin, group) address pair into a bucket of the mfc table by
 * XOR-folding both addresses and reducing the result with MFCHASHMOD.
 */
#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
((g) >> 20) ^ ((g) >> 10) ^ (g))
/*
 * Delay, as a fraction of hz, presumably used when rescheduling the token
 * bucket filter queue (the user of this constant is not in view here).
 */
#define TBF_REPROCESS (hz / 100)
/* All-ones marker value; its user is not in view here. */
#define PIM_REGISTER_MARKER 0xffffffff
/*
 * Forward declarations for the file-local routines, grouped by role.
 */

/* Handlers dispatched from ip_mrouter_set() / ip_mrouter_get(). */
static int ip_mrouter_init(conn_t *, uchar_t *, int, ip_stack_t *);
static int add_vif(struct vifctl *, conn_t *, ip_stack_t *);
static int del_vif(vifi_t *, ip_stack_t *);
static int add_mfc(struct mfcctl *, ip_stack_t *);
static int del_mfc(struct mfcctl *, ip_stack_t *);
static int set_assert(int *, ip_stack_t *);
static int get_assert(uchar_t *, ip_stack_t *);
static int get_version(uchar_t *);

/* Counter queries dispatched from mrt_ioctl(). */
static int get_vif_cnt(struct sioc_vif_req *, ip_stack_t *);
static int get_sg_cnt(struct sioc_sg_req *, ip_stack_t *);
static int get_lsg_cnt(struct sioc_lsg_req *, ip_stack_t *);

/* vif table and forwarding cache maintenance. */
static void del_vifp(struct vif *);
static void fill_route(struct mfc *, struct mfcctl *, ip_stack_t *);
static void free_queue(struct mfc *);
static void release_mfc(struct mfcb *);
static void expire_upcalls(void *);
static boolean_t is_mrouter_off(ip_stack_t *);

/* Packet dispatch and per-vif send routines. */
static int ip_mdq(mblk_t *, ipha_t *, ill_t *, ipaddr_t, struct mfc *);
static int register_mforward(mblk_t *, ip_recv_attr_t *);
static void phyint_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static void encap_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);
static void register_send(ipha_t *, mblk_t *, struct vif *, ipaddr_t);

/* Token bucket filter (tbf) routines. */
static int priority(struct vif *, ipha_t *);
static void tbf_control(struct vif *, mblk_t *, ipha_t *);
static int tbf_dq_sel(struct vif *, ipha_t *);
static void tbf_process_q(struct vif *);
static void tbf_queue(struct vif *, mblk_t *);
static void tbf_reprocess_q(void *);
static void tbf_send_packet(struct vif *, mblk_t *);
static void tbf_update_tokens(struct vif *);
/* TTL placed in the template header below. */
#define ENCAP_TTL 64
/*
 * Template IP header for encapsulated (IPPROTO_ENCAP) multicast packets.
 * The initializer is positional, so it depends on the field order of
 * ipha_t, which is declared elsewhere. The per-value notes below assume the
 * usual IPv4 header order -- TODO confirm against the ipha_t definition.
 * Members not listed here are implicitly zero.
 */
static ipha_t multicast_encap_iphdr = {
IP_SIMPLE_HDR_VERSION,
/* presumably type of service */
0,
/* presumably total length; only the bare header size is filled in here */
sizeof (ipha_t),
/* presumably identification */
0,
/* presumably fragment offset and flags */
0,
ENCAP_TTL, IPPROTO_ENCAP,
/* presumably header checksum */
0,
};
/*
 * Threshold compared against a TV_DELTA() result elsewhere in the file.
 * TV_DELTA() yields nanoseconds, so this is 3 seconds.
 * NOTE(review): the user of this constant is not in view here -- confirm.
 */
#define ASSERT_MSG_TIME 3000000000
/*
 * Reference counting for vif table entries; v_refcnt is protected by
 * v_lock.
 *
 * VIF_REFHOLD		takes v_lock, bumps the count, drops v_lock.
 * VIF_REFRELE_LOCKED	caller already holds v_lock.  Drops one reference.
 *			If that was the last reference and the entry is
 *			marked VIF_MARK_CONDEMNED, del_vifp() is called and
 *			v_lock is NOT released by the macro; in every other
 *			case v_lock is released.
 * VIF_REFRELE		takes v_lock and then behaves exactly like
 *			VIF_REFRELE_LOCKED.
 *
 * Note that the argument is evaluated more than once.
 */
#define	VIF_REFHOLD(vifp) {				\
	mutex_enter(&(vifp)->v_lock);			\
	++(vifp)->v_refcnt;				\
	mutex_exit(&(vifp)->v_lock);			\
}

#define	VIF_REFRELE_LOCKED(vifp) {			\
	if (--(vifp)->v_refcnt == 0 &&			\
	    ((vifp)->v_marks & VIF_MARK_CONDEMNED)) {	\
		del_vifp(vifp);				\
	} else {					\
		mutex_exit(&(vifp)->v_lock);		\
	}						\
}

#define	VIF_REFRELE(vifp) {				\
	mutex_enter(&(vifp)->v_lock);			\
	VIF_REFRELE_LOCKED(vifp);			\
}
/*
 * Reference counting for mfc hash buckets; mfcb_refcnt is protected by
 * mfcb_lock.
 *
 * MFCB_REFHOLD	bumps the bucket's reference count.
 * MFCB_REFRELE	drops one reference.  When the count reaches zero and the
 *		bucket is marked MFCB_MARK_CONDEMNED, release_mfc() is
 *		called with mfcb_lock still held.
 *
 * Note that the argument is evaluated more than once.
 */
#define	MFCB_REFHOLD(mfcb) {				\
	mutex_enter(&(mfcb)->mfcb_lock);		\
	(mfcb)->mfcb_refcnt += 1;			\
	ASSERT((mfcb)->mfcb_refcnt != 0);		\
	mutex_exit(&(mfcb)->mfcb_lock);			\
}

#define	MFCB_REFRELE(mfcb) {				\
	mutex_enter(&(mfcb)->mfcb_lock);		\
	ASSERT((mfcb)->mfcb_refcnt != 0);		\
	(mfcb)->mfcb_refcnt -= 1;			\
	if ((mfcb)->mfcb_refcnt == 0 &&			\
	    ((mfcb)->mfcb_marks & MFCB_MARK_CONDEMNED)) {	\
		release_mfc(mfcb);			\
	}						\
	mutex_exit(&(mfcb)->mfcb_lock);			\
}
/*
 * Look up a resolved forwarding cache entry in one hash bucket.
 *
 *	mfcbp	pointer to the bucket (struct mfcb *) to search
 *	o	origin address to match against mfc_origin.s_addr
 *	g	group address to match against mfc_mcastgrp.s_addr
 *	rt	lvalue that receives the matching struct mfc *, or NULL
 *
 * Only entries that have no queued upcall packets (mfc_rte == NULL) and
 * are not marked MFCB_MARK_CONDEMNED are returned.  The macro takes no
 * locks itself.
 *
 * Every argument is parenthesized in the expansion so that expressions
 * such as "&bucket" or "addr & mask" can be passed safely; "o" and "g"
 * are evaluated once per chain element examined.
 */
#define	MFCFIND(mfcbp, o, g, rt) {					\
	struct mfc *_mb_rt = NULL;					\
	(rt) = NULL;							\
	_mb_rt = (mfcbp)->mfcb_mfc;					\
	while (_mb_rt != NULL) {					\
		if ((_mb_rt->mfc_origin.s_addr == (o)) &&		\
		    (_mb_rt->mfc_mcastgrp.s_addr == (g)) &&		\
		    (_mb_rt->mfc_rte == NULL) &&			\
		    (!(_mb_rt->mfc_marks & MFCB_MARK_CONDEMNED))) {	\
			(rt) = _mb_rt;					\
			break;						\
		}							\
		_mb_rt = _mb_rt->mfc_next;				\
	}								\
}
/*
 * Compute a - b for two timestamps that have tv_sec / tv_nsec members and
 * store the difference, in nanoseconds, in "delta".
 *
 * The seconds difference is first narrowed to an int.  Differences of one
 * or two seconds are handled by plain additions; every other non-zero
 * difference is multiplied out.  That multiplication is carried out in
 * long long arithmetic: an int * int product overflows (undefined
 * behaviour) as soon as the two timestamps are three or more seconds
 * apart.  "delta" must itself be wide enough to hold the result.
 */
#define	TV_DELTA(a, b, delta) {					\
	int xxs;						\
								\
	(delta) = (a).tv_nsec - (b).tv_nsec;			\
	if ((xxs = (a).tv_sec - (b).tv_sec) != 0) {		\
		switch (xxs) {					\
		case 2:						\
			(delta) += 1000000000;			\
			/* FALLTHROUGH */			\
		case 1:						\
			(delta) += 1000000000;			\
			break;					\
		default:					\
			(delta) += (1000000000LL * xxs);	\
		}						\
	}							\
}
/*
 * True when timestamp a is strictly earlier than timestamp b.  Seconds are
 * compared first; nanoseconds only decide the result when a's seconds do
 * not exceed b's.
 */
#define	TV_LT(a, b)						\
	((a).tv_sec < (b).tv_sec ||				\
	((a).tv_sec <= (b).tv_sec && (a).tv_nsec < (b).tv_nsec))
/*
 * Handle an MRT_* "set" request.
 *
 *	cmd		MRT_* command code
 *	connp		connection issuing the request
 *	checkonly	non-zero: only report whether cmd is recognised
 *	data, datalen	command argument; interpreted by the handler
 *
 * Returns 0 or an errno value:
 *	EACCES		cmd is not MRT_INIT and connp is not the registered
 *			multicast router connection
 *	EOPNOTSUPP	unknown cmd
 *	EINVAL		cmd other than MRT_INIT/MRT_DONE issued while
 *			multicast routing is off
 * otherwise whatever the per-command handler returns.
 */
int
ip_mrouter_set(int cmd, conn_t *connp, int checkonly, uchar_t *data,
    int datalen)
{
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	boolean_t	denied;

	/* Only the registered router may issue anything but MRT_INIT. */
	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
	denied = (cmd != MRT_INIT && connp != ipst->ips_ip_g_mrouter) ?
	    B_TRUE : B_FALSE;
	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
	if (denied)
		return (EACCES);

	/* Check-only mode: validate the command code and do nothing else. */
	if (checkonly) {
		if (cmd == MRT_INIT || cmd == MRT_DONE ||
		    cmd == MRT_ADD_VIF || cmd == MRT_DEL_VIF ||
		    cmd == MRT_ADD_MFC || cmd == MRT_DEL_MFC ||
		    cmd == MRT_ASSERT)
			return (0);
		return (EOPNOTSUPP);
	}

	/* Nothing but init/done is accepted once routing is turned off. */
	if (cmd != MRT_INIT && cmd != MRT_DONE && is_mrouter_off(ipst))
		return (EINVAL);

	switch (cmd) {
	case MRT_INIT:
		return (ip_mrouter_init(connp, data, datalen, ipst));
	case MRT_DONE:
		return (ip_mrouter_done(ipst));
	case MRT_ADD_VIF:
		return (add_vif((struct vifctl *)data, connp, ipst));
	case MRT_DEL_VIF:
		return (del_vif((vifi_t *)data, ipst));
	case MRT_ADD_MFC:
		return (add_mfc((struct mfcctl *)data, ipst));
	case MRT_DEL_MFC:
		return (del_mfc((struct mfcctl *)data, ipst));
	case MRT_ASSERT:
		return (set_assert((int *)data, ipst));
	default:
		break;
	}
	return (EOPNOTSUPP);
}
/*
 * Handle an MRT_* "get" request.  Only the registered multicast router
 * connection may query; anyone else gets EACCES.  MRT_VERSION and
 * MRT_ASSERT store an int through "data"; any other cmd yields EOPNOTSUPP.
 */
int
ip_mrouter_get(int cmd, conn_t *connp, uchar_t *data)
{
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	int		rval;

	if (ipst->ips_ip_g_mrouter != connp)
		return (EACCES);

	if (cmd == MRT_VERSION)
		rval = get_version(data);
	else if (cmd == MRT_ASSERT)
		rval = get_assert(data, ipst);
	else
		rval = EOPNOTSUPP;

	return (rval);
}
/*
 * Multicast routing counter ioctls.  The request structure is read from,
 * and the answer written to, the third mblk of the message
 * (mp->b_cont->b_cont); this routine dereferences that chain without
 * checking it, so the caller must guarantee it exists.
 *
 * ipif, sin, ipip and if_req are not used here.
 *
 * Returns the handler's result, or EINVAL for an unrecognised ioctl.
 */
int
mrt_ioctl(ipif_t *ipif, sin_t *sin, queue_t *q, mblk_t *mp,
    ip_ioctl_cmd_t *ipip, void *if_req)
{
	struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
	conn_t		*connp = Q_TO_CONN(q);
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	mblk_t		*argmp = mp->b_cont->b_cont;
	int		err;

	switch (iocp->ioc_cmd) {
	case SIOCGETVIFCNT:
		err = get_vif_cnt((struct sioc_vif_req *)argmp->b_rptr, ipst);
		break;
	case SIOCGETSGCNT:
		err = get_sg_cnt((struct sioc_sg_req *)argmp->b_rptr, ipst);
		break;
	case SIOCGETLSGCNT:
		err = get_lsg_cnt((struct sioc_lsg_req *)argmp->b_rptr, ipst);
		break;
	default:
		err = EINVAL;
		break;
	}
	return (err);
}
/*
 * Report the packet, byte and wrong-interface counters of the forwarding
 * cache entry for (req->src, req->grp).  When no resolved entry exists all
 * three counters are set to 0xffffffff.  Always returns 0.
 */
static int
get_sg_cnt(struct sioc_sg_req *req, ip_stack_t *ipst)
{
	struct mfcb	*bucket;
	struct mfc	*entry;

	bucket = &ipst->ips_mfcs[MFCHASH(req->src.s_addr, req->grp.s_addr)];

	/* The bucket reference is held for as long as "entry" is used. */
	MFCB_REFHOLD(bucket);
	MFCFIND(bucket, req->src.s_addr, req->grp.s_addr, entry);
	if (entry == NULL) {
		req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffffU;
	} else {
		mutex_enter(&entry->mfc_mutex);
		req->pktcnt = entry->mfc_pkt_cnt;
		req->bytecnt = entry->mfc_byte_cnt;
		req->wrong_if = entry->mfc_wrong_if;
		mutex_exit(&entry->mfc_mutex);
	}
	MFCB_REFRELE(bucket);

	return (0);
}
/*
 * Handler for SIOCGETLSGCNT (see mrt_ioctl()).  Not implemented: both
 * arguments are ignored and ENXIO is always returned.
 */
static int
get_lsg_cnt(struct sioc_lsg_req *req, ip_stack_t *ipst)
{
return (ENXIO);
}
/*
 * Copy the in/out packet and byte counters of vif req->vifi into req.
 * Returns EINVAL when the index is not below the current number of vifs,
 * 0 otherwise.  The counters are read without taking any lock.
 */
static int
get_vif_cnt(struct sioc_vif_req *req, ip_stack_t *ipst)
{
	vifi_t			idx = req->vifi;
	const struct vif	*vifp;

	if (idx >= ipst->ips_numvifs)
		return (EINVAL);

	vifp = &ipst->ips_vifs[idx];
	req->icount = vifp->v_pkt_in;
	req->ocount = vifp->v_pkt_out;
	req->ibytes = vifp->v_bytes_in;
	req->obytes = vifp->v_bytes_out;

	return (0);
}
/*
 * MRT_VERSION: store the fixed version code 0x0305 as an int at "data".
 * Always returns 0.
 */
static int
get_version(uchar_t *data)
{
	*(int *)data = 0x0305;

	return (0);
}
/*
 * MRT_ASSERT (set): record the flag *i in ips_pim_assert.  Only the
 * values 0 and 1 are accepted; anything else returns EINVAL.
 */
static int
set_assert(int *i, ip_stack_t *ipst)
{
	int	flag = *i;

	switch (flag) {
	case 0:
	case 1:
		ipst->ips_pim_assert = flag;
		return (0);
	default:
		return (EINVAL);
	}
}
/*
 * MRT_ASSERT (get): store the current ips_pim_assert flag as an int at
 * "data".  Always returns 0.
 */
static int
get_assert(uchar_t *data, ip_stack_t *ipst)
{
	*(int *)data = ipst->ips_pim_assert;

	return (0);
}
/*
 * MRT_INIT: register connp as the multicast router for this stack.
 *
 * "data" must point at an int whose value is 1 (datalen == sizeof (int)),
 * otherwise ENOPROTOOPT is returned.  Fails with EADDRINUSE when a router
 * is already registered and with EINVAL when connp is not a raw IP
 * connection.  On success the previous ip_forwarding setting is saved and
 * forwarding is forced on unless the stack is already forwarding.
 */
static int
ip_mrouter_init(conn_t *connp, uchar_t *data, int datalen, ip_stack_t *ipst)
{
	int	err = 0;

	if (data == NULL || (datalen != sizeof (int)))
		return (ENOPROTOOPT);
	if (*(int *)data != 1)
		return (ENOPROTOOPT);

	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
	if (ipst->ips_ip_g_mrouter != NULL) {
		err = EADDRINUSE;
	} else if (!IPCL_IS_RAWIP(connp)) {
		err = EINVAL;
	} else {
		ipst->ips_ip_g_mrouter = connp;
		connp->conn_multi_router = 1;

		if (!WE_ARE_FORWARDING(ipst)) {
			if (ipst->ips_ip_mrtdebug > 1) {
				(void) mi_strlog(connp->conn_rq, 1, SL_TRACE,
				    "ip_mrouter_init: turning on forwarding");
			}
			/* Remembered so ip_mrouter_done() can restore it. */
			ipst->ips_saved_ip_forwarding =
			    ipst->ips_ip_forwarding;
			ipst->ips_ip_forwarding = IP_FORWARD_ALWAYS;
		}
	}
	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);

	return (err);
}
/*
 * Per-stack setup: initialise the router and last-encap mutexes and
 * allocate the zero-filled vif table (MAXVIFS + 1 entries), statistics
 * block, mfc hash table (MFCTBLSIZ buckets) and tbf table (MAXVIFS
 * entries).  All allocations use KM_SLEEP.
 */
void
ip_mrouter_stack_init(ip_stack_t *ipst)
{
	size_t	vifs_sz = sizeof (struct vif) * (MAXVIFS+1);
	size_t	mfcs_sz = sizeof (struct mfcb) * MFCTBLSIZ;
	size_t	tbfs_sz = sizeof (struct tbf) * MAXVIFS;

	mutex_init(&ipst->ips_ip_g_mrouter_mutex, NULL, MUTEX_DEFAULT, NULL);

	ipst->ips_vifs = kmem_zalloc(vifs_sz, KM_SLEEP);
	ipst->ips_mrtstat = kmem_zalloc(sizeof (struct mrtstat), KM_SLEEP);
	ipst->ips_mfcs = kmem_zalloc(mfcs_sz, KM_SLEEP);
	ipst->ips_tbfs = kmem_zalloc(tbfs_sz, KM_SLEEP);

	mutex_init(&ipst->ips_last_encap_lock, NULL, MUTEX_DEFAULT, NULL);

	/* Record the control structure sizes in the statistics block. */
	ipst->ips_mrtstat->mrts_vifctlSize = sizeof (struct vifctl);
	ipst->ips_mrtstat->mrts_mfcctlSize = sizeof (struct mfcctl);
}
/*
 * MRT_DONE: turn multicast routing off for this stack.
 *
 * Restores the saved ip_forwarding setting, clears the last-encap cache,
 * condemns every active vif and every forwarding cache entry, and finally
 * clears the registered router connection.
 *
 * Returns EINVAL when no multicast router is registered, 0 otherwise.
 */
int
ip_mrouter_done(ip_stack_t *ipst)
{
	conn_t		*mrouter;
	vifi_t		vifi;
	struct mfc	*mfc_rt;
	int		i;

	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
	if (ipst->ips_ip_g_mrouter == NULL) {
		mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
		return (EINVAL);
	}

	mrouter = ipst->ips_ip_g_mrouter;

	/* Undo the forwarding override made by ip_mrouter_init(), if any. */
	if (ipst->ips_saved_ip_forwarding != -1) {
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "ip_mrouter_done: turning off forwarding");
		}
		ipst->ips_ip_forwarding = ipst->ips_saved_ip_forwarding;
		ipst->ips_saved_ip_forwarding = -1;
	}

	/* Always clear the encap cache when the vifs change. */
	mutex_enter(&ipst->ips_last_encap_lock);
	ipst->ips_last_encap_src = 0;
	ipst->ips_last_encap_vif = NULL;
	mutex_exit(&ipst->ips_last_encap_lock);

	/* From here on is_mrouter_off() reports B_TRUE. */
	mrouter->conn_multi_router = 0;
	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);

	for (vifi = 0; vifi < MAXVIFS; vifi++) {
		struct vif *vifp = ipst->ips_vifs + vifi;

		mutex_enter(&vifp->v_lock);
		if (vifp->v_marks & VIF_MARK_GOOD) {
			ASSERT(vifp->v_ipif != NULL);
			ipif_refhold(vifp->v_ipif);
			/* Physical-interface vifs only. */
			if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
				ipif_t *ipif = vifp->v_ipif;
				ilm_t *ilm = vifp->v_ilm;

				vifp->v_ilm = NULL;
				vifp->v_marks &= ~VIF_MARK_GOOD;
				vifp->v_marks |= VIF_MARK_CONDEMNED;

				/* v_lock is dropped around ip_delmulti(). */
				mutex_exit(&(vifp)->v_lock);
				if (ilm != NULL) {
					ill_t *ill = ipif->ipif_ill;

					(void) ip_delmulti(ilm);
					ASSERT(ill->ill_mrouter_cnt > 0);
					atomic_dec_32(&ill->ill_mrouter_cnt);
				}
				mutex_enter(&vifp->v_lock);
			}
			ipif_refrele(vifp->v_ipif);
			/*
			 * Drop the reference taken in add_vif(); this also
			 * disposes of v_lock.
			 */
			VIF_REFRELE_LOCKED(vifp);
		} else {
			mutex_exit(&vifp->v_lock);
			continue;
		}
	}

	mutex_enter(&ipst->ips_numvifs_mutex);
	ipst->ips_numvifs = 0;
	ipst->ips_pim_assert = 0;
	ipst->ips_reg_vif_num = ALL_VIFS;
	mutex_exit(&ipst->ips_numvifs_mutex);

	/*
	 * Walk every hash bucket, stop outstanding upcall timers and condemn
	 * each entry.  A reference is held on the bucket for the duration of
	 * the walk; dropping it at the end lets release_mfc() free the
	 * condemned entries once nobody else holds the bucket.
	 */
	for (i = 0; i < MFCTBLSIZ; i++) {
		mutex_enter(&ipst->ips_mfcs[i].mfcb_lock);
		ipst->ips_mfcs[i].mfcb_refcnt++;
		ipst->ips_mfcs[i].mfcb_marks |= MFCB_MARK_CONDEMNED;
		mutex_exit(&ipst->ips_mfcs[i].mfcb_lock);
		mfc_rt = ipst->ips_mfcs[i].mfcb_mfc;
		while (mfc_rt) {
			mutex_enter(&mfc_rt->mfc_mutex);
			if (mfc_rt->mfc_rte != NULL) {
				if (mfc_rt->mfc_timeout_id != 0) {
					/*
					 * Save the id before clearing the
					 * field: untimeout() must be given
					 * the id that timeout() returned,
					 * not the zero just stored.  The
					 * entry lock is dropped around the
					 * call, as add_mfc() does; the
					 * bucket reference keeps the entry
					 * from being freed meanwhile.
					 */
					timeout_id_t id;

					id = mfc_rt->mfc_timeout_id;
					mfc_rt->mfc_timeout_id = 0;
					mutex_exit(&mfc_rt->mfc_mutex);
					(void) untimeout(id);
					mutex_enter(&mfc_rt->mfc_mutex);
				}
			}
			mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;
			mutex_exit(&mfc_rt->mfc_mutex);
			mfc_rt = mfc_rt->mfc_next;
		}
		MFCB_REFRELE(&ipst->ips_mfcs[i]);
	}

	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
	ipst->ips_ip_g_mrouter = NULL;
	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);
	return (0);
}
/*
 * Per-stack teardown: free any forwarding cache entries still linked into
 * the hash table (logging one line per entry), release the four tables
 * allocated by ip_mrouter_stack_init() and destroy the two mutexes.
 */
void
ip_mrouter_stack_destroy(ip_stack_t *ipst)
{
	struct mfcb	*bucket;
	struct mfc	*entry;
	int		i;

	for (i = 0; i < MFCTBLSIZ; i++) {
		bucket = &ipst->ips_mfcs[i];

		/* Pop entries off the head of the chain until it is empty. */
		for (entry = bucket->mfcb_mfc; entry != NULL;
		    entry = bucket->mfcb_mfc) {
			(void) printf("ip_mrouter_stack_destroy: free for %d\n",
			    i);
			bucket->mfcb_mfc = entry->mfc_next;
			free_queue(entry);
			mi_free(entry);
		}
	}

	kmem_free(ipst->ips_vifs, sizeof (struct vif) * (MAXVIFS+1));
	ipst->ips_vifs = NULL;

	kmem_free(ipst->ips_mrtstat, sizeof (struct mrtstat));
	ipst->ips_mrtstat = NULL;

	kmem_free(ipst->ips_mfcs, sizeof (struct mfcb) * MFCTBLSIZ);
	ipst->ips_mfcs = NULL;

	kmem_free(ipst->ips_tbfs, sizeof (struct tbf) * MAXVIFS);
	ipst->ips_tbfs = NULL;

	mutex_destroy(&ipst->ips_last_encap_lock);
	mutex_destroy(&ipst->ips_ip_g_mrouter_mutex);
}
/*
 * Returns B_TRUE when multicast routing is off for this stack: either no
 * router connection is registered, or the registered connection has had
 * conn_multi_router cleared (ip_mrouter_done() in progress or finished).
 */
static boolean_t
is_mrouter_off(ip_stack_t *ipst)
{
	conn_t		*mrouter;
	boolean_t	off;

	mutex_enter(&ipst->ips_ip_g_mrouter_mutex);
	mrouter = ipst->ips_ip_g_mrouter;
	if (mrouter == NULL || mrouter->conn_multi_router == 0)
		off = B_TRUE;
	else
		off = B_FALSE;
	mutex_exit(&ipst->ips_ip_g_mrouter_mutex);

	return (off);
}
static void
unlock_good_vif(struct vif *vifp)
{
ASSERT(vifp->v_ipif != NULL);
ipif_refrele(vifp->v_ipif);
VIF_REFRELE(vifp);
}
static boolean_t
lock_good_vif(struct vif *vifp)
{
mutex_enter(&vifp->v_lock);
if (!(vifp->v_marks & VIF_MARK_GOOD)) {
mutex_exit(&vifp->v_lock);
return (B_FALSE);
}
ASSERT(vifp->v_ipif != NULL);
mutex_enter(&vifp->v_ipif->ipif_ill->ill_lock);
if (!IPIF_CAN_LOOKUP(vifp->v_ipif)) {
mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock);
mutex_exit(&vifp->v_lock);
return (B_FALSE);
}
ipif_refhold_locked(vifp->v_ipif);
mutex_exit(&vifp->v_ipif->ipif_ill->ill_lock);
vifp->v_refcnt++;
mutex_exit(&vifp->v_lock);
return (B_TRUE);
}
/*
 * MRT_ADD_VIF: install a virtual interface in slot vifcp->vifc_vifi.
 *
 * vifcp	description of the vif (index, flags, local/remote address,
 *		threshold, rate limit)
 * connp	requesting connection; its zone is used for the address lookup
 * ipst		IP stack instance
 *
 * Returns 0 on success, otherwise:
 *	EINVAL		index >= MAXVIFS, routing is off, local address is
 *			zero, or joining the all-zeros group failed without
 *			an error code
 *	EADDRINUSE	the slot is in use or being set up, or a register vif
 *			already exists
 *	EADDRNOTAVAIL	no ipif owns the local address
 *	EOPNOTSUPP	source-route tunnel requested, or the interface is
 *			not multicast capable
 *
 * While the slot is being set up it has v_marks == 0 and v_refcnt != 0;
 * that combination makes a concurrent add_vif() fail with EADDRINUSE and a
 * concurrent del_vif() fail with EADDRNOTAVAIL.
 */
static int
add_vif(struct vifctl *vifcp, conn_t *connp, ip_stack_t *ipst)
{
/* Only computed here; not dereferenced until the index has been checked. */
struct vif *vifp = ipst->ips_vifs + vifcp->vifc_vifi;
ipif_t *ipif;
int error = 0;
struct tbf *v_tbf = ipst->ips_tbfs + vifcp->vifc_vifi;
conn_t *mrouter = ipst->ips_ip_g_mrouter;
ilm_t *ilm;
ill_t *ill;
ASSERT(connp != NULL);
if (vifcp->vifc_vifi >= MAXVIFS)
return (EINVAL);
if (is_mrouter_off(ipst))
return (EINVAL);
mutex_enter(&vifp->v_lock);
/* The slot must be completely unused. */
if (vifp->v_lcl_addr.s_addr != 0 ||
vifp->v_marks != 0 ||
vifp->v_refcnt != 0) {
mutex_exit(&vifp->v_lock);
return (EADDRINUSE);
}
/* The requested local address must not be zero. */
if (vifcp->vifc_lcl_addr.s_addr == 0) {
mutex_exit(&vifp->v_lock);
return (EINVAL);
}
/* Claim the slot; this reference is kept until the vif is deleted. */
vifp->v_refcnt++;
mutex_exit(&vifp->v_lock);
/* Find the interface that owns the local address (returns it held). */
ipif = ipif_lookup_addr((ipaddr_t)vifcp->vifc_lcl_addr.s_addr, NULL,
IPCL_ZONEID(connp), ipst);
if (ipif == NULL) {
VIF_REFRELE(vifp);
return (EADDRNOTAVAIL);
}
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"add_vif: src 0x%x enter",
vifcp->vifc_lcl_addr.s_addr);
}
mutex_enter(&vifp->v_lock);
/* Always clear the encap cache when the vif table changes. */
mutex_enter(&ipst->ips_last_encap_lock);
ipst->ips_last_encap_src = 0;
ipst->ips_last_encap_vif = NULL;
mutex_exit(&ipst->ips_last_encap_lock);
if (vifcp->vifc_flags & VIFF_TUNNEL) {
if ((vifcp->vifc_flags & VIFF_SRCRT) != 0) {
cmn_err(CE_WARN,
"add_vif: source route tunnels not supported\n");
VIF_REFRELE_LOCKED(vifp);
ipif_refrele(ipif);
return (EOPNOTSUPP);
}
vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
} else {
/* Physical-interface or register vif. */
if (vifcp->vifc_flags & VIFF_REGISTER) {
/* Only one register vif may exist at a time. */
mutex_enter(&ipst->ips_numvifs_mutex);
if (ipst->ips_reg_vif_num == ALL_VIFS) {
ipst->ips_reg_vif_num = vifcp->vifc_vifi;
mutex_exit(&ipst->ips_numvifs_mutex);
} else {
mutex_exit(&ipst->ips_numvifs_mutex);
VIF_REFRELE_LOCKED(vifp);
ipif_refrele(ipif);
return (EADDRINUSE);
}
}
/* The underlying interface must support multicast. */
if ((ipif->ipif_ill->ill_flags & ILLF_MULTICAST) == 0) {
VIF_REFRELE_LOCKED(vifp);
ipif_refrele(ipif);
if (vifcp->vifc_flags & VIFF_REGISTER) {
mutex_enter(&ipst->ips_numvifs_mutex);
ipst->ips_reg_vif_num = ALL_VIFS;
mutex_exit(&ipst->ips_numvifs_mutex);
}
return (EOPNOTSUPP);
}
/* v_lock is not held across the multicast join below. */
mutex_exit(&vifp->v_lock);
ill = ipif->ipif_ill;
/* For an interface under IPMP the join is done on the IPMP ill. */
if (IS_UNDER_IPMP(ill))
ill = ipmp_ill_hold_ipmp_ill(ill);
if (ill == NULL) {
ilm = NULL;
} else {
/* Join the all-zeros group on the ill and count the join. */
ilm = ip_addmulti(&ipv6_all_zeros, ill,
ipif->ipif_zoneid, &error);
if (ilm != NULL)
atomic_inc_32(&ill->ill_mrouter_cnt);
if (IS_UNDER_IPMP(ipif->ipif_ill)) {
ill_refrele(ill);
ill = ipif->ipif_ill;
}
}
mutex_enter(&vifp->v_lock);
/*
 * The lock was dropped, so re-check that ip_mrouter_done() has not run
 * in the meantime; back everything out if it has or if the join failed.
 */
if (ilm == NULL || is_mrouter_off(ipst)) {
if (ilm != NULL) {
(void) ip_delmulti(ilm);
ASSERT(ill->ill_mrouter_cnt > 0);
atomic_dec_32(&ill->ill_mrouter_cnt);
}
if (vifcp->vifc_flags & VIFF_REGISTER) {
mutex_enter(&ipst->ips_numvifs_mutex);
ipst->ips_reg_vif_num = ALL_VIFS;
mutex_exit(&ipst->ips_numvifs_mutex);
}
VIF_REFRELE_LOCKED(vifp);
ipif_refrele(ipif);
return (error?error:EINVAL);
}
vifp->v_ilm = ilm;
}
/* Set up this vif's token bucket filter. */
vifp->v_tbf = v_tbf;
gethrestime(&vifp->v_tbf->tbf_last_pkt_t);
vifp->v_tbf->tbf_n_tok = 0;
vifp->v_tbf->tbf_q_len = 0;
vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
vifp->v_flags = vifcp->vifc_flags;
vifp->v_threshold = vifcp->vifc_threshold;
vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
/* The vif keeps the ipif pointer but not the lookup hold. */
vifp->v_ipif = ipif;
ipif_refrele(ipif);
/*
 * NOTE(review): (1024/1000) is integer division and evaluates to 1, so
 * the rate limit is stored unscaled. If scaling by 1.024 was intended the
 * expression would need to be vifc_rate_limit * 1024 / 1000 -- confirm
 * against the code that consumes v_rate_limit before changing it.
 */
vifp->v_rate_limit = vifcp->vifc_rate_limit * (1024/1000);
vifp->v_timeout_id = 0;
/* Reset the per-vif packet and byte counters. */
vifp->v_pkt_in = 0;
vifp->v_pkt_out = 0;
vifp->v_bytes_in = 0;
vifp->v_bytes_out = 0;
mutex_init(&vifp->v_tbf->tbf_lock, NULL, MUTEX_DEFAULT, NULL);
/* Grow numvifs so that it covers the new index. */
mutex_enter(&ipst->ips_numvifs_mutex);
if (ipst->ips_numvifs <= vifcp->vifc_vifi)
ipst->ips_numvifs = vifcp->vifc_vifi + 1;
mutex_exit(&ipst->ips_numvifs_mutex);
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"add_vif: #%d, lcladdr %x, %s %x, thresh %x, rate %d",
vifcp->vifc_vifi,
ntohl(vifcp->vifc_lcl_addr.s_addr),
(vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
ntohl(vifcp->vifc_rmt_addr.s_addr),
vifcp->vifc_threshold, vifcp->vifc_rate_limit);
}
/* Publish the vif; lock_good_vif() and del_vif() can now see it. */
vifp->v_marks = VIF_MARK_GOOD;
mutex_exit(&vifp->v_lock);
return (0);
}
static void
del_vifp(struct vif *vifp)
{
struct tbf *t = vifp->v_tbf;
mblk_t *mp0;
vifi_t vifi;
ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst;
conn_t *mrouter = ipst->ips_ip_g_mrouter;
ASSERT(vifp->v_marks & VIF_MARK_CONDEMNED);
ASSERT(t != NULL);
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"del_vif: src 0x%x\n", vifp->v_lcl_addr.s_addr);
}
if (vifp->v_timeout_id != 0) {
(void) untimeout(vifp->v_timeout_id);
vifp->v_timeout_id = 0;
}
mutex_enter(&t->tbf_lock);
while (t->tbf_q != NULL) {
mp0 = t->tbf_q;
t->tbf_q = t->tbf_q->b_next;
mp0->b_prev = mp0->b_next = NULL;
freemsg(mp0);
}
mutex_exit(&t->tbf_lock);
mutex_enter(&ipst->ips_last_encap_lock);
if (vifp == ipst->ips_last_encap_vif) {
ipst->ips_last_encap_vif = NULL;
ipst->ips_last_encap_src = 0;
}
mutex_exit(&ipst->ips_last_encap_lock);
mutex_destroy(&t->tbf_lock);
bzero(vifp->v_tbf, sizeof (*(vifp->v_tbf)));
mutex_enter(&ipst->ips_numvifs_mutex);
for (vifi = ipst->ips_numvifs; vifi != 0; vifi--)
if (ipst->ips_vifs[vifi - 1].v_lcl_addr.s_addr != 0)
break;
ipst->ips_numvifs = vifi;
mutex_exit(&ipst->ips_numvifs_mutex);
bzero(vifp, sizeof (*vifp));
}
/*
 * MRT_DEL_VIF: condemn the vif with index *vifip.
 *
 * Returns EINVAL when the index is not below ips_numvifs, EADDRNOTAVAIL
 * when the slot has no address or is not marked VIF_MARK_GOOD (which
 * includes a slot still being set up by add_vif()), and 0 on success.
 * The vif is torn down by del_vifp() once its last reference is dropped,
 * which may happen right here.
 */
static int
del_vif(vifi_t *vifip, ip_stack_t *ipst)
{
	struct vif	*vifp = ipst->ips_vifs + *vifip;
	boolean_t	is_phyint;

	if (*vifip >= ipst->ips_numvifs)
		return (EINVAL);

	mutex_enter(&vifp->v_lock);
	if (vifp->v_lcl_addr.s_addr == 0 ||
	    !(vifp->v_marks & VIF_MARK_GOOD)) {
		mutex_exit(&vifp->v_lock);
		return (EADDRNOTAVAIL);
	}

	/* GOOD -> CONDEMNED: no new holds via lock_good_vif() from now on. */
	vifp->v_marks = (vifp->v_marks & ~VIF_MARK_GOOD) | VIF_MARK_CONDEMNED;

	is_phyint = (vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER)) ?
	    B_FALSE : B_TRUE;
	if (is_phyint) {
		ipif_t	*ipif = vifp->v_ipif;
		ilm_t	*ilm = vifp->v_ilm;

		vifp->v_ilm = NULL;
		ASSERT(ipif != NULL);

		/* Leave the group joined by add_vif() without v_lock held. */
		mutex_exit(&(vifp)->v_lock);
		if (ilm != NULL) {
			(void) ip_delmulti(ilm);
			ASSERT(ipif->ipif_ill->ill_mrouter_cnt > 0);
			atomic_dec_32(&ipif->ipif_ill->ill_mrouter_cnt);
		}
		mutex_enter(&(vifp)->v_lock);
	}

	if (vifp->v_flags & VIFF_REGISTER) {
		mutex_enter(&ipst->ips_numvifs_mutex);
		ipst->ips_reg_vif_num = ALL_VIFS;
		mutex_exit(&ipst->ips_numvifs_mutex);
	}

	/* Drop the reference taken in add_vif(); also disposes of v_lock. */
	VIF_REFRELE_LOCKED(vifp);
	return (0);
}
/*
 * MRT_ADD_MFC: install or update the forwarding cache entry for
 * (mfccp->mfcc_origin, mfccp->mfcc_mcastgrp).
 *
 * Three cases are handled, in this order:
 *  1. A resolved entry already exists: only its parent and ttls are updated.
 *  2. One or more entries with queued upcall packets exist: each is filled
 *     in, its expiry timer is cancelled and its queued packets are passed
 *     to ip_mdq().
 *  3. Neither exists: a matching non-condemned entry is refilled, or a new
 *     entry is allocated and linked at the head of the hash chain.
 *
 * Returns 0 on success, EINVAL for a bad parent vif or when routing is off,
 * ENOBUFS when a new entry cannot be allocated.
 */
static int
add_mfc(struct mfcctl *mfccp, ip_stack_t *ipst)
{
struct mfc *rt;
struct rtdetq *rte;
ushort_t nstl;
int i;
struct mfcb *mfcbp;
conn_t *mrouter = ipst->ips_ip_g_mrouter;
/* A parent equal to MAXVIFS (NO_VIF) is accepted; anything larger is not. */
if (mfccp->mfcc_parent > MAXVIFS) {
ip0dbg(("ADD_MFC: mfcc_parent out of range %d",
(int)mfccp->mfcc_parent));
return (EINVAL);
}
/* A real parent vif must have an interface attached. */
if ((mfccp->mfcc_parent != NO_VIF) &&
(ipst->ips_vifs[mfccp->mfcc_parent].v_ipif == NULL)) {
ip0dbg(("ADD_MFC: NULL ipif for parent vif %d\n",
(int)mfccp->mfcc_parent));
return (EINVAL);
}
if (is_mrouter_off(ipst)) {
return (EINVAL);
}
mfcbp = &ipst->ips_mfcs[MFCHASH(mfccp->mfcc_origin.s_addr,
mfccp->mfcc_mcastgrp.s_addr)];
/* Hold the bucket for the rest of the function. */
MFCB_REFHOLD(mfcbp);
MFCFIND(mfcbp, mfccp->mfcc_origin.s_addr,
mfccp->mfcc_mcastgrp.s_addr, rt);
/* Case 1: a resolved entry exists, just update parent and ttls. */
if (rt) {
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"add_mfc: update o %x grp %x parent %x",
ntohl(mfccp->mfcc_origin.s_addr),
ntohl(mfccp->mfcc_mcastgrp.s_addr),
mfccp->mfcc_parent);
}
mutex_enter(&rt->mfc_mutex);
rt->mfc_parent = mfccp->mfcc_parent;
mutex_enter(&ipst->ips_numvifs_mutex);
for (i = 0; i < (int)ipst->ips_numvifs; i++)
rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
mutex_exit(&ipst->ips_numvifs_mutex);
mutex_exit(&rt->mfc_mutex);
MFCB_REFRELE(mfcbp);
return (0);
}
/*
 * Case 2: find the entries for which an upcall is pending (mfc_rte != NULL).
 * nstl counts the matches; more than one is reported with a warning.
 */
for (rt = mfcbp->mfcb_mfc, nstl = 0; rt; rt = rt->mfc_next) {
mutex_enter(&rt->mfc_mutex);
if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
(rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
(rt->mfc_rte != NULL) &&
!(rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
if (nstl++ != 0)
cmn_err(CE_WARN,
"add_mfc: %s o %x g %x p %x",
"multiple kernel entries",
ntohl(mfccp->mfcc_origin.s_addr),
ntohl(mfccp->mfcc_mcastgrp.s_addr),
mfccp->mfcc_parent);
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1,
SL_TRACE,
"add_mfc: o %x g %x p %x",
ntohl(mfccp->mfcc_origin.s_addr),
ntohl(mfccp->mfcc_mcastgrp.s_addr),
mfccp->mfcc_parent);
}
fill_route(rt, mfccp, ipst);
/*
 * Cancel the upcall expiry timer armed by ip_mforward(). The id is saved
 * and the field cleared before the entry lock is dropped for untimeout();
 * the bucket reference keeps the entry from being freed meanwhile.
 */
if (rt->mfc_timeout_id != 0) {
timeout_id_t id;
id = rt->mfc_timeout_id;
rt->mfc_timeout_id = 0;
mutex_exit(&rt->mfc_mutex);
(void) untimeout(id);
mutex_enter(&rt->mfc_mutex);
}
/*
 * Send every packet that was queued waiting for this route. The entry
 * lock is dropped around each ip_mdq() call, and the ip_mdq() result is
 * ignored.
 */
while (rt->mfc_rte != NULL) {
rte = rt->mfc_rte;
rt->mfc_rte = rte->rte_next;
mutex_exit(&rt->mfc_mutex);
(void) ip_mdq(rte->mp, (ipha_t *)
rte->mp->b_rptr, rte->ill, 0, rt);
freemsg(rte->mp);
mi_free((char *)rte);
mutex_enter(&rt->mfc_mutex);
}
}
mutex_exit(&rt->mfc_mutex);
}
/* Case 3: the entry is being installed without a preceding upcall. */
if (nstl == 0) {
mutex_enter(&(mfcbp->mfcb_lock));
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"add_mfc: no upcall o %x g %x p %x",
ntohl(mfccp->mfcc_origin.s_addr),
ntohl(mfccp->mfcc_mcastgrp.s_addr),
mfccp->mfcc_parent);
}
/* Routing may have been turned off while we were not holding the lock. */
if (is_mrouter_off(ipst)) {
mutex_exit(&mfcbp->mfcb_lock);
MFCB_REFRELE(mfcbp);
return (EINVAL);
}
/* Reuse a matching, non-condemned entry if one is on the chain. */
for (rt = mfcbp->mfcb_mfc; rt; rt = rt->mfc_next) {
mutex_enter(&rt->mfc_mutex);
if ((rt->mfc_origin.s_addr ==
mfccp->mfcc_origin.s_addr) &&
(rt->mfc_mcastgrp.s_addr ==
mfccp->mfcc_mcastgrp.s_addr) &&
(!(rt->mfc_marks & MFCB_MARK_CONDEMNED))) {
fill_route(rt, mfccp, ipst);
mutex_exit(&rt->mfc_mutex);
break;
}
mutex_exit(&rt->mfc_mutex);
}
/* Nothing to reuse: allocate a new entry and link it at the chain head. */
if (rt == NULL) {
rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
if (rt == NULL) {
ip1dbg(("add_mfc: out of memory\n"));
mutex_exit(&mfcbp->mfcb_lock);
MFCB_REFRELE(mfcbp);
return (ENOBUFS);
}
mutex_enter(&rt->mfc_mutex);
fill_route(rt, mfccp, ipst);
rt->mfc_next = mfcbp->mfcb_mfc;
mfcbp->mfcb_mfc = rt;
mutex_exit(&rt->mfc_mutex);
}
mutex_exit(&mfcbp->mfcb_lock);
}
MFCB_REFRELE(mfcbp);
return (0);
}
/*
 * Load a forwarding cache entry from a user-supplied mfcctl: origin,
 * group, parent vif and the per-vif ttls for the first ips_numvifs vifs.
 * The entry's counters and last-assert timestamp are reset to zero.
 * Takes ips_numvifs_mutex while copying the ttls; every caller in this
 * file invokes it with rt->mfc_mutex held.
 */
static void
fill_route(struct mfc *rt, struct mfcctl *mfccp, ip_stack_t *ipst)
{
	int	vif;

	rt->mfc_origin = mfccp->mfcc_origin;
	rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
	rt->mfc_parent = mfccp->mfcc_parent;

	mutex_enter(&ipst->ips_numvifs_mutex);
	vif = 0;
	while (vif < (int)ipst->ips_numvifs) {
		rt->mfc_ttls[vif] = mfccp->mfcc_ttls[vif];
		vif++;
	}
	mutex_exit(&ipst->ips_numvifs_mutex);

	/* Fresh statistics for the (re)installed route. */
	rt->mfc_pkt_cnt = 0;
	rt->mfc_byte_cnt = 0;
	rt->mfc_wrong_if = 0;
	rt->mfc_last_assert.tv_nsec = 0;
	rt->mfc_last_assert.tv_sec = 0;
}
/*
 * Free every packet queued on a forwarding cache entry while it waited
 * for an upcall answer, together with the queue elements themselves.
 * The entry's queue is empty (mfc_rte == NULL) on return.
 */
static void
free_queue(struct mfc *mfcp)
{
	struct rtdetq	*elem;

	for (elem = mfcp->mfc_rte; elem != NULL; elem = mfcp->mfc_rte) {
		mfcp->mfc_rte = elem->rte_next;
		freemsg(elem->mp);
		mi_free((char *)elem);
	}
}
/*
 * Free every condemned entry on a hash bucket's chain and clear the
 * bucket's own condemned mark.  Invoked from MFCB_REFRELE() when the last
 * reference on a condemned bucket is dropped, with mfcb_lock held.
 */
void
release_mfc(struct mfcb *mfcbp)
{
	struct mfc	**linkp = &mfcbp->mfcb_mfc;
	struct mfc	*victim;

	/*
	 * linkp always addresses the pointer that leads to the entry under
	 * inspection, so the head and interior entries unlink the same way.
	 */
	while ((victim = *linkp) != NULL) {
		if (victim->mfc_marks & MFCB_MARK_CONDEMNED) {
			*linkp = victim->mfc_next;
			free_queue(victim);
			mi_free(victim);
		} else {
			linkp = &victim->mfc_next;
		}
	}

	mfcbp->mfcb_marks &= ~MFCB_MARK_CONDEMNED;
	ASSERT(mfcbp->mfcb_mfc != NULL || mfcbp->mfcb_marks == 0);
}
/*
 * MRT_DEL_MFC: condemn the resolved forwarding cache entry for
 * (mfccp->mfcc_origin, mfccp->mfcc_mcastgrp).
 *
 * Only an entry without queued upcall packets and not already condemned
 * is considered.  The entry and its bucket are marked condemned; the
 * memory is freed by release_mfc() when the last bucket reference is
 * dropped, possibly by the MFCB_REFRELE() at the end of this function.
 *
 * Returns 0 on success, EADDRNOTAVAIL when no such entry exists.
 */
static int
del_mfc(struct mfcctl *mfccp, ip_stack_t *ipst)
{
	struct in_addr	origin;
	struct in_addr	mcastgrp;
	struct mfc	*rt;
	uint_t		hash;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;

	origin = mfccp->mfcc_origin;
	mcastgrp = mfccp->mfcc_mcastgrp;
	hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "del_mfc: o %x g %x",
		    ntohl(origin.s_addr),
		    ntohl(mcastgrp.s_addr));
	}

	MFCB_REFHOLD(&ipst->ips_mfcs[hash]);

	/* On a match the loop exits with rt->mfc_mutex still held. */
	for (rt = ipst->ips_mfcs[hash].mfcb_mfc; rt; rt = rt->mfc_next) {
		mutex_enter(&rt->mfc_mutex);
		if (origin.s_addr == rt->mfc_origin.s_addr &&
		    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
		    rt->mfc_rte == NULL &&
		    !(rt->mfc_marks & MFCB_MARK_CONDEMNED))
			break;
		mutex_exit(&rt->mfc_mutex);
	}

	if (rt == NULL) {
		MFCB_REFRELE(&ipst->ips_mfcs[hash]);
		return (EADDRNOTAVAIL);
	}

	/* Set without mfcb_lock; this thread holds a bucket reference. */
	ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;

	if (rt->mfc_timeout_id != 0) {
		/*
		 * Save the id before clearing the field: untimeout() must
		 * be given the id that timeout() returned, not the zero
		 * just stored.  The entry lock is dropped around the call,
		 * as add_mfc() does; the bucket reference keeps the entry
		 * from being freed meanwhile.
		 */
		timeout_id_t id;

		ip0dbg(("del_mfc: TIMEOUT NOT 0, rte not null"));
		id = rt->mfc_timeout_id;
		rt->mfc_timeout_id = 0;
		mutex_exit(&rt->mfc_mutex);
		(void) untimeout(id);
		mutex_enter(&rt->mfc_mutex);
	}

	ASSERT(rt->mfc_rte == NULL);

	rt->mfc_marks |= MFCB_MARK_CONDEMNED;
	mutex_exit(&rt->mfc_mutex);

	MFCB_REFRELE(&ipst->ips_mfcs[hash]);
	return (0);
}
/*
 * Number of bytes added to IP_SIMPLE_HDR_LENGTH in the header-length test
 * that ip_mforward() uses to recognise source-route tunnelled packets.
 */
#define TUNNEL_LEN 12
/*
 * Multicast forwarding entry point for a received packet.
 *
 * mp	the packet; its data starts with the IP header
 * ira	receive attributes (receiving ill, PIM-register and tunnel flags)
 *
 * If a resolved forwarding cache entry exists for (src, dst) the packet is
 * handed to ip_mdq() and its result is returned. Otherwise a copy of the
 * packet is queued on a pending cache entry and, for the first packet of a
 * new entry, a second copy is sent to the routing daemon as an
 * IGMPMSG_NOCACHE upcall and the upcall expiry timer is armed.
 *
 * Return values produced directly by this function:
 *	-1	source-routed packet, routing is off, allocation failure or
 *		upcall queue overflow
 *	 1	packet was not forwarded (ttl / local group) or was queued,
 *		and tunnel_src is non-zero
 *	 0	same as above with tunnel_src zero
 *
 * mp itself is never freed or queued here; only copies are.
 */
int
ip_mforward(mblk_t *mp, ip_recv_attr_t *ira)
{
ipha_t *ipha = (ipha_t *)mp->b_rptr;
ill_t *ill = ira->ira_ill;
struct mfc *rt;
ipaddr_t src, dst, tunnel_src = 0;
/* Counts source-routed packets so that the warning below is rate limited. */
static int srctun = 0;
vifi_t vifi;
boolean_t pim_reg_packet = B_FALSE;
struct mfcb *mfcbp;
ip_stack_t *ipst = ill->ill_ipst;
/* Read without ips_ip_g_mrouter_mutex; used for logging and the upcall. */
conn_t *mrouter = ipst->ips_ip_g_mrouter;
ill_t *rill = ira->ira_rill;
ASSERT(ira->ira_pktlen == msgdsize(mp));
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"ip_mforward: RECV ipha_src %x, ipha_dst %x, ill %s",
ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
ill->ill_name);
}
dst = ipha->ipha_dst;
/* Work out how the packet arrived: PIM register, encap tunnel, or neither. */
if (ira->ira_flags & IRAF_PIM_REGISTER)
pim_reg_packet = B_TRUE;
else if (ira->ira_flags & IRAF_MROUTE_TUNNEL_SET)
tunnel_src = ira->ira_mroute_tunnel;
/*
 * Do not forward a packet whose ttl is 0 or 1, or one addressed to a group
 * at or below INADDR_MAX_LOCAL_GROUP.
 */
if (CLASSD(dst) && (ipha->ipha_ttl <= 1 ||
(ipaddr_t)ntohl(dst) <= INADDR_MAX_LOCAL_GROUP)) {
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"ip_mforward: not forwarded ttl %d,"
" dst 0x%x ill %s",
ipha->ipha_ttl, ntohl(dst), ill->ill_name);
}
if (tunnel_src != 0)
return (1);
else
return (0);
}
if ((tunnel_src != 0) || pim_reg_packet) {
/* Arrived over an encapsulating tunnel or in a PIM register message. */
if (ipst->ips_ip_mrtdebug > 1) {
if (tunnel_src != 0) {
(void) mi_strlog(mrouter->conn_rq, 1,
SL_TRACE,
"ip_mforward: ill %s arrived via ENCAP TUN",
ill->ill_name);
} else if (pim_reg_packet) {
(void) mi_strlog(mrouter->conn_rq, 1,
SL_TRACE,
"ip_mforward: ill %s arrived via"
" REGISTER VIF",
ill->ill_name);
}
}
} else if ((ipha->ipha_version_and_hdr_length & 0xf) <
(uint_t)(IP_SIMPLE_HDR_LENGTH + TUNNEL_LEN) >> 2 ||
((uchar_t *)(ipha + 1))[1] != IPOPT_LSRR) {
/*
 * Header too short to carry the tunnel option, or the byte tested is not
 * IPOPT_LSRR: the packet arrived on a physical interface.
 */
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"ip_mforward: ill %s arrived via PHYINT",
ill->ill_name);
}
} else {
/* Source-route tunnel: rejected; warn on every 1000th such packet. */
if ((srctun++ % 1000) == 0) {
cmn_err(CE_WARN,
"ip_mforward: received source-routed pkt from %x",
ntohl(ipha->ipha_src));
}
return (-1);
}
ipst->ips_mrtstat->mrts_fwd_in++;
src = ipha->ipha_src;
if (is_mrouter_off(ipst))
return (-1);
/* Look for a resolved cache entry, holding the bucket while it is used. */
mfcbp = &ipst->ips_mfcs[MFCHASH(src, dst)];
MFCB_REFHOLD(mfcbp);
MFCFIND(mfcbp, src, dst, rt);
if (rt != NULL) {
/* Cache hit: forward according to the entry. */
int ret = 0;
ipst->ips_mrtstat->mrts_mfc_hits++;
if (pim_reg_packet) {
/* A register packet is treated as arriving on the register vif's ill. */
ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
ret = ip_mdq(mp, ipha,
ipst->ips_vifs[ipst->ips_reg_vif_num].
v_ipif->ipif_ill,
0, rt);
} else {
ret = ip_mdq(mp, ipha, ill, tunnel_src, rt);
}
MFCB_REFRELE(mfcbp);
return (ret);
} else {
/*
 * Cache miss: queue a copy of the packet on a pending entry and, if this
 * is the first packet for the entry, notify the routing daemon.
 */
struct mfc *mfc_rt = NULL;
mblk_t *mp0 = NULL;
mblk_t *mp_copy = NULL;
struct rtdetq *rte = NULL;
struct rtdetq *rte_m, *rte1, *prev_rte;
uint_t hash;
int npkts;
boolean_t new_mfc = B_FALSE;
ipst->ips_mrtstat->mrts_mfc_misses++;
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"ip_mforward: no rte ill %s src %x g %x misses %d",
ill->ill_name, ntohl(src), ntohl(dst),
(int)ipst->ips_mrtstat->mrts_mfc_misses);
}
/* Same bucket as mfcbp; its lock is held until the entry is linked. */
hash = MFCHASH(src, dst);
mutex_enter(&(ipst->ips_mfcs[hash].mfcb_lock));
/* Give up if routing is being turned off. */
if (is_mrouter_off(ipst)) {
mutex_exit(&mfcbp->mfcb_lock);
MFCB_REFRELE(mfcbp);
return (-1);
}
/* Is an upcall already pending for this (src, dst)? */
for (mfc_rt = ipst->ips_mfcs[hash].mfcb_mfc; mfc_rt;
mfc_rt = mfc_rt->mfc_next) {
mutex_enter(&mfc_rt->mfc_mutex);
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1,
SL_TRACE,
"ip_mforward: MFCTAB hash %d o 0x%x"
" g 0x%x\n",
hash, ntohl(mfc_rt->mfc_origin.s_addr),
ntohl(mfc_rt->mfc_mcastgrp.s_addr));
}
/* Found one: leave the loop with mfc_rt->mfc_mutex held. */
if ((src == mfc_rt->mfc_origin.s_addr) &&
(dst == mfc_rt->mfc_mcastgrp.s_addr) &&
(mfc_rt->mfc_rte != NULL) &&
!(mfc_rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
break;
}
mutex_exit(&mfc_rt->mfc_mutex);
}
/* No pending upcall: allocate a new entry plus the copy for the daemon. */
if (mfc_rt == NULL) {
mfc_rt = (struct mfc *)mi_zalloc(sizeof (struct mfc));
if (mfc_rt == NULL) {
ipst->ips_mrtstat->mrts_fwd_drop++;
ip1dbg(("ip_mforward: out of memory "
"for mfc, mfc_rt\n"));
goto error_return;
} else
new_mfc = B_TRUE;
mp_copy = copymsg(mp);
if (mp_copy == NULL) {
ipst->ips_mrtstat->mrts_fwd_drop++;
ip1dbg(("ip_mforward: out of memory for "
"mblk, mp_copy\n"));
goto error_return;
}
mutex_enter(&mfc_rt->mfc_mutex);
}
/* Queue element and packet copy, needed whether or not the entry is new. */
rte = (struct rtdetq *)mi_zalloc(sizeof (struct rtdetq));
if (rte == NULL) {
ipst->ips_mrtstat->mrts_fwd_drop++;
mutex_exit(&mfc_rt->mfc_mutex);
ip1dbg(("ip_mforward: out of memory for"
" rtdetq, rte\n"));
goto error_return;
}
mp0 = copymsg(mp);
if (mp0 == NULL) {
ipst->ips_mrtstat->mrts_fwd_drop++;
ip1dbg(("ip_mforward: out of memory for mblk, mp0\n"));
mutex_exit(&mfc_rt->mfc_mutex);
goto error_return;
}
rte->mp = mp0;
if (pim_reg_packet) {
ASSERT(ipst->ips_reg_vif_num != ALL_VIFS);
rte->ill =
ipst->ips_vifs[ipst->ips_reg_vif_num].
v_ipif->ipif_ill;
} else {
rte->ill = ill;
}
rte->rte_next = NULL;
/* Count the packets already queued; a freshly allocated entry has none. */
for (rte_m = mfc_rt->mfc_rte, npkts = 0; rte_m;
rte_m = rte_m->rte_next)
npkts++;
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"ip_mforward: upcalls %d\n", npkts);
}
/* Upcall queue overflow: drop this packet. */
if (npkts > MAX_UPQ) {
ipst->ips_mrtstat->mrts_upq_ovflw++;
mutex_exit(&mfc_rt->mfc_mutex);
goto error_return;
}
/* First packet for this entry: finish initialising it and link it in. */
if (npkts == 0) {
int i = 0;
mfc_rt->mfc_origin.s_addr = src;
mfc_rt->mfc_mcastgrp.s_addr = dst;
mutex_enter(&ipst->ips_numvifs_mutex);
for (i = 0; i < (int)ipst->ips_numvifs; i++)
mfc_rt->mfc_ttls[i] = 0;
mutex_exit(&ipst->ips_numvifs_mutex);
mfc_rt->mfc_parent = ALL_VIFS;
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1,
SL_TRACE,
"ip_mforward: NEW MFCTAB hash %d o 0x%x "
"g 0x%x\n", hash,
ntohl(mfc_rt->mfc_origin.s_addr),
ntohl(mfc_rt->mfc_mcastgrp.s_addr));
}
mfc_rt->mfc_next = ipst->ips_mfcs[hash].mfcb_mfc;
ipst->ips_mfcs[hash].mfcb_mfc = mfc_rt;
mfc_rt->mfc_rte = NULL;
}
/* Append the queue element at the tail of the entry's upcall queue. */
if (mfc_rt->mfc_rte == NULL)
mfc_rt->mfc_rte = rte;
else {
prev_rte = mfc_rt->mfc_rte;
for (rte1 = mfc_rt->mfc_rte->rte_next; rte1;
prev_rte = rte1, rte1 = rte1->rte_next)
;
prev_rte->rte_next = rte;
}
/*
 * First packet: turn mp_copy into an IGMPMSG_NOCACHE upcall by overlaying
 * a struct igmpmsg on the start of the copied packet, then pass it to the
 * routing daemon.
 */
if (npkts == 0) {
struct igmpmsg *im;
ASSERT(mp_copy != NULL);
im = (struct igmpmsg *)mp_copy->b_rptr;
im->im_msgtype = IGMPMSG_NOCACHE;
im->im_mbz = 0;
mutex_enter(&ipst->ips_numvifs_mutex);
if (pim_reg_packet) {
im->im_vif = (uchar_t)ipst->ips_reg_vif_num;
mutex_exit(&ipst->ips_numvifs_mutex);
} else {
/* Report the first vif whose interface is the receiving ill. */
for (vifi = 0;
vifi < ipst->ips_numvifs;
vifi++) {
if (ipst->ips_vifs[vifi].v_ipif == NULL)
continue;
if (ipst->ips_vifs[vifi].
v_ipif->ipif_ill == ill) {
im->im_vif = (uchar_t)vifi;
break;
}
}
mutex_exit(&ipst->ips_numvifs_mutex);
ASSERT(vifi < ipst->ips_numvifs);
}
ipst->ips_mrtstat->mrts_upcalls++;
/* Arm the timer that discards the queue if no route is installed in time. */
mfc_rt->mfc_timeout_id = timeout(expire_upcalls,
mfc_rt, EXPIRE_TIMEOUT * UPCALL_EXPIRE);
mutex_exit(&mfc_rt->mfc_mutex);
mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
/*
 * Deliver to the daemon's connection with the ill fields of ira cleared,
 * then restore them for the caller.
 */
ira->ira_ill = ira->ira_rill = NULL;
(mrouter->conn_recv)(mrouter, mp_copy, NULL, ira);
ira->ira_ill = ill;
ira->ira_rill = rill;
} else {
/*
 * An upcall is already outstanding, so no new one is sent.
 * NOTE(review): mp_copy is only allocated for a new entry, so it appears
 * to be NULL on this path while the packet copy mp0 has been queued;
 * confirm that counting a discard and calling ip_drop_input()/freemsg()
 * on it is intended.
 */
mutex_exit(&mfc_rt->mfc_mutex);
mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ip_drop_input("ip_mforward - upcall already waiting",
mp_copy, ill);
freemsg(mp_copy);
}
MFCB_REFRELE(mfcbp);
if (tunnel_src != 0)
return (1);
else
return (0);
/*
 * Common failure exit. Entered with the bucket lock held and with
 * mfc_rt->mfc_mutex already released; frees whatever had been allocated.
 */
error_return:
mutex_exit(&(ipst->ips_mfcs[hash].mfcb_lock));
MFCB_REFRELE(mfcbp);
/* Only an entry allocated by this call is freed. */
if (mfc_rt != NULL && (new_mfc == B_TRUE))
mi_free((char *)mfc_rt);
if (rte != NULL)
mi_free((char *)rte);
if (mp_copy != NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ip_drop_input("ip_mforward error", mp_copy, ill);
freemsg(mp_copy);
}
if (mp0 != NULL)
freemsg(mp0);
return (-1);
}
}
/*
 * Timeout callback for an mfc entry that is still waiting for the routing
 * daemon to answer an upcall (ip_mforward() arms it when it queues the
 * first packet on a new entry).  Unless the timer has been cancelled in the
 * meantime (mfc_timeout_id == 0), the entry and its hash bucket are marked
 * MFCB_MARK_CONDEMNED.
 *
 * arg is the struct mfc the timeout was registered for.
 */
static void
expire_upcalls(void *arg)
{
	struct mfc	*mfc_rt = arg;
	uint_t		hash;
	struct mfc	*prev_mfc, *mfc0;
	ip_stack_t	*ipst;
	conn_t		*mrouter;

	/*
	 * The ip_stack_t is reached through the ill stored on the first
	 * queued packet, so nothing can be done (and nothing is safe to
	 * dereference) when either the queue or that ill is missing.
	 */
	if (mfc_rt->mfc_rte == NULL || mfc_rt->mfc_rte->ill == NULL) {
		cmn_err(CE_WARN, "expire_upcalls: no ILL\n");
		return;
	}
	ipst = mfc_rt->mfc_rte->ill->ill_ipst;
	mrouter = ipst->ips_ip_g_mrouter;

	hash = MFCHASH(mfc_rt->mfc_origin.s_addr, mfc_rt->mfc_mcastgrp.s_addr);
	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "expire_upcalls: hash %d s %x g %x",
		    hash, ntohl(mfc_rt->mfc_origin.s_addr),
		    ntohl(mfc_rt->mfc_mcastgrp.s_addr));
	}

	MFCB_REFHOLD(&ipst->ips_mfcs[hash]);
	mutex_enter(&mfc_rt->mfc_mutex);

	/* A zero id means the timer was cancelled; leave the entry alone. */
	if (mfc_rt->mfc_timeout_id == 0)
		goto done;

	ipst->ips_mrtstat->mrts_cache_cleanups++;
	mfc_rt->mfc_timeout_id = 0;

	/* Sanity check: the entry must still be linked on its hash chain. */
	for (prev_mfc = mfc0 = ipst->ips_mfcs[hash].mfcb_mfc; mfc0;
	    prev_mfc = mfc0, mfc0 = mfc0->mfc_next)
		if (mfc0 == mfc_rt)
			break;
	ASSERT(prev_mfc != NULL);
	ASSERT(mfc0 != NULL);

	/*
	 * Only mark the bucket and the entry here.
	 * NOTE(review): the actual unlink/free presumably happens when the
	 * bucket reference is dropped - confirm against MFCB_REFRELE.
	 */
	ipst->ips_mfcs[hash].mfcb_marks |= MFCB_MARK_CONDEMNED;
	mfc_rt->mfc_marks |= MFCB_MARK_CONDEMNED;
done:
	mutex_exit(&mfc_rt->mfc_mutex);
	MFCB_REFRELE(&ipst->ips_mfcs[hash]);
}
/*
 * Forward one multicast packet according to an installed route entry.
 *
 * mp, ipha:	the packet and its IP header.
 * ill:		interface the packet was received on.
 * tunnel_src:	compared against the parent vif's v_rmt_addr; a non-zero
 *		value also selects the return value 1 instead of 0.
 * rt:		route (mfc) entry supplying the parent vif and the per-vif
 *		ttl thresholds.
 *
 * Returns -1 when the packet cannot be handled (parent is NO_VIF, the
 * parent vif cannot be locked or is out of range, or the copy for the
 * wrong-interface upcall cannot be allocated); otherwise 1 if tunnel_src is
 * non-zero and 0 if it is zero.  mp is not freed here; each sender reached
 * through MC_SEND works on its own copy.
 */
static int
ip_mdq(mblk_t *mp, ipha_t *ipha, ill_t *ill, ipaddr_t tunnel_src,
struct mfc *rt)
{
vifi_t vifi;
struct vif *vifp;
ipaddr_t dst = ipha->ipha_dst;
size_t plen = msgdsize(mp);
vifi_t num_of_vifs;
ip_stack_t *ipst = ill->ill_ipst;
conn_t *mrouter = ipst->ips_ip_g_mrouter;
ip_recv_attr_t iras;
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"ip_mdq: SEND src %x, ipha_dst %x, ill %s",
ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst),
ill->ill_name);
}
/*
 * Dispatch a packet to the sender matching the vif type: tunnel vifs are
 * encapsulated, the register vif goes up to the daemon, anything else is
 * sent on the physical interface.
 */
#define MC_SEND(ipha, mp, vifp, dst) { \
if ((vifp)->v_flags & VIFF_TUNNEL) \
encap_send((ipha), (mp), (vifp), (dst)); \
else if ((vifp)->v_flags & VIFF_REGISTER) \
register_send((ipha), (mp), (vifp), (dst)); \
else \
phyint_send((ipha), (mp), (vifp), (dst)); \
}
vifi = rt->mfc_parent;
/* A parent of NO_VIF means there is no usable route; drop. */
if (vifi == NO_VIF) {
ip1dbg(("ip_mdq: no route for origin ill %s, vifi is NO_VIF\n",
ill->ill_name));
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"ip_mdq: vifi is NO_VIF ill = %s", ill->ill_name);
}
return (-1);
}
/*
 * Pin the parent vif for the checks and counter updates below.
 * NOTE(review): ips_vifs[vifi] is indexed here, before vifi is compared
 * against ips_numvifs a few lines further down - confirm that mfc_parent
 * can never exceed the size of the vif table at this point.
 */
if (!lock_good_vif(&ipst->ips_vifs[vifi]))
return (-1);
ASSERT(ipst->ips_vifs[vifi].v_ipif != NULL);
if (vifi >= ipst->ips_numvifs) {
cmn_err(CE_WARN, "ip_mdq: illegal vifi %d numvifs "
"%d ill %s viftable ill %s\n",
(int)vifi, (int)ipst->ips_numvifs, ill->ill_name,
ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name);
unlock_good_vif(&ipst->ips_vifs[vifi]);
return (-1);
}
/*
 * Do not forward a packet that did not arrive on the parent vif for its
 * origin (wrong ill, or tunnel source not matching the vif's remote end).
 */
if ((ipst->ips_vifs[vifi].v_ipif->ipif_ill != ill) ||
(ipst->ips_vifs[vifi].v_rmt_addr.s_addr != tunnel_src)) {
ip1dbg(("ip_mdq: arrived wrong if, vifi %d "
"numvifs %d ill %s viftable ill %s\n",
(int)vifi, (int)ipst->ips_numvifs, ill->ill_name,
ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name));
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"ip_mdq: arrived wrong if, vifi %d ill "
"%s viftable ill %s\n",
(int)vifi, ill->ill_name,
ipst->ips_vifs[vifi].v_ipif->ipif_ill->ill_name);
}
ipst->ips_mrtstat->mrts_wrong_if++;
rt->mfc_wrong_if++;
/*
 * With PIM assert processing enabled, a non-zero ttl for this vif, a
 * broadcast-capable receiving interface (first ipif on the ill) and a
 * non-tunnel vif, tell the daemon via an IGMPMSG_WRONGVIF upcall.
 */
if (ipst->ips_pim_assert && rt->mfc_ttls[vifi] > 0 &&
(ill->ill_ipif->ipif_flags & IPIF_BROADCAST) &&
!(ipst->ips_vifs[vifi].v_flags & VIFF_TUNNEL)) {
mblk_t *mp_copy;
struct igmpmsg *im;
mp_copy = copymsg(mp);
if (mp_copy == NULL) {
ipst->ips_mrtstat->mrts_fwd_drop++;
ip1dbg(("ip_mdq: out of memory "
"for mblk, mp_copy\n"));
unlock_good_vif(&ipst->ips_vifs[vifi]);
return (-1);
}
/* The start of the copied packet is overlaid with the igmpmsg. */
im = (struct igmpmsg *)mp_copy->b_rptr;
im->im_msgtype = IGMPMSG_WRONGVIF;
im->im_mbz = 0;
im->im_vif = (ushort_t)vifi;
/* Minimal receive attributes for delivery to the mrouter conn. */
bzero(&iras, sizeof (iras));
iras.ira_flags = IRAF_IS_IPV4;
iras.ira_ip_hdr_length =
IPH_HDR_LENGTH(mp_copy->b_rptr);
iras.ira_pktlen = msgdsize(mp_copy);
iras.ira_ttl = ipha->ipha_ttl;
(mrouter->conn_recv)(mrouter, mp_copy, NULL, &iras);
ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
}
unlock_good_vif(&ipst->ips_vifs[vifi]);
if (tunnel_src != 0)
return (1);
else
return (0);
}
/* Packets sourced from the vif's local address count as output. */
if (ipha->ipha_src == ipst->ips_vifs[vifi].v_lcl_addr.s_addr) {
ipst->ips_vifs[vifi].v_pkt_out++;
ipst->ips_vifs[vifi].v_bytes_out += plen;
} else {
ipst->ips_vifs[vifi].v_pkt_in++;
ipst->ips_vifs[vifi].v_bytes_in += plen;
}
mutex_enter(&rt->mfc_mutex);
rt->mfc_pkt_cnt++;
rt->mfc_byte_cnt += plen;
mutex_exit(&rt->mfc_mutex);
unlock_good_vif(&ipst->ips_vifs[vifi]);
/* Snapshot the vif count under its mutex, then walk without it. */
mutex_enter(&ipst->ips_numvifs_mutex);
num_of_vifs = ipst->ips_numvifs;
mutex_exit(&ipst->ips_numvifs_mutex);
/*
 * Send a copy on every lockable vif whose route ttl threshold is
 * non-zero and is exceeded by the packet's ttl.
 */
for (vifp = ipst->ips_vifs, vifi = 0;
vifi < num_of_vifs;
vifp++, vifi++) {
if (!lock_good_vif(vifp))
continue;
if ((rt->mfc_ttls[vifi] > 0) &&
(ipha->ipha_ttl > rt->mfc_ttls[vifi])) {
ASSERT(vifp->v_ipif != NULL);
vifp->v_pkt_out++;
vifp->v_bytes_out += plen;
MC_SEND(ipha, mp, vifp, dst);
ipst->ips_mrtstat->mrts_fwd_out++;
}
unlock_good_vif(vifp);
}
if (tunnel_src != 0)
return (1);
else
return (0);
}
/*
 * Send a copy of mp out through a physical-interface vif.  The copy goes
 * straight to tbf_send_packet() when the vif has no rate limit, and through
 * the token bucket filter (tbf_control()) otherwise.  The caller keeps
 * ownership of mp.
 */
static void
phyint_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
{
	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	mblk_t		*pkt;

	pkt = copymsg(mp);
	if (pkt == NULL) {
		ipst->ips_mrtstat->mrts_fwd_drop++;
		ip1dbg(("phyint_send: out of memory for mblk, mp_copy\n"));
		return;
	}

	/* Unlimited vif: bypass the token bucket entirely. */
	if (vifp->v_rate_limit <= 0) {
		tbf_send_packet(vifp, pkt);
		return;
	}

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "phyint_send: tbf_contr rate %d "
		    "vifp 0x%p mp 0x%p dst 0x%x",
		    vifp->v_rate_limit, (void *)vifp, (void *)mp, dst);
	}
	/* The filter is handed the header inside the copy, not the original. */
	tbf_control(vifp, pkt, (ipha_t *)pkt->b_rptr);
}
/*
 * Deliver a whole packet to the multicast routing daemon through the
 * register vif.  A new leading mblk holding a struct igmpmsg of type
 * IGMPMSG_WHOLEPKT is chained in front of a copy of mp and passed to the
 * mrouter conn's receive function, unless that conn is flow controlled, in
 * which case the message is dropped.  The caller keeps ownership of mp.
 */
static void
register_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
{
	ill_t		*ill = vifp->v_ipif->ipif_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	mblk_t		*hdr_mp;
	ipha_t		*hdr_ipha;
	struct igmpmsg	*msg;
	ip_recv_attr_t	iras;
	int		blocked;

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "register_send: src %x, dst %x\n",
		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
	}

	/* Leading block that carries the igmpmsg header. */
	hdr_mp = allocb(sizeof (struct igmpmsg) + sizeof (ipha_t), BPRI_MED);
	if (hdr_mp == NULL) {
		++ipst->ips_mrtstat->mrts_pim_nomemory;
		if (ipst->ips_ip_mrtdebug > 3) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "register_send: allocb failure.");
		}
		return;
	}
	hdr_mp->b_wptr = hdr_mp->b_rptr + sizeof (struct igmpmsg);

	/* Chain a private copy of the packet behind the header. */
	hdr_mp->b_cont = copymsg(mp);
	if (hdr_mp->b_cont == NULL) {
		++ipst->ips_mrtstat->mrts_pim_nomemory;
		if (ipst->ips_ip_mrtdebug > 3) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "register_send: copymsg failure.");
		}
		freeb(hdr_mp);
		return;
	}

	/*
	 * Start from the template IP header, then overwrite the fields the
	 * igmpmsg view of the same bytes defines.
	 */
	hdr_ipha = (ipha_t *)hdr_mp->b_rptr;
	*hdr_ipha = multicast_encap_iphdr;

	msg = (struct igmpmsg *)hdr_mp->b_rptr;
	msg->im_msgtype = IGMPMSG_WHOLEPKT;
	msg->im_src.s_addr = ipha->ipha_src;
	msg->im_dst.s_addr = ipha->ipha_dst;
	msg->im_mbz = 0;

	++ipst->ips_mrtstat->mrts_upcalls;

	if (IPCL_IS_NONSTR(mrouter))
		blocked = mrouter->conn_flow_cntrld;
	else
		blocked = !canputnext(mrouter->conn_rq);

	if (!blocked) {
		bzero(&iras, sizeof (iras));
		iras.ira_flags = IRAF_IS_IPV4;
		iras.ira_ip_hdr_length = sizeof (ipha_t);
		iras.ira_pktlen = msgdsize(hdr_mp);
		iras.ira_ttl = ipha->ipha_ttl;
		(mrouter->conn_recv)(mrouter, hdr_mp, NULL, &iras);
		ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
		return;
	}

	/* The daemon cannot take more data right now; count and drop. */
	++ipst->ips_mrtstat->mrts_pim_regsend_drops;
	if (ipst->ips_ip_mrtdebug > 3) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "register_send: register upcall failure.");
	}
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
	ip_drop_input("mrts_pim_regsend_drops", hdr_mp, ill);
	freemsg(hdr_mp);
}
/*
 * Verify the checksum of the PIM message that follows the IP header ip in
 * mp.  For PIM_REGISTER messages only the first PIM_MINLEN bytes after the
 * IP header are summed.  The check runs on a dupmsg() of mp so that the
 * caller's read/write pointers are left untouched.
 *
 * Returns B_TRUE when the checksum is good, B_FALSE when it is bad or when
 * the duplicate cannot be allocated.
 */
static boolean_t
pim_validate_cksum(mblk_t *mp, ipha_t *ip, struct pim *pimp)
{
	mblk_t		*cksum_mp;
	boolean_t	good;

	cksum_mp = dupmsg(mp);
	if (cksum_mp == NULL)
		return (B_FALSE);

	/* Skip the IP header; limit the span for register messages. */
	cksum_mp->b_rptr += IPH_HDR_LENGTH(ip);
	if (pimp->pim_type == PIM_REGISTER)
		cksum_mp->b_wptr = cksum_mp->b_rptr + PIM_MINLEN;

	good = (IP_CSUM(cksum_mp, 0, 0) == 0) ? B_TRUE : B_FALSE;
	freemsg(cksum_mp);
	return (good);
}
/*
 * Input processing for a received PIM packet.
 *
 * The message is pulled up, then its length, PIM version and checksum are
 * validated.  Non-register messages are returned to the caller unchanged.
 * For a register message the encapsulated packet must be addressed to a
 * class D group; unless the null-register bit is set or no register vif
 * exists, a decapsulated copy is handed to register_mforward().
 *
 * Returns mp when the caller should continue processing it, or NULL when
 * the packet has been consumed (dropped and freed) here.
 */
mblk_t *
pim_input(mblk_t *mp, ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	ipha_t		*outer, *inner;
	struct pim	*pimp;
	uint32_t	*reghdr;
	int		iplen, iphlen, pimlen;
	char		*reason;

	/* Everything below assumes one contiguous block. */
	if (pullupmsg(mp, -1) == 0) {
		++ipst->ips_mrtstat->mrts_pim_nomemory;
		reason = "mrts_pim_nomemory";
		goto drop;
	}

	outer = (ipha_t *)mp->b_rptr;
	iplen = outer->ipha_length;
	iphlen = IPH_HDR_LENGTH(outer);
	pimlen = ntohs(iplen) - iphlen;

	if (pimlen < PIM_MINLEN) {
		++ipst->ips_mrtstat->mrts_pim_malformed;
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "pim_input: length not at least minlen");
		}
		reason = "mrts_pim_malformed";
		goto drop;
	}

	/* The PIM header starts right after the outer IP header. */
	pimp = (struct pim *)((caddr_t)outer + iphlen);

	if (pimp->pim_vers != PIM_VERSION) {
		++ipst->ips_mrtstat->mrts_pim_badversion;
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "pim_input: unknown version of PIM");
		}
		reason = "mrts_pim_badversion";
		goto drop;
	}

	if (!pim_validate_cksum(mp, outer, pimp)) {
		++ipst->ips_mrtstat->mrts_pim_rcv_badcsum;
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "pim_input: invalid checksum");
		}
		reason = "pim_rcv_badcsum";
		goto drop;
	}

	/* Only register messages need further work here. */
	if (pimp->pim_type != PIM_REGISTER)
		return (mp);

	/* Layout: PIM header, 32-bit register header, inner IP packet. */
	reghdr = (uint32_t *)(pimp + 1);
	inner = (ipha_t *)(reghdr + 1);

	if (!CLASSD(inner->ipha_dst)) {
		++ipst->ips_mrtstat->mrts_pim_badregisters;
		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "pim_input: Inner pkt not mcast .. !");
		}
		reason = "mrts_pim_badregisters";
		goto drop;
	}

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "register from %x, to %x, len %d",
		    ntohl(inner->ipha_src),
		    ntohl(inner->ipha_dst),
		    ntohs(inner->ipha_length));
	}

	if (!(ntohl(*reghdr) & PIM_NULL_REGISTER) &&
	    ipst->ips_reg_vif_num != ALL_VIFS) {
		mblk_t	*inner_mp;
		uint_t	saved_pktlen;
		size_t	strip = iphlen + sizeof (pim_t) + sizeof (*reghdr);

		inner_mp = copymsg(mp);
		if (inner_mp == NULL) {
			++ipst->ips_mrtstat->mrts_pim_nomemory;
			reason = "mrts_pim_nomemory";
			goto drop;
		}

		/*
		 * Strip the outer headers from the copy and temporarily
		 * shrink the attribute's packet length to match while the
		 * inner packet is being forwarded.
		 */
		inner_mp->b_rptr += strip;
		saved_pktlen = ira->ira_pktlen;
		ira->ira_pktlen -= strip;
		if (register_mforward(inner_mp, ira) != 0) {
			/* register_mforward() has already freed the copy. */
			freemsg(mp);
			ira->ira_pktlen = saved_pktlen;
			return (NULL);
		}
		ira->ira_pktlen = saved_pktlen;
	}

	return (mp);

drop:
	/* Common epilogue: account for the discard and free the packet. */
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
	ip_drop_input(reason, mp, ill);
	freemsg(mp);
	return (NULL);
}
/*
 * Feed a decapsulated PIM register payload back into the receive path.
 * The attributes are flagged IRAF_PIM_REGISTER and the packet is passed to
 * the receive function of the selected ire.
 *
 * Returns 0 once the packet has been handed off, or -1 if the ire is a
 * reject/blackhole route, in which case mp has been dropped and freed.
 */
static int
register_mforward(mblk_t *mp, ip_recv_attr_t *ira)
{
	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	ire_t		*ire;

	ASSERT(ipst->ips_reg_vif_num <= ipst->ips_numvifs);

	if (ipst->ips_ip_mrtdebug > 3) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "register_mforward: src %x, dst %x\n",
		    ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst));
	}

	/* Mark the packet as having arrived in a PIM register. */
	ira->ira_flags |= IRAF_PIM_REGISTER;
	++ipst->ips_mrtstat->mrts_pim_regforwards;

	/* Multicast destinations use the ill's multicast ire directly. */
	ire = CLASSD(ipha->ipha_dst) ? ire_multicast(ill) :
	    ire_route_recursive_v4(ipha->ipha_dst, 0, NULL, ALL_ZONES,
	    ira->ira_tsl, MATCH_IRE_SECATTR, IRR_ALLOCATE, 0, ipst,
	    NULL, NULL, NULL);
	ASSERT(ire != NULL);

	if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
		ASSERT(ire->ire_type & IRE_MULTICAST);
		(*ire->ire_recvfn)(ire, mp, ipha, ira);
		ire_refrele(ire);
		return (0);
	}

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
	ip_drop_input("mrts_pim RTF_REJECT", mp, ill);
	freemsg(mp);
	ire_refrele(ire);
	return (-1);
}
/*
 * Send a copy of mp through a tunnel vif.  A new outer IP header, built
 * from the multicast_encap_iphdr template with the vif's local and remote
 * addresses, is chained in front of a copy of the packet; the copy's own
 * ttl is decremented and its header checksum recomputed.  The result is
 * passed to tbf_send_packet() or, for a rate-limited vif, tbf_control().
 * Allocation failures drop the packet silently; the caller keeps mp.
 */
static void
encap_send(ipha_t *ipha, mblk_t *mp, struct vif *vifp, ipaddr_t dst)
{
	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	size_t		inner_len;
	mblk_t		*encap_mp;
	ipha_t		*outer, *inner;

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "encap_send: vif %ld enter",
		    (ptrdiff_t)(vifp - ipst->ips_vifs));
	}
	inner_len = ntohs(ipha->ipha_length);

	/* Outer header block, with 32 bytes of headroom left in front. */
	encap_mp = allocb(32 + sizeof (multicast_encap_iphdr), BPRI_MED);
	if (encap_mp == NULL)
		return;
	encap_mp->b_rptr += 32;
	encap_mp->b_wptr = encap_mp->b_rptr + sizeof (multicast_encap_iphdr);

	encap_mp->b_cont = copymsg(mp);
	if (encap_mp->b_cont == NULL) {
		freeb(encap_mp);
		return;
	}

	/* Fill in the encapsulating header from the template. */
	outer = (ipha_t *)encap_mp->b_rptr;
	*outer = multicast_encap_iphdr;
	ASSERT((inner_len + sizeof (ipha_t)) <= IP_MAXPACKET);
	outer->ipha_length = htons(inner_len + sizeof (ipha_t));
	outer->ipha_src = vifp->v_lcl_addr.s_addr;
	outer->ipha_dst = vifp->v_rmt_addr.s_addr;
	ASSERT(outer->ipha_ident == 0);

	/* Age the copied inner header and make its checksum valid again. */
	inner = (ipha_t *)encap_mp->b_cont->b_rptr;
	inner->ipha_ttl--;
	inner->ipha_hdr_checksum = 0;
	inner->ipha_hdr_checksum = ip_csum_hdr(inner);

	outer->ipha_ttl = inner->ipha_ttl;

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "encap_send: group 0x%x", ntohl(inner->ipha_dst));
	}

	/* The rate limiter is given the inner header of the copy. */
	if (vifp->v_rate_limit > 0)
		tbf_control(vifp, encap_mp, inner);
	else
		tbf_send_packet(vifp, encap_mp);
}
/*
 * Decapsulate a packet received over a multicast tunnel.
 *
 * mp starts with the outer IP header, followed by the encapsulated IP
 * packet.  The inner header is pulled up, the inner destination must be a
 * class D address and the outer source must match the remote address of a
 * tunnel vif.  On success the outer header is stripped, ira is updated
 * (tunnel source recorded, lengths adjusted) and the inner packet is passed
 * to the receive function of the selected ire.  On any failure the packet
 * is counted as discarded and freed.
 */
void
ip_mroute_decap(mblk_t *mp, ip_recv_attr_t *ira)
{
ipha_t *ipha = (ipha_t *)mp->b_rptr;
ipha_t *ipha_encap;
int hlen = IPH_HDR_LENGTH(ipha);
int hlen_encap;
ipaddr_t src;
struct vif *vifp;
ire_t *ire;
ill_t *ill = ira->ira_ill;
ip_stack_t *ipst = ill->ill_ipst;
conn_t *mrouter = ipst->ips_ip_g_mrouter;
/* First make sure a minimal inner IP header is in the first block. */
ipha_encap = (ipha_t *)((char *)ipha + hlen);
if (mp->b_wptr - mp->b_rptr < hlen + IP_SIMPLE_HDR_LENGTH) {
ipha = ip_pullup(mp, hlen + IP_SIMPLE_HDR_LENGTH, ira);
if (ipha == NULL) {
ipst->ips_mrtstat->mrts_bad_tunnel++;
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ip_drop_input("ip_mroute_decap: too short", mp, ill);
freemsg(mp);
return;
}
/* The pullup may have moved the data; recompute the inner pointer. */
ipha_encap = (ipha_t *)((char *)ipha + hlen);
}
/* Now that the inner header length is readable, pull up all of it. */
hlen_encap = IPH_HDR_LENGTH(ipha_encap);
if (mp->b_wptr - mp->b_rptr < hlen + hlen_encap) {
ipha = ip_pullup(mp, hlen + hlen_encap, ira);
if (ipha == NULL) {
ipst->ips_mrtstat->mrts_bad_tunnel++;
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ip_drop_input("ip_mroute_decap: too short", mp, ill);
freemsg(mp);
return;
}
ipha_encap = (ipha_t *)((char *)ipha + hlen);
}
/* Only multicast inner destinations are accepted over a tunnel. */
if (!CLASSD(ipha_encap->ipha_dst)) {
ipst->ips_mrtstat->mrts_bad_tunnel++;
ip1dbg(("ip_mroute_decap: bad tunnel\n"));
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ip_drop_input("mrts_bad_tunnel", mp, ill);
freemsg(mp);
return;
}
/*
 * Map the outer source address to a tunnel vif.  The last lookup is
 * cached in ips_last_encap_src/ips_last_encap_vif under
 * ips_last_encap_lock; the vif table is only scanned when the source
 * differs from the cached one.  The scan stops at the first vif whose
 * remote address matches, whether or not it is a tunnel.
 */
src = (ipaddr_t)ipha->ipha_src;
mutex_enter(&ipst->ips_last_encap_lock);
if (src != ipst->ips_last_encap_src) {
struct vif *vife;
vifp = ipst->ips_vifs;
vife = vifp + ipst->ips_numvifs;
ipst->ips_last_encap_src = src;
ipst->ips_last_encap_vif = 0;
for (; vifp < vife; ++vifp) {
if (!lock_good_vif(vifp))
continue;
if (vifp->v_rmt_addr.s_addr == src) {
if (vifp->v_flags & VIFF_TUNNEL)
ipst->ips_last_encap_vif = vifp;
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq,
1, SL_TRACE,
"ip_mroute_decap: good tun "
"vif %ld with %x",
(ptrdiff_t)(vifp - ipst->ips_vifs),
ntohl(src));
}
unlock_good_vif(vifp);
break;
}
unlock_good_vif(vifp);
}
}
/*
 * No tunnel vif for this source: drop.
 * NOTE(review): vifp is NULL in this branch, so the vif index printed
 * by the ip1dbg below carries no information.
 */
if ((vifp = ipst->ips_last_encap_vif) == 0) {
mutex_exit(&ipst->ips_last_encap_lock);
ipst->ips_mrtstat->mrts_bad_tunnel++;
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ip_drop_input("mrts_bad_tunnel", mp, ill);
freemsg(mp);
ip1dbg(("ip_mroute_decap: vif %ld no tunnel with %x\n",
(ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(src)));
return;
}
mutex_exit(&ipst->ips_last_encap_lock);
/*
 * Record the tunnel source in the receive attributes and strip the
 * outer header so that mp and ira describe the inner packet.
 */
ira->ira_flags |= IRAF_MROUTE_TUNNEL_SET;
ira->ira_mroute_tunnel = src;
mp->b_rptr += hlen;
ira->ira_pktlen -= hlen;
ira->ira_ip_hdr_length = hlen_encap;
/*
 * RSVP with a registered listener is looked up via the broadcast
 * address on this ill; everything else uses the ill's multicast ire.
 */
if (ipha_encap->ipha_protocol == IPPROTO_RSVP &&
ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) {
ire = ire_route_recursive_v4(INADDR_BROADCAST, 0, ill,
ALL_ZONES, ira->ira_tsl, MATCH_IRE_ILL|MATCH_IRE_SECATTR,
IRR_ALLOCATE, 0, ipst, NULL, NULL, NULL);
} else {
ire = ire_multicast(ill);
}
ASSERT(ire != NULL);
/* A reject/blackhole ire means the packet must be discarded. */
if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ip_drop_input("ip_mroute_decap: RTF_REJECT", mp, ill);
freemsg(mp);
ire_refrele(ire);
return;
}
ire->ire_ib_pkt_count++;
ASSERT(ire->ire_type & (IRE_MULTICAST|IRE_BROADCAST));
(*ire->ire_recvfn)(ire, mp, ipha_encap, ira);
ire_refrele(ire);
}
/*
 * Delete every vif that refers to ipif.  The vif table is walked from the
 * highest index (as of a snapshot of ips_numvifs) down to zero.
 */
void
reset_mrt_vif_ipif(ipif_t *ipif)
{
	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
	vifi_t		num_of_vifs;
	vifi_t		vifi, tmp_vifi;

	/* Snapshot the count; del_vif() is called without this mutex. */
	mutex_enter(&ipst->ips_numvifs_mutex);
	num_of_vifs = ipst->ips_numvifs;
	mutex_exit(&ipst->ips_numvifs_mutex);

	vifi = num_of_vifs;
	while (vifi != 0) {
		vifi--;
		/* del_vif() takes a pointer, so hand it a scratch copy. */
		tmp_vifi = vifi;
		if (ipst->ips_vifs[tmp_vifi].v_ipif == ipif)
			(void) del_vif(&tmp_vifi, ipst);
	}
}
/*
 * Discard every packet that is queued on an mfc entry (waiting for an
 * upcall to be resolved) and that was received on ill.  For an entry that
 * holds such a packet and has a pending timeout, the timeout is cancelled
 * as well.
 *
 * Each hash bucket is held with MFCB_REFHOLD while its chain is walked and
 * each entry's queue is edited under that entry's mfc_mutex.
 */
void
reset_mrt_ill(ill_t *ill)
{
	struct mfc	*rt;
	struct rtdetq	*rte;
	struct rtdetq	**rtep;
	int		i;
	ip_stack_t	*ipst = ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	timeout_id_t	id;
	boolean_t	cancelled;

	for (i = 0; i < MFCTBLSIZ; i++) {
		MFCB_REFHOLD(&ipst->ips_mfcs[i]);
		if ((rt = ipst->ips_mfcs[i].mfcb_mfc) != NULL) {
			if (ipst->ips_ip_mrtdebug > 1) {
				(void) mi_strlog(mrouter->conn_rq, 1,
				    SL_TRACE,
				    "reset_mrt_ill: mfctable [%d]", i);
			}
			while (rt != NULL) {
				mutex_enter(&rt->mfc_mutex);
				cancelled = B_FALSE;
				/*
				 * Walk the queue through a link pointer so
				 * that entries for other ills are stepped
				 * over.  Re-reading the head on every pass
				 * would never advance past a head entry that
				 * belongs to a different ill.
				 */
				rtep = &rt->mfc_rte;
				while ((rte = *rtep) != NULL) {
					if (rte->ill != ill) {
						rtep = &rte->rte_next;
						continue;
					}
					if (!cancelled &&
					    (id = rt->mfc_timeout_id) != 0) {
						/*
						 * The lock is dropped around
						 * untimeout(); rt itself is
						 * kept alive by the bucket
						 * hold.  The queue may have
						 * changed in the meantime,
						 * so rescan from the head.
						 * One cancel per entry is
						 * enough.
						 */
						cancelled = B_TRUE;
						mutex_exit(&rt->mfc_mutex);
						(void) untimeout(id);
						mutex_enter(&rt->mfc_mutex);
						rtep = &rt->mfc_rte;
						continue;
					}
					if (ipst->ips_ip_mrtdebug > 1) {
						(void) mi_strlog(
						    mrouter->conn_rq,
						    1, SL_TRACE,
						    "reset_mrt_ill: "
						    "ill 0x%p", (void *)ill);
					}
					/* Unlink and free this queued packet. */
					*rtep = rte->rte_next;
					freemsg(rte->mp);
					mi_free((char *)rte);
				}
				mutex_exit(&rt->mfc_mutex);
				rt = rt->mfc_next;
			}
		}
		MFCB_REFRELE(&ipst->ips_mfcs[i]);
	}
}
/*
 * Token bucket filter entry point for a rate-limited vif.
 *
 * vifp:	the vif the packet is to be sent on.
 * mp:		the packet; ownership passes to this function, which sends,
 *		queues or frees it.
 * ipha:	IP header used for debug output and, when the queue is full,
 *		for priority-based selection of a queued packet to discard.
 *
 * A packet larger than MAX_BKT_SIZE is dropped.  Otherwise, after the
 * bucket is refilled: with an empty queue the packet is sent at once if
 * enough tokens are available, else queued and a reprocess timeout is
 * armed; with a partly filled queue it is appended and the queue is
 * processed; with a full queue a lower-priority queued packet is discarded
 * to make room, or the new packet is dropped if none exists.
 */
static void
tbf_control(struct vif *vifp, mblk_t *mp, ipha_t *ipha)
{
	size_t		p_len = msgdsize(mp);
	struct tbf	*t = vifp->v_tbf;
	timeout_id_t	id = 0;
	ill_t		*ill = vifp->v_ipif->ipif_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;

	/* The bucket never holds more than MAX_BKT_SIZE tokens. */
	if (p_len > MAX_BKT_SIZE) {
		ipst->ips_mrtstat->mrts_pkt2large++;
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
		ip_drop_output("tbf_control - too large", mp, ill);
		freemsg(mp);
		return;
	}
	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "tbf_ctrl: SEND vif %ld, qlen %d, ipha_dst 0x%x",
		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_q_len,
		    ntohl(ipha->ipha_dst));
	}

	mutex_enter(&t->tbf_lock);

	tbf_update_tokens(vifp);

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "tbf_control: vif %ld, TOKENS %d, pkt len %lu, qlen %d",
		    (ptrdiff_t)(vifp - ipst->ips_vifs), t->tbf_n_tok, p_len,
		    t->tbf_q_len);
	}

	if (t->tbf_q_len == 0) {
		/* Nothing queued: send now if the tokens allow it. */
		if (p_len <= t->tbf_n_tok) {
			t->tbf_n_tok -= p_len;
			mutex_exit(&t->tbf_lock);
			tbf_send_packet(vifp, mp);
			return;
		} else {
			/* Queue it and come back once tokens have built up. */
			tbf_queue(vifp, mp);
			ASSERT(vifp->v_timeout_id == 0);
			vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
			    TBF_REPROCESS);
		}
	} else if (t->tbf_q_len < t->tbf_max_q_len) {
		/* Room left in the queue: append, then try to drain. */
		tbf_queue(vifp, mp);
		tbf_process_q(vifp);
	} else {
		/*
		 * Queue full.  Priority selection looks at the UDP header,
		 * so make sure IP + UDP headers are in the first block.
		 */
		size_t hdr_length = IPH_HDR_LENGTH(ipha) +
		    sizeof (struct udphdr);

		if ((mp->b_wptr - mp->b_rptr) < hdr_length) {
			if (!pullupmsg(mp, hdr_length)) {
				/*
				 * Log before freeing: the callers pass an
				 * ipha that lives inside mp, so it must not
				 * be read once the message is gone.
				 */
				ip1dbg(("tbf_ctl: couldn't pullup udp hdr, "
				    "vif %ld src 0x%x dst 0x%x\n",
				    (ptrdiff_t)(vifp - ipst->ips_vifs),
				    ntohl(ipha->ipha_src),
				    ntohl(ipha->ipha_dst)));
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsOutDiscards);
				ip_drop_output("tbf_control - pullup", mp, ill);
				freemsg(mp);
				mutex_exit(&vifp->v_tbf->tbf_lock);
				return;
			} else
				/* The pullup may have moved the header. */
				ipha = (ipha_t *)mp->b_rptr;
		}
		if (!tbf_dq_sel(vifp, ipha)) {
			/* Nothing of lower priority to evict: drop this one. */
			ipst->ips_mrtstat->mrts_q_overflow++;
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
			ip_drop_output("mrts_q_overflow", mp, ill);
			freemsg(mp);
		} else {
			tbf_queue(vifp, mp);
			tbf_process_q(vifp);
		}
	}

	/*
	 * With the queue drained there is nothing left for the reprocess
	 * timer to do.  Take its id under the lock and cancel it only after
	 * the lock has been dropped.
	 */
	if (t->tbf_q_len == 0) {
		id = vifp->v_timeout_id;
		vifp->v_timeout_id = 0;
	}
	mutex_exit(&vifp->v_tbf->tbf_lock);
	if (id != 0)
		(void) untimeout(id);
}
/*
 * Append mp to the tail of the vif's token bucket queue and bump the queue
 * length.  The caller must hold the tbf lock.
 */
static void
tbf_queue(struct vif *vifp, mblk_t *mp)
{
	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	struct tbf	*bucket = vifp->v_tbf;

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "tbf_queue: vif %ld", (ptrdiff_t)(vifp - ipst->ips_vifs));
	}
	ASSERT(MUTEX_HELD(&bucket->tbf_lock));

	/* Link behind the current tail, or become the head if empty. */
	if (bucket->tbf_t != NULL)
		bucket->tbf_t->b_next = mp;
	else
		bucket->tbf_q = mp;
	bucket->tbf_t = mp;

	mp->b_prev = NULL;
	mp->b_next = NULL;

	bucket->tbf_q_len++;
}
/*
 * Send queued packets from the head of the vif's token bucket queue for as
 * long as the bucket holds enough tokens for the packet at the head.  The
 * caller must hold the tbf lock; it is released around each
 * tbf_send_packet() call and held again on return.
 */
static void
tbf_process_q(struct vif *vifp)
{
	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	struct tbf	*bucket = vifp->v_tbf;
	mblk_t		*head;
	size_t		pktlen;

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "tbf_process_q 1: vif %ld qlen = %d",
		    (ptrdiff_t)(vifp - ipst->ips_vifs), bucket->tbf_q_len);
	}
	ASSERT(MUTEX_HELD(&bucket->tbf_lock));

	while (bucket->tbf_q_len > 0) {
		head = bucket->tbf_q;
		pktlen = (size_t)msgdsize(head);

		/* Stop at the first packet the bucket cannot pay for. */
		if (pktlen > bucket->tbf_n_tok)
			break;

		/* Spend the tokens and unlink the packet from the queue. */
		bucket->tbf_n_tok -= pktlen;
		bucket->tbf_q = head->b_next;
		if (--bucket->tbf_q_len == 0)
			bucket->tbf_t = NULL;
		head->b_next = NULL;

		/* Do not hold the lock across the transmit. */
		mutex_exit(&bucket->tbf_lock);
		tbf_send_packet(vifp, head);
		mutex_enter(&bucket->tbf_lock);
	}
}
static void
tbf_reprocess_q(void *arg)
{
struct vif *vifp = arg;
ip_stack_t *ipst = vifp->v_ipif->ipif_ill->ill_ipst;
conn_t *mrouter = ipst->ips_ip_g_mrouter;
mutex_enter(&vifp->v_tbf->tbf_lock);
vifp->v_timeout_id = 0;
tbf_update_tokens(vifp);
tbf_process_q(vifp);
if (vifp->v_tbf->tbf_q_len > 0) {
vifp->v_timeout_id = timeout(tbf_reprocess_q, vifp,
TBF_REPROCESS);
}
mutex_exit(&vifp->v_tbf->tbf_lock);
if (ipst->ips_ip_mrtdebug > 1) {
(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
"tbf_reprcess_q: vif %ld timeout id = %p",
(ptrdiff_t)(vifp - ipst->ips_vifs), vifp->v_timeout_id);
}
}
/*
 * Make room in a full token bucket queue by discarding the first queued
 * packet whose priority is lower than that of the packet described by ipha.
 * The caller must hold the tbf lock.
 *
 * Returns 1 if a queued packet was discarded, 0 if none qualified.
 */
static int
tbf_dq_sel(struct vif *vifp, ipha_t *ipha)
{
	ill_t		*ill = vifp->v_ipif->ipif_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	struct tbf	*bucket = vifp->v_tbf;
	uint_t		new_prio;
	mblk_t		**linkp;
	mblk_t		*prev, *cur;

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "dq_sel: vif %ld dst 0x%x",
		    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(ipha->ipha_dst));
	}
	ASSERT(MUTEX_HELD(&bucket->tbf_lock));

	new_prio = priority(vifp, ipha);

	/* linkp always addresses the pointer that leads to cur. */
	for (linkp = &bucket->tbf_q, prev = NULL; (cur = *linkp) != NULL;
	    prev = cur, linkp = &cur->b_next) {
		if (new_prio <= (priority(vifp, (ipha_t *)cur->b_rptr)))
			continue;

		/* Victim found: unlink it, fixing the tail if needed. */
		*linkp = cur->b_next;
		if (cur == bucket->tbf_t)
			bucket->tbf_t = prev;
		cur->b_prev = cur->b_next = NULL;

		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
		ip_drop_output("tbf_dq_send", cur, ill);
		freemsg(cur);

		if (--bucket->tbf_q_len == 0)
			bucket->tbf_t = NULL;
		ipst->ips_mrtstat->mrts_drop_sel++;
		return (1);
	}
	return (0);
}
/*
 * Transmit a packet on a vif; mp is consumed in every case.
 *
 * For a tunnel vif the (already encapsulated) packet is sent with
 * ip_output_simple() using freshly built transmit attributes.  For any
 * other vif the packet is forwarded on the vif's ill with
 * ip_forward_xmit_v4(), using the point-to-point peer address as next hop
 * on point-to-point interfaces and the packet's destination otherwise.
 */
static void
tbf_send_packet(struct vif *vifp, mblk_t *mp)
{
	ipif_t		*ipif = vifp->v_ipif;
	ill_t		*ill = ipif->ipif_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	ipha_t		*ipha = (ipha_t *)mp->b_rptr;

	if (!(vifp->v_flags & VIFF_TUNNEL)) {
		ip_recv_attr_t	iras;
		ipaddr_t	dst;
		nce_t		*nce;

		/* Receive attributes describing the outgoing ill. */
		bzero(&iras, sizeof (iras));
		iras.ira_flags = IRAF_IS_IPV4;
		iras.ira_ill = iras.ira_rill = ill;
		iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
		iras.ira_zoneid = GLOBAL_ZONEID;
		iras.ira_pktlen = ntohs(ipha->ipha_length);
		iras.ira_ip_hdr_length = IPH_HDR_LENGTH(ipha);

		/* Request a loopback copy if the ill has group members. */
		dst = ipha->ipha_dst;
		if (ill_hasmembers_v4(ill, dst))
			iras.ira_flags |= IRAF_LOOPBACK_COPY;

		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "tbf_send_pkt: phyint forward vif %ld dst = 0x%x",
			    (ptrdiff_t)(vifp - ipst->ips_vifs), ntohl(dst));
		}

		/* Pick the next hop and get an nce for it. */
		if (ipif->ipif_flags & IPIF_POINTOPOINT) {
			dst = ipif->ipif_pp_dst_addr;
			nce = arp_nce_init(ill, dst, ill->ill_net_type);
		} else {
			nce = arp_nce_init(ill, dst, IRE_MULTICAST);
		}
		if (nce == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
			ip_drop_output("tbf_send_packet - no nce", mp, ill);
			freemsg(mp);
			return;
		}

		ip_forward_xmit_v4(nce, ill, mp, ipha, &iras, ill->ill_mc_mtu,
		    0);
		ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));

		nce_refrele(nce);
	} else {
		ip_xmit_attr_t	ixas;

		if (ipst->ips_ip_mrtdebug > 1) {
			(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
			    "tbf_send_packet: ENCAP tunnel vif %ld",
			    (ptrdiff_t)(vifp - ipst->ips_vifs));
		}

		/* One-shot transmit attributes for the encapsulated packet. */
		bzero(&ixas, sizeof (ixas));
		ixas.ixa_flags =
		    IXAF_IS_IPV4 | IXAF_NO_TTL_CHANGE | IXAF_VERIFY_SOURCE;
		ixas.ixa_ipst = ipst;
		ixas.ixa_ifindex = 0;
		ixas.ixa_cred = kcred;
		ixas.ixa_cpid = NOPID;
		ixas.ixa_tsl = NULL;
		ixas.ixa_zoneid = GLOBAL_ZONEID;
		ixas.ixa_pktlen = ntohs(ipha->ipha_length);
		ixas.ixa_ip_hdr_length = IPH_HDR_LENGTH(ipha);

		(void) ip_output_simple(mp, &ixas);
		ixa_cleanup(&ixas);
	}
}
/*
 * Refill a vif's token bucket according to the time elapsed since the last
 * update and the vif's rate limit, capping it at MAX_BKT_SIZE, and record
 * the current time as the new reference point.  The caller must hold the
 * tbf lock.
 */
static void
tbf_update_tokens(struct vif *vifp)
{
	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	struct tbf	*bucket = vifp->v_tbf;
	timespec_t	now;
	hrtime_t	elapsed;

	ASSERT(MUTEX_HELD(&bucket->tbf_lock));

	gethrestime(&now);
	TV_DELTA(now, bucket->tbf_last_pkt_t, elapsed);

	/* Tokens earned during the elapsed interval at the vif's rate. */
	bucket->tbf_n_tok += (elapsed/1000) * vifp->v_rate_limit / 1024 / 8;
	bucket->tbf_last_pkt_t = now;

	/* The bucket has a fixed capacity. */
	if (bucket->tbf_n_tok > MAX_BKT_SIZE)
		bucket->tbf_n_tok = MAX_BKT_SIZE;

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "tbf_update_tok: tm %lld tok %d vif %ld",
		    elapsed, bucket->tbf_n_tok,
		    (ptrdiff_t)(vifp - ipst->ips_vifs));
	}
}
/*
 * Compute the drop priority of a packet for the token bucket queue.
 * Non-UDP packets get 50.  UDP packets are classified by the top two bits
 * of the destination port: 0x4000 -> 70, 0x8000 -> 60, 0xc000 -> 55,
 * anything else -> 50.  The caller must hold the vif's tbf lock.
 */
static int
priority(struct vif *vifp, ipha_t *ipha)
{
	ip_stack_t	*ipst = vifp->v_ipif->ipif_ill->ill_ipst;
	conn_t		*mrouter = ipst->ips_ip_g_mrouter;
	struct udphdr	*udp;
	ushort_t	dport;
	int		prio;

	ASSERT(MUTEX_HELD(&vifp->v_tbf->tbf_lock));

	if (ipha->ipha_protocol != IPPROTO_UDP)
		return (50);

	/* The UDP header follows the (variable length) IP header. */
	udp = (struct udphdr *)((char *)ipha + IPH_HDR_LENGTH(ipha));
	dport = ntohs(udp->uh_dport);

	switch (dport & 0xc000) {
	case 0x4000:
		prio = 70;
		break;
	case 0x8000:
		prio = 60;
		break;
	case 0xc000:
		prio = 55;
		break;
	default:
		prio = 50;
		break;
	}

	if (ipst->ips_ip_mrtdebug > 1) {
		(void) mi_strlog(mrouter->conn_rq, 1, SL_TRACE,
		    "priority: port %x prio %d\n",
		    dport, prio);
	}
	return (prio);
}
/*
 * Append the stack's multicast routing statistics (struct mrtstat) to the
 * SNMP reply in mp, after recording the sizes of struct vifctl and struct
 * mfcctl in it.  Returns 1 on success and 0 if the data cannot be appended.
 */
int
ip_mroute_stats(mblk_t *mp, ip_stack_t *ipst)
{
	struct mrtstat	*stats = ipst->ips_mrtstat;

	stats->mrts_vifctlSize = sizeof (struct vifctl);
	stats->mrts_mfcctlSize = sizeof (struct mfcctl);

	if (snmp_append_data(mp, (char *)stats, sizeof (struct mrtstat)))
		return (1);

	ip0dbg(("ip_mroute_stats: failed %ld bytes\n",
	    (size_t)sizeof (struct mrtstat)));
	return (0);
}
/*
 * Append one struct vifctl per configured vif to the SNMP reply in mp.
 * Table slots whose local address is zero are skipped.  The walk is done
 * under ips_numvifs_mutex.  Returns 1 on success and 0 as soon as an entry
 * cannot be appended.
 */
int
ip_mroute_vif(mblk_t *mp, ip_stack_t *ipst)
{
	struct vifctl	entry;
	struct vif	*vifp;
	vifi_t		idx;
	int		ok = 1;

	mutex_enter(&ipst->ips_numvifs_mutex);
	for (idx = 0; idx < ipst->ips_numvifs; idx++) {
		vifp = &ipst->ips_vifs[idx];
		if (vifp->v_lcl_addr.s_addr == 0)
			continue;

		/* The per-vif fields are copied without taking the vif lock. */
		entry.vifc_vifi = idx;
		entry.vifc_flags = vifp->v_flags;
		entry.vifc_threshold = vifp->v_threshold;
		entry.vifc_rate_limit = vifp->v_rate_limit;
		entry.vifc_lcl_addr = vifp->v_lcl_addr;
		entry.vifc_rmt_addr = vifp->v_rmt_addr;
		entry.vifc_pkt_in = vifp->v_pkt_in;
		entry.vifc_pkt_out = vifp->v_pkt_out;

		if (!snmp_append_data(mp, (char *)&entry, sizeof (entry))) {
			ip0dbg(("ip_mroute_vif: failed %ld bytes\n",
			    (size_t)sizeof (entry)));
			ok = 0;
			break;
		}
	}
	mutex_exit(&ipst->ips_numvifs_mutex);
	return (ok);
}
/*
 * Append one struct mfcctl per resolved multicast route to the SNMP reply
 * in mp.  Entries that still have packets queued for an upcall, or that are
 * marked condemned, are skipped.  Nothing is reported (and 1 is returned)
 * when multicast routing is off.  Returns 1 on success and 0 as soon as an
 * entry cannot be appended.
 */
int
ip_mroute_mrt(mblk_t *mp, ip_stack_t *ipst)
{
	struct mfcb	*bucket;
	struct mfc	*rt;
	struct mfcctl	entry;
	int		hashidx, v, nvifs;

	if (is_mrouter_off(ipst))
		return (1);

	for (hashidx = 0; hashidx < MFCTBLSIZ; hashidx++) {
		bucket = &ipst->ips_mfcs[hashidx];

		/* Hold the bucket while its chain is being walked. */
		MFCB_REFHOLD(bucket);
		for (rt = bucket->mfcb_mfc; rt != NULL; rt = rt->mfc_next) {
			mutex_enter(&rt->mfc_mutex);
			if (rt->mfc_rte != NULL ||
			    (rt->mfc_marks & MFCB_MARK_CONDEMNED)) {
				mutex_exit(&rt->mfc_mutex);
				continue;
			}

			entry.mfcc_origin = rt->mfc_origin;
			entry.mfcc_mcastgrp = rt->mfc_mcastgrp;
			entry.mfcc_parent = rt->mfc_parent;
			entry.mfcc_pkt_cnt = rt->mfc_pkt_cnt;

			/* Copy ttls for live vifs; zero the unused slots. */
			mutex_enter(&ipst->ips_numvifs_mutex);
			nvifs = (int)ipst->ips_numvifs;
			for (v = 0; v < nvifs; v++)
				entry.mfcc_ttls[v] = rt->mfc_ttls[v];
			for (v = nvifs; v < MAXVIFS; v++)
				entry.mfcc_ttls[v] = 0;
			mutex_exit(&ipst->ips_numvifs_mutex);

			mutex_exit(&rt->mfc_mutex);

			if (!snmp_append_data(mp, (char *)&entry,
			    sizeof (entry))) {
				MFCB_REFRELE(bucket);
				ip0dbg(("ip_mroute_mrt: failed %ld bytes\n",
				    (size_t)sizeof (entry)));
				return (0);
			}
		}
		MFCB_REFRELE(bucket);
	}
	return (1);
}