#include <sys/types.h>
#include <sys/callb.h>
#include <sys/sdt.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/vlan.h>
#include <inet/ipsec_impl.h>
#include <inet/ip_impl.h>
#include <inet/sadb.h>
#include <inet/ipsecesp.h>
#include <inet/ipsecah.h>
#include <sys/mac_impl.h>
#include <sys/mac_client_impl.h>
#include <sys/mac_soft_ring.h>
#include <sys/mac_flow_impl.h>
#include <sys/mac_stat.h>
/* Forward declarations of the file-local drain, timeout and worker routines. */
static void mac_rx_soft_ring_drain(mac_soft_ring_t *);
static void mac_soft_ring_fire(void *);
static void mac_soft_ring_worker(mac_soft_ring_t *);
static void mac_tx_soft_ring_drain(mac_soft_ring_t *);
/*
 * Tunable copied into s_ring_tx_max_q_cnt of every TX soft ring at creation
 * time (see mac_soft_ring_create_tx()).
 */
uint32_t mac_tx_soft_ring_max_q_cnt = 100000;
/*
 * Tunable used to seed s_ring_tx_hiwat of every TX soft ring at creation
 * time; mac_soft_ring_create_tx() clamps it to mac_tx_soft_ring_max_q_cnt.
 */
uint32_t mac_tx_soft_ring_hiwat = 1000;
/* kmem cache that mac_soft_ring_t structures are allocated from and freed to. */
extern kmem_cache_t *mac_soft_ring_cache;
/*
 * Append `softring' to the tail of the doubly linked soft ring list hanging
 * off `mac_srs' and bump the set's soft ring count.
 *
 * When the list is empty the ring becomes both head and tail and its
 * s_ring_prev/s_ring_next links are left untouched, so the caller must hand
 * in a ring whose links are already NULL.  The macro takes no locks itself;
 * the caller in this file invokes it with srs_lock held.
 *
 * Arguments are parenthesized and the body is wrapped in do/while (0) so the
 * macro behaves as a single statement (e.g. inside an unbraced if/else).
 * Note that both arguments are evaluated more than once.
 */
#define	ADD_SOFTRING_TO_SET(mac_srs, softring) do {			\
	if ((mac_srs)->srs_soft_ring_head == NULL) {			\
		(mac_srs)->srs_soft_ring_head = (softring);		\
		(mac_srs)->srs_soft_ring_tail = (softring);		\
	} else {							\
		(softring)->s_ring_prev =				\
		    (mac_srs)->srs_soft_ring_tail;			\
		(mac_srs)->srs_soft_ring_tail->s_ring_next = (softring); \
		(mac_srs)->srs_soft_ring_tail = (softring);		\
	}								\
	(mac_srs)->srs_soft_ring_count++;				\
} while (0)
/*
 * Arrange for the soft ring's worker thread to run.
 *
 * Must be called with s_ring_lock held.  Nothing happens if the ring is
 * already being processed (S_RING_PROC), is blanked (S_RING_BLANK), or
 * already has a timeout outstanding (s_ring_tid != NULL).  Otherwise the
 * worker is either signalled immediately (s_ring_wait == 0) or a timeout
 * that fires mac_soft_ring_fire() is armed for s_ring_wait ticks.
 */
void
mac_soft_ring_worker_wakeup(mac_soft_ring_t *ringp)
{
	ASSERT(MUTEX_HELD(&ringp->s_ring_lock));

	/* Busy, blanked, or a wakeup is already scheduled: leave it alone. */
	if ((ringp->s_ring_state & (S_RING_PROC | S_RING_BLANK)) != 0 ||
	    ringp->s_ring_tid != NULL)
		return;

	if (ringp->s_ring_wait == 0) {
		/* No coalescing delay configured; kick the worker now. */
		cv_signal(&ringp->s_ring_async);
		return;
	}

	ringp->s_ring_tid = timeout(mac_soft_ring_fire, ringp,
	    ringp->s_ring_wait);
}
/*
 * Common soft ring constructor shared by the RX and TX create routines.
 *
 * Allocates a zeroed mac_soft_ring_t from mac_soft_ring_cache, names it after
 * its traffic type, `id' and the owning SRS pointer, links it onto the tail
 * of `mac_srs' (under srs_lock), starts its worker thread at priority `pri'
 * and, when `cpuid' is not -1, tries to bind that worker to `cpuid'.
 *
 *   id      - index used only to build the ring name
 *   wait    - worker wakeup delay in milliseconds (stored as ticks)
 *   type    - ST_RING_* type bits; must not contain any SR_STATE bits
 *   pri     - priority handed to thread_create()
 *   mcip    - MAC client stored in s_ring_mcip
 *   mac_srs - soft ring set the new ring is appended to
 *   cpuid   - CPU to bind the worker to, or -1 for no binding
 *
 * Returns the new ring; the KM_SLEEP allocation does not fail.  The drain
 * function and RX/TX arguments are NOT set here; callers fill them in.
 * Because mac_soft_ring_bind() asserts cpu_lock, callers passing a cpuid
 * other than -1 must hold cpu_lock.
 */
static mac_soft_ring_t *
mac_soft_ring_create_i(int id, clock_t wait, const mac_soft_ring_state_t type,
    pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs,
    processorid_t cpuid)
{
	mac_soft_ring_t *ringp;
	char name[S_RING_NAMELEN];

	VERIFY3U(type & SR_STATE, ==, 0);

	/*
	 * Size the clear by the buffer itself rather than a literal so it
	 * can never overrun `name' if S_RING_NAMELEN changes.
	 */
	bzero(name, sizeof (name));
	ringp = kmem_cache_alloc(mac_soft_ring_cache, KM_SLEEP);

	if (type & ST_RING_TCP) {
		(void) snprintf(name, sizeof (name),
		    "mac_tcp_soft_ring_%d_%p", id, (void *)mac_srs);
	} else if (type & ST_RING_TCP6) {
		(void) snprintf(name, sizeof (name),
		    "mac_tcp6_soft_ring_%d_%p", id, (void *)mac_srs);
	} else if (type & ST_RING_UDP) {
		(void) snprintf(name, sizeof (name),
		    "mac_udp_soft_ring_%d_%p", id, (void *)mac_srs);
	} else if (type & ST_RING_UDP6) {
		(void) snprintf(name, sizeof (name),
		    "mac_udp6_soft_ring_%d_%p", id, (void *)mac_srs);
	} else if (type & ST_RING_OTH) {
		(void) snprintf(name, sizeof (name),
		    "mac_oth_soft_ring_%d_%p", id, (void *)mac_srs);
	} else {
		ASSERT(type & ST_RING_TX);
		(void) snprintf(name, sizeof (name),
		    "mac_tx_soft_ring_%d_%p", id, (void *)mac_srs);
	}

	/* Cache objects are not pre-initialised here; start from all zeroes. */
	bzero(ringp, sizeof (mac_soft_ring_t));

	/*
	 * `name' is always NUL terminated by snprintf(), so strncpy() stops
	 * reading inside it and zero-pads the destination.  The destination
	 * is indexed at [S_RING_NAMELEN] below, i.e. it is expected to hold
	 * S_RING_NAMELEN + 1 bytes.
	 */
	(void) strncpy(ringp->s_ring_name, name, S_RING_NAMELEN + 1);
	ringp->s_ring_name[S_RING_NAMELEN] = '\0';

	mutex_init(&ringp->s_ring_lock, NULL, MUTEX_DEFAULT, NULL);
	ringp->s_ring_notify_cb_info.mcbi_lockp = &ringp->s_ring_lock;

	ringp->s_ring_state = type;
	ringp->s_ring_wait = MSEC_TO_TICK(wait);
	ringp->s_ring_mcip = mcip;
	ringp->s_ring_set = mac_srs;

	/* The SRS ring list is modified under srs_lock. */
	mutex_enter(&mac_srs->srs_lock);
	ADD_SOFTRING_TO_SET(mac_srs, ringp);
	mutex_exit(&mac_srs->srs_lock);

	/* -1 means "no CPU affinity"; see mac_soft_ring_bind()/unbind(). */
	ringp->s_ring_cpuid = ringp->s_ring_cpuid_save = -1;
	ringp->s_ring_worker = thread_create(NULL, 0,
	    mac_soft_ring_worker, ringp, 0, &p0, TS_RUN, pri);

	if (cpuid != -1)
		(void) mac_soft_ring_bind(ringp, cpuid);

	return (ringp);
}
/*
 * Create a receive-side soft ring.
 *
 * `type' must not carry ST_RING_TX.  The ring is built by
 * mac_soft_ring_create_i() and then wired for RX: packets are drained by
 * mac_rx_soft_ring_drain() and delivered through rx_func(x_arg1, NULL, ...).
 * If the owning SRS has SRST_ENQUEUE set, the ring is additionally marked
 * ST_RING_WORKER_ONLY.  Ring statistics are created before returning.
 */
mac_soft_ring_t *
mac_soft_ring_create_rx(int id, clock_t wait, const mac_soft_ring_state_t type,
    pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs,
    processorid_t cpuid, mac_direct_rx_t rx_func, void *x_arg1)
{
	mac_soft_ring_t *sr;

	VERIFY3U((type & ST_RING_TX), ==, 0);

	sr = mac_soft_ring_create_i(id, wait, type, pri, mcip, mac_srs, cpuid);

	sr->s_ring_drain_func = mac_rx_soft_ring_drain;
	sr->s_ring_rx_func = rx_func;
	sr->s_ring_rx_arg1 = x_arg1;
	sr->s_ring_rx_arg2 = NULL;

	if ((mac_srs->srs_type & SRST_ENQUEUE) != 0)
		sr->s_ring_state |= ST_RING_WORKER_ONLY;

	mac_soft_ring_stat_create(sr);

	return (sr);
}
/*
 * Create a transmit-side soft ring on top of the hardware ring `ring'.
 *
 * The caller passes `type' WITHOUT ST_RING_TX (verified); the bit is added
 * here before handing off to mac_soft_ring_create_i().  The ring is drained
 * by mac_tx_soft_ring_drain() with (mcip, ring) as its TX arguments.  The
 * queue limit is taken from mac_tx_soft_ring_max_q_cnt and the high-water
 * mark from mac_tx_soft_ring_hiwat, capped at the queue limit.  For aggr
 * clients the new ring is also recorded in the SRS st_soft_rings[] table at
 * the hardware ring's index, which must be empty.
 */
mac_soft_ring_t *
mac_soft_ring_create_tx(int id, clock_t wait, const mac_soft_ring_state_t type,
    pri_t pri, mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs,
    processorid_t cpuid, mac_ring_t *ring)
{
	mac_soft_ring_t *sr;
	uint32_t hiwat;

	VERIFY3U((type & ST_RING_TX), ==, 0);
	VERIFY(ring != NULL);

	sr = mac_soft_ring_create_i(id, wait, type | ST_RING_TX, pri, mcip,
	    mac_srs, cpuid);

	sr->s_ring_drain_func = mac_tx_soft_ring_drain;
	sr->s_ring_tx_arg1 = mcip;
	sr->s_ring_tx_arg2 = ring;
	sr->s_ring_tx_max_q_cnt = mac_tx_soft_ring_max_q_cnt;

	/* The high-water mark may never exceed the maximum queue depth. */
	hiwat = mac_tx_soft_ring_hiwat;
	if (hiwat > mac_tx_soft_ring_max_q_cnt)
		hiwat = mac_tx_soft_ring_max_q_cnt;
	sr->s_ring_tx_hiwat = hiwat;

	if ((mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) != 0) {
		mac_srs_tx_t *srs_tx = &mac_srs->srs_tx;

		VERIFY3P(srs_tx->st_soft_rings[ring->mr_index], ==, NULL);
		srs_tx->st_soft_rings[ring->mr_index] = sr;
	}

	mac_soft_ring_stat_create(sr);

	return (sr);
}
/*
 * Tear down a soft ring and return it to mac_soft_ring_cache.
 *
 * The ring must already be fully condemned (S_RING_CONDEMNED and
 * S_RING_CONDEMNED_DONE set) and must not be processing (S_RING_PROC clear).
 * Any packets still queued are dropped, the ring's statistics and
 * notification callbacks are released, and the structure is freed.
 */
void
mac_soft_ring_free(mac_soft_ring_t *softring)
{
	ASSERT((softring->s_ring_state &
	    (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE | S_RING_PROC)) ==
	    (S_RING_CONDEMNED | S_RING_CONDEMNED_DONE));

	/* Forget the hardware TX ring before discarding queued packets. */
	softring->s_ring_tx_arg2 = NULL;
	mac_drop_chain(softring->s_ring_first, "softring free");

	mac_soft_ring_stat_delete(softring);
	mac_callback_free(softring->s_ring_notify_cb_list);

	kmem_cache_free(mac_soft_ring_cache, softring);
}
/*
 * Tunable: when 0, mac_soft_ring_bind() fires a DTrace probe and returns NULL
 * without binding the soft ring worker to any CPU.
 */
int mac_soft_ring_thread_bind = 1;
/*
 * Bind the soft ring's worker thread to CPU `cpuid'.
 *
 * Caller must hold cpu_lock.  Returns the cpu_t the worker was bound to, or
 * NULL if binding is disabled via mac_soft_ring_thread_bind, or if the CPU
 * does not exist or is not online.  If the ring already had a CPU recorded,
 * the previous affinity is cleared before the new one is set.
 */
cpu_t *
mac_soft_ring_bind(mac_soft_ring_t *ringp, processorid_t cpuid)
{
	cpu_t *target;
	boolean_t had_cpu;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (mac_soft_ring_thread_bind == 0) {
		DTRACE_PROBE1(mac__soft__ring__no__cpu__bound,
		    mac_soft_ring_t *, ringp);
		return (NULL);
	}

	target = cpu_get(cpuid);
	if (target == NULL)
		return (NULL);
	if (!cpu_is_online(target))
		return (NULL);

	/* Record the new binding; remember whether an old one existed. */
	mutex_enter(&ringp->s_ring_lock);
	ringp->s_ring_state |= S_RING_BOUND;
	had_cpu = (ringp->s_ring_cpuid != -1) ? B_TRUE : B_FALSE;
	ringp->s_ring_cpuid = cpuid;
	mutex_exit(&ringp->s_ring_lock);

	if (had_cpu)
		thread_affinity_clear(ringp->s_ring_worker);

	DTRACE_PROBE2(mac__soft__ring__cpu__bound, mac_soft_ring_t *,
	    ringp, processorid_t, cpuid);

	thread_affinity_set(ringp->s_ring_worker, cpuid);

	return (target);
}
/*
 * Remove the CPU binding of the soft ring's worker thread.
 *
 * Caller must hold cpu_lock.  If the ring is not marked S_RING_BOUND this is
 * a no-op (and the recorded CPU id is expected to be -1).  Otherwise the CPU
 * id is reset to -1, S_RING_BOUND is cleared and the worker's affinity is
 * dropped, all under s_ring_lock.
 */
void
mac_soft_ring_unbind(mac_soft_ring_t *ringp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	mutex_enter(&ringp->s_ring_lock);
	if ((ringp->s_ring_state & S_RING_BOUND) != 0) {
		ringp->s_ring_cpuid = -1;
		ringp->s_ring_state &= ~S_RING_BOUND;
		thread_affinity_clear(ringp->s_ring_worker);
	} else {
		ASSERT(ringp->s_ring_cpuid == -1);
	}
	mutex_exit(&ringp->s_ring_lock);
}
/*
 * Timeout handler armed by mac_soft_ring_worker_wakeup().
 *
 * `arg' is the soft ring.  If the timeout id has already been cleared
 * (s_ring_tid == NULL) there is nothing to do.  Otherwise the id is cleared
 * and, unless the ring is currently being processed, the worker is woken.
 */
static void
mac_soft_ring_fire(void *arg)
{
	mac_soft_ring_t *sr = arg;

	mutex_enter(&sr->s_ring_lock);
	if (sr->s_ring_tid != NULL) {
		sr->s_ring_tid = NULL;
		if ((sr->s_ring_state & S_RING_PROC) == 0)
			cv_signal(&sr->s_ring_async);
	}
	mutex_exit(&sr->s_ring_lock);
}
/*
 * RX drain routine installed as s_ring_drain_func by mac_soft_ring_create_rx().
 *
 * Entered with s_ring_lock held and S_RING_PROC clear (both asserted);
 * returns with s_ring_lock held and S_RING_PROC clear again.  While the
 * queue is non-empty and S_RING_PAUSE is not set, the whole queued chain is
 * detached and handed to the ring's receive callback with the lock dropped.
 */
static void
mac_rx_soft_ring_drain(mac_soft_ring_t *ringp)
{
mblk_t *mp;
void *arg1;
mac_resource_handle_t arg2;
timeout_id_t tid;
mac_direct_rx_t proc;
size_t sz;
int cnt;
mac_soft_ring_set_t *mac_srs = ringp->s_ring_set;
/* Record which thread is draining; cleared again on exit. */
ringp->s_ring_run = curthread;
ASSERT(mutex_owned(&ringp->s_ring_lock));
ASSERT(!(ringp->s_ring_state & S_RING_PROC));
/*
 * Take ownership of any pending wakeup timeout; it is cancelled below
 * once the lock has been dropped.
 */
if ((tid = ringp->s_ring_tid) != NULL)
ringp->s_ring_tid = NULL;
ringp->s_ring_state |= S_RING_PROC;
/*
 * Snapshot the callback and its arguments once, under the lock; the same
 * values are used for every pass of the loop.
 */
proc = ringp->s_ring_rx_func;
arg1 = ringp->s_ring_rx_arg1;
arg2 = ringp->s_ring_rx_arg2;
while ((ringp->s_ring_first != NULL) &&
!(ringp->s_ring_state & S_RING_PAUSE)) {
/* Detach the entire queue and remember its packet/byte totals. */
mp = ringp->s_ring_first;
ringp->s_ring_first = NULL;
ringp->s_ring_last = NULL;
cnt = ringp->s_ring_count;
ringp->s_ring_count = 0;
sz = ringp->s_ring_size;
ringp->s_ring_size = 0;
mutex_exit(&ringp->s_ring_lock);
/* Cancel the stale timeout (first pass only), without holding the lock. */
if (tid != NULL) {
(void) untimeout(tid);
tid = NULL;
}
/* Deliver the chain to the client with no ring lock held. */
(*proc)(arg1, arg2, mp, NULL);
/*
 * Report the delivered packet count and byte size to the owning SRS
 * under srs_lock (the macros are defined elsewhere).
 */
mutex_enter(&mac_srs->srs_lock);
MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt);
MAC_UPDATE_SRS_SIZE_LOCKED(mac_srs, sz);
mutex_exit(&mac_srs->srs_lock);
mutex_enter(&ringp->s_ring_lock);
}
ringp->s_ring_state &= ~S_RING_PROC;
/* Wake a thread waiting in mac_soft_ring_dls_bypass_disable(). */
if (ringp->s_ring_state & S_RING_CLIENT_WAIT)
cv_signal(&ringp->s_ring_client_cv);
ringp->s_ring_run = NULL;
}
/*
 * Body of the per-soft-ring worker thread created by
 * mac_soft_ring_create_i().
 *
 * The thread sleeps on s_ring_async until there is queued work that it is
 * allowed to process, then calls the ring's drain function.  When
 * S_RING_PAUSE is set it leaves the main loop, reports itself quiesced to
 * the SRS and waits to be either restarted (S_RING_RESTART) or condemned
 * (S_RING_CONDEMNED); in the latter case it exits.  Whenever both locks are
 * needed, srs_lock is taken before s_ring_lock.
 */
static void
mac_soft_ring_worker(mac_soft_ring_t *ringp)
{
kmutex_t *lock = &ringp->s_ring_lock;
kcondvar_t *async = &ringp->s_ring_async;
mac_soft_ring_set_t *srs = ringp->s_ring_set;
callb_cpr_t cprinfo;
CALLB_CPR_INIT(&cprinfo, lock, callb_generic_cpr, "mac_soft_ring");
mutex_enter(lock);
start:
for (;;) {
/*
 * Sleep while the ring is already being drained (S_RING_PROC), or
 * while no pause is requested and either the queue is empty or the
 * ring is blocked/blanked.
 */
while (((ringp->s_ring_first == NULL ||
(ringp->s_ring_state & (S_RING_BLOCK|S_RING_BLANK))) &&
!(ringp->s_ring_state & S_RING_PAUSE)) ||
(ringp->s_ring_state & S_RING_PROC)) {
CALLB_CPR_SAFE_BEGIN(&cprinfo);
cv_wait(async, lock);
CALLB_CPR_SAFE_END(&cprinfo, lock);
}
/* A pause request takes priority over draining. */
if (ringp->s_ring_state & S_RING_PAUSE)
goto done;
ringp->s_ring_drain_func(ringp);
}
done:
/* Drop and retake the ring lock so that srs_lock is acquired first. */
mutex_exit(lock);
mutex_enter(&srs->srs_lock);
mutex_enter(lock);
ringp->s_ring_state |= S_RING_QUIESCE_DONE;
if (!(ringp->s_ring_state & S_RING_CONDEMNED)) {
/* Quiesce only: tell SRS waiters, then wait for restart or condemn. */
srs->srs_soft_ring_quiesced_count++;
cv_broadcast(&srs->srs_async);
mutex_exit(&srs->srs_lock);
while (!(ringp->s_ring_state &
(S_RING_RESTART | S_RING_CONDEMNED)))
cv_wait(&ringp->s_ring_async, &ringp->s_ring_lock);
/* Same lock-order dance as above before touching the SRS counter. */
mutex_exit(lock);
mutex_enter(&srs->srs_lock);
mutex_enter(lock);
srs->srs_soft_ring_quiesced_count--;
if (ringp->s_ring_state & S_RING_RESTART) {
ASSERT(!(ringp->s_ring_state & S_RING_CONDEMNED));
/* Clear the quiesce state and resume the main loop (ring lock held). */
ringp->s_ring_state &= ~(S_RING_RESTART |
S_RING_QUIESCE | S_RING_QUIESCE_DONE);
cv_broadcast(&srs->srs_async);
mutex_exit(&srs->srs_lock);
goto start;
}
}
/* Condemned: publish completion to the SRS and terminate the thread. */
ASSERT(ringp->s_ring_state & S_RING_CONDEMNED);
ringp->s_ring_state |= S_RING_CONDEMNED_DONE;
/*
 * NOTE(review): no explicit mutex_exit(lock) follows; CALLB_CPR_EXIT is
 * presumably what releases the ring lock registered with CALLB_CPR_INIT --
 * confirm against the callb documentation.
 */
CALLB_CPR_EXIT(&cprinfo);
srs->srs_soft_ring_condemned_count++;
cv_broadcast(&srs->srs_async);
mutex_exit(&srs->srs_lock);
thread_exit();
}
/*
 * "Interrupt enable" entry point registered for a polled soft ring (see
 * mac_soft_ring_poll_enable()).
 *
 * `arg' is the soft ring.  Clears S_RING_BLANK and, if packets are already
 * queued, schedules the worker.  Always returns 0.
 */
int
mac_soft_ring_intr_enable(void *arg)
{
	mac_soft_ring_t *sr = arg;

	mutex_enter(&sr->s_ring_lock);
	sr->s_ring_state &= ~S_RING_BLANK;
	if (sr->s_ring_first != NULL) {
		/* Work accumulated while blanked; get the worker going. */
		mac_soft_ring_worker_wakeup(sr);
	}
	mutex_exit(&sr->s_ring_lock);

	return (0);
}
/*
 * "Interrupt disable" entry point registered for a polled soft ring (see
 * mac_soft_ring_poll_enable()).
 *
 * `arg' is the soft ring.  If the ring is not currently being processed,
 * S_RING_BLANK is set and B_TRUE is returned; if it is being processed the
 * state is left alone and B_FALSE is returned.
 */
boolean_t
mac_soft_ring_intr_disable(void *arg)
{
	mac_soft_ring_t *sr = arg;
	boolean_t blanked;

	mutex_enter(&sr->s_ring_lock);
	if ((sr->s_ring_state & S_RING_PROC) != 0) {
		blanked = B_FALSE;
	} else {
		sr->s_ring_state |= S_RING_BLANK;
		blanked = B_TRUE;
	}
	mutex_exit(&sr->s_ring_lock);

	return (blanked);
}
/*
 * Pull packets off the soft ring on behalf of a polling client.
 *
 * Returns a b_next-linked chain taken from the head of the queue, or NULL if
 * the queue is empty.  If everything queued fits in `bytes_to_pickup' the
 * whole queue is returned.  Otherwise packets are taken one at a time for as
 * long as the running total has not yet exceeded `bytes_to_pickup', so the
 * returned chain may overshoot the budget by the last packet taken.  The
 * packets and bytes removed are then reported to the owning SRS under
 * srs_lock.
 */
mblk_t *
mac_soft_ring_poll(mac_soft_ring_t *ringp, size_t bytes_to_pickup)
{
	mac_soft_ring_set_t *mac_srs = ringp->s_ring_set;
	mblk_t *chain;
	size_t taken_bytes = 0;
	int taken_pkts = 0;

	ASSERT(mac_srs != NULL);

	mutex_enter(&ringp->s_ring_lock);

	chain = ringp->s_ring_first;
	if (chain == NULL) {
		mutex_exit(&ringp->s_ring_lock);
		return (NULL);
	}

	if (ringp->s_ring_size <= bytes_to_pickup) {
		/* The whole queue fits in the budget: hand all of it over. */
		taken_pkts = ringp->s_ring_count;
		taken_bytes = ringp->s_ring_size;
		ringp->s_ring_first = NULL;
		ringp->s_ring_last = NULL;
		ringp->s_ring_count = 0;
		ringp->s_ring_size = 0;
	} else {
		mblk_t *last_taken = chain;
		mblk_t *rest = chain;

		/* Walk from the head until the byte budget is exceeded. */
		while (rest != NULL && taken_bytes <= bytes_to_pickup) {
			taken_bytes += msgdsize(rest);
			taken_pkts++;
			last_taken = rest;
			rest = rest->b_next;
		}

		ringp->s_ring_count -= taken_pkts;
		ringp->s_ring_size -= taken_bytes;

		/* Cut the picked-up chain loose from what stays queued. */
		last_taken->b_next = NULL;
		ringp->s_ring_first = rest;
		if (rest == NULL) {
			ringp->s_ring_last = NULL;
			ASSERT(ringp->s_ring_count == 0);
		}
	}

	mutex_exit(&ringp->s_ring_lock);

	mutex_enter(&mac_srs->srs_lock);
	MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, taken_pkts);
	MAC_UPDATE_SRS_SIZE_LOCKED(mac_srs, taken_bytes);
	mutex_exit(&mac_srs->srs_lock);

	return (chain);
}
/*
 * Point the soft ring's receive path at a direct client callback.
 *
 * `rx_func' (which must not be NULL) and `rx_arg1' replace the ring's
 * current receive function and first argument under s_ring_lock.  A drain
 * that is already in progress keeps using the values it snapshotted earlier.
 */
void
mac_soft_ring_dls_bypass_enable(mac_soft_ring_t *softring,
    mac_direct_rx_t rx_func, void *rx_arg1)
{
	VERIFY3P(rx_func, !=, NULL);

	mutex_enter(&softring->s_ring_lock);
	softring->s_ring_rx_arg1 = rx_arg1;
	softring->s_ring_rx_func = rx_func;
	mutex_exit(&softring->s_ring_lock);
}
/*
 * Undo mac_soft_ring_dls_bypass_enable(): restore delivery through
 * mac_rx_deliver() with `mcip' as its argument.
 *
 * Waits (on s_ring_client_cv, flagged by S_RING_CLIENT_WAIT) until the ring
 * is no longer being processed before switching the callback, so that no
 * drain is still running when the switch is made.
 */
void
mac_soft_ring_dls_bypass_disable(mac_soft_ring_t *softring,
    mac_client_impl_t *mcip)
{
	mutex_enter(&softring->s_ring_lock);

	for (;;) {
		if ((softring->s_ring_state & S_RING_PROC) == 0)
			break;
		/* Ask the drain routine to signal us when it finishes. */
		softring->s_ring_state |= S_RING_CLIENT_WAIT;
		cv_wait(&softring->s_ring_client_cv, &softring->s_ring_lock);
	}
	softring->s_ring_state &= ~S_RING_CLIENT_WAIT;

	softring->s_ring_rx_arg1 = mcip;
	softring->s_ring_rx_func = mac_rx_deliver;

	mutex_exit(&softring->s_ring_lock);
}
/*
 * Put a TCP/TCP6 soft ring under client polling control.
 *
 * The ring must be of type ST_RING_TCP or ST_RING_TCP6, the resource callback
 * argument must be set, and the ring must not already have a resource handle
 * (s_ring_rx_arg2).  The direct receive callback (drx, drx_arg) is installed
 * first; then the ring is described to the client as a MAC_RX_FIFO resource
 * whose poll / interrupt-enable / interrupt-disable entry points are the
 * mac_soft_ring_* routines in this file.  The handle returned by the
 * client's mrc_add callback is kept in s_ring_rx_arg2.
 */
void
mac_soft_ring_poll_enable(mac_soft_ring_t *sr, mac_direct_rx_t drx,
    void *drx_arg, mac_resource_cb_t *rcb, uint32_t pri)
{
	mac_rx_fifo_t fifo;

	VERIFY((sr->s_ring_state & (ST_RING_TCP | ST_RING_TCP6)) != 0);
	VERIFY3P(rcb->mrc_arg, !=, NULL);
	VERIFY3P(sr->s_ring_rx_arg2, ==, NULL);

	mac_soft_ring_dls_bypass_enable(sr, drx, drx_arg);

	bzero(&fifo, sizeof (fifo));
	fifo.mrf_type = MAC_RX_FIFO;
	fifo.mrf_rx_arg = sr;
	fifo.mrf_receive = (mac_receive_t)mac_soft_ring_poll;
	fifo.mrf_intr_handle = (mac_intr_handle_t)sr;
	fifo.mrf_intr_enable = (mac_intr_enable_t)mac_soft_ring_intr_enable;
	fifo.mrf_intr_disable = (mac_intr_disable_t)mac_soft_ring_intr_disable;
	fifo.mrf_cpu_id = sr->s_ring_cpuid;
	fifo.mrf_flow_priority = pri;

	sr->s_ring_rx_arg2 = rcb->mrc_add(rcb->mrc_arg,
	    (mac_resource_t *)&fifo);
}
/*
 * Take a TCP/TCP6 soft ring back out of client polling control.
 *
 * If the ring holds a resource handle (s_ring_rx_arg2) it is handed to the
 * client's mrc_remove callback and cleared.  The receive path is then
 * switched back to the default delivery for `mcip' via
 * mac_soft_ring_dls_bypass_disable(), whether or not a handle was present.
 */
void
mac_soft_ring_poll_disable(mac_soft_ring_t *sr, mac_resource_cb_t *rcb,
    mac_client_impl_t *mcip)
{
	VERIFY((sr->s_ring_state & (ST_RING_TCP | ST_RING_TCP6)) != 0);

	if (sr->s_ring_rx_arg2 != NULL) {
		mac_resource_handle_t handle = sr->s_ring_rx_arg2;

		VERIFY3P(rcb->mrc_arg, !=, NULL);
		rcb->mrc_remove(rcb->mrc_arg, handle);
		sr->s_ring_rx_arg2 = NULL;
	}

	mac_soft_ring_dls_bypass_disable(sr, mcip);
}
/*
 * Set the state bit(s) in `sr_flag' on the soft ring and wake its worker
 * thread so that it notices the change.  Both steps happen under
 * s_ring_lock.
 */
void
mac_soft_ring_signal(mac_soft_ring_t *softring,
    const mac_soft_ring_state_t sr_flag)
{
	kmutex_t *ring_lock = &softring->s_ring_lock;

	mutex_enter(ring_lock);
	softring->s_ring_state |= sr_flag;
	cv_signal(&softring->s_ring_async);
	mutex_exit(ring_lock);
}
/*
 * TX drain routine installed as s_ring_drain_func by mac_soft_ring_create_tx().
 *
 * Entered with s_ring_lock held and S_RING_PROC clear (both asserted);
 * returns with s_ring_lock held and S_RING_PROC clear again.  Repeatedly
 * detaches the whole queue and passes it to mac_tx_send() with the lock
 * dropped.  If mac_tx_send() hands packets back, they are put back at the
 * head of the queue and the routine returns early, normally after marking
 * the ring S_RING_BLOCK.  Once the queue has fully drained, any client
 * waiting on flow control is notified.
 */
static void
mac_tx_soft_ring_drain(mac_soft_ring_t *ringp)
{
mblk_t *mp;
void *arg1;
void *arg2;
mblk_t *tail;
/*
 * NOTE(review): saved_size is uint_t here, while the RX paths in this file
 * hold s_ring_size in a size_t -- confirm the narrower type is intended.
 */
uint_t saved_pkt_count, saved_size;
mac_tx_stats_t stats;
mac_soft_ring_set_t *mac_srs = ringp->s_ring_set;
saved_pkt_count = saved_size = 0;
/* Record which thread is draining; cleared again on every exit path. */
ringp->s_ring_run = curthread;
ASSERT(mutex_owned(&ringp->s_ring_lock));
ASSERT(!(ringp->s_ring_state & S_RING_PROC));
ringp->s_ring_state |= S_RING_PROC;
/* (client, hardware ring) pair stored by mac_soft_ring_create_tx(). */
arg1 = ringp->s_ring_tx_arg1;
arg2 = ringp->s_ring_tx_arg2;
while (ringp->s_ring_first != NULL) {
/*
 * Detach the entire queue, remembering its tail and totals so that an
 * unsent remainder can be spliced back in below.
 */
mp = ringp->s_ring_first;
tail = ringp->s_ring_last;
saved_pkt_count = ringp->s_ring_count;
saved_size = ringp->s_ring_size;
ringp->s_ring_first = NULL;
ringp->s_ring_last = NULL;
ringp->s_ring_count = 0;
ringp->s_ring_size = 0;
mutex_exit(&ringp->s_ring_lock);
/* Transmit with no ring lock held; new packets may be queued meanwhile. */
mp = mac_tx_send(arg1, arg2, mp, &stats);
mutex_enter(&ringp->s_ring_lock);
if (mp != NULL) {
/*
 * A non-NULL return is treated as the unsent remainder of the chain,
 * still ending at `tail'.  Put it in front of anything queued while
 * the lock was dropped and add back the packets/bytes that were not
 * accounted as sent in `stats'.
 */
tail->b_next = ringp->s_ring_first;
ringp->s_ring_first = mp;
ringp->s_ring_count +=
(saved_pkt_count - stats.mts_opackets);
ringp->s_ring_size += (saved_size - stats.mts_obytes);
/* Nothing was queued meanwhile, so the old tail is the tail again. */
if (ringp->s_ring_last == NULL)
ringp->s_ring_last = tail;
/*
 * If s_ring_tx_woken_up was set while we were sending, just consume
 * the flag and leave the ring unblocked; otherwise mark the ring
 * blocked and count the event.
 */
if (ringp->s_ring_tx_woken_up) {
ringp->s_ring_tx_woken_up = B_FALSE;
} else {
ringp->s_ring_state |= S_RING_BLOCK;
ringp->s_st_stat.mts_blockcnt++;
}
ringp->s_ring_state &= ~S_RING_PROC;
ringp->s_ring_run = NULL;
return;
} else {
/* Everything was accepted: clear the wakeup flag and fold in the stats. */
ringp->s_ring_tx_woken_up = B_FALSE;
SRS_TX_STATS_UPDATE(mac_srs, &stats);
SOFTRING_TX_STATS_UPDATE(ringp, &stats);
}
}
/*
 * Queue fully drained: clear the flow-control/enqueue state bits and, if
 * the high-water or wakeup-client bit was set, notify the client.
 */
if (ringp->s_ring_count == 0 && ringp->s_ring_state &
(S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED)) {
mac_client_impl_t *mcip = ringp->s_ring_mcip;
boolean_t wakeup_required = B_FALSE;
if (ringp->s_ring_state &
(S_RING_TX_HIWAT|S_RING_WAKEUP_CLIENT)) {
wakeup_required = B_TRUE;
}
ringp->s_ring_state &=
~(S_RING_TX_HIWAT | S_RING_WAKEUP_CLIENT | S_RING_ENQUEUED);
/* The notification callbacks run without the ring lock held. */
mutex_exit(&ringp->s_ring_lock);
if (wakeup_required) {
mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)ringp);
/* Notify the upper MAC when the client has one, else its own MAC. */
mac_tx_notify(mcip->mci_upper_mip != NULL ?
mcip->mci_upper_mip : mcip->mci_mip);
}
mutex_enter(&ringp->s_ring_lock);
}
ringp->s_ring_state &= ~S_RING_PROC;
ringp->s_ring_run = NULL;
}