#include <sys/types.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/cmn_err.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <netinet/ip6.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/nd.h>
#include <inet/ipclassifier.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/sunddi.h>
#include <sys/dlpi.h>
#include <sys/squeue_impl.h>
#include <sys/tihdr.h>
#include <inet/udp_impl.h>
#include <sys/strsubr.h>
#include <sys/zone.h>
#include <sys/dld.h>
#include <sys/atomic.h>
static squeue_set_t **sqset_global_list;
static uint_t sqset_global_size;
kmutex_t sqset_lock;
static void (*ip_squeue_create_callback)(squeue_t *) = NULL;
static squeue_t *ip_squeue_create(pri_t);
static squeue_set_t *ip_squeue_set_create(processorid_t);
static int ip_squeue_cpu_setup(cpu_setup_t, int, void *);
static void ip_squeue_set_move(squeue_t *, squeue_set_t *);
static void ip_squeue_set_destroy(cpu_t *);
static void ip_squeue_clean(void *, mblk_t *, void *);
/*
 * True when `c' points to a CPU that exists and is active (online).
 * Arguments are fully parenthesized so the macro expands safely for
 * any pointer expression, not just a plain identifier.
 */
#define	CPU_ISON(c)	((c) != NULL && CPU_ACTIVE(c) && ((c)->cpu_flags & CPU_EXISTS))
/*
 * Create a new squeue at priority `pri' and run the registered
 * creation callback (if any) on it before handing it back.
 */
static squeue_t *
ip_squeue_create(pri_t pri)
{
	squeue_t *new_sq = squeue_create(pri);

	ASSERT(new_sq != NULL);
	if (ip_squeue_create_callback != NULL)
		ip_squeue_create_callback(new_sq);
	return (new_sq);
}
/*
 * Create a squeue set for CPU `id'.
 *
 * An id of -1 creates the special "unbound" set, which becomes
 * sqset_global_list[0]; this must be the very first set created
 * (asserted via sqset_global_size == 0) and is returned immediately
 * without taking sqset_lock.
 *
 * For a real CPU id, a default squeue is obtained for the new set by
 * scanning the unbound set's list, preferring (in order):
 *   1. a default squeue that was previously bound to this CPU
 *      (e.g. left behind by ip_squeue_set_destroy()), or
 *   2. any squeue that is neither a default squeue nor ILL-bound.
 * If neither exists, a fresh squeue is created.  The chosen squeue is
 * marked SQS_DEFAULT, bound to the CPU, and the new set is appended
 * to sqset_global_list.
 *
 * Caller must hold cpu_lock when id != -1.
 */
static squeue_set_t *
ip_squeue_set_create(processorid_t id)
{
squeue_set_t *sqs;
squeue_set_t *src = sqset_global_list[0];
squeue_t **lastsqp, *sq;
squeue_t **defaultq_lastp = NULL;
sqs = kmem_zalloc(sizeof (squeue_set_t), KM_SLEEP);
sqs->sqs_cpuid = id;
/* The unbound set: install as slot 0 and return. */
if (id == -1) {
ASSERT(sqset_global_size == 0);
sqset_global_list[0] = sqs;
sqset_global_size = 1;
return (sqs);
}
ASSERT(MUTEX_HELD(&cpu_lock));
mutex_enter(&sqset_lock);
/*
 * Walk the unbound set's list via a pointer-to-link so the chosen
 * squeue can be unlinked in place.  defaultq_lastp remembers the
 * best candidate found so far.
 */
lastsqp = &src->sqs_head;
while (*lastsqp) {
/* Best case: a default squeue formerly bound to this CPU. */
if ((*lastsqp)->sq_bind == id &&
(*lastsqp)->sq_state & SQS_DEFAULT) {
ASSERT(!((*lastsqp)->sq_state & SQS_ILL_BOUND));
defaultq_lastp = lastsqp;
break;
}
/* Otherwise remember the first free (non-default, non-ILL) squeue. */
if (defaultq_lastp == NULL &&
!((*lastsqp)->sq_state & (SQS_ILL_BOUND | SQS_DEFAULT))) {
defaultq_lastp = lastsqp;
}
lastsqp = &(*lastsqp)->sq_next;
}
if (defaultq_lastp != NULL) {
/* Unlink the candidate from the unbound set's list. */
sq = *defaultq_lastp;
*defaultq_lastp = sq->sq_next;
sq->sq_next = NULL;
/* Promote a free squeue to default status under its own lock. */
if (!(sq->sq_state & SQS_DEFAULT)) {
mutex_enter(&sq->sq_lock);
sq->sq_state |= SQS_DEFAULT;
mutex_exit(&sq->sq_lock);
}
} else {
/* Nothing reusable: create a brand-new default squeue. */
sq = ip_squeue_create(SQUEUE_DEFAULT_PRIORITY);
sq->sq_state |= SQS_DEFAULT;
}
/*
 * Note: the default squeue lives in sqs_default, not on the set's
 * sqs_head list (cf. ip_squeue_set_destroy()).
 */
sq->sq_set = sqs;
sqs->sqs_default = sq;
squeue_bind(sq, id);
/* Register the new set globally. */
ASSERT(sqset_global_size <= NCPU);
sqset_global_list[sqset_global_size++] = sqs;
mutex_exit(&sqset_lock);
return (sqs);
}
/*
 * Return a free (neither default nor already ILL-bound) squeue from the
 * unbound set at priority `pri', creating and enqueuing a new one if
 * none is available.  The returned squeue is marked SQS_ILL_BOUND so
 * concurrent callers cannot hand it out twice, and its worker and poll
 * threads are moved to `pri' if they are not already there.
 */
squeue_t *
ip_squeue_getfree(pri_t pri)
{
squeue_set_t *sqs = sqset_global_list[0];
squeue_t *sq;
mutex_enter(&sqset_lock);
/* Scan the unbound set for a squeue that is not default or ILL-bound. */
for (sq = sqs->sqs_head; sq != NULL; sq = sq->sq_next) {
if (!(sq->sq_state & (SQS_DEFAULT | SQS_ILL_BOUND)))
break;
}
if (sq == NULL) {
/* None free: create one and push it on the unbound set's list. */
sq = ip_squeue_create(pri);
sq->sq_set = sqs;
sq->sq_next = sqs->sqs_head;
sqs->sqs_head = sq;
}
/* A free squeue must not be in the middle of any control operation. */
ASSERT(!(sq->sq_state & (SQS_POLL_THR_CONTROL | SQS_WORKER_THR_CONTROL |
SQS_POLL_CLEANUP_DONE | SQS_POLL_QUIESCE_DONE |
SQS_POLL_THR_QUIESCED)));
/* Claim it before dropping sqset_lock so no one else can take it. */
mutex_enter(&sq->sq_lock);
sq->sq_state |= SQS_ILL_BOUND;
mutex_exit(&sq->sq_lock);
mutex_exit(&sqset_lock);
/*
 * Adjust both service threads to the requested priority.  Safe to do
 * after dropping sqset_lock: SQS_ILL_BOUND keeps the squeue ours.
 */
if (sq->sq_priority != pri) {
thread_lock(sq->sq_worker);
(void) thread_change_pri(sq->sq_worker, pri, 0);
thread_unlock(sq->sq_worker);
thread_lock(sq->sq_poll_thr);
(void) thread_change_pri(sq->sq_poll_thr, pri, 0);
thread_unlock(sq->sq_poll_thr);
sq->sq_priority = pri;
}
return (sq);
}
/*
 * One-time initialization of the squeue framework for IP.
 *
 * Records the per-squeue creation callback, initializes the generic
 * squeue layer and the global set lock, allocates the global set table
 * (NCPU per-CPU sets plus the unbound set), creates the unbound set,
 * then — under cpu_lock — creates a set for every online CPU and
 * registers the CPU hotplug callback.
 */
void
ip_squeue_init(void (*callback)(squeue_t *))
{
	squeue_set_t *unbound_set;
	int cpu_ix;

	ASSERT(sqset_global_list == NULL);

	ip_squeue_create_callback = callback;
	squeue_init();
	mutex_init(&sqset_lock, NULL, MUTEX_DEFAULT, NULL);

	/* Slot 0 is the unbound set; slots 1..NCPU hold per-CPU sets. */
	sqset_global_list =
	    kmem_zalloc(sizeof (squeue_set_t *) * (NCPU+1), KM_SLEEP);
	sqset_global_size = 0;

	/* The unbound set (id -1) must exist before any per-CPU set. */
	unbound_set = ip_squeue_set_create(-1);
	ASSERT(unbound_set != NULL);

	mutex_enter(&cpu_lock);
	for (cpu_ix = 0; cpu_ix < NCPU; cpu_ix++) {
		cpu_t *cp = cpu_get(cpu_ix);

		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) {
			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
			ASSERT(cp->cpu_squeue_set != NULL);
		}
	}
	/* Track future CPU online/offline events. */
	register_cpu_setup_func(ip_squeue_cpu_setup, NULL);
	mutex_exit(&cpu_lock);
}
/*
 * Pick a default squeue for fanout based on `index'.
 *
 * Unless ip_squeue_fanout is set, prefer the current CPU's own set;
 * fall back to a set chosen by index modulo the per-CPU sets
 * (sqset_global_list slots 1..sqset_global_size-1, skipping the
 * unbound set in slot 0).  Returns that set's default squeue.
 */
squeue_t *
ip_squeue_random(uint_t index)
{
	squeue_set_t *pick = NULL;
	squeue_t *sq;

	ASSERT(sqset_global_size > 1);

	mutex_enter(&sqset_lock);
	if (!ip_squeue_fanout)
		pick = CPU->cpu_squeue_set;
	if (pick == NULL) {
		uint_t slot = (index % (sqset_global_size - 1)) + 1;

		pick = sqset_global_list[slot];
	}
	sq = pick->sqs_default;
	mutex_exit(&sqset_lock);

	ASSERT(sq);
	return (sq);
}
/*
 * Move squeue `sq' from its current set onto `newset', rebinding (or
 * unbinding, for the unbound set whose sqs_cpuid is -1) its threads to
 * the new set's CPU.  Not used for default squeues (asserted); a set's
 * default squeue is kept in sqs_default, not on the sqs_head list this
 * function manipulates.
 *
 * Caller must hold sqset_lock and must NOT hold sq->sq_lock.
 */
static void
ip_squeue_set_move(squeue_t *sq, squeue_set_t *newset)
{
squeue_set_t *set;
squeue_t **lastsqp;
processorid_t cpuid = newset->sqs_cpuid;
ASSERT(!(sq->sq_state & SQS_DEFAULT));
ASSERT(!MUTEX_HELD(&sq->sq_lock));
ASSERT(MUTEX_HELD(&sqset_lock));
set = sqs->sq_set;
if (set == newset)
return;
/* Unlink sq from its current set using a pointer-to-link walk. */
lastsqp = &set->sqs_head;
while (*lastsqp != sq)
lastsqp = &(*lastsqp)->sq_next;
*lastsqp = sq->sq_next;
/* Push sq onto the head of the new set's list. */
sq->sq_next = newset->sqs_head;
newset->sqs_head = sq;
sq->sq_set = newset;
/* The unbound set has cpuid -1; otherwise bind to the set's CPU. */
if (cpuid == -1)
squeue_unbind(sq);
else
squeue_bind(sq, cpuid);
}
/*
 * Move squeue `sq' to the set belonging to CPU `cpuid'.
 *
 * Returns 0 on success, -1 if the squeue is a default squeue (those
 * never migrate), the CPU is not online, or the CPU has no set.
 * Caller must hold cpu_lock.
 */
int
ip_squeue_cpu_move(squeue_t *sq, processorid_t cpuid)
{
	cpu_t *cpu;
	squeue_set_t *target;

	/* Default squeues stay with their set. */
	if (sq->sq_state & SQS_DEFAULT)
		return (-1);

	ASSERT(MUTEX_HELD(&cpu_lock));

	cpu = cpu_get(cpuid);
	if (!CPU_ISON(cpu))
		return (-1);

	mutex_enter(&sqset_lock);
	target = cpu->cpu_squeue_set;
	if (target != NULL)
		ip_squeue_set_move(sq, target);
	mutex_exit(&sqset_lock);

	return ((target == NULL) ? -1 : 0);
}
/*
 * Bind the squeue servicing `rx_ring' to CPU `cpuid' and, on success,
 * mark the ring RR_SQUEUE_BOUND.  Silently does nothing if the ring is
 * free or being freed.  Caller must hold the ill's mac perimeter; the
 * callee ip_squeue_cpu_move() additionally asserts cpu_lock is held.
 */
void
ip_squeue_bind_ring(ill_t *ill, ill_rx_ring_t *rx_ring, processorid_t cpuid)
{
ASSERT(ILL_MAC_PERIM_HELD(ill));
ASSERT(rx_ring->rr_ill == ill);
mutex_enter(&ill->ill_lock);
/* Nothing to bind if the ring is (being) torn down. */
if (rx_ring->rr_ring_state == RR_FREE ||
rx_ring->rr_ring_state == RR_FREE_INPROG) {
mutex_exit(&ill->ill_lock);
return;
}
/* Only record the bound state if the squeue actually moved. */
if (ip_squeue_cpu_move(rx_ring->rr_sqp, cpuid) != -1)
rx_ring->rr_ring_state = RR_SQUEUE_BOUND;
mutex_exit(&ill->ill_lock);
}
/*
 * Attach a new MAC receive ring (described by the mac_rx_fifo_t `mrp')
 * to `ill': find a free slot in the ill's ring table, record the ring's
 * receive/poll/interrupt handles, pair it with a free squeue marked
 * SQS_POLL_CAPAB, and attempt to bind that squeue to the ring's CPU.
 *
 * Returns a pointer to the ill_rx_ring_t on success, or NULL if the
 * table already holds ILL_MAX_RINGS rings.  Caller must hold the ill's
 * mac perimeter.
 */
void *
ip_squeue_add_ring(ill_t *ill, void *mrp)
{
mac_rx_fifo_t *mrfp = (mac_rx_fifo_t *)mrp;
ill_rx_ring_t *rx_ring, *ring_tbl;
int ip_rx_index;
squeue_t *sq = NULL;
pri_t pri;
ASSERT(ILL_MAC_PERIM_HELD(ill));
ASSERT(mrfp->mrf_type == MAC_RX_FIFO);
ASSERT(ill->ill_dld_capab != NULL);
ring_tbl = ill->ill_dld_capab->idc_poll.idp_ring_tbl;
mutex_enter(&ill->ill_lock);
/* Find the first free slot in the ring table. */
for (ip_rx_index = 0; ip_rx_index < ILL_MAX_RINGS; ip_rx_index++) {
rx_ring = &ring_tbl[ip_rx_index];
if (rx_ring->rr_ring_state == RR_FREE)
break;
}
if (ip_rx_index == ILL_MAX_RINGS) {
cmn_err(CE_NOTE,
"Reached maximum number of receiving rings (%d) for %s\n",
ILL_MAX_RINGS, ill->ill_name);
mutex_exit(&ill->ill_lock);
return (NULL);
}
/* Populate the ring entry from the MAC-provided descriptor. */
bzero(rx_ring, sizeof (ill_rx_ring_t));
rx_ring->rr_rx = mrfp->mrf_receive;
/* Select the v4 or v6 TCP accept path to use when polling. */
rx_ring->rr_ip_accept = (ill->ill_isv6 != 0) ?
(ip_accept_t)ip_accept_tcp_v6 :
(ip_accept_t)ip_accept_tcp;
rx_ring->rr_intr_handle = mrfp->mrf_intr_handle;
rx_ring->rr_intr_enable = (ip_mac_intr_enable_t)mrfp->mrf_intr_enable;
rx_ring->rr_intr_disable =
(ip_mac_intr_disable_t)mrfp->mrf_intr_disable;
rx_ring->rr_rx_handle = mrfp->mrf_rx_arg;
rx_ring->rr_ill = ill;
/* Pair the ring with a free squeue at the flow's priority. */
pri = mrfp->mrf_flow_priority;
sq = ip_squeue_getfree(pri);
mutex_enter(&sq->sq_lock);
sq->sq_rx_ring = rx_ring;
rx_ring->rr_sqp = sq;
/* This squeue may now poll the ring directly. */
sq->sq_state |= SQS_POLL_CAPAB;
rx_ring->rr_ring_state = RR_SQUEUE_UNBOUND;
sq->sq_ill = ill;
mutex_exit(&sq->sq_lock);
mutex_exit(&ill->ill_lock);
DTRACE_PROBE4(ill__ring__add, char *, ill->ill_name, ill_t *, ill, int,
ip_rx_index, void *, mrfp->mrf_rx_arg);
/* Bind the squeue to the CPU the MAC layer suggested. */
mutex_enter(&cpu_lock);
(void) ip_squeue_bind_ring(ill, rx_ring, mrfp->mrf_cpu_id);
mutex_exit(&cpu_lock);
return (rx_ring);
}
/*
 * Tear down the squeue/ring pairing for `rx_ring': ask the squeue's
 * worker thread to run the poll-cleanup protocol, wait for it to
 * finish, return the squeue to the unbound set, and mark the ring
 * RR_FREE again.  No-op if the ring is already free.  Caller must hold
 * the ill's mac perimeter.
 */
void
ip_squeue_clean_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
squeue_t *sqp;
ASSERT(ILL_MAC_PERIM_HELD(ill));
ASSERT(rx_ring != NULL);
mutex_enter(&ill->ill_lock);
if (rx_ring->rr_ring_state == RR_FREE) {
mutex_exit(&ill->ill_lock);
return;
}
/* Publish that teardown is in progress before dropping ill_lock. */
rx_ring->rr_ring_state = RR_FREE_INPROG;
sqp = rx_ring->rr_sqp;
/*
 * Take sq_lock before releasing ill_lock so the cleanup request and
 * the wait below form one atomic hand-off to the worker thread.
 */
mutex_enter(&sqp->sq_lock);
sqp->sq_state |= SQS_POLL_CLEANUP;
cv_signal(&sqp->sq_worker_cv);
mutex_exit(&ill->ill_lock);
/* Wait for the worker to acknowledge that poll cleanup completed. */
while (!(sqp->sq_state & SQS_POLL_CLEANUP_DONE))
cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
sqp->sq_state &= ~SQS_POLL_CLEANUP_DONE;
/* All control/quiesce transients must be gone after cleanup. */
ASSERT(!(sqp->sq_state & (SQS_POLL_THR_CONTROL |
SQS_WORKER_THR_CONTROL | SQS_POLL_QUIESCE_DONE |
SQS_POLL_THR_QUIESCED)));
/* Wake the worker again so it can resume normal processing. */
cv_signal(&sqp->sq_worker_cv);
mutex_exit(&sqp->sq_lock);
/* Return the squeue to the unbound set and release our claim on it. */
mutex_enter(&sqset_lock);
ip_squeue_set_move(sqp, sqset_global_list[0]);
mutex_exit(&sqset_lock);
mutex_enter(&sqp->sq_lock);
sqp->sq_state &= ~SQS_ILL_BOUND;
mutex_exit(&sqp->sq_lock);
mutex_enter(&ill->ill_lock);
rx_ring->rr_ring_state = RR_FREE;
mutex_exit(&ill->ill_lock);
}
/*
 * Ask the squeue servicing `rx_ring' to quiesce its polling and block
 * until the worker thread signals SQS_POLL_QUIESCE_DONE.  The done flag
 * is deliberately left set; ip_squeue_restart_ring() checks and
 * consumes it.  Caller must hold the ill's mac perimeter.
 */
void
ip_squeue_quiesce_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
squeue_t *sqp;
ASSERT(ILL_MAC_PERIM_HELD(ill));
ASSERT(rx_ring != NULL);
sqp = rx_ring->rr_sqp;
mutex_enter(&sqp->sq_lock);
/* Request quiesce and wake the worker to process it. */
sqp->sq_state |= SQS_POLL_QUIESCE;
cv_signal(&sqp->sq_worker_cv);
while (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE))
cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
mutex_exit(&sqp->sq_lock);
}
/*
 * Restart polling on `rx_ring' after ip_squeue_quiesce_ring(): request
 * SQS_POLL_RESTART from the worker thread and wait for completion.
 * No-op if the squeue was never quiesced (SQS_POLL_QUIESCE_DONE not
 * set).  Caller must hold the ill's mac perimeter.
 */
void
ip_squeue_restart_ring(ill_t *ill, ill_rx_ring_t *rx_ring)
{
squeue_t *sqp;
ASSERT(ILL_MAC_PERIM_HELD(ill));
ASSERT(rx_ring != NULL);
sqp = rx_ring->rr_sqp;
mutex_enter(&sqp->sq_lock);
/* Nothing to restart unless a quiesce completed earlier. */
if (!(sqp->sq_state & SQS_POLL_QUIESCE_DONE)) {
mutex_exit(&sqp->sq_lock);
return;
}
sqp->sq_state |= SQS_POLL_RESTART;
cv_signal(&sqp->sq_worker_cv);
while (!(sqp->sq_state & SQS_POLL_RESTART_DONE))
cv_wait(&sqp->sq_ctrlop_done_cv, &sqp->sq_lock);
/* Consume the done flag so a future restart starts clean. */
sqp->sq_state &= ~SQS_POLL_RESTART_DONE;
mutex_exit(&sqp->sq_lock);
}
/*
 * Clean up every rx ring in the ill's ring table; rings that are
 * already free are skipped inside ip_squeue_clean_ring().
 */
void
ip_squeue_clean_all(ill_t *ill)
{
	ill_rx_ring_t *ring_tbl = ill->ill_dld_capab->idc_poll.idp_ring_tbl;
	int i;

	for (i = 0; i < ILL_MAX_RINGS; i++)
		ip_squeue_clean_ring(ill, &ring_tbl[i]);
}
/*
 * Return the squeue paired with `ill_rx_ring', or a pseudo-randomly
 * chosen default squeue when no ring (or no paired squeue) exists.
 */
squeue_t *
ip_squeue_get(ill_rx_ring_t *ill_rx_ring)
{
	if (ill_rx_ring != NULL) {
		squeue_t *sqp = ill_rx_ring->rr_sqp;

		if (sqp != NULL)
			return (sqp);
	}
	return (IP_SQUEUE_GET(CPU_PSEUDO_RANDOM()));
}
/*
 * Destroy the squeue set attached to `cpu' (called when the CPU goes
 * offline).  All squeues on the set's list, and the set's default
 * squeue (kept separately in sqs_default, not on sqs_head), are
 * unbound and spliced onto the unbound set's list; the default squeue
 * keeps its SQS_DEFAULT flag so ip_squeue_set_create() can reclaim it
 * if this CPU comes back.  The set is then removed from
 * sqset_global_list (swap-with-last compaction) and freed.
 */
static void
ip_squeue_set_destroy(cpu_t *cpu)
{
int i;
squeue_t *sqp, *lastsqp = NULL;
squeue_set_t *sqs, *unbound = sqset_global_list[0];
mutex_enter(&sqset_lock);
if ((sqs = cpu->cpu_squeue_set) == NULL) {
mutex_exit(&sqset_lock);
return;
}
/* Unbind every listed squeue and point it at the unbound set. */
for (sqp = sqs->sqs_head; sqp; lastsqp = sqp, sqp = sqp->sq_next) {
squeue_unbind(sqp);
sqp->sq_set = unbound;
}
/* Splice the whole list onto the front of the unbound set's list. */
if (sqs->sqs_head) {
lastsqp->sq_next = unbound->sqs_head;
unbound->sqs_head = sqs->sqs_head;
}
/* The default squeue is handled separately (not on sqs_head). */
sqp = sqs->sqs_default;
ASSERT(sqp != NULL);
ASSERT((sqp->sq_state & (SQS_DEFAULT|SQS_ILL_BOUND)) == SQS_DEFAULT);
sqp->sq_next = unbound->sqs_head;
unbound->sqs_head = sqp;
squeue_unbind(sqp);
sqp->sq_set = unbound;
/* Remove the set from the global array by swapping in the last entry. */
for (i = 1; i < sqset_global_size; i++)
if (sqset_global_list[i] == sqs)
break;
ASSERT(i < sqset_global_size);
sqset_global_list[i] = sqset_global_list[sqset_global_size - 1];
sqset_global_list[sqset_global_size - 1] = NULL;
sqset_global_size--;
mutex_exit(&sqset_lock);
kmem_free(sqs, sizeof (*sqs));
}
/*
 * CPU hotplug callback (registered via register_cpu_setup_func()).
 *
 * When a CPU comes online (or joins a partition) and has no squeue
 * set, create one; when it goes offline (or leaves a partition),
 * destroy its set.  All other events are ignored.  Always returns 0.
 * Called with cpu_lock held.
 */
static int
ip_squeue_cpu_setup(cpu_setup_t what, int id, void *arg)
{
	cpu_t *cp = cpu_get(id);
	boolean_t coming_up, going_down;

	ASSERT(MUTEX_HELD(&cpu_lock));

	coming_up = (what == CPU_CONFIG || what == CPU_ON ||
	    what == CPU_INIT || what == CPU_CPUPART_IN);
	going_down = (what == CPU_UNCONFIG || what == CPU_OFF ||
	    what == CPU_CPUPART_OUT);

	if (coming_up) {
		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL)
			cp->cpu_squeue_set = ip_squeue_set_create(cp->cpu_id);
	} else if (going_down) {
		if (cp->cpu_squeue_set != NULL) {
			ip_squeue_set_destroy(cp);
			cp->cpu_squeue_set = NULL;
		}
	}
	return (0);
}