#include <sys/cdefs.h>
#include "opt_device_polling.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/epoch.h>
#include <sys/eventhandler.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <net/vnet.h>
/* Per-tick polling hook; defined later in this file. */
void hardclock_device_poll(void);
/* Mutex guarding the handler table and tunables; set up in init_device_poll(). */
static struct mtx poll_mtx;
/* Bounds accepted by poll_burst_max_sysctl() for poll_burst_max. */
#define MIN_POLL_BURST_MAX 10
#define MAX_POLL_BURST_MAX 20000
/* Current burst size; adjusted up and down by netisr_pollmore(). */
static uint32_t poll_burst = 5;
/* Upper limit for poll_burst. */
static uint32_t poll_burst_max = 150;
/* Largest count handed to a handler in a single call. */
static uint32_t poll_each_burst = 5;
/* Parent node for the polling sysctls declared in this file. */
static SYSCTL_NODE(_kern, OID_AUTO, polling, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"Device polling parameters");
SYSCTL_UINT(_kern_polling, OID_AUTO, burst, CTLFLAG_RD,
&poll_burst, 0, "Current polling burst size");
/* Set when a netisr_poll() pass has been requested; cleared when it runs. */
static int netisr_poll_scheduled;
/* Set when a netisr_pollmore() pass has been requested; cleared when it runs. */
static int netisr_pollmore_scheduled;
/* Set by poll_shutdown(); makes hardclock_device_poll() return early. */
static int poll_shutting_down;
/*
 * Sysctl handler for the burst_max knob.
 *
 * Reads report the current poll_burst_max.  A write is accepted only if the
 * new value lies in [MIN_POLL_BURST_MAX, MAX_POLL_BURST_MAX]; anything else
 * yields EINVAL.  Installing a new maximum also pulls poll_burst down to it
 * when needed, and resets poll_each_burst to MIN_POLL_BURST_MAX if it would
 * otherwise exceed the new maximum.
 */
static int poll_burst_max_sysctl(SYSCTL_HANDLER_ARGS)
{
	uint32_t new_max = poll_burst_max;
	int rc;

	rc = sysctl_handle_int(oidp, &new_max, 0, req);
	/* Done on failure, or when no new value was supplied. */
	if (rc || !req->newptr)
		return (rc);
	if (new_max < MIN_POLL_BURST_MAX || new_max > MAX_POLL_BURST_MAX)
		return (EINVAL);

	mtx_lock(&poll_mtx);
	poll_burst_max = new_max;
	/* Keep the dependent tunables consistent with the new ceiling. */
	if (poll_burst > new_max)
		poll_burst = new_max;
	if (poll_each_burst > new_max)
		poll_each_burst = MIN_POLL_BURST_MAX;
	mtx_unlock(&poll_mtx);

	return (0);
}
/* Read-write "burst_max" knob under the polling node, served by poll_burst_max_sysctl(). */
SYSCTL_PROC(_kern_polling, OID_AUTO, burst_max,
CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(uint32_t),
poll_burst_max_sysctl, "I",
"Max Polling burst size");
/*
 * Sysctl handler for the each_burst knob.
 *
 * Reads report the current poll_each_burst.  A write must be at least 1 and
 * no larger than the current poll_burst_max; otherwise EINVAL is returned
 * and the stored value is left untouched.
 */
static int poll_each_burst_sysctl(SYSCTL_HANDLER_ARGS)
{
	uint32_t requested = poll_each_burst;
	int rc;

	rc = sysctl_handle_int(oidp, &requested, 0, req);
	/* Done on failure, or when no new value was supplied. */
	if (rc || !req->newptr)
		return (rc);
	if (requested == 0)
		return (EINVAL);

	/* The upper bound is itself tunable, so compare under the lock. */
	mtx_lock(&poll_mtx);
	if (requested <= poll_burst_max)
		poll_each_burst = requested;
	else
		rc = EINVAL;
	mtx_unlock(&poll_mtx);

	return (rc);
}
/* Read-write "each_burst" knob under the polling node, served by poll_each_burst_sysctl(). */
SYSCTL_PROC(_kern_polling, OID_AUTO, each_burst,
CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(uint32_t),
poll_each_burst_sysctl, "I",
"Max size of each burst");
/* When non-zero, poll_idle() polls the registered handlers instead of sleeping. */
static uint32_t poll_in_idle_loop=0;
SYSCTL_UINT(_kern_polling, OID_AUTO, idle_poll, CTLFLAG_RW,
&poll_in_idle_loop, 0, "Enable device polling in idle loop");
/* Compared against the measured load (0..100 scale) in netisr_pollmore(). */
static uint32_t user_frac = 50;
/*
 * Sysctl handler for the user_frac knob.
 *
 * Reads report the current user_frac.  Writes are accepted for values in
 * the range 0..99; larger values are rejected with EINVAL.
 */
static int user_frac_sysctl(SYSCTL_HANDLER_ARGS)
{
	uint32_t requested = user_frac;
	int rc;

	rc = sysctl_handle_int(oidp, &requested, 0, req);
	/* Done on failure, or when no new value was supplied. */
	if (rc || !req->newptr)
		return (rc);
	if (requested >= 100)
		return (EINVAL);

	mtx_lock(&poll_mtx);
	user_frac = requested;
	mtx_unlock(&poll_mtx);

	return (0);
}
/* Read-write "user_frac" knob under the polling node, served by user_frac_sysctl(). */
SYSCTL_PROC(_kern_polling, OID_AUTO, user_frac,
CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(uint32_t),
user_frac_sysctl, "I",
"Desired user fraction of cpu time");
/* Counts burst starts in netisr_poll(); reset to 0 whenever it reaches reg_frac. */
static uint32_t reg_frac_count = 0;
/* Every reg_frac-th burst start uses POLL_AND_CHECK_STATUS instead of POLL_ONLY. */
static uint32_t reg_frac = 20 ;
/*
 * Sysctl handler for the reg_frac knob.
 *
 * Reads report the current reg_frac.  Writes must be in the range 1..hz,
 * otherwise EINVAL is returned.  After a successful write the running
 * counter reg_frac_count is reset to 0 if it is no longer below the new
 * period.
 */
static int reg_frac_sysctl(SYSCTL_HANDLER_ARGS)
{
	uint32_t period = reg_frac;
	int rc;

	rc = sysctl_handle_int(oidp, &period, 0, req);
	/* Done on failure, or when no new value was supplied. */
	if (rc || !req->newptr)
		return (rc);
	if (period == 0 || period > hz)
		return (EINVAL);

	mtx_lock(&poll_mtx);
	reg_frac = period;
	/* Keep the counter strictly below the period it is compared with. */
	if (reg_frac_count >= period)
		reg_frac_count = 0;
	mtx_unlock(&poll_mtx);

	return (0);
}
/* Read-write "reg_frac" knob under the polling node, served by reg_frac_sysctl(). */
SYSCTL_PROC(_kern_polling, OID_AUTO, reg_frac,
CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(uint32_t),
reg_frac_sysctl, "I",
"Every this many cycles check registers");
/*
 * Bumped in hardclock_device_poll() when the time since the last accepted
 * tick, scaled by hz, is below 500000 (i.e. less than half a tick period).
 */
static uint32_t short_ticks;
SYSCTL_UINT(_kern_polling, OID_AUTO, short_ticks, CTLFLAG_RD,
&short_ticks, 0, "Hardclock ticks shorter than they should be");
/* Bumped in hardclock_device_poll() when pending_polls was already non-zero. */
static uint32_t lost_polls;
SYSCTL_UINT(_kern_polling, OID_AUTO, lost_polls, CTLFLAG_RD,
&lost_polls, 0, "How many times we would have lost a poll tick");
/*
 * Incremented by hardclock_device_poll(), decremented by netisr_pollmore();
 * forced back to 0 when it exceeds 100.
 */
static uint32_t pending_polls;
SYSCTL_UINT(_kern_polling, OID_AUTO, pending_polls, CTLFLAG_RD,
&pending_polls, 0, "Do we need to poll again");
/* Part of the current burst not yet handed out; loaded and consumed in netisr_poll(). */
static int residual_burst = 0;
SYSCTL_INT(_kern_polling, OID_AUTO, residual_burst, CTLFLAG_RD,
&residual_burst, 0, "# of residual cycles in burst");
/* Number of valid entries at the front of pr[]. */
static uint32_t poll_handlers;
SYSCTL_UINT(_kern_polling, OID_AUTO, handlers, CTLFLAG_RD,
&poll_handlers, 0, "Number of registered poll handlers");
/*
 * Progress marker: 0 when a cycle has completed, 1 and 2 set by
 * hardclock_device_poll(), 3 and 4 by netisr_poll(), 5 and 6 by
 * netisr_pollmore().
 */
static uint32_t phase;
SYSCTL_UINT(_kern_polling, OID_AUTO, phase, CTLFLAG_RD,
&phase, 0, "Polling phase");
/* Bumped in hardclock_device_poll() when a tick finds phase at 1 or 2. */
static uint32_t suspect;
SYSCTL_UINT(_kern_polling, OID_AUTO, suspect, CTLFLAG_RD,
&suspect, 0, "suspect event");
/* Bumped in hardclock_device_poll() when pending_polls exceeds 100. */
static uint32_t stalled;
SYSCTL_UINT(_kern_polling, OID_AUTO, stalled, CTLFLAG_RD,
&stalled, 0, "potential stalls");
/* Set by poll_idle() before it sleeps; its address doubles as the sleep/wakeup channel. */
static uint32_t idlepoll_sleeping;
SYSCTL_UINT(_kern_polling, OID_AUTO, idlepoll_sleeping, CTLFLAG_RD,
&idlepoll_sleeping, 0, "idlepoll is sleeping");
/* Capacity of the handler table; ether_poll_register() fails with ENOMEM beyond it. */
#define POLL_LIST_LEN 128
/* One registered poller: the callback and the interface it is called with. */
struct pollrec {
poll_handler_t *handler;
struct ifnet *ifp;
};
/* Handler table; entries [0, poll_handlers) are in use and kept contiguous. */
static struct pollrec pr[POLL_LIST_LEN];
/*
 * shutdown_post_sync event handler (registered in init_device_poll()).
 * Sets poll_shutting_down so that hardclock_device_poll() returns early
 * from then on.  Neither argument is used.
 */
static void
poll_shutdown(void *arg, int howto)
{
poll_shutting_down = 1;
}
/*
 * One-time setup: initialize poll_mtx as a default mutex named "polling"
 * and register poll_shutdown() for the shutdown_post_sync event at
 * SHUTDOWN_PRI_LAST.
 */
static void
init_device_poll(void)
{
mtx_init(&poll_mtx, "polling", NULL, MTX_DEF);
EVENTHANDLER_REGISTER(shutdown_post_sync, poll_shutdown, NULL,
SHUTDOWN_PRI_LAST);
}
/* Run init_device_poll() at SI_SUB_SOFTINTR / SI_ORDER_MIDDLE. */
SYSINIT(device_poll, SI_SUB_SOFTINTR, SI_ORDER_MIDDLE, init_device_poll, NULL);
/*
 * Per-tick polling hook.
 *
 * Returns immediately while no handlers are registered or once shutdown has
 * begun.  Otherwise it updates the tick statistics, detects a stalled
 * polling cycle, and, when no cycle has progressed past the scheduling
 * stage (phase <= 2), requests a new one through netisr_sched_poll().
 *
 * NOTE(review): presumably called once per clock tick (the name suggests
 * hardclock()) -- confirm against the caller.  The shared counters are read
 * and written here without taking poll_mtx.
 */
void
hardclock_device_poll(void)
{
	static struct timeval prev_t, t;
	int64_t delta;

	if (poll_handlers == 0 || poll_shutting_down)
		return;

	microuptime(&t);
	/*
	 * Microseconds since the last accepted tick.  prev_t starts out
	 * zeroed and is not refreshed while this function returns early, so
	 * the gap can be arbitrarily large.  Use 64-bit arithmetic so that
	 * neither the difference nor delta * hz can wrap, which would
	 * miscount short_ticks and skip the prev_t refresh.
	 */
	delta = (int64_t)(t.tv_sec - prev_t.tv_sec) * 1000000 +
	    (t.tv_usec - prev_t.tv_usec);
	/* Less than half a tick period has elapsed. */
	if (delta * hz < 500000)
		short_ticks++;
	else
		prev_t = t;

	if (pending_polls > 100) {
		/* Too many unanswered ticks: treat as a stall and restart. */
		stalled++;
		pending_polls = 0;
		phase = 0;
	}

	if (phase <= 2) {
		/* A non-zero phase here means the last request never ran. */
		if (phase != 0)
			suspect++;
		phase = 1;
		netisr_poll_scheduled = 1;
		netisr_pollmore_scheduled = 1;
		netisr_sched_poll();
		phase = 2;
	}
	/* A tick arriving while an earlier one is still pending is "lost". */
	if (pending_polls++ > 0)
		lost_polls++;
}
static void
ether_poll(int count)
{
struct epoch_tracker et;
int i;
mtx_lock(&poll_mtx);
if (count > poll_each_burst)
count = poll_each_burst;
NET_EPOCH_ENTER(et);
for (i = 0 ; i < poll_handlers ; i++)
pr[i].handler(pr[i].ifp, POLL_ONLY, count);
NET_EPOCH_EXIT(et);
mtx_unlock(&poll_mtx);
}
/* Time at which the current burst started; written by netisr_poll(), read below. */
static struct timeval poll_start_t;
/*
 * Follow-up step of a polling cycle.
 *
 * If part of the current burst is still outstanding (residual_burst > 0) it
 * simply requests another netisr_poll() pass.  Otherwise it measures the
 * time spent since poll_start_t, nudges poll_burst up or down by one
 * depending on how that load compares with (100 - user_frac), and accounts
 * for one pending tick.  If ticks are still pending afterwards, poll_burst
 * is cut by one eighth (never below 1) and a new cycle is requested.
 */
void
netisr_pollmore(void)
{
struct timeval t;
int kern_load;
/* Unlocked early-out when nothing is registered. */
if (poll_handlers == 0)
return;
mtx_lock(&poll_mtx);
/* Only act if this pass was actually requested. */
if (!netisr_pollmore_scheduled) {
mtx_unlock(&poll_mtx);
return;
}
netisr_pollmore_scheduled = 0;
phase = 5;
if (residual_burst > 0) {
/* Burst not finished: ask for another poll/pollmore round. */
netisr_poll_scheduled = 1;
netisr_pollmore_scheduled = 1;
netisr_sched_poll();
mtx_unlock(&poll_mtx);
return;
}
microuptime(&t);
/* Elapsed time since the burst started, in microseconds. */
kern_load = (t.tv_usec - poll_start_t.tv_usec) +
(t.tv_sec - poll_start_t.tv_sec)*1000000;
/* Scale to a percentage of one tick: us * hz / 1000000 * 100. */
/* NOTE(review): int arithmetic; kern_load * hz could wrap for very long gaps -- confirm that cannot occur. */
kern_load = (kern_load * hz) / 10000;
if (kern_load > (100 - user_frac)) {
/* Over budget: shrink the burst, but keep it at least 1. */
if (poll_burst > 1)
poll_burst--;
} else {
/* Within budget: grow the burst up to poll_burst_max. */
if (poll_burst < poll_burst_max)
poll_burst++;
}
pending_polls--;
if (pending_polls == 0)
/* No tick left unanswered: the cycle is complete. */
phase = 0;
else {
/* Ticks are still pending: reduce the burst by 1/8 and go again. */
poll_burst -= (poll_burst / 8);
if (poll_burst < 1)
poll_burst = 1;
netisr_poll_scheduled = 1;
netisr_pollmore_scheduled = 1;
netisr_sched_poll();
phase = 6;
}
mtx_unlock(&poll_mtx);
}
void
netisr_poll(void)
{
int i, cycles;
enum poll_cmd arg = POLL_ONLY;
NET_EPOCH_ASSERT();
if (poll_handlers == 0)
return;
mtx_lock(&poll_mtx);
if (!netisr_poll_scheduled) {
mtx_unlock(&poll_mtx);
return;
}
netisr_poll_scheduled = 0;
phase = 3;
if (residual_burst == 0) {
microuptime(&poll_start_t);
if (++reg_frac_count == reg_frac) {
arg = POLL_AND_CHECK_STATUS;
reg_frac_count = 0;
}
residual_burst = poll_burst;
}
cycles = (residual_burst < poll_each_burst) ?
residual_burst : poll_each_burst;
residual_burst -= cycles;
for (i = 0 ; i < poll_handlers ; i++)
pr[i].handler(pr[i].ifp, arg, cycles);
phase = 4;
mtx_unlock(&poll_mtx);
}
/*
 * Add a polling handler for an interface.
 *
 * h   - callback to invoke; must not be NULL.
 * ifp - interface passed back to the callback; must not be NULL.
 *
 * Returns 0 on success, ENOMEM when the table already holds POLL_LIST_LEN
 * entries (logged at most ten times), or EEXIST when the interface already
 * has a handler.  On success the idle poller is woken up if it had marked
 * itself as sleeping.
 */
int
ether_poll_register(poll_handler_t *h, if_t ifp)
{
	int slot;

	KASSERT(h != NULL, ("%s: handler is NULL", __func__));
	KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));

	mtx_lock(&poll_mtx);

	if (poll_handlers >= POLL_LIST_LEN) {
		/* Complain only a limited number of times. */
		static int complaints_left = 10;

		if (complaints_left > 0) {
			log(LOG_ERR, "poll handlers list full, "
			    "maybe a broken driver ?\n");
			complaints_left--;
		}
		mtx_unlock(&poll_mtx);
		return (ENOMEM);
	}

	/* Refuse a second registration for the same interface. */
	for (slot = 0; slot < poll_handlers; slot++) {
		if (pr[slot].ifp == ifp && pr[slot].handler != NULL) {
			mtx_unlock(&poll_mtx);
			log(LOG_DEBUG, "ether_poll_register: %s: handler"
			    " already registered\n", if_name(ifp));
			return (EEXIST);
		}
	}

	/* Append at the end of the in-use prefix. */
	pr[poll_handlers].handler = h;
	pr[poll_handlers].ifp = ifp;
	poll_handlers++;
	mtx_unlock(&poll_mtx);

	if (idlepoll_sleeping)
		wakeup(&idlepoll_sleeping);

	return (0);
}
/*
 * Remove the polling handler registered for an interface.
 *
 * ifp - interface to look up; must not be NULL.
 *
 * Returns 0 on success or ENOENT when the interface has no entry.  The
 * table is kept contiguous by moving the last entry into the freed slot.
 */
int
ether_poll_deregister(if_t ifp)
{
	int slot;

	KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));

	mtx_lock(&poll_mtx);

	/* Linear search over the in-use prefix. */
	slot = 0;
	while (slot < poll_handlers && pr[slot].ifp != ifp)
		slot++;

	if (slot == poll_handlers) {
		log(LOG_DEBUG, "ether_poll_deregister: %s: not found!\n",
		    if_name(ifp));
		mtx_unlock(&poll_mtx);
		return (ENOENT);
	}

	poll_handlers--;
	/* Unless the victim was the last entry, fill the hole with the last one. */
	if (slot < poll_handlers)
		pr[slot] = pr[poll_handlers];

	mtx_unlock(&poll_mtx);
	return (0);
}
/*
 * Body of the "idlepoll" kernel process; never returns.
 *
 * Switches the calling thread to the RTP_PRIO_IDLE class at RTP_PRIO_MAX,
 * then loops forever.  While idle polling is enabled and handlers are
 * registered it polls them and relinquishes the CPU; otherwise it marks
 * itself as sleeping and sleeps for up to three seconds (hz * 3 ticks) or
 * until woken on &idlepoll_sleeping.
 */
static void
poll_idle(void)
{
	struct thread *self = curthread;
	struct rtprio rtp;

	rtp.type = RTP_PRIO_IDLE;
	rtp.prio = RTP_PRIO_MAX;
	PROC_SLOCK(self->td_proc);
	rtp_to_pri(&rtp, self);
	PROC_SUNLOCK(self->td_proc);

	for (;;) {
		if (!poll_in_idle_loop || poll_handlers == 0) {
			/* Nothing to do right now: wait for a wakeup or timeout. */
			idlepoll_sleeping = 1;
			tsleep(&idlepoll_sleeping, 0, "pollid", hz * 3);
			continue;
		}

		idlepoll_sleeping = 0;
		ether_poll(poll_each_burst);
		sched_relinquish(self);
	}
}
/* Process pointer for the "idlepoll" kernel process; its address is stored in idlepoll_kp. */
static struct proc *idlepoll;
/*
 * Descriptor passed to kproc_start(): the process name, its entry point
 * poll_idle(), and (presumably) where the created process pointer is stored.
 */
static struct kproc_desc idlepoll_kp = {
"idlepoll",
poll_idle,
&idlepoll
};
/* Start the idlepoll kernel process via kproc_start() at SI_SUB_KTHREAD_VM. */
SYSINIT(idlepoll, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, kproc_start,
&idlepoll_kp);