#include "opt_hwpmc_hooks.h"
#include "opt_hwt_hooks.h"
#include "opt_sched.h"
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/runq.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/turnstile.h>
#include <sys/umtxvar.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif
#ifdef HWT_HOOKS
#include <dev/hwt/hwt_hook.h>
#endif
#ifdef SMP
#define INVERSE_ESTCPU_WEIGHT (8 * smp_cpus)
#else
#define INVERSE_ESTCPU_WEIGHT 8
#endif
#define NICE_WEIGHT 1
#define ESTCPULIM(e) \
min((e), INVERSE_ESTCPU_WEIGHT * \
(NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) + \
PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE) \
+ INVERSE_ESTCPU_WEIGHT - 1)
#define TS_NAME_LEN (MAXCOMLEN + sizeof(" td ") + sizeof(__XSTRING(UINT_MAX)))
/*
 * Per-thread scheduler private state, stored immediately after
 * struct thread (see sched_4bsd_sizeof_thread()).
 */
struct td_sched {
	fixpt_t		ts_pctcpu;	/* Decayed %cpu estimate (fixed point). */
	u_int		ts_estcpu;	/* Estimated recent cpu utilization. */
	int		ts_cpticks;	/* Statclock ticks since last schedcpu(). */
	int		ts_slptime;	/* schedcpu() passes (~seconds) asleep. */
	int		ts_slice;	/* Remaining time slice, stathz ticks. */
	int		ts_flags;	/* TSF_* flags. */
	struct runq	*ts_runq;	/* Run queue the thread is/was on. */
#ifdef KTR
	char		ts_name[TS_NAME_LEN];	/* Cached KTR thread name. */
#endif
};
/* Scheduler-private td_flags. */
#define TDF_DIDRUN	TDF_SCHED0	/* Thread actually ran since last schedcpu(). */
#define TDF_BOUND	TDF_SCHED1	/* Thread is bound to one CPU. */
#define TDF_SLICEEND	TDF_SCHED2	/* Thread's time slice expired. */
/* Scheduler-private td_pflags: reschedule requested by maybe_resched(). */
#define TDP_RESCHED	TDP_SCHED1
/* ts_flags: thread's cpuset excludes at least one CPU. */
#define TSF_AFFINITY	0x0001
/* True when the td_sched sits on a per-CPU run queue, not the global one. */
#define SKE_RUNQ_PCPU(ts) \
    ((ts)->ts_runq != 0 && (ts)->ts_runq != &runq)
/* May the thread run on the given CPU according to its cpuset? */
#define THREAD_CAN_SCHED(td, cpu) \
    CPU_ISSET((cpu), &(td)->td_cpuset->cs_mask)
/* thread0's td_sched must fit into the statically reserved storage. */
_Static_assert(sizeof(struct thread) + sizeof(struct td_sched) <=
    sizeof(struct thread0_storage),
    "increase struct thread0_storage.t0st_sched size");
static struct mtx sched_lock;	/* Global scheduler/runq spin lock. */
static int realstathz = 127;	/* Placeholder; set from stathz in initticks(). */
static int sched_tdcnt;		/* Runnable non-TDF_NOLOAD threads (load). */
static int sched_slice = 12;	/* Time slice in stathz ticks; resized at boot. */
static void	setup_runqs(void);
static void	schedcpu(void);
static void	schedcpu_thread(void);
static void	sched_priority(struct thread *td, u_char prio);
static void	maybe_resched(struct thread *td);
static void	updatepri(struct thread *td);
static void	resetpriority(struct thread *td);
static void	resetpriority_thread(struct thread *td);
#ifdef SMP
static int	sched_pickcpu(struct thread *td);
static int	forward_wakeup(int cpunum);
static void	kick_other_cpu(int pri, int cpuid);
#endif

/* Kernel process that runs schedcpu() periodically (see schedcpu_thread()). */
static struct kproc_desc sched_kp = {
	"schedcpu",
	schedcpu_thread,
	NULL
};
/*
 * Start the "schedcpu" kernel process which periodically decays cpu
 * usage estimates.
 */
static void
sched_4bsd_schedcpu(void)
{
	kproc_start(&sched_kp);
}
static struct runq runq;		/* Global run queue. */
#ifdef SMP
static struct runq runq_pcpu[MAXCPU];	/* Per-CPU run queues. */
long runq_length[MAXCPU];		/* Threads queued on each per-CPU runq. */
static cpuset_t idle_cpus_mask;		/* CPUs currently in the idle loop. */
#endif

/* Per-CPU counters of recent idle-loop iterations (see sched_4bsd_idletd()). */
struct pcpuidlestat {
	u_int idlecalls;
	u_int oldidlecalls;
};
DPCPU_DEFINE_STATIC(struct pcpuidlestat, idlestat);
/*
 * Initialize the global and (on SMP) all per-CPU run queues.
 */
static void
setup_runqs(void)
{
#ifdef SMP
	int i;

	for (i = 0; i < MAXCPU; ++i)
		runq_init(&runq_pcpu[i]);
#endif

	runq_init(&runq);
}
/*
 * Sysctl handler exposing the scheduling quantum in microseconds.
 * Reads report sched_slice converted to usec; writes convert back to
 * stathz ticks (rounded, minimum 1) and refresh hogticks to match.
 */
static int
sysctl_kern_4bsd_quantum(SYSCTL_HANDLER_ARGS)
{
	int error, new_val, period;

	period = 1000000 / realstathz;	/* usec per stathz tick */
	new_val = period * sched_slice;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val <= 0)
		return (EINVAL);
	sched_slice = imax(1, (new_val + period / 2) / period);
	hogticks = imax(1, (2 * hz * sched_slice + realstathz / 2) /
	    realstathz);
	return (0);
}
SYSCTL_NODE(_kern_sched, OID_AUTO, 4bsd, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "4BSD Scheduler");
SYSCTL_PROC(_kern_sched_4bsd, OID_AUTO, quantum,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_kern_4bsd_quantum, "I",
    "Quantum for timeshare threads in microseconds");
SYSCTL_INT(_kern_sched_4bsd, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
    "Quantum for timeshare threads in stathz ticks");
#ifdef SMP
/* Knobs and counters for forwarding wakeups to idle CPUs via IPI. */
static SYSCTL_NODE(_kern_sched_4bsd, OID_AUTO, ipiwakeup,
    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Kernel SMP");

/* Fuzz passed to runq_choose_fuzz() when picking from the global runq. */
static int runq_fuzz = 1;
SYSCTL_INT(_kern_sched_4bsd, OID_AUTO, runq_fuzz, CTLFLAG_RW,
    &runq_fuzz, 0, "");
static int forward_wakeup_enabled = 1;
SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
    &forward_wakeup_enabled, 0,
    "Forwarding of wakeup to idle CPUs");

static int forward_wakeups_requested = 0;
SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
    &forward_wakeups_requested, 0,
    "Requests for Forwarding of wakeup to idle CPUs");

static int forward_wakeups_delivered = 0;
SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
    &forward_wakeups_delivered, 0,
    "Completed Forwarding of wakeup to idle CPUs");

static int forward_wakeup_use_mask = 1;
SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
    &forward_wakeup_use_mask, 0,
    "Use the mask of idle cpus");

static int forward_wakeup_use_loop = 0;
SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
    &forward_wakeup_use_loop, 0,
    "Use a loop to find idle cpus");
#endif
#if 0
/* Disabled experiment: let related threads share one quantum. */
static int sched_followon = 0;
SYSCTL_INT(_kern_sched_4bsd, OID_AUTO, followon, CTLFLAG_RW,
    &sched_followon, 0,
    "allow threads to share a quantum");
#endif
/*
 * Account one more runnable thread in the global load figure.
 */
static __inline void
sched_load_add(void)
{
	sched_tdcnt++;
	KTR_COUNTER0(KTR_SCHED, "load", "global load", sched_tdcnt);
	SDT_PROBE2(sched, , , load__change, NOCPU, sched_tdcnt);
}
/*
 * Account one fewer runnable thread in the global load figure.
 */
static __inline void
sched_load_rem(void)
{
	sched_tdcnt--;
	KTR_COUNTER0(KTR_SCHED, "load", "global load", sched_tdcnt);
	SDT_PROBE2(sched, , , load__change, NOCPU, sched_tdcnt);
}
/*
 * AST handler: if maybe_resched() flagged this thread with TDP_RESCHED,
 * clear the flag and request a scheduler AST so the thread yields.
 */
static void
maybe_resched_ast(struct thread *td, int tda)
{
	MPASS(td == curthread);
	if ((td->td_pflags & TDP_RESCHED) != 0) {
		td->td_pflags &= ~TDP_RESCHED;
		ast_scheduler(td, tda);
	}
}
/*
 * Mark the current thread for rescheduling when td has a better
 * (numerically lower) priority; the actual switch happens later via
 * maybe_resched_ast().
 */
static void
maybe_resched(struct thread *td)
{
	struct thread *ctd;

	ctd = curthread;
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	if (td->td_priority < ctd->td_priority)
		ctd->td_pflags |= TDP_RESCHED;
}
/*
 * Decide whether the newly runnable thread td should preempt the
 * current thread; if so set td_owepreempt.  Returns 1 when a
 * preemption was requested, 0 otherwise (always 0 without PREEMPTION).
 */
static int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
	struct thread *ctd;
	int cpri, pri;

	ctd = curthread;
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT((td->td_inhibitors == 0),
	    ("maybe_preempt: trying to run inhibited thread"));
	pri = td->td_priority;
	cpri = ctd->td_priority;
	/*
	 * Never preempt while panicking, when td is not strictly better,
	 * or when the current thread is inhibited.
	 */
	if (KERNEL_PANICKED() || pri >= cpri ||
	    TD_IS_INHIBITED(ctd))
		return (0);
#ifndef FULL_PREEMPTION
	/* By default only interrupt-priority threads trigger preemption. */
	if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE)
		return (0);
#endif

	CTR0(KTR_PROC, "maybe_preempt: scheduling preemption");
	ctd->td_owepreempt = 1;
	return (1);
#else
	return (0);
#endif
}
/* Load-dependent factors used to decay ts_estcpu in schedcpu()/updatepri(). */
#define loadfactor(loadav) (2 * (loadav))
#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* Decay factor for ts_pctcpu; value is assigned in sched_4bsd_setup(). */
extern fixpt_t ccpu;

#define CCPU_SHIFT 11
/*
 * Periodic (about once per second) recomputation of every thread's
 * %cpu estimate and estcpu decay, followed by a timeshare priority
 * refresh.  Runs in the schedcpu kernel process.
 */
static void
schedcpu(void)
{
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct thread *td;
	struct proc *p;
	struct td_sched *ts;
	int awake;

	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_state == PRS_NEW) {
			/* Skip processes still being constructed. */
			PROC_UNLOCK(p);
			continue;
		}
		FOREACH_THREAD_IN_PROC(p, td) {
			awake = 0;
			ts = td_get_sched(td);
			thread_lock(td);
			/*
			 * A thread counts as awake when it is on a run
			 * queue, currently running, or has run since the
			 * previous pass (TDF_DIDRUN).
			 */
			if (TD_ON_RUNQ(td)) {
				awake = 1;
				td->td_flags &= ~TDF_DIDRUN;
			} else if (TD_IS_RUNNING(td)) {
				awake = 1;
			} else if (td->td_flags & TDF_DIDRUN) {
				awake = 1;
				td->td_flags &= ~TDF_DIDRUN;
			}
			/* Age the %cpu estimate by the decay factor. */
			ts->ts_pctcpu = (ts->ts_pctcpu * ccpu) >> FSHIFT;
			if (ts->ts_cpticks != 0) {
				/*
				 * Fold ticks charged since the last pass
				 * into ts_pctcpu, normalized to a 100 Hz
				 * statclock.
				 */
#if (FSHIFT >= CCPU_SHIFT)
				ts->ts_pctcpu += (realstathz == 100)
				    ? ((fixpt_t) ts->ts_cpticks) <<
				    (FSHIFT - CCPU_SHIFT) :
				    100 * (((fixpt_t) ts->ts_cpticks)
				    << (FSHIFT - CCPU_SHIFT)) / realstathz;
#else
				ts->ts_pctcpu += ((FSCALE - ccpu) *
				    (ts->ts_cpticks *
				    FSCALE / realstathz)) >> FSHIFT;
#endif
				ts->ts_cpticks = 0;
			}
			/*
			 * ts_slptime counts consecutive passes the thread
			 * was seen asleep; reset it when seen awake.  A
			 * thread awake after a long sleep should already
			 * have gone through updatepri() at wakeup time.
			 */
			if (awake) {
				if (ts->ts_slptime > 1) {
					updatepri(td);
				}
				ts->ts_slptime = 0;
			} else
				ts->ts_slptime++;
			if (ts->ts_slptime > 1) {
				/*
				 * Long sleepers are caught up lazily by
				 * updatepri() on wakeup; skip them here.
				 */
				thread_unlock(td);
				continue;
			}
			ts->ts_estcpu = decay_cpu(loadfac, ts->ts_estcpu);
			resetpriority(td);
			resetpriority_thread(td);
			thread_unlock(td);
		}
		PROC_UNLOCK(p);
	}
	sx_sunlock(&allproc_lock);
}
/*
 * Main loop of the schedcpu kernel process: run schedcpu() roughly
 * once per second.
 */
static void
schedcpu_thread(void)
{
	for (;;) {
		schedcpu();
		pause("-", hz);
	}
}
/*
 * Recalculate estcpu for a thread that slept through at least one
 * schedcpu() pass, applying one decay step per missed pass.  Threads
 * asleep longer than 5 * loadfactor simply forget all cpu usage.
 */
static void
updatepri(struct thread *td)
{
	struct td_sched *ts;
	fixpt_t loadfac;
	unsigned int newcpu;

	ts = td_get_sched(td);
	loadfac = loadfactor(averunnable.ldavg[0]);
	if (ts->ts_slptime > 5 * loadfac)
		ts->ts_estcpu = 0;
	else {
		newcpu = ts->ts_estcpu;
		ts->ts_slptime--;	/* correct the overshoot from schedcpu() */
		while (newcpu && --ts->ts_slptime)
			newcpu = decay_cpu(loadfac, newcpu);
		ts->ts_estcpu = newcpu;
	}
}
/*
 * Recompute a timeshare thread's user priority from its estcpu and the
 * process nice value, clamped to the timeshare range.  Non-timeshare
 * threads are left untouched.
 */
static void
resetpriority(struct thread *td)
{
	u_int newpriority;

	if (td->td_pri_class != PRI_TIMESHARE)
		return;
	newpriority = PUSER +
	    td_get_sched(td)->ts_estcpu / INVERSE_ESTCPU_WEIGHT +
	    NICE_WEIGHT * (td->td_proc->p_nice - PRIO_MIN);
	newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
	    PRI_MAX_TIMESHARE);
	sched_user_prio(td, newpriority);
}
/*
 * Propagate the recomputed user priority to the thread's scheduling
 * priority, but only when it currently holds a timeshare priority
 * (not e.g. a lent or kernel-elevated one).
 */
static void
resetpriority_thread(struct thread *td)
{
	/* Only change base (timeshare) priorities. */
	if (td->td_priority < PRI_MIN_TIMESHARE ||
	    td->td_priority > PRI_MAX_TIMESHARE)
		return;
	maybe_resched(td);
	sched_prio(td, td->td_user_pri);
}
/*
 * One-time scheduler setup: seed the %cpu decay constant, build the
 * run queues, account the already-running thread and register the
 * reschedule AST handler.
 */
static void
sched_4bsd_setup(void)
{
	/* 0.95122... == exp(-1/20); controls how fast ts_pctcpu decays. */
	ccpu = 0.95122942450071400909 * FSCALE;
	setup_runqs();
	/* Account for the thread that is already running at this point. */
	sched_load_add();
	ast_register(TDA_SCHED_PRIV, ASTR_UNCOND, 0, maybe_resched_ast);
}
/*
 * Late boot: once the real stathz is known, size the default time
 * slice (one tenth of a second's worth of stathz ticks) and hogticks.
 */
static void
sched_4bsd_initticks(void)
{
	realstathz = stathz ? stathz : hz;
	sched_slice = realstathz / 10;	/* ~100ms */
	hogticks = imax(1, (2 * hz * sched_slice + realstathz / 2) /
	    realstathz);
}
/*
 * Early scheduler initialization: set up the scheduler-specific parts
 * of thread0 and initialize the scheduler spin lock.
 */
static void
sched_4bsd_init(void)
{
	thread0.td_lock = &sched_lock;
	td_get_sched(&thread0)->ts_slice = sched_slice;
	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN);
}
/*
 * Per-AP initialization hook: the 4BSD scheduler needs none.
 */
static void
sched_4bsd_init_ap(void)
{
}
/*
 * Report whether any thread is ready to run on this CPU: the global
 * run queue or, on SMP, this CPU's private run queue.
 */
static bool
sched_4bsd_runnable(void)
{
#ifdef SMP
	return (runq_not_empty(&runq) ||
	    runq_not_empty(&runq_pcpu[PCPU_GET(cpuid)]));
#else
	return (runq_not_empty(&runq));
#endif
}
/*
 * Round-robin interval: the time slice converted from stathz to hz
 * ticks, rounded, and at least 1.
 */
static int
sched_4bsd_rr_interval(void)
{
	return (imax(1, (sched_slice * hz + realstathz / 2) / realstathz));
}
/*
 * Per-statclock-tick accounting for the running thread: charge a tick
 * to estcpu, refresh the priority at each estcpu weight boundary, and
 * handle time-slice expiry (preempting/demoting hogging ithreads,
 * scheduling an AST for everyone else).
 */
static void
sched_clock_tick(struct thread *td)
{
	struct pcpuidlestat *stat;
	struct td_sched *ts;

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	ts = td_get_sched(td);

	ts->ts_cpticks++;
	ts->ts_estcpu = ESTCPULIM(ts->ts_estcpu + 1);
	if ((ts->ts_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
		resetpriority(td);
		resetpriority_thread(td);
	}

	/*
	 * Force a context switch if the current thread has used up a
	 * full time slice (idle threads are exempt).
	 */
	if (!TD_IS_IDLETHREAD(td) && --ts->ts_slice <= 0) {
		ts->ts_slice = sched_slice;
		if (PRI_BASE(td->td_pri_class) == PRI_ITHD) {
			/* An ithread used a whole quantum: preempt and demote. */
			SCHED_STAT_INC(ithread_preemptions);
			td->td_owepreempt = 1;
			if (td->td_base_pri + RQ_PPQ < PRI_MAX_ITHD) {
				SCHED_STAT_INC(ithread_demotions);
				sched_prio(td, td->td_base_pri + RQ_PPQ);
			}
		} else {
			td->td_flags |= TDF_SLICEEND;
			ast_sched_locked(td, TDA_SCHED);
		}
	}

	/* This CPU ran real work, so restart the idle-call history. */
	stat = DPCPU_PTR(idlestat);
	stat->oldidlecalls = stat->idlecalls;
	stat->idlecalls = 0;
}
/*
 * Charge cnt accumulated statclock ticks to the running thread by
 * applying the per-tick handler once per tick.
 */
static void
sched_4bsd_clock(struct thread *td, int cnt)
{
	while (cnt > 0) {
		sched_clock_tick(td);
		cnt--;
	}
}
/*
 * Process-exit hook: fold the exiting thread's cpu usage into the
 * first thread of p (presumably the parent process — see
 * sched_exit_thread(); TODO confirm against the framework caller).
 */
static void
sched_4bsd_exit(struct proc *p, struct thread *td)
{
	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "proc exit",
	    "prio:%d", td->td_priority);

	PROC_LOCK_ASSERT(p, MA_OWNED);
	sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
}
/*
 * Thread-exit hook: fold the child's estcpu into td (clamped) and
 * remove the child from the load accounting unless it is TDF_NOLOAD.
 */
static void
sched_4bsd_exit_thread(struct thread *td, struct thread *child)
{
	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(child), "exit",
	    "prio:%d", child->td_priority);
	thread_lock(td);
	td_get_sched(td)->ts_estcpu = ESTCPULIM(td_get_sched(td)->ts_estcpu +
	    td_get_sched(child)->ts_estcpu);
	thread_unlock(td);
	thread_lock(child);
	if ((child->td_flags & TDF_NOLOAD) == 0)
		sched_load_rem();
	thread_unlock(child);
}
/*
 * Process-fork hook: initialize the child's first thread.
 */
static void
sched_4bsd_fork(struct thread *td, struct thread *childtd)
{
	sched_fork_thread(td, childtd);
}
/*
 * Initialize scheduler state for a newly forked thread: it inherits
 * the parent's estcpu and affinity hint and starts with a one-tick
 * slice so it reaches a slice refresh quickly.
 */
static void
sched_4bsd_fork_thread(struct thread *td, struct thread *childtd)
{
	struct td_sched *ts, *tsc;

	childtd->td_oncpu = NOCPU;
	childtd->td_lastcpu = NOCPU;
	childtd->td_lock = &sched_lock;
	childtd->td_cpuset = cpuset_ref(td->td_cpuset);
	childtd->td_domain.dr_policy = td->td_cpuset->cs_domain;
	childtd->td_priority = childtd->td_base_pri;
	ts = td_get_sched(childtd);
	bzero(ts, sizeof(*ts));
	tsc = td_get_sched(td);
	ts->ts_estcpu = tsc->ts_estcpu;
	ts->ts_flags |= (tsc->ts_flags & TSF_AFFINITY);
	ts->ts_slice = 1;
}
/*
 * Apply a new nice value to a process and recompute the priority of
 * each of its threads.
 */
static void
sched_4bsd_nice(struct proc *p, int nice)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_nice = nice;
	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		resetpriority(td);
		resetpriority_thread(td);
		thread_unlock(td);
	}
}
/*
 * Change a thread's scheduling class (PRI_TIMESHARE, PRI_ITHD, ...).
 */
static void
sched_4bsd_class(struct thread *td, int class)
{
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	td->td_pri_class = class;
}
/*
 * Set td's active scheduling priority, moving it to the correct run
 * queue bucket if it is already enqueued and the bucket changes.
 */
static void
sched_priority(struct thread *td, u_char prio)
{
	KTR_POINT3(KTR_SCHED, "thread", sched_tdname(td), "priority change",
	    "prio:%d", td->td_priority, "new prio:%d", prio, KTR_ATTR_LINKED,
	    sched_tdname(curthread));
	SDT_PROBE3(sched, , , change__pri, td, td->td_proc, prio);
	if (td != curthread && prio > td->td_priority) {
		KTR_POINT3(KTR_SCHED, "thread", sched_tdname(curthread),
		    "lend prio", "prio:%d", td->td_priority, "new prio:%d",
		    prio, KTR_ATTR_LINKED, sched_tdname(td));
		SDT_PROBE4(sched, , , lend__pri, td, td->td_proc, prio,
		    curthread);
	}
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	if (td->td_priority == prio)
		return;
	td->td_priority = prio;
	/* Requeue if the new priority maps to a different runq bucket. */
	if (TD_ON_RUNQ(td) && td->td_rqindex != RQ_PRI_TO_QUEUE_IDX(prio)) {
		sched_rem(td);
		sched_add(td, SRQ_BORING | SRQ_HOLDTD);
	}
}
/*
 * Lend a priority to td (priority propagation, e.g. via turnstiles)
 * and mark it as borrowing so the lent value is preserved.
 */
static void
sched_4bsd_lend_prio(struct thread *td, u_char prio)
{
	td->td_flags |= TDF_BORROWING;
	sched_priority(td, prio);
}
/*
 * Release a lent priority: restore the thread's own base/user priority
 * when nothing better remains lent, otherwise keep borrowing prio.
 */
static void
sched_4bsd_unlend_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	/* Timeshare threads fall back to their user priority. */
	if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
	    td->td_base_pri <= PRI_MAX_TIMESHARE)
		base_pri = td->td_user_pri;
	else
		base_pri = td->td_base_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_BORROWING;
		sched_prio(td, base_pri);
	} else
		sched_lend_prio(td, prio);
}
/*
 * Set a thread's base priority.  The active priority follows unless
 * the thread is borrowing a better one; turnstiles are notified when
 * the active priority changes while the thread blocks on a lock.
 */
static void
sched_4bsd_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	/* First, update the base priority. */
	td->td_base_pri = prio;

	/* Never lower the priority of a thread borrowing another's. */
	if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
		return;

	/* Change the real priority. */
	oldprio = td->td_priority;
	sched_priority(td, prio);

	/* Let a turnstile the thread sits on update its state. */
	if (TD_ON_LOCK(td) && oldprio != prio)
		turnstile_adjust(td, oldprio);
}
/*
 * Set an interrupt thread's base ithread priority (it may later be
 * temporarily demoted by sched_clock_tick() when it hogs a quantum).
 */
static void
sched_4bsd_ithread_prio(struct thread *td, u_char prio)
{
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	MPASS(td->td_pri_class == PRI_ITHD);
	td->td_base_ithread_pri = prio;
	sched_prio(td, prio);
}
/*
 * Set the base user priority; the effective user priority only
 * follows when no better (lower) user priority is currently lent.
 */
static void
sched_4bsd_user_prio(struct thread *td, u_char prio)
{
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	td->td_base_user_pri = prio;
	if (td->td_lend_user_pri <= prio)
		return;
	td->td_user_pri = prio;
}
/*
 * Lend a user priority to td and make the change effective: raise the
 * active priority immediately if needed, otherwise schedule an AST so
 * it is reconsidered on return to user mode.
 */
static void
sched_4bsd_lend_user_prio(struct thread *td, u_char prio)
{
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	td->td_lend_user_pri = prio;
	td->td_user_pri = min(prio, td->td_base_user_pri);
	if (td->td_priority > td->td_user_pri)
		sched_prio(td, td->td_user_pri);
	else if (td->td_priority != td->td_user_pri)
		ast_sched_locked(td, TDA_SCHED);
}
/*
 * Like sched_lend_user_prio() but acquires the thread lock itself and
 * skips the work entirely when the lent priority is unchanged.
 */
static void
sched_4bsd_lend_user_prio_cond(struct thread *td, u_char prio)
{
	if (td->td_lend_user_pri == prio)
		return;

	thread_lock(td);
	sched_lend_user_prio(td, prio);
	thread_unlock(td);
}
/*
 * Note that a thread is about to sleep, recording the sleep start and
 * optionally raising a timeshare thread to the given sleep priority.
 */
static void
sched_4bsd_sleep(struct thread *td, int pri)
{
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	td->td_slptick = ticks;
	td_get_sched(td)->ts_slptime = 0;
	if (pri != 0 && PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
		sched_prio(td, pri);
}
/*
 * Context switch away from td: requeue it if still runnable, pick the
 * next thread with choosethread() under sched_lock, and switch to it.
 * Returns in the same thread after an arbitrary amount of time.
 */
static void
sched_4bsd_sswitch(struct thread *td, int flags)
{
	struct thread *newtd;
	struct mtx *tmtx;
	int preempted;

	tmtx = &sched_lock;

	THREAD_LOCK_ASSERT(td, MA_OWNED);

	td->td_lastcpu = td->td_oncpu;
	/* A switch counts as a preemption only if the slice did not end. */
	preempted = (td->td_flags & TDF_SLICEEND) == 0 &&
	    (flags & SW_PREEMPT) != 0;
	td->td_flags &= ~TDF_SLICEEND;
	ast_unsched_locked(td, TDA_SCHED);
	td->td_owepreempt = 0;
	td->td_oncpu = NOCPU;

	/*
	 * If this thread is still marked RUNNING put it back on the run
	 * queue; idle threads are never queued and instead drop off the
	 * idle CPU mask.
	 */
	if (td->td_flags & TDF_IDLETD) {
		TD_SET_CAN_RUN(td);
#ifdef SMP
		CPU_CLR(PCPU_GET(cpuid), &idle_cpus_mask);
#endif
	} else {
		if (TD_IS_RUNNING(td)) {
			/* Put us back on the run queue. */
			sched_add(td, SRQ_HOLDTD | SRQ_OURSELF | SRQ_YIELDING |
			    (preempted ? SRQ_PREEMPTED : 0));
		}
	}

	/*
	 * Migrate td's lock to sched_lock, blocking its previous lock so
	 * the switch happens entirely under sched_lock.
	 */
	if (td->td_lock != &sched_lock) {
		mtx_lock_spin(&sched_lock);
		tmtx = thread_lock_block(td);
		mtx_unlock_spin(tmtx);
	}

	if ((td->td_flags & TDF_NOLOAD) == 0)
		sched_load_rem();

	newtd = choosethread();
	MPASS(newtd->td_lock == &sched_lock);

#if (KTR_COMPILE & KTR_SCHED) != 0
	if (TD_IS_IDLETHREAD(td))
		KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle",
		    "prio:%d", td->td_priority);
	else
		KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td),
		    "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg,
		    "lockname:\"%s\"", td->td_lockname);
#endif

	if (td != newtd) {
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
#ifdef HWT_HOOKS
		HWT_CALL_HOOK(td, HWT_SWITCH_OUT, NULL);
		HWT_CALL_HOOK(newtd, HWT_SWITCH_IN, NULL);
#endif
		SDT_PROBE2(sched, , , off__cpu, newtd, newtd->td_proc);
		lock_profile_release_lock(&sched_lock.lock_object, true);
#ifdef KDTRACE_HOOKS
		/* Let DTrace virtual-time accounting see the switch. */
		if (dtrace_vtime_active)
			(*dtrace_vtime_switch_func)(newtd);
#endif
		cpu_switch(td, newtd, tmtx);
		/*
		 * We resume here in the same thread, but an arbitrary
		 * amount of time may have passed.
		 */
		lock_profile_obtain_lock_success(&sched_lock.lock_object, true,
		    0, 0, __FILE__, __LINE__);
		SDT_PROBE0(sched, , , on__cpu);
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
	} else {
		td->td_lock = &sched_lock;
		SDT_PROBE0(sched, , , remain__cpu);
	}

	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
	    "prio:%d", td->td_priority);

#ifdef SMP
	if (td->td_flags & TDF_IDLETD)
		CPU_SET(PCPU_GET(cpuid), &idle_cpus_mask);
#endif
	sched_lock.mtx_lock = (uintptr_t)td;
	td->td_oncpu = PCPU_GET(cpuid);
	spinlock_enter();
	mtx_unlock_spin(&sched_lock);
}
/*
 * Make a woken thread runnable: catch up on estcpu decay missed while
 * asleep, refresh its slice, restore a demoted ithread's base
 * priority, and enqueue it with the given SRQ flags.
 */
static void
sched_4bsd_wakeup(struct thread *td, int srqflags)
{
	struct td_sched *ts;

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	ts = td_get_sched(td);
	if (ts->ts_slptime > 1) {
		/* The thread slept through schedcpu() passes; catch up. */
		updatepri(td);
		resetpriority(td);
	}
	td->td_slptick = 0;
	ts->ts_slptime = 0;
	ts->ts_slice = sched_slice;

	/* Undo any quantum-hog demotion applied to an ithread. */
	if (PRI_BASE(td->td_pri_class) == PRI_ITHD &&
	    td->td_base_pri != td->td_base_ithread_pri)
		sched_prio(td, td->td_base_ithread_pri);

	sched_add(td, srqflags);
}
#ifdef SMP
/*
 * Try to hand a freshly enqueued thread to an idle CPU via IPI.
 * cpunum restricts the target to a single CPU, or NOCPU for any idle
 * CPU.  Returns 1 if a wakeup was forwarded, 0 otherwise.
 */
static int
forward_wakeup(int cpunum)
{
	struct pcpu *pc;
	cpuset_t dontuse, map, map2;
	u_int id, me;
	int iscpuset;

	mtx_assert(&sched_lock, MA_OWNED);

	CTR0(KTR_RUNQ, "forward_wakeup()");

	if ((!forward_wakeup_enabled) ||
	    (forward_wakeup_use_mask == 0 && forward_wakeup_use_loop == 0))
		return (0);
	if (!smp_started || KERNEL_PANICKED())
		return (0);

	forward_wakeups_requested++;

	/* Don't bother if this CPU is idle and allowed to run the thread. */
	me = PCPU_GET(cpuid);
	if (CPU_ISSET(me, &idle_cpus_mask) &&
	    (cpunum == NOCPU || me == cpunum))
		return (0);

	/* Exclude ourselves and CPUs that are stopped or halted. */
	CPU_SETOF(me, &dontuse);
	CPU_OR(&dontuse, &dontuse, &stopped_cpus);
	CPU_OR(&dontuse, &dontuse, &hlt_cpus_mask);
	CPU_ZERO(&map2);
	if (forward_wakeup_use_loop) {
		/* Scan for CPUs currently running their idle thread. */
		STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
			id = pc->pc_cpuid;
			if (!CPU_ISSET(id, &dontuse) &&
			    pc->pc_curthread == pc->pc_idlethread) {
				CPU_SET(id, &map2);
			}
		}
	}

	if (forward_wakeup_use_mask) {
		map = idle_cpus_mask;
		CPU_ANDNOT(&map, &map, &dontuse);

		/* When both methods are on, prefer the loop on mismatch. */
		if (forward_wakeup_use_loop) {
			if (CPU_CMP(&map, &map2)) {
				printf("map != map2, loop method preferred\n");
				map = map2;
			}
		}
	} else {
		map = map2;
	}

	/* If restricted to one CPU, mask off all the others. */
	if (cpunum != NOCPU) {
		KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
		iscpuset = CPU_ISSET(cpunum, &map);
		if (iscpuset == 0)
			CPU_ZERO(&map);
		else
			CPU_SETOF(cpunum, &map);
	}
	if (!CPU_EMPTY(&map)) {
		forward_wakeups_delivered++;
		STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
			id = pc->pc_cpuid;
			if (!CPU_ISSET(id, &map))
				continue;
			/* Prefer the cheap idle-wakeup mechanism if it works. */
			if (cpu_idle_wakeup(pc->pc_cpuid))
				CPU_CLR(id, &map);
		}
		/* IPI whatever could not be woken up cheaply. */
		if (!CPU_EMPTY(&map))
			ipi_selected(map, IPI_AST);
		return (1);
	}
	if (cpunum == NOCPU)
		printf("forward_wakeup: Idle processor not found\n");
	return (0);
}
/*
 * Nudge a remote CPU because a thread of priority pri was placed on
 * its per-CPU runq: wake it if idle, send a preemption IPI when
 * configured and warranted, or fall back to posting an AST.
 */
static void
kick_other_cpu(int pri, int cpuid)
{
	struct pcpu *pcpu;
	int cpri;

	pcpu = pcpu_find(cpuid);
	if (CPU_ISSET(cpuid, &idle_cpus_mask)) {
		forward_wakeups_delivered++;
		if (!cpu_idle_wakeup(cpuid))
			ipi_cpu(cpuid, IPI_AST);
		return;
	}

	cpri = pcpu->pc_curthread->td_priority;
	if (pri >= cpri)
		return;	/* The remote CPU already runs something better. */

#if defined(IPI_PREEMPTION) && defined(PREEMPTION)
#if !defined(FULL_PREEMPTION)
	/* Without FULL_PREEMPTION only ithread priorities preempt. */
	if (pri <= PRI_MAX_ITHD)
#endif /* !FULL_PREEMPTION */
	{
		ipi_cpu(cpuid, IPI_PREEMPT);
		return;
	}
#endif /* IPI_PREEMPTION && PREEMPTION */

	if (pcpu->pc_curthread->td_lock == &sched_lock) {
		ast_sched_locked(pcpu->pc_curthread, TDA_SCHED);
		ipi_cpu(cpuid, IPI_AST);
	}
}
#endif
#ifdef SMP
/*
 * Pick an allowed CPU with the shortest per-CPU run queue, starting
 * from the last CPU the thread ran on when that is still permitted.
 */
static int
sched_pickcpu(struct thread *td)
{
	int best, cpu;

	mtx_assert(&sched_lock, MA_OWNED);

	if (td->td_lastcpu != NOCPU && THREAD_CAN_SCHED(td, td->td_lastcpu))
		best = td->td_lastcpu;
	else
		best = NOCPU;
	CPU_FOREACH(cpu) {
		if (!THREAD_CAN_SCHED(td, cpu))
			continue;

		if (best == NOCPU)
			best = cpu;
		else if (runq_length[cpu] < runq_length[best])
			best = cpu;
	}
	KASSERT(best != NOCPU, ("no valid CPUs"));

	return (best);
}
#endif
/*
 * Enqueue a runnable thread.  On SMP, pinned/bound/affinity threads go
 * to a per-CPU runq and the target CPU is kicked; all others go to the
 * global runq and the wakeup may be forwarded to an idle CPU or cause
 * a local preemption/reschedule.  SRQ_HOLDTD keeps the thread lock
 * held on return.
 */
static void
sched_4bsd_add(struct thread *td, int flags)
#ifdef SMP
{
	cpuset_t tidlemsk;
	struct td_sched *ts;
	u_int cpu, cpuid;
	int forwarded = 0;
	int single_cpu = 0;

	ts = td_get_sched(td);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT((td->td_inhibitors == 0),
	    ("sched_add: trying to run inhibited thread"));
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("sched_add: bad thread state"));
	KASSERT(td->td_flags & TDF_INMEM,
	    ("sched_add: thread swapped out"));

	KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq add",
	    "prio:%d", td->td_priority, KTR_ATTR_LINKED,
	    sched_tdname(curthread));
	KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup",
	    KTR_ATTR_LINKED, sched_tdname(td));
	SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL,
	    flags & SRQ_PREEMPTED);

	/*
	 * Now that the thread is moving to the run queue, migrate its
	 * lock to the scheduler's lock.
	 */
	if (td->td_lock != &sched_lock) {
		mtx_lock_spin(&sched_lock);
		if ((flags & SRQ_HOLD) != 0)
			td->td_lock = &sched_lock;
		else
			thread_lock_set(td, &sched_lock);
	}
	TD_SET_RUNQ(td);

	/*
	 * Select the runq: per-CPU for pinned, bound or affinity-
	 * restricted threads, the global runq otherwise.
	 */
	if (smp_started && (td->td_pinned != 0 || td->td_flags & TDF_BOUND ||
	    ts->ts_flags & TSF_AFFINITY)) {
		if (td->td_pinned != 0)
			cpu = td->td_lastcpu;
		else if (td->td_flags & TDF_BOUND) {
			/* Recover the bound CPU from the current runq. */
			KASSERT(SKE_RUNQ_PCPU(ts),
			    ("sched_add: bound td_sched not on cpu runq"));
			cpu = ts->ts_runq - &runq_pcpu[0];
		} else
			/* Find a valid CPU within the thread's cpuset. */
			cpu = sched_pickcpu(td);
		ts->ts_runq = &runq_pcpu[cpu];
		single_cpu = 1;
		CTR3(KTR_RUNQ,
		    "sched_add: Put td_sched:%p(td:%p) on cpu%d runq", ts, td,
		    cpu);
	} else {
		CTR2(KTR_RUNQ,
		    "sched_add: adding td_sched:%p (td:%p) to gbl runq", ts,
		    td);
		cpu = NOCPU;
		ts->ts_runq = &runq;
	}

	if ((td->td_flags & TDF_NOLOAD) == 0)
		sched_load_add();
	runq_add(ts->ts_runq, td, flags);
	if (cpu != NOCPU)
		runq_length[cpu]++;

	cpuid = PCPU_GET(cpuid);
	if (single_cpu && cpu != cpuid) {
		/* The thread must run on a specific remote CPU: kick it. */
		kick_other_cpu(td->td_priority, cpu);
	} else {
		if (!single_cpu) {
			/*
			 * Consider forwarding the wakeup to another idle
			 * CPU, unless we are idle ourselves or enqueueing
			 * from an interrupt, or no other CPU is idle.
			 */
			tidlemsk = idle_cpus_mask;
			CPU_ANDNOT(&tidlemsk, &tidlemsk, &hlt_cpus_mask);
			CPU_CLR(cpuid, &tidlemsk);

			if (!CPU_ISSET(cpuid, &idle_cpus_mask) &&
			    ((flags & SRQ_INTR) == 0) &&
			    !CPU_EMPTY(&tidlemsk))
				forwarded = forward_wakeup(cpu);
		}

		if (!forwarded) {
			if (!maybe_preempt(td))
				maybe_resched(td);
		}
	}
	if ((flags & SRQ_HOLDTD) == 0)
		thread_unlock(td);
}
#else /* !SMP */
{
	struct td_sched *ts;

	ts = td_get_sched(td);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT((td->td_inhibitors == 0),
	    ("sched_add: trying to run inhibited thread"));
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("sched_add: bad thread state"));
	KASSERT(td->td_flags & TDF_INMEM,
	    ("sched_add: thread swapped out"));
	KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq add",
	    "prio:%d", td->td_priority, KTR_ATTR_LINKED,
	    sched_tdname(curthread));
	KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup",
	    KTR_ATTR_LINKED, sched_tdname(td));
	SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL,
	    flags & SRQ_PREEMPTED);

	/*
	 * Now that the thread is moving to the run queue, migrate its
	 * lock to the scheduler's lock.
	 */
	if (td->td_lock != &sched_lock) {
		mtx_lock_spin(&sched_lock);
		if ((flags & SRQ_HOLD) != 0)
			td->td_lock = &sched_lock;
		else
			thread_lock_set(td, &sched_lock);
	}
	TD_SET_RUNQ(td);
	CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to runq", ts, td);
	ts->ts_runq = &runq;

	if ((td->td_flags & TDF_NOLOAD) == 0)
		sched_load_add();
	runq_add(ts->ts_runq, td, flags);
	if (!maybe_preempt(td))
		maybe_resched(td);
	if ((flags & SRQ_HOLDTD) == 0)
		thread_unlock(td);
}
#endif /* SMP */
/*
 * Remove a thread from its run queue, undoing the load and per-CPU
 * queue-length accounting done in sched_add().
 */
static void
sched_4bsd_rem(struct thread *td)
{
	struct td_sched *ts;

	ts = td_get_sched(td);
	KASSERT(td->td_flags & TDF_INMEM,
	    ("sched_rem: thread swapped out"));
	KASSERT(TD_ON_RUNQ(td),
	    ("sched_rem: thread not on run queue"));
	mtx_assert(&sched_lock, MA_OWNED);
	KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq rem",
	    "prio:%d", td->td_priority, KTR_ATTR_LINKED,
	    sched_tdname(curthread));
	SDT_PROBE3(sched, , , dequeue, td, td->td_proc, NULL);

	if ((td->td_flags & TDF_NOLOAD) == 0)
		sched_load_rem();
#ifdef SMP
	if (ts->ts_runq != &runq)
		runq_length[ts->ts_runq - runq_pcpu]--;
#endif
	runq_remove(ts->ts_runq, td);
	TD_SET_CAN_RUN(td);
}
/*
 * Select and dequeue the next thread to run: the better of the global
 * runq (chosen with fuzz) and this CPU's private runq, falling back
 * to the per-CPU idle thread when both are empty.
 */
static struct thread *
sched_4bsd_choose(void)
{
	struct thread *td;
	struct runq *rq;

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef SMP
	struct thread *tdcpu;

	rq = &runq;
	td = runq_choose_fuzz(&runq, runq_fuzz);
	tdcpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]);

	/* Prefer the per-CPU candidate when it beats the global one. */
	if (td == NULL ||
	    (tdcpu != NULL &&
	    tdcpu->td_priority < td->td_priority)) {
		CTR2(KTR_RUNQ, "choosing td %p from pcpu runq %d", tdcpu,
		    PCPU_GET(cpuid));
		td = tdcpu;
		rq = &runq_pcpu[PCPU_GET(cpuid)];
	} else {
		CTR1(KTR_RUNQ, "choosing td_sched %p from main runq", td);
	}

#else
	rq = &runq;
	td = runq_choose(&runq);
#endif

	if (td) {
#ifdef SMP
		if (td == tdcpu)
			runq_length[PCPU_GET(cpuid)]--;
#endif
		runq_remove(rq, td);
		td->td_flags |= TDF_DIDRUN;

		KASSERT(td->td_flags & TDF_INMEM,
		    ("sched_choose: thread swapped out"));
		return (td);
	}
	return (PCPU_GET(idlethread));
}
/*
 * Handle a preemption request against the current thread: switch away
 * immediately, or merely note the obligation when inside a critical
 * section where switching is not allowed.
 */
static void
sched_4bsd_preempt(struct thread *td)
{
	int flags;

	SDT_PROBE2(sched, , , surrender, td, td->td_proc);
	if (td->td_critnest > 1) {
		/* Defer: cannot switch inside a critical section. */
		td->td_owepreempt = 1;
	} else {
		thread_lock(td);
		flags = SW_INVOL | SW_PREEMPT;
		flags |= TD_IS_IDLETHREAD(td) ? SWT_REMOTEWAKEIDLE :
		    SWT_REMOTEPREEMPT;
		mi_switch(flags);
	}
}
/*
 * On return to user mode, drop any elevated kernel priority back to
 * the thread's user priority.
 */
static void
sched_4bsd_userret_slowpath(struct thread *td)
{
	thread_lock(td);
	td->td_priority = td->td_user_pri;
	td->td_base_pri = td->td_user_pri;
	thread_unlock(td);
}
/*
 * Bind the current thread to a CPU, switching away immediately if it
 * is not already there.  On SMP the mi_switch() path returns with the
 * thread lock reacquired.
 */
static void
sched_4bsd_bind(struct thread *td, int cpu)
{
#ifdef SMP
	struct td_sched *ts = td_get_sched(td);
#endif

	THREAD_LOCK_ASSERT(td, MA_OWNED|MA_NOTRECURSED);
	KASSERT(td == curthread, ("sched_bind: can only bind curthread"));

	td->td_flags |= TDF_BOUND;
#ifdef SMP
	ts->ts_runq = &runq_pcpu[cpu];
	if (PCPU_GET(cpuid) == cpu)
		return;

	/* Migrate: requeue on the target CPU's runq via a switch. */
	mi_switch(SW_VOL | SWT_BIND);
	thread_lock(td);
#endif
}
/*
 * Release a CPU binding previously established by sched_bind().
 */
static void
sched_4bsd_unbind(struct thread* td)
{
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(td == curthread, ("sched_unbind: can only bind curthread"));
	td->td_flags &= ~TDF_BOUND;
}
/*
 * Return non-zero (the raw TDF_BOUND bit) if the thread is currently
 * bound to a single CPU.
 */
static int
sched_4bsd_is_bound(struct thread *td)
{
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	return (td->td_flags & TDF_BOUND);
}
/*
 * Voluntarily yield the processor.
 */
static void
sched_4bsd_relinquish(struct thread *td)
{
	thread_lock(td);
	mi_switch(SW_VOL | SWT_RELINQUISH);
}
/*
 * Return the count of runnable threads included in the load average.
 */
static int
sched_4bsd_load(void)
{
	return (sched_tdcnt);
}
/*
 * Per-process allocation size: no scheduler state beyond struct proc.
 */
static int
sched_4bsd_sizeof_proc(void)
{
	return (sizeof(struct proc));
}
/*
 * Per-thread allocation size: struct td_sched lives directly after
 * struct thread.
 */
static int
sched_4bsd_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}
/*
 * Return the thread's decayed %cpu estimate (fixed point, FSHIFT).
 */
static fixpt_t
sched_4bsd_pctcpu(struct thread *td)
{
	struct td_sched *ts;

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	ts = td_get_sched(td);
	return (ts->ts_pctcpu);
}
/*
 * Return the thread's estimated cpu usage.
 */
static u_int
sched_4bsd_estcpu(struct thread *td)
{
	return (td_get_sched(td)->ts_estcpu);
}
/*
 * Per-CPU idle thread main loop: spin in cpu_idle() until work
 * appears, then switch to it.  The idlecalls counters feed cpu_idle()
 * a hint once more than 64 recent idle iterations have accumulated
 * (NOTE(review): presumably steering deeper idle states — confirm
 * against cpu_idle()'s contract).
 */
static void
sched_4bsd_idletd(void *dummy)
{
	struct pcpuidlestat *stat;

	THREAD_NO_SLEEPING();
	stat = DPCPU_PTR(idlestat);
	for (;;) {
		mtx_assert(&Giant, MA_NOTOWNED);

		while (!sched_runnable()) {
			cpu_idle(stat->idlecalls + stat->oldidlecalls > 64);
			stat->idlecalls++;
		}

		mtx_lock_spin(&sched_lock);
		mi_switch(SW_VOL | SWT_IDLE);
	}
}
/*
 * Common tail of sched_4bsd_throw() and AP startup: choose the next
 * thread and jump to it without saving the old context.  td is NULL
 * when called from AP entry (no previous thread to throw away).
 */
static void
sched_throw_tail(struct thread *td)
{
	struct thread *newtd;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
	newtd = choosethread();
#ifdef HWT_HOOKS
	if (td)
		HWT_CALL_HOOK(td, HWT_SWITCH_OUT, NULL);
	HWT_CALL_HOOK(newtd, HWT_SWITCH_IN, NULL);
#endif
	cpu_throw(td, newtd);	/* doesn't return */
}
/*
 * First scheduling act of an application processor: take sched_lock,
 * rebalance the spinlock nesting count (this context starts as if one
 * spinlock were already held), stamp the switch time, and run the
 * first chosen thread.
 */
static void
sched_4bsd_ap_entry(void)
{
	mtx_lock_spin(&sched_lock);
	spinlock_exit();
	PCPU_SET(switchtime, cpu_ticks());
	PCPU_SET(switchticks, ticks);

	sched_throw_tail(NULL);
}
/*
 * Throw away the current (exiting) thread's context and switch to a
 * newly chosen one.  Does not return.
 */
static void
sched_4bsd_throw(struct thread *td)
{
	MPASS(td != NULL);
	MPASS(td->td_lock == &sched_lock);
	lock_profile_release_lock(&sched_lock.lock_object, true);
	td->td_lastcpu = td->td_oncpu;
	td->td_oncpu = NOCPU;

	sched_throw_tail(td);
}
/*
 * Finish a context switch in a newly forked thread: record where it
 * runs and take ownership of sched_lock (held, not recursed).
 */
static void
sched_4bsd_fork_exit(struct thread *td)
{
	td->td_oncpu = PCPU_GET(cpuid);
	sched_lock.mtx_lock = (uintptr_t)td;
	lock_profile_obtain_lock_success(&sched_lock.lock_object, true,
	    0, 0, __FILE__, __LINE__);
	THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);

	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
	    "prio:%d", td->td_priority);
	SDT_PROBE0(sched, , , on__cpu);
}
/*
 * Return a "<name> tid <tid>" string for KTR tracing, built lazily
 * and cached in the td_sched.  Without KTR, the plain thread name.
 */
static char *
sched_4bsd_tdname(struct thread *td)
{
#ifdef KTR
	struct td_sched *ts;

	ts = td_get_sched(td);
	if (ts->ts_name[0] == '\0')
		snprintf(ts->ts_name, sizeof(ts->ts_name),
		    "%s tid %d", td->td_name, td->td_tid);
	return (ts->ts_name);
#else
	return (td->td_name);
#endif
}
/*
 * Invalidate the cached KTR name so it is rebuilt on next use
 * (e.g. after the thread is renamed).
 */
static void
sched_4bsd_clear_tdname(struct thread *td)
{
#ifdef KTR
	struct td_sched *ts;

	ts = td_get_sched(td);
	ts->ts_name[0] = '\0';
#endif
}
/*
 * Recompute the TSF_AFFINITY hint after the thread's cpuset changed
 * and, when the thread is queued or running somewhere it may no
 * longer run, push it toward a legal CPU.
 */
static void
sched_4bsd_affinity(struct thread *td)
{
#ifdef SMP
	struct td_sched *ts;
	int cpu;

	THREAD_LOCK_ASSERT(td, MA_OWNED);

	/*
	 * Set TSF_AFFINITY if at least one CPU is excluded from the
	 * thread's cpuset.
	 */
	ts = td_get_sched(td);
	ts->ts_flags &= ~TSF_AFFINITY;
	CPU_FOREACH(cpu) {
		if (!THREAD_CAN_SCHED(td, cpu)) {
			ts->ts_flags |= TSF_AFFINITY;
			break;
		}
	}

	/* If the thread can run on all CPUs, nothing else to do. */
	if (!(ts->ts_flags & TSF_AFFINITY))
		return;

	/* Pinned and bound threads are left where they are. */
	if (td->td_pinned != 0 || td->td_flags & TDF_BOUND)
		return;

	switch (TD_GET_STATE(td)) {
	case TDS_RUNQ:
		/*
		 * If it sits on a per-CPU runq it may still use, leave
		 * it; otherwise requeue it onto a valid CPU.
		 */
		if (ts->ts_runq != &runq &&
		    THREAD_CAN_SCHED(td, ts->ts_runq - runq_pcpu))
			return;

		sched_rem(td);
		sched_add(td, SRQ_HOLDTD | SRQ_BORING);
		break;
	case TDS_RUNNING:
		/*
		 * Force a switch before returning to userland.  If the
		 * target thread is not running locally, IPI the CPU it
		 * is actually running on (td_oncpu).  The previous code
		 * reused the CPU_FOREACH leftover 'cpu' index here,
		 * which is the first CPU the thread may NOT run on —
		 * the wrong IPI target.
		 */
		ast_sched_locked(td, TDA_SCHED);
		if (td != curthread)
			ipi_cpu(td->td_oncpu, IPI_AST);
		break;
	default:
		break;
	}
#endif
}
/*
 * Timer accounting runs on this CPU unless it has been halted (SMP).
 */
static bool
sched_4bsd_do_timer_accounting(void)
{
#ifdef SMP
	return (!CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask));
#else
	return (true);
#endif
}
/*
 * Topology query stub: the 4BSD scheduler keeps no cache-topology
 * information, so report that no L2-sharing neighbor exists.
 */
static int
sched_4bsd_find_l2_neighbor(int cpu)
{
	(void)cpu;	/* Topology is not tracked by this scheduler. */
	return (-1);
}
/*
 * Method table wiring this implementation into the scheduler
 * framework; SLOT maps each interface slot to sched_4bsd_<name>.
 */
struct sched_instance sched_4bsd_instance = {
#define SLOT(name) .name = sched_4bsd_##name
	SLOT(load),
	SLOT(rr_interval),
	SLOT(runnable),
	SLOT(exit),
	SLOT(fork),
	SLOT(fork_exit),
	SLOT(class),
	SLOT(nice),
	SLOT(ap_entry),
	SLOT(exit_thread),
	SLOT(estcpu),
	SLOT(fork_thread),
	SLOT(ithread_prio),
	SLOT(lend_prio),
	SLOT(lend_user_prio),
	SLOT(lend_user_prio_cond),
	SLOT(pctcpu),
	SLOT(prio),
	SLOT(sleep),
	SLOT(sswitch),
	SLOT(throw),
	SLOT(unlend_prio),
	SLOT(user_prio),
	SLOT(userret_slowpath),
	SLOT(add),
	SLOT(choose),
	SLOT(clock),
	SLOT(idletd),
	SLOT(preempt),
	SLOT(relinquish),
	SLOT(rem),
	SLOT(wakeup),
	SLOT(bind),
	SLOT(unbind),
	SLOT(is_bound),
	SLOT(affinity),
	SLOT(sizeof_proc),
	SLOT(sizeof_thread),
	SLOT(tdname),
	SLOT(clear_tdname),
	SLOT(do_timer_accounting),
	SLOT(find_l2_neighbor),
	SLOT(init),
	SLOT(init_ap),
	SLOT(setup),
	SLOT(initticks),
	SLOT(schedcpu),
#undef SLOT
};
DECLARE_SCHEDULER(fourbsd_sched_selector, "4BSD", &sched_4bsd_instance);