kernel/cgroup/cpuset-internal.h
/* SPDX-License-Identifier: GPL-2.0-or-later */

#ifndef __CPUSET_INTERNAL_H
#define __CPUSET_INTERNAL_H

#include <linux/cgroup.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/spinlock.h>
#include <linux/union_find.h>
#include <linux/sched/isolation.h>

/* See the "Frequency meter" comments in cpuset-v1.c. */

struct fmeter {
        int cnt;                /* unprocessed events count */
        int val;                /* most recent output value */
        time64_t time;          /* clock (secs) when val computed */
        spinlock_t lock;        /* guards read or write of above */
};
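
/*
 * Illustrative sketch, not the actual implementation (the real filter
 * lives with the "Frequency meter" code): the fmeter acts as a simple
 * single-pole digital low-pass filter.  Events accumulate in @cnt and
 * are folded into @val roughly once per second, with the old value
 * decayed by a constant coefficient:
 *
 *	spin_lock(&fmp->lock);
 *	while (seconds_elapsed--)			// decay once per tick
 *		fmp->val = (COEF * fmp->val) / SCALE;	// shrink old average
 *	fmp->val += ((SCALE - COEF) * fmp->cnt) / SCALE;// mix in new events
 *	fmp->cnt = 0;					// events consumed
 *	fmp->time = now;				// remember update time
 *	spin_unlock(&fmp->lock);
 *
 * COEF, SCALE, seconds_elapsed and now are hypothetical stand-ins for
 * the fixed-point constants and bookkeeping used by the real code.
 */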

/*
 * Invalid partition error code
 */
enum prs_errcode {
        PERR_NONE = 0,
        PERR_INVCPUS,
        PERR_INVPARENT,
        PERR_NOTPART,
        PERR_NOTEXCL,
        PERR_NOCPUS,
        PERR_HOTPLUG,
        PERR_CPUSEMPTY,
        PERR_HKEEPING,
        PERR_ACCESS,
        PERR_REMOTE,
};

/* bits in struct cpuset flags field */
typedef enum {
        CS_CPU_EXCLUSIVE,
        CS_MEM_EXCLUSIVE,
        CS_MEM_HARDWALL,
        CS_MEMORY_MIGRATE,
        CS_SCHED_LOAD_BALANCE,
        CS_SPREAD_PAGE,
        CS_SPREAD_SLAB,
} cpuset_flagbits_t;

/* The various types of files and directories in a cpuset file system */

typedef enum {
        FILE_MEMORY_MIGRATE,
        FILE_CPULIST,
        FILE_MEMLIST,
        FILE_EFFECTIVE_CPULIST,
        FILE_EFFECTIVE_MEMLIST,
        FILE_SUBPARTS_CPULIST,
        FILE_EXCLUSIVE_CPULIST,
        FILE_EFFECTIVE_XCPULIST,
        FILE_ISOLATED_CPULIST,
        FILE_CPU_EXCLUSIVE,
        FILE_MEM_EXCLUSIVE,
        FILE_MEM_HARDWALL,
        FILE_SCHED_LOAD_BALANCE,
        FILE_PARTITION_ROOT,
        FILE_SCHED_RELAX_DOMAIN_LEVEL,
        FILE_MEMORY_PRESSURE_ENABLED,
        FILE_MEMORY_PRESSURE,
        FILE_SPREAD_PAGE,
        FILE_SPREAD_SLAB,
} cpuset_filetype_t;

struct cpuset {
        struct cgroup_subsys_state css;

        unsigned long flags;            /* "unsigned long" so bitops work */

        /*
         * On default hierarchy:
         *
         * The user-configured masks can only be changed by writing to
         * cpuset.cpus and cpuset.mems, and won't be limited by the
         * parent masks.
         *
         * The effective masks are the real masks that apply to the tasks
         * in the cpuset.  They may be changed when the configured masks
         * change or a CPU/memory hotplug event occurs.
         *
         * effective_mask == configured_mask & parent's effective_mask,
         * and if it ends up empty, it will inherit the parent's mask.
         *
         *
         * On legacy hierarchy:
         *
         * The user-configured masks are always the same as the effective masks.
         */
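
        /*
         * A minimal sketch of the rule above, using real cpumask helpers
         * but hypothetical local names (parent, cs) for illustration:
         *
         *	// parent->effective_cpus = 0-3, cs->cpus_allowed = 2-5
         *	cpumask_and(cs->effective_cpus, cs->cpus_allowed,
         *		    parent->effective_cpus);	// -> 2-3
         *	if (cpumask_empty(cs->effective_cpus))	// nothing usable left
         *		cpumask_copy(cs->effective_cpus,
         *			     parent->effective_cpus);
         */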

        /* user-configured CPUs and Memory Nodes allowed to tasks */
        cpumask_var_t cpus_allowed;
        nodemask_t mems_allowed;

        /* effective CPUs and Memory Nodes allowed to tasks */
        cpumask_var_t effective_cpus;
        nodemask_t effective_mems;

        /*
         * Exclusive CPUs dedicated to current cgroup (default hierarchy only)
         *
         * The effective_cpus of a valid partition root comes solely from
         * its effective_xcpus; some of the effective_xcpus may be
         * distributed to sub-partitions below and are hence excluded from
         * its effective_cpus.  For a valid partition root, effective_cpus
         * has no relationship with cpus_allowed unless exclusive_cpus is
         * unset, in which case effective_xcpus is derived from
         * cpus_allowed.
         *
         * This value will only be set if either exclusive_cpus is set or
         * when this cpuset becomes a local partition root.
         */
        cpumask_var_t effective_xcpus;

        /*
         * Exclusive CPUs as requested by the user (default hierarchy only)
         *
         * Its value is independent of cpus_allowed and designates the set of
         * CPUs that can be granted to the current cpuset or its children when
         * it becomes a valid partition root. The effective set of exclusive
         * CPUs granted (effective_xcpus) depends on whether those exclusive
         * CPUs are passed down by its ancestors and not yet taken up by
         * another sibling partition root along the way.
         *
         * If its value isn't set, it defaults to cpus_allowed.
         */
        cpumask_var_t exclusive_cpus;
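
        /*
         * A hedged sketch of the defaulting rule above (illustrative
         * only, not the actual update path; @requested is a scratch
         * mask invented for this example):
         *
         *	if (cpumask_empty(cs->exclusive_cpus))
         *		cpumask_copy(requested, cs->cpus_allowed);   // default
         *	else
         *		cpumask_copy(requested, cs->exclusive_cpus);
         *	// effective_xcpus then becomes the subset of @requested
         *	// passed down by ancestors and not already claimed by a
         *	// sibling partition root
         */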

        /*
         * The old Memory Nodes that tasks in this cpuset took on.
         *
         * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
         * - A new cpuset's old_mems_allowed is initialized when some
         *   task is moved into it.
         * - old_mems_allowed is used in cpuset_migrate_mm() when we change
         *   cpuset.mems_allowed and update the tasks' nodemasks; it is
         *   then updated to mems_allowed.
         */
        nodemask_t old_mems_allowed;

        /*
         * Tasks are being attached to this cpuset.  Used to prevent
         * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
         */
        int attach_in_progress;

        /* partition root state */
        int partition_root_state;

        /*
         * Whether this cpuset is a remote partition.
         * This used to be a list anchoring all remote partitions; we can
         * switch back to a list if we ever need to iterate over them.
         */
        bool remote_partition;

        /*
         * Number of SCHED_DEADLINE tasks attached to this cpuset, so that
         * we know when to rebuild the associated root domain's bandwidth
         * information.
         */
        int nr_deadline_tasks;
        int nr_migrate_dl_tasks;
        u64 sum_migrate_dl_bw;

        /* Invalid partition error code, not lock protected */
        enum prs_errcode prs_err;

        /* Handle for cpuset.cpus.partition */
        struct cgroup_file partition_file;

#ifdef CONFIG_CPUSETS_V1
        struct fmeter fmeter;           /* memory_pressure filter */

        /* for custom sched domain */
        int relax_domain_level;

        /* Used to merge intersecting subsets for generate_sched_domains */
        struct uf_node node;
#endif
};

extern struct cpuset top_cpuset;

static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
{
        return css ? container_of(css, struct cpuset, css) : NULL;
}

/* Retrieve the cpuset for a task */
static inline struct cpuset *task_cs(struct task_struct *task)
{
        return css_cs(task_css(task, cpuset_cgrp_id));
}

static inline struct cpuset *parent_cs(struct cpuset *cs)
{
        return css_cs(cs->css.parent);
}

/* convenient tests for these bits */
static inline bool is_cpuset_online(struct cpuset *cs)
{
        return css_is_online(&cs->css) && !css_is_dying(&cs->css);
}

static inline int is_cpu_exclusive(const struct cpuset *cs)
{
        return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
}

static inline int is_mem_exclusive(const struct cpuset *cs)
{
        return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
}

static inline int is_mem_hardwall(const struct cpuset *cs)
{
        return test_bit(CS_MEM_HARDWALL, &cs->flags);
}

static inline int is_sched_load_balance(const struct cpuset *cs)
{
        return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
}

static inline int is_memory_migrate(const struct cpuset *cs)
{
        return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
}

static inline int is_spread_page(const struct cpuset *cs)
{
        return test_bit(CS_SPREAD_PAGE, &cs->flags);
}

static inline int is_spread_slab(const struct cpuset *cs)
{
        return test_bit(CS_SPREAD_SLAB, &cs->flags);
}

/*
 * Helper routine for generate_sched_domains().
 * Do cpusets a and b have overlapping effective_cpus masks?
 */
static inline int cpusets_overlap(struct cpuset *a, struct cpuset *b)
{
        return cpumask_intersects(a->effective_cpus, b->effective_cpus);
}

static inline int nr_cpusets(void)
{
        /* jump label reference count + the top-level cpuset */
        return static_key_count(&cpusets_enabled_key.key) + 1;
}

static inline bool cpuset_is_populated(struct cpuset *cs)
{
        lockdep_assert_cpuset_lock_held();

        /* Cpusets in the process of attaching are considered populated */
        return cgroup_is_populated(cs->css.cgroup) ||
                cs->attach_in_progress;
}

/**
 * cpuset_for_each_child - traverse online children of a cpuset
 * @child_cs: loop cursor pointing to the current child
 * @pos_css: used for iteration
 * @parent_cs: target cpuset to walk children of
 *
 * Walk @child_cs through the online children of @parent_cs.  Must be used
 * with RCU read locked.
 */
#define cpuset_for_each_child(child_cs, pos_css, parent_cs)             \
        css_for_each_child((pos_css), &(parent_cs)->css)                \
                if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
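
/*
 * Example usage (a sketch, with hypothetical variable names): the walk
 * must be wrapped in rcu_read_lock()/rcu_read_unlock():
 *
 *	struct cpuset *child;
 *	struct cgroup_subsys_state *pos;
 *
 *	rcu_read_lock();
 *	cpuset_for_each_child(child, pos, parent)
 *		pr_info("online child %p\n", child);
 *	rcu_read_unlock();
 */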

/**
 * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
 * @des_cs: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @root_cs: target cpuset to walk descendants of
 *
 * Walk @des_cs through the online descendants of @root_cs.  Must be used
 * with RCU read locked.  The caller may modify @pos_css by calling
 * css_rightmost_descendant() to skip a subtree.  @root_cs is included in
 * the iteration and is the first node to be visited.
 */
#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)        \
        css_for_each_descendant_pre((pos_css), &(root_cs)->css)         \
                if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
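
/*
 * Example usage (a sketch; subtree_matters() is a hypothetical
 * predicate): an entire subtree can be skipped by advancing @pos_css
 * with css_rightmost_descendant():
 *
 *	rcu_read_lock();
 *	cpuset_for_each_descendant_pre(cs, pos, root) {
 *		if (!subtree_matters(cs)) {
 *			pos = css_rightmost_descendant(pos);
 *			continue;
 *		}
 *		// process cs
 *	}
 *	rcu_read_unlock();
 */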

void rebuild_sched_domains_locked(void);
void cpuset_callback_lock_irq(void);
void cpuset_callback_unlock_irq(void);
void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus);
void cpuset_update_tasks_nodemask(struct cpuset *cs);
int cpuset_update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on);
ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
                                    char *buf, size_t nbytes, loff_t off);
int cpuset_common_seq_show(struct seq_file *sf, void *v);
void cpuset_full_lock(void);
void cpuset_full_unlock(void);

/*
 * cpuset-v1.c
 */
#ifdef CONFIG_CPUSETS_V1
extern struct cftype cpuset1_files[];
void cpuset1_update_task_spread_flags(struct cpuset *cs,
                                        struct task_struct *tsk);
void cpuset1_update_tasks_flags(struct cpuset *cs);
void cpuset1_hotplug_update_tasks(struct cpuset *cs,
                            struct cpumask *new_cpus, nodemask_t *new_mems,
                            bool cpus_updated, bool mems_updated);
int cpuset1_validate_change(struct cpuset *cur, struct cpuset *trial);
bool cpuset1_cpus_excl_conflict(struct cpuset *cs1, struct cpuset *cs2);
void cpuset1_init(struct cpuset *cs);
void cpuset1_online_css(struct cgroup_subsys_state *css);
int cpuset1_generate_sched_domains(cpumask_var_t **domains,
                        struct sched_domain_attr **attributes);

#else
static inline void cpuset1_update_task_spread_flags(struct cpuset *cs,
                                        struct task_struct *tsk) {}
static inline void cpuset1_update_tasks_flags(struct cpuset *cs) {}
static inline void cpuset1_hotplug_update_tasks(struct cpuset *cs,
                            struct cpumask *new_cpus, nodemask_t *new_mems,
                            bool cpus_updated, bool mems_updated) {}
static inline int cpuset1_validate_change(struct cpuset *cur,
                                struct cpuset *trial) { return 0; }
static inline bool cpuset1_cpus_excl_conflict(struct cpuset *cs1,
                                        struct cpuset *cs2) { return false; }
static inline void cpuset1_init(struct cpuset *cs) {}
static inline void cpuset1_online_css(struct cgroup_subsys_state *css) {}
static inline int cpuset1_generate_sched_domains(cpumask_var_t **domains,
                        struct sched_domain_attr **attributes) { return 0; }

#endif /* CONFIG_CPUSETS_V1 */

#endif /* __CPUSET_INTERNAL_H */