// SPDX-License-Identifier: GPL-2.0
/*
 * Generic infrastructure for lifetime debugging of objects.
 *
 * Copyright (C) 2008, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
 */

#define pr_fmt(fmt) "ODEBUG: " fmt

#include <linux/cpu.h>
#include <linux/debugobjects.h>
#include <linux/debugfs.h>
#include <linux/hash.h>
#include <linux/kmemleak.h>
#include <linux/sched.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/task_stack.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/static_key.h>

#define ODEBUG_HASH_BITS        14
#define ODEBUG_HASH_SIZE        (1 << ODEBUG_HASH_BITS)

/* Must be power of two */
#define ODEBUG_BATCH_SIZE       16
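
/*
 * All transfers between pools happen in batches of ODEBUG_BATCH_SIZE
 * objects. Each object caches a pointer to the last list node of its
 * batch in obj->batch_last, which allows splicing a complete batch
 * in O(1).
 */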

/* Initial values. Must all be a multiple of batch size */
#define ODEBUG_POOL_SIZE        (64 * ODEBUG_BATCH_SIZE)
#define ODEBUG_POOL_MIN_LEVEL   (ODEBUG_POOL_SIZE / 4)

#define ODEBUG_POOL_PERCPU_SIZE (8 * ODEBUG_BATCH_SIZE)

#define ODEBUG_CHUNK_SHIFT      PAGE_SHIFT
#define ODEBUG_CHUNK_SIZE       (1 << ODEBUG_CHUNK_SHIFT)
#define ODEBUG_CHUNK_MASK       (~(ODEBUG_CHUNK_SIZE - 1))

/*
 * We limit the freeing of debug objects via workqueue at a maximum
 * frequency of 10Hz and about 1024 objects for each freeing operation.
 * So it is freeing at most 10k debug objects per second.
 */
#define ODEBUG_FREE_WORK_MAX    (1024 / ODEBUG_BATCH_SIZE)
#define ODEBUG_FREE_WORK_DELAY  DIV_ROUND_UP(HZ, 10)

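/* Hash bucket: lock protected list of tracked objects */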
struct debug_bucket {
        struct hlist_head       list;
        raw_spinlock_t          lock;
};

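/*
 * Usage statistics of the global pool:
 *  cur_used: objects handed out to the per CPU pools, in batch granularity
 *  max_used: watermark of cur_used
 *  min_fill: lowest fill level of pool_global seen so far
 */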
struct pool_stats {
        unsigned int            cur_used;
        unsigned int            max_used;
        unsigned int            min_fill;
};

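/*
 * A pool of free debug objects. @cnt is read locklessly via pool_count().
 * @min_cnt is the refill threshold, @max_cnt the capacity limit.
 */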
struct obj_pool {
        struct hlist_head       objects;
        unsigned int            cnt;
        unsigned int            min_cnt;
        unsigned int            max_cnt;
        struct pool_stats       stats;
} ____cacheline_aligned;

static DEFINE_PER_CPU_ALIGNED(struct obj_pool, pool_pcpu) = {
        .max_cnt        = ODEBUG_POOL_PERCPU_SIZE,
};

static struct debug_bucket      obj_hash[ODEBUG_HASH_SIZE];

static struct debug_obj         obj_static_pool[ODEBUG_POOL_SIZE] __initdata;

static DEFINE_RAW_SPINLOCK(pool_lock);

static struct obj_pool pool_global = {
        .min_cnt                = ODEBUG_POOL_MIN_LEVEL,
        .max_cnt                = ODEBUG_POOL_SIZE,
        .stats                  = {
                .min_fill       = ODEBUG_POOL_SIZE,
        },
};

static struct obj_pool pool_to_free = {
        .max_cnt        = UINT_MAX,
};

static HLIST_HEAD(pool_boot);

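/*
 * avg_usage is a load average style estimate of pool usage, maintained by
 * calc_usage(). obj_freeing is true while delayed free work is scheduled.
 */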
static unsigned long            avg_usage;
static bool                     obj_freeing;

static int __data_racy                  debug_objects_maxchain __read_mostly;
static int __data_racy __maybe_unused   debug_objects_maxchecked __read_mostly;
static int __data_racy                  debug_objects_fixups __read_mostly;
static int __data_racy                  debug_objects_warnings __read_mostly;
static bool __data_racy                 debug_objects_enabled __read_mostly
                                        = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;

static const struct debug_obj_descr     *descr_test  __read_mostly;
static struct kmem_cache                *obj_cache __ro_after_init;

/*
 * Track the number of kmem_cache_alloc()/kmem_cache_free() calls done.
 */
static int __data_racy          debug_objects_allocated;
static int __data_racy          debug_objects_freed;

static void free_obj_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(debug_obj_work, free_obj_work);

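/* Set once the slab cache has replaced the static boot pool */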
static DEFINE_STATIC_KEY_FALSE(obj_cache_enabled);

static int __init enable_object_debug(char *str)
{
        debug_objects_enabled = true;
        return 0;
}
early_param("debug_objects", enable_object_debug);

static int __init disable_object_debug(char *str)
{
        debug_objects_enabled = false;
        return 0;
}
early_param("no_debug_objects", disable_object_debug);

static const char *obj_states[ODEBUG_STATE_MAX] = {
        [ODEBUG_STATE_NONE]             = "none",
        [ODEBUG_STATE_INIT]             = "initialized",
        [ODEBUG_STATE_INACTIVE]         = "inactive",
        [ODEBUG_STATE_ACTIVE]           = "active",
        [ODEBUG_STATE_DESTROYED]        = "destroyed",
        [ODEBUG_STATE_NOTAVAILABLE]     = "not available",
};

static __always_inline unsigned int pool_count(struct obj_pool *pool)
{
        return READ_ONCE(pool->cnt);
}

static __always_inline bool pool_should_refill(struct obj_pool *pool)
{
        return pool_count(pool) < pool->min_cnt;
}

static __always_inline bool pool_must_refill(struct obj_pool *pool)
{
        return pool_count(pool) < pool->min_cnt / 2;
}

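/*
 * Move one batch of ODEBUG_BATCH_SIZE objects from the front of @src to
 * the front of @dst. Relies on the batch_last pointer cached in the first
 * object of the batch. Called with pool_lock held.
 */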
static bool pool_move_batch(struct obj_pool *dst, struct obj_pool *src)
{
        struct hlist_node *last, *next_batch, *first_batch;
        struct debug_obj *obj;

        if (dst->cnt >= dst->max_cnt || !src->cnt)
                return false;

        first_batch = src->objects.first;
        obj = hlist_entry(first_batch, typeof(*obj), node);
        last = obj->batch_last;
        next_batch = last->next;

        /* Move the next batch to the front of the source pool */
        src->objects.first = next_batch;
        if (next_batch)
                next_batch->pprev = &src->objects.first;

        /* Add the extracted batch to the destination pool */
        last->next = dst->objects.first;
        if (last->next)
                last->next->pprev = &last->next;
        first_batch->pprev = &dst->objects.first;
        dst->objects.first = first_batch;

        WRITE_ONCE(src->cnt, src->cnt - ODEBUG_BATCH_SIZE);
        WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE);
        return true;
}

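/* Push the batch in @head, prepared by kmem_alloc_batch(), onto @dst */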
static bool pool_push_batch(struct obj_pool *dst, struct hlist_head *head)
{
        struct hlist_node *last;
        struct debug_obj *obj;

        if (dst->cnt >= dst->max_cnt)
                return false;

        obj = hlist_entry(head->first, typeof(*obj), node);
        last = obj->batch_last;

        hlist_splice_init(head, last, &dst->objects);
        WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE);
        return true;
}

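/*
 * Detach the first batch of @src and hand it out through @head.
 * Called with pool_lock held.
 */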
static bool pool_pop_batch(struct hlist_head *head, struct obj_pool *src)
{
        struct hlist_node *last, *next;
        struct debug_obj *obj;

        if (!src->cnt)
                return false;

        /* Move the complete list to the head */
        hlist_move_list(&src->objects, head);

        obj = hlist_entry(head->first, typeof(*obj), node);
        last = obj->batch_last;
        next = last->next;
        /* Disconnect the batch from the list */
        last->next = NULL;

        /* Move the node after last back to the source pool. */
        src->objects.first = next;
        if (next)
                next->pprev = &src->objects.first;

        WRITE_ONCE(src->cnt, src->cnt - ODEBUG_BATCH_SIZE);
        return true;
}

static struct debug_obj *__alloc_object(struct hlist_head *list)
{
        struct debug_obj *obj;

        if (unlikely(!list->first))
                return NULL;

        obj = hlist_entry(list->first, typeof(*obj), node);
        hlist_del(&obj->node);
        return obj;
}

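/* Account a batch handed out from the global pools to a per CPU pool */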
static void pcpu_refill_stats(void)
{
        struct pool_stats *stats = &pool_global.stats;

        WRITE_ONCE(stats->cur_used, stats->cur_used + ODEBUG_BATCH_SIZE);

        if (stats->cur_used > stats->max_used)
                stats->max_used = stats->cur_used;

        if (pool_global.cnt < stats->min_fill)
                stats->min_fill = pool_global.cnt;
}

static struct debug_obj *pcpu_alloc(void)
{
        struct obj_pool *pcp = this_cpu_ptr(&pool_pcpu);

        lockdep_assert_irqs_disabled();

        for (;;) {
                struct debug_obj *obj = __alloc_object(&pcp->objects);

                if (likely(obj)) {
                        pcp->cnt--;
                        /*
                         * If this emptied a batch try to refill from the
                         * free pool. Don't do that if this was the top-most
                         * batch as pcpu_free() expects the per CPU pool
                         * to be less than ODEBUG_POOL_PERCPU_SIZE.
                         */
                        if (unlikely(pcp->cnt < (ODEBUG_POOL_PERCPU_SIZE - ODEBUG_BATCH_SIZE) &&
                                     !(pcp->cnt % ODEBUG_BATCH_SIZE))) {
                                /*
                                 * Don't try to allocate from the regular pool here
                                 * to not exhaust it prematurely.
                                 */
                                if (pool_count(&pool_to_free)) {
                                        guard(raw_spinlock)(&pool_lock);
                                        pool_move_batch(pcp, &pool_to_free);
                                        pcpu_refill_stats();
                                }
                        }
                        return obj;
                }

                guard(raw_spinlock)(&pool_lock);
                if (!pool_move_batch(pcp, &pool_to_free)) {
                        if (!pool_move_batch(pcp, &pool_global))
                                return NULL;
                }
                pcpu_refill_stats();
        }
}

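/*
 * Free into the per CPU pool. Objects are kept in batch order: when the
 * top batch is complete (pcp->cnt is a multiple of ODEBUG_BATCH_SIZE)
 * the freed object opens a new batch and becomes its own batch_last,
 * otherwise it inherits batch_last from the current batch head.
 */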
static void pcpu_free(struct debug_obj *obj)
{
        struct obj_pool *pcp = this_cpu_ptr(&pool_pcpu);
        struct debug_obj *first;

        lockdep_assert_irqs_disabled();

        if (!(pcp->cnt % ODEBUG_BATCH_SIZE)) {
                obj->batch_last = &obj->node;
        } else {
                first = hlist_entry(pcp->objects.first, typeof(*first), node);
                obj->batch_last = first->batch_last;
        }
        hlist_add_head(&obj->node, &pcp->objects);
        pcp->cnt++;

        /* Pool full ? */
        if (pcp->cnt < ODEBUG_POOL_PERCPU_SIZE)
                return;

        /* Remove a batch from the per CPU pool */
        guard(raw_spinlock)(&pool_lock);
        /* Try to fit the batch into the pool_global first */
        if (!pool_move_batch(&pool_global, pcp))
                pool_move_batch(&pool_to_free, pcp);
        WRITE_ONCE(pool_global.stats.cur_used, pool_global.stats.cur_used - ODEBUG_BATCH_SIZE);
}

static void free_object_list(struct hlist_head *head)
{
        struct hlist_node *tmp;
        struct debug_obj *obj;
        int cnt = 0;

        hlist_for_each_entry_safe(obj, tmp, head, node) {
                hlist_del(&obj->node);
                kmem_cache_free(obj_cache, obj);
                cnt++;
        }
        debug_objects_freed += cnt;
}

static void fill_pool_from_freelist(void)
{
        static unsigned long state;

        /*
         * Reuse objects from the global pool_to_free list; they will be
         * reinitialized when allocating.
         */
        if (!pool_count(&pool_to_free))
                return;

        /*
         * Prevent the context from being scheduled or interrupted after
         * setting the state flag.
         */
        guard(irqsave)();

        /*
         * Avoid lock contention on &pool_lock and avoid making the cache
         * line exclusive by testing the bit before attempting to set it.
         */
        if (test_bit(0, &state) || test_and_set_bit(0, &state))
                return;

        /* Avoid taking the lock when there is no work to do */
        while (pool_should_refill(&pool_global) && pool_count(&pool_to_free)) {
                guard(raw_spinlock)(&pool_lock);
                /* Move a batch if possible */
                pool_move_batch(&pool_global, &pool_to_free);
        }
        clear_bit(0, &state);
}

static bool kmem_alloc_batch(struct hlist_head *head, struct kmem_cache *cache, gfp_t gfp)
{
        struct hlist_node *last = NULL;
        struct debug_obj *obj;

        for (int cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
                obj = kmem_cache_zalloc(cache, gfp);
                if (!obj) {
                        free_object_list(head);
                        return false;
                }
                debug_objects_allocated++;

                if (!last)
                        last = &obj->node;
                obj->batch_last = last;

                hlist_add_head(&obj->node, head);
        }
        return true;
}

static void fill_pool(void)
{
        static atomic_t cpus_allocating;

        /*
         * Avoid allocation and lock contention when:
         *   - one other CPU is already allocating
         *   - the global pool has not reached the critical level yet
         */
        if (!pool_must_refill(&pool_global) && atomic_read(&cpus_allocating))
                return;

        atomic_inc(&cpus_allocating);
        while (pool_should_refill(&pool_global)) {
                gfp_t gfp = __GFP_HIGH | __GFP_NOWARN;
                HLIST_HEAD(head);

                /*
                 * Allow reclaim only in preemptible context and during
                 * early boot. If not preemptible, the caller might hold
                 * locks causing a deadlock in the allocator.
                 *
                 * If the reclaim flag is not set during early boot then
                 * allocations, which happen before deferred page
                 * initialization has completed, will fail.
                 *
                 * In preemptible context the flag is harmless and not a
                 * performance issue as that's usually invoked from slow
                 * path initialization context.
                 */
                if (preemptible() || system_state < SYSTEM_SCHEDULING)
                        gfp |= __GFP_KSWAPD_RECLAIM;

                if (!kmem_alloc_batch(&head, obj_cache, gfp))
                        break;

                guard(raw_spinlock_irqsave)(&pool_lock);
                if (!pool_push_batch(&pool_global, &head))
                        pool_push_batch(&pool_to_free, &head);
        }
        atomic_dec(&cpus_allocating);
}

/*
 * Lookup an object in the hash bucket.
 */
static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b)
{
        struct debug_obj *obj;
        int cnt = 0;

        hlist_for_each_entry(obj, &b->list, node) {
                cnt++;
                if (obj->object == addr)
                        return obj;
        }
        if (cnt > debug_objects_maxchain)
                debug_objects_maxchain = cnt;

        return NULL;
}

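/*
 * Maintain a load average style estimate of pool usage (EXP_5 decay via
 * calc_load()), updated at most once per 10ms. free_obj_work() uses the
 * trend of this average to rate limit the freeing of objects.
 */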
static void calc_usage(void)
{
        static DEFINE_RAW_SPINLOCK(avg_lock);
        static unsigned long avg_period;
        unsigned long cur, now = jiffies;

        if (!time_after_eq(now, READ_ONCE(avg_period)))
                return;

        if (!raw_spin_trylock(&avg_lock))
                return;

        WRITE_ONCE(avg_period, now + msecs_to_jiffies(10));
        cur = READ_ONCE(pool_global.stats.cur_used) * ODEBUG_FREE_WORK_MAX;
        WRITE_ONCE(avg_usage, calc_load(avg_usage, EXP_5, cur));
        raw_spin_unlock(&avg_lock);
}

static struct debug_obj *alloc_object(void *addr, struct debug_bucket *b,
                                      const struct debug_obj_descr *descr)
{
        struct debug_obj *obj;

        calc_usage();

        if (static_branch_likely(&obj_cache_enabled))
                obj = pcpu_alloc();
        else
                obj = __alloc_object(&pool_boot);

        if (likely(obj)) {
                obj->object = addr;
                obj->descr  = descr;
                obj->state  = ODEBUG_STATE_NONE;
                obj->astate = 0;
                hlist_add_head(&obj->node, &b->list);
        }
        return obj;
}

/* Workqueue function to free objects. */
static void free_obj_work(struct work_struct *work)
{
        static unsigned long last_use_avg;
        unsigned long cur_used, last_used, delta;
        unsigned int max_free = 0;

        WRITE_ONCE(obj_freeing, false);

        /* Rate limit freeing based on current use average */
        cur_used = READ_ONCE(avg_usage);
        last_used = last_use_avg;
        last_use_avg = cur_used;

        if (!pool_count(&pool_to_free))
                return;

        if (cur_used <= last_used) {
                delta = (last_used - cur_used) / ODEBUG_FREE_WORK_MAX;
                max_free = min(delta, ODEBUG_FREE_WORK_MAX);
        }

        for (int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) {
                HLIST_HEAD(tofree);

                /* Acquire and drop the lock for each batch */
                scoped_guard(raw_spinlock_irqsave, &pool_lock) {
                        if (!pool_to_free.cnt)
                                return;

                        /* Refill the global pool if possible */
                        if (pool_move_batch(&pool_global, &pool_to_free)) {
                                /* Don't free as there seems to be demand */
                                max_free = 0;
                        } else if (max_free) {
                                pool_pop_batch(&tofree, &pool_to_free);
                                max_free--;
                        } else {
                                return;
                        }
                }
                free_object_list(&tofree);
        }
}

static void __free_object(struct debug_obj *obj)
{
        guard(irqsave)();
        if (static_branch_likely(&obj_cache_enabled))
                pcpu_free(obj);
        else
                hlist_add_head(&obj->node, &pool_boot);
}

/*
 * Put the object back into the pool and schedule work to free objects
 * if necessary.
 */
static void free_object(struct debug_obj *obj)
{
        __free_object(obj);
        if (!READ_ONCE(obj_freeing) && pool_count(&pool_to_free)) {
                WRITE_ONCE(obj_freeing, true);
                schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY);
        }
}

static void put_objects(struct hlist_head *list)
{
        struct hlist_node *tmp;
        struct debug_obj *obj;

        /*
         * Using free_object() puts the objects into reuse or schedules
         * them for freeing and it gets all the accounting correct.
         */
        hlist_for_each_entry_safe(obj, tmp, list, node) {
                hlist_del(&obj->node);
                free_object(obj);
        }
}

#ifdef CONFIG_HOTPLUG_CPU
static int object_cpu_offline(unsigned int cpu)
{
        /* Remote access is safe as the CPU is dead already */
        struct obj_pool *pcp = per_cpu_ptr(&pool_pcpu, cpu);

        put_objects(&pcp->objects);
        pcp->cnt = 0;
        return 0;
}
#endif

/* Out of memory. Free all objects from hash */
static void debug_objects_oom(void)
{
        struct debug_bucket *db = obj_hash;
        HLIST_HEAD(freelist);

        pr_warn("Out of memory. ODEBUG disabled\n");

        for (int i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
                scoped_guard(raw_spinlock_irqsave, &db->lock)
                        hlist_move_list(&db->list, &freelist);

                put_objects(&freelist);
        }
}

/*
 * We use the page-sized chunk number of the address (the address shifted
 * right by ODEBUG_CHUNK_SHIFT) for the hash. That way we can check for
 * freed objects simply by checking the affected bucket.
 */
static struct debug_bucket *get_bucket(unsigned long addr)
{
        unsigned long hash;

        hash = hash_long((addr >> ODEBUG_CHUNK_SHIFT), ODEBUG_HASH_BITS);
        return &obj_hash[hash];
}

static void debug_print_object(struct debug_obj *obj, char *msg)
{
        const struct debug_obj_descr *descr = obj->descr;
        static int limit;

        /*
         * Don't report if lookup_object_or_alloc() by the current thread
         * failed because lookup_object_or_alloc()/debug_objects_oom() by a
         * concurrent thread turned off debug_objects_enabled and cleared
         * the hash buckets.
         */
        if (!debug_objects_enabled)
                return;

        if (limit < 5 && descr != descr_test) {
                void *hint = descr->debug_hint ?
                        descr->debug_hint(obj->object) : NULL;
                limit++;
                WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
                                 "object: %p object type: %s hint: %pS\n",
                        msg, obj_states[obj->state], obj->astate,
                        obj->object, descr->name, hint);
        }
        debug_objects_warnings++;
}

/*
 * Try to repair the damage, so we have a better chance to get useful
 * debug output.
 */
static bool
debug_object_fixup(bool (*fixup)(void *addr, enum debug_obj_state state),
                   void * addr, enum debug_obj_state state)
{
        if (fixup && fixup(addr, state)) {
                debug_objects_fixups++;
                return true;
        }
        return false;
}

static void debug_object_is_on_stack(void *addr, int onstack)
{
        int is_on_stack;
        static int limit;

        if (limit > 4)
                return;

        is_on_stack = object_is_on_stack(addr);
        if (is_on_stack == onstack)
                return;

        limit++;
        if (is_on_stack)
                pr_warn("object %p is on stack %p, but NOT annotated.\n", addr,
                         task_stack_page(current));
        else
                pr_warn("object %p is NOT on stack %p, but annotated.\n", addr,
                         task_stack_page(current));

        WARN_ON(1);
}

static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket *b,
                                                const struct debug_obj_descr *descr,
                                                bool onstack, bool alloc_ifstatic)
{
        struct debug_obj *obj = lookup_object(addr, b);
        enum debug_obj_state state = ODEBUG_STATE_NONE;

        if (likely(obj))
                return obj;

        /*
         * debug_object_init() unconditionally allocates untracked
         * objects. It does not matter whether it is a static object or
         * not.
         *
         * debug_object_assert_init() and debug_object_activate() allow
         * allocation only if the descriptor callback confirms that the
         * object is static and considered initialized. For non-static
         * objects the allocation needs to be done from the fixup callback.
         */
        if (unlikely(alloc_ifstatic)) {
                if (!descr->is_static_object || !descr->is_static_object(addr))
                        return ERR_PTR(-ENOENT);
                /* Statically allocated objects are considered initialized */
                state = ODEBUG_STATE_INIT;
        }

        obj = alloc_object(addr, b, descr);
        if (likely(obj)) {
                obj->state = state;
                debug_object_is_on_stack(addr, onstack);
                return obj;
        }

        /* Out of memory. Do the cleanup outside of the locked region */
        debug_objects_enabled = false;
        return NULL;
}

static void debug_objects_fill_pool(void)
{
        if (!static_branch_likely(&obj_cache_enabled))
                return;

        if (likely(!pool_should_refill(&pool_global)))
                return;

        /* Try reusing objects from the pool_to_free list */
        fill_pool_from_freelist();

        if (likely(!pool_should_refill(&pool_global)))
                return;

        /*
         * On RT enabled kernels the pool refill must happen in preemptible
         * context -- for !RT kernels we rely on the fact that spinlock_t and
         * raw_spinlock_t are basically the same type and this lock-type
         * inversion works just fine.
         */
        if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) {
                /*
                 * Annotate away the spinlock_t inside raw_spinlock_t warning
                 * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching
                 * the preemptible() condition above.
                 */
                static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_CONFIG);
                lock_map_acquire_try(&fill_pool_map);
                fill_pool();
                lock_map_release(&fill_pool_map);
        }
}

static void
__debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack)
{
        struct debug_obj *obj, o;
        struct debug_bucket *db;
        unsigned long flags;

        debug_objects_fill_pool();

        db = get_bucket((unsigned long) addr);

        raw_spin_lock_irqsave(&db->lock, flags);

        obj = lookup_object_or_alloc(addr, db, descr, onstack, false);
        if (unlikely(!obj)) {
                raw_spin_unlock_irqrestore(&db->lock, flags);
                debug_objects_oom();
                return;
        }

        switch (obj->state) {
        case ODEBUG_STATE_NONE:
        case ODEBUG_STATE_INIT:
        case ODEBUG_STATE_INACTIVE:
                obj->state = ODEBUG_STATE_INIT;
                raw_spin_unlock_irqrestore(&db->lock, flags);
                return;
        default:
                break;
        }

        o = *obj;
        raw_spin_unlock_irqrestore(&db->lock, flags);
        debug_print_object(&o, "init");

        if (o.state == ODEBUG_STATE_ACTIVE)
                debug_object_fixup(descr->fixup_init, addr, o.state);
}

/**
 * debug_object_init - debug checks when an object is initialized
 * @addr:       address of the object
 * @descr:      pointer to an object specific debug description structure
 */
void debug_object_init(void *addr, const struct debug_obj_descr *descr)
{
        if (!debug_objects_enabled)
                return;

        __debug_object_init(addr, descr, 0);
}
EXPORT_SYMBOL_GPL(debug_object_init);
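
/*
 * Illustrative sketch (hypothetical, not from this file): a subsystem
 * tracking the lifetime of a "struct foo" hooks the debug_object_*()
 * calls into its own lifetime functions:
 *
 *	static const struct debug_obj_descr foo_debug_descr = {
 *		.name = "foo",
 *	};
 *
 *	void foo_init(struct foo *f)
 *	{
 *		debug_object_init(f, &foo_debug_descr);
 *	}
 *
 *	void foo_start(struct foo *f)
 *	{
 *		debug_object_activate(f, &foo_debug_descr);
 *	}
 *
 *	void foo_release(struct foo *f)
 *	{
 *		debug_object_free(f, &foo_debug_descr);
 *	}
 */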

/**
 * debug_object_init_on_stack - debug checks when an object on stack is
 *                              initialized
 * @addr:       address of the object
 * @descr:      pointer to an object specific debug description structure
 */
void debug_object_init_on_stack(void *addr, const struct debug_obj_descr *descr)
{
        if (!debug_objects_enabled)
                return;

        __debug_object_init(addr, descr, 1);
}
EXPORT_SYMBOL_GPL(debug_object_init_on_stack);

/**
 * debug_object_activate - debug checks when an object is activated
 * @addr:       address of the object
 * @descr:      pointer to an object specific debug description structure
 * Returns 0 on success, -EINVAL if the check failed.
 */
int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
{
        struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
        struct debug_bucket *db;
        struct debug_obj *obj;
        unsigned long flags;

        if (!debug_objects_enabled)
                return 0;

        debug_objects_fill_pool();

        db = get_bucket((unsigned long) addr);

        raw_spin_lock_irqsave(&db->lock, flags);

        obj = lookup_object_or_alloc(addr, db, descr, false, true);
        if (unlikely(!obj)) {
                raw_spin_unlock_irqrestore(&db->lock, flags);
                debug_objects_oom();
                return 0;
        } else if (likely(!IS_ERR(obj))) {
                switch (obj->state) {
                case ODEBUG_STATE_ACTIVE:
                case ODEBUG_STATE_DESTROYED:
                        o = *obj;
                        break;
                case ODEBUG_STATE_INIT:
                case ODEBUG_STATE_INACTIVE:
                        obj->state = ODEBUG_STATE_ACTIVE;
                        fallthrough;
                default:
                        raw_spin_unlock_irqrestore(&db->lock, flags);
                        return 0;
                }
        }

        raw_spin_unlock_irqrestore(&db->lock, flags);
        debug_print_object(&o, "activate");

        switch (o.state) {
        case ODEBUG_STATE_ACTIVE:
        case ODEBUG_STATE_NOTAVAILABLE:
                if (debug_object_fixup(descr->fixup_activate, addr, o.state))
                        return 0;
                fallthrough;
        default:
                return -EINVAL;
        }
}
EXPORT_SYMBOL_GPL(debug_object_activate);

/**
 * debug_object_deactivate - debug checks when an object is deactivated
 * @addr:       address of the object
 * @descr:      pointer to an object specific debug description structure
 */
void debug_object_deactivate(void *addr, const struct debug_obj_descr *descr)
{
        struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
        struct debug_bucket *db;
        struct debug_obj *obj;
        unsigned long flags;

        if (!debug_objects_enabled)
                return;

        db = get_bucket((unsigned long) addr);

        raw_spin_lock_irqsave(&db->lock, flags);

        obj = lookup_object(addr, db);
        if (obj) {
                switch (obj->state) {
                case ODEBUG_STATE_DESTROYED:
                        break;
                case ODEBUG_STATE_INIT:
                case ODEBUG_STATE_INACTIVE:
                case ODEBUG_STATE_ACTIVE:
                        if (obj->astate)
                                break;
                        obj->state = ODEBUG_STATE_INACTIVE;
                        fallthrough;
                default:
                        raw_spin_unlock_irqrestore(&db->lock, flags);
                        return;
                }
                o = *obj;
        }

        raw_spin_unlock_irqrestore(&db->lock, flags);
        debug_print_object(&o, "deactivate");
}
EXPORT_SYMBOL_GPL(debug_object_deactivate);

/**
 * debug_object_destroy - debug checks when an object is destroyed
 * @addr:       address of the object
 * @descr:      pointer to an object specific debug description structure
 */
void debug_object_destroy(void *addr, const struct debug_obj_descr *descr)
{
        struct debug_obj *obj, o;
        struct debug_bucket *db;
        unsigned long flags;

        if (!debug_objects_enabled)
                return;

        db = get_bucket((unsigned long) addr);

        raw_spin_lock_irqsave(&db->lock, flags);

        obj = lookup_object(addr, db);
        if (!obj) {
                raw_spin_unlock_irqrestore(&db->lock, flags);
                return;
        }

        switch (obj->state) {
        case ODEBUG_STATE_ACTIVE:
        case ODEBUG_STATE_DESTROYED:
                break;
        case ODEBUG_STATE_NONE:
        case ODEBUG_STATE_INIT:
        case ODEBUG_STATE_INACTIVE:
                obj->state = ODEBUG_STATE_DESTROYED;
                fallthrough;
        default:
                raw_spin_unlock_irqrestore(&db->lock, flags);
                return;
        }

        o = *obj;
        raw_spin_unlock_irqrestore(&db->lock, flags);
        debug_print_object(&o, "destroy");

        if (o.state == ODEBUG_STATE_ACTIVE)
                debug_object_fixup(descr->fixup_destroy, addr, o.state);
}
EXPORT_SYMBOL_GPL(debug_object_destroy);

/**
 * debug_object_free - debug checks when an object is freed
 * @addr:       address of the object
 * @descr:      pointer to an object specific debug description structure
 */
void debug_object_free(void *addr, const struct debug_obj_descr *descr)
{
        struct debug_obj *obj, o;
        struct debug_bucket *db;
        unsigned long flags;

        if (!debug_objects_enabled)
                return;

        db = get_bucket((unsigned long) addr);

        raw_spin_lock_irqsave(&db->lock, flags);

        obj = lookup_object(addr, db);
        if (!obj) {
                raw_spin_unlock_irqrestore(&db->lock, flags);
                return;
        }

        switch (obj->state) {
        case ODEBUG_STATE_ACTIVE:
                break;
        default:
                hlist_del(&obj->node);
                raw_spin_unlock_irqrestore(&db->lock, flags);
                free_object(obj);
                return;
        }

        o = *obj;
        raw_spin_unlock_irqrestore(&db->lock, flags);
        debug_print_object(&o, "free");

        debug_object_fixup(descr->fixup_free, addr, o.state);
}
EXPORT_SYMBOL_GPL(debug_object_free);

/**
 * debug_object_assert_init - debug checks when an object should be initialized
 * @addr:       address of the object
 * @descr:      pointer to an object specific debug description structure
 */
void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
{
        struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
        struct debug_bucket *db;
        struct debug_obj *obj;
        unsigned long flags;

        if (!debug_objects_enabled)
                return;

        debug_objects_fill_pool();

        db = get_bucket((unsigned long) addr);

        raw_spin_lock_irqsave(&db->lock, flags);
        obj = lookup_object_or_alloc(addr, db, descr, false, true);
        raw_spin_unlock_irqrestore(&db->lock, flags);
        if (likely(!IS_ERR_OR_NULL(obj)))
                return;

        /* If NULL the allocation has hit OOM */
        if (!obj) {
                debug_objects_oom();
                return;
        }

        /* Object is neither tracked nor static. It's not initialized. */
        debug_print_object(&o, "assert_init");
        debug_object_fixup(descr->fixup_assert_init, addr, ODEBUG_STATE_NOTAVAILABLE);
}
EXPORT_SYMBOL_GPL(debug_object_assert_init);

/**
 * debug_object_active_state - debug checks object usage state machine
 * @addr:       address of the object
 * @descr:      pointer to an object specific debug description structure
 * @expect:     expected state
 * @next:       state to move to if expected state is found
 */
void
debug_object_active_state(void *addr, const struct debug_obj_descr *descr,
                          unsigned int expect, unsigned int next)
{
        struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
        struct debug_bucket *db;
        struct debug_obj *obj;
        unsigned long flags;

        if (!debug_objects_enabled)
                return;

        db = get_bucket((unsigned long) addr);

        raw_spin_lock_irqsave(&db->lock, flags);

        obj = lookup_object(addr, db);
        if (obj) {
                switch (obj->state) {
                case ODEBUG_STATE_ACTIVE:
                        if (obj->astate != expect)
                                break;
                        obj->astate = next;
                        raw_spin_unlock_irqrestore(&db->lock, flags);
                        return;
                default:
                        break;
                }
                o = *obj;
        }

        raw_spin_unlock_irqrestore(&db->lock, flags);
        debug_print_object(&o, "active_state");
}
EXPORT_SYMBOL_GPL(debug_object_active_state);

#ifdef CONFIG_DEBUG_OBJECTS_FREE
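/*
 * Check whether the freed memory range [address, address + size) still
 * contains tracked objects. Active objects are reported and handed to
 * their fixup_free() callback; all others are removed from the hash
 * and put back into the pools.
 */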
static void __debug_check_no_obj_freed(const void *address, unsigned long size)
{
        unsigned long flags, oaddr, saddr, eaddr, paddr, chunks;
        int cnt, objs_checked = 0;
        struct debug_obj *obj, o;
        struct debug_bucket *db;
        struct hlist_node *tmp;

        saddr = (unsigned long) address;
        eaddr = saddr + size;
        paddr = saddr & ODEBUG_CHUNK_MASK;
        chunks = ((eaddr - paddr) + (ODEBUG_CHUNK_SIZE - 1));
        chunks >>= ODEBUG_CHUNK_SHIFT;

        for (;chunks > 0; chunks--, paddr += ODEBUG_CHUNK_SIZE) {
                db = get_bucket(paddr);

repeat:
                cnt = 0;
                raw_spin_lock_irqsave(&db->lock, flags);
                hlist_for_each_entry_safe(obj, tmp, &db->list, node) {
                        cnt++;
                        oaddr = (unsigned long) obj->object;
                        if (oaddr < saddr || oaddr >= eaddr)
                                continue;

                        switch (obj->state) {
                        case ODEBUG_STATE_ACTIVE:
                                o = *obj;
                                raw_spin_unlock_irqrestore(&db->lock, flags);
                                debug_print_object(&o, "free");
                                debug_object_fixup(o.descr->fixup_free, (void *)oaddr, o.state);
                                goto repeat;
                        default:
                                hlist_del(&obj->node);
                                __free_object(obj);
                                break;
                        }
                }
                raw_spin_unlock_irqrestore(&db->lock, flags);

                if (cnt > debug_objects_maxchain)
                        debug_objects_maxchain = cnt;

                objs_checked += cnt;
        }

        if (objs_checked > debug_objects_maxchecked)
                debug_objects_maxchecked = objs_checked;

        /* Schedule work to actually kmem_cache_free() objects */
        if (!READ_ONCE(obj_freeing) && pool_count(&pool_to_free)) {
                WRITE_ONCE(obj_freeing, true);
                schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY);
        }
}

void debug_check_no_obj_freed(const void *address, unsigned long size)
{
        if (debug_objects_enabled)
                __debug_check_no_obj_freed(address, size);
}
#endif

#ifdef CONFIG_DEBUG_FS

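/*
 * The statistics below are exposed via debugfs, typically readable at
 * /sys/kernel/debug/debug_objects/stats (depending on where debugfs is
 * mounted).
 */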
static int debug_stats_show(struct seq_file *m, void *v)
{
        unsigned int cpu, pool_used, pcp_free = 0;

        /*
         * pool_global.stats.cur_used is the number of objects currently
         * handed out to the per CPU pools, accounted in batch granularity.
         * Subtract the number of objects still sitting free in the per
         * CPU pools. As this is lockless the number is an estimate.
         */
        for_each_possible_cpu(cpu)
                pcp_free += per_cpu(pool_pcpu.cnt, cpu);

        pool_used = READ_ONCE(pool_global.stats.cur_used);
        pcp_free = min(pool_used, pcp_free);
        pool_used -= pcp_free;

        seq_printf(m, "max_chain     : %d\n", debug_objects_maxchain);
        seq_printf(m, "max_checked   : %d\n", debug_objects_maxchecked);
        seq_printf(m, "warnings      : %d\n", debug_objects_warnings);
        seq_printf(m, "fixups        : %d\n", debug_objects_fixups);
        seq_printf(m, "pool_free     : %u\n", pool_count(&pool_global) + pcp_free);
        seq_printf(m, "pool_pcp_free : %u\n", pcp_free);
        seq_printf(m, "pool_min_free : %u\n", data_race(pool_global.stats.min_fill));
        seq_printf(m, "pool_used     : %u\n", pool_used);
        seq_printf(m, "pool_max_used : %u\n", data_race(pool_global.stats.max_used));
        seq_printf(m, "on_free_list  : %u\n", pool_count(&pool_to_free));
        seq_printf(m, "objs_allocated: %d\n", debug_objects_allocated);
        seq_printf(m, "objs_freed    : %d\n", debug_objects_freed);
        return 0;
}
DEFINE_SHOW_ATTRIBUTE(debug_stats);

static int __init debug_objects_init_debugfs(void)
{
        struct dentry *dbgdir;

        if (!debug_objects_enabled)
                return 0;

        dbgdir = debugfs_create_dir("debug_objects", NULL);

        debugfs_create_file("stats", 0444, dbgdir, NULL, &debug_stats_fops);

        return 0;
}
__initcall(debug_objects_init_debugfs);

#else
static inline void debug_objects_init_debugfs(void) { }
#endif

#ifdef CONFIG_DEBUG_OBJECTS_SELFTEST

/* Random data structure for the self test */
struct self_test {
        unsigned long   dummy1[6];
        int             static_init;
        unsigned long   dummy2[3];
};

static __initconst const struct debug_obj_descr descr_type_test;

static bool __init is_static_object(void *addr)
{
        struct self_test *obj = addr;

        return obj->static_init;
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static bool __init fixup_init(void *addr, enum debug_obj_state state)
{
        struct self_test *obj = addr;

        switch (state) {
        case ODEBUG_STATE_ACTIVE:
                debug_object_deactivate(obj, &descr_type_test);
                debug_object_init(obj, &descr_type_test);
                return true;
        default:
                return false;
        }
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown non-static object is activated
 */
static bool __init fixup_activate(void *addr, enum debug_obj_state state)
{
        struct self_test *obj = addr;

        switch (state) {
        case ODEBUG_STATE_NOTAVAILABLE:
                return true;
        case ODEBUG_STATE_ACTIVE:
                debug_object_deactivate(obj, &descr_type_test);
                debug_object_activate(obj, &descr_type_test);
                return true;

        default:
                return false;
        }
}

/*
 * fixup_destroy is called when:
 * - an active object is destroyed
 */
static bool __init fixup_destroy(void *addr, enum debug_obj_state state)
{
        struct self_test *obj = addr;

        switch (state) {
        case ODEBUG_STATE_ACTIVE:
                debug_object_deactivate(obj, &descr_type_test);
                debug_object_destroy(obj, &descr_type_test);
                return true;
        default:
                return false;
        }
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static bool __init fixup_free(void *addr, enum debug_obj_state state)
{
        struct self_test *obj = addr;

        switch (state) {
        case ODEBUG_STATE_ACTIVE:
                debug_object_deactivate(obj, &descr_type_test);
                debug_object_free(obj, &descr_type_test);
                return true;
        default:
                return false;
        }
}

static int __init
check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
{
        struct debug_bucket *db;
        struct debug_obj *obj;
        unsigned long flags;
        int res = -EINVAL;

        db = get_bucket((unsigned long) addr);

        raw_spin_lock_irqsave(&db->lock, flags);

        obj = lookup_object(addr, db);
        if (!obj && state != ODEBUG_STATE_NONE) {
                WARN(1, KERN_ERR "ODEBUG: selftest object not found\n");
                goto out;
        }
        if (obj && obj->state != state) {
                WARN(1, KERN_ERR "ODEBUG: selftest wrong state: %d != %d\n",
                       obj->state, state);
                goto out;
        }
        if (fixups != debug_objects_fixups) {
                WARN(1, KERN_ERR "ODEBUG: selftest fixups failed %d != %d\n",
                       fixups, debug_objects_fixups);
                goto out;
        }
        if (warnings != debug_objects_warnings) {
                WARN(1, KERN_ERR "ODEBUG: selftest warnings failed %d != %d\n",
                       warnings, debug_objects_warnings);
                goto out;
        }
        res = 0;
out:
        raw_spin_unlock_irqrestore(&db->lock, flags);
        if (res)
                debug_objects_enabled = false;
        return res;
}

static __initconst const struct debug_obj_descr descr_type_test = {
        .name                   = "selftest",
        .is_static_object       = is_static_object,
        .fixup_init             = fixup_init,
        .fixup_activate         = fixup_activate,
        .fixup_destroy          = fixup_destroy,
        .fixup_free             = fixup_free,
};

static __initdata struct self_test obj = { .static_init = 0 };

static bool __init debug_objects_selftest(void)
{
        int fixups, oldfixups, warnings, oldwarnings;
        unsigned long flags;

        local_irq_save(flags);

        fixups = oldfixups = debug_objects_fixups;
        warnings = oldwarnings = debug_objects_warnings;
        descr_test = &descr_type_test;

        debug_object_init(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_INIT, fixups, warnings))
                goto out;
        debug_object_activate(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
                goto out;
        debug_object_activate(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_ACTIVE, ++fixups, ++warnings))
                goto out;
        debug_object_deactivate(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_INACTIVE, fixups, warnings))
                goto out;
        debug_object_destroy(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, warnings))
                goto out;
        debug_object_init(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, ++warnings))
                goto out;
        debug_object_activate(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, ++warnings))
                goto out;
        debug_object_deactivate(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, ++warnings))
                goto out;
        debug_object_free(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings))
                goto out;

        obj.static_init = 1;
        debug_object_activate(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
                goto out;
        debug_object_init(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
                goto out;
        debug_object_free(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings))
                goto out;

#ifdef CONFIG_DEBUG_OBJECTS_FREE
        debug_object_init(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_INIT, fixups, warnings))
                goto out;
        debug_object_activate(&obj, &descr_type_test);
        if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
                goto out;
        __debug_check_no_obj_freed(&obj, sizeof(obj));
        if (check_results(&obj, ODEBUG_STATE_NONE, ++fixups, ++warnings))
                goto out;
#endif
        pr_info("selftest passed\n");

out:
        debug_objects_fixups = oldfixups;
        debug_objects_warnings = oldwarnings;
        descr_test = NULL;

        local_irq_restore(flags);
        return debug_objects_enabled;
}
#else
static inline bool debug_objects_selftest(void) { return true; }
#endif

/*
 * Called during early boot to initialize the hash buckets and link
 * the static object pool objects into the boot pool list. After this
 * call the object tracker is fully operational.
 */
void __init debug_objects_early_init(void)
{
        int i;

        for (i = 0; i < ODEBUG_HASH_SIZE; i++)
                raw_spin_lock_init(&obj_hash[i].lock);

        /* Keep early boot simple and add everything to the boot list */
        for (i = 0; i < ODEBUG_POOL_SIZE; i++)
                hlist_add_head(&obj_static_pool[i].node, &pool_boot);
}

/*
 * Convert the statically allocated objects to dynamic ones.
 * debug_objects_mem_init() is called early so only one CPU is up and
 * interrupts are disabled, which means it is safe to replace the active
 * object references.
 */
static bool __init debug_objects_replace_static_objects(struct kmem_cache *cache)
{
        struct debug_bucket *db = obj_hash;
        struct hlist_node *tmp;
        struct debug_obj *obj;
        HLIST_HEAD(objects);
        int i;

        for (i = 0; i < ODEBUG_POOL_SIZE; i += ODEBUG_BATCH_SIZE) {
                if (!kmem_alloc_batch(&objects, cache, GFP_KERNEL))
                        goto free;
                pool_push_batch(&pool_global, &objects);
        }

        /* Disconnect the boot pool. */
        pool_boot.first = NULL;

        /* Replace the active object references */
        for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
                hlist_move_list(&db->list, &objects);

                hlist_for_each_entry(obj, &objects, node) {
                        struct debug_obj *new = pcpu_alloc();

                        /* copy object data */
                        *new = *obj;
                        hlist_add_head(&new->node, &db->list);
                }
        }
        return true;
free:
        /* Can't use free_object_list() as the cache is not populated yet */
        hlist_for_each_entry_safe(obj, tmp, &pool_global.objects, node) {
                hlist_del(&obj->node);
                kmem_cache_free(cache, obj);
        }
        return false;
}

/*
 * Called after the kmem_caches are functional to set up a dedicated
 * cache pool, which has the SLAB_DEBUG_OBJECTS flag set. This flag
 * prevents the debug code from being called on kmem_cache_free() for
 * the debug tracker objects, avoiding recursive calls.
 */
void __init debug_objects_mem_init(void)
{
        struct kmem_cache *cache;
        int extras;

        if (!debug_objects_enabled)
                return;

        if (!debug_objects_selftest())
                return;

        cache = kmem_cache_create("debug_objects_cache", sizeof(struct debug_obj), 0,
                                  SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, NULL);

        if (!cache || !debug_objects_replace_static_objects(cache)) {
                debug_objects_enabled = false;
                pr_warn("Out of memory.\n");
                return;
        }

        /*
         * Adjust the thresholds for allocating and freeing objects
         * according to the number of possible CPUs available in the
         * system.
         */
        extras = num_possible_cpus() * ODEBUG_BATCH_SIZE;
        pool_global.max_cnt += extras;
        pool_global.min_cnt += extras;

        /* Everything worked. Expose the cache */
        obj_cache = cache;
        static_branch_enable(&obj_cache_enabled);

#ifdef CONFIG_HOTPLUG_CPU
        cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL,
                                  object_cpu_offline);
#endif
}