#define pr_fmt(fmt) "stackdepot: " fmt
#include <linux/debugfs.h>
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/kmsan.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/poison.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stacktrace.h>
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/memblock.h>
#include <linux/kasan-enabled.h>
/*
 * Upper bound on the number of pools. Defaults to the smaller of 8192 and
 * (2^DEPOT_POOL_INDEX_BITS - 1); the "stack_depot_max_pools" boot parameter
 * may replace it.
 */
static unsigned int stack_max_pools __read_mostly =
MIN((1LL << DEPOT_POOL_INDEX_BITS) - 1, 8192);
/*
 * True when the depot must not be used: set by the "stack_depot_disable"
 * boot parameter or after an initialization-time allocation failure.
 */
static bool stack_depot_disabled;
/* Set by stack_depot_request_early_init() or CONFIG_STACKDEPOT_ALWAYS_INIT. */
static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
/* Set once stack_depot_early_init() has been entered. */
static bool __stack_depot_early_init_passed __initdata;
/* Scale used when the hash table size is derived from the amount of memory. */
#define STACK_HASH_TABLE_SCALE 14
/* log2 of the smallest and largest allowed number of hash buckets. */
#define STACK_BUCKET_NUMBER_ORDER_MIN 12
#define STACK_BUCKET_NUMBER_ORDER_MAX 20
/* Seed passed to jhash2() when hashing a stack trace. */
#define STACK_HASH_SEED 0x9747b28c
/* Hash table: an array of bucket list heads. */
static struct list_head *stack_table;
/* log2 of the bucket count when non-zero; zero selects automatic sizing. */
static unsigned int stack_bucket_number_order;
/* Bucket count minus one; ANDed with a hash to select a bucket. */
static unsigned int stack_hash_mask;
/* Serializes pool, freelist and counter updates. */
static DEFINE_RAW_SPINLOCK(pool_lock);
/* Array of pool base addresses, indexed by pool index. */
static void **stack_pools __pt_guarded_by(&pool_lock);
/*
 * Spare pool waiting to be installed by depot_init_pool(). A NULL value is
 * read locklessly by stack_depot_save_flags() as "preallocate a pool".
 */
static void *new_pool;
/* Number of pools installed in stack_pools. */
static int pools_num;
/*
 * Byte offset of the next unused record in the most recent pool. Starts at
 * DEPOT_POOL_SIZE so that the first allocation has to install a pool.
 */
static size_t pool_offset __guarded_by(&pool_lock) = DEPOT_POOL_SIZE;
/* Records released by depot_free_stack(), oldest first, awaiting reuse. */
static __guarded_by(&pool_lock) LIST_HEAD(free_stacks);
/* Indices into counters[] and counter_names[]; reported through debugfs. */
enum depot_counter_id {
/* Refcounted records handed out by depot_alloc_stack(). */
DEPOT_COUNTER_REFD_ALLOCS,
/* Refcounted records released by depot_free_stack(). */
DEPOT_COUNTER_REFD_FREES,
/* Refcounted records allocated and not yet released. */
DEPOT_COUNTER_REFD_INUSE,
/* Number of records currently on the free_stacks list. */
DEPOT_COUNTER_FREELIST_SIZE,
/* Number of non-refcounted records allocated. */
DEPOT_COUNTER_PERSIST_COUNT,
/* Total record bytes used by non-refcounted records. */
DEPOT_COUNTER_PERSIST_BYTES,
/* Number of counters; must stay last. */
DEPOT_COUNTER_COUNT,
};
/* Counter values; writers in this file update them with pool_lock held. */
static long counters[DEPOT_COUNTER_COUNT];
/* Names printed next to each counter by stats_show(). */
static const char *const counter_names[] = {
[DEPOT_COUNTER_REFD_ALLOCS] = "refcounted_allocations",
[DEPOT_COUNTER_REFD_FREES] = "refcounted_frees",
[DEPOT_COUNTER_REFD_INUSE] = "refcounted_in_use",
[DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size",
[DEPOT_COUNTER_PERSIST_COUNT] = "persistent_count",
[DEPOT_COUNTER_PERSIST_BYTES] = "persistent_bytes",
};
/* Every counter must have a name. */
static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT);
/*
 * Handler for the "stack_depot_disable" boot parameter: parses @str as a
 * boolean into stack_depot_disabled and returns kstrtobool()'s result.
 */
static int __init disable_stack_depot(char *str)
{
	int err;

	err = kstrtobool(str, &stack_depot_disabled);

	return err;
}
early_param("stack_depot_disable", disable_stack_depot);
/*
 * Handler for the "stack_depot_max_pools" boot parameter.
 *
 * Accepts values from 1024 up to (2^DEPOT_POOL_INDEX_BITS - 1). A value
 * outside that range is reported and the current default is kept.
 *
 * Returns the kstrtouint() error if @str is not a number, otherwise 0.
 */
static int __init parse_max_pools(char *str)
{
	const long long limit = (1LL << (DEPOT_POOL_INDEX_BITS)) - 1;
	unsigned int requested;
	int err;

	err = kstrtouint(str, 0, &requested);
	if (err)
		return err;

	if (requested < 1024) {
		pr_err("stack_depot_max_pools below 1024, using default of %u\n",
		       stack_max_pools);
	} else if (requested > limit) {
		pr_err("stack_depot_max_pools exceeds %lld, using default of %u\n",
		       limit, stack_max_pools);
	} else {
		stack_max_pools = requested;
	}

	return 0;
}
early_param("stack_depot_max_pools", parse_max_pools);
/*
 * Asks stack_depot_early_init() to allocate the depot's tables. Warns if
 * stack_depot_early_init() has already run.
 */
void __init stack_depot_request_early_init(void)
{
/* A request made after early init has run is flagged here. */
WARN_ON(__stack_depot_early_init_passed);
__stack_depot_early_init_requested = true;
}
static void init_stack_table(unsigned long entries)
{
unsigned long i;
for (i = 0; i < entries; i++)
INIT_LIST_HEAD(&stack_table[i]);
}
/*
 * Boot-time initialization of the depot.
 *
 * Runs at most once (a second call warns and returns 0). When early init has
 * been requested and the depot is not disabled, allocates the hash table with
 * alloc_large_system_hash() and the pool pointer array with memblock_alloc().
 *
 * Returns 0 on success or when there is nothing to do, -ENOMEM if either
 * allocation fails (the depot is then marked disabled).
 */
int __init stack_depot_early_init(void)
{
	unsigned long entries = 0;

	if (WARN_ON(__stack_depot_early_init_passed))
		return 0;
	__stack_depot_early_init_passed = true;

	/* Reported here even when early init was not requested. */
	if (stack_depot_disabled) {
		pr_info("disabled\n");
		return 0;
	}

	/* With KASAN enabled and no explicit order, use the largest table. */
	if (kasan_enabled() && !stack_bucket_number_order)
		stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX;

	/*
	 * Checked only after stack_bucket_number_order has been settled,
	 * because stack_depot_init() reads that value too.
	 */
	if (!__stack_depot_early_init_requested)
		return 0;

	/* entries == 0 lets alloc_large_system_hash() pick the size. */
	if (stack_bucket_number_order)
		entries = 1UL << stack_bucket_number_order;

	pr_info("allocating hash table via alloc_large_system_hash\n");
	stack_table = alloc_large_system_hash("stackdepot",
					      sizeof(struct list_head),
					      entries,
					      STACK_HASH_TABLE_SCALE,
					      HASH_EARLY,
					      NULL,
					      &stack_hash_mask,
					      1UL << STACK_BUCKET_NUMBER_ORDER_MIN,
					      1UL << STACK_BUCKET_NUMBER_ORDER_MAX);
	if (!stack_table) {
		pr_err("hash table allocation failed, disabling\n");
		stack_depot_disabled = true;
		return -ENOMEM;
	}

	/* Recover the size that was chosen automatically. */
	if (!entries)
		entries = stack_hash_mask + 1;
	init_stack_table(entries);

	pr_info("allocating space for %u stack pools via memblock\n",
		stack_max_pools);
	stack_pools =
		memblock_alloc(stack_max_pools * sizeof(void *), PAGE_SIZE);
	if (!stack_pools) {
		pr_err("stack pools allocation failed, disabling\n");
		memblock_free(stack_table, entries * sizeof(struct list_head));
		/* Do not leave the global pointing at freed memory. */
		stack_table = NULL;
		stack_depot_disabled = true;
		return -ENOMEM;
	}

	return 0;
}
int stack_depot_init(void)
{
static DEFINE_MUTEX(stack_depot_init_mutex);
unsigned long entries;
int ret = 0;
mutex_lock(&stack_depot_init_mutex);
if (stack_depot_disabled || stack_table)
goto out_unlock;
if (stack_bucket_number_order) {
entries = 1UL << stack_bucket_number_order;
} else {
int scale = STACK_HASH_TABLE_SCALE;
entries = nr_free_buffer_pages();
entries = roundup_pow_of_two(entries);
if (scale > PAGE_SHIFT)
entries >>= (scale - PAGE_SHIFT);
else
entries <<= (PAGE_SHIFT - scale);
}
if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN)
entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN;
if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX)
entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX;
pr_info("allocating hash table of %lu entries via kvcalloc\n", entries);
stack_table = kvzalloc_objs(struct list_head, entries);
if (!stack_table) {
pr_err("hash table allocation failed, disabling\n");
stack_depot_disabled = true;
ret = -ENOMEM;
goto out_unlock;
}
stack_hash_mask = entries - 1;
init_stack_table(entries);
pr_info("allocating space for %u stack pools via kvcalloc\n",
stack_max_pools);
stack_pools = kvcalloc(stack_max_pools, sizeof(void *), GFP_KERNEL);
if (!stack_pools) {
pr_err("stack pools allocation failed, disabling\n");
kvfree(stack_table);
stack_depot_disabled = true;
ret = -ENOMEM;
}
out_unlock:
mutex_unlock(&stack_depot_init_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(stack_depot_init);
/*
 * Installs a new current pool. Caller must hold pool_lock.
 *
 * Uses the spare pool in new_pool, first filling it from *@prealloc (which is
 * then set to NULL) if no spare is saved. On success the pool is stored in
 * stack_pools[], pools_num is incremented and pool_offset is reset to 0.
 *
 * Returns false if the pool limit has been reached or no memory is available
 * for a new pool, true otherwise.
 */
static bool depot_init_pool(void **prealloc)
	__must_hold(&pool_lock)
{
	lockdep_assert_held(&pool_lock);

	if (unlikely(pools_num >= stack_max_pools)) {
		/* Pool limit reached: nothing more can be installed. */
		WARN_ON_ONCE(pools_num > stack_max_pools);
		WARN_ON_ONCE(!new_pool);
		WARN_ONCE(1, "Stack depot reached limit capacity");
		return false;
	}

	if (!new_pool && *prealloc) {
		/* Take ownership of the caller's preallocated memory. */
		WRITE_ONCE(new_pool, *prealloc);
		*prealloc = NULL;
	}

	/* Neither a saved spare nor a preallocation is available. */
	if (!new_pool)
		return false;

	stack_pools[pools_num] = new_pool;

	/*
	 * A NULL new_pool asks stack_depot_save_flags() to preallocate another
	 * pool. Only do that while a further pool can still be installed:
	 * pools_num has not been incremented yet, so the pool being installed
	 * here is the last one when pools_num + 1 equals the limit. In that
	 * case park a poison value so that no further preallocation happens.
	 */
	if (pools_num + 1 < stack_max_pools)
		WRITE_ONCE(new_pool, NULL);
	else
		WRITE_ONCE(new_pool, STACK_DEPOT_POISON);

	/* Read locklessly with READ_ONCE() in depot_fetch_stack(). */
	WRITE_ONCE(pools_num, pools_num + 1);
	ASSERT_EXCLUSIVE_WRITER(pools_num);

	pool_offset = 0;

	return true;
}
/*
 * Saves *@prealloc as the spare pool when no spare is currently held, and
 * clears *@prealloc to signal that ownership was taken. Leaves *@prealloc
 * untouched otherwise. Caller must hold pool_lock.
 */
static void depot_keep_new_pool(void **prealloc)
	__must_hold(&pool_lock)
{
	lockdep_assert_held(&pool_lock);

	if (!new_pool) {
		WRITE_ONCE(new_pool, *prealloc);
		*prealloc = NULL;
	}
}
/*
 * Carves a record of @size bytes out of the current pool, installing a new
 * pool first when the current one cannot hold it. Caller must hold pool_lock.
 *
 * The returned record has its handle and hash_list initialized. Returns NULL
 * if no pool could be installed or the pool bookkeeping is inconsistent.
 */
static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
	__must_hold(&pool_lock)
{
	struct stack_record *rec;
	void *pool_base;
	u32 idx;

	lockdep_assert_held(&pool_lock);

	/* Out of room in the current pool: try to start a new one. */
	if (pool_offset + size > DEPOT_POOL_SIZE && !depot_init_pool(prealloc))
		return NULL;

	if (WARN_ON_ONCE(pools_num < 1))
		return NULL;

	idx = pools_num - 1;
	pool_base = stack_pools[idx];
	if (WARN_ON_ONCE(!pool_base))
		return NULL;

	rec = pool_base + pool_offset;

	/* The handle is filled in once, when the record is carved out. */
	rec->handle.pool_index_plus_1 = idx + 1;
	rec->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
	rec->handle.extra = 0;
	INIT_LIST_HEAD(&rec->hash_list);

	pool_offset += size;

	return rec;
}
/*
 * Takes the record at the head of free_stacks for reuse. Caller must hold
 * pool_lock.
 *
 * Only the head is examined. Returns NULL if the list is empty or if
 * poll_state_synchronize_rcu() reports that the grace period recorded in the
 * head record has not completed yet.
 */
static struct stack_record *depot_pop_free(void)
	__must_hold(&pool_lock)
{
	struct stack_record *head;

	lockdep_assert_held(&pool_lock);

	if (!list_empty(&free_stacks)) {
		head = list_first_entry(&free_stacks, struct stack_record, free_list);
		if (poll_state_synchronize_rcu(head->rcu_state)) {
			list_del(&head->free_list);
			counters[DEPOT_COUNTER_FREELIST_SIZE]--;
			return head;
		}
	}

	return NULL;
}
/*
 * Returns the number of bytes a record holding @nr_entries frames occupies:
 * the full struct size minus the unused tail of its entries array, rounded up
 * to the record alignment. @s is used only for its type. Warns if @nr_entries
 * frames do not fit into the entries array.
 */
static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries)
{
	const size_t capacity = sizeof(s->entries);
	const size_t used = flex_array_size(s, entries, nr_entries);
	const size_t slack = capacity - used;

	WARN_ON_ONCE(capacity < used);

	return ALIGN(sizeof(struct stack_record) - slack, 1 << DEPOT_STACK_ALIGN);
}
/*
 * Creates a record for the trace in @entries. Caller must hold pool_lock.
 *
 * The trace is truncated to CONFIG_STACKDEPOT_MAX_FRAMES frames. With
 * STACK_DEPOT_FLAG_GET the record is sized for the maximum frame count and is
 * taken from the freelist when possible; its refcount starts at 1. Without
 * the flag the record is sized for the actual frame count and its refcount is
 * set to REFCOUNT_SATURATED.
 *
 * Returns the new record, or NULL if @nr_entries is zero or no space is
 * available.
 */
static struct stack_record *
depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc)
	__must_hold(&pool_lock)
{
	const bool refcounted = flags & STACK_DEPOT_FLAG_GET;
	struct stack_record *rec = NULL;
	size_t rec_size;

	lockdep_assert_held(&pool_lock);

	if (WARN_ON_ONCE(!nr_entries))
		return NULL;

	if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES)
		nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES;

	/* rec is NULL here; it only supplies the type for the size helper. */
	if (refcounted)
		rec_size = depot_stack_record_size(rec, CONFIG_STACKDEPOT_MAX_FRAMES);
	else
		rec_size = depot_stack_record_size(rec, nr_entries);

	/* Only refcounted records may reuse a freelist slot. */
	if (refcounted)
		rec = depot_pop_free();
	if (!rec)
		rec = depot_pop_free_pool(prealloc, rec_size);
	if (!rec)
		return NULL;

	/* The handle was already set up when the record was first carved out. */
	rec->hash = hash;
	rec->size = nr_entries;
	memcpy(rec->entries, entries, flex_array_size(rec, entries, nr_entries));

	if (refcounted) {
		refcount_set(&rec->count, 1);
		counters[DEPOT_COUNTER_REFD_ALLOCS]++;
		counters[DEPOT_COUNTER_REFD_INUSE]++;
	} else {
		refcount_set(&rec->count, REFCOUNT_SATURATED);
		counters[DEPOT_COUNTER_PERSIST_COUNT]++;
		counters[DEPOT_COUNTER_PERSIST_BYTES] += rec_size;
	}

	/* Tell KMSAN the whole record is initialized. */
	kmsan_unpoison_memory(rec, rec_size);

	return rec;
}
/*
 * Translates @handle into a pointer to its stack record without taking
 * pool_lock (the caller must not hold it).
 *
 * Returns NULL, with a warning, if the pool index encoded in the handle is
 * not below the current pool count, if the pool slot is empty, or if the
 * record's refcount reads as zero.
 */
static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
__must_not_hold(&pool_lock)
{
/* Snapshot of pools_num; written with WRITE_ONCE() in depot_init_pool(). */
const int pools_num_cached = READ_ONCE(pools_num);
union handle_parts parts = { .handle = handle };
void *pool;
/* The handle stores the index plus one; a stored 0 wraps to a huge index. */
u32 pool_index = parts.pool_index_plus_1 - 1;
/* The handle stores the offset in units of 1 << DEPOT_STACK_ALIGN bytes. */
size_t offset = parts.offset << DEPOT_STACK_ALIGN;
struct stack_record *stack;
lockdep_assert_not_held(&pool_lock);
if (pool_index >= pools_num_cached) {
WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
pool_index, pools_num_cached, handle);
return NULL;
}
/* Deliberately read without pool_lock. */
pool = context_unsafe(stack_pools[pool_index]);
if (WARN_ON(!pool))
return NULL;
stack = pool + offset;
/* A zero refcount means the record has been released. */
if (WARN_ON(!refcount_read(&stack->count)))
return NULL;
return stack;
}
/*
 * Releases @stack: unlinks it from its hash bucket and queues it on the
 * freelist for later reuse. Takes pool_lock itself, so the caller must not
 * hold it.
 */
static void depot_free_stack(struct stack_record *stack)
__must_not_hold(&pool_lock)
{
unsigned long flags;
lockdep_assert_not_held(&pool_lock);
raw_spin_lock_irqsave(&pool_lock, flags);
printk_deferred_enter();
/* RCU-safe removal: concurrent find_stack() walkers may still see it. */
list_del_rcu(&stack->hash_list);
/*
 * Record an RCU grace-period cookie; depot_pop_free() polls it before
 * handing the record out again.
 */
stack->rcu_state = get_state_synchronize_rcu();
/* Append at the tail so that the oldest released record is at the head. */
list_add_tail(&stack->free_list, &free_stacks);
counters[DEPOT_COUNTER_FREELIST_SIZE]++;
counters[DEPOT_COUNTER_REFD_FREES]++;
counters[DEPOT_COUNTER_REFD_INUSE]--;
printk_deferred_exit();
raw_spin_unlock_irqrestore(&pool_lock, flags);
}
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
{
return jhash2((u32 *)entries,
array_size(size, sizeof(*entries)) / sizeof(u32),
STACK_HASH_SEED);
}
/*
 * Compares the first @n elements of @u1 and @u2.
 * Returns 0 if they are all equal (including when @n is 0), 1 otherwise.
 */
static inline
int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
		      unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		if (u1[i] != u2[i])
			return 1;
	}

	return 0;
}
/*
 * Searches @bucket for a record whose hash, size and entries match the given
 * trace. The walk runs inside an RCU-sched read-side section using the
 * notrace lock variants.
 *
 * With STACK_DEPOT_FLAG_GET in @flags, a matching record is returned only if
 * its refcount could be raised from a non-zero value; otherwise the search
 * continues with the next record.
 *
 * Returns the matching record or NULL.
 */
static inline struct stack_record *find_stack(struct list_head *bucket,
unsigned long *entries, int size,
u32 hash, depot_flags_t flags)
{
struct stack_record *stack, *ret = NULL;
rcu_read_lock_sched_notrace();
list_for_each_entry_rcu(stack, bucket, hash_list) {
/* Cheap rejects first: hash and length. */
if (stack->hash != hash || stack->size != size)
continue;
/* The comparison is marked as an intentional data race. */
if (data_race(stackdepot_memcmp(entries, stack->entries, size)))
continue;
/* A refcount of zero means the record is being released; skip it. */
if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count))
continue;
ret = stack;
break;
}
rcu_read_unlock_sched_notrace();
return ret;
}
/*
 * Saves a stack trace in the depot and returns its handle.
 *
 * @entries:      the stack trace; filtered in place by filter_irq_stacks()
 * @nr_entries:   number of entries in @entries
 * @alloc_flags:  gfp flags used if a new pool has to be allocated
 * @depot_flags:  STACK_DEPOT_FLAG_* bits; anything outside
 *                STACK_DEPOT_FLAGS_MASK triggers a warning and returns 0
 *
 * An existing identical trace is reused. Otherwise a new record is created;
 * a new pool is allocated for it only if STACK_DEPOT_FLAG_CAN_ALLOC is set
 * and @alloc_flags allow spinning.
 *
 * Returns the handle, or 0 if the depot is disabled, the filtered trace is
 * empty, the lock could not be taken in a trylock-only context, or no record
 * could be allocated.
 *
 * NOTE(review): depot_alloc_stack() truncates the stored trace to
 * CONFIG_STACKDEPOT_MAX_FRAMES, while the hash and the find_stack() length
 * comparison here use the untruncated @nr_entries. Longer traces therefore
 * look like they can never match an existing record - confirm that callers
 * never pass more frames than the limit.
 */
depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
unsigned int nr_entries,
gfp_t alloc_flags,
depot_flags_t depot_flags)
{
struct list_head *bucket;
struct stack_record *found = NULL;
depot_stack_handle_t handle = 0;
struct page *page = NULL;
void *prealloc = NULL;
bool allow_spin = gfpflags_allow_spinning(alloc_flags);
bool can_alloc = (depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC) && allow_spin;
unsigned long flags;
u32 hash;
if (WARN_ON(depot_flags & ~STACK_DEPOT_FLAGS_MASK))
return 0;
/* Filtering is always applied here so that callers need not do it. */
nr_entries = filter_irq_stacks(entries, nr_entries);
if (unlikely(nr_entries == 0) || stack_depot_disabled)
return 0;
hash = hash_stack(entries, nr_entries);
bucket = &stack_table[hash & stack_hash_mask];
/* First lookup, without taking pool_lock. */
found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
if (found)
goto exit;
/*
 * No spare pool is saved: allocate one now, before pool_lock is taken.
 */
if (unlikely(can_alloc && !READ_ONCE(new_pool))) {
page = alloc_pages(gfp_nested_mask(alloc_flags),
DEPOT_POOL_ORDER);
if (page)
prealloc = page_address(page);
}
if (in_nmi() || !allow_spin) {
/* Allocation is not expected in these contexts. */
WARN_ON_ONCE(can_alloc);
/* Best effort: give up rather than spin on the lock. */
if (!raw_spin_trylock_irqsave(&pool_lock, flags))
goto exit;
} else {
raw_spin_lock_irqsave(&pool_lock, flags);
}
printk_deferred_enter();
/* Second lookup under the lock, so that no duplicate gets inserted. */
found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
if (!found) {
struct stack_record *new =
depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc);
if (new) {
/* Publishes the record to lockless find_stack() walkers. */
list_add_rcu(&new->hash_list, bucket);
found = new;
}
}
if (prealloc) {
/* The preallocation was not consumed; try to keep it as the spare. */
depot_keep_new_pool(&prealloc);
}
printk_deferred_exit();
raw_spin_unlock_irqrestore(&pool_lock, flags);
exit:
if (prealloc) {
/* Still unused: give the pages back. */
if (!allow_spin)
free_pages_nolock(virt_to_page(prealloc), DEPOT_POOL_ORDER);
else
free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER);
}
if (found)
handle = found->handle.handle;
return handle;
}
EXPORT_SYMBOL_GPL(stack_depot_save_flags);
/*
 * Saves a stack trace in the depot, allowing the depot to allocate a new pool
 * with @alloc_flags. Equivalent to stack_depot_save_flags() called with
 * STACK_DEPOT_FLAG_CAN_ALLOC.
 *
 * Returns the handle of the saved trace, or 0 on failure.
 */
depot_stack_handle_t stack_depot_save(unsigned long *entries,
				      unsigned int nr_entries,
				      gfp_t alloc_flags)
{
	const depot_flags_t depot_flags = STACK_DEPOT_FLAG_CAN_ALLOC;

	return stack_depot_save_flags(entries, nr_entries, alloc_flags,
				      depot_flags);
}
EXPORT_SYMBOL_GPL(stack_depot_save);
/*
 * Returns the stack record behind @handle, or NULL for a zero handle or a
 * handle that depot_fetch_stack() rejects.
 */
struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle)
{
	return handle ? depot_fetch_stack(handle) : NULL;
}
/*
 * Looks up the stack trace stored under @handle.
 *
 * @handle:  handle returned by one of the save functions
 * @entries: receives a pointer to the stored frames, or NULL on failure
 *
 * Returns the number of frames, or 0 if @handle is zero, the depot is
 * disabled, or the handle does not resolve to a live record (which also
 * triggers a warning).
 */
unsigned int stack_depot_fetch(depot_stack_handle_t handle,
			       unsigned long **entries)
{
	struct stack_record *rec;

	*entries = NULL;
	/* Tell KMSAN that *entries has been initialized. */
	kmsan_unpoison_memory(entries, sizeof(*entries));

	if (!handle || stack_depot_disabled)
		return 0;

	rec = depot_fetch_stack(handle);
	if (WARN(!rec, "corrupt handle or use after stack_depot_put()"))
		return 0;

	*entries = rec->entries;

	return rec->size;
}
EXPORT_SYMBOL_GPL(stack_depot_fetch);
/*
 * Drops one reference to the record behind @handle and releases the record
 * when that was the last one. Does nothing for a zero handle or a disabled
 * depot; warns if the handle does not resolve to a live record.
 */
void stack_depot_put(depot_stack_handle_t handle)
{
	struct stack_record *rec;

	if (!handle || stack_depot_disabled)
		return;

	rec = depot_fetch_stack(handle);
	if (WARN(!rec, "corrupt handle or unbalanced stack_depot_put()"))
		return;

	/* Other references remain: nothing more to do. */
	if (!refcount_dec_and_test(&rec->count))
		return;

	depot_free_stack(rec);
}
EXPORT_SYMBOL_GPL(stack_depot_put);
/*
 * Prints the stack trace stored under @stack with stack_trace_print().
 * Prints nothing if the lookup yields no frames.
 */
void stack_depot_print(depot_stack_handle_t stack)
{
	unsigned long *frames;
	unsigned int nr_frames = stack_depot_fetch(stack, &frames);

	if (!nr_frames)
		return;

	stack_trace_print(frames, nr_frames, 0);
}
EXPORT_SYMBOL_GPL(stack_depot_print);
/*
 * Formats the stack trace stored under @handle into @buf.
 *
 * @buf:    destination buffer
 * @size:   size of @buf
 * @spaces: passed through to stack_trace_snprint()
 *
 * Returns the result of stack_trace_snprint(), or 0 if the lookup yields no
 * frames.
 */
int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
			int spaces)
{
	unsigned long *frames;
	unsigned int nr_frames = stack_depot_fetch(handle, &frames);

	if (!nr_frames)
		return 0;

	return stack_trace_snprint(buf, size, frames, nr_frames, spaces);
}
EXPORT_SYMBOL_GPL(stack_depot_snprint);
/*
 * Returns a copy of @handle with its extra field replaced by @extra_bits.
 * A zero handle is returned unchanged (as 0) so that it stays recognizable
 * as empty.
 */
depot_stack_handle_t __must_check stack_depot_set_extra_bits(
			depot_stack_handle_t handle, unsigned int extra_bits)
{
	union handle_parts parts;

	if (!handle)
		return 0;

	parts.handle = handle;
	parts.extra = extra_bits;

	return parts.handle;
}
EXPORT_SYMBOL(stack_depot_set_extra_bits);
/* Returns the value of the extra field encoded in @handle. */
unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
{
	union handle_parts parts;

	parts.handle = handle;

	return parts.extra;
}
EXPORT_SYMBOL(stack_depot_get_extra_bits);
/*
 * seq_file show callback for the debugfs "stats" file: prints the pool count
 * followed by one "name: value" line per counter. Values are read without
 * locking and are marked as intentional data races.
 */
static int stats_show(struct seq_file *seq, void *v)
{
	int id = 0;

	seq_printf(seq, "pools: %d\n", data_race(pools_num));

	while (id < DEPOT_COUNTER_COUNT) {
		seq_printf(seq, "%s: %ld\n", counter_names[id], data_race(counters[id]));
		id++;
	}

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(stats);
/*
 * Late initcall: creates the debugfs directory "stackdepot" with a read-only
 * "stats" file, unless the depot is disabled. Always returns 0.
 */
static int depot_debugfs_init(void)
{
	if (!stack_depot_disabled) {
		struct dentry *dir = debugfs_create_dir("stackdepot", NULL);

		debugfs_create_file("stats", 0444, dir, NULL, &stats_fops);
	}

	return 0;
}
late_initcall(depot_debugfs_init);