root/drivers/md/dm-pcache/cache.h
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _PCACHE_CACHE_H
#define _PCACHE_CACHE_H

#include "segment.h"

/*
 * Garbage collection thresholds, expressed as percentages.
 * NOTE(review): presumably the valid range and the default for the value
 * stored under PCACHE_CACHE_FLAGS_GC_PERCENT_MASK -- confirm against
 * pcache_cache_set_gc_percent().
 */
#define PCACHE_CACHE_GC_PERCENT_MIN       0                   /* Minimum GC percentage */
#define PCACHE_CACHE_GC_PERCENT_MAX       90                  /* Maximum GC percentage */
#define PCACHE_CACHE_GC_PERCENT_DEFAULT   70                  /* Default GC percentage */

/*
 * Subtree granularity. The three macros describe the same span: a size of
 * 4MB, the matching low-bit mask (0x3FFFFF) and the matching shift (22).
 * get_subtree() and get_kset_id() shift a logical offset right by
 * PCACHE_CACHE_SUBTREE_SIZE_SHIFT to select a subtree / kset.
 */
#define PCACHE_CACHE_SUBTREE_SIZE               (4 * PCACHE_MB)     /* 4MB total tree size */
#define PCACHE_CACHE_SUBTREE_SIZE_MASK          0x3FFFFF            /* Mask for tree size */
#define PCACHE_CACHE_SUBTREE_SIZE_SHIFT         22                  /* Bit shift for tree size */

/* Maximum number of keys per key set */
#define PCACHE_KSET_KEYS_MAX            128
#define PCACHE_CACHE_SEGS_MAX           (1024 * 1024)   /* maximum cache size for each device is 16T */
/* Size in bytes of an on-media kset header followed by PCACHE_KSET_KEYS_MAX on-media keys */
#define PCACHE_KSET_ONMEDIA_SIZE_MAX    struct_size_t(struct pcache_cache_kset_onmedia, data, PCACHE_KSET_KEYS_MAX)
/*
 * Size in bytes of one in-memory kset plus room for PCACHE_KSET_KEYS_MAX
 * on-media keys; get_kset() uses it as the stride between ksets.
 */
#define PCACHE_KSET_SIZE                (sizeof(struct pcache_cache_kset) + sizeof(struct pcache_cache_key_onmedia) * PCACHE_KSET_KEYS_MAX)

/* Maximum number of keys to clean in one round of clean_work */
#define PCACHE_CLEAN_KEYS_MAX             10

/*
 * Writeback and garbage collection intervals in jiffies.
 * Both are 5 * HZ, i.e. five seconds' worth of ticks.
 */
#define PCACHE_CACHE_WRITEBACK_INTERVAL   (5 * HZ)
#define PCACHE_CACHE_GC_INTERVAL          (5 * HZ)

/*
 * Map an rb_node pointer back to the struct pcache_cache_key that embeds it
 * (via the key's rb_node member).
 */
#define CACHE_KEY(node)                (container_of(node, struct pcache_cache_key, rb_node))

/*
 * Stored form of a cache position: a metadata header followed by a cache
 * segment id and an offset inside that segment. cache_pos_encode() and
 * cache_pos_decode() take this alongside the in-memory struct pcache_cache_pos.
 */
struct pcache_cache_pos_onmedia {
        struct pcache_meta_header header;
        __u32 cache_seg_id;
        __u32 seg_off;
};

/*
 * Offset and size definitions for cache segment control.
 * The offset equals PCACHE_META_INDEX_MAX times PCACHE_SEG_INFO_SIZE; the
 * control area itself is 4KB.
 * NOTE(review): presumably the control area sits right after the replicated
 * segment-info copies -- confirm against the segment layout.
 */
#define PCACHE_CACHE_SEG_CTRL_OFF     (PCACHE_SEG_INFO_SIZE * PCACHE_META_INDEX_MAX)
#define PCACHE_CACHE_SEG_CTRL_SIZE    (4 * PCACHE_KB)

/* One stored generation record of a cache segment: metadata header plus the generation number. */
struct pcache_cache_seg_gen {
        struct pcache_meta_header header;
        __u64 gen;
};

/*
 * Control structure for cache segments: PCACHE_META_INDEX_MAX generation
 * records followed by 64 u64 slots named "res".
 * NOTE(review): "res" is presumably reserved space -- confirm.
 */
struct pcache_cache_seg_ctrl {
        struct pcache_cache_seg_gen gen[PCACHE_META_INDEX_MAX];
        __u64   res[64];
};

/*
 * Bit layout of pcache_cache_info.flags:
 *   bit  0      data CRC enabled (tested by cache_data_crc_on())
 *   bit  1      init done
 *   bits 5..2   cache mode (read/written by cache_mode_get()/cache_mode_set())
 *   bits 12..6  GC percent (read by pcache_cache_get_gc_percent())
 */
#define PCACHE_CACHE_FLAGS_DATA_CRC                     BIT(0)
#define PCACHE_CACHE_FLAGS_INIT_DONE                    BIT(1)

/* Values stored in the cache mode field */
#define PCACHE_CACHE_FLAGS_CACHE_MODE_MASK              GENMASK(5, 2)
#define PCACHE_CACHE_MODE_WRITEBACK                     0
#define PCACHE_CACHE_MODE_WRITETHROUGH                  1
#define PCACHE_CACHE_MODE_WRITEAROUND                   2
#define PCACHE_CACHE_MODE_WRITEONLY                     3

#define PCACHE_CACHE_FLAGS_GC_PERCENT_MASK              GENMASK(12, 6)

/*
 * Cache-wide information record: metadata header, a segment id, a segment
 * count, the PCACHE_CACHE_FLAGS_* bit field and one reserved word.
 */
struct pcache_cache_info {
        struct pcache_meta_header header;
        __u32 seg_id;
        __u32 n_segs;
        __u32 flags;
        __u32 reserved;
};

/*
 * In-memory cache position: a cache segment plus a byte offset into that
 * segment's data area (cache_pos_addr() turns it into an address).
 */
struct pcache_cache_pos {
        struct pcache_cache_segment *cache_seg;
        u32 seg_off;
};

/*
 * Per-segment state of the cache.
 *
 * The fields fall into three groups: identity and reference count, the
 * segment info copy with its lock and index, and the generation counter with
 * its lock, sequence and index. cache_key_invalid() compares a key's seg_gen
 * against @gen here.
 */
struct pcache_cache_segment {
        struct pcache_cache     *cache;
        u32                     cache_seg_id;   /* Index in cache->segments */
        struct pcache_segment   segment;
        atomic_t                refs;

        /* Segment info, with the mutex and index kept next to it */
        struct pcache_segment_info cache_seg_info;
        struct mutex            info_lock;
        u32                     info_index;

        /* Generation state, with the spinlock, sequence and index kept next to it */
        spinlock_t              gen_lock;
        u64                     gen;
        u64                     gen_seq;
        u32                     gen_index;

        struct pcache_cache_seg_ctrl *cache_seg_ctrl;
};

/*
 * rbtree for cache entries: one red-black tree root together with the
 * spinlock stored alongside it.
 */
struct pcache_cache_subtree {
        struct rb_root root;
        spinlock_t tree_lock;
};

/*
 * A cache tree: an array of @n_subtrees subtrees (selected by get_subtree())
 * plus a mempool named key_pool.
 * NOTE(review): key_pool presumably backs cache_key_alloc() -- confirm.
 */
struct pcache_cache_tree {
        struct pcache_cache             *cache;
        u32                             n_subtrees;
        mempool_t                       key_pool;
        struct pcache_cache_subtree     *subtrees;
};

/*
 * Slab cache declared here and defined outside this header.
 * NOTE(review): presumably the backing store for struct pcache_cache_key -- confirm.
 */
extern struct kmem_cache *key_cache;

/*
 * In-memory cache key.
 *
 * @off and @len give the logical range [off, off + len) reported by
 * cache_key_lstart()/cache_key_lend(). @flags holds
 * PCACHE_CACHE_KEY_FLAGS_* bits. @cache_pos locates the cached data and
 * @seg_gen is the generation that cache_key_invalid() compares with the
 * segment's current generation. @rb_node links the key into a subtree
 * (see CACHE_KEY()).
 */
struct pcache_cache_key {
        struct pcache_cache_tree        *cache_tree;
        struct pcache_cache_subtree     *cache_subtree;
        struct kref                     ref;
        struct rb_node                  rb_node;
        struct list_head                list_node;
        u64                             off;
        u32                             len;
        u32                             flags;
        struct pcache_cache_pos         cache_pos;
        u64                             seg_gen;
};

/* Bits of pcache_cache_key.flags, tested by cache_key_empty() and cache_key_clean() */
#define PCACHE_CACHE_KEY_FLAGS_EMPTY            BIT(0)
#define PCACHE_CACHE_KEY_FLAGS_CLEAN            BIT(1)

/*
 * Stored form of a cache key: logical offset and length, flags, the cache
 * segment id and offset where the data lives, the segment generation, a data
 * CRC and one reserved word. Field order and widths define the stored layout.
 */
struct pcache_cache_key_onmedia {
        __u64 off;
        __u32 len;
        __u32 flags;
        __u32 cache_seg_id;
        __u32 cache_seg_off;
        __u64 seg_gen;
        __u32 data_crc;
        __u32 reserved;
};

/*
 * Stored form of a key set.
 *
 * @crc occupies the first 4 bytes; cache_kset_crc() checksums everything
 * after it. The second word is a union: cache_kset_crc() reads it as key_num
 * only when PCACHE_KSET_FLAGS_LAST is clear.
 * NOTE(review): presumably it holds next_cache_seg_id when the LAST flag is
 * set -- confirm. @data is a flexible array of on-media keys.
 */
struct pcache_cache_kset_onmedia {
        __u32 crc;
        union {
                __u32 key_num;
                __u32 next_cache_seg_id;
        };
        __u64 magic;
        __u64 flags;
        struct pcache_cache_key_onmedia data[];
};

/*
 * Top-level cache object. Fields are grouped by the subsystem that uses
 * them; the group comments below describe only what this header shows.
 */
struct pcache_cache {
        struct pcache_backing_dev       *backing_dev;
        struct pcache_cache_dev         *cache_dev;
        struct pcache_cache_ctrl        *cache_ctrl;
        u64                             dev_size;

        /* Per-CPU data heads; get_data_head() returns the current CPU's entry */
        struct pcache_cache_data_head __percpu *data_heads;

        /*
         * Key head position (see get_key_head_addr()) and the kset array.
         * Ksets are laid out PCACHE_KSET_SIZE bytes apart (see get_kset());
         * get_kset_id() reduces an index modulo n_ksets.
         */
        spinlock_t              key_head_lock;
        struct pcache_cache_pos key_head;
        u32                     n_ksets;
        struct pcache_cache_kset        *ksets;

        /* Key tail position; seq and index are passed to cache_pos_encode()/cache_pos_decode() */
        struct mutex            key_tail_lock;
        struct pcache_cache_pos key_tail;
        u64                     key_tail_seq;
        u32                     key_tail_index;

        /* Dirty tail position; seq and index are passed to cache_pos_encode()/cache_pos_decode() */
        struct mutex            dirty_tail_lock;
        struct pcache_cache_pos dirty_tail;
        u64                     dirty_tail_seq;
        u32                     dirty_tail_index;

        struct pcache_cache_tree        req_key_tree;
        struct work_struct      clean_work;

        /* Writeback state; the buffer is large enough for one maximum-size on-media kset */
        struct mutex            writeback_lock;
        char wb_kset_onmedia_buf[PCACHE_KSET_ONMEDIA_SIZE_MAX];
        struct pcache_cache_tree        writeback_key_tree;
        struct delayed_work     writeback_work;
        struct {
                atomic_t pending;
                u32 advance;
                int ret;
        } writeback_ctx;

        /* GC state; the buffer is large enough for one maximum-size on-media kset */
        char gc_kset_onmedia_buf[PCACHE_KSET_ONMEDIA_SIZE_MAX];
        struct delayed_work     gc_work;
        atomic_t                gc_errors;

        /*
         * In-memory cache info (its flags are read by the cache_mode_*,
         * cache_data_crc_on() and gc percent helpers) and a pointer to a
         * second cache info record.
         * NOTE(review): cache_info_addr presumably points at the stored copy -- confirm.
         */
        struct mutex                    cache_info_lock;
        struct pcache_cache_info        cache_info;
        struct pcache_cache_info        *cache_info_addr;
        u32                             info_index;

        /*
         * Segment bookkeeping.
         * NOTE(review): seg_map looks like an allocation bitmap guarded by
         * seg_map_lock -- confirm.
         */
        u32                     n_segs;
        unsigned long           *seg_map;
        u32                     last_cache_seg;
        bool                    cache_full;
        spinlock_t              seg_map_lock;
        struct pcache_cache_segment *segments;
};

/*
 * Declared here, defined outside this header.
 * NOTE(review): by its name, returns the workqueue used for @cache's work
 * items -- confirm.
 */
struct workqueue_struct *cache_get_wq(struct pcache_cache *cache);

struct dm_pcache;
/*
 * Cache options packed as bitfields: cache_mode is 4 bits wide, the same
 * width as PCACHE_CACHE_FLAGS_CACHE_MODE_MASK, and data_crc is a single bit.
 */
struct pcache_cache_options {
        u32     cache_mode:4;
        u32     data_crc:1;
};
/* Cache start/stop entry points for a dm_pcache instance; defined outside this header. */
int pcache_cache_start(struct dm_pcache *pcache);
void pcache_cache_stop(struct dm_pcache *pcache);

/*
 * Cache control area: PCACHE_META_INDEX_MAX stored slots for each of the two
 * tail positions. The cache_encode_*_tail()/cache_decode_*_tail() helpers
 * hand these arrays to cache_pos_encode()/cache_pos_decode().
 */
struct pcache_cache_ctrl {
        /* Updated by gc_thread */
        struct pcache_cache_pos_onmedia key_tail_pos[PCACHE_META_INDEX_MAX];

        /* Updated by writeback_thread */
        struct pcache_cache_pos_onmedia dirty_tail_pos[PCACHE_META_INDEX_MAX];
};

/* Data head: wraps a single cache position; instances are per-CPU (see pcache_cache.data_heads). */
struct pcache_cache_data_head {
        struct pcache_cache_pos head_pos;
};

static inline u16 pcache_cache_get_gc_percent(struct pcache_cache *cache)
{
        return FIELD_GET(PCACHE_CACHE_FLAGS_GC_PERCENT_MASK, cache->cache_info.flags);
}

/*
 * Setter counterpart of pcache_cache_get_gc_percent(); defined outside this
 * header.
 * NOTE(review): presumably validates @percent against
 * PCACHE_CACHE_GC_PERCENT_MIN/MAX -- confirm.
 */
int pcache_cache_set_gc_percent(struct pcache_cache *cache, u8 percent);

/*
 * cache key
 *
 * Cache key helpers declared here and defined outside this header.
 * cache_key_put() is what cache_key_delete() calls after unlinking a key, and
 * cache_pos_advance() is what cache_key_cutfront() uses to move a key's cache
 * position forward.
 */
struct pcache_cache_key *cache_key_alloc(struct pcache_cache_tree *cache_tree, gfp_t gfp_mask);
void cache_key_init(struct pcache_cache_tree *cache_tree, struct pcache_cache_key *key);
void cache_key_get(struct pcache_cache_key *key);
void cache_key_put(struct pcache_cache_key *key);
int cache_key_append(struct pcache_cache *cache, struct pcache_cache_key *key, bool force_close);
void cache_key_insert(struct pcache_cache_tree *cache_tree, struct pcache_cache_key *key, bool fixup);
int cache_key_decode(struct pcache_cache *cache,
                        struct pcache_cache_key_onmedia *key_onmedia,
                        struct pcache_cache_key *key);
void cache_pos_advance(struct pcache_cache_pos *pos, u32 len);

/*
 * PCACHE_KSET_FLAGS_LAST is a bit of pcache_cache_kset_onmedia.flags; when it
 * is set, cache_kset_crc() checksums only the fixed-size kset header.
 * PCACHE_KSET_MAGIC is a 64-bit constant.
 * NOTE(review): presumably the value expected in
 * pcache_cache_kset_onmedia.magic -- confirm.
 */
#define PCACHE_KSET_FLAGS_LAST          BIT(0)
#define PCACHE_KSET_MAGIC               0x676894a64e164f1aULL

/*
 * In-memory key set. kset_onmedia ends in a flexible array and is the last
 * member; PCACHE_KSET_SIZE reserves room after it for PCACHE_KSET_KEYS_MAX
 * on-media keys.
 */
struct pcache_cache_kset {
        struct pcache_cache *cache;
        spinlock_t        kset_lock;
        struct delayed_work flush_work;
        struct pcache_cache_kset_onmedia kset_onmedia;
};

/* On-media kset object declared here and defined outside this header. */
extern struct pcache_cache_kset_onmedia pcache_empty_kset;

/*
 * Status codes for subtree walking.
 * NOTE(review): presumably returned by the pcache_cache_subtree_walk_ctx
 * callbacks and by cache_subtree_walk() -- confirm against the walker.
 */
#define SUBTREE_WALK_RET_OK             0
#define SUBTREE_WALK_RET_ERR            1
#define SUBTREE_WALK_RET_NEED_KEY       2
#define SUBTREE_WALK_RET_NEED_REQ       3
#define SUBTREE_WALK_RET_RESEARCH       4

/*
 * Context handed to cache_subtree_walk().
 *
 * It carries the tree, a start node, the request and key being processed,
 * plus one callback per relative position of two keys. In the diagrams
 * below, '-' marks key_tmp and '=' marks key.
 */
struct pcache_cache_subtree_walk_ctx {
        struct pcache_cache_tree *cache_tree;
        struct rb_node *start_node;
        struct pcache_request *pcache_req;
        struct pcache_cache_key *key;
        u32     req_done;
        int     ret;

        /* pre-allocated key and backing_dev_req */
        struct pcache_cache_key         *pre_alloc_key;
        struct pcache_backing_dev_req   *pre_alloc_req;

        struct list_head *delete_key_list;
        struct list_head *submit_req_list;

        /*
         * key lies entirely before key_tmp:
         *
         *        |--------|            key_tmp
         * |====|                       key
         */
        int (*before)(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
                        struct pcache_cache_subtree_walk_ctx *ctx);

        /*
         * key lies entirely after key_tmp:
         *
         * |----------|                 key_tmp
         *              |=====|         key
         */
        int (*after)(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
                        struct pcache_cache_subtree_walk_ctx *ctx);

        /*
         * The tail of key overlaps the head of key_tmp:
         *
         *     |----------------|       key_tmp
         * |===========|                key
         */
        int (*overlap_tail)(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
                        struct pcache_cache_subtree_walk_ctx *ctx);

        /*
         * The head of key overlaps the tail of key_tmp:
         *
         * |--------|                   key_tmp
         *   |==========|               key
         */
        int (*overlap_head)(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
                        struct pcache_cache_subtree_walk_ctx *ctx);

        /*
         * key fully contains key_tmp:
         *
         *    |----|                    key_tmp
         * |==========|                 key
         */
        int (*overlap_contain)(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
                        struct pcache_cache_subtree_walk_ctx *ctx);

        /*
         * key is fully contained in key_tmp:
         *
         * |-----------|                key_tmp
         *   |====|                     key
         */
        int (*overlap_contained)(struct pcache_cache_key *key, struct pcache_cache_key *key_tmp,
                        struct pcache_cache_subtree_walk_ctx *ctx);

        /*
         * NOTE(review): presumably walk_finally runs once at the end of the
         * walk with the walk's result, and walk_done lets the walk stop
         * early -- confirm against cache_subtree_walk().
         */
        int (*walk_finally)(struct pcache_cache_subtree_walk_ctx *ctx, int ret);
        bool (*walk_done)(struct pcache_cache_subtree_walk_ctx *ctx);
};

/*
 * Subtree walking/searching, kset handling, work functions, replay and cache
 * tree setup/teardown. All are declared here and defined outside this header.
 */
int cache_subtree_walk(struct pcache_cache_subtree_walk_ctx *ctx);
struct rb_node *cache_subtree_search(struct pcache_cache_subtree *cache_subtree, struct pcache_cache_key *key,
                                  struct rb_node **parentp, struct rb_node ***newp,
                                  struct list_head *delete_key_list);
int cache_kset_close(struct pcache_cache *cache, struct pcache_cache_kset *kset);
void clean_fn(struct work_struct *work);
void kset_flush_fn(struct work_struct *work);
int cache_replay(struct pcache_cache *cache);
int cache_tree_init(struct pcache_cache *cache, struct pcache_cache_tree *cache_tree, u32 n_subtrees);
void cache_tree_clear(struct pcache_cache_tree *cache_tree);
void cache_tree_exit(struct pcache_cache_tree *cache_tree);

/* cache segments: segment helpers declared here and defined outside this header */
struct pcache_cache_segment *get_cache_segment(struct pcache_cache *cache);
int cache_seg_init(struct pcache_cache *cache, u32 seg_id, u32 cache_seg_id,
                   bool new_cache);
void cache_seg_get(struct pcache_cache_segment *cache_seg);
void cache_seg_put(struct pcache_cache_segment *cache_seg);
void cache_seg_set_next_seg(struct pcache_cache_segment *cache_seg, u32 seg_id);

/* cache request: request-path entry points, defined outside this header */
int pcache_cache_flush(struct pcache_cache *cache);
void miss_read_end_work_fn(struct work_struct *work);
int pcache_cache_handle_req(struct pcache_cache *cache, struct pcache_request *pcache_req);

/* gc: work function taking a work_struct; defined outside this header */
void pcache_cache_gc_fn(struct work_struct *work);

/* writeback: setup, teardown and work function; defined outside this header */
void cache_writeback_exit(struct pcache_cache *cache);
int cache_writeback_init(struct pcache_cache *cache);
void cache_writeback_fn(struct work_struct *work);

/* inline functions */
/*
 * Pick the subtree responsible for logical offset @off. A tree with a single
 * subtree always uses entry 0; otherwise the index is @off shifted right by
 * PCACHE_CACHE_SUBTREE_SIZE_SHIFT.
 */
static inline struct pcache_cache_subtree *get_subtree(struct pcache_cache_tree *cache_tree, u64 off)
{
        u64 idx = 0;

        if (cache_tree->n_subtrees != 1)
                idx = off >> PCACHE_CACHE_SUBTREE_SIZE_SHIFT;

        return &cache_tree->subtrees[idx];
}

static inline void *cache_pos_addr(struct pcache_cache_pos *pos)
{
        return (pos->cache_seg->segment.data + pos->seg_off);
}

/* Address that the cache's key head position currently refers to. */
static inline void *get_key_head_addr(struct pcache_cache *cache)
{
        struct pcache_cache_pos *head = &cache->key_head;

        return cache_pos_addr(head);
}

/*
 * Map logical offset @off to a kset id: the subtree index
 * (@off >> PCACHE_CACHE_SUBTREE_SIZE_SHIFT) modulo cache->n_ksets.
 */
static inline u32 get_kset_id(struct pcache_cache *cache, u64 off)
{
        u64 subtree_idx = off >> PCACHE_CACHE_SUBTREE_SIZE_SHIFT;
        u32 rem;

        /* Only the remainder is of interest; the quotient is discarded. */
        div_u64_rem(subtree_idx, cache->n_ksets, &rem);

        return rem;
}

/*
 * Return the kset with index @kset_id. Ksets are PCACHE_KSET_SIZE bytes
 * apart, so the lookup is byte arithmetic rather than array indexing.
 */
static inline struct pcache_cache_kset *get_kset(struct pcache_cache *cache, u32 kset_id)
{
        void *base = cache->ksets;

        return base + PCACHE_KSET_SIZE * kset_id;
}

/* Return the data head belonging to the CPU this code is running on. */
static inline struct pcache_cache_data_head *get_data_head(struct pcache_cache *cache)
{
        struct pcache_cache_data_head *head;

        head = this_cpu_ptr(cache->data_heads);

        return head;
}

/* True when PCACHE_CACHE_KEY_FLAGS_EMPTY is set in the key's flags. */
static inline bool cache_key_empty(struct pcache_cache_key *key)
{
        return (key->flags & PCACHE_CACHE_KEY_FLAGS_EMPTY) != 0;
}

/* True when PCACHE_CACHE_KEY_FLAGS_CLEAN is set in the key's flags. */
static inline bool cache_key_clean(struct pcache_cache_key *key)
{
        return (key->flags & PCACHE_CACHE_KEY_FLAGS_CLEAN) != 0;
}

/* Copy the whole cache position (segment pointer and offset) from @src to @dst. */
static inline void cache_pos_copy(struct pcache_cache_pos *dst, struct pcache_cache_pos *src)
{
        memcpy(dst, src, sizeof(*dst));
}

/**
 * cache_seg_is_ctrl_seg - Tell whether a cache segment holds the cache ctrl.
 * @cache_seg_id: ID of the cache segment to test.
 *
 * The segment control area of the first cache segment (cache segment ID 0)
 * is extended to also carry the cache control (pcache_cache_ctrl) for the
 * whole PCACHE cache.
 *
 * Returns true only for cache segment ID 0.
 */
static inline bool cache_seg_is_ctrl_seg(u32 cache_seg_id)
{
        return !cache_seg_id;
}

/**
 * cache_key_cutfront - Drop @cut_len bytes from the start of a cache key.
 * @key: Cache key to shrink.
 * @cut_len: Number of bytes to remove from the front.
 *
 * If the key has a cache segment attached, its cache position is advanced by
 * @cut_len first. The logical offset then grows and the length shrinks by
 * the same amount. No check is made that @cut_len fits within the key.
 */
static inline void cache_key_cutfront(struct pcache_cache_key *key, u32 cut_len)
{
        struct pcache_cache_pos *pos = &key->cache_pos;

        if (pos->cache_seg)
                cache_pos_advance(pos, cut_len);

        key->off = key->off + cut_len;
        key->len = key->len - cut_len;
}

/**
 * cache_key_cutback - Drop @cut_len bytes from the end of a cache key.
 * @key: Cache key to shrink.
 * @cut_len: Number of bytes to remove from the back.
 *
 * Only the length changes; offset and cache position stay as they are.
 * No check is made that @cut_len fits within the key.
 */
static inline void cache_key_cutback(struct pcache_cache_key *key, u32 cut_len)
{
        key->len = key->len - cut_len;
}

/*
 * Unlink @key from its subtree's rbtree, clear its flags and drop one
 * reference via cache_key_put(). A key without a subtree trips BUG_ON().
 */
static inline void cache_key_delete(struct pcache_cache_key *key)
{
        struct pcache_cache_subtree *subtree = key->cache_subtree;

        BUG_ON(!subtree);

        rb_erase(&key->rb_node, &subtree->root);
        key->flags = 0;
        cache_key_put(key);
}

static inline bool cache_data_crc_on(struct pcache_cache *cache)
{
        return (cache->cache_info.flags & PCACHE_CACHE_FLAGS_DATA_CRC);
}

/* Extract the cache mode field (PCACHE_CACHE_FLAGS_CACHE_MODE_MASK) from the cache info flags. */
static inline u32 cache_mode_get(struct pcache_cache *cache)
{
        u32 info_flags = cache->cache_info.flags;

        return FIELD_GET(PCACHE_CACHE_FLAGS_CACHE_MODE_MASK, info_flags);
}

/*
 * Store @cache_mode in the cache mode field of the in-memory cache info
 * flags: the field is cleared first, then the new value is OR-ed in.
 * Other flag bits are left untouched.
 */
static inline void cache_mode_set(struct pcache_cache *cache, u32 cache_mode)
{
        __u32 *info_flags = &cache->cache_info.flags;

        *info_flags &= ~PCACHE_CACHE_FLAGS_CACHE_MODE_MASK;
        *info_flags |= FIELD_PREP(PCACHE_CACHE_FLAGS_CACHE_MODE_MASK, cache_mode);
}

/**
 * cache_key_data_crc - Checksum the data a cache key refers to.
 * @key: Cache key whose data is checksummed.
 *
 * Runs crc32c(), seeded with PCACHE_CRC_SEED, over key->len bytes starting
 * at the address of the key's cache position.
 *
 * Returns the resulting checksum.
 */
static inline u32 cache_key_data_crc(struct pcache_cache_key *key)
{
        return crc32c(PCACHE_CRC_SEED, cache_pos_addr(&key->cache_pos), key->len);
}

static inline u32 cache_kset_crc(struct pcache_cache_kset_onmedia *kset_onmedia)
{
        u32 crc_size;

        if (kset_onmedia->flags & PCACHE_KSET_FLAGS_LAST)
                crc_size = sizeof(struct pcache_cache_kset_onmedia) - 4;
        else
                crc_size = struct_size(kset_onmedia, data, kset_onmedia->key_num) - 4;

        return crc32c(PCACHE_CRC_SEED, (void *)kset_onmedia + 4, crc_size);
}

static inline u32 get_kset_onmedia_size(struct pcache_cache_kset_onmedia *kset_onmedia)
{
        return struct_size_t(struct pcache_cache_kset_onmedia, data, kset_onmedia->key_num);
}

/**
 * cache_seg_remain - Space left in a segment after a position.
 * @pos: Cache position to measure from.
 *
 * Returns the segment's data_size minus the position's seg_off.
 */
static inline u32 cache_seg_remain(struct pcache_cache_pos *pos)
{
        return pos->cache_seg->segment.data_size - pos->seg_off;
}

/**
 * cache_key_invalid - Tell whether a cache key has gone stale.
 * @key: Cache key to test.
 *
 * When the GC (garbage collection) thread finds a segment reclaimable it
 * bumps the segment's generation (gen) but does not immediately remove the
 * cache keys pointing into it. Such a key can be recognised on access by
 * its older generation.
 *
 * Returns false for an empty key. Otherwise returns true when the key's
 * seg_gen is lower than the current gen of its cache segment.
 */
static inline bool cache_key_invalid(struct pcache_cache_key *key)
{
        /* The segment is only dereferenced for non-empty keys. */
        return !cache_key_empty(key) &&
               key->seg_gen < key->cache_pos.cache_seg->gen;
}

/**
 * cache_key_lstart - Logical start offset of a cache key.
 * @key: Cache key to query.
 *
 * Returns key->off.
 */
static inline u64 cache_key_lstart(struct pcache_cache_key *key)
{
        u64 lstart = key->off;

        return lstart;
}

/**
 * cache_key_lend - Logical end offset of a cache key.
 * @key: Cache key to query.
 *
 * Returns key->off + key->len, i.e. the offset just past the key's range.
 */
static inline u64 cache_key_lend(struct pcache_cache_key *key)
{
        u64 lend = key->off;

        lend += key->len;

        return lend;
}

/*
 * Copy the descriptive fields of @key_src into @key_dst: tree and subtree
 * pointers, logical range, flags, segment generation and cache position.
 * The refcount, rb_node and list_node of @key_dst are not touched.
 */
static inline void cache_key_copy(struct pcache_cache_key *key_dst, struct pcache_cache_key *key_src)
{
        /* Tree membership */
        key_dst->cache_tree = key_src->cache_tree;
        key_dst->cache_subtree = key_src->cache_subtree;

        /* Logical range and state */
        key_dst->off = key_src->off;
        key_dst->len = key_src->len;
        key_dst->flags = key_src->flags;
        key_dst->seg_gen = key_src->seg_gen;

        /* Location of the data in the cache */
        cache_pos_copy(&key_dst->cache_pos, &key_src->cache_pos);
}

/**
 * cache_pos_onmedia_crc - Checksum an on-media cache position.
 * @pos_om: On-media cache position to checksum.
 *
 * Passes the embedded metadata header and the size of the whole structure
 * to pcache_meta_crc().
 * NOTE(review): the earlier comment stated that the first 4 bytes are
 * excluded from the checksum; any such skipping happens inside
 * pcache_meta_crc() -- confirm there.
 *
 * Returns the value computed by pcache_meta_crc().
 */
static inline u32 cache_pos_onmedia_crc(struct pcache_cache_pos_onmedia *pos_om)
{
        return pcache_meta_crc(&pos_om->header, sizeof(*pos_om));
}

/*
 * Convert between the in-memory position (@pos) and its stored form
 * (@pos_onmedia); defined outside this header. Callers in this file pass the
 * first element of a PCACHE_META_INDEX_MAX-sized slot array as @pos_onmedia.
 * Encode takes the sequence number by value; decode takes a pointer to it.
 * NOTE(review): @index presumably tracks which slot is current -- confirm.
 */
void cache_pos_encode(struct pcache_cache *cache,
                             struct pcache_cache_pos_onmedia *pos_onmedia,
                             struct pcache_cache_pos *pos, u64 seq, u32 *index);
int cache_pos_decode(struct pcache_cache *cache,
                            struct pcache_cache_pos_onmedia *pos_onmedia,
                            struct pcache_cache_pos *pos, u64 *seq, u32 *index);

static inline void cache_encode_key_tail(struct pcache_cache *cache)
{
        cache_pos_encode(cache, cache->cache_ctrl->key_tail_pos,
                        &cache->key_tail, ++cache->key_tail_seq,
                        &cache->key_tail_index);
}

/*
 * Decode the key tail from the cache ctrl's key_tail_pos slots into
 * cache->key_tail, key_tail_seq and key_tail_index.
 * Returns whatever cache_pos_decode() returns.
 */
static inline int cache_decode_key_tail(struct pcache_cache *cache)
{
        struct pcache_cache_pos_onmedia *slots = cache->cache_ctrl->key_tail_pos;
        int ret;

        ret = cache_pos_decode(cache, slots, &cache->key_tail,
                               &cache->key_tail_seq, &cache->key_tail_index);

        return ret;
}

static inline void cache_encode_dirty_tail(struct pcache_cache *cache)
{
        cache_pos_encode(cache, cache->cache_ctrl->dirty_tail_pos,
                        &cache->dirty_tail, ++cache->dirty_tail_seq,
                        &cache->dirty_tail_index);
}

/*
 * Decode the dirty tail from the cache ctrl's dirty_tail_pos slots into
 * cache->dirty_tail, dirty_tail_seq and dirty_tail_index.
 * Returns whatever cache_pos_decode() returns.
 */
static inline int cache_decode_dirty_tail(struct pcache_cache *cache)
{
        struct pcache_cache_pos_onmedia *slots = cache->cache_ctrl->dirty_tail_pos;
        int ret;

        ret = cache_pos_decode(cache, slots, &cache->dirty_tail,
                               &cache->dirty_tail_seq, &cache->dirty_tail_index);

        return ret;
}

/*
 * Global init/exit pair for the cache code; defined outside this header.
 * NOTE(review): presumably creates and destroys key_cache -- confirm.
 */
int pcache_cache_init(void);
void pcache_cache_exit(void);
#endif /* _PCACHE_CACHE_H */