root/fs/xfs/xfs_buf_mem.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2023-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_buf.h"
#include "xfs_buf_mem.h"
#include "xfs_trace.h"
#include <linux/shmem_fs.h>
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_error.h"

/*
 * Buffer Cache for In-Memory Files
 * ================================
 *
 * Online fsck wants to create ephemeral ordered recordsets.  The existing
 * btree infrastructure can do this, but we need the buffer cache to target
 * memory instead of block devices.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xmbuf mechanism uses an unlinked shmem file to
 * store our staging data.  This file is not installed in the file descriptor
 * table so that user programs cannot access the data, which means that the
 * xmbuf must be freed with xmbuf_destroy.
 *
 * xmbufs assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 *
 * The only supported block size is PAGE_SIZE, and we cannot use highmem.
 */

/*
 * shmem files used to back an in-memory buffer cache must not be exposed to
 * userspace.  Upper layers must coordinate access to the one handle returned
 * by the constructor, so establish a separate lock class for xmbufs to avoid
 * confusing lockdep.
 */
static struct lock_class_key xmbuf_i_mutex_key;

/*
 * Allocate a buffer cache target for a memory-backed file and set up the
 * buffer target.
 */
int
xmbuf_alloc(
        struct xfs_mount        *mp,
        const char              *descr,
        struct xfs_buftarg      **btpp)
{
        struct file             *file;
        struct inode            *inode;
        struct xfs_buftarg      *btp;
        int                     error;

        btp = kzalloc_flex(*btp, bt_cache, 1);
        if (!btp)
                return -ENOMEM;

        file = shmem_kernel_file_setup(descr, 0, EMPTY_VMA_FLAGS);
        if (IS_ERR(file)) {
                error = PTR_ERR(file);
                goto out_free_btp;
        }
        inode = file_inode(file);

        /* private file, private locking */
        lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);

        /*
         * We don't want to bother with kmapping data during repair, so don't
         * allow highmem folios to back this mapping.
         */
        mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

        /* ensure all writes are below EOF to avoid pagecache zeroing */
        i_size_write(inode, inode->i_sb->s_maxbytes);

        error = xfs_buf_cache_init(btp->bt_cache);
        if (error)
                goto out_file;

        /* Initialize buffer target */
        btp->bt_mount = mp;
        btp->bt_dev = (dev_t)-1U;
        btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
        btp->bt_file = file;
        btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
        btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;

        error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
        if (error)
                goto out_bcache;

        trace_xmbuf_create(btp);

        *btpp = btp;
        return 0;

out_bcache:
        xfs_buf_cache_destroy(btp->bt_cache);
out_file:
        fput(file);
out_free_btp:
        kfree(btp);
        return error;
}

/* Free a buffer cache target for a memory-backed buffer cache. */
void
xmbuf_free(
        struct xfs_buftarg      *btp)
{
        ASSERT(xfs_buftarg_is_mem(btp));
        ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0);

        trace_xmbuf_free(btp);

        xfs_destroy_buftarg(btp);
        xfs_buf_cache_destroy(btp->bt_cache);
        fput(btp->bt_file);
        kfree(btp);
}

/* Directly map a shmem folio into the buffer cache. */
int
xmbuf_map_backing_mem(
        struct xfs_buf          *bp)
{
        struct inode            *inode = file_inode(bp->b_target->bt_file);
        struct folio            *folio = NULL;
        loff_t                  pos = BBTOB(xfs_buf_daddr(bp));
        int                     error;

        ASSERT(xfs_buftarg_is_mem(bp->b_target));

        if (bp->b_map_count != 1)
                return -ENOMEM;
        if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
                return -ENOMEM;
        if (offset_in_page(pos) != 0) {
                ASSERT(offset_in_page(pos));
                return -ENOMEM;
        }

        error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_CACHE);
        if (error)
                return error;

        if (filemap_check_wb_err(inode->i_mapping, 0)) {
                folio_unlock(folio);
                folio_put(folio);
                return -EIO;
        }

        /*
         * Mark the folio dirty so that it won't be reclaimed once we drop the
         * (potentially last) reference in xfs_buf_free.
         */
        folio_set_dirty(folio);
        folio_unlock(folio);

        bp->b_addr = folio_address(folio) + offset_in_folio(folio, pos);
        return 0;
}

/* Is this a valid daddr within the buftarg? */
bool
xmbuf_verify_daddr(
        struct xfs_buftarg      *btp,
        xfs_daddr_t             daddr)
{
        struct inode            *inode = file_inode(btp->bt_file);

        ASSERT(xfs_buftarg_is_mem(btp));

        return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
}

/* Discard the folio backing this buffer. */
static void
xmbuf_stale(
        struct xfs_buf          *bp)
{
        struct inode            *inode = file_inode(bp->b_target->bt_file);
        loff_t                  pos;

        ASSERT(xfs_buftarg_is_mem(bp->b_target));

        pos = BBTOB(xfs_buf_daddr(bp));
        shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
}

/*
 * Finalize a buffer -- discard the backing folio if it's stale, or run the
 * write verifier to detect problems.
 */
int
xmbuf_finalize(
        struct xfs_buf          *bp)
{
        xfs_failaddr_t          fa;
        int                     error = 0;

        if (bp->b_flags & XBF_STALE) {
                xmbuf_stale(bp);
                return 0;
        }

        /*
         * Although this btree is ephemeral, validate the buffer structure so
         * that we can detect memory corruption errors and software bugs.
         */
        fa = bp->b_ops->verify_struct(bp);
        if (fa) {
                error = -EFSCORRUPTED;
                xfs_verifier_error(bp, error, fa);
        }

        return error;
}

/*
 * Detach this xmbuf buffer from the transaction by any means necessary.
 * All buffers are direct-mapped, so they do not need bwrite.
 */
void
xmbuf_trans_bdetach(
        struct xfs_trans        *tp,
        struct xfs_buf          *bp)
{
        struct xfs_buf_log_item *bli = bp->b_log_item;

        ASSERT(bli != NULL);

        bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
                            XFS_BLI_LOGGED | XFS_BLI_STALE);
        clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);

        while (bp->b_log_item != NULL)
                xfs_trans_bdetach(tp, bp);
}