root/fs/xfs/scrub/xfile.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/trace.h"
#include <linux/shmem_fs.h>

/*
 * Swappable Temporary Memory
 * ==========================
 *
 * Online checking sometimes needs to be able to stage a large amount of data
 * in memory.  This information might not fit in the available memory and it
 * doesn't all need to be accessible at all times.  In other words, we want an
 * indexed data buffer to store data that can be paged out.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xfile mechanism uses an unlinked shmem file to
 * store our staging data.  This file is not installed in the file descriptor
 * table so that user programs cannot access the data, which means that the
 * xfile must be freed with xfile_destroy.
 *
 * xfiles assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 */

/*
 * xfiles must not be exposed to userspace and require upper layers to
 * coordinate access to the one handle returned by the constructor, so
 * establish a separate lock class for xfiles to avoid confusing lockdep.
 *
 * xfile_create() assigns this class to the backing inode's i_rwsem, and
 * xfile_destroy() puts the filesystem type's default class back before the
 * file reference is dropped.
 */
static struct lock_class_key xfile_i_mutex_key;

/*
 * Allocate a new xfile backed by a kernel-internal shmem file.  @description
 * names the backing file and is used in the trace output; @isize is the size
 * handed to the shmem file constructor.
 *
 * On success the new handle is stored in @xfilep and zero is returned.  On
 * failure a negative errno is returned and @xfilep is left untouched.
 */
int
xfile_create(
        const char              *description,
        loff_t                  isize,
        struct xfile            **xfilep)
{
        struct xfile            *xf;
        struct inode            *inode;

        xf = kmalloc_obj(struct xfile, XCHK_GFP_FLAGS);
        if (!xf)
                return -ENOMEM;

        xf->file = shmem_kernel_file_setup(description, isize,
                                           mk_vma_flags(VMA_NORESERVE_BIT));
        if (IS_ERR(xf->file)) {
                /* Only the handle itself exists so far; free it and bail. */
                int             error = PTR_ERR(xf->file);

                kfree(xf);
                return error;
        }

        /* Give the backing inode's i_rwsem its own lockdep class. */
        inode = file_inode(xf->file);
        lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);

        /*
         * Restrict the mapping to GFP_KERNEL so that highmem pages never back
         * it; that way nobody has to kmap the data during repair.
         */
        mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

        trace_xfile_create(xf);

        *xfilep = xf;
        return 0;
}

/*
 * Tear down an xfile: drop the reference to the backing file and free the
 * handle.  @xf must not be used after this returns.
 */
void
xfile_destroy(
        struct xfile            *xf)
{
        struct inode            *inode;

        trace_xfile_destroy(xf);

        /*
         * Hand i_rwsem back to the filesystem type's default lock class
         * before letting go of the file.
         */
        inode = file_inode(xf->file);
        lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);

        fput(xf->file);
        kfree(xf);
}

/*
 * Copy @count bytes starting at @pos out of the xfile and into @buf.  Ranges
 * that have no backing folio read back as zeroes.
 *
 * The xfile stands in for memory, so every failure -- a request that is too
 * large, a folio lookup error, or a short copy -- is reported as -ENOMEM.
 * Returns zero once the whole range has been copied.
 */
int
xfile_load(
        struct xfile            *xf,
        void                    *buf,
        size_t                  count,
        loff_t                  pos)
{
        struct inode            *inode = file_inode(xf->file);
        unsigned int            nofs_flags;

        /* Reject oversized requests and ranges that run past s_maxbytes. */
        if (count > MAX_RW_COUNT ||
            inode->i_sb->s_maxbytes - pos < count)
                return -ENOMEM;

        trace_xfile_load(xf, pos, count);

        nofs_flags = memalloc_nofs_save();
        while (count > 0) {
                struct folio    *folio;
                unsigned int    nbytes;
                int             ret;

                ret = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
                                SGP_READ);
                if (ret < 0)
                        break;

                if (folio) {
                        unsigned int    foff;

                        /* Stop if the mapping reports a writeback error. */
                        if (filemap_check_wb_err(inode->i_mapping, 0)) {
                                folio_unlock(folio);
                                folio_put(folio);
                                break;
                        }

                        /* Copy as much as this folio can supply. */
                        foff = offset_in_folio(folio, pos);
                        nbytes = min_t(ssize_t, count,
                                        folio_size(folio) - foff);
                        memcpy(buf, folio_address(folio) + foff, nbytes);

                        folio_unlock(folio);
                        folio_put(folio);
                } else {
                        /*
                         * Nothing was ever stored here; hand back zeroes up
                         * to the next page boundary.
                         */
                        nbytes = min_t(ssize_t, count,
                                        PAGE_SIZE - offset_in_page(pos));
                        memset(buf, 0, nbytes);
                }

                count -= nbytes;
                pos += nbytes;
                buf += nbytes;
        }
        memalloc_nofs_restore(nofs_flags);

        /* Any bytes left over mean the loop bailed out early. */
        return count ? -ENOMEM : 0;
}

/*
 * Copy @count bytes from @buf into the xfile starting at @pos, growing the
 * file size first if the range extends past it.
 *
 * The xfile stands in for memory, so every failure -- a request that is too
 * large, a folio lookup error, or a short copy -- is reported as -ENOMEM.
 * Returns zero once the whole range has been written.
 */
int
xfile_store(
        struct xfile            *xf,
        const void              *buf,
        size_t                  count,
        loff_t                  pos)
{
        struct inode            *inode = file_inode(xf->file);
        unsigned int            nofs_flags;

        /* Reject oversized requests and ranges that run past s_maxbytes. */
        if (count > MAX_RW_COUNT ||
            inode->i_sb->s_maxbytes - pos < count)
                return -ENOMEM;

        trace_xfile_store(xf, pos, count);

        /*
         * Bump i_size before touching any folios; otherwise
         * shmem_get_folio(..., SGP_CACHE) errors out instead of allocating a
         * folio.
         */
        if (pos + count > i_size_read(inode))
                i_size_write(inode, pos + count);

        nofs_flags = memalloc_nofs_save();
        while (count > 0) {
                struct folio    *folio;
                unsigned int    foff;
                unsigned int    nbytes;
                int             ret;

                ret = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
                                SGP_CACHE);
                if (ret < 0)
                        break;

                /* Stop if the mapping reports a writeback error. */
                if (filemap_check_wb_err(inode->i_mapping, 0)) {
                        folio_unlock(folio);
                        folio_put(folio);
                        break;
                }

                /* Fill as much of this folio as the request covers. */
                foff = offset_in_folio(folio, pos);
                nbytes = min_t(ssize_t, count, folio_size(folio) - foff);
                memcpy(folio_address(folio) + foff, buf, nbytes);

                folio_mark_dirty(folio);
                folio_unlock(folio);
                folio_put(folio);

                count -= nbytes;
                pos += nbytes;
                buf += nbytes;
        }
        memalloc_nofs_restore(nofs_flags);

        /* Any bytes left over mean the loop bailed out early. */
        return count ? -ENOMEM : 0;
}

/*
 * Look for the next written area of the xfile at or after @pos by issuing a
 * SEEK_DATA llseek on the backing file.  The llseek result is traced and
 * returned to the caller unmodified.
 */
loff_t
xfile_seek_data(
        struct xfile            *xf,
        loff_t                  pos)
{
        loff_t                  data_pos = vfs_llseek(xf->file, pos, SEEK_DATA);

        trace_xfile_seek_data(xf, pos, data_pos);
        return data_pos;
}

/*
 * Look up the folio backing the @len bytes at @pos and return it locked.  The
 * object must fit inside a single folio.  When @flags contains XFILE_ALLOC
 * the file size is extended to cover the range and the lookup is done with
 * SGP_CACHE; otherwise the lookup uses SGP_READ.
 *
 * Returns the locked folio on success, NULL if no folio was found or the
 * folio does not cover the whole range, or an ERR_PTR on failure.  Release a
 * returned folio with xfile_put_folio.
 */
struct folio *
xfile_get_folio(
        struct xfile            *xf,
        loff_t                  pos,
        size_t                  len,
        unsigned int            flags)
{
        struct inode            *inode = file_inode(xf->file);
        struct folio            *folio = NULL;
        struct folio            *ret;
        unsigned int            alloc = flags & XFILE_ALLOC;
        unsigned int            nofs_flags;
        int                     error;

        /* The range must not run past s_maxbytes. */
        if (inode->i_sb->s_maxbytes - pos < len)
                return ERR_PTR(-ENOMEM);

        trace_xfile_get_folio(xf, pos, len);

        /*
         * Bump i_size before the lookup; otherwise
         * shmem_get_folio(..., SGP_CACHE) errors out instead of allocating a
         * folio.
         */
        if (alloc && pos + len > i_size_read(inode))
                i_size_write(inode, pos + len);

        nofs_flags = memalloc_nofs_save();
        error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
                        alloc ? SGP_CACHE : SGP_READ);
        memalloc_nofs_restore(nofs_flags);
        if (error)
                return ERR_PTR(error);

        if (!folio)
                return NULL;

        /* The folio is useless to the caller unless it spans the object. */
        if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
                ret = NULL;
                goto out_release;
        }

        if (filemap_check_wb_err(inode->i_mapping, 0)) {
                ret = ERR_PTR(-EIO);
                goto out_release;
        }

        /*
         * Dirty the folio so that it is not reclaimed after
         * xfile_put_folio drops what may be the last reference.
         */
        if (alloc)
                folio_mark_dirty(folio);
        return folio;

out_release:
        folio_unlock(folio);
        folio_put(folio);
        return ret;
}

/*
 * Release the (locked) folio for a memory object.
 */
void
xfile_put_folio(
        struct xfile            *xf,
        struct folio            *folio)
{
        trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));

        folio_unlock(folio);
        folio_put(folio);
}

/*
 * Discard the page cache that's backing a range of the xfile.
 *
 * @pos is the first byte of the range and @count is its length in bytes.  A
 * zero @count is traced but otherwise does nothing.
 */
void
xfile_discard(
        struct xfile            *xf,
        loff_t                  pos,
        u64                     count)
{
        trace_xfile_discard(xf, pos, count);

        /*
         * shmem_truncate_range takes an inclusive end offset, so an empty
         * range cannot be expressed: pos + count - 1 would land before @pos,
         * and for @pos == 0 it evaluates to -1, which the shmem truncation
         * code interprets as "through end of file".  Never let an empty
         * discard wipe the whole xfile.
         */
        if (!count)
                return;

        shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1);
}