root/fs/xfs/libxfs/xfs_metafile.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_metafile.h"
#include "xfs_trace.h"
#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_alloc.h"
#include "xfs_rtgroup.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_rtrefcount_btree.h"

/*
 * Lookup table pairing each metafile type with its name string.  The entries
 * come from the XFS_METAFILE_TYPE_STR macro, which is defined elsewhere.
 */
static const struct {
        enum xfs_metafile_type  mtype;
        const char              *name;
} xfs_metafile_type_strs[] = { XFS_METAFILE_TYPE_STR };

/*
 * Look up the name string for a metafile type in xfs_metafile_type_strs.
 * Returns NULL if the table has no entry for @metatype.
 */
const char *
xfs_metafile_type_str(enum xfs_metafile_type metatype)
{
        unsigned int    idx = 0;

        while (idx < ARRAY_SIZE(xfs_metafile_type_strs)) {
                if (xfs_metafile_type_strs[idx].mtype == metatype)
                        return xfs_metafile_type_strs[idx].name;
                idx++;
        }

        return NULL;
}

/* Set up an inode to be recognized as a metadata directory inode. */
void
xfs_metafile_set_iflag(
        struct xfs_trans        *tp,
        struct xfs_inode        *ip,
        enum xfs_metafile_type  metafile_type)
{
        VFS_I(ip)->i_mode &= ~0777;
        VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
        VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
        if (S_ISDIR(VFS_I(ip)->i_mode))
                ip->i_diflags |= XFS_METADIR_DIFLAGS;
        else
                ip->i_diflags |= XFS_METAFILE_DIFLAGS;
        ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
        ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
        ip->i_metatype = metafile_type;
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

        XFS_STATS_DEC(ip->i_mount, xs_inodes_active);
        XFS_STATS_INC(ip->i_mount, xs_inodes_meta);
}

/*
 * Remove the METADATA flag from an inode and log the inode core to @tp.
 * The inode must currently be a metadir inode with a link count of zero.
 * It is moved from the xs_inodes_meta statistic back to xs_inodes_active.
 */
void
xfs_metafile_clear_iflag(
        struct xfs_trans        *tp,
        struct xfs_inode        *ip)
{
        struct xfs_mount        *mp = ip->i_mount;

        ASSERT(xfs_is_metadir_inode(ip));
        ASSERT(VFS_I(ip)->i_nlink == 0);

        ip->i_diflags2 = ip->i_diflags2 & ~XFS_DIFLAG2_METADATA;
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

        XFS_STATS_INC(mp, xs_inodes_active);
        XFS_STATS_DEC(mp, xs_inodes_meta);
}

/*
 * Can @rhs blocks be satisfied from the remaining metafile reservation plus
 * the free block counter?
 */
static inline bool
xfs_metafile_resv_can_cover(
        struct xfs_mount        *mp,
        int64_t                 rhs)
{
        /*
         * If the reservation alone cannot supply @rhs, the shortfall has to
         * come out of the free block counter; only then is that counter
         * consulted.
         */
        if (mp->m_metafile_resv_avail < rhs)
                return xfs_compare_freecounter(mp, XC_FREE_BLOCKS,
                                rhs - mp->m_metafile_resv_avail, 2048) >= 0;

        /* The remaining reservation covers the request by itself. */
        return true;
}

/*
 * Report whether the metafile reservation is critically low.  It is critical
 * when the blocks we can get hold of cannot cover either m_rtbtree_maxlevels
 * blocks or one tenth of the reservation target.  If both thresholds are
 * covered, the result is decided by the XFS_ERRTAG_METAFILE_RESV_CRITICAL
 * error tag.
 */
bool
xfs_metafile_resv_critical(
        struct xfs_mount        *mp)
{
        ASSERT(xfs_has_metadir(mp));

        trace_xfs_metafile_resv_critical(mp, 0);

        /* The error tag is consulted only when both thresholds are covered. */
        if (xfs_metafile_resv_can_cover(mp, mp->m_rtbtree_maxlevels) &&
            xfs_metafile_resv_can_cover(mp,
                        div_u64(mp->m_metafile_resv_target, 10)))
                return XFS_TEST_ERROR(mp, XFS_ERRTAG_METAFILE_RESV_CRITICAL);

        return true;
}

/*
 * Allocate a block from the metadata file's reservation.
 *
 * Accounts for the args->len blocks described by @args on behalf of metadata
 * inode @ip.  As much as possible is taken from m_metafile_resv_avail; any
 * remainder is charged against the free block counter.  m_metafile_resv_used
 * and the inode's i_nblocks both grow by args->len, and the inode core is
 * logged to args->tp.
 *
 * Nothing is returned: a failure of xfs_dec_fdblocks() for the remainder is
 * not propagated, it only selects which superblock field the transaction
 * delta is applied to.
 */
void
xfs_metafile_resv_alloc_space(
        struct xfs_inode        *ip,
        struct xfs_alloc_arg    *args)
{
        struct xfs_mount        *mp = ip->i_mount;
        /* Blocks still to be accounted for; shrinks as sources are used. */
        int64_t                 len = args->len;

        ASSERT(xfs_is_metadir_inode(ip));
        ASSERT(args->resv == XFS_AG_RESV_METAFILE);

        trace_xfs_metafile_resv_alloc_space(mp, args->len);

        /*
         * Allocate the blocks from the metadata inode's block reservation
         * and update the ondisk sb counter.
         */
        mutex_lock(&mp->m_metafile_resv_lock);
        if (mp->m_metafile_resv_avail > 0) {
                int64_t         from_resv;

                /* Take no more than what is left in the reservation. */
                from_resv = min_t(int64_t, len, mp->m_metafile_resv_avail);
                mp->m_metafile_resv_avail -= from_resv;
                xfs_mod_delalloc(ip, 0, -from_resv);
                xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS,
                                -from_resv);
                len -= from_resv;
        }

        /*
         * Any allocation in excess of the reservation requires in-core and
         * on-disk fdblocks updates.  If we can grab @len blocks from the
         * in-core fdblocks then all we need to do is update the on-disk
         * superblock; if not, then try to steal some from the transaction's
         * block reservation.  Overruns are only expected for rmap btrees.
         */
        if (len) {
                unsigned int    field;
                int             error;

                /*
                 * The outcome of the in-core decrement picks the superblock
                 * field for the transaction delta below.
                 */
                error = xfs_dec_fdblocks(ip->i_mount, len, true);
                if (error)
                        field = XFS_TRANS_SB_FDBLOCKS;
                else
                        field = XFS_TRANS_SB_RES_FDBLOCKS;

                xfs_trans_mod_sb(args->tp, field, -len);
        }

        /* The full request counts as used, whichever source supplied it. */
        mp->m_metafile_resv_used += args->len;
        mutex_unlock(&mp->m_metafile_resv_lock);

        ip->i_nblocks += args->len;
        xfs_trans_log_inode(args->tp, ip, XFS_ILOG_CORE);
}

/*
 * Give @len blocks freed from metadata inode @ip back to the metafile
 * reservation.
 *
 * The inode's block count and m_metafile_resv_used both drop by @len.  Freed
 * blocks refill m_metafile_resv_avail until used + avail reaches the target
 * again.  Anything beyond that is returned through the XFS_TRANS_SB_FDBLOCKS
 * delta of @tp.
 */
void
xfs_metafile_resv_free_space(
        struct xfs_inode        *ip,
        struct xfs_trans        *tp,
        xfs_filblks_t           len)
{
        struct xfs_mount        *mp = ip->i_mount;
        int64_t                 refill;

        ASSERT(xfs_is_metadir_inode(ip));

        trace_xfs_metafile_resv_free_space(mp, len);

        /* The blocks no longer belong to the inode. */
        ip->i_nblocks -= len;
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

        mutex_lock(&mp->m_metafile_resv_lock);

        mp->m_metafile_resv_used -= len;

        /*
         * Work out how far used + avail now sits below the target.  Up to
         * that many of the freed blocks go back into the reservation, and
         * they are recorded through XFS_TRANS_SB_RES_FDBLOCKS.
         */
        refill = mp->m_metafile_resv_target -
                (mp->m_metafile_resv_used + mp->m_metafile_resv_avail);
        if (refill > 0) {
                refill = min_t(int64_t, refill, len);
                len -= refill;

                mp->m_metafile_resv_avail += refill;
                xfs_mod_delalloc(ip, 0, refill);
                xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, refill);
        }

        mutex_unlock(&mp->m_metafile_resv_lock);

        /*
         * Whatever did not fit into the reservation goes back to the
         * filesystem, so update the in-core and on-disk counters.
         */
        if (len != 0)
                xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len);
}

/*
 * Tear down the metafile reservation bookkeeping.  Any blocks still held in
 * m_metafile_resv_avail are removed from the delalloc accounting and added
 * back to the free block counter, then all three reservation fields are
 * reset to zero.  Both callers in this file hold m_metafile_resv_lock.
 */
static void
__xfs_metafile_resv_free(
        struct xfs_mount        *mp)
{
        if (mp->m_metafile_resv_avail != 0) {
                xfs_mod_sb_delalloc(mp, -(int64_t)mp->m_metafile_resv_avail);
                xfs_add_fdblocks(mp, mp->m_metafile_resv_avail);
        }

        mp->m_metafile_resv_target = 0;
        mp->m_metafile_resv_used = 0;
        mp->m_metafile_resv_avail = 0;
}

/*
 * Release the unused part of the metafile space reservation.  This does
 * nothing on filesystems without the metadir feature.
 */
void
xfs_metafile_resv_free(
        struct xfs_mount        *mp)
{
        if (xfs_has_metadir(mp)) {
                trace_xfs_metafile_resv_free(mp, 0);

                /* The reservation fields are only changed under the lock. */
                mutex_lock(&mp->m_metafile_resv_lock);
                __xfs_metafile_resv_free(mp);
                mutex_unlock(&mp->m_metafile_resv_lock);
        }
}

/*
 * Set up a metafile space reservation.
 *
 * Any previous reservation is released first.  Every realtime group is then
 * walked, summing the blocks already owned by the rtrmap and rtrefcount btree
 * inodes (@used) and the reservation each btree asks for (@target).  The part
 * of the target that is not yet in use is taken out of the free block counter
 * and recorded in m_metafile_resv_avail.
 *
 * Returns 0 on success, including when the filesystem has no metadir feature
 * or nothing needs reserving.  Otherwise returns the error from
 * xfs_dec_fdblocks(); in that case the reservation fields stay zeroed.
 */
int
xfs_metafile_resv_init(
        struct xfs_mount        *mp)
{
        struct xfs_rtgroup      *rtg = NULL;
        xfs_filblks_t           used = 0, target = 0;
        xfs_filblks_t           hidden_space;
        /* Upper bound for the target: a quarter of sb_dblocks. */
        xfs_rfsblock_t          dblocks_avail = mp->m_sb.sb_dblocks / 4;
        int                     error = 0;

        if (!xfs_has_metadir(mp))
                return 0;

        /*
         * Free any previous reservation to have a clean slate.
         */
        mutex_lock(&mp->m_metafile_resv_lock);
        __xfs_metafile_resv_free(mp);

        /*
         * Currently the only btree metafiles that require reservations are the
         * rtrmap and the rtrefcount.  Anything new will have to be added here
         * as well.
         */
        while ((rtg = xfs_rtgroup_next(mp, rtg))) {
                if (xfs_has_rtrmapbt(mp)) {
                        used += rtg_rmap(rtg)->i_nblocks;
                        target += xfs_rtrmapbt_calc_reserves(mp);
                }
                if (xfs_has_rtreflink(mp)) {
                        used += rtg_refcount(rtg)->i_nblocks;
                        target += xfs_rtrefcountbt_calc_reserves(mp);
                }
        }

        if (!target)
                goto out_unlock;

        /*
         * Space taken by the metadata btrees is accounted on-disk as used
         * space.  We therefore only hide the space that is reserved but not
         * used by the trees.
         *
         * Cap the target at dblocks_avail first, then raise it to @used if
         * the btrees already occupy more than that.  Applying the cap last
         * could leave target < used, and the subtraction below would wrap.
         */
        if (target > dblocks_avail)
                target = dblocks_avail;
        if (used > target)
                target = used;
        hidden_space = target - used;

        error = xfs_dec_fdblocks(mp, hidden_space, true);
        if (error) {
                trace_xfs_metafile_resv_init_error(mp, 0);
                goto out_unlock;
        }

        xfs_mod_sb_delalloc(mp, hidden_space);

        mp->m_metafile_resv_target = target;
        mp->m_metafile_resv_used = used;
        mp->m_metafile_resv_avail = hidden_space;

        trace_xfs_metafile_resv_init(mp, target);

out_unlock:
        mutex_unlock(&mp->m_metafile_resv_lock);
        return error;
}