root/fs/xfs/scrub/tempfile.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_exchrange.h"
#include "xfs_exchmaps.h"
#include "xfs_defer.h"
#include "xfs_symlink_remote.h"
#include "xfs_metafile.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/xfile.h"

/*
 * Create a temporary file for reconstructing metadata, with the intention of
 * atomically exchanging the temporary file's contents with the file that's
 * being repaired.
 *
 * @mode selects the type of file to create; directories and symlinks receive
 * extra initialization.  On success the new inode is stored in sc->tempip with
 * no locks recorded in sc->temp_ilock_flags, and zero is returned.  Returns
 * -EIO if the filesystem is shut down, -EROFS if it is read-only, or the
 * negative errno of whichever setup step failed.  On failure sc->tempip is
 * left NULL.
 */
int
xrep_tempfile_create(
        struct xfs_scrub        *sc,
        uint16_t                mode)
{
        struct xfs_icreate_args args = {
                .pip            = sc->mp->m_rootip,
                .mode           = mode,
                .flags          = XFS_ICREATE_TMPFILE | XFS_ICREATE_UNLINKABLE,
        };
        struct xfs_mount        *mp = sc->mp;
        struct xfs_trans        *tp = NULL;
        struct xfs_dquot        *udqp;
        struct xfs_dquot        *gdqp;
        struct xfs_dquot        *pdqp;
        struct xfs_trans_res    *tres;
        struct xfs_inode        *dp = mp->m_rootip;
        xfs_ino_t               ino;
        unsigned int            resblks;
        bool                    is_dir = S_ISDIR(mode);
        int                     error;

        if (xfs_is_shutdown(mp))
                return -EIO;
        if (xfs_is_readonly(mp))
                return -EROFS;

        ASSERT(sc->tp == NULL);
        ASSERT(sc->tempip == NULL);

        /*
         * Make sure that we have allocated dquot(s) on disk.  The temporary
         * inode should be completely root owned so that we don't fail due to
         * quota limits.
         */
        error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp);
        if (error)
                return error;

        /* Directories need the mkdir reservation; everything else is a tmpfile. */
        if (is_dir) {
                resblks = xfs_mkdir_space_res(mp, 0);
                tres = &M_RES(mp)->tr_mkdir;
        } else {
                resblks = XFS_IALLOC_SPACE_RES(mp);
                tres = &M_RES(mp)->tr_create_tmpfile;
        }

        error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
                        &tp);
        if (error)
                goto out_release_dquots;

        /* Allocate inode, set up directory. */
        error = xfs_dialloc(&tp, &args, &ino);
        if (error)
                goto out_trans_cancel;
        error = xfs_icreate(tp, ino, &args, &sc->tempip);
        if (error)
                goto out_trans_cancel;

        /* We don't touch file data, so drop the realtime flags. */
        sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
        xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);

        /*
         * Mark our temporary file as private so that LSMs and the ACL code
         * don't try to add their own metadata or reason about these files.
         * The file should never be exposed to userspace.
         */
        VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
        VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;

        if (is_dir) {
                error = xfs_dir_init(tp, sc->tempip, dp);
                if (error)
                        goto out_trans_cancel;
        } else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) {
                /*
                 * Initialize the temporary symlink with a meaningless target
                 * that won't trip the verifiers.  Repair must rewrite the
                 * target with meaningful content before swapping with the file
                 * being repaired.  A single-byte target will not write a
                 * remote target block, so the owner is irrelevant.
                 */
                error = xfs_symlink_write_target(tp, sc->tempip,
                                sc->tempip->i_ino, ".", 1, 0, 0);
                if (error)
                        goto out_trans_cancel;
        }

        /*
         * Attach the dquot(s) to the inodes and modify them incore.
         * These ids of the inode couldn't have changed since the new
         * inode has been locked ever since it was created.
         */
        xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);

        /*
         * Put our temp file on the unlinked list so it's purged automatically.
         * All file-based metadata being reconstructed using this file must be
         * atomically exchanged with the original file because the contents
         * here will be purged when the inode is dropped or log recovery cleans
         * out the unlinked list.
         */
        error = xfs_iunlink(tp, sc->tempip);
        if (error)
                goto out_trans_cancel;

        error = xfs_trans_commit(tp);
        if (error)
                goto out_release_inode;

        trace_xrep_tempfile_create(sc);

        xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(gdqp);
        xfs_qm_dqrele(pdqp);

        /* Finish setting up the incore / vfs context. */
        xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
        xfs_setup_iops(sc->tempip);
        xfs_finish_inode_setup(sc->tempip);

        sc->temp_ilock_flags = 0;
        return error;

out_trans_cancel:
        xfs_trans_cancel(tp);
out_release_inode:
        /*
         * Wait until after the current transaction is aborted to finish the
         * setup of the inode and release the inode.  This prevents recursive
         * transactions and deadlocks from xfs_inactive.
         */
        if (sc->tempip) {
                xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
                xfs_finish_inode_setup(sc->tempip);
                xchk_irele(sc, sc->tempip);

                /*
                 * We just dropped our reference, so forget the pointer.
                 * xrep_tempfile_rele treats a non-NULL sc->tempip as a live
                 * reference and would otherwise touch and release this inode
                 * a second time.
                 */
                sc->tempip = NULL;
        }
out_release_dquots:
        xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(gdqp);
        xfs_qm_dqrele(pdqp);

        return error;
}

/*
 * Move sc->tempip from the regular directory tree to the metadata directory
 * tree if sc->ip is part of the metadata directory tree and tempip has an
 * eligible file mode.
 *
 * Temporary files have to be created before we even know which inode we're
 * going to scrub, so we assume that they will be part of the regular directory
 * tree.  If it turns out that we're actually scrubbing a file from the
 * metadata directory tree, we have to subtract the temp file from the root
 * dquots and detach the dquots prior to setting the METADATA iflag.  However,
 * the scrub setup functions grab sc->ip and create sc->tempip before we
 * actually get around to checking if the file mode is the right type for the
 * scrubber.
 */
int
xrep_tempfile_adjust_directory_tree(
        struct xfs_scrub        *sc)
{
        int                     error;

        if (!sc->tempip)
                return 0;

        ASSERT(sc->tp == NULL);
        ASSERT(!xfs_is_metadir_inode(sc->tempip));

        if (!sc->ip || !xfs_is_metadir_inode(sc->ip))
                return 0;
        if (!S_ISDIR(VFS_I(sc->tempip)->i_mode) &&
            !S_ISREG(VFS_I(sc->tempip)->i_mode))
                return 0;

        xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
        sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;

        error = xchk_trans_alloc(sc, 0);
        if (error)
                goto out_iolock;

        xrep_tempfile_ilock(sc);
        xfs_trans_ijoin(sc->tp, sc->tempip, 0);

        /* Metadir files are not accounted in quota, so drop icount */
        xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, -1L);
        xfs_metafile_set_iflag(sc->tp, sc->tempip, XFS_METAFILE_UNKNOWN);

        error = xrep_trans_commit(sc);
        if (error)
                goto out_ilock;

        xfs_iflags_set(sc->tempip, XFS_IRECOVERY);
        xfs_qm_dqdetach(sc->tempip);
out_ilock:
        xrep_tempfile_iunlock(sc);
out_iolock:
        xrep_tempfile_iounlock(sc);
        return error;
}

/*
 * Remove this temporary file from the metadata directory tree so that it can
 * be inactivated the normal way.
 */
STATIC int
xrep_tempfile_remove_metadir(
        struct xfs_scrub        *sc)
{
        int                     error;

        if (!sc->tempip || !xfs_is_metadir_inode(sc->tempip))
                return 0;

        ASSERT(sc->tp == NULL);

        xfs_iflags_clear(sc->tempip, XFS_IRECOVERY);

        xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
        sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;

        error = xchk_trans_alloc(sc, 0);
        if (error)
                goto out_iolock;

        xrep_tempfile_ilock(sc);
        xfs_trans_ijoin(sc->tp, sc->tempip, 0);

        xfs_metafile_clear_iflag(sc->tp, sc->tempip);

        /* Non-metadir files are accounted in quota, so bump bcount/icount */
        error = xfs_qm_dqattach_locked(sc->tempip, false);
        if (error)
                goto out_cancel;

        xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, 1L);
        xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_BCOUNT,
                        sc->tempip->i_nblocks);
        error = xrep_trans_commit(sc);
        goto out_ilock;

out_cancel:
        xchk_trans_cancel(sc);
out_ilock:
        xrep_tempfile_iunlock(sc);
out_iolock:
        xrep_tempfile_iounlock(sc);
        return error;
}

/*
 * Try to take IOLOCK_EXCL on the temporary file without blocking.  Returns
 * true and records the lock in sc->temp_ilock_flags if we got it; returns
 * false with nothing changed otherwise.
 */
bool
xrep_tempfile_iolock_nowait(
        struct xfs_scrub        *sc)
{
        if (!xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL))
                return false;

        sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
        return true;
}

/*
 * Take the temporary file's IOLOCK while holding a different inode's IOLOCK.
 * Nobody else should be holding the tempfile's IOLOCK, but we poll with a
 * trylock anyway to avoid deadlocks and lockdep complaints.
 *
 * Returns 0 once the lock is held, or the error set by
 * xchk_should_terminate() if the scrub is told to stop while waiting.
 */
int
xrep_tempfile_iolock_polled(
        struct xfs_scrub        *sc)
{
        int                     error = 0;

        for (;;) {
                if (xrep_tempfile_iolock_nowait(sc))
                        return 0;
                if (xchk_should_terminate(sc, &error))
                        return error;

                /* Back off briefly before trying again. */
                delay(1);
        }
}

/*
 * Release IOLOCK_EXCL on the temporary file and clear the matching bit in
 * sc->temp_ilock_flags so the scrub context no longer records it as held.
 */
void
xrep_tempfile_iounlock(
        struct xfs_scrub        *sc)
{
        xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
        sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
}

/*
 * Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL.
 * The lock is recorded in sc->temp_ilock_flags (before it is actually taken)
 * and this call blocks until the lock is acquired.
 */
void
xrep_tempfile_ilock(
        struct xfs_scrub        *sc)
{
        sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
        xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
}

/*
 * Try to grab ILOCK_EXCL on the temporary file without blocking.  Returns
 * true and records the lock in sc->temp_ilock_flags if we got it; returns
 * false with nothing changed otherwise.
 */
bool
xrep_tempfile_ilock_nowait(
        struct xfs_scrub        *sc)
{
        if (!xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL))
                return false;

        sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
        return true;
}

/*
 * Unlock ILOCK_EXCL on the temporary file after an update and clear the
 * matching bit in sc->temp_ilock_flags.
 */
void
xrep_tempfile_iunlock(
        struct xfs_scrub        *sc)
{
        xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
        sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
}

/*
 * Begin the process of making changes to both the file being scrubbed and
 * the temporary file by taking ILOCK_EXCL on both.  Both locks are acquired
 * through xfs_lock_two_inodes() and then recorded in sc->ilock_flags and
 * sc->temp_ilock_flags respectively.
 */
void
xrep_tempfile_ilock_both(
        struct xfs_scrub        *sc)
{
        xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL);
        sc->ilock_flags |= XFS_ILOCK_EXCL;
        sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
}

/*
 * Unlock ILOCK_EXCL on both files: the temporary file first, then the file
 * being scrubbed.
 */
void
xrep_tempfile_iunlock_both(
        struct xfs_scrub        *sc)
{
        xrep_tempfile_iunlock(sc);
        xchk_iunlock(sc, XFS_ILOCK_EXCL);
}

/*
 * Release the temporary file, if there is one.  Any locks still recorded in
 * sc->temp_ilock_flags are dropped, the file is taken back out of the metadata
 * directory tree if it was moved there, and the inode reference is released.
 * sc->tempip is NULL afterwards.
 */
void
xrep_tempfile_rele(
        struct xfs_scrub        *sc)
{
        if (sc->tempip == NULL)
                return;

        /* Drop whatever tempfile locks the scrub context still holds. */
        if (sc->temp_ilock_flags != 0) {
                xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
                sc->temp_ilock_flags = 0;
        }

        /* The return value of this call is not checked here. */
        xrep_tempfile_remove_metadir(sc);

        xchk_irele(sc, sc->tempip);
        sc->tempip = NULL;
}

/*
 * Make sure that the range [@off, @off + @len) of the temporary file's data
 * fork is mapped to written blocks.  The caller must ensure that both inodes
 * are joined to the transaction.
 *
 * Walks the range one mapping at a time.  Anything that is not already a
 * written extent is converted or allocated as a zeroed real extent, and the
 * resulting deferred work is finished before moving on.  Returns 0, a negative
 * errno from the bmap/defer code, or -EFSCORRUPTED if a lookup returns no
 * mapping, a delalloc reservation is found, or the allocation does not come
 * back as exactly one mapping.
 */
int
xrep_tempfile_prealloc(
        struct xfs_scrub        *sc,
        xfs_fileoff_t           off,
        xfs_filblks_t           len)
{
        struct xfs_bmbt_irec    map;
        xfs_fileoff_t           end = off + len;
        int                     error;

        ASSERT(sc->tempip != NULL);
        ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));

        while (off < end) {
                int             nmaps = 1;

                /* Find out what, if anything, maps this part of the range. */
                error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
                                XFS_DATA_FORK);
                if (error)
                        return error;
                if (nmaps == 0) {
                        ASSERT(nmaps != 0);
                        return -EFSCORRUPTED;
                }

                /* A real written extent needs no help from us. */
                if (!xfs_bmap_is_written_extent(&map)) {
                        /*
                         * The tempfile should never have delalloc
                         * reservations; something is very wrong if it does.
                         */
                        if (map.br_startblock == DELAYSTARTBLOCK)
                                return -EFSCORRUPTED;

                        /* Back this range with a real zeroed extent. */
                        nmaps = 1;
                        error = xfs_bmapi_write(sc->tp, sc->tempip, off,
                                        end - off,
                                        XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0,
                                        &map, &nmaps);
                        if (error)
                                return error;
                        if (nmaps != 1)
                                return -EFSCORRUPTED;

                        trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);

                        /* Commit new extent and all deferred work. */
                        error = xfs_defer_finish(&sc->tp);
                        if (error)
                                return error;
                }

                /* Resume just past the mapping we examined or created. */
                off = map.br_startoff + map.br_blockcount;
        }

        return 0;
}

/*
 * Write data to each block of a file.  The given range of the tempfile's data
 * fork must already be populated with written extents.
 *
 * For every file block in [@off, @off + @len), look up its mapping, grab the
 * buffer backing it, and call @prep_fn(sc, bp, @data) to fill the buffer.
 * Filled buffers are queued on a private delwri list, which is submitted
 * periodically and once more at the end.
 *
 * Returns 0 on success; -EFSCORRUPTED if a block has no mapping or is not a
 * written extent; -EIO if buffers remain queued after the final submission;
 * or the negative errno returned by the bmap, buffer, delwri, or @prep_fn
 * calls.  Any buffers still queued on error are cancelled.
 */
int
xrep_tempfile_copyin(
        struct xfs_scrub        *sc,
        xfs_fileoff_t           off,
        xfs_filblks_t           len,
        xrep_tempfile_copyin_fn prep_fn,
        void                    *data)
{
        LIST_HEAD(buffers_list);
        struct xfs_mount        *mp = sc->mp;
        struct xfs_buf          *bp;
        xfs_fileoff_t           flush_mask;
        xfs_fileoff_t           end = off + len;
        int                     error = 0;

        ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));

        /* Flush buffers to disk every 512K */
        flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;

        for (; off < end; off++) {
                struct xfs_bmbt_irec    map;
                int                     nmaps = 1;

                /* Read block mapping for this file block. */
                error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
                if (error)
                        goto out_err;
                if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
                        error = -EFSCORRUPTED;
                        goto out_err;
                }

                /* Get the metadata buffer for this offset in the file. */
                error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
                                XFS_FSB_TO_DADDR(mp, map.br_startblock),
                                mp->m_bsize, 0, &bp);
                if (error)
                        goto out_err;

                trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);

                /* Let the caller fill in a block's worth of data. */
                error = prep_fn(sc, bp, data);
                if (error) {
                        xfs_trans_brelse(sc->tp, bp);
                        goto out_err;
                }

                /* Queue buffer, and flush if we have too much dirty data. */
                xfs_buf_delwri_queue_here(bp, &buffers_list);
                xfs_trans_brelse(sc->tp, bp);

                /* Submit whenever the file offset hits a flush boundary. */
                if (!(off & flush_mask)) {
                        error = xfs_buf_delwri_submit(&buffers_list);
                        if (error)
                                goto out_err;
                }
        }

        /*
         * Write the new blocks to disk.  If the ordered list isn't empty after
         * that, then something went wrong and we have to fail.  This should
         * never happen, but we'll check anyway.
         */
        error = xfs_buf_delwri_submit(&buffers_list);
        if (error)
                goto out_err;

        if (!list_empty(&buffers_list)) {
                ASSERT(list_empty(&buffers_list));
                error = -EIO;
                goto out_err;
        }

        return 0;

out_err:
        xfs_buf_delwri_cancel(&buffers_list);
        return error;
}

/*
 * Set the temporary file's size.  Caller must join the tempfile to the scrub
 * transaction and is responsible for adjusting block mappings as needed.
 */
int
xrep_tempfile_set_isize(
        struct xfs_scrub        *sc,
        unsigned long long      isize)
{
        if (sc->tempip->i_disk_size == isize)
                return 0;

        sc->tempip->i_disk_size = isize;
        i_size_write(VFS_I(sc->tempip), isize);
        return xrep_tempfile_roll_trans(sc);
}

/*
 * Roll a repair transaction involving the temporary file.  Caller must join
 * both the temporary file and the file being scrubbed to the transaction.
 *
 * The tempfile's inode core is logged before the roll.  On success, the
 * tempfile is joined to the new scrub transaction and 0 is returned;
 * otherwise the negative errno from the roll is returned.
 */
int
xrep_tempfile_roll_trans(
        struct xfs_scrub        *sc)
{
        int                     error;

        xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);

        error = xrep_roll_trans(sc);
        if (!error)
                xfs_trans_ijoin(sc->tp, sc->tempip, 0);

        return error;
}

/*
 * Fill out the mapping exchange request in @tx in preparation for atomically
 * committing the contents of a metadata file that we've rebuilt in the temp
 * file.
 *
 * @tx is zeroed first.  The request then describes an exchange of @len blocks
 * starting at file offset @off in both files, with the temporary file as ip1
 * and the file being repaired as ip2.  Returns 0, or -EINVAL if @whichfork is
 * the COW fork or either file lacks the requested fork.
 */
STATIC int
xrep_tempexch_prep_request(
        struct xfs_scrub        *sc,
        int                     whichfork,
        xfs_fileoff_t           off,
        xfs_filblks_t           len,
        struct xrep_tempexch    *tx)
{
        struct xfs_exchmaps_req *req = &tx->req;

        memset(tx, 0, sizeof(struct xrep_tempexch));

        /* COW forks don't exist on disk. */
        if (whichfork == XFS_COW_FORK) {
                ASSERT(0);
                return -EINVAL;
        }

        /* Both files should have the relevant forks. */
        if (xfs_ifork_ptr(sc->ip, whichfork) == NULL ||
            xfs_ifork_ptr(sc->tempip, whichfork) == NULL) {
                ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
                ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL);
                return -EINVAL;
        }

        /* Exchange the same range of mappings in both files. */
        req->ip1 = sc->tempip;
        req->ip2 = sc->ip;
        req->startoff1 = off;
        req->startoff2 = off;
        req->blockcount = len;

        if (whichfork == XFS_ATTR_FORK) {
                req->flags |= XFS_EXCHMAPS_ATTR_FORK;
        } else if (whichfork == XFS_DATA_FORK) {
                /* Exchange sizes when exchanging all data fork mappings. */
                if (off == 0 && len == XFS_MAX_FILEOFF)
                        req->flags |= XFS_EXCHMAPS_SET_SIZES;
        }

        return 0;
}

/*
 * Fill out the mapping exchange resource estimation structures in preparation
 * for exchanging the contents of a metadata file that we've rebuilt in the
 * temp file.  Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files.
 */
STATIC int
xrep_tempexch_estimate(
        struct xfs_scrub        *sc,
        struct xrep_tempexch    *tx)
{
        struct xfs_exchmaps_req *req = &tx->req;
        struct xfs_ifork        *ifp;
        struct xfs_ifork        *tifp;
        int                     whichfork = xfs_exchmaps_reqfork(req);
        int                     state = 0;

        /*
         * The exchmaps code only knows how to exchange file fork space
         * mappings.  Any fork data in local format must be promoted to a
         * single block before the exchange can take place.
         */
        ifp = xfs_ifork_ptr(sc->ip, whichfork);
        if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
                state |= 1;

        tifp = xfs_ifork_ptr(sc->tempip, whichfork);
        if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
                state |= 2;

        switch (state) {
        case 0:
                /* Both files have mapped extents; use the regular estimate. */
                return xfs_exchrange_estimate(req);
        case 1:
                /*
                 * The file being repaired is in local format, but the temp
                 * file has mapped extents.  To perform the exchange, the file
                 * being repaired must have its shorform data converted to an
                 * ondisk block so that the forks will be in extents format.
                 * We need one resblk for the conversion; the number of
                 * exchanges is (worst case) the temporary file's extent count
                 * plus the block we converted.
                 */
                req->ip1_bcount = sc->tempip->i_nblocks;
                req->ip2_bcount = 1;
                req->nr_exchanges = 1 + tifp->if_nextents;
                req->resblks = 1;
                break;
        case 2:
                /*
                 * The temporary file is in local format, but the file being
                 * repaired has mapped extents.  To perform the exchange, the
                 * temp file must have its shortform data converted to an
                 * ondisk block, and the fork changed to extents format.  We
                 * need one resblk for the conversion; the number of exchanges
                 * is (worst case) the extent count of the file being repaired
                 * plus the block we converted.
                 */
                req->ip1_bcount = 1;
                req->ip2_bcount = sc->ip->i_nblocks;
                req->nr_exchanges = 1 + ifp->if_nextents;
                req->resblks = 1;
                break;
        case 3:
                /*
                 * Both forks are in local format.  To perform the exchange,
                 * both files must have their shortform data converted to
                 * fsblocks, and both forks must be converted to extents
                 * format.  We need two resblks for the two conversions, and
                 * the number of exchanges is 1 since there's only one block at
                 * fileoff 0.  Presumably, the caller could not exchange the
                 * two inode fork areas directly.
                 */
                req->ip1_bcount = 1;
                req->ip2_bcount = 1;
                req->nr_exchanges = 1;
                req->resblks = 2;
                break;
        }

        return xfs_exchmaps_estimate_overhead(req);
}

/*
 * Obtain a quota reservation for both files in the exchange request so that
 * we don't hit EDQUOT partway through.  Returns 0 if no reservation was
 * needed or both reservations succeeded, or the negative errno from the quota
 * reservation.
 */
STATIC int
xrep_tempexch_reserve_quota(
        struct xfs_scrub                *sc,
        const struct xrep_tempexch      *tx)
{
        struct xfs_trans                *tp = sc->tp;
        const struct xfs_exchmaps_req   *req = &tx->req;
        int64_t                         data_gain, rt_gain;
        int                             error;

        /* No reservation is needed if quotas aren't on... */
        if (!XFS_IS_QUOTA_ON(tp->t_mountp))
                return 0;

        /* ...or the request names the same inode twice... */
        if (req->ip1 == req->ip2)
                return 0;

        /* ...or ip1 is a metadata directory tree inode... */
        if (xfs_is_metadir_inode(req->ip1))
                return 0;

        /* ...or the two inodes have the same dquots. */
        if (req->ip1->i_udquot == req->ip2->i_udquot &&
            req->ip1->i_gdquot == req->ip2->i_gdquot &&
            req->ip1->i_pdquot == req->ip2->i_pdquot)
                return 0;

        /*
         * Quota reservation for each file comes from two sources.  First, we
         * need to account for any net gain in mapped blocks during the
         * exchange.  Second, we need reservation for the gross gain in mapped
         * blocks so that we don't trip over any quota block reservation
         * assertions.  We must reserve the gross gain because the quota code
         * subtracts from bcount the number of blocks that we unmap; it does
         * not add that quantity back to the quota block reservation.
         */
        data_gain = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount);
        rt_gain = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount);
        error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
                        data_gain + req->ip1_bcount,
                        rt_gain + req->ip1_rtbcount, true);
        if (error)
                return error;

        /* Same calculation from the other file's point of view. */
        data_gain = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount);
        rt_gain = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount);
        return xfs_trans_reserve_quota_nblks(tp, req->ip2,
                        data_gain + req->ip2_bcount,
                        rt_gain + req->ip2_rtbcount, true);
}

/*
 * Prepare an existing transaction for an atomic file contents exchange.
 *
 * This function fills out the mapping exchange request and resource estimation
 * structures in preparation for exchanging the contents of a metadata file
 * that has been rebuilt in the temp file.  Next, it reserves space and quota
 * for the transaction.  The first step to fail ends the sequence and its
 * negative errno is returned; 0 means everything was reserved.
 *
 * The caller must hold ILOCK_EXCL of the scrub target file and the temporary
 * file.  The caller must join both inodes to the transaction with no unlock
 * flags, and is responsible for dropping both ILOCKs when appropriate.  Only
 * use this when those ILOCKs cannot be dropped.
 */
int
xrep_tempexch_trans_reserve(
        struct xfs_scrub        *sc,
        int                     whichfork,
        xfs_fileoff_t           off,
        xfs_filblks_t           len,
        struct xrep_tempexch    *tx)
{
        int                     error;

        ASSERT(sc->tp != NULL);
        xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL);
        xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL);

        /* Describe the exchange, size it, then reserve blocks and quota. */
        error = xrep_tempexch_prep_request(sc, whichfork, off, len, tx);
        if (!error)
                error = xfs_exchmaps_estimate(&tx->req);
        if (!error)
                error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0);
        if (!error)
                error = xrep_tempexch_reserve_quota(sc, tx);

        return error;
}

/*
 * Create a new transaction for a file contents exchange.
 *
 * This function fills out the mapping exchange request and resource
 * estimation structures in preparation for exchanging every mapping in the
 * given fork of a metadata file that has been rebuilt in the temp file.  Next,
 * it allocates a transaction with the estimated block reservation, takes
 * ILOCK_EXCL of both inodes via xfs_exchrange_ilock(), and reserves quota for
 * the transaction.
 *
 * Returns 0 or a negative errno.  If the failure happens before the
 * transaction is allocated, no locks are taken.  Once the ILOCKs are taken
 * they are recorded in the scrub context, and the caller is responsible for
 * dropping both of them when appropriate, even if quota reservation fails.
 */
int
xrep_tempexch_trans_alloc(
        struct xfs_scrub        *sc,
        int                     whichfork,
        struct xrep_tempexch    *tx)
{
        unsigned int            flags;
        int                     error;

        ASSERT(sc->tp == NULL);
        ASSERT(xfs_has_exchange_range(sc->mp));

        error = xrep_tempexch_prep_request(sc, whichfork, 0, XFS_MAX_FILEOFF,
                        tx);
        if (error)
                return error;

        error = xrep_tempexch_estimate(sc, tx);
        if (error)
                return error;

        /* Ask for XFS_TRANS_RES_FDBLKS only when lazysbcount is enabled. */
        flags = xfs_has_lazysbcount(sc->mp) ? XFS_TRANS_RES_FDBLKS : 0;

        error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
                        tx->req.resblks, 0, flags, &sc->tp);
        if (error)
                return error;

        /* Record both ILOCKs in the scrub context, then take them. */
        sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
        sc->ilock_flags |= XFS_ILOCK_EXCL;
        xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip);

        return xrep_tempexch_reserve_quota(sc, tx);
}

/*
 * Exchange file mappings (and hence file contents) between the file being
 * repaired and the temporary file, as described by the request in @tx.
 * Returns with both inodes locked and joined to a clean scrub transaction,
 * or a negative errno if finishing the deferred work failed.
 */
int
xrep_tempexch_contents(
        struct xfs_scrub        *sc,
        struct xrep_tempexch    *tx)
{
        loff_t                  ip_size;
        loff_t                  temp_size;
        int                     error;

        ASSERT(xfs_has_exchange_range(sc->mp));

        xfs_exchange_mappings(sc->tp, &tx->req);
        error = xfs_defer_finish(&sc->tp);
        if (error)
                return error;

        if (!(tx->req.flags & XFS_EXCHMAPS_SET_SIZES))
                return 0;

        /*
         * The request asked for the ondisk sizes of the two files to be
         * exchanged, so the incore (VFS) sizes must trade places as well.
         */
        ip_size = i_size_read(VFS_I(sc->ip));
        temp_size = i_size_read(VFS_I(sc->tempip));
        i_size_write(VFS_I(sc->ip), temp_size);
        i_size_write(VFS_I(sc->tempip), ip_size);

        return 0;
}

/*
 * Write local format data from one of the temporary file's forks into the same
 * fork of the file being repaired.  For the data fork, the file sizes are
 * copied over as well.  Caller must ensure that the file being repaired has
 * enough fork space to hold all the bytes.
 *
 * Only the data and attr forks are handled; any other @whichfork trips an
 * assertion and returns without changing anything.
 */
void
xrep_tempfile_copyout_local(
        struct xfs_scrub        *sc,
        int                     whichfork)
{
        struct xfs_ifork        *src = xfs_ifork_ptr(sc->tempip, whichfork);
        struct xfs_ifork        *dst = xfs_ifork_ptr(sc->ip, whichfork);

        ASSERT(src != NULL);
        ASSERT(dst != NULL);
        ASSERT(src->if_format == XFS_DINODE_FMT_LOCAL);
        ASSERT(dst->if_format == XFS_DINODE_FMT_LOCAL);

        /* Sanity-check that the destination fork area is big enough. */
        if (whichfork == XFS_DATA_FORK) {
                ASSERT(sc->tempip->i_disk_size <=
                                        xfs_inode_data_fork_size(sc->ip));
        } else if (whichfork == XFS_ATTR_FORK) {
                ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
        } else {
                ASSERT(0);
                return;
        }

        /* Throw away @sc->ip's incore fork and rebuild it from the tempfile. */
        xfs_idestroy_fork(dst);
        xfs_init_local_fork(sc->ip, whichfork, src->if_data, src->if_bytes);

        /* Data fork contents define the file size, so carry that over too. */
        if (whichfork == XFS_DATA_FORK) {
                i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
                sc->ip->i_disk_size = sc->tempip->i_disk_size;
        }

        /* Log the inode core along with the fork data we just replaced. */
        xfs_trans_log_inode(sc->tp, sc->ip,
                        XFS_ILOG_CORE | xfs_ilog_fdata(whichfork));
}

/*
 * Decide if a given XFS inode is a temporary file for a repair.
 *
 * Repair tempfiles are created with S_PRIVATE set and IOP_XATTR cleared, but
 * files in the metadata directory tree look the same way, so those must be
 * told apart separately.  We (ab)use the IRECOVERY flag to mark temporary
 * metadir inodes knowing that the end of log recovery clears IRECOVERY, so the
 * only ones that can exist during online repair are the ones we create.
 */
bool
xrep_is_tempfile(
        const struct xfs_inode  *ip)
{
        const struct inode      *inode = &ip->i_vnode;

        /* Metadir inodes count only if they carry our IRECOVERY marker. */
        if (xfs_has_metadir(ip->i_mount) &&
            (ip->i_diflags2 & XFS_DIFLAG2_METADATA))
                return __xfs_iflags_test(ip, XFS_IRECOVERY);

        /* Otherwise go by the private/no-xattr marking. */
        return IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR);
}