// fs/xfs/scrub/bmap_repair.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_inode_fork.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_quota.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "xfs_reflink.h"
#include "xfs_rtgroup.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/fsb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Inode Fork Block Mapping (BMBT) Repair
 * ======================================
 *
 * Gather all the rmap records for the inode and fork we're fixing, reset the
 * incore fork, then recreate the btree.
 */

/* Outcome of the shared-extent scan; decides the fate of the REFLINK iflag. */
enum reflink_scan_state {
	RLS_IRRELEVANT = -1,	/* not applicable to this file */
	RLS_UNKNOWN,		/* shared extent scans required */
	RLS_SET_IFLAG,		/* iflag must be set */
};

/* Context for rebuilding one fork's block mappings. */
struct xrep_bmap {
	/* Old bmbt blocks, collected so they can be reaped after the swap. */
	struct xfsb_bitmap	old_bmbt_blocks;

	/* New fork. */
	struct xrep_newbt	new_bmapbt;

	/* List of new bmap records. */
	struct xfarray		*bmap_records;

	struct xfs_scrub	*sc;

	/* How many blocks did we find allocated to this file? */
	xfs_rfsblock_t		nblocks;

	/* How many bmbt blocks did we find for this fork? */
	xfs_rfsblock_t		old_bmbt_block_count;

	/* get_records()'s position in the free space record array. */
	xfarray_idx_t		array_cur;

	/* How many real (non-hole, non-delalloc) mappings do we have? */
	uint64_t		real_mappings;

	/* Which fork are we fixing? */
	int			whichfork;

	/* Should the REFLINK flag be set when the repair is over? */
	enum reflink_scan_state	reflink_scan;

	/* Do we allow unwritten extents? */
	bool			allow_unwritten;
};

/* Is this space extent shared?  Flag the inode if it is. */
STATIC int
xrep_bmap_discover_shared(
	struct xrep_bmap	*rb,
	xfs_fsblock_t		startblock,
	xfs_filblks_t		blockcount)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_btree_cur	*refc_cur;
	xfs_agblock_t		bno;
	xfs_agblock_t		found_bno;
	xfs_extlen_t		found_len;
	int			error;

	/* Realtime files use the rtgroup refcount btree; others the AG one. */
	if (XFS_IS_REALTIME_INODE(sc->ip)) {
		bno = xfs_rtb_to_rgbno(sc->mp, startblock);
		refc_cur = sc->sr.refc_cur;
	} else {
		bno = XFS_FSB_TO_AGBNO(sc->mp, startblock);
		refc_cur = sc->sa.refc_cur;
	}

	error = xfs_refcount_find_shared(refc_cur, bno, blockcount, &found_bno,
			&found_len, false);
	if (error)
		return error;

	/* Any shared subrange means the inode needs the REFLINK iflag. */
	if (found_bno != NULLAGBLOCK)
		rb->reflink_scan = RLS_SET_IFLAG;

	return 0;
}

/* Remember this reverse-mapping as a series of bmap records. */
STATIC int
xrep_bmap_from_rmap(
	struct xrep_bmap	*rb,
	xfs_fileoff_t		startoff,
	xfs_fsblock_t		startblock,
	xfs_filblks_t		blockcount,
	bool			unwritten)
{
	struct xfs_bmbt_irec	map = {
		.br_startoff	= startoff,
		.br_startblock	= startblock,
		.br_state	= unwritten ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM,
	};
	struct xfs_bmbt_rec	ondisk;
	struct xfs_scrub	*sc = rb->sc;
	int			error = 0;

	/*
	 * If we're repairing the data fork of a non-reflinked regular file on
	 * a reflink filesystem, we need to figure out if this space extent is
	 * shared.
	 */
	if (rb->reflink_scan == RLS_UNKNOWN && !unwritten) {
		error = xrep_bmap_discover_shared(rb, startblock, blockcount);
		if (error)
			return error;
	}

	/* Chop the rmap into bmbt-sized mappings and stash each piece. */
	do {
		xfs_failaddr_t	fa;

		map.br_blockcount = min_t(xfs_filblks_t, blockcount,
				XFS_MAX_BMBT_EXTLEN);

		fa = xfs_bmap_validate_extent(sc->ip, rb->whichfork, &map);
		if (fa)
			return -EFSCORRUPTED;

		xfs_bmbt_disk_set_all(&ondisk, &map);

		trace_xrep_bmap_found(sc->ip, rb->whichfork, &map);

		if (xchk_should_terminate(sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &ondisk);
		if (error)
			return error;

		rb->real_mappings++;

		/* Advance to the part of the rmap we haven't covered yet. */
		map.br_startblock += map.br_blockcount;
		map.br_startoff += map.br_blockcount;
		blockcount -= map.br_blockcount;
	} while (blockcount > 0);

	return 0;
}

/* Check for any obvious errors or conflicts in the file mapping. */
STATIC int
xrep_bmap_check_fork_rmap(
        struct xrep_bmap                *rb,
        struct xfs_btree_cur            *cur,
        const struct xfs_rmap_irec      *rec)
{
        struct xfs_scrub                *sc = rb->sc;
        enum xbtree_recpacking          outcome;
        int                             error;

        /*
         * Data extents for rt files are never stored on the data device, but
         * everything else (xattrs, bmbt blocks) can be.
         */
        if (XFS_IS_REALTIME_INODE(sc->ip) &&
            !(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))
                return -EFSCORRUPTED;

        /* Check that this is within the AG. */
        if (!xfs_verify_agbext(to_perag(cur->bc_group), rec->rm_startblock,
                                rec->rm_blockcount))
                return -EFSCORRUPTED;

        /* Check the file offset range. */
        if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
            !xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
                return -EFSCORRUPTED;

        /* No contradictory flags. */
        if ((rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) &&
            (rec->rm_flags & XFS_RMAP_UNWRITTEN))
                return -EFSCORRUPTED;

        /* Make sure this isn't free space. */
        error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
                        rec->rm_blockcount, &outcome);
        if (error)
                return error;
        if (outcome != XBTREE_RECPACKING_EMPTY)
                return -EFSCORRUPTED;

        /* Must not be an inode chunk. */
        error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
                        rec->rm_startblock, rec->rm_blockcount, &outcome);
        if (error)
                return error;
        if (outcome != XBTREE_RECPACKING_EMPTY)
                return -EFSCORRUPTED;

        return 0;
}

/* Record extents that belong to this inode's fork. */
STATIC int
xrep_bmap_walk_rmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xrep_bmap		*rb = priv;
	xfs_fsblock_t			fsb;
	int				error = 0;

	if (xchk_should_terminate(rb->sc, &error))
		return error;

	/* Ignore rmaps belonging to other inodes. */
	if (rec->rm_owner != rb->sc->ip->i_ino)
		return 0;

	error = xrep_bmap_check_fork_rmap(rb, cur, rec);
	if (error)
		return error;

	/*
	 * Count every block owned by this file, regardless of fork, so that
	 * di_nblocks can be reset at the end of the repair.
	 */
	rb->nblocks += rec->rm_blockcount;

	/* Skip rmaps that describe the other fork. */
	if (rb->whichfork == XFS_DATA_FORK &&
	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	if (rb->whichfork == XFS_ATTR_FORK &&
	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;

	/* Some callers forbid unwritten extents in this fork. */
	if ((rec->rm_flags & XFS_RMAP_UNWRITTEN) && !rb->allow_unwritten)
		return -EFSCORRUPTED;

	fsb = xfs_agbno_to_fsb(to_perag(cur->bc_group), rec->rm_startblock);

	/* Old bmbt blocks are tallied for disposal, not turned into maps. */
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
		rb->old_bmbt_block_count += rec->rm_blockcount;
		return xfsb_bitmap_set(&rb->old_bmbt_blocks, fsb,
				rec->rm_blockcount);
	}

	return xrep_bmap_from_rmap(rb, rec->rm_offset, fsb,
			rec->rm_blockcount,
			rec->rm_flags & XFS_RMAP_UNWRITTEN);
}

/*
 * Compare two block mapping records.  We want to sort in order of increasing
 * file offset.
 */
static int
xrep_bmap_extent_cmp(
	const void			*a,
	const void			*b)
{
	const struct xfs_bmbt_rec	*rec_a = a;
	const struct xfs_bmbt_rec	*rec_b = b;
	xfs_fileoff_t			off_a = xfs_bmbt_disk_get_startoff(rec_a);
	xfs_fileoff_t			off_b = xfs_bmbt_disk_get_startoff(rec_b);

	if (off_a == off_b)
		return 0;
	return off_a > off_b ? 1 : -1;
}

/*
 * Sort the bmap extents by fork offset or else the records will be in the
 * wrong order.  Ensure there are no overlaps in the file offset ranges.
 */
STATIC int
xrep_bmap_sort_records(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	xfs_fileoff_t		min_next_off = 0;
	xfarray_idx_t		cur;
	int			error;

	error = xfarray_sort(rb->bmap_records, xrep_bmap_extent_cmp,
			XFARRAY_SORT_KILLABLE);
	if (error)
		return error;

	/* Walk the sorted records, rejecting overlapping file ranges. */
	foreach_xfarray_idx(rb->bmap_records, cur) {
		struct xfs_bmbt_rec	ondisk;

		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_load(rb->bmap_records, cur, &ondisk);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&ondisk, &irec);

		/* Each mapping must start at or past the previous one's end. */
		if (irec.br_startoff < min_next_off)
			return -EFSCORRUPTED;

		min_next_off = irec.br_startoff + irec.br_blockcount;
	}

	return 0;
}

/* Scan one AG for reverse mappings that we can turn into extent maps. */
STATIC int
xrep_bmap_scan_ag(
        struct xrep_bmap        *rb,
        struct xfs_perag        *pag)
{
        struct xfs_scrub        *sc = rb->sc;
        int                     error;

        error = xrep_ag_init(sc, pag, &sc->sa);
        if (error)
                return error;

        error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_bmap_walk_rmap, rb);
        xchk_ag_free(sc, &sc->sa);
        return error;
}

#ifdef CONFIG_XFS_RT
/* Check for any obvious errors or conflicts in the file mapping. */
STATIC int
xrep_bmap_check_rtfork_rmap(
	struct xfs_scrub		*sc,
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec)
{
	/* Neither xattr extents nor bmbt blocks live on realtime devices. */
	if (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))
		return -EFSCORRUPTED;

	/* Only realtime files have data extents on the rt device. */
	if (!XFS_IS_REALTIME_INODE(sc->ip))
		return -EFSCORRUPTED;

	/* The record must map a valid range of file offsets... */
	if (!xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* ...and a valid range of blocks within the rtgroup. */
	if (!xfs_verify_rgbext(to_rtg(cur->bc_group), rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* The space must be allocated, not free. */
	return xrep_require_rtext_inuse(sc, rec->rm_startblock,
			rec->rm_blockcount);
}

/* Record realtime extents that belong to this inode's fork. */
STATIC int
xrep_bmap_walk_rtrmap(
        struct xfs_btree_cur            *cur,
        const struct xfs_rmap_irec      *rec,
        void                            *priv)
{
        struct xrep_bmap                *rb = priv;
        int                             error = 0;

        if (xchk_should_terminate(rb->sc, &error))
                return error;

        /* Skip extents which are not owned by this inode and fork. */
        if (rec->rm_owner != rb->sc->ip->i_ino)
                return 0;

        error = xrep_bmap_check_rtfork_rmap(rb->sc, cur, rec);
        if (error)
                return error;

        /*
         * Record all blocks allocated to this file even if the extent isn't
         * for the fork we're rebuilding so that we can reset di_nblocks later.
         */
        rb->nblocks += rec->rm_blockcount;

        /* If this rmap isn't for the fork we want, we're done. */
        if (rb->whichfork == XFS_DATA_FORK &&
            (rec->rm_flags & XFS_RMAP_ATTR_FORK))
                return 0;
        if (rb->whichfork == XFS_ATTR_FORK &&
            !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
                return 0;

        return xrep_bmap_from_rmap(rb, rec->rm_offset,
                        xfs_rgbno_to_rtb(to_rtg(cur->bc_group),
                                rec->rm_startblock),
                        rec->rm_blockcount,
                        rec->rm_flags & XFS_RMAP_UNWRITTEN);
}

/* Scan the realtime reverse mappings to build the new extent map. */
STATIC int
xrep_bmap_scan_rtgroup(
        struct xrep_bmap        *rb,
        struct xfs_rtgroup      *rtg)
{
        struct xfs_scrub        *sc = rb->sc;
        int                     error;

        if (!xfs_has_rtrmapbt(sc->mp))
                return 0;

        error = xrep_rtgroup_init(sc, rtg, &sc->sr,
                        XFS_RTGLOCK_RMAP |
                        XFS_RTGLOCK_REFCOUNT |
                        XFS_RTGLOCK_BITMAP_SHARED);
        if (error)
                return error;

        error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_bmap_walk_rtrmap, rb);
        xchk_rtgroup_btcur_free(&sc->sr);
        xchk_rtgroup_free(sc, &sc->sr);
        return error;
}
#else
/* Realtime rmap data cannot exist when CONFIG_XFS_RT is disabled. */
static inline int
xrep_bmap_scan_rtgroup(struct xrep_bmap *rb, struct xfs_rtgroup *rtg)
{
	return -EFSCORRUPTED;
}
#endif

/* Find the delalloc extents from the old incore extent tree. */
STATIC int
xrep_bmap_find_delalloc(
        struct xrep_bmap        *rb)
{
        struct xfs_bmbt_irec    irec;
        struct xfs_iext_cursor  icur;
        struct xfs_bmbt_rec     rbe;
        struct xfs_inode        *ip = rb->sc->ip;
        struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, rb->whichfork);
        int                     error = 0;

        /*
         * Skip this scan if we don't expect to find delayed allocation
         * reservations in this fork.
         */
        if (rb->whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0)
                return 0;

        for_each_xfs_iext(ifp, &icur, &irec) {
                if (!isnullstartblock(irec.br_startblock))
                        continue;

                xfs_bmbt_disk_set_all(&rbe, &irec);

                trace_xrep_bmap_found(ip, rb->whichfork, &irec);

                if (xchk_should_terminate(rb->sc, &error))
                        return error;

                error = xfarray_append(rb->bmap_records, &rbe);
                if (error)
                        return error;
        }

        return 0;
}

/*
 * Collect block mappings for this fork of this inode and decide if we have
 * enough space to rebuild.  Caller is responsible for cleaning up the list if
 * anything goes wrong.
 */
STATIC int
xrep_bmap_find_mappings(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_rtgroup	*rtg = NULL;
	struct xfs_perag	*pag = NULL;
	int			error = 0;

	/*
	 * Scan the rtgroups first.  Metadata files never have content on the
	 * realtime device, so there's no need to scan them.
	 */
	if (!xfs_is_metadir_inode(sc->ip)) {
		while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) {
			error = xrep_bmap_scan_rtgroup(rb, rtg);
			if (error) {
				xfs_rtgroup_rele(rtg);
				return error;
			}
		}
	}

	/* Now walk every AG's rmap btree for this file's extents. */
	while ((pag = xfs_perag_next(sc->mp, pag))) {
		error = xrep_bmap_scan_ag(rb, pag);
		if (error) {
			xfs_perag_rele(pag);
			return error;
		}
	}

	/* Finally pick up delalloc reservations from the incore fork. */
	return xrep_bmap_find_delalloc(rb);
}

/* Retrieve real extent mappings for bulk loading the bmap btree. */
STATIC int
xrep_bmap_get_records(
        struct xfs_btree_cur    *cur,
        unsigned int            idx,
        struct xfs_btree_block  *block,
        unsigned int            nr_wanted,
        void                    *priv)
{
        struct xfs_bmbt_rec     rec;
        struct xfs_bmbt_irec    *irec = &cur->bc_rec.b;
        struct xrep_bmap        *rb = priv;
        union xfs_btree_rec     *block_rec;
        unsigned int            loaded;
        int                     error;

        for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
                do {
                        error = xfarray_load(rb->bmap_records, rb->array_cur++,
                                        &rec);
                        if (error)
                                return error;

                        xfs_bmbt_disk_get_all(&rec, irec);
                } while (isnullstartblock(irec->br_startblock));

                block_rec = xfs_btree_rec_addr(cur, idx, block);
                cur->bc_ops->init_rec_from_cur(cur, block_rec);
        }

        return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_bmap_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_newbt	*xnr = &((struct xrep_bmap *)priv)->new_bmapbt;

	return xrep_newbt_claim_block(cur, xnr, ptr);
}

/* Figure out how much space we need to create the incore btree root block. */
STATIC size_t
xrep_bmap_iroot_size(
	struct xfs_btree_cur	*cur,
	unsigned int		level,
	unsigned int		nr_this_level,
	void			*priv)
{
	/* Level 0 is never an iroot here; the root must be an interior node. */
	ASSERT(level > 0);

	return xfs_bmap_broot_space_calc(cur->bc_mp, nr_this_level);
}

/* Update the inode counters. */
STATIC int
xrep_bmap_reset_counters(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int64_t			bmbt_delta;

	/* Set the REFLINK iflag if the shared-extent scan said we must. */
	if (rb->reflink_scan == RLS_SET_IFLAG)
		sc->ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;

	/*
	 * di_nblocks is the rmap-observed block count, adjusted by how much
	 * bigger or smaller the new bmbt is compared to the old one.
	 */
	bmbt_delta = ifake->if_blocks - rb->old_bmbt_block_count;
	sc->ip->i_nblocks = rb->nblocks + bmbt_delta;
	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);

	/* The quota block count shifts by the same bmbt size difference. */
	xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT,
			bmbt_delta);
	return 0;
}

/*
 * Create a new iext tree and load it with block mappings.  If the inode is
 * in extents format, that's all we need to do to commit the new mappings.
 * If it is in btree format, this takes care of preloading the incore tree.
 */
STATIC int
xrep_bmap_extents_load(
	struct xrep_bmap	*rb)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	irec;
	struct xfs_ifork	*ifp = rb->new_bmapbt.ifake.if_fork;
	xfarray_idx_t		idx;
	int			error;

	ASSERT(ifp->if_bytes == 0);

	/* Insert every stashed mapping (delalloc included) into the tree. */
	xfs_iext_first(ifp, &icur);
	foreach_xfarray_idx(rb->bmap_records, idx) {
		struct xfs_bmbt_rec	ondisk;

		error = xfarray_load(rb->bmap_records, idx, &ondisk);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&ondisk, &irec);

		xfs_iext_insert_raw(ifp, &icur, &irec);
		/* Delalloc mappings do not count towards the extent count. */
		if (!isnullstartblock(irec.br_startblock))
			ifp->if_nextents++;

		xfs_iext_next(ifp, &icur);
	}

	return xrep_ino_ensure_extent_count(rb->sc, rb->whichfork,
			ifp->if_nextents);
}

/*
 * Reserve new btree blocks, bulk load the bmap records into the ondisk btree,
 * and load the incore extent tree.
 */
STATIC int
xrep_bmap_btree_load(
	struct xrep_bmap	*rb,
	struct xfs_btree_cur	*bmap_cur)
{
	struct xfs_scrub	*sc = rb->sc;
	int			error;

	/*
	 * Compute how many blocks we'll need.  Only the real mappings go into
	 * the ondisk btree; delalloc records are skipped by get_records.
	 */
	error = xfs_btree_bload_compute_geometry(bmap_cur,
			&rb->new_bmapbt.bload, rb->real_mappings);
	if (error)
		return error;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * Guess how many blocks we're going to need to rebuild an entire bmap
	 * from the number of extents we found, and pump up our transaction to
	 * have sufficient block reservation.  We're allowed to exceed file
	 * quota to repair inconsistent metadata.
	 */
	error = xfs_trans_reserve_more_inode(sc->tp, sc->ip,
			rb->new_bmapbt.bload.nr_blocks, 0, true);
	if (error)
		return error;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rb->new_bmapbt,
			rb->new_bmapbt.bload.nr_blocks);
	if (error)
		return error;

	/* Add all observed bmap records. */
	rb->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(bmap_cur, &rb->new_bmapbt.bload, rb);
	if (error)
		return error;

	/*
	 * Load the new bmap records into the new incore extent tree to
	 * preserve delalloc reservations for regular files.  The directory
	 * code loads the extent tree during xfs_dir_open and assumes
	 * thereafter that it remains loaded, so we must not violate that
	 * assumption.
	 */
	return xrep_bmap_extents_load(rb);
}

/*
 * Use the collected bmap information to stage a new bmap fork.  If this is
 * successful we'll return with the new fork information logged to the repair
 * transaction but not yet committed.  The caller must ensure that the inode
 * is joined to the transaction; the inode will be joined to a clean
 * transaction when the function returns.
 */
STATIC int
xrep_bmap_build_new_fork(
	struct xrep_bmap	*rb)
{
	struct xfs_owner_info	oinfo;
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_btree_cur	*bmap_cur;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int			error;

	/* Records must be in increasing file-offset order with no overlaps. */
	error = xrep_bmap_sort_records(rb);
	if (error)
		return error;

	/*
	 * Prepare to construct the new fork by initializing the new btree
	 * structure and creating a fake ifork in the ifakeroot structure.
	 */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
	error = xrep_newbt_init_inode(&rb->new_bmapbt, sc, rb->whichfork,
			&oinfo);
	if (error)
		return error;

	rb->new_bmapbt.bload.get_records = xrep_bmap_get_records;
	rb->new_bmapbt.bload.claim_block = xrep_bmap_claim_block;
	rb->new_bmapbt.bload.iroot_size = xrep_bmap_iroot_size;

	/*
	 * Allocate a new bmap btree cursor for reloading an inode block mapping
	 * data structure.
	 */
	bmap_cur = xfs_bmbt_init_cursor(sc->mp, NULL, sc->ip, XFS_STAGING_FORK);
	xfs_btree_stage_ifakeroot(bmap_cur, ifake);

	/*
	 * Figure out the size and format of the new fork, then fill it with
	 * all the bmap records we've found.  Join the inode to the transaction
	 * so that we can roll the transaction while holding the inode locked.
	 */
	if (rb->real_mappings <= XFS_IFORK_MAXEXT(sc->ip, rb->whichfork)) {
		ifake->if_fork->if_format = XFS_DINODE_FMT_EXTENTS;
		error = xrep_bmap_extents_load(rb);
	} else {
		ifake->if_fork->if_format = XFS_DINODE_FMT_BTREE;
		error = xrep_bmap_btree_load(rb, bmap_cur);
	}
	if (error)
		goto err_cur;

	/*
	 * Install the new fork in the inode.  After this point the old mapping
	 * data are no longer accessible and the new tree is live.  We delete
	 * the cursor immediately after committing the staged root because the
	 * staged fork might be in extents format.
	 */
	xfs_bmbt_commit_staged_btree(bmap_cur, sc->tp, rb->whichfork);
	xfs_btree_del_cursor(bmap_cur, 0);

	/* Reset the inode counters now that we've changed the fork. */
	error = xrep_bmap_reset_counters(rb);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rb->new_bmapbt);
	if (error)
		return error;

	return xrep_roll_trans(sc);

err_cur:
	/* The staged root was never committed, so tear down the cursor too. */
	if (bmap_cur)
		xfs_btree_del_cursor(bmap_cur, error);
err_newbt:
	xrep_newbt_cancel(&rb->new_bmapbt);
	return error;
}

/*
 * Now that we've logged the new inode btree, invalidate all of the old blocks
 * and free them, if there were any.
 */
STATIC int
xrep_bmap_remove_old_tree(
	struct xrep_bmap	*rb)
{
	struct xfs_owner_info	oinfo;
	struct xfs_scrub	*sc = rb->sc;

	/* Reap the old bmbt blocks under this fork's bmbt owner. */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
	return xrep_reap_fsblocks(sc, &rb->old_bmbt_blocks, &oinfo);
}

/* Check for garbage inputs.  Returns -ECANCELED if there's nothing to do. */
STATIC int
xrep_bmap_check_inputs(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);

	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

	/* The rmapbt is the source of truth for this repair. */
	if (!xfs_has_rmapbt(sc->mp))
		return -EOPNOTSUPP;

	/* No fork means nothing to rebuild. */
	if (!ifp)
		return -ECANCELED;

	/*
	 * We only know how to repair extent mappings, which is to say that we
	 * only support extents and btree fork format.  Repairs to a local
	 * format fork require a higher level repair function, so we do not
	 * have any work to do here.
	 */
	switch (ifp->if_format) {
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_UUID:
	case XFS_DINODE_FMT_META_BTREE:
		return -ECANCELED;
	default:
		return -EFSCORRUPTED;
	}

	/* Attr forks need no further checks. */
	if (whichfork == XFS_ATTR_FORK)
		return 0;

	/* Only files, symlinks, and directories get to have data forks. */
	switch (VFS_I(sc->ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/* Set up the initial state of the reflink scan. */
static inline enum reflink_scan_state
xrep_bmap_init_reflink_scan(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	/* Nothing can be shared on a non-reflink filesystem. */
	if (!xfs_has_reflink(sc->mp))
		return RLS_IRRELEVANT;

	/* An inode that is already reflinked keeps the flag. */
	if (xfs_is_reflink_inode(sc->ip))
		return RLS_SET_IFLAG;

	/*
	 * Only the data fork of a regular file can hold shared extents;
	 * anything else never needs the scan.
	 */
	if (!S_ISREG(VFS_I(sc->ip)->i_mode) || whichfork != XFS_DATA_FORK)
		return RLS_IRRELEVANT;

	return RLS_UNKNOWN;
}

/*
 * Repair an inode fork.  @whichfork selects the data or attr fork, and
 * @allow_unwritten decides whether unwritten extents are acceptable there.
 * Returns 0 if the fork was repaired or did not need repairing, or a
 * negative errno.
 */
int
xrep_bmap(
	struct xfs_scrub	*sc,
	int			whichfork,
	bool			allow_unwritten)
{
	struct xrep_bmap	*rb;
	xfs_extnum_t		max_bmbt_recs;
	bool			large_extcount;
	int			error = 0;

	error = xrep_bmap_check_inputs(sc, whichfork);
	if (error == -ECANCELED)
		return 0;
	if (error)
		return error;

	/* Allocate a zeroed repair context. */
	rb = kzalloc(sizeof(*rb), XCHK_GFP_FLAGS);
	if (!rb)
		return -ENOMEM;
	rb->sc = sc;
	rb->whichfork = whichfork;
	rb->reflink_scan = xrep_bmap_init_reflink_scan(sc, whichfork);
	rb->allow_unwritten = allow_unwritten;

	/* Set up enough storage to handle the max records for this fork. */
	large_extcount = xfs_has_large_extent_counts(sc->mp);
	max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork);
	error = xfarray_create("fork mapping records", max_bmbt_recs,
			sizeof(struct xfs_bmbt_rec), &rb->bmap_records);
	if (error)
		goto out_rb;

	/* Collect all reverse mappings for this fork's extents. */
	xfsb_bitmap_init(&rb->old_bmbt_blocks);
	error = xrep_bmap_find_mappings(rb);
	if (error)
		goto out_bitmap;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Rebuild the bmap information. */
	error = xrep_bmap_build_new_fork(rb);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_bmap_remove_old_tree(rb);
	if (error)
		goto out_bitmap;

out_bitmap:
	xfsb_bitmap_destroy(&rb->old_bmbt_blocks);
	xfarray_destroy(rb->bmap_records);
out_rb:
	kfree(rb);
	return error;
}

/* Repair an inode's data fork.  Unwritten (preallocated) extents are OK. */
int
xrep_bmap_data(
	struct xfs_scrub	*sc)
{
	return xrep_bmap(sc, XFS_DATA_FORK, true);
}

/* Repair an inode's attr fork.  Unwritten extents are never allowed here. */
int
xrep_bmap_attr(
	struct xfs_scrub	*sc)
{
	return xrep_bmap(sc, XFS_ATTR_FORK, false);
}