root/fs/xfs/scrub/rtbitmap_repair.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2020-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_rtalloc.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_exchmaps.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtgroup.h"
#include "xfs_extent_busy.h"
#include "xfs_refcount.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/xfile.h"
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/reap.h"
#include "scrub/rtbitmap.h"

/* rt bitmap content repairs */

/*
 * Set up to repair the realtime bitmap for this group.
 *
 * Creates the temporary file and the xfile that will hold the rebuilt bitmap,
 * then adds the block reservation needed for the repair to rtb->resblks.
 * Returns 0 on success, -EOPNOTSUPP if the reservation would not fit in an
 * unsigned int, or whatever error the tempfile/xfile setup returned.
 */
int
xrep_setup_rtbitmap(
        struct xfs_scrub        *sc,
        struct xchk_rtbitmap    *rtb)
{
        struct xfs_mount        *mp = sc->mp;
        unsigned long long      rbmblocks = mp->m_sb.sb_rbmblocks;
        unsigned long long      resblks;
        int                     error;

        error = xrep_tempfile_create(sc, S_IFREG);
        if (error)
                return error;

        /* The reconstructed bitmap is staged in an xfile, one byte per byte. */
        error = xfile_create("realtime bitmap file",
                        rbmblocks * mp->m_sb.sb_blocksize, &sc->xfile);
        if (error)
                return error;

        /*
         * Ask for enough blocks to write out an entirely new bitmap file, and
         * on top of that twice the bmbt size we would need if every data fork
         * mapping covered only a single block.  That is meant to pay for both
         * preallocating the temporary file and exchanging the mappings.
         *
         * xfs_exchmaps_estimate is of no use here: the replacement bitmap has
         * not been built yet, so its extent count is unknown.  Once it has
         * been built the transaction is dirty (and cannot be dropped, because
         * the rtbitmap ILOCK cannot be dropped), so no further reservation
         * can be requested at that point.
         */
        resblks = rbmblocks + xfs_bmbt_calc_size(mp, rbmblocks) * 2;
        if (resblks > UINT_MAX)
                return -EOPNOTSUPP;

        rtb->resblks += resblks;
        return 0;
}

/*
 * Convert an rt extent number into the offset, counted in rtwords, of the
 * bitmap word that tracks it.  @mp is accepted but not consulted.
 */
static inline xrep_wordoff_t
rtx_to_wordoff(
        struct xfs_mount        *mp,
        xfs_rtxnum_t            rtx)
{
        xrep_wordoff_t          wordoff = rtx >> XFS_NBWORDLOG;

        return wordoff;
}

/*
 * Convert a length in rt extents into a count of whole rtwords; any
 * remainder smaller than one word is dropped by the shift.
 */
static inline xrep_wordcnt_t
rtxlen_to_wordcnt(
        xfs_rtxlen_t    rtxlen)
{
        xrep_wordcnt_t  wordcnt = rtxlen >> XFS_NBWORDLOG;

        return wordcnt;
}

/* Helper functions to record rtwords in an xfile. */

/*
 * Read the rtword at @wordoff out of the xfile and return it through @word
 * in CPU byte order.  Returns 0 on success or the xfile_load error, in which
 * case @word is left untouched.
 */
static inline int
xfbmp_load(
        struct xchk_rtbitmap    *rtb,
        xrep_wordoff_t          wordoff,
        xfs_rtword_t            *word)
{
        union xfs_rtword_raw    raw;
        int                     ret;

        ASSERT(xfs_has_rtgroups(rtb->sc->mp));

        /* Word offsets become byte offsets in the xfile. */
        ret = xfile_load(rtb->sc->xfile, &raw, sizeof(raw),
                        wordoff << XFS_WORDLOG);
        if (!ret)
                *word = be32_to_cpu(raw.rtg);

        return ret;
}

/*
 * Write @word, converted to big-endian, into the xfile at @wordoff.
 * Returns 0 on success or the xfile_store error.
 */
static inline int
xfbmp_store(
        struct xchk_rtbitmap    *rtb,
        xrep_wordoff_t          wordoff,
        const xfs_rtword_t      word)
{
        union xfs_rtword_raw    raw;
        int                     ret;

        ASSERT(xfs_has_rtgroups(rtb->sc->mp));

        raw.rtg = cpu_to_be32(word);

        /* Word offsets become byte offsets in the xfile. */
        ret = xfile_store(rtb->sc->xfile, &raw, sizeof(raw),
                        wordoff << XFS_WORDLOG);
        return ret;
}

/*
 * Copy @nr_words raw rtwords from the buffer at @word into the xfile,
 * starting at word offset @wordoff.  The words are stored exactly as given;
 * no byte swapping is done here.  Returns 0 or the xfile_store error.
 */
static inline int
xfbmp_copyin(
        struct xchk_rtbitmap    *rtb,
        xrep_wordoff_t          wordoff,
        const union xfs_rtword_raw      *word,
        xrep_wordcnt_t          nr_words)
{
        int                     error;

        error = xfile_store(rtb->sc->xfile, word,
                        nr_words << XFS_WORDLOG,
                        wordoff << XFS_WORDLOG);
        return error;
}

/*
 * Copy @nr_words raw rtwords out of the xfile, starting at word offset
 * @wordoff, into the buffer at @word.  The words are returned exactly as
 * stored; no byte swapping is done here.  Returns 0 or the xfile_load error.
 */
static inline int
xfbmp_copyout(
        struct xchk_rtbitmap    *rtb,
        xrep_wordoff_t          wordoff,
        union xfs_rtword_raw    *word,
        xrep_wordcnt_t          nr_words)
{
        int                     error;

        error = xfile_load(rtb->sc->xfile, word,
                        nr_words << XFS_WORDLOG,
                        wordoff << XFS_WORDLOG);
        return error;
}

/*
 * OR @mask into the rtword at @wordoff of the incore bitmap: load the word,
 * trace the old value together with the mask, and store the combination.
 * Returns 0 or the first load/store error.
 */
static int
xrep_rtbitmap_or(
        struct xchk_rtbitmap    *rtb,
        xrep_wordoff_t          wordoff,
        xfs_rtword_t            mask)
{
        xfs_rtword_t            old;
        int                     error = xfbmp_load(rtb, wordoff, &old);

        if (error)
                return error;

        trace_xrep_rtbitmap_or(rtb->sc->mp, wordoff, mask, old);

        return xfbmp_store(rtb, wordoff, old | mask);
}

/*
 * Mark as free every rt extent between the next rt block we expected to see
 * in the rtrmap records and the given rt block.
 *
 * The range covered is [rtb->next_rgbno, rgbno).  Bits are set in the xfile
 * copy of the bitmap in up to three steps: a partial word at the start, a
 * partial word at the end, and whole words in between.
 *
 * Returns 0 on success; -EFSCORRUPTED if the range fails xfs_verify_rgbext,
 * is not aligned to rt extent boundaries, or overlaps any refcount record;
 * otherwise the error returned by the refcount query or the xfile helpers.
 */
STATIC int
xrep_rtbitmap_mark_free(
        struct xchk_rtbitmap    *rtb,
        xfs_rgblock_t           rgbno)
{
        struct xfs_mount        *mp = rtb->sc->mp;
        struct xchk_rt          *sr = &rtb->sc->sr;
        struct xfs_rtgroup      *rtg = sr->rtg;
        xfs_rtxnum_t            startrtx;
        xfs_rtxnum_t            nextrtx;
        xrep_wordoff_t          wordoff, nextwordoff;
        unsigned int            bit;
        unsigned int            bufwsize;
        xfs_extlen_t            mod;
        xfs_rtword_t            mask;
        enum xbtree_recpacking  outcome;
        int                     error;

        if (!xfs_verify_rgbext(rtg, rtb->next_rgbno, rgbno - rtb->next_rgbno))
                return -EFSCORRUPTED;

        /*
         * Convert rt blocks to rt extents.  The block range we find must be
         * aligned to an rtextent boundary on both ends.
         */
        startrtx = xfs_rgbno_to_rtx(mp, rtb->next_rgbno);
        mod = xfs_rgbno_to_rtxoff(mp, rtb->next_rgbno);
        if (mod)
                return -EFSCORRUPTED;

        /*
         * nextrtx is one past the last rt extent in the range; the last block
         * of the range (rgbno - 1) must be the last block of its rt extent.
         */
        nextrtx = xfs_rgbno_to_rtx(mp, rgbno - 1) + 1;
        mod = xfs_rgbno_to_rtxoff(mp, rgbno - 1);
        if (mod != mp->m_sb.sb_rextsize - 1)
                return -EFSCORRUPTED;

        /* Must not be shared or CoW staging. */
        if (sr->refc_cur) {
                error = xfs_refcount_has_records(sr->refc_cur,
                                XFS_REFC_DOMAIN_SHARED, rtb->next_rgbno,
                                rgbno - rtb->next_rgbno, &outcome);
                if (error)
                        return error;
                if (outcome != XBTREE_RECPACKING_EMPTY)
                        return -EFSCORRUPTED;

                error = xfs_refcount_has_records(sr->refc_cur,
                                XFS_REFC_DOMAIN_COW, rtb->next_rgbno,
                                rgbno - rtb->next_rgbno, &outcome);
                if (error)
                        return error;
                if (outcome != XBTREE_RECPACKING_EMPTY)
                        return -EFSCORRUPTED;
        }

        trace_xrep_rtbitmap_record_free(mp, startrtx, nextrtx - 1);

        /* Set bits as needed to round startrtx up to the nearest word. */
        bit = startrtx & XREP_RTBMP_WORDMASK;
        if (bit) {
                xfs_rtblock_t   len = nextrtx - startrtx;
                unsigned int    lastbit;

                /* Stop at the end of the range or of this word. */
                lastbit = min(bit + len, XFS_NBWORD);
                mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;

                error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, startrtx),
                                mask);
                /* Done if that failed or the whole range fit in this word. */
                if (error || lastbit - bit == len)
                        return error;
                startrtx += XFS_NBWORD - bit;
        }

        /* Set bits as needed to round nextrtx down to the nearest word. */
        bit = nextrtx & XREP_RTBMP_WORDMASK;
        if (bit) {
                mask = ((xfs_rtword_t)1 << bit) - 1;

                error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, nextrtx),
                                mask);
                /* Done if that failed or no whole words remain in between. */
                if (error || startrtx + bit == nextrtx)
                        return error;
                nextrtx -= bit;
        }

        trace_xrep_rtbitmap_record_free_bulk(mp, startrtx, nextrtx - 1);

        /* Set all the words in between, up to a whole fs block at once. */
        wordoff = rtx_to_wordoff(mp, startrtx);
        nextwordoff = rtx_to_wordoff(mp, nextrtx);
        bufwsize = mp->m_sb.sb_blocksize >> XFS_WORDLOG;

        while (wordoff < nextwordoff) {
                xrep_wordoff_t  rem;
                xrep_wordcnt_t  wordcnt;

                wordcnt = min_t(xrep_wordcnt_t, nextwordoff - wordoff,
                                bufwsize);

                /*
                 * Try to keep us aligned to the rtwords buffer to reduce the
                 * number of xfile writes.
                 */
                rem = wordoff & (bufwsize - 1);
                if (rem)
                        wordcnt = min_t(xrep_wordcnt_t, wordcnt,
                                        bufwsize - rem);

                /*
                 * rtb->words is the source of the all-ones pattern; it is
                 * filled with 0xFF by xrep_rtbitmap_find_freespace before
                 * the rtrmap walk starts.
                 */
                error = xfbmp_copyin(rtb, wordoff, rtb->words, wordcnt);
                if (error)
                        return error;

                wordoff += wordcnt;
        }

        return 0;
}

/*
 * rtrmapbt query callback: record free space in the rtbitmap being rebuilt.
 *
 * Any blocks between rtb->next_rgbno and the start of @rec have no reverse
 * mapping and are marked free.  @priv is the struct xchk_rtbitmap; @cur is
 * not used.  Returns 0 to continue the walk, or an error to stop it.
 */
STATIC int
xrep_rtbitmap_walk_rtrmap(
        struct xfs_btree_cur            *cur,
        const struct xfs_rmap_irec      *rec,
        void                            *priv)
{
        struct xchk_rtbitmap            *rtb = priv;
        int                             error = 0;

        if (xchk_should_terminate(rtb->sc, &error))
                return error;

        /* A gap in front of this record is free space. */
        if (rec->rm_startblock > rtb->next_rgbno) {
                error = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock);
                if (error)
                        return error;
        }

        /*
         * Advance to the end of this record; max() keeps the mark from
         * moving backwards should this record end before an earlier one did.
         */
        rtb->next_rgbno = max(rtb->next_rgbno,
                              rec->rm_startblock + rec->rm_blockcount);
        return 0;
}

/*
 * Compute the new realtime bitmap contents: walk every rtrmapbt record, mark
 * each gap between records as free, and finally mark everything after the
 * last record up to the end of the rt group as free.
 *
 * The btree cursors set up here are released before returning, on success
 * and on failure alike.  Returns 0 or a negative errno.
 */
STATIC int
xrep_rtbitmap_find_freespace(
        struct xchk_rtbitmap    *rtb)
{
        struct xfs_scrub        *sc = rtb->sc;
        struct xfs_mount        *mp = sc->mp;
        struct xfs_rtgroup      *rtg = sc->sr.rtg;
        uint64_t                blockcount;
        int                     error;

        /* A block's worth of set bits lets mark_free write words in bulk. */
        memset(rtb->words, 0xFF, mp->m_sb.sb_blocksize);

        xrep_rtgroup_btcur_init(sc, &sc->sr);

        error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap,
                        rtb);
        if (!error) {
                /*
                 * Whatever lies between the last record we saw and the end
                 * of the rt group is free too.
                 */
                blockcount = rtg->rtg_extents * mp->m_sb.sb_rextsize;
                if (rtb->next_rgbno < blockcount)
                        error = xrep_rtbitmap_mark_free(rtb, blockcount);
        }

        xchk_rtgroup_btcur_free(&sc->sr);
        return error;
}

static int
xrep_rtbitmap_prep_buf(
        struct xfs_scrub        *sc,
        struct xfs_buf          *bp,
        void                    *data)
{
        struct xchk_rtbitmap    *rtb = data;
        struct xfs_mount        *mp = sc->mp;
        union xfs_rtword_raw    *ondisk;
        int                     error;

        rtb->args.mp = sc->mp;
        rtb->args.tp = sc->tp;
        rtb->args.rbmbp = bp;
        ondisk = xfs_rbmblock_wordptr(&rtb->args, 0);
        rtb->args.rbmbp = NULL;

        error = xfbmp_copyout(rtb, rtb->prep_wordoff, ondisk,
                        mp->m_blockwsize);
        if (error)
                return error;

        if (xfs_has_rtgroups(sc->mp)) {
                struct xfs_rtbuf_blkinfo        *hdr = bp->b_addr;

                hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
                hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
                hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
                hdr->rt_lsn = 0;
                uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid);
                bp->b_ops = &xfs_rtbitmap_buf_ops;
        } else {
                bp->b_ops = &xfs_rtbuf_ops;
        }

        rtb->prep_wordoff += mp->m_blockwsize;
        xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTBITMAP_BUF);
        return 0;
}

/*
 * Make sure that the given range of the data fork of the realtime file is
 * mapped to written blocks.  The caller must ensure that the inode is joined
 * to the transaction.
 *
 * The range checked is file blocks [0, len) of sc->ip.  Written extents and
 * holes are skipped; unwritten extents are converted to written, zeroed
 * extents.  Returns 0 on success, -EFSCORRUPTED if no mapping comes back, a
 * delalloc reservation is found, or an extent is in an unexpected state, or
 * the error from the bmap or deferred-work calls.
 */
STATIC int
xrep_rtbitmap_data_mappings(
        struct xfs_scrub        *sc,
        xfs_filblks_t           len)
{
        struct xfs_bmbt_irec    map;
        xfs_fileoff_t           off = 0;
        int                     error;

        ASSERT(sc->ip != NULL);

        while (off < len) {
                int             nmaps = 1;

                /*
                 * Look up the mapping at this offset.  If we have a real
                 * extent mapping this block then we're in ok shape.
                 */
                error = xfs_bmapi_read(sc->ip, off, len - off, &map, &nmaps,
                                XFS_DATA_FORK);
                if (error)
                        return error;
                if (nmaps == 0) {
                        ASSERT(nmaps != 0);
                        return -EFSCORRUPTED;
                }

                /*
                 * Written extents are ok.  Holes are not filled because we
                 * do not know the freespace information.
                 */
                if (xfs_bmap_is_written_extent(&map) ||
                    map.br_startblock == HOLESTARTBLOCK) {
                        off = map.br_startoff + map.br_blockcount;
                        continue;
                }

                /*
                 * If we find a delalloc reservation then something is very
                 * very wrong.  Bail out.
                 */
                if (map.br_startblock == DELAYSTARTBLOCK)
                        return -EFSCORRUPTED;

                /* Make sure we're really converting an unwritten extent. */
                if (map.br_state != XFS_EXT_UNWRITTEN) {
                        ASSERT(map.br_state == XFS_EXT_UNWRITTEN);
                        return -EFSCORRUPTED;
                }

                /*
                 * Make sure this block has a real zeroed extent mapped.
                 * Note that map is passed back in as the output mapping, so
                 * it is overwritten by this call.
                 */
                nmaps = 1;
                error = xfs_bmapi_write(sc->tp, sc->ip, map.br_startoff,
                                map.br_blockcount,
                                XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO,
                                0, &map, &nmaps);
                if (error)
                        return error;

                /* Commit new extent and all deferred work. */
                error = xrep_defer_finish(sc);
                if (error)
                        return error;

                /*
                 * Resume after whatever the conversion returned in map.
                 * NOTE(review): presumably this may be shorter than the
                 * extent we asked to convert, in which case the next
                 * iteration picks up the remainder -- confirm.
                 */
                off = map.br_startoff + map.br_blockcount;
        }

        return 0;
}

/*
 * Fix broken rt volume geometry.
 *
 * Queues superblock deltas for any of rextents, rbmblocks and rextslog that
 * disagree with the values held in @rtb, rounds the bitmap inode's on-disk
 * size up to a block boundary and grows it to cover rtb->rbmblocks if it is
 * too small, then logs the inode and rolls the scrub transaction.
 * Returns the result of the transaction roll.
 */
STATIC int
xrep_rtbitmap_geometry(
        struct xfs_scrub        *sc,
        struct xchk_rtbitmap    *rtb)
{
        struct xfs_mount        *mp = sc->mp;
        struct xfs_trans        *tp = sc->tp;

        /* Superblock fields: only log a delta where the values differ. */
        if (rtb->rextents != mp->m_sb.sb_rextents)
                xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTENTS,
                                rtb->rextents - mp->m_sb.sb_rextents);

        if (rtb->rbmblocks != mp->m_sb.sb_rbmblocks)
                xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
                                rtb->rbmblocks - mp->m_sb.sb_rbmblocks);

        if (rtb->rextslog != mp->m_sb.sb_rextslog)
                xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
                                rtb->rextslog - mp->m_sb.sb_rextslog);

        /* The file size must be block aligned... */
        sc->ip->i_disk_size = roundup_64(sc->ip->i_disk_size,
                                         mp->m_sb.sb_blocksize);

        /* ...and must be large enough to hold every bitmap block. */
        if (XFS_FSB_TO_B(mp, rtb->rbmblocks) > sc->ip->i_disk_size)
                sc->ip->i_disk_size = XFS_FSB_TO_B(mp, rtb->rbmblocks);

        xfs_trans_log_inode(tp, sc->ip, XFS_ILOG_CORE);
        return xrep_roll_trans(sc);
}

/*
 * Repair the realtime bitmap file metadata.
 *
 * In order, this function: checks that the filesystem has the features the
 * rebuild depends on; tops up the transaction's block reservation if needed;
 * repairs the bitmap inode's forks; converts unwritten extents in the bitmap
 * file; fixes the rt geometry; flushes the busy extent list; rebuilds the
 * bitmap contents in the xfile from the rtrmapbt; copies them into the
 * temporary file; exchanges the data fork contents of the bitmap file and
 * the temporary file; and reaps the old blocks via the temporary file.
 *
 * Returns 0 on success (including the case where the bitmap is too large to
 * touch), -EOPNOTSUPP if a required feature is missing or the reservation
 * would be too large, or the error from whichever step failed.
 */
int
xrep_rtbitmap(
        struct xfs_scrub        *sc)
{
        struct xchk_rtbitmap    *rtb = sc->buf;
        struct xfs_mount        *mp = sc->mp;
        struct xfs_group        *xg = rtg_group(sc->sr.rtg);
        unsigned long long      blocks = 0;
        unsigned int            busy_gen;
        int                     error;

        /* We require the realtime rmapbt to rebuild anything. */
        if (!xfs_has_rtrmapbt(sc->mp))
                return -EOPNOTSUPP;
        /* We require atomic file exchange range to rebuild anything. */
        if (!xfs_has_exchange_range(sc->mp))
                return -EOPNOTSUPP;

        /*
         * Impossibly large rtbitmap means we can't touch the filesystem.
         * Note that this returns success without changing anything.
         */
        if (rtb->rbmblocks > U32_MAX)
                return 0;

        /*
         * If the size of the rt bitmap file is larger than what we reserved,
         * figure out if we need to adjust the block reservation in the
         * transaction.
         */
        blocks = xfs_bmbt_calc_size(mp, rtb->rbmblocks);
        if (blocks > UINT_MAX)
                return -EOPNOTSUPP;
        if (blocks > rtb->resblks) {
                error = xfs_trans_reserve_more(sc->tp, blocks, 0);
                if (error)
                        return error;

                rtb->resblks += blocks;
        }

        /* Fix inode core and forks. */
        error = xrep_metadata_inode_forks(sc);
        if (error)
                return error;

        xfs_trans_ijoin(sc->tp, sc->ip, 0);

        /* Ensure no unwritten extents. */
        error = xrep_rtbitmap_data_mappings(sc, rtb->rbmblocks);
        if (error)
                return error;

        /*
         * Fix inconsistent bitmap geometry.  This function returns with a
         * clean scrub transaction.
         */
        error = xrep_rtbitmap_geometry(sc, rtb);
        if (error)
                return error;

        /*
         * Make sure the busy extent list is clear because we can't put extents
         * on there twice.
         */
        if (!xfs_extent_busy_list_empty(xg, &busy_gen)) {
                error = xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0);
                if (error)
                        return error;
        }

        /*
         * Generate the new rtbitmap data.  We don't need the rtbmp information
         * once this call is finished.
         */
        error = xrep_rtbitmap_find_freespace(rtb);
        if (error)
                return error;

        /*
         * Try to take ILOCK_EXCL of the temporary file.  We had better be the
         * only ones holding onto this inode, but we can't block while holding
         * the rtbitmap file's ILOCK_EXCL.  Poll instead, giving up if the
         * scrub has been asked to terminate.
         */
        while (!xrep_tempfile_ilock_nowait(sc)) {
                if (xchk_should_terminate(sc, &error))
                        return error;
                delay(1);
        }

        /*
         * Make sure we have space allocated for the part of the bitmap
         * file that corresponds to this group.  We already joined sc->ip.
         */
        xfs_trans_ijoin(sc->tp, sc->tempip, 0);
        error = xrep_tempfile_prealloc(sc, 0, rtb->rbmblocks);
        if (error)
                return error;

        /* Last chance to abort before we start committing fixes. */
        if (xchk_should_terminate(sc, &error))
                return error;

        /* Copy the bitmap file that we generated. */
        error = xrep_tempfile_copyin(sc, 0, rtb->rbmblocks,
                        xrep_rtbitmap_prep_buf, rtb);
        if (error)
                return error;
        /*
         * NOTE(review): the size here comes from the superblock's
         * sb_rbmblocks, whereas the surrounding calls use rtb->rbmblocks.
         * Presumably the two agree once xrep_rtbitmap_geometry has committed
         * its superblock updates -- confirm.
         */
        error = xrep_tempfile_set_isize(sc,
                        XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks));
        if (error)
                return error;

        /*
         * Now exchange the data fork contents.  We're done with the temporary
         * buffer, so we can reuse it for the tempfile exchmaps information.
         */
        error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
                        rtb->rbmblocks, &rtb->tempexch);
        if (error)
                return error;

        error = xrep_tempexch_contents(sc, &rtb->tempexch);
        if (error)
                return error;

        /*
         * Free the old rtbitmap blocks if they're not in use.  After the
         * exchange they are reached through the temporary file's data fork.
         */
        return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
}