root/fs/xfs/scrub/dabtree.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_attr_leaf.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/dabtree.h"

/* Directory/Attribute Btree */

/*
 * Check for da btree operation errors.  See the section about handling
 * operational errors in common.c.
 */
bool
xchk_da_process_error(
        struct xchk_da_btree    *ds,
        int                     level,
        int                     *error)
{
        struct xfs_scrub        *sc = ds->sc;

        if (*error == 0)
                return true;

        switch (*error) {
        case -EDEADLOCK:
        case -ECHRNG:
                /* Used to restart an op with deadlock avoidance. */
                trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
                break;
        case -EFSBADCRC:
        case -EFSCORRUPTED:
        case -EIO:
        case -ENODATA:
                /* Note the badness but don't abort. */
                sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
                *error = 0;
                fallthrough;
        default:
                trace_xchk_file_op_error(sc, ds->dargs.whichfork,
                                xfs_dir2_da_to_db(ds->dargs.geo,
                                        ds->state->path.blk[level].blkno),
                                *error, __return_address);
                break;
        }
        return false;
}

/*
 * Check for da btree corruption.  See the section about handling
 * operational errors in common.c.
 */
void
xchk_da_set_corrupt(
        struct xchk_da_btree    *ds,
        int                     level)
{
        struct xfs_scrub        *sc = ds->sc;

        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;

        trace_xchk_fblock_error(sc, ds->dargs.whichfork,
                        xfs_dir2_da_to_db(ds->dargs.geo,
                                ds->state->path.blk[level].blkno),
                        __return_address);
}

/* Flag a da btree node in need of optimization. */
void
xchk_da_set_preen(
        struct xchk_da_btree    *ds,
        int                     level)
{
        struct xfs_scrub        *sc = ds->sc;

        sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
        trace_xchk_fblock_preen(sc, ds->dargs.whichfork,
                        xfs_dir2_da_to_db(ds->dargs.geo,
                                ds->state->path.blk[level].blkno),
                        __return_address);
}

/* Find an entry at a certain level in a da btree. */
static struct xfs_da_node_entry *
xchk_da_btree_node_entry(
        struct xchk_da_btree            *ds,
        int                             level)
{
        struct xfs_da_state_blk         *blk = &ds->state->path.blk[level];
        struct xfs_da3_icnode_hdr       hdr;

        ASSERT(blk->magic == XFS_DA_NODE_MAGIC);

        xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
        return hdr.btree + blk->index;
}

/* Scrub a da btree hash (key). */
int
xchk_da_btree_hash(
        struct xchk_da_btree            *ds,
        int                             level,
        __be32                          *hashp)
{
        struct xfs_da_node_entry        *entry;
        xfs_dahash_t                    hash;
        xfs_dahash_t                    parent_hash;

        /* Is this hash in order? */
        hash = be32_to_cpu(*hashp);
        if (hash < ds->hashes[level])
                xchk_da_set_corrupt(ds, level);
        ds->hashes[level] = hash;

        if (level == 0)
                return 0;

        /* Is this hash no larger than the parent hash? */
        entry = xchk_da_btree_node_entry(ds, level - 1);
        parent_hash = be32_to_cpu(entry->hashval);
        if (parent_hash < hash)
                xchk_da_set_corrupt(ds, level);

        return 0;
}

/*
 * Check a da btree pointer.  Returns true if it's ok to use this
 * pointer.
 */
STATIC bool
xchk_da_btree_ptr_ok(
        struct xchk_da_btree    *ds,
        int                     level,
        xfs_dablk_t             blkno)
{
        if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
                xchk_da_set_corrupt(ds, level);
                return false;
        }

        return true;
}

/*
 * The da btree scrubber can handle leaf1 blocks as a degenerate
 * form of leafn blocks.  Since the regular da code doesn't handle
 * leaf1, we must multiplex the verifiers.
 */
static void
xchk_da_btree_read_verify(
        struct xfs_buf          *bp)
{
        struct xfs_da_blkinfo   *info = bp->b_addr;

        switch (be16_to_cpu(info->magic)) {
        case XFS_DIR2_LEAF1_MAGIC:
        case XFS_DIR3_LEAF1_MAGIC:
                bp->b_ops = &xfs_dir3_leaf1_buf_ops;
                bp->b_ops->verify_read(bp);
                return;
        default:
                /*
                 * xfs_da3_node_buf_ops already know how to handle
                 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
                 */
                bp->b_ops = &xfs_da3_node_buf_ops;
                bp->b_ops->verify_read(bp);
                return;
        }
}
static void
xchk_da_btree_write_verify(
        struct xfs_buf          *bp)
{
        struct xfs_da_blkinfo   *info = bp->b_addr;

        switch (be16_to_cpu(info->magic)) {
        case XFS_DIR2_LEAF1_MAGIC:
        case XFS_DIR3_LEAF1_MAGIC:
                bp->b_ops = &xfs_dir3_leaf1_buf_ops;
                bp->b_ops->verify_write(bp);
                return;
        default:
                /*
                 * xfs_da3_node_buf_ops already know how to handle
                 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
                 */
                bp->b_ops = &xfs_da3_node_buf_ops;
                bp->b_ops->verify_write(bp);
                return;
        }
}
static void *
xchk_da_btree_verify(
        struct xfs_buf          *bp)
{
        struct xfs_da_blkinfo   *info = bp->b_addr;

        switch (be16_to_cpu(info->magic)) {
        case XFS_DIR2_LEAF1_MAGIC:
        case XFS_DIR3_LEAF1_MAGIC:
                bp->b_ops = &xfs_dir3_leaf1_buf_ops;
                return bp->b_ops->verify_struct(bp);
        default:
                bp->b_ops = &xfs_da3_node_buf_ops;
                return bp->b_ops->verify_struct(bp);
        }
}

static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
        .name = "xchk_da_btree",
        .verify_read = xchk_da_btree_read_verify,
        .verify_write = xchk_da_btree_write_verify,
        .verify_struct = xchk_da_btree_verify,
};

/* Check a block's sibling. */
STATIC int
xchk_da_btree_block_check_sibling(
        struct xchk_da_btree    *ds,
        int                     level,
        int                     direction,
        xfs_dablk_t             sibling)
{
        struct xfs_da_state_path *path = &ds->state->path;
        struct xfs_da_state_path *altpath = &ds->state->altpath;
        int                     retval;
        int                     plevel;
        int                     error;

        memcpy(altpath, path, sizeof(ds->state->altpath));

        /*
         * If the pointer is null, we shouldn't be able to move the upper
         * level pointer anywhere.
         */
        if (sibling == 0) {
                error = xfs_da3_path_shift(ds->state, altpath, direction,
                                false, &retval);
                if (error == 0 && retval == 0)
                        xchk_da_set_corrupt(ds, level);
                error = 0;
                goto out;
        }

        /* Move the alternate cursor one block in the direction given. */
        error = xfs_da3_path_shift(ds->state, altpath, direction, false,
                        &retval);
        if (!xchk_da_process_error(ds, level, &error))
                goto out;
        if (retval) {
                xchk_da_set_corrupt(ds, level);
                goto out;
        }
        if (altpath->blk[level].bp)
                xchk_buffer_recheck(ds->sc, altpath->blk[level].bp);

        /* Compare upper level pointer to sibling pointer. */
        if (altpath->blk[level].blkno != sibling)
                xchk_da_set_corrupt(ds, level);

out:
        /* Free all buffers in the altpath that aren't referenced from path. */
        for (plevel = 0; plevel < altpath->active; plevel++) {
                if (altpath->blk[plevel].bp == NULL ||
                    (plevel < path->active &&
                     altpath->blk[plevel].bp == path->blk[plevel].bp))
                        continue;

                xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp);
                altpath->blk[plevel].bp = NULL;
        }

        return error;
}

/* Check a block's sibling pointers. */
STATIC int
xchk_da_btree_block_check_siblings(
        struct xchk_da_btree    *ds,
        int                     level,
        struct xfs_da_blkinfo   *hdr)
{
        xfs_dablk_t             forw;
        xfs_dablk_t             back;
        int                     error = 0;

        forw = be32_to_cpu(hdr->forw);
        back = be32_to_cpu(hdr->back);

        /* Top level blocks should not have sibling pointers. */
        if (level == 0) {
                if (forw != 0 || back != 0)
                        xchk_da_set_corrupt(ds, level);
                return 0;
        }

        /*
         * Check back (left) and forw (right) pointers.  These functions
         * absorb error codes for us.
         */
        error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
        if (error)
                goto out;
        error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);

out:
        memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
        return error;
}

/* Load a dir/attribute block from a btree. */
STATIC int
xchk_da_btree_block(
        struct xchk_da_btree            *ds,
        int                             level,
        xfs_dablk_t                     blkno)
{
        struct xfs_da_state_blk         *blk;
        struct xfs_da_intnode           *node;
        struct xfs_da_node_entry        *btree;
        struct xfs_da3_blkinfo          *hdr3;
        struct xfs_da_args              *dargs = &ds->dargs;
        struct xfs_inode                *ip = ds->dargs.dp;
        xfs_failaddr_t                  fa;
        xfs_ino_t                       owner;
        int                             *pmaxrecs;
        struct xfs_da3_icnode_hdr       nodehdr;
        int                             error = 0;

        blk = &ds->state->path.blk[level];
        ds->state->path.active = level + 1;

        /* Release old block. */
        if (blk->bp) {
                xfs_trans_brelse(dargs->trans, blk->bp);
                blk->bp = NULL;
        }

        /* Check the pointer. */
        blk->blkno = blkno;
        if (!xchk_da_btree_ptr_ok(ds, level, blkno))
                goto out_nobuf;

        /* Read the buffer. */
        error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
                        XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
                        &xchk_da_btree_buf_ops);
        if (!xchk_da_process_error(ds, level, &error))
                goto out_nobuf;
        if (blk->bp)
                xchk_buffer_recheck(ds->sc, blk->bp);

        /*
         * We didn't find a dir btree root block, which means that
         * there's no LEAF1/LEAFN tree (at least not where it's supposed
         * to be), so jump out now.
         */
        if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
                        blk->bp == NULL)
                goto out_nobuf;

        /* It's /not/ ok for attr trees not to have a da btree. */
        if (blk->bp == NULL) {
                xchk_da_set_corrupt(ds, level);
                goto out_nobuf;
        }

        hdr3 = blk->bp->b_addr;
        blk->magic = be16_to_cpu(hdr3->hdr.magic);
        pmaxrecs = &ds->maxrecs[level];

        /* We only started zeroing the header on v5 filesystems. */
        if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad)
                xchk_da_set_corrupt(ds, level);

        /* Check the owner. */
        if (xfs_has_crc(ip->i_mount)) {
                owner = be64_to_cpu(hdr3->owner);
                if (owner != ip->i_ino)
                        xchk_da_set_corrupt(ds, level);
        }

        /* Check the siblings. */
        error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
        if (error)
                goto out;

        /* Interpret the buffer. */
        switch (blk->magic) {
        case XFS_ATTR_LEAF_MAGIC:
        case XFS_ATTR3_LEAF_MAGIC:
                xfs_trans_buf_set_type(dargs->trans, blk->bp,
                                XFS_BLFT_ATTR_LEAF_BUF);
                blk->magic = XFS_ATTR_LEAF_MAGIC;
                blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
                if (ds->tree_level != 0)
                        xchk_da_set_corrupt(ds, level);
                break;
        case XFS_DIR2_LEAFN_MAGIC:
        case XFS_DIR3_LEAFN_MAGIC:
                xfs_trans_buf_set_type(dargs->trans, blk->bp,
                                XFS_BLFT_DIR_LEAFN_BUF);
                blk->magic = XFS_DIR2_LEAFN_MAGIC;
                blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
                if (ds->tree_level != 0)
                        xchk_da_set_corrupt(ds, level);
                break;
        case XFS_DIR2_LEAF1_MAGIC:
        case XFS_DIR3_LEAF1_MAGIC:
                xfs_trans_buf_set_type(dargs->trans, blk->bp,
                                XFS_BLFT_DIR_LEAF1_BUF);
                blk->magic = XFS_DIR2_LEAF1_MAGIC;
                blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
                if (ds->tree_level != 0)
                        xchk_da_set_corrupt(ds, level);
                break;
        case XFS_DA_NODE_MAGIC:
        case XFS_DA3_NODE_MAGIC:
                xfs_trans_buf_set_type(dargs->trans, blk->bp,
                                XFS_BLFT_DA_NODE_BUF);
                blk->magic = XFS_DA_NODE_MAGIC;
                node = blk->bp->b_addr;
                xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
                btree = nodehdr.btree;
                *pmaxrecs = nodehdr.count;
                blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
                if (level == 0) {
                        if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
                                xchk_da_set_corrupt(ds, level);
                                goto out_freebp;
                        }
                        ds->tree_level = nodehdr.level;
                } else {
                        if (ds->tree_level != nodehdr.level) {
                                xchk_da_set_corrupt(ds, level);
                                goto out_freebp;
                        }
                }

                /* XXX: Check hdr3.pad32 once we know how to fix it. */
                break;
        default:
                xchk_da_set_corrupt(ds, level);
                goto out_freebp;
        }

        fa = xfs_da3_header_check(blk->bp, dargs->owner);
        if (fa) {
                xchk_da_set_corrupt(ds, level);
                goto out_freebp;
        }

        /*
         * If we've been handed a block that is below the dabtree root, does
         * its hashval match what the parent block expected to see?
         */
        if (level > 0) {
                struct xfs_da_node_entry        *key;

                key = xchk_da_btree_node_entry(ds, level - 1);
                if (be32_to_cpu(key->hashval) != blk->hashval) {
                        xchk_da_set_corrupt(ds, level);
                        goto out_freebp;
                }
        }

out:
        return error;
out_freebp:
        xfs_trans_brelse(dargs->trans, blk->bp);
        blk->bp = NULL;
out_nobuf:
        blk->blkno = 0;
        return error;
}

/* Visit all nodes and leaves of a da btree. */
int
xchk_da_btree(
        struct xfs_scrub                *sc,
        int                             whichfork,
        xchk_da_btree_rec_fn            scrub_fn,
        void                            *private)
{
        struct xchk_da_btree            *ds;
        struct xfs_mount                *mp = sc->mp;
        struct xfs_da_state_blk         *blks;
        struct xfs_da_node_entry        *key;
        xfs_dablk_t                     blkno;
        int                             level;
        int                             error;

        /* Skip short format data structures; no btree to scan. */
        if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork)))
                return 0;

        /* Set up initial da state. */
        ds = kzalloc_obj(struct xchk_da_btree, XCHK_GFP_FLAGS);
        if (!ds)
                return -ENOMEM;
        ds->dargs.dp = sc->ip;
        ds->dargs.whichfork = whichfork;
        ds->dargs.trans = sc->tp;
        ds->dargs.op_flags = XFS_DA_OP_OKNOENT;
        ds->dargs.owner = sc->ip->i_ino;
        ds->state = xfs_da_state_alloc(&ds->dargs);
        ds->sc = sc;
        ds->private = private;
        if (whichfork == XFS_ATTR_FORK) {
                ds->dargs.geo = mp->m_attr_geo;
                ds->lowest = 0;
                ds->highest = 0;
        } else {
                ds->dargs.geo = mp->m_dir_geo;
                ds->lowest = ds->dargs.geo->leafblk;
                ds->highest = ds->dargs.geo->freeblk;
        }
        blkno = ds->lowest;
        level = 0;

        /* Find the root of the da tree, if present. */
        blks = ds->state->path.blk;
        error = xchk_da_btree_block(ds, level, blkno);
        if (error)
                goto out_state;
        /*
         * We didn't find a block at ds->lowest, which means that there's
         * no LEAF1/LEAFN tree (at least not where it's supposed to be),
         * so jump out now.
         */
        if (blks[level].bp == NULL)
                goto out_state;

        blks[level].index = 0;
        while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
                /* Handle leaf block. */
                if (blks[level].magic != XFS_DA_NODE_MAGIC) {
                        /* End of leaf, pop back towards the root. */
                        if (blks[level].index >= ds->maxrecs[level]) {
                                if (level > 0)
                                        blks[level - 1].index++;
                                ds->tree_level++;
                                level--;
                                continue;
                        }

                        /* Dispatch record scrubbing. */
                        error = scrub_fn(ds, level);
                        if (error)
                                break;
                        if (xchk_should_terminate(sc, &error) ||
                            (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
                                break;

                        blks[level].index++;
                        continue;
                }


                /* End of node, pop back towards the root. */
                if (blks[level].index >= ds->maxrecs[level]) {
                        if (level > 0)
                                blks[level - 1].index++;
                        ds->tree_level++;
                        level--;
                        continue;
                }

                /* Hashes in order for scrub? */
                key = xchk_da_btree_node_entry(ds, level);
                error = xchk_da_btree_hash(ds, level, &key->hashval);
                if (error)
                        goto out;

                /* Drill another level deeper. */
                blkno = be32_to_cpu(key->before);
                level++;
                if (level >= XFS_DA_NODE_MAXDEPTH) {
                        /* Too deep! */
                        xchk_da_set_corrupt(ds, level - 1);
                        break;
                }
                ds->tree_level--;
                error = xchk_da_btree_block(ds, level, blkno);
                if (error)
                        goto out;
                if (blks[level].bp == NULL)
                        goto out;

                blks[level].index = 0;
        }

out:
        /* Release all the buffers we're tracking. */
        for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
                if (blks[level].bp == NULL)
                        continue;
                xfs_trans_brelse(sc->tp, blks[level].bp);
                blks[level].bp = NULL;
        }

out_state:
        xfs_da_state_free(ds->state);
        kfree(ds);
        return error;
}