root/fs/xfs/scrub/readdir.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
#include "xfs_error.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/readdir.h"

/* Call a function for every entry in a shortform directory. */
STATIC int
xchk_dir_walk_sf(
        struct xfs_scrub        *sc,
        struct xfs_inode        *dp,
        xchk_dirent_fn          dirent_fn,
        void                    *priv)
{
        struct xfs_name         name = {
                .name           = ".",
                .len            = 1,
                .type           = XFS_DIR3_FT_DIR,
        };
        struct xfs_mount        *mp = dp->i_mount;
        struct xfs_da_geometry  *geo = mp->m_dir_geo;
        struct xfs_dir2_sf_entry *sfep;
        struct xfs_dir2_sf_hdr  *sfp = dp->i_df.if_data;
        xfs_ino_t               ino;
        xfs_dir2_dataptr_t      dapos;
        unsigned int            i;
        int                     error;

        ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
        ASSERT(sfp != NULL);

        /* dot entry */
        dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
                        geo->data_entry_offset);

        error = dirent_fn(sc, dp, dapos, &name, dp->i_ino, priv);
        if (error)
                return error;

        /* dotdot entry */
        dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
                        geo->data_entry_offset +
                        xfs_dir2_data_entsize(mp, sizeof(".") - 1));
        ino = xfs_dir2_sf_get_parent_ino(sfp);
        name.name = "..";
        name.len = 2;

        error = dirent_fn(sc, dp, dapos, &name, ino, priv);
        if (error)
                return error;

        /* iterate everything else */
        sfep = xfs_dir2_sf_firstentry(sfp);
        for (i = 0; i < sfp->count; i++) {
                dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
                                xfs_dir2_sf_get_offset(sfep));
                ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
                name.name = sfep->name;
                name.len = sfep->namelen;
                name.type = xfs_dir2_sf_get_ftype(mp, sfep);

                error = dirent_fn(sc, dp, dapos, &name, ino, priv);
                if (error)
                        return error;

                sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
        }

        return 0;
}

/* Call a function for every entry in a block directory. */
STATIC int
xchk_dir_walk_block(
        struct xfs_scrub        *sc,
        struct xfs_inode        *dp,
        xchk_dirent_fn          dirent_fn,
        void                    *priv)
{
        struct xfs_mount        *mp = dp->i_mount;
        struct xfs_da_geometry  *geo = mp->m_dir_geo;
        struct xfs_buf          *bp;
        unsigned int            off, next_off, end;
        int                     error;

        error = xfs_dir3_block_read(sc->tp, dp, dp->i_ino, &bp);
        if (error)
                return error;

        /* Walk each directory entry. */
        end = xfs_dir3_data_end_offset(geo, bp->b_addr);
        for (off = geo->data_entry_offset; off < end; off = next_off) {
                struct xfs_name                 name = { };
                struct xfs_dir2_data_unused     *dup = bp->b_addr + off;
                struct xfs_dir2_data_entry      *dep = bp->b_addr + off;
                xfs_ino_t                       ino;
                xfs_dir2_dataptr_t              dapos;

                /* Skip an empty entry. */
                if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
                        next_off = off + be16_to_cpu(dup->length);
                        continue;
                }

                /* Otherwise, find the next entry and report it. */
                next_off = off + xfs_dir2_data_entsize(mp, dep->namelen);
                if (next_off > end)
                        break;

                dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, off);
                ino = be64_to_cpu(dep->inumber);
                name.name = dep->name;
                name.len = dep->namelen;
                name.type = xfs_dir2_data_get_ftype(mp, dep);

                error = dirent_fn(sc, dp, dapos, &name, ino, priv);
                if (error)
                        break;
        }

        xfs_trans_brelse(sc->tp, bp);
        return error;
}

/* Read a leaf-format directory buffer. */
STATIC int
xchk_read_leaf_dir_buf(
        struct xfs_trans        *tp,
        struct xfs_inode        *dp,
        struct xfs_da_geometry  *geo,
        xfs_dir2_off_t          *curoff,
        struct xfs_buf          **bpp)
{
        struct xfs_iext_cursor  icur;
        struct xfs_bmbt_irec    map;
        struct xfs_ifork        *ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
        xfs_dablk_t             last_da;
        xfs_dablk_t             map_off;
        xfs_dir2_off_t          new_off;

        *bpp = NULL;

        /*
         * Look for mapped directory blocks at or above the current offset.
         * Truncate down to the nearest directory block to start the scanning
         * operation.
         */
        last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
        map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *curoff));

        if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map))
                return 0;
        if (map.br_startoff >= last_da)
                return 0;
        xfs_trim_extent(&map, map_off, last_da - map_off);

        /* Read the directory block of that first mapping. */
        new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
        if (new_off > *curoff)
                *curoff = new_off;

        return xfs_dir3_data_read(tp, dp, dp->i_ino, map.br_startoff, 0, bpp);
}

/* Call a function for every entry in a leaf directory. */
STATIC int
xchk_dir_walk_leaf(
        struct xfs_scrub        *sc,
        struct xfs_inode        *dp,
        xchk_dirent_fn          dirent_fn,
        void                    *priv)
{
        struct xfs_mount        *mp = dp->i_mount;
        struct xfs_da_geometry  *geo = mp->m_dir_geo;
        struct xfs_buf          *bp = NULL;
        xfs_dir2_off_t          curoff = 0;
        unsigned int            offset = 0;
        int                     error;

        /* Iterate every directory offset in this directory. */
        while (curoff < XFS_DIR2_LEAF_OFFSET) {
                struct xfs_name                 name = { };
                struct xfs_dir2_data_unused     *dup;
                struct xfs_dir2_data_entry      *dep;
                xfs_ino_t                       ino;
                unsigned int                    length;
                xfs_dir2_dataptr_t              dapos;

                /*
                 * If we have no buffer, or we're off the end of the
                 * current buffer, need to get another one.
                 */
                if (!bp || offset >= geo->blksize) {
                        if (bp) {
                                xfs_trans_brelse(sc->tp, bp);
                                bp = NULL;
                        }

                        error = xchk_read_leaf_dir_buf(sc->tp, dp, geo, &curoff,
                                        &bp);
                        if (error || !bp)
                                break;

                        /*
                         * Find our position in the block.
                         */
                        offset = geo->data_entry_offset;
                        curoff += geo->data_entry_offset;
                }

                /* Skip an empty entry. */
                dup = bp->b_addr + offset;
                if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
                        length = be16_to_cpu(dup->length);
                        offset += length;
                        curoff += length;
                        continue;
                }

                /* Otherwise, find the next entry and report it. */
                dep = bp->b_addr + offset;
                length = xfs_dir2_data_entsize(mp, dep->namelen);

                dapos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
                ino = be64_to_cpu(dep->inumber);
                name.name = dep->name;
                name.len = dep->namelen;
                name.type = xfs_dir2_data_get_ftype(mp, dep);

                error = dirent_fn(sc, dp, dapos, &name, ino, priv);
                if (error)
                        break;

                /* Advance to the next entry. */
                offset += length;
                curoff += length;
        }

        if (bp)
                xfs_trans_brelse(sc->tp, bp);
        return error;
}

/*
 * Call a function for every entry in a directory.
 *
 * Callers must hold the ILOCK.  File types are XFS_DIR3_FT_*.
 */
int
xchk_dir_walk(
        struct xfs_scrub        *sc,
        struct xfs_inode        *dp,
        xchk_dirent_fn          dirent_fn,
        void                    *priv)
{
        struct xfs_da_args      args = {
                .dp             = dp,
                .geo            = dp->i_mount->m_dir_geo,
                .trans          = sc->tp,
                .owner          = dp->i_ino,
        };
        int                     error;

        if (xfs_is_shutdown(dp->i_mount))
                return -EIO;

        ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
        xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

        switch (xfs_dir2_format(&args, &error)) {
        case XFS_DIR2_FMT_SF:
                return xchk_dir_walk_sf(sc, dp, dirent_fn, priv);
        case XFS_DIR2_FMT_BLOCK:
                return xchk_dir_walk_block(sc, dp, dirent_fn, priv);
        case XFS_DIR2_FMT_LEAF:
        case XFS_DIR2_FMT_NODE:
                return xchk_dir_walk_leaf(sc, dp, dirent_fn, priv);
        default:
                return error;
        }
}

/*
 * Look up the inode number for an exact name in a directory.
 *
 * Callers must hold the ILOCK.  File types are XFS_DIR3_FT_*.  Names are not
 * checked for correctness.
 */
int
xchk_dir_lookup(
        struct xfs_scrub        *sc,
        struct xfs_inode        *dp,
        const struct xfs_name   *name,
        xfs_ino_t               *ino)
{
        struct xfs_da_args      args = {
                .dp             = dp,
                .geo            = dp->i_mount->m_dir_geo,
                .trans          = sc->tp,
                .name           = name->name,
                .namelen        = name->len,
                .filetype       = name->type,
                .hashval        = xfs_dir2_hashname(dp->i_mount, name),
                .whichfork      = XFS_DATA_FORK,
                .op_flags       = XFS_DA_OP_OKNOENT,
                .owner          = dp->i_ino,
        };
        int                     error;

        if (xfs_is_shutdown(dp->i_mount))
                return -EIO;

        /*
         * A temporary directory's block headers are written with the owner
         * set to sc->ip, so we must switch the owner here for the lookup.
         */
        if (dp == sc->tempip)
                args.owner = sc->ip->i_ino;

        ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
        xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

        error = xfs_dir_lookup_args(&args);
        if (!error)
                *ino = args.inumber;
        return error;
}

/*
 * Try to grab the IOLOCK and ILOCK of sc->ip and ip, returning @ip's lock
 * state.  The caller may have a transaction, so we must use trylock for both
 * IOLOCKs.
 */
static inline unsigned int
xchk_dir_trylock_both(
        struct xfs_scrub        *sc,
        struct xfs_inode        *ip)
{
        if (!xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
                return 0;

        if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
                goto parent_iolock;

        xchk_ilock(sc, XFS_ILOCK_EXCL);
        if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
                goto parent_ilock;

        return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;

parent_ilock:
        xchk_iunlock(sc, XFS_ILOCK_EXCL);
        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
parent_iolock:
        xchk_iunlock(sc, XFS_IOLOCK_EXCL);
        return 0;
}

/*
 * Try for a limited time to grab the IOLOCK and ILOCK of both the scrub target
 * (@sc->ip) and the inode at the other end (@ip) of a directory or parent
 * pointer link so that we can check that link.
 *
 * We do not know ahead of time that the directory tree is /not/ corrupt, so we
 * cannot use the "lock two inode" functions because we do not know that there
 * is not a racing thread trying to take the locks in opposite order.  First
 * take IOLOCK_EXCL of the scrub target, and then try to take IOLOCK_SHARED
 * of @ip to synchronize with the VFS.  Next, take ILOCK_EXCL of the scrub
 * target and @ip to synchronize with XFS.
 *
 * If the trylocks succeed, *lockmode will be set to the locks held for @ip;
 * @sc->ilock_flags will be set for the locks held for @sc->ip; and zero will
 * be returned.  If not, returns -EDEADLOCK to try again; or -ETIMEDOUT if
 * XCHK_TRY_HARDER was set.  Returns -EINTR if the process has been killed.
 */
int
xchk_dir_trylock_for_pptrs(
        struct xfs_scrub        *sc,
        struct xfs_inode        *ip,
        unsigned int            *lockmode)
{
        unsigned int            nr;
        int                     error = 0;

        ASSERT(sc->ilock_flags == 0);

        for (nr = 0; nr < HZ; nr++) {
                *lockmode = xchk_dir_trylock_both(sc, ip);
                if (*lockmode)
                        return 0;

                if (xchk_should_terminate(sc, &error))
                        return error;

                delay(1);
        }

        if (sc->flags & XCHK_TRY_HARDER) {
                xchk_set_incomplete(sc);
                return -ETIMEDOUT;
        }

        return -EDEADLOCK;
}