root/fs/xfs/scrub/orphanage.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_icache.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_parent.h"
#include "xfs_attr_sf.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/orphanage.h"
#include "scrub/readdir.h"

#include <linux/namei.h>

/*
 * The Orphanage
 * =============
 *
 * If the directory tree is damaged, children of that directory become
 * inaccessible via that file path.  If a child has no other parents, the file
 * is said to be orphaned.  xfs_repair fixes this situation by creating an
 * orphanage directory (specifically, /lost+found) and creating a directory
 * entry pointing to the orphaned file.
 *
 * Online repair follows this tactic by creating a root-owned /lost+found
 * directory if one does not exist.  If an orphan is found, it will move that
 * file into the orphanage.
 */

/*
 * Make the orphanage owned by root.
 *
 * Allocates dquots for the root uid, the root gid, and project id 0, then
 * uses one transaction to clear the setuid/setgid/sticky mode bits, reset the
 * uid, gid, and project id of @dp to zero, clear the REALTIME and RTINHERIT
 * inode flags, and log the inode core.  xfs_qm_vop_chown is only called for a
 * quota type when the corresponding id actually changes and that quota type
 * is enabled.
 *
 * Returns 0 on success, or the negative errno returned by dquot allocation,
 * transaction allocation, or transaction commit.
 */
STATIC int
xrep_chown_orphanage(
        struct xfs_scrub        *sc,
        struct xfs_inode        *dp)
{
        struct xfs_trans        *tp;
        struct xfs_mount        *mp = sc->mp;
        struct xfs_dquot        *udqp = NULL, *gdqp = NULL, *pdqp = NULL;
        struct xfs_dquot        *oldu = NULL, *oldg = NULL, *oldp = NULL;
        struct inode            *inode = VFS_I(dp);
        int                     error;

        /* Get dquots for the ids that the orphanage is about to be given. */
        error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
                        XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
        if (error)
                return error;

        error = xfs_trans_alloc_ichange(dp, udqp, gdqp, pdqp, true, &tp);
        if (error)
                goto out_dqrele;

        /*
         * Always clear setuid/setgid/sticky on the orphanage since we don't
         * normally want that functionality on this directory and xfs_repair
         * doesn't create it this way either.  Leave the other access bits
         * unchanged.
         */
        inode->i_mode &= ~(S_ISUID | S_ISGID | S_ISVTX);

        /*
         * Change the ownerships and register quota modifications
         * in the transaction.
         */
        if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) {
                if (XFS_IS_UQUOTA_ON(mp))
                        oldu = xfs_qm_vop_chown(tp, dp, &dp->i_udquot, udqp);
                inode->i_uid = GLOBAL_ROOT_UID;
        }
        if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) {
                if (XFS_IS_GQUOTA_ON(mp))
                        oldg = xfs_qm_vop_chown(tp, dp, &dp->i_gdquot, gdqp);
                inode->i_gid = GLOBAL_ROOT_GID;
        }
        if (dp->i_projid != 0) {
                if (XFS_IS_PQUOTA_ON(mp))
                        oldp = xfs_qm_vop_chown(tp, dp, &dp->i_pdquot, pdqp);
                dp->i_projid = 0;
        }

        /* Drop the realtime flags from the directory and log the core. */
        dp->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
        xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

        XFS_STATS_INC(mp, xs_ig_attrchg);

        if (xfs_has_wsync(mp))
                xfs_trans_set_sync(tp);
        error = xfs_trans_commit(tp);

        /*
         * Release whatever xfs_qm_vop_chown handed back.  Each pointer is
         * still NULL if that id was already zero or that quota type is off.
         * This runs whether or not the commit succeeded.
         */
        xfs_qm_dqrele(oldu);
        xfs_qm_dqrele(oldg);
        xfs_qm_dqrele(oldp);

out_dqrele:
        /* Release the references obtained from xfs_qm_vop_dqalloc. */
        xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(gdqp);
        xfs_qm_dqrele(pdqp);
        return error;
}

#define ORPHANAGE       "lost+found"

/*
 * Create the orphanage directory, and set sc->orphanage to it.
 *
 * Looks up ORPHANAGE in the root directory, creating it with mode 0750 if the
 * dentry is negative, then forces root ownership via xrep_chown_orphanage.
 *
 * Returns:
 *   0 with sc->orphanage set and sc->orphanage_ilock_flags cleared on success;
 *   0 with sc->orphanage == NULL if the filesystem is read-only;
 *   -EIO if the filesystem is shut down;
 *   -EFSCORRUPTED if the root inode has no dentry or is not a directory;
 *   -ENOTDIR if the orphanage name exists but is not a directory;
 *   -ENOENT if a reference to the orphanage inode could not be grabbed;
 *   or any negative errno from lookup, mkdir, or the ownership change.
 *
 * On any error return sc->orphanage is left untouched and no inode reference
 * is retained.  The caller must not hold a scrub transaction.
 */
int
xrep_orphanage_create(
        struct xfs_scrub        *sc)
{
        struct xfs_mount        *mp = sc->mp;
        struct dentry           *root_dentry, *orphanage_dentry;
        struct inode            *root_inode = VFS_I(sc->mp->m_rootip);
        struct inode            *orphanage_inode;
        int                     error;

        if (xfs_is_shutdown(mp))
                return -EIO;
        if (xfs_is_readonly(mp)) {
                sc->orphanage = NULL;
                return 0;
        }

        ASSERT(sc->tp == NULL);
        ASSERT(sc->orphanage == NULL);

        /* Find the dentry for the root directory... */
        root_dentry = d_find_alias(root_inode);
        if (!root_dentry) {
                error = -EFSCORRUPTED;
                goto out;
        }

        /* ...which is a directory, right? */
        if (!d_is_dir(root_dentry)) {
                error = -EFSCORRUPTED;
                goto out_dput_root;
        }

        /* Try to find the orphanage directory. */
        orphanage_dentry = start_creating_noperm(root_dentry, &QSTR(ORPHANAGE));
        if (IS_ERR(orphanage_dentry)) {
                error = PTR_ERR(orphanage_dentry);
                goto out_dput_root;
        }

        /*
         * Nothing found?  Call mkdir to create the orphanage.  Create the
         * directory without other-user access because we're live and someone
         * could have been relying partly on minimal access to a parent
         * directory to control access to a file we put in here.
         *
         * vfs_mkdir hands back the dentry to use from here on (or an error
         * pointer), so the result replaces orphanage_dentry either way.
         */
        if (d_really_is_negative(orphanage_dentry)) {
                orphanage_dentry = vfs_mkdir(&nop_mnt_idmap, root_inode,
                                             orphanage_dentry, 0750, NULL);
                if (IS_ERR(orphanage_dentry)) {
                        error = PTR_ERR(orphanage_dentry);
                        goto out_dput_orphanage;
                }
        }

        /* Not a directory? Bail out. */
        if (!d_is_dir(orphanage_dentry)) {
                error = -ENOTDIR;
                goto out_dput_orphanage;
        }

        /*
         * Grab a reference to the orphanage.  This /should/ succeed since
         * we hold the root directory locked and therefore nobody can delete
         * the orphanage.
         */
        orphanage_inode = igrab(d_inode(orphanage_dentry));
        if (!orphanage_inode) {
                error = -ENOENT;
                goto out_dput_orphanage;
        }

        /* Make sure the orphanage is owned by root. */
        error = xrep_chown_orphanage(sc, XFS_I(orphanage_inode));
        if (error) {
                /*
                 * We never stash the inode in sc->orphanage on this path, so
                 * xrep_orphanage_rele will not drop the reference that igrab
                 * just took.  Release it here or it leaks.
                 */
                xchk_irele(sc, XFS_I(orphanage_inode));
                goto out_dput_orphanage;
        }

        /* Stash the reference for later and bail out. */
        sc->orphanage = XFS_I(orphanage_inode);
        sc->orphanage_ilock_flags = 0;

out_dput_orphanage:
        end_creating(orphanage_dentry);
out_dput_root:
        dput(root_dentry);
out:
        return error;
}

/*
 * Take the given inode lock(s) on the orphanage, blocking if necessary, and
 * record them in the scrub context's orphanage lock state.
 */
void
xrep_orphanage_ilock(
        struct xfs_scrub        *sc,
        unsigned int            ilock_flags)
{
        struct xfs_inode        *orphanage = sc->orphanage;

        /* Note the lock in the scrub context first, then acquire it. */
        sc->orphanage_ilock_flags = sc->orphanage_ilock_flags | ilock_flags;
        xfs_ilock(orphanage, ilock_flags);
}

/*
 * Try to take the given inode lock(s) on the orphanage without blocking.
 * Returns true and records the lock flags if the lock was obtained; returns
 * false and leaves the recorded lock state alone otherwise.
 */
bool
xrep_orphanage_ilock_nowait(
        struct xfs_scrub        *sc,
        unsigned int            ilock_flags)
{
        if (!xfs_ilock_nowait(sc->orphanage, ilock_flags))
                return false;

        sc->orphanage_ilock_flags |= ilock_flags;
        return true;
}

/*
 * Drop the given inode lock(s) on the orphanage and remove them from the
 * scrub context's record of held orphanage locks.
 */
void
xrep_orphanage_iunlock(
        struct xfs_scrub        *sc,
        unsigned int            ilock_flags)
{
        struct xfs_inode        *orphanage = sc->orphanage;

        xfs_iunlock(orphanage, ilock_flags);
        sc->orphanage_ilock_flags = sc->orphanage_ilock_flags & ~ilock_flags;
}

/* Grab the IOLOCK of the orphanage and sc->ip. */
int
xrep_orphanage_iolock_two(
        struct xfs_scrub        *sc)
{
        int                     error = 0;

        while (true) {
                if (xchk_should_terminate(sc, &error))
                        return error;

                /*
                 * Normal XFS takes the IOLOCK before grabbing a transaction.
                 * Scrub holds a transaction, which means that we can't block
                 * on either IOLOCK.
                 */
                if (xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
                        if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
                                break;
                        xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL);
                }
                delay(1);
        }

        return 0;
}

/*
 * Release the orphanage: drop any orphanage inode locks that the scrub
 * context still records as held, release the inode reference, and clear
 * sc->orphanage.  Does nothing if no orphanage is attached.
 */
void
xrep_orphanage_rele(
        struct xfs_scrub        *sc)
{
        struct xfs_inode        *orphanage = sc->orphanage;

        if (orphanage == NULL)
                return;

        if (sc->orphanage_ilock_flags != 0)
                xfs_iunlock(orphanage, sc->orphanage_ilock_flags);

        xchk_irele(sc, orphanage);
        sc->orphanage = NULL;
}

/* Adoption moves a file into /lost+found */

/*
 * Can the orphanage adopt @sc->ip?
 *
 * Adoption is refused when there is no orphanage, when the file is the
 * orphanage itself, when the file is rooted in the superblock, or when it is
 * an internal inode.
 */
bool
xrep_orphanage_can_adopt(
        struct xfs_scrub        *sc)
{
        ASSERT(sc->ip != NULL);

        return sc->orphanage != NULL &&
               sc->ip != sc->orphanage &&
               !xchk_inode_is_sb_rooted(sc->ip) &&
               !xfs_is_internal_inode(sc->ip);
}

/*
 * Create a new transaction to send a child to the orphanage.
 *
 * Allocate a new transaction with sufficient disk space to handle the
 * adoption, take ILOCK_EXCL of the orphanage and sc->ip, join them to the
 * transaction, and reserve quota to reparent the latter.  Caller must hold the
 * IOLOCK of the orphanage and sc->ip, must not hold either ILOCK, and must not
 * already have a scrub transaction.
 *
 * Fills out adopt->sc, adopt->orphanage_blkres, and adopt->child_blkres.
 *
 * Returns 0 with sc->tp set and both ILOCK_EXCL locks held and recorded in
 * the scrub context.  If transaction allocation fails, returns the error
 * without having taken any ILOCK.  If a quota reservation fails, the
 * transaction is cancelled and both ILOCK_EXCL locks are dropped before the
 * error is returned.  The IOLOCKs are never released here.
 */
int
xrep_adoption_trans_alloc(
        struct xfs_scrub        *sc,
        struct xrep_adoption    *adopt)
{
        struct xfs_mount        *mp = sc->mp;
        unsigned int            child_blkres = 0;
        int                     error;

        ASSERT(sc->tp == NULL);
        ASSERT(sc->ip != NULL);
        ASSERT(sc->orphanage != NULL);
        ASSERT(sc->ilock_flags & XFS_IOLOCK_EXCL);
        ASSERT(sc->orphanage_ilock_flags & XFS_IOLOCK_EXCL);
        ASSERT(!(sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)));
        ASSERT(!(sc->orphanage_ilock_flags &
                                (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)));

        /*
         * Compute the worst case space reservation that we need.  The
         * orphanage side is sized for the longest possible name because the
         * real name has not been chosen yet.  The child only needs blocks if
         * it is a directory (dotdot update) and/or if parent pointers are
         * enabled (possible attr fork addition).
         */
        adopt->sc = sc;
        adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN);
        if (S_ISDIR(VFS_I(sc->ip)->i_mode))
                child_blkres = xfs_rename_space_res(mp, 0, false,
                                                    xfs_name_dotdot.len, false);
        if (xfs_has_parent(mp))
                child_blkres += XFS_ADDAFORK_SPACE_RES(mp);
        adopt->child_blkres = child_blkres;

        /*
         * Allocate a transaction to link the child into the parent, along with
         * enough disk space to handle expansion of both the orphanage and the
         * dotdot entry of a child directory.
         */
        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link,
                        adopt->orphanage_blkres + adopt->child_blkres, 0, 0,
                        &sc->tp);
        if (error)
                return error;

        /* Lock both inodes and record that in the scrub context. */
        xfs_lock_two_inodes(sc->orphanage, XFS_ILOCK_EXCL,
                            sc->ip, XFS_ILOCK_EXCL);
        sc->ilock_flags |= XFS_ILOCK_EXCL;
        sc->orphanage_ilock_flags |= XFS_ILOCK_EXCL;

        /*
         * Join with lock_flags == 0.
         * NOTE(review): presumably this keeps the ILOCKs held across commit
         * so that the scrub context stays responsible for unlocking; confirm
         * against xfs_trans_ijoin.
         */
        xfs_trans_ijoin(sc->tp, sc->orphanage, 0);
        xfs_trans_ijoin(sc->tp, sc->ip, 0);

        /*
         * Reserve enough quota in the orphan directory to add the new name.
         * Normally the orphanage should have user/group/project ids of zero
         * and hence is not subject to quota enforcement, but we're allowed to
         * exceed quota to reattach disconnected parts of the directory tree.
         */
        error = xfs_trans_reserve_quota_nblks(sc->tp, sc->orphanage,
                        adopt->orphanage_blkres, 0, true);
        if (error)
                goto out_cancel;

        /*
         * Reserve enough quota in the child directory to change dotdot.
         * Here we're also allowed to exceed file quota to repair inconsistent
         * metadata.
         */
        if (adopt->child_blkres) {
                error = xfs_trans_reserve_quota_nblks(sc->tp, sc->ip,
                                adopt->child_blkres, 0, true);
                if (error)
                        goto out_cancel;
        }

        return 0;
out_cancel:
        /* Undo the transaction and the ILOCKs, but keep the IOLOCKs. */
        xchk_trans_cancel(sc);
        xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL);
        xchk_iunlock(sc, XFS_ILOCK_EXCL);
        return error;
}

/*
 * Compute the xfs_name for the directory entry that we're adding to the
 * orphanage.  Caller must hold ILOCKs of sc->ip and the orphanage and must not
 * reuse namebuf until the adoption completes or is dissolved.
 */
int
xrep_adoption_compute_name(
        struct xrep_adoption    *adopt,
        struct xfs_name         *xname)
{
        struct xfs_scrub        *sc = adopt->sc;
        char                    *namebuf = (void *)xname->name;
        xfs_ino_t               ino;
        unsigned int            incr = 0;
        int                     error = 0;

        adopt->xname = xname;
        xname->len = snprintf(namebuf, MAXNAMELEN, "%llu", sc->ip->i_ino);
        xname->type = xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode);

        /* Make sure the filename is unique in the lost+found. */
        error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
        while (error == 0 && incr < 10000) {
                xname->len = snprintf(namebuf, MAXNAMELEN, "%llu.%u",
                                sc->ip->i_ino, ++incr);
                error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
        }
        if (error == 0) {
                /* We already have 10,000 entries in the orphanage? */
                return -EFSCORRUPTED;
        }

        if (error != -ENOENT)
                return error;
        return 0;
}

/*
 * Make sure the dcache does not have a positive dentry for the name we've
 * chosen.  The caller should have checked with the ondisk directory, so any
 * discrepancy is a sign that something is seriously wrong.
 *
 * Returns 0 if the orphanage has no dentry, if no dentry is cached for the
 * name, or if the cached dentry is negative; -EFSCORRUPTED if a positive
 * dentry is cached; or the error from the dcache lookup.
 */
static int
xrep_adoption_check_dcache(
        struct xrep_adoption    *adopt)
{
        struct xfs_scrub        *sc = adopt->sc;
        struct qstr             qname = QSTR_INIT(adopt->xname->name,
                                                  adopt->xname->len);
        struct dentry           *parent;
        struct dentry           *child;
        int                     ret = 0;

        parent = d_find_alias(VFS_I(sc->orphanage));
        if (!parent)
                return 0;

        child = try_lookup_noperm(&qname, parent);
        if (IS_ERR(child)) {
                ret = PTR_ERR(child);
                goto out_dput_parent;
        }
        if (!child)
                goto out_dput_parent;

        trace_xrep_adoption_check_child(sc->mp, child);

        if (d_is_positive(child)) {
                ASSERT(d_is_negative(child));
                ret = -EFSCORRUPTED;
        }
        dput(child);

out_dput_parent:
        dput(parent);
        return ret;
}

/*
 * Invalidate all dentries for the name that was added to the orphanage
 * directory, and all dentries pointing to the child inode that was moved.
 *
 * There should not be any positive entries for the name, since we've
 * maintained our lock on the orphanage directory.
 *
 * If the orphanage inode has no dentry at all, nothing is invalidated,
 * including the child's own aliases.
 */
static void
xrep_adoption_zap_dcache(
        struct xrep_adoption    *adopt)
{
        struct xfs_scrub        *sc = adopt->sc;
        struct qstr             qname = QSTR_INIT(adopt->xname->name,
                                                  adopt->xname->len);
        struct dentry           *parent;
        struct dentry           *victim;

        parent = d_find_alias(VFS_I(sc->orphanage));
        if (!parent)
                return;

        /*
         * Invalidate all dentries for the adoption name.  The first lookup
         * goes through try_lookup_noperm; repeat lookups use d_lookup with the
         * same qstr.
         * NOTE(review): presumably d_lookup relies on the first call having
         * filled in qname's hash -- confirm.
         */
        for (victim = try_lookup_noperm(&qname, parent);
             !IS_ERR_OR_NULL(victim);
             victim = d_lookup(parent, &qname)) {
                trace_xrep_adoption_invalidate_child(sc->mp, victim);

                ASSERT(d_is_negative(victim));
                d_invalidate(victim);
                dput(victim);
        }

        dput(parent);

        /* Invalidate all the dentries pointing down to this file. */
        for (;;) {
                victim = d_find_alias(VFS_I(sc->ip));
                if (victim == NULL)
                        break;

                trace_xrep_adoption_invalidate_child(sc->mp, victim);

                d_invalidate(victim);
                dput(victim);
        }
}

/*
 * If we have to add an attr fork ahead of a parent pointer update, how much
 * space should we ask for?
 *
 * The answer is one shortform attr header plus one shortform entry sized for
 * a parent pointer record and the adoption name.
 */
static inline int
xrep_adoption_attr_sizeof(
        const struct xrep_adoption      *adopt)
{
        const int       entsize = xfs_attr_sf_entsize_byname(
                                        sizeof(struct xfs_parent_rec),
                                        adopt->xname->len);

        return sizeof(struct xfs_attr_sf_hdr) + entsize;
}

/*
 * Move the current file to the orphanage under the computed name.
 *
 * Expects adopt->xname to have been set (see xrep_adoption_compute_name) and
 * sc->tp to be the adoption transaction.  In order, this: checks the dcache
 * for a conflicting positive dentry; adds an attr fork to the child if parent
 * pointers are enabled and it lacks one; creates the directory entry in the
 * orphanage; updates the orphanage's timestamps and (for a child directory)
 * link count; optionally bumps the child's link count; rewrites the child's
 * dotdot entry if it is a directory; adds a parent pointer if the filesystem
 * has them; fires the directory update hook; and zaps stale dentries.
 *
 * Returns with a dirty transaction so that the caller can handle any other
 * work, such as fixing up unlinked lists or resetting link counts.  Deferred
 * work is not finished here.  On error, returns a negative errno without
 * cancelling the transaction; that is left to the caller.
 */
int
xrep_adoption_move(
        struct xrep_adoption    *adopt)
{
        struct xfs_scrub        *sc = adopt->sc;
        bool                    isdir = S_ISDIR(VFS_I(sc->ip)->i_mode);
        int                     error;

        trace_xrep_adoption_reparent(sc->orphanage, adopt->xname,
                        sc->ip->i_ino);

        /* Bail out before dirtying anything if the dcache disagrees. */
        error = xrep_adoption_check_dcache(adopt);
        if (error)
                return error;

        /*
         * If this filesystem has parent pointers, ensure that the file being
         * moved to the orphanage has an attribute fork.  This is required
         * because the parent pointer code does not itself add attr forks.
         */
        if (!xfs_inode_has_attr_fork(sc->ip) && xfs_has_parent(sc->mp)) {
                int sf_size = xrep_adoption_attr_sizeof(adopt);

                error = xfs_bmap_add_attrfork(sc->tp, sc->ip, sf_size, true);
                if (error)
                        return error;
        }

        /* Create the new name in the orphanage. */
        error = xfs_dir_createname(sc->tp, sc->orphanage, adopt->xname,
                        sc->ip->i_ino, adopt->orphanage_blkres);
        if (error)
                return error;

        /*
         * Bump the link count of the orphanage if we just added a
         * subdirectory, and update its timestamps.
         */
        xfs_trans_ichgtime(sc->tp, sc->orphanage,
                        XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        if (isdir)
                xfs_bumplink(sc->tp, sc->orphanage);
        xfs_trans_log_inode(sc->tp, sc->orphanage, XFS_ILOG_CORE);

        /* Bump the link count of the child, but only if the caller asked. */
        if (adopt->bump_child_nlink) {
                xfs_bumplink(sc->tp, sc->ip);
                xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
        }

        /* Replace the dotdot entry if the child is a subdirectory. */
        if (isdir) {
                error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot,
                                sc->orphanage->i_ino, adopt->child_blkres);
                if (error)
                        return error;
        }

        /* Add a parent pointer from the file back to the lost+found. */
        if (xfs_has_parent(sc->mp)) {
                error = xfs_parent_addname(sc->tp, &adopt->ppargs,
                                sc->orphanage, adopt->xname, sc->ip);
                if (error)
                        return error;
        }

        /*
         * Notify dirent hooks that we moved the file to /lost+found.  The
         * transaction is still dirty at this point; finishing deferred work
         * and rolling the transaction is done separately (see
         * xrep_adoption_trans_roll).
         */
        xfs_dir_update_hook(sc->orphanage, sc->ip, 1, adopt->xname);

        /* Remove negative dentries from the lost+found's dcache */
        xrep_adoption_zap_dcache(adopt);
        return 0;
}

/*
 * Roll to a clean scrub transaction so that we can release the orphanage,
 * even if xrep_adoption_move was not called.
 *
 * Commits all the work and deferred ops attached to an adoption request and
 * rolls to a clean scrub transaction.  On success, returns 0 with the scrub
 * context holding a clean transaction with no inodes joined.  On failure,
 * returns negative errno with no scrub transaction.  All inode locks are
 * still held after this function returns.
 */
int
xrep_adoption_trans_roll(
        struct xrep_adoption    *adopt)
{
        struct xfs_scrub        *sc = adopt->sc;
        int                     error;

        trace_xrep_adoption_trans_roll(sc->orphanage, sc->ip,
                        !!(sc->tp->t_flags & XFS_TRANS_DIRTY));

        /* Finish all the deferred ops to commit all repairs. */
        error = xrep_defer_finish(sc);
        if (error)
                return error;

        /* Roll the transaction once more to detach the inodes. */
        return xfs_trans_roll(&sc->tp);
}