root/sys/fs/autofs/autofs_vnops.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2014 The FreeBSD Foundation
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/condvar.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/vnode.h>
#include <machine/atomic.h>
#include <vm/uma.h>

#include <fs/autofs/autofs.h>

static int      autofs_trigger_vn(struct vnode *vp, const char *path,
                    int pathlen, struct vnode **newvp);

extern struct autofs_softc      *autofs_softc;

static int
autofs_access(struct vop_access_args *ap)
{

        /*
         * Nothing to do here; the only kind of access control
         * needed is in autofs_mkdir().
         */

        return (0);
}

static int
autofs_getattr(struct vop_getattr_args *ap)
{
        struct vnode *vp, *newvp;
        struct autofs_node *anp;
        struct mount *mp;
        struct vattr *vap;
        int error;

        vp = ap->a_vp;
        anp = vp->v_data;
        mp = vp->v_mount;
        vap = ap->a_vap;

        KASSERT(ap->a_vp->v_type == VDIR, ("!VDIR"));

        /*
         * The reason we must do this is that some tree-walking software,
         * namely fts(3), assumes that stat(".") results will not change
         * between chdir("subdir") and chdir(".."), and fails with ENOENT
         * otherwise.
         */
        if (autofs_mount_on_stat && autofs_cached(anp, NULL, 0) == false &&
            autofs_ignore_thread(curthread) == false) {
                error = autofs_trigger_vn(vp, "", 0, &newvp);
                if (error != 0)
                        return (error);

                if (newvp != NULL) {
                        error = VOP_GETATTR(newvp, ap->a_vap,
                            ap->a_cred);
                        vput(newvp);
                        return (error);
                }
        }

        vap->va_type = VDIR;
        vap->va_mode = 0755;
        vap->va_nlink = 3; /* XXX */
        vap->va_uid = 0;
        vap->va_gid = 0;
        vap->va_rdev = NODEV;
        vap->va_fsid = mp->mnt_stat.f_fsid.val[0];
        vap->va_fileid = anp->an_fileno;
        vap->va_size = S_BLKSIZE;
        vap->va_blocksize = S_BLKSIZE;
        vap->va_mtime = anp->an_ctime;
        vap->va_atime = anp->an_ctime;
        vap->va_ctime = anp->an_ctime;
        vap->va_birthtime = anp->an_ctime;
        vap->va_gen = 0;
        vap->va_flags = 0;
        vap->va_rdev = 0;
        vap->va_bytes = S_BLKSIZE;
        vap->va_filerev = 0;
        vap->va_spare = 0;

        return (0);
}

/*
 * Unlock the vnode, request automountd(8) action, and then lock it back.
 * If anything got mounted on top of the vnode, return the new filesystem's
 * root vnode in 'newvp', locked.
 */
static int
autofs_trigger_vn(struct vnode *vp, const char *path, int pathlen,
    struct vnode **newvp)
{
        struct autofs_node *anp;
        int error, lock_flags;

        anp = vp->v_data;

        /*
         * Release the vnode lock, so that other operations, in partcular
         * mounting a filesystem on top of it, can proceed.  Increase use
         * count, to prevent the vnode from being deallocated and to prevent
         * filesystem from being unmounted.
         */
        lock_flags = VOP_ISLOCKED(vp);
        vref(vp);
        VOP_UNLOCK(vp);

        sx_xlock(&autofs_softc->sc_lock);

        /*
         * XXX: Workaround for mounting the same thing multiple times; revisit.
         */
        if (vp->v_mountedhere != NULL) {
                error = 0;
                goto mounted;
        }

        error = autofs_trigger(anp, path, pathlen);
mounted:
        sx_xunlock(&autofs_softc->sc_lock);
        vn_lock(vp, lock_flags | LK_RETRY);
        vunref(vp);
        if (VN_IS_DOOMED(vp)) {
                AUTOFS_DEBUG("VIRF_DOOMED");
                return (ENOENT);
        }

        if (error != 0)
                return (error);

        if (vp->v_mountedhere == NULL) {
                *newvp = NULL;
                return (0);
        } else {
                /*
                 * If the operation that succeeded was mount, then mark
                 * the node as non-cached.  Otherwise, if someone unmounts
                 * the filesystem before the cache times out, we will fail
                 * to trigger.
                 */
                anp->an_cached = false;
        }

        error = VFS_ROOT(vp->v_mountedhere, lock_flags, newvp);
        if (error != 0) {
                AUTOFS_WARN("VFS_ROOT() failed with error %d", error);
                return (error);
        }

        return (0);
}

static int
autofs_vget_callback(struct mount *mp, void *arg, int flags,
    struct vnode **vpp)
{

        return (autofs_node_vn(arg, mp, flags, vpp));
}

static int
autofs_lookup(struct vop_lookup_args *ap)
{
        struct vnode *dvp, *newvp, **vpp;
        struct mount *mp;
        struct autofs_mount *amp;
        struct autofs_node *anp, *child;
        struct componentname *cnp;
        int error;

        dvp = ap->a_dvp;
        vpp = ap->a_vpp;
        mp = dvp->v_mount;
        amp = VFSTOAUTOFS(mp);
        anp = dvp->v_data;
        cnp = ap->a_cnp;

        if (cnp->cn_flags & ISDOTDOT) {
                KASSERT(anp->an_parent != NULL, ("NULL parent"));
                /*
                 * Note that in this case, dvp is the child vnode, and we
                 * are looking up the parent vnode - exactly reverse from
                 * normal operation.  Unlocking dvp requires some rather
                 * tricky unlock/relock dance to prevent mp from being freed;
                 * use vn_vget_ino_gen() which takes care of all that.
                 */
                error = vn_vget_ino_gen(dvp, autofs_vget_callback,
                    anp->an_parent, cnp->cn_lkflags, vpp);
                if (error != 0) {
                        AUTOFS_WARN("vn_vget_ino_gen() failed with error %d",
                            error);
                        return (error);
                }
                return (error);
        }

        if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
                vref(dvp);
                *vpp = dvp;

                return (0);
        }

        if (autofs_cached(anp, cnp->cn_nameptr, cnp->cn_namelen) == false &&
            autofs_ignore_thread(curthread) == false) {
                error = autofs_trigger_vn(dvp,
                    cnp->cn_nameptr, cnp->cn_namelen, &newvp);
                if (error != 0)
                        return (error);

                if (newvp != NULL) {
                        /*
                         * The target filesystem got automounted.
                         * Let the lookup(9) go around with the same
                         * path component.
                         */
                        vput(newvp);
                        return (ERELOOKUP);
                }
        }

        AUTOFS_SLOCK(amp);
        error = autofs_node_find(anp, cnp->cn_nameptr, cnp->cn_namelen, &child);
        if (error != 0) {
                if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
                        AUTOFS_SUNLOCK(amp);
                        return (EJUSTRETURN);
                }

                AUTOFS_SUNLOCK(amp);
                return (ENOENT);
        }

        /*
         * XXX: Dropping the node here is ok, because we never remove nodes.
         */
        AUTOFS_SUNLOCK(amp);

        error = autofs_node_vn(child, mp, cnp->cn_lkflags, vpp);
        if (error != 0) {
                if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE)
                        return (EJUSTRETURN);

                return (error);
        }

        return (0);
}

static int
autofs_mkdir(struct vop_mkdir_args *ap)
{
        struct vnode *vp;
        struct autofs_node *anp;
        struct autofs_mount *amp;
        struct autofs_node *child;
        int error;

        vp = ap->a_dvp;
        anp = vp->v_data;
        amp = VFSTOAUTOFS(vp->v_mount);

        /*
         * Do not allow mkdir() if the calling thread is not
         * automountd(8) descendant.
         */
        if (autofs_ignore_thread(curthread) == false)
                return (EPERM);

        AUTOFS_XLOCK(amp);
        error = autofs_node_new(anp, amp, ap->a_cnp->cn_nameptr,
            ap->a_cnp->cn_namelen, &child);
        if (error != 0) {
                AUTOFS_XUNLOCK(amp);
                return (error);
        }
        AUTOFS_XUNLOCK(amp);

        error = autofs_node_vn(child, vp->v_mount, LK_EXCLUSIVE, ap->a_vpp);

        return (error);
}

static int
autofs_print(struct vop_print_args *ap)
{
        struct vnode *vp;
        struct autofs_node *anp;

        vp = ap->a_vp;
        anp = vp->v_data;

        printf("    name \"%s\", fileno %d, cached %d, wildcards %d\n",
            anp->an_name, anp->an_fileno, anp->an_cached, anp->an_wildcards);

        return (0);
}

/*
 * Write out a single 'struct dirent', based on 'name' and 'fileno' arguments.
 */
static int
autofs_readdir_one(struct uio *uio, const char *name, int fileno,
    size_t *reclenp)
{
        struct dirent dirent;
        size_t namlen, reclen;
        int error;

        namlen = strlen(name);
        reclen = _GENERIC_DIRLEN(namlen);
        if (reclenp != NULL)
                *reclenp = reclen;

        if (uio == NULL)
                return (0);

        if (uio->uio_resid < reclen)
                return (EINVAL);

        dirent.d_fileno = fileno;
        dirent.d_off = uio->uio_offset + reclen;
        dirent.d_reclen = reclen;
        dirent.d_type = DT_DIR;
        dirent.d_namlen = namlen;
        memcpy(dirent.d_name, name, namlen);
        dirent_terminate(&dirent);
        error = uiomove(&dirent, reclen, uio);

        return (error);
}

static size_t
autofs_dirent_reclen(const char *name)
{
        size_t reclen;

        (void)autofs_readdir_one(NULL, name, -1, &reclen);

        return (reclen);
}

static int
autofs_readdir(struct vop_readdir_args *ap)
{
        struct vnode *vp, *newvp;
        struct autofs_mount *amp;
        struct autofs_node *anp, *child;
        struct uio *uio;
        size_t reclen, reclens;
        ssize_t initial_resid;
        int error;

        vp = ap->a_vp;
        amp = VFSTOAUTOFS(vp->v_mount);
        anp = vp->v_data;
        uio = ap->a_uio;
        initial_resid = ap->a_uio->uio_resid;

        KASSERT(vp->v_type == VDIR, ("!VDIR"));

        if (autofs_cached(anp, NULL, 0) == false &&
            autofs_ignore_thread(curthread) == false) {
                error = autofs_trigger_vn(vp, "", 0, &newvp);
                if (error != 0)
                        return (error);

                if (newvp != NULL) {
                        error = VOP_READDIR(newvp, ap->a_uio, ap->a_cred,
                            ap->a_eofflag, ap->a_ncookies, ap->a_cookies);
                        vput(newvp);
                        return (error);
                }
        }

        if (uio->uio_offset < 0)
                return (EINVAL);

        if (ap->a_eofflag != NULL)
                *ap->a_eofflag = FALSE;

        /*
         * Write out the directory entry for ".".  This is conditional
         * on the current offset into the directory; same applies to the
         * other two cases below.
         */
        if (uio->uio_offset == 0) {
                error = autofs_readdir_one(uio, ".", anp->an_fileno, &reclen);
                if (error != 0)
                        goto out;
        }
        reclens = autofs_dirent_reclen(".");

        /*
         * Write out the directory entry for "..".
         */
        if (uio->uio_offset <= reclens) {
                if (uio->uio_offset != reclens)
                        return (EINVAL);
                if (anp->an_parent == NULL) {
                        error = autofs_readdir_one(uio, "..",
                            anp->an_fileno, &reclen);
                } else {
                        error = autofs_readdir_one(uio, "..",
                            anp->an_parent->an_fileno, &reclen);
                }
                if (error != 0)
                        goto out;
        }

        reclens += autofs_dirent_reclen("..");

        /*
         * Write out the directory entries for subdirectories.
         */
        AUTOFS_SLOCK(amp);
        RB_FOREACH(child, autofs_node_tree, &anp->an_children) {
                /*
                 * Check the offset to skip entries returned by previous
                 * calls to getdents().
                 */
                if (uio->uio_offset > reclens) {
                        reclens += autofs_dirent_reclen(child->an_name);
                        continue;
                }

                /*
                 * Prevent seeking into the middle of dirent.
                 */
                if (uio->uio_offset != reclens) {
                        AUTOFS_SUNLOCK(amp);
                        return (EINVAL);
                }

                error = autofs_readdir_one(uio, child->an_name,
                    child->an_fileno, &reclen);
                reclens += reclen;
                if (error != 0) {
                        AUTOFS_SUNLOCK(amp);
                        goto out;
                }
        }
        AUTOFS_SUNLOCK(amp);

        if (ap->a_eofflag != NULL)
                *ap->a_eofflag = TRUE;

        return (0);

out:
        /*
         * Return error if the initial buffer was too small to do anything.
         */
        if (uio->uio_resid == initial_resid)
                return (error);

        /*
         * Don't return an error if we managed to copy out some entries.
         */
        if (uio->uio_resid < reclen)
                return (0);

        return (error);
}

static int
autofs_reclaim(struct vop_reclaim_args *ap)
{
        struct vnode *vp;
        struct autofs_node *anp;

        vp = ap->a_vp;
        anp = vp->v_data;

        /*
         * We do not free autofs_node here; instead we are
         * destroying them in autofs_node_delete().
         */
        sx_xlock(&anp->an_vnode_lock);
        anp->an_vnode = NULL;
        vp->v_data = NULL;
        sx_xunlock(&anp->an_vnode_lock);

        return (0);
}

struct vop_vector autofs_vnodeops = {
        .vop_default =          &default_vnodeops,

        .vop_access =           autofs_access,
        .vop_lookup =           autofs_lookup,
        .vop_create =           VOP_EOPNOTSUPP,
        .vop_getattr =          autofs_getattr,
        .vop_link =             VOP_EOPNOTSUPP,
        .vop_mkdir =            autofs_mkdir,
        .vop_mknod =            VOP_EOPNOTSUPP,
        .vop_print =            autofs_print,
        .vop_read =             VOP_EOPNOTSUPP,
        .vop_readdir =          autofs_readdir,
        .vop_remove =           VOP_EOPNOTSUPP,
        .vop_rename =           VOP_EOPNOTSUPP,
        .vop_rmdir =            VOP_EOPNOTSUPP,
        .vop_setattr =          VOP_EOPNOTSUPP,
        .vop_symlink =          VOP_EOPNOTSUPP,
        .vop_write =            VOP_EOPNOTSUPP,
        .vop_reclaim =          autofs_reclaim,
};
VFS_VOP_VECTOR_REGISTER(autofs_vnodeops);

int
autofs_node_new(struct autofs_node *parent, struct autofs_mount *amp,
    const char *name, int namelen, struct autofs_node **anpp)
{
        struct autofs_node *anp;

        if (parent != NULL) {
                AUTOFS_ASSERT_XLOCKED(parent->an_mount);

                KASSERT(autofs_node_find(parent, name, namelen, NULL) == ENOENT,
                    ("node \"%s\" already exists", name));
        }

        anp = uma_zalloc(autofs_node_zone, M_WAITOK | M_ZERO);
        if (namelen >= 0)
                anp->an_name = strndup(name, namelen, M_AUTOFS);
        else
                anp->an_name = strdup(name, M_AUTOFS);
        anp->an_fileno = atomic_fetchadd_int(&amp->am_last_fileno, 1);
        callout_init(&anp->an_callout, 1);
        /*
         * The reason for SX_NOWITNESS here is that witness(4)
         * cannot tell vnodes apart, so the following perfectly
         * valid lock order...
         *
         * vnode lock A -> autofsvlk B -> vnode lock B
         *
         * ... gets reported as a LOR.
         */
        sx_init_flags(&anp->an_vnode_lock, "autofsvlk", SX_NOWITNESS);
        getnanotime(&anp->an_ctime);
        anp->an_parent = parent;
        anp->an_mount = amp;
        if (parent != NULL)
                RB_INSERT(autofs_node_tree, &parent->an_children, anp);
        RB_INIT(&anp->an_children);

        *anpp = anp;
        return (0);
}

int
autofs_node_find(struct autofs_node *parent, const char *name,
    int namelen, struct autofs_node **anpp)
{
        struct autofs_node *anp, find;
        int error;

        AUTOFS_ASSERT_LOCKED(parent->an_mount);

        if (namelen >= 0)
                find.an_name = strndup(name, namelen, M_AUTOFS);
        else
                find.an_name = strdup(name, M_AUTOFS);

        anp = RB_FIND(autofs_node_tree, &parent->an_children, &find);
        if (anp != NULL) {
                error = 0;
                if (anpp != NULL)
                        *anpp = anp;
        } else {
                error = ENOENT;
        }

        free(find.an_name, M_AUTOFS);

        return (error);
}

void
autofs_node_delete(struct autofs_node *anp)
{
        struct autofs_node *parent;

        AUTOFS_ASSERT_XLOCKED(anp->an_mount);
        KASSERT(RB_EMPTY(&anp->an_children), ("have children"));

        callout_drain(&anp->an_callout);

        parent = anp->an_parent;
        if (parent != NULL)
                RB_REMOVE(autofs_node_tree, &parent->an_children, anp);
        sx_destroy(&anp->an_vnode_lock);
        free(anp->an_name, M_AUTOFS);
        uma_zfree(autofs_node_zone, anp);
}

int
autofs_node_vn(struct autofs_node *anp, struct mount *mp, int flags,
    struct vnode **vpp)
{
        struct vnode *vp;
        int error;

        AUTOFS_ASSERT_UNLOCKED(anp->an_mount);

        sx_xlock(&anp->an_vnode_lock);

        vp = anp->an_vnode;
        if (vp != NULL) {
                error = vget(vp, flags | LK_RETRY);
                if (error != 0) {
                        AUTOFS_WARN("vget failed with error %d", error);
                        sx_xunlock(&anp->an_vnode_lock);
                        return (error);
                }
                if (VN_IS_DOOMED(vp)) {
                        /*
                         * We got forcibly unmounted.
                         */
                        AUTOFS_DEBUG("doomed vnode");
                        sx_xunlock(&anp->an_vnode_lock);
                        vput(vp);

                        return (ENOENT);
                }

                *vpp = vp;
                sx_xunlock(&anp->an_vnode_lock);
                return (0);
        }

        error = getnewvnode("autofs", mp, &autofs_vnodeops, &vp);
        if (error != 0) {
                sx_xunlock(&anp->an_vnode_lock);
                return (error);
        }

        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

        vp->v_type = VDIR;
        if (anp->an_parent == NULL)
                vp->v_vflag |= VV_ROOT;
        vp->v_data = anp;

        VN_LOCK_ASHARE(vp);

        error = insmntque(vp, mp);
        if (error != 0) {
                AUTOFS_DEBUG("insmntque() failed with error %d", error);
                sx_xunlock(&anp->an_vnode_lock);
                return (error);
        }

        KASSERT(anp->an_vnode == NULL, ("lost race"));
        anp->an_vnode = vp;

        sx_xunlock(&anp->an_vnode_lock);

        vn_set_state(vp, VSTATE_CONSTRUCTED);
        *vpp = vp;
        return (0);
}