root/fs/fsopen.c
// SPDX-License-Identifier: GPL-2.0-or-later
/* Filesystem access-by-fd.
 *
 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/security.h>
#include <linux/anon_inodes.h>
#include <linux/namei.h>
#include <linux/file.h>
#include <uapi/linux/mount.h>
#include "internal.h"
#include "mount.h"

static inline const char *fetch_message_locked(struct fc_log *log, size_t len,
                                               bool *need_free)
{
        const char *p;
        int index;

        if (unlikely(log->head == log->tail))
                return ERR_PTR(-ENODATA);

        index = log->tail & (ARRAY_SIZE(log->buffer) - 1);
        p = log->buffer[index];
        if (unlikely(strlen(p) > len))
                return ERR_PTR(-EMSGSIZE);

        log->buffer[index] = NULL;
        *need_free = log->need_free & (1 << index);
        log->need_free &= ~(1 << index);
        log->tail++;

        return p;
}

/*
 * Allow the user to read back any error, warning or informational messages.
 * Only one message is returned for each read(2) call.
 */
static ssize_t fscontext_read(struct file *file,
                              char __user *_buf, size_t len, loff_t *pos)
{
        struct fs_context *fc = file->private_data;
        ssize_t err;
        const char *p __free(kfree) = NULL, *message;
        bool need_free;
        int n;

        err = mutex_lock_interruptible(&fc->uapi_mutex);
        if (err < 0)
                return err;
        message = fetch_message_locked(fc->log.log, len, &need_free);
        mutex_unlock(&fc->uapi_mutex);
        if (IS_ERR(message))
                return PTR_ERR(message);

        if (need_free)
                p = message;

        n = strlen(message);
        if (copy_to_user(_buf, message, n))
                return -EFAULT;
        return n;
}

static int fscontext_release(struct inode *inode, struct file *file)
{
        struct fs_context *fc = file->private_data;

        if (fc) {
                file->private_data = NULL;
                put_fs_context(fc);
        }
        return 0;
}

const struct file_operations fscontext_fops = {
        .read           = fscontext_read,
        .release        = fscontext_release,
};

/*
 * Attach a filesystem context to a file and an fd.
 */
static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags)
{
        int fd;

        fd = anon_inode_getfd("[fscontext]", &fscontext_fops, fc,
                              O_RDWR | o_flags);
        if (fd < 0)
                put_fs_context(fc);
        return fd;
}

static int fscontext_alloc_log(struct fs_context *fc)
{
        fc->log.log = kzalloc_obj(*fc->log.log);
        if (!fc->log.log)
                return -ENOMEM;
        refcount_set(&fc->log.log->usage, 1);
        fc->log.log->owner = fc->fs_type->owner;
        return 0;
}

/*
 * Open a filesystem by name so that it can be configured for mounting.
 *
 * We are allowed to specify a container in which the filesystem will be
 * opened, thereby indicating which namespaces will be used (notably, which
 * network namespace will be used for network filesystems).
 */
SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
{
        struct file_system_type *fs_type;
        struct fs_context *fc;
        const char *fs_name;
        int ret;

        if (!may_mount())
                return -EPERM;

        if (flags & ~FSOPEN_CLOEXEC)
                return -EINVAL;

        fs_name = strndup_user(_fs_name, PAGE_SIZE);
        if (IS_ERR(fs_name))
                return PTR_ERR(fs_name);

        fs_type = get_fs_type(fs_name);
        kfree(fs_name);
        if (!fs_type)
                return -ENODEV;

        fc = fs_context_for_mount(fs_type, 0);
        put_filesystem(fs_type);
        if (IS_ERR(fc))
                return PTR_ERR(fc);

        fc->phase = FS_CONTEXT_CREATE_PARAMS;

        ret = fscontext_alloc_log(fc);
        if (ret < 0)
                goto err_fc;

        return fscontext_create_fd(fc, flags & FSOPEN_CLOEXEC ? O_CLOEXEC : 0);

err_fc:
        put_fs_context(fc);
        return ret;
}

/*
 * Pick a superblock into a context for reconfiguration.
 */
SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags)
{
        struct fs_context *fc;
        struct path target;
        unsigned int lookup_flags;
        int ret;

        if (!may_mount())
                return -EPERM;

        if ((flags & ~(FSPICK_CLOEXEC |
                       FSPICK_SYMLINK_NOFOLLOW |
                       FSPICK_NO_AUTOMOUNT |
                       FSPICK_EMPTY_PATH)) != 0)
                return -EINVAL;

        lookup_flags = LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT;
        if (flags & FSPICK_SYMLINK_NOFOLLOW)
                lookup_flags &= ~LOOKUP_FOLLOW;
        if (flags & FSPICK_NO_AUTOMOUNT)
                lookup_flags &= ~LOOKUP_AUTOMOUNT;
        CLASS(filename_flags, filename)(path,
                         (flags & FSPICK_EMPTY_PATH) ? LOOKUP_EMPTY : 0);
        ret = filename_lookup(dfd, filename, lookup_flags, &target, NULL);
        if (ret < 0)
                goto err;

        ret = -EINVAL;
        if (target.mnt->mnt_root != target.dentry)
                goto err_path;

        fc = fs_context_for_reconfigure(target.dentry, 0, 0);
        if (IS_ERR(fc)) {
                ret = PTR_ERR(fc);
                goto err_path;
        }

        fc->phase = FS_CONTEXT_RECONF_PARAMS;

        ret = fscontext_alloc_log(fc);
        if (ret < 0)
                goto err_fc;

        path_put(&target);
        return fscontext_create_fd(fc, flags & FSPICK_CLOEXEC ? O_CLOEXEC : 0);

err_fc:
        put_fs_context(fc);
err_path:
        path_put(&target);
err:
        return ret;
}

static int vfs_cmd_create(struct fs_context *fc, bool exclusive)
{
        struct super_block *sb;
        int ret;

        if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
                return -EBUSY;

        if (!mount_capable(fc))
                return -EPERM;

        fc->phase = FS_CONTEXT_CREATING;
        fc->exclusive = exclusive;

        ret = vfs_get_tree(fc);
        if (ret) {
                fc->phase = FS_CONTEXT_FAILED;
                return ret;
        }

        sb = fc->root->d_sb;
        ret = security_sb_kern_mount(sb);
        if (unlikely(ret)) {
                fc_drop_locked(fc);
                fc->phase = FS_CONTEXT_FAILED;
                return ret;
        }

        /* vfs_get_tree() callchains will have grabbed @s_umount */
        up_write(&sb->s_umount);
        fc->phase = FS_CONTEXT_AWAITING_MOUNT;
        return 0;
}

static int vfs_cmd_reconfigure(struct fs_context *fc)
{
        struct super_block *sb;
        int ret;

        if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
                return -EBUSY;

        fc->phase = FS_CONTEXT_RECONFIGURING;

        sb = fc->root->d_sb;
        if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
                fc->phase = FS_CONTEXT_FAILED;
                return -EPERM;
        }

        down_write(&sb->s_umount);
        ret = reconfigure_super(fc);
        up_write(&sb->s_umount);
        if (ret) {
                fc->phase = FS_CONTEXT_FAILED;
                return ret;
        }

        vfs_clean_context(fc);
        return 0;
}

/*
 * Check the state and apply the configuration.  Note that this function is
 * allowed to 'steal' the value by setting param->xxx to NULL before returning.
 */
static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
                               struct fs_parameter *param)
{
        int ret;

        ret = finish_clean_context(fc);
        if (ret)
                return ret;
        switch (cmd) {
        case FSCONFIG_CMD_CREATE:
                return vfs_cmd_create(fc, false);
        case FSCONFIG_CMD_CREATE_EXCL:
                return vfs_cmd_create(fc, true);
        case FSCONFIG_CMD_RECONFIGURE:
                return vfs_cmd_reconfigure(fc);
        default:
                if (fc->phase != FS_CONTEXT_CREATE_PARAMS &&
                    fc->phase != FS_CONTEXT_RECONF_PARAMS)
                        return -EBUSY;

                return vfs_parse_fs_param(fc, param);
        }
}

/**
 * sys_fsconfig - Set parameters and trigger actions on a context
 * @fd: The filesystem context to act upon
 * @cmd: The action to take
 * @_key: Where appropriate, the parameter key to set
 * @_value: Where appropriate, the parameter value to set
 * @aux: Additional information for the value
 *
 * This system call is used to set parameters on a context, including
 * superblock settings, data source and security labelling.
 *
 * Actions include triggering the creation of a superblock and the
 * reconfiguration of the superblock attached to the specified context.
 *
 * When setting a parameter, @cmd indicates the type of value being proposed
 * and @_key indicates the parameter to be altered.
 *
 * @_value and @aux are used to specify the value, should a value be required:
 *
 * (*) fsconfig_set_flag: No value is specified.  The parameter must be boolean
 *     in nature.  The key may be prefixed with "no" to invert the
 *     setting. @_value must be NULL and @aux must be 0.
 *
 * (*) fsconfig_set_string: A string value is specified.  The parameter can be
 *     expecting boolean, integer, string or take a path.  A conversion to an
 *     appropriate type will be attempted (which may include looking up as a
 *     path).  @_value points to a NUL-terminated string and @aux must be 0.
 *
 * (*) fsconfig_set_binary: A binary blob is specified.  @_value points to the
 *     blob and @aux indicates its size.  The parameter must be expecting a
 *     blob.
 *
 * (*) fsconfig_set_path: A non-empty path is specified.  The parameter must be
 *     expecting a path object.  @_value points to a NUL-terminated string that
 *     is the path and @aux is a file descriptor at which to start a relative
 *     lookup or AT_FDCWD.
 *
 * (*) fsconfig_set_path_empty: As fsconfig_set_path, but with AT_EMPTY_PATH
 *     implied.
 *
 * (*) fsconfig_set_fd: An open file descriptor is specified.  @_value must be
 *     NULL and @aux indicates the file descriptor.
 */
SYSCALL_DEFINE5(fsconfig,
                int, fd,
                unsigned int, cmd,
                const char __user *, _key,
                const void __user *, _value,
                int, aux)
{
        struct fs_context *fc;
        int ret;
        int lookup_flags = 0;

        struct fs_parameter param = {
                .type   = fs_value_is_undefined,
        };

        if (fd < 0)
                return -EINVAL;

        switch (cmd) {
        case FSCONFIG_SET_FLAG:
                if (!_key || _value || aux)
                        return -EINVAL;
                break;
        case FSCONFIG_SET_STRING:
                if (!_key || !_value || aux)
                        return -EINVAL;
                break;
        case FSCONFIG_SET_BINARY:
                if (!_key || !_value || aux <= 0 || aux > 1024 * 1024)
                        return -EINVAL;
                break;
        case FSCONFIG_SET_PATH:
        case FSCONFIG_SET_PATH_EMPTY:
                if (!_key || !_value || (aux != AT_FDCWD && aux < 0))
                        return -EINVAL;
                break;
        case FSCONFIG_SET_FD:
                if (!_key || _value || aux < 0)
                        return -EINVAL;
                break;
        case FSCONFIG_CMD_CREATE:
        case FSCONFIG_CMD_CREATE_EXCL:
        case FSCONFIG_CMD_RECONFIGURE:
                if (_key || _value || aux)
                        return -EINVAL;
                break;
        default:
                return -EOPNOTSUPP;
        }

        CLASS(fd, f)(fd);
        if (fd_empty(f))
                return -EBADF;
        if (fd_file(f)->f_op != &fscontext_fops)
                return -EINVAL;

        fc = fd_file(f)->private_data;

        if (_key) {
                param.key = strndup_user(_key, 256);
                if (IS_ERR(param.key))
                        return PTR_ERR(param.key);
        }

        switch (cmd) {
        case FSCONFIG_SET_FLAG:
                param.type = fs_value_is_flag;
                break;
        case FSCONFIG_SET_STRING:
                param.type = fs_value_is_string;
                param.string = strndup_user(_value, 256);
                if (IS_ERR(param.string)) {
                        ret = PTR_ERR(param.string);
                        goto out_key;
                }
                param.size = strlen(param.string);
                break;
        case FSCONFIG_SET_BINARY:
                param.type = fs_value_is_blob;
                param.size = aux;
                param.blob = memdup_user_nul(_value, aux);
                if (IS_ERR(param.blob)) {
                        ret = PTR_ERR(param.blob);
                        goto out_key;
                }
                break;
        case FSCONFIG_SET_PATH_EMPTY:
                lookup_flags = LOOKUP_EMPTY;
                fallthrough;
        case FSCONFIG_SET_PATH:
                param.type = fs_value_is_filename;
                param.name = getname_flags(_value, lookup_flags);
                if (IS_ERR(param.name)) {
                        ret = PTR_ERR(param.name);
                        goto out_key;
                }
                param.dirfd = aux;
                param.size = strlen(param.name->name);
                break;
        case FSCONFIG_SET_FD:
                param.type = fs_value_is_file;
                ret = -EBADF;
                param.file = fget_raw(aux);
                if (!param.file)
                        goto out_key;
                param.dirfd = aux;
                break;
        default:
                break;
        }

        ret = mutex_lock_interruptible(&fc->uapi_mutex);
        if (ret == 0) {
                ret = vfs_fsconfig_locked(fc, cmd, &param);
                mutex_unlock(&fc->uapi_mutex);
        }

        /* Clean up the our record of any value that we obtained from
         * userspace.  Note that the value may have been stolen by the LSM or
         * filesystem, in which case the value pointer will have been cleared.
         */
        switch (cmd) {
        case FSCONFIG_SET_STRING:
        case FSCONFIG_SET_BINARY:
                kfree(param.string);
                break;
        case FSCONFIG_SET_PATH:
        case FSCONFIG_SET_PATH_EMPTY:
                if (param.name)
                        putname(param.name);
                break;
        case FSCONFIG_SET_FD:
                if (param.file)
                        fput(param.file);
                break;
        default:
                break;
        }
out_key:
        kfree(param.key);
        return ret;
}