root/fs/afs/super.c
/* AFS superblock handling
 *
 * Copyright (c) 2002, 2007, 2018 Red Hat, Inc. All rights reserved.
 *
 * This software may be freely redistributed under the terms of the
 * GNU General Public License.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Authors: David Howells <dhowells@redhat.com>
 *          David Woodhouse <dwmw2@infradead.org>
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/fs_parser.h>
#include <linux/statfs.h>
#include <linux/sched.h>
#include <linux/nsproxy.h>
#include <linux/magic.h>
#include <net/net_namespace.h>
#include "internal.h"

static void afs_i_init_once(void *foo);
static void afs_kill_super(struct super_block *sb);
static struct inode *afs_alloc_inode(struct super_block *sb);
static void afs_destroy_inode(struct inode *inode);
static void afs_free_inode(struct inode *inode);
static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
static int afs_show_devname(struct seq_file *m, struct dentry *root);
static int afs_show_options(struct seq_file *m, struct dentry *root);
static int afs_init_fs_context(struct fs_context *fc);
static const struct fs_parameter_spec afs_fs_parameters[];

struct file_system_type afs_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "afs",
        .init_fs_context        = afs_init_fs_context,
        .parameters             = afs_fs_parameters,
        .kill_sb                = afs_kill_super,
        .fs_flags               = FS_RENAME_DOES_D_MOVE,
};
MODULE_ALIAS_FS("afs");

int afs_net_id;

static const struct super_operations afs_super_ops = {
        .statfs         = afs_statfs,
        .alloc_inode    = afs_alloc_inode,
        .write_inode    = netfs_unpin_writeback,
        .drop_inode     = afs_drop_inode,
        .destroy_inode  = afs_destroy_inode,
        .free_inode     = afs_free_inode,
        .evict_inode    = afs_evict_inode,
        .show_devname   = afs_show_devname,
        .show_options   = afs_show_options,
};

static struct kmem_cache *afs_inode_cachep;
static atomic_t afs_count_active_inodes;

enum afs_param {
        Opt_autocell,
        Opt_dyn,
        Opt_flock,
        Opt_source,
};

static const struct constant_table afs_param_flock[] = {
        {"local",       afs_flock_mode_local },
        {"openafs",     afs_flock_mode_openafs },
        {"strict",      afs_flock_mode_strict },
        {"write",       afs_flock_mode_write },
        {}
};

static const struct fs_parameter_spec afs_fs_parameters[] = {
        fsparam_flag  ("autocell",      Opt_autocell),
        fsparam_flag  ("dyn",           Opt_dyn),
        fsparam_enum  ("flock",         Opt_flock, afs_param_flock),
        fsparam_string("source",        Opt_source),
        {}
};

/*
 * initialise the filesystem
 */
int __init afs_fs_init(void)
{
        int ret;

        _enter("");

        /* create ourselves an inode cache */
        atomic_set(&afs_count_active_inodes, 0);

        ret = -ENOMEM;
        afs_inode_cachep = kmem_cache_create("afs_inode_cache",
                                             sizeof(struct afs_vnode),
                                             0,
                                             SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT,
                                             afs_i_init_once);
        if (!afs_inode_cachep) {
                printk(KERN_NOTICE "kAFS: Failed to allocate inode cache\n");
                return ret;
        }

        /* now export our filesystem to lesser mortals */
        ret = register_filesystem(&afs_fs_type);
        if (ret < 0) {
                kmem_cache_destroy(afs_inode_cachep);
                _leave(" = %d", ret);
                return ret;
        }

        _leave(" = 0");
        return 0;
}

/*
 * clean up the filesystem
 */
void afs_fs_exit(void)
{
        _enter("");

        afs_mntpt_kill_timer();
        unregister_filesystem(&afs_fs_type);

        if (atomic_read(&afs_count_active_inodes) != 0) {
                printk("kAFS: %d active inode objects still present\n",
                       atomic_read(&afs_count_active_inodes));
                BUG();
        }

        /*
         * Make sure all delayed rcu free inodes are flushed before we
         * destroy cache.
         */
        rcu_barrier();
        kmem_cache_destroy(afs_inode_cachep);
        _leave("");
}

/*
 * Display the mount device name in /proc/mounts.
 */
static int afs_show_devname(struct seq_file *m, struct dentry *root)
{
        struct afs_super_info *as = AFS_FS_S(root->d_sb);
        struct afs_volume *volume = as->volume;
        struct afs_cell *cell = as->cell;
        const char *suf = "";
        char pref = '%';

        if (as->dyn_root) {
                seq_puts(m, "none");
                return 0;
        }

        switch (volume->type) {
        case AFSVL_RWVOL:
                break;
        case AFSVL_ROVOL:
                pref = '#';
                if (volume->type_force)
                        suf = ".readonly";
                break;
        case AFSVL_BACKVOL:
                pref = '#';
                suf = ".backup";
                break;
        }

        seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->name, suf);
        return 0;
}

/*
 * Display the mount options in /proc/mounts.
 */
static int afs_show_options(struct seq_file *m, struct dentry *root)
{
        struct afs_super_info *as = AFS_FS_S(root->d_sb);
        const char *p = NULL;

        if (as->dyn_root)
                seq_puts(m, ",dyn");
        switch (as->flock_mode) {
        case afs_flock_mode_unset:      break;
        case afs_flock_mode_local:      p = "local";    break;
        case afs_flock_mode_openafs:    p = "openafs";  break;
        case afs_flock_mode_strict:     p = "strict";   break;
        case afs_flock_mode_write:      p = "write";    break;
        }
        if (p)
                seq_printf(m, ",flock=%s", p);

        return 0;
}

/*
 * Parse the source name to get cell name, volume name, volume type and R/W
 * selector.
 *
 * This can be one of the following:
 *      "%[cell:]volume[.]"             R/W volume
 *      "#[cell:]volume[.]"             R/O or R/W volume (R/O parent),
 *                                       or R/W (R/W parent) volume
 *      "%[cell:]volume.readonly"       R/O volume
 *      "#[cell:]volume.readonly"       R/O volume
 *      "%[cell:]volume.backup"         Backup volume
 *      "#[cell:]volume.backup"         Backup volume
 */
static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param)
{
        struct afs_fs_context *ctx = fc->fs_private;
        struct afs_cell *cell;
        const char *cellname, *suffix, *name = param->string;
        int cellnamesz;

        _enter(",%s", name);

        if (fc->source)
                return invalf(fc, "kAFS: Multiple sources not supported");

        if (!name) {
                printk(KERN_ERR "kAFS: no volume name specified\n");
                return -EINVAL;
        }

        if ((name[0] != '%' && name[0] != '#') || !name[1]) {
                /* To use dynroot, we don't want to have to provide a source */
                if (strcmp(name, "none") == 0) {
                        ctx->no_cell = true;
                        return 0;
                }
                printk(KERN_ERR "kAFS: unparsable volume name\n");
                return -EINVAL;
        }

        /* determine the type of volume we're looking for */
        if (name[0] == '%') {
                ctx->type = AFSVL_RWVOL;
                ctx->force = true;
        }
        name++;

        /* split the cell name out if there is one */
        ctx->volname = strchr(name, ':');
        if (ctx->volname) {
                cellname = name;
                cellnamesz = ctx->volname - name;
                ctx->volname++;
        } else {
                ctx->volname = name;
                cellname = NULL;
                cellnamesz = 0;
        }

        /* the volume type is further affected by a possible suffix */
        suffix = strrchr(ctx->volname, '.');
        if (suffix) {
                if (strcmp(suffix, ".readonly") == 0) {
                        ctx->type = AFSVL_ROVOL;
                        ctx->force = true;
                } else if (strcmp(suffix, ".backup") == 0) {
                        ctx->type = AFSVL_BACKVOL;
                        ctx->force = true;
                } else if (suffix[1] == 0) {
                } else {
                        suffix = NULL;
                }
        }

        ctx->volnamesz = suffix ?
                suffix - ctx->volname : strlen(ctx->volname);

        _debug("cell %*.*s [%p]",
               cellnamesz, cellnamesz, cellname ?: "", ctx->cell);

        /* lookup the cell record */
        if (cellname) {
                cell = afs_lookup_cell(ctx->net, cellname, cellnamesz,
                                       NULL, AFS_LOOKUP_CELL_DIRECT_MOUNT,
                                       afs_cell_trace_use_lookup_mount);
                if (IS_ERR(cell)) {
                        pr_err("kAFS: unable to lookup cell '%*.*s'\n",
                               cellnamesz, cellnamesz, cellname ?: "");
                        return PTR_ERR(cell);
                }
                afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_parse);
                afs_see_cell(cell, afs_cell_trace_see_source);
                ctx->cell = cell;
        }

        _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
               ctx->cell->name, ctx->cell,
               ctx->volnamesz, ctx->volnamesz, ctx->volname,
               suffix ?: "-", ctx->type, ctx->force ? " FORCE" : "");

        fc->source = param->string;
        param->string = NULL;
        return 0;
}

/*
 * Parse a single mount parameter.
 */
static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
        struct fs_parse_result result;
        struct afs_fs_context *ctx = fc->fs_private;
        int opt;

        opt = fs_parse(fc, afs_fs_parameters, param, &result);
        if (opt < 0)
                return opt;

        switch (opt) {
        case Opt_source:
                return afs_parse_source(fc, param);

        case Opt_autocell:
                ctx->autocell = true;
                break;

        case Opt_dyn:
                ctx->dyn_root = true;
                break;

        case Opt_flock:
                ctx->flock_mode = result.uint_32;
                break;

        default:
                return -EINVAL;
        }

        _leave(" = 0");
        return 0;
}

/*
 * Validate the options, get the cell key and look up the volume.
 */
static int afs_validate_fc(struct fs_context *fc)
{
        struct afs_fs_context *ctx = fc->fs_private;
        struct afs_volume *volume;
        struct afs_cell *cell;
        struct key *key;
        int ret;

        if (!ctx->dyn_root) {
                if (ctx->no_cell) {
                        pr_warn("kAFS: Can only specify source 'none' with -o dyn\n");
                        return -EINVAL;
                }

                if (!ctx->cell) {
                        pr_warn("kAFS: No cell specified\n");
                        return -EDESTADDRREQ;
                }

        reget_key:
                /* We try to do the mount securely. */
                key = afs_request_key(ctx->cell);
                if (IS_ERR(key))
                        return PTR_ERR(key);

                ctx->key = key;

                if (ctx->volume) {
                        afs_put_volume(ctx->volume, afs_volume_trace_put_validate_fc);
                        ctx->volume = NULL;
                }

                if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &ctx->cell->flags)) {
                        ret = afs_cell_detect_alias(ctx->cell, key);
                        if (ret < 0)
                                return ret;
                        if (ret == 1) {
                                _debug("switch to alias");
                                key_put(ctx->key);
                                ctx->key = NULL;
                                cell = afs_use_cell(ctx->cell->alias_of,
                                                    afs_cell_trace_use_fc_alias);
                                afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_fc);
                                ctx->cell = cell;
                                goto reget_key;
                        }
                }

                volume = afs_create_volume(ctx);
                if (IS_ERR(volume))
                        return PTR_ERR(volume);

                ctx->volume = volume;
                if (volume->type != AFSVL_RWVOL) {
                        ctx->flock_mode = afs_flock_mode_local;
                        fc->sb_flags |= SB_RDONLY;
                }
        }

        return 0;
}

/*
 * check a superblock to see if it's the one we're looking for
 */
static int afs_test_super(struct super_block *sb, struct fs_context *fc)
{
        struct afs_fs_context *ctx = fc->fs_private;
        struct afs_super_info *as = AFS_FS_S(sb);

        return (as->net_ns == fc->net_ns &&
                as->volume &&
                as->volume->vid == ctx->volume->vid &&
                as->cell == ctx->cell &&
                !as->dyn_root);
}

static int afs_dynroot_test_super(struct super_block *sb, struct fs_context *fc)
{
        struct afs_super_info *as = AFS_FS_S(sb);

        return (as->net_ns == fc->net_ns &&
                as->dyn_root);
}

static int afs_set_super(struct super_block *sb, struct fs_context *fc)
{
        return set_anon_super(sb, NULL);
}

/*
 * fill in the superblock
 */
static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
{
        struct afs_super_info *as = AFS_FS_S(sb);
        struct inode *inode = NULL;
        int ret;

        _enter("");

        /* fill in the superblock */
        sb->s_blocksize         = PAGE_SIZE;
        sb->s_blocksize_bits    = PAGE_SHIFT;
        sb->s_maxbytes          = MAX_LFS_FILESIZE;
        sb->s_magic             = AFS_FS_MAGIC;
        sb->s_op                = &afs_super_ops;
        if (!as->dyn_root)
                sb->s_xattr     = afs_xattr_handlers;
        ret = super_setup_bdi(sb);
        if (ret)
                return ret;

        /* allocate the root inode and dentry */
        if (as->dyn_root) {
                inode = afs_dynroot_iget_root(sb);
        } else {
                sprintf(sb->s_id, "%llu", as->volume->vid);
                afs_activate_volume(as->volume);
                inode = afs_root_iget(sb, ctx->key);
        }

        if (IS_ERR(inode))
                return PTR_ERR(inode);

        ret = -ENOMEM;
        sb->s_root = d_make_root(inode);
        if (!sb->s_root)
                goto error;

        if (as->dyn_root) {
                set_default_d_op(sb, &afs_dynroot_dentry_operations);
        } else {
                set_default_d_op(sb, &afs_fs_dentry_operations);
                rcu_assign_pointer(as->volume->sb, sb);
        }

        _leave(" = 0");
        return 0;

error:
        _leave(" = %d", ret);
        return ret;
}

static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
{
        struct afs_fs_context *ctx = fc->fs_private;
        struct afs_super_info *as;

        as = kzalloc_obj(struct afs_super_info);
        if (as) {
                as->net_ns = get_net(fc->net_ns);
                as->flock_mode = ctx->flock_mode;
                if (ctx->dyn_root) {
                        as->dyn_root = true;
                } else {
                        as->cell = afs_use_cell(ctx->cell, afs_cell_trace_use_sbi);
                        as->volume = afs_get_volume(ctx->volume,
                                                    afs_volume_trace_get_alloc_sbi);
                }
        }
        return as;
}

static void afs_destroy_sbi(struct afs_super_info *as)
{
        if (as) {
                afs_put_volume(as->volume, afs_volume_trace_put_destroy_sbi);
                afs_unuse_cell(as->cell, afs_cell_trace_unuse_sbi);
                put_net(as->net_ns);
                kfree(as);
        }
}

static void afs_kill_super(struct super_block *sb)
{
        struct afs_super_info *as = AFS_FS_S(sb);

        /* Clear the callback interests (which will do ilookup5) before
         * deactivating the superblock.
         */
        if (as->volume)
                rcu_assign_pointer(as->volume->sb, NULL);
        kill_anon_super(sb);
        if (as->volume)
                afs_deactivate_volume(as->volume);
        afs_destroy_sbi(as);
}

/*
 * Get an AFS superblock and root directory.
 */
static int afs_get_tree(struct fs_context *fc)
{
        struct afs_fs_context *ctx = fc->fs_private;
        struct super_block *sb;
        struct afs_super_info *as;
        int ret;

        ret = afs_validate_fc(fc);
        if (ret)
                goto error;

        _enter("");

        /* allocate a superblock info record */
        ret = -ENOMEM;
        as = afs_alloc_sbi(fc);
        if (!as)
                goto error;
        fc->s_fs_info = as;

        /* allocate a deviceless superblock */
        sb = sget_fc(fc,
                     as->dyn_root ? afs_dynroot_test_super : afs_test_super,
                     afs_set_super);
        if (IS_ERR(sb)) {
                ret = PTR_ERR(sb);
                goto error;
        }

        if (!sb->s_root) {
                /* initial superblock/root creation */
                _debug("create");
                ret = afs_fill_super(sb, ctx);
                if (ret < 0)
                        goto error_sb;
                sb->s_flags |= SB_ACTIVE;
        } else {
                _debug("reuse");
                ASSERTCMP(sb->s_flags, &, SB_ACTIVE);
        }

        fc->root = dget(sb->s_root);
        trace_afs_get_tree(as->cell, as->volume);
        _leave(" = 0 [%p]", sb);
        return 0;

error_sb:
        deactivate_locked_super(sb);
error:
        _leave(" = %d", ret);
        return ret;
}

static void afs_free_fc(struct fs_context *fc)
{
        struct afs_fs_context *ctx = fc->fs_private;

        afs_destroy_sbi(fc->s_fs_info);
        afs_put_volume(ctx->volume, afs_volume_trace_put_free_fc);
        afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_fc);
        key_put(ctx->key);
        kfree(ctx);
}

static const struct fs_context_operations afs_context_ops = {
        .free           = afs_free_fc,
        .parse_param    = afs_parse_param,
        .get_tree       = afs_get_tree,
};

/*
 * Set up the filesystem mount context.
 */
static int afs_init_fs_context(struct fs_context *fc)
{
        struct afs_fs_context *ctx;
        struct afs_cell *cell;

        ctx = kzalloc_obj(struct afs_fs_context);
        if (!ctx)
                return -ENOMEM;

        ctx->type = AFSVL_ROVOL;
        ctx->net = afs_net(fc->net_ns);

        /* Default to the workstation cell. */
        cell = afs_find_cell(ctx->net, NULL, 0, afs_cell_trace_use_fc);
        if (IS_ERR(cell))
                cell = NULL;
        ctx->cell = cell;

        fc->fs_private = ctx;
        fc->ops = &afs_context_ops;
        return 0;
}

/*
 * Initialise an inode cache slab element prior to any use.  Note that
 * afs_alloc_inode() *must* reset anything that could incorrectly leak from one
 * inode to another.
 */
static void afs_i_init_once(void *_vnode)
{
        struct afs_vnode *vnode = _vnode;

        memset(vnode, 0, sizeof(*vnode));
        inode_init_once(&vnode->netfs.inode);
        INIT_LIST_HEAD(&vnode->io_lock_waiters);
        init_rwsem(&vnode->validate_lock);
        spin_lock_init(&vnode->wb_lock);
        spin_lock_init(&vnode->lock);
        INIT_LIST_HEAD(&vnode->wb_keys);
        INIT_LIST_HEAD(&vnode->pending_locks);
        INIT_LIST_HEAD(&vnode->granted_locks);
        INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work);
        INIT_LIST_HEAD(&vnode->cb_mmap_link);
        seqlock_init(&vnode->cb_lock);
}

/*
 * allocate an AFS inode struct from our slab cache
 */
static struct inode *afs_alloc_inode(struct super_block *sb)
{
        struct afs_vnode *vnode;

        vnode = alloc_inode_sb(sb, afs_inode_cachep, GFP_KERNEL);
        if (!vnode)
                return NULL;

        atomic_inc(&afs_count_active_inodes);

        /* Reset anything that shouldn't leak from one inode to the next. */
        memset(&vnode->fid, 0, sizeof(vnode->fid));
        memset(&vnode->status, 0, sizeof(vnode->status));
        afs_vnode_set_cache(vnode, NULL);

        vnode->volume           = NULL;
        vnode->lock_key         = NULL;
        vnode->permit_cache     = NULL;
        vnode->directory        = NULL;
        vnode->directory_size   = 0;

        vnode->flags            = 1 << AFS_VNODE_UNSET;
        vnode->lock_state       = AFS_VNODE_LOCK_NONE;

        init_rwsem(&vnode->rmdir_lock);
        INIT_WORK(&vnode->cb_work, afs_invalidate_mmap_work);

        _leave(" = %p", &vnode->netfs.inode);
        return &vnode->netfs.inode;
}

static void afs_free_inode(struct inode *inode)
{
        kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode));
}

/*
 * destroy an AFS inode struct
 */
static void afs_destroy_inode(struct inode *inode)
{
        struct afs_vnode *vnode = AFS_FS_I(inode);

        _enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode);

        _debug("DESTROY INODE %p", inode);

        atomic_dec(&afs_count_active_inodes);
}

static void afs_get_volume_status_success(struct afs_operation *op)
{
        struct afs_volume_status *vs = &op->volstatus.vs;
        struct kstatfs *buf = op->volstatus.buf;

        if (vs->max_quota == 0)
                buf->f_blocks = vs->part_max_blocks;
        else
                buf->f_blocks = vs->max_quota;

        if (buf->f_blocks > vs->blocks_in_use)
                buf->f_bavail = buf->f_bfree =
                        buf->f_blocks - vs->blocks_in_use;
}

static const struct afs_operation_ops afs_get_volume_status_operation = {
        .issue_afs_rpc  = afs_fs_get_volume_status,
        .issue_yfs_rpc  = yfs_fs_get_volume_status,
        .success        = afs_get_volume_status_success,
};

/*
 * return information about an AFS volume
 */
static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
        struct afs_super_info *as = AFS_FS_S(dentry->d_sb);
        struct afs_operation *op;
        struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));

        buf->f_type     = dentry->d_sb->s_magic;
        buf->f_bsize    = AFS_BLOCK_SIZE;
        buf->f_namelen  = AFSNAMEMAX - 1;

        if (as->dyn_root) {
                buf->f_blocks   = 1;
                buf->f_bavail   = 0;
                buf->f_bfree    = 0;
                return 0;
        }

        op = afs_alloc_operation(NULL, as->volume);
        if (IS_ERR(op))
                return PTR_ERR(op);

        afs_op_set_vnode(op, 0, vnode);
        op->nr_files            = 1;
        op->volstatus.buf       = buf;
        op->ops                 = &afs_get_volume_status_operation;
        return afs_do_sync_operation(op);
}