root/fs/orangefs/orangefs-utils.c
// SPDX-License-Identifier: GPL-2.0
/*
 * (C) 2001 Clemson University and The University of Chicago
 * Copyright 2018 Omnibond Systems, L.L.C.
 *
 * See COPYING in top-level directory.
 */
#include <linux/kernel.h>
#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-dev-proto.h"
#include "orangefs-bufmap.h"

__s32 fsid_of_op(struct orangefs_kernel_op_s *op)
{
        __s32 fsid = ORANGEFS_FS_ID_NULL;

        if (op) {
                switch (op->upcall.type) {
                case ORANGEFS_VFS_OP_FILE_IO:
                        fsid = op->upcall.req.io.refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_LOOKUP:
                        fsid = op->upcall.req.lookup.parent_refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_CREATE:
                        fsid = op->upcall.req.create.parent_refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_GETATTR:
                        fsid = op->upcall.req.getattr.refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_REMOVE:
                        fsid = op->upcall.req.remove.parent_refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_MKDIR:
                        fsid = op->upcall.req.mkdir.parent_refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_READDIR:
                        fsid = op->upcall.req.readdir.refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_SETATTR:
                        fsid = op->upcall.req.setattr.refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_SYMLINK:
                        fsid = op->upcall.req.sym.parent_refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_RENAME:
                        fsid = op->upcall.req.rename.old_parent_refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_STATFS:
                        fsid = op->upcall.req.statfs.fs_id;
                        break;
                case ORANGEFS_VFS_OP_TRUNCATE:
                        fsid = op->upcall.req.truncate.refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_RA_FLUSH:
                        fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_FS_UMOUNT:
                        fsid = op->upcall.req.fs_umount.fs_id;
                        break;
                case ORANGEFS_VFS_OP_GETXATTR:
                        fsid = op->upcall.req.getxattr.refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_SETXATTR:
                        fsid = op->upcall.req.setxattr.refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_LISTXATTR:
                        fsid = op->upcall.req.listxattr.refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_REMOVEXATTR:
                        fsid = op->upcall.req.removexattr.refn.fs_id;
                        break;
                case ORANGEFS_VFS_OP_FSYNC:
                        fsid = op->upcall.req.fsync.refn.fs_id;
                        break;
                default:
                        break;
                }
        }
        return fsid;
}

/*
 * Translate the ORANGEFS_*_FL bits found in attrs->flags into the
 * matching S_IMMUTABLE / S_APPEND / S_NOATIME bits.  A bit whose ORANGEFS
 * counterpart is not set stays clear in the returned value.
 */
static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
{
        int result = 0;

        result |= (attrs->flags & ORANGEFS_IMMUTABLE_FL) ? S_IMMUTABLE : 0;
        result |= (attrs->flags & ORANGEFS_APPEND_FL) ? S_APPEND : 0;
        result |= (attrs->flags & ORANGEFS_NOATIME_FL) ? S_NOATIME : 0;

        return result;
}

/*
 * Build the permission part of an i_mode value (rwx for other, group and
 * user, plus setgid and setuid) from the ORANGEFS_* bits in attrs->perms.
 */
static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
{
        int mode = 0;

        /* other */
        mode |= (attrs->perms & ORANGEFS_O_EXECUTE) ? S_IXOTH : 0;
        mode |= (attrs->perms & ORANGEFS_O_WRITE) ? S_IWOTH : 0;
        mode |= (attrs->perms & ORANGEFS_O_READ) ? S_IROTH : 0;

        /* group */
        mode |= (attrs->perms & ORANGEFS_G_EXECUTE) ? S_IXGRP : 0;
        mode |= (attrs->perms & ORANGEFS_G_WRITE) ? S_IWGRP : 0;
        mode |= (attrs->perms & ORANGEFS_G_READ) ? S_IRGRP : 0;

        /* user */
        mode |= (attrs->perms & ORANGEFS_U_EXECUTE) ? S_IXUSR : 0;
        mode |= (attrs->perms & ORANGEFS_U_WRITE) ? S_IWUSR : 0;
        mode |= (attrs->perms & ORANGEFS_U_READ) ? S_IRUSR : 0;

        /* setgid / setuid */
        mode |= (attrs->perms & ORANGEFS_G_SGID) ? S_ISGID : 0;
        mode |= (attrs->perms & ORANGEFS_U_SUID) ? S_ISUID : 0;

        return mode;
}

/*
 * NOTE: in kernel land, we never use the sys_attr->link_target for
 * anything, so don't bother copying it into the sys_attr object here.
 */
static inline void copy_attributes_from_inode(struct inode *inode,
    struct ORANGEFS_sys_attr_s *attrs)
{
        struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
        attrs->mask = 0;
        if (orangefs_inode->attr_valid & ATTR_UID) {
                attrs->owner = from_kuid(&init_user_ns, inode->i_uid);
                attrs->mask |= ORANGEFS_ATTR_SYS_UID;
                gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
        }
        if (orangefs_inode->attr_valid & ATTR_GID) {
                attrs->group = from_kgid(&init_user_ns, inode->i_gid);
                attrs->mask |= ORANGEFS_ATTR_SYS_GID;
                gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
        }

        if (orangefs_inode->attr_valid & ATTR_ATIME) {
                attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
                if (orangefs_inode->attr_valid & ATTR_ATIME_SET) {
                        attrs->atime = (time64_t) inode_get_atime_sec(inode);
                        attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
                }
        }
        if (orangefs_inode->attr_valid & ATTR_MTIME) {
                attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
                if (orangefs_inode->attr_valid & ATTR_MTIME_SET) {
                        attrs->mtime = (time64_t) inode_get_mtime_sec(inode);
                        attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
                }
        }
        if (orangefs_inode->attr_valid & ATTR_CTIME)
                attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;

        /*
         * ORANGEFS cannot set size with a setattr operation. Probably not
         * likely to be requested through the VFS, but just in case, don't
         * worry about ATTR_SIZE
         */

        if (orangefs_inode->attr_valid & ATTR_MODE) {
                attrs->perms = ORANGEFS_util_translate_mode(inode->i_mode);
                attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
        }
}

/*
 * Map an ORANGEFS object type onto the matching S_IF* file type.
 * Returns -1 for any object type other than metafile, directory or
 * symlink.
 */
static int orangefs_inode_type(enum orangefs_ds_type objtype)
{
        switch (objtype) {
        case ORANGEFS_TYPE_METAFILE:
                return S_IFREG;
        case ORANGEFS_TYPE_DIRECTORY:
                return S_IFDIR;
        case ORANGEFS_TYPE_SYMLINK:
                return S_IFLNK;
        default:
                return -1;
        }
}

/*
 * Mark @inode bad via make_bad_inode(), unless it is the root handle, in
 * which case only a debug message is emitted.
 */
static void orangefs_make_bad_inode(struct inode *inode)
{
        if (!is_root_handle(inode)) {
                gossip_debug(GOSSIP_UTILS_DEBUG,
                             "*** making bad inode %pU\n",
                             get_khandle_from_ino(inode));
                make_bad_inode(inode);
                return;
        }

        /*
         * if this occurs, the pvfs2-client-core was killed but we
         * can't afford to lose the inode operations and such
         * associated with the root handle in any case.
         */
        gossip_debug(GOSSIP_UTILS_DEBUG,
                     "*** NOT making bad root inode %pU\n",
                     get_khandle_from_ino(inode));
}

static int orangefs_inode_is_stale(struct inode *inode,
    struct ORANGEFS_sys_attr_s *attrs, char *link_target)
{
        struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
        int type = orangefs_inode_type(attrs->objtype);
        /*
         * If the inode type or symlink target have changed then this
         * inode is stale.
         */
        if (type == -1 || inode_wrong_type(inode, type)) {
                orangefs_make_bad_inode(inode);
                return 1;
        }
        if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
            link_target, ORANGEFS_NAME_MAX)) {
                orangefs_make_bad_inode(inode);
                return 1;
        }
        return 0;
}

/*
 * Refresh the attributes cached in @inode with a GETATTR upcall.
 *
 * @flags: 0 lets a still-fresh cache (jiffies before getattr_time) satisfy
 *         the call and leaves the size out of the request mask; a non-zero
 *         value bypasses the freshness check and also asks for the size.
 *         When the ORANGEFS_GETATTR_NEW bit is set the stale check is
 *         skipped and a symlink's target is copied into the orangefs inode.
 *
 * Returns 0 when the attributes were updated or the update was skipped
 * (cache still fresh, or dirty pages pending), -ENOMEM if no op could be
 * allocated, -ESTALE if the object type is unknown or the inode turned out
 * to be stale, -EIO if the link target does not fit, or the non-zero
 * result of service_operation().
 */
int orangefs_inode_getattr(struct inode *inode, int flags)
{
        struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
        struct orangefs_kernel_op_s *new_op;
        loff_t inode_size;
        int ret, type;

        gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU flags %d\n",
            __func__, get_khandle_from_ino(inode), flags);

again:
        spin_lock(&inode->i_lock);
        /* Must have all the attributes in the mask and be within cache time. */
        if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) ||
            orangefs_inode->attr_valid || inode_state_read(inode) & I_DIRTY_PAGES) {
                /*
                 * Locally modified attributes are pending: drop the lock,
                 * write the inode out and start over.  Presumably the
                 * writeback reaches orangefs_inode_setattr(), which clears
                 * attr_valid -- TODO confirm.
                 */
                if (orangefs_inode->attr_valid) {
                        spin_unlock(&inode->i_lock);
                        write_inode_now(inode, 1);
                        goto again;
                }
                /* Fresh cache or dirty pages: nothing to fetch. */
                spin_unlock(&inode->i_lock);
                return 0;
        }
        spin_unlock(&inode->i_lock);

        new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
        if (!new_op)
                return -ENOMEM;
        new_op->upcall.req.getattr.refn = orangefs_inode->refn;
        /*
         * Size is the hardest attribute to get.  The incremental cost of any
         * other attribute is essentially zero.
         */
        if (flags)
                new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
        else
                new_op->upcall.req.getattr.mask =
                    ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;

        ret = service_operation(new_op, __func__,
            get_interruptible_flag(inode));
        if (ret != 0)
                goto out;

        /*
         * i_lock was not held while the operation was serviced, so the same
         * conditions are checked again before the reply is applied.
         */
again2:
        spin_lock(&inode->i_lock);
        /* Must have all the attributes in the mask and be within cache time. */
        if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) ||
            orangefs_inode->attr_valid || inode_state_read(inode) & I_DIRTY_PAGES) {
                if (orangefs_inode->attr_valid) {
                        spin_unlock(&inode->i_lock);
                        write_inode_now(inode, 1);
                        goto again2;
                }
                /* Both remaining cases discard the reply and return 0. */
                if (inode_state_read(inode) & I_DIRTY_PAGES) {
                        ret = 0;
                        goto out_unlock;
                }
                gossip_debug(GOSSIP_UTILS_DEBUG, "%s: in cache or dirty\n",
                    __func__);
                ret = 0;
                goto out_unlock;
        }

        /*
         * NOTE(review): orangefs_inode_is_stale() (and the default case of
         * the switch below) may call orangefs_make_bad_inode() while i_lock
         * is held -- confirm make_bad_inode() is safe in that context.
         */
        if (!(flags & ORANGEFS_GETATTR_NEW)) {
                ret = orangefs_inode_is_stale(inode,
                    &new_op->downcall.resp.getattr.attributes,
                    new_op->downcall.resp.getattr.link_target);
                if (ret) {
                        ret = -ESTALE;
                        goto out_unlock;
                }
        }

        type = orangefs_inode_type(new_op->
            downcall.resp.getattr.attributes.objtype);
        switch (type) {
        case S_IFREG:
                inode->i_flags = orangefs_inode_flags(&new_op->
                    downcall.resp.getattr.attributes);
                /* Size-derived fields are only touched when size was requested. */
                if (flags) {
                        inode_size = (loff_t)new_op->
                            downcall.resp.getattr.attributes.size;
                        inode->i_size = inode_size;
                        /*
                         * NOTE(review): ffs() is documented as returning a
                         * 1-based bit position, so a power-of-two blksize
                         * would give log2(blksize) + 1 here -- confirm that
                         * is the intended i_blkbits value.
                         */
                        inode->i_blkbits = ffs(new_op->downcall.resp.getattr.
                            attributes.blksize);
                        inode->i_bytes = inode_size;
                        /*
                         * Evaluates to inode_size / 512 + 1 for non-negative
                         * sizes, i.e. a size that is an exact multiple of
                         * 512 (including 0) still counts one extra block.
                         */
                        inode->i_blocks =
                            (inode_size + 512 - inode_size % 512)/512;
                }
                break;
        case S_IFDIR:
                /* Directories report a fixed size of one page. */
                if (flags) {
                        inode->i_size = PAGE_SIZE;
                        inode_set_bytes(inode, inode->i_size);
                }
                set_nlink(inode, 1);
                break;
        case S_IFLNK:
                /* The link target is only captured for a new inode. */
                if (flags & ORANGEFS_GETATTR_NEW) {
                        inode->i_size = (loff_t)strlen(new_op->
                            downcall.resp.getattr.link_target);
                        ret = strscpy(orangefs_inode->link_target,
                            new_op->downcall.resp.getattr.link_target,
                            ORANGEFS_NAME_MAX);
                        /* A target that does not fit is reported as -EIO. */
                        if (ret == -E2BIG) {
                                ret = -EIO;
                                goto out_unlock;
                        }
                        inode->i_link = orangefs_inode->link_target;
                }
                break;
        /* i.e. -1 */
        default:
                /* XXX: ESTALE?  This is what is done if it is not new. */
                orangefs_make_bad_inode(inode);
                ret = -ESTALE;
                goto out_unlock;
        }

        /* Attributes common to every object type. */
        inode->i_uid = make_kuid(&init_user_ns, new_op->
            downcall.resp.getattr.attributes.owner);
        inode->i_gid = make_kgid(&init_user_ns, new_op->
            downcall.resp.getattr.attributes.group);
        /* Timestamps are stored with a zero nanosecond part. */
        inode_set_atime(inode,
                        (time64_t)new_op->downcall.resp.getattr.attributes.atime,
                        0);
        inode_set_mtime(inode,
                        (time64_t)new_op->downcall.resp.getattr.attributes.mtime,
                        0);
        inode_set_ctime(inode,
                        (time64_t)new_op->downcall.resp.getattr.attributes.ctime,
                        0);

        /* special case: mark the root inode as sticky */
        inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
            orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);

        /* The cache stays valid for orangefs_getattr_timeout_msecs from now. */
        orangefs_inode->getattr_time = jiffies +
            orangefs_getattr_timeout_msecs*HZ/1000;
        ret = 0;
out_unlock:
        spin_unlock(&inode->i_lock);
out:
        op_release(new_op);
        return ret;
}

int orangefs_inode_check_changed(struct inode *inode)
{
        struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
        struct orangefs_kernel_op_s *new_op;
        int ret;

        gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
            get_khandle_from_ino(inode));

        new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
        if (!new_op)
                return -ENOMEM;
        new_op->upcall.req.getattr.refn = orangefs_inode->refn;
        new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
            ORANGEFS_ATTR_SYS_LNK_TARGET;

        ret = service_operation(new_op, __func__,
            get_interruptible_flag(inode));
        if (ret != 0)
                goto out;

        ret = orangefs_inode_is_stale(inode,
            &new_op->downcall.resp.getattr.attributes,
            new_op->downcall.resp.getattr.link_target);
out:
        op_release(new_op);
        return ret;
}

/*
 * Issue an orangefs setattr request so that the attribute changes recorded
 * in the orangefs inode's attr_valid take effect on the server.
 *
 * attr_valid is cleared in any case.  If no attribute ends up in the
 * request mask, no request is sent.  A failed request hands the inode to
 * orangefs_make_bad_inode(); a successful one moves getattr_time into the
 * past.
 *
 * Returns 0 on success; -errno otherwise.
 */
int orangefs_inode_setattr(struct inode *inode)
{
        struct orangefs_inode_s *oi = ORANGEFS_I(inode);
        struct orangefs_kernel_op_s *op;
        int nothing_to_send;
        int rc;

        op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
        if (!op)
                return -ENOMEM;

        /* Snapshot the pending attributes under i_lock. */
        spin_lock(&inode->i_lock);
        op->upcall.uid = from_kuid(&init_user_ns, oi->attr_uid);
        op->upcall.gid = from_kgid(&init_user_ns, oi->attr_gid);
        op->upcall.req.setattr.refn = oi->refn;
        copy_attributes_from_inode(inode, &op->upcall.req.setattr.attributes);
        oi->attr_valid = 0;
        nothing_to_send = !op->upcall.req.setattr.attributes.mask;
        spin_unlock(&inode->i_lock);

        if (nothing_to_send) {
                op_release(op);
                return 0;
        }

        rc = service_operation(op, __func__,
            get_interruptible_flag(inode) | ORANGEFS_OP_WRITEBACK);
        gossip_debug(GOSSIP_UTILS_DEBUG,
            "orangefs_inode_setattr: returning %d\n", rc);
        if (rc)
                orangefs_make_bad_inode(inode);

        op_release(op);

        /*
         * A getattr_time in the past makes the freshness test in
         * orangefs_inode_getattr() fail.
         */
        if (!rc)
                oi->getattr_time = jiffies - 1;

        return rc;
}

/*
 * The following is a very dirty hack that is now a permanent part of the
 * ORANGEFS protocol. See protocol.h for more error definitions.
 */

/*
 * Lookup table from ORANGEFS error number to Linux errno value.
 *
 * The order matches include/orangefs-types.h in the OrangeFS source.
 * orangefs_normalize_to_errno() indexes this table with the error code
 * after masking off ORANGEFS_ERROR_BIT and ORANGEFS_ERROR_CLASS_BITS.
 * The table is never written, hence const.  The number in each row's
 * leading comment is the index of the first entry of that row.
 */
static const int PINT_errno_mapping[] = {
        /*  0 */ 0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
        /*  9 */ EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
        /* 17 */ EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
        /* 24 */ ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
        /* 31 */ EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
        /* 38 */ EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
        /* 44 */ ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
        /* 48 */ EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
        /* 53 */ ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
        /* 58 */ EACCES, ECONNRESET, ERANGE
};

/*
 * Convert an ORANGEFS protocol error code into a negative errno value
 * suitable for return from the kernel.
 *
 * 0 is passed through as success.  A positive code is logged and treated
 * as if its sign had been inverted.  Non-errno ORANGEFS errors become
 * -ETIMEDOUT for ORANGEFS_ECANCEL and -EINVAL otherwise; errno-encoded
 * errors are looked up in PINT_errno_mapping; anything else is -EINVAL.
 *
 * XXX: This is very bad since error codes from ORANGEFS may not be
 * suitable for return into userspace.
 */
int orangefs_normalize_to_errno(__s32 error_code)
{
        __s32 code;
        __u32 idx;

        /* Success */
        if (error_code == 0)
                return 0;

        /*
         * This shouldn't ever happen. If it does it should be fixed on the
         * server.
         */
        if (error_code > 0) {
                gossip_err("orangefs: error status received.\n");
                gossip_err("orangefs: assuming error code is inverted.\n");
                error_code = -error_code;
        }

        /* All the bit tests below operate on the positive form. */
        code = -error_code;

        if (code & ORANGEFS_NON_ERRNO_ERROR_BIT) {
                /*
                 * cancellation error codes generally correspond to
                 * a timeout from the client's perspective
                 */
                if ((code & (ORANGEFS_ERROR_NUMBER_BITS |
                    ORANGEFS_NON_ERRNO_ERROR_BIT |
                    ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL)
                        return -ETIMEDOUT;

                /* assume a default error code */
                gossip_err("%s: bad error code :%d:.\n",
                        __func__,
                        error_code);
                return -EINVAL;
        }

        /* Convert ORANGEFS encoded errno values into regular errno values. */
        if (code & ORANGEFS_ERROR_BIT) {
                idx = code & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
                if (idx < ARRAY_SIZE(PINT_errno_mapping))
                        return -PINT_errno_mapping[idx];
                return -EINVAL;
        }

        /*
         * Only ORANGEFS protocol error codes should ever come here. Otherwise
         * there is a bug somewhere.
         */
        gossip_err("%s: unknown error code.\n", __func__);
        return -EINVAL;
}

#define NUM_MODES 11
/*
 * Convert the permission, setgid and setuid bits of the VFS mode @mode
 * into the corresponding ORANGEFS_* permission bits.  Any other bit of
 * @mode is ignored.
 *
 * Returns the OR of the ORANGEFS_* bits whose VFS counterpart is set.
 */
__s32 ORANGEFS_util_translate_mode(int mode)
{
        /*
         * Each VFS bit sits next to its ORANGEFS counterpart so the two
         * cannot drift apart; the table is read-only, hence const.
         */
        static const struct {
                int vfs_bit;
                int orangefs_bit;
        } mode_map[NUM_MODES] = {
                { S_IXOTH, ORANGEFS_O_EXECUTE },
                { S_IWOTH, ORANGEFS_O_WRITE },
                { S_IROTH, ORANGEFS_O_READ },
                { S_IXGRP, ORANGEFS_G_EXECUTE },
                { S_IWGRP, ORANGEFS_G_WRITE },
                { S_IRGRP, ORANGEFS_G_READ },
                { S_IXUSR, ORANGEFS_U_EXECUTE },
                { S_IWUSR, ORANGEFS_U_WRITE },
                { S_IRUSR, ORANGEFS_U_READ },
                { S_ISGID, ORANGEFS_G_SGID },
                { S_ISUID, ORANGEFS_U_SUID }
        };
        int ret = 0;
        int i;

        for (i = 0; i < NUM_MODES; i++)
                if (mode & mode_map[i].vfs_bit)
                        ret |= mode_map[i].orangefs_bit;

        return ret;
}
#undef NUM_MODES