root/fs/btrfs/inode-item.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include "ctree.h"
#include "fs.h"
#include "messages.h"
#include "inode-item.h"
#include "disk-io.h"
#include "transaction.h"
#include "space-info.h"
#include "accessors.h"
#include "extent-tree.h"
#include "file-item.h"

/*
 * Search the inode ref item at @slot of @leaf for an entry named @name.
 *
 * The item is walked as consecutive records, each one a struct btrfs_inode_ref
 * header directly followed by its name bytes.
 *
 * Returns the matching ref, or NULL when no record in the item has that name.
 */
struct btrfs_inode_ref *btrfs_find_name_in_backref(const struct extent_buffer *leaf,
                                                   int slot,
                                                   const struct fscrypt_str *name)
{
        const u32 total = btrfs_item_size(leaf, slot);
        const unsigned long base = btrfs_item_ptr_offset(leaf, slot);
        u32 pos = 0;

        while (pos < total) {
                struct btrfs_inode_ref *cand;
                int cand_len;

                cand = (struct btrfs_inode_ref *)(base + pos);
                cand_len = btrfs_inode_ref_name_len(leaf, cand);

                /* Step past this record now; @cand still refers to it. */
                pos += cand_len + sizeof(*cand);

                /* Only compare the bytes once the lengths agree. */
                if (cand_len == name->len &&
                    memcmp_extent_buffer(leaf, name->name,
                                         (unsigned long)(cand + 1),
                                         name->len) == 0)
                        return cand;
        }

        return NULL;
}

/*
 * Search the extended inode ref item at @slot of @leaf for a record whose
 * parent is @ref_objectid and whose name equals @name.
 *
 * Returns the matching extref, or NULL when the item holds no such record.
 */
struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
                const struct extent_buffer *leaf, int slot, u64 ref_objectid,
                const struct fscrypt_str *name)
{
        const u32 total = btrfs_item_size(leaf, slot);
        const unsigned long base = btrfs_item_ptr_offset(leaf, slot);
        u32 pos = 0;

        /*
         * More than one extref shares an item only when they collide, so
         * this loop usually ends up comparing against a single name.
         */
        while (pos < total) {
                struct btrfs_inode_extref *cand;
                int cand_len;

                cand = (struct btrfs_inode_extref *)(base + pos);
                cand_len = btrfs_inode_extref_name_len(leaf, cand);

                /* Cheapest tests first: length, then parent, then bytes. */
                if (cand_len != name->len ||
                    btrfs_inode_extref_parent(leaf, cand) != ref_objectid ||
                    memcmp_extent_buffer(leaf, name->name,
                                         (unsigned long)(&cand->name),
                                         name->len) != 0) {
                        pos += cand_len + sizeof(*cand);
                        continue;
                }

                return cand;
        }

        return NULL;
}

/*
 * Look up the extended inode ref of inode @inode_objectid that carries @name
 * under parent @ref_objectid.
 *
 * The tree is searched without a transaction handle and with ins_len and cow
 * both 0.  @path is not released by this function.
 *
 * Returns the extref on a match, NULL if no extref found, or an ERR_PTR() of
 * the negative value returned by the tree search.
 */
struct btrfs_inode_extref *btrfs_lookup_inode_extref(struct btrfs_root *root,
                                                     struct btrfs_path *path,
                                                     const struct fscrypt_str *name,
                                                     u64 inode_objectid, u64 ref_objectid)
{
        struct btrfs_key key = {
                .objectid = inode_objectid,
                .type = BTRFS_INODE_EXTREF_KEY,
                .offset = btrfs_extref_hash(ref_objectid, name->name, name->len),
        };
        int ret;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                return ERR_PTR(ret);

        /* A positive result means no item exists at the hashed key. */
        if (ret != 0)
                return NULL;

        return btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
                                              ref_objectid, name);
}

/*
 * Remove the extended inode ref of inode @inode_objectid that carries @name
 * under parent @ref_objectid.
 *
 * If @index is not NULL it receives the index stored in the removed extref.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT if the
 * extref item or the name inside it is missing (the latter also aborts the
 * transaction), or the error from the tree search or btrfs_del_item().
 */
static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
                                  const struct fscrypt_str *name,
                                  u64 inode_objectid, u64 ref_objectid,
                                  u64 *index)
{
        BTRFS_PATH_AUTO_FREE(path);
        struct btrfs_inode_extref *extref;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        const int del_len = name->len + sizeof(*extref);
        unsigned long rec_start;
        unsigned long rec_end;
        unsigned long item_start;
        u32 item_size;
        int ret;

        key.objectid = inode_objectid;
        key.type = BTRFS_INODE_EXTREF_KEY;
        key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0)
                return ret;
        if (ret > 0)
                return -ENOENT;

        leaf = path->nodes[0];

        /*
         * Sanity check - the item at the hashed key must contain our name.
         * This should always succeed, so a miss aborts the transaction and
         * makes the FS readonly.
         */
        extref = btrfs_find_name_in_ext_backref(leaf, path->slots[0],
                                                ref_objectid, name);
        if (unlikely(!extref)) {
                btrfs_abort_transaction(trans, -ENOENT);
                return -ENOENT;
        }

        if (index)
                *index = btrfs_inode_extref_index(leaf, extref);

        item_size = btrfs_item_size(leaf, path->slots[0]);

        /* Common case: ours is the only ref in the item, drop the item. */
        if (del_len == item_size)
                return btrfs_del_item(trans, root, path);

        /* Otherwise slide the records after ours down and shrink the item. */
        item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
        rec_start = (unsigned long)extref;
        rec_end = rec_start + del_len;

        memmove_extent_buffer(leaf, rec_start, rec_end,
                              item_size - (rec_end - item_start));

        btrfs_truncate_item(trans, path, item_size - del_len, 1);

        return 0;
}

/*
 * Remove the ref of inode @inode_objectid that carries @name under parent
 * @ref_objectid.
 *
 * The regular inode ref item is tried first.  When that item does not exist,
 * or exists but does not contain @name, the removal is retried on the
 * extended inode ref item.  If @index is not NULL it receives the index stored
 * in the ref that gets removed.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, otherwise the
 * value returned by the tree search, btrfs_del_item() or
 * btrfs_del_inode_extref().
 */
int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root, const struct fscrypt_str *name,
                        u64 inode_objectid, u64 ref_objectid, u64 *index)
{
        struct btrfs_inode_ref *ref = NULL;
        struct extent_buffer *leaf;
        struct btrfs_path *path;
        struct btrfs_key key;
        const u32 rec_len = name->len + sizeof(*ref);
        unsigned long rec_start;
        unsigned long rec_end;
        unsigned long item_start;
        u32 item_size;
        int ret;

        key.objectid = inode_objectid;
        key.type = BTRFS_INODE_REF_KEY;
        key.offset = ref_objectid;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0)
                goto release;

        /* Only look inside the item when the search hit the exact key. */
        if (ret == 0)
                ref = btrfs_find_name_in_backref(path->nodes[0],
                                                 path->slots[0], name);

        if (!ref) {
                /*
                 * No refs were found, or we could not find the name in our
                 * ref array.  Drop our path, then find and remove the
                 * extended inode ref instead.
                 */
                btrfs_free_path(path);
                return btrfs_del_inode_extref(trans, root, name,
                                              inode_objectid, ref_objectid, index);
        }

        leaf = path->nodes[0];
        item_size = btrfs_item_size(leaf, path->slots[0]);

        if (index)
                *index = btrfs_inode_ref_index(leaf, ref);

        /* Ours is the only ref in the item: remove the whole item. */
        if (rec_len == item_size) {
                ret = btrfs_del_item(trans, root, path);
                goto release;
        }

        /* Slide the records after ours down, then shrink the item. */
        item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
        rec_start = (unsigned long)ref;
        rec_end = rec_start + rec_len;
        memmove_extent_buffer(leaf, rec_start, rec_end,
                              item_size - (rec_end - item_start));
        btrfs_truncate_item(trans, path, item_size - rec_len, 1);

release:
        btrfs_free_path(path);
        return ret;
}

/*
 * Insert an extended inode ref into a tree.
 *
 * The extref records that inode @inode_objectid is known as @name, with
 * directory index @index, under parent @ref_objectid.  If an item already
 * exists at the hashed key, the item is extended and the new extref is written
 * into the bytes added at its end.
 *
 * The caller must have checked against BTRFS_LINK_MAX already.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, -EEXIST if the
 * same name and parent are already present in the item, or another negative
 * value returned by btrfs_insert_empty_item().
 */
static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     const struct fscrypt_str *name,
                                     u64 inode_objectid, u64 ref_objectid,
                                     u64 index)
{
        BTRFS_PATH_AUTO_FREE(path);
        struct btrfs_inode_extref *extref;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        const int ins_len = name->len + sizeof(*extref);
        unsigned long ptr;
        int slot;
        int ret;

        key.objectid = inode_objectid;
        key.type = BTRFS_INODE_EXTREF_KEY;
        key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_insert_empty_item(trans, root, path, &key, ins_len);
        if (ret == -EEXIST) {
                /* An item is already there: a true duplicate, or a collision? */
                if (btrfs_find_name_in_ext_backref(path->nodes[0],
                                                   path->slots[0],
                                                   ref_objectid, name))
                        return -EEXIST;

                /* Collision: grow the existing item to make room for ours. */
                btrfs_extend_item(trans, path, ins_len);
        } else if (ret < 0) {
                return ret;
        }

        leaf = path->nodes[0];
        slot = path->slots[0];

        /* The new extref occupies the last ins_len bytes of the item. */
        ptr = (unsigned long)btrfs_item_ptr(leaf, slot, char);
        ptr += btrfs_item_size(leaf, slot) - ins_len;
        extref = (struct btrfs_inode_extref *)ptr;

        btrfs_set_inode_extref_name_len(leaf, extref, name->len);
        btrfs_set_inode_extref_index(leaf, extref, index);
        btrfs_set_inode_extref_parent(leaf, extref, ref_objectid);

        write_extent_buffer(leaf, name->name, (unsigned long)&extref->name,
                            name->len);

        return 0;
}

/*
 * Record that inode @inode_objectid is known as @name, with directory index
 * @index, under parent @ref_objectid.
 *
 * The ref goes into the regular inode ref item: a new item when none exists
 * yet, otherwise appended to the existing one.  When the insertion reports
 * -EOVERFLOW and the name is not already present, an extended inode ref is
 * inserted instead, provided the filesystem has the EXTENDED_IREF incompat
 * feature.
 *
 * Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path.
 */
int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, const struct fscrypt_str *name,
                           u64 inode_objectid, u64 ref_objectid, u64 index)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_inode_ref *ref;
        struct extent_buffer *leaf;
        struct btrfs_path *path;
        struct btrfs_key key;
        const int ins_len = name->len + sizeof(*ref);
        /* Byte offset inside the item where the new ref is written. */
        u32 append_at = 0;
        int ret;

        key.objectid = inode_objectid;
        key.type = BTRFS_INODE_REF_KEY;
        key.offset = ref_objectid;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        /* The error branches below still inspect the slot the search found. */
        path->skip_release_on_error = true;
        ret = btrfs_insert_empty_item(trans, root, path, &key, ins_len);
        if (ret == -EEXIST) {
                /* Same name already recorded: report -EEXIST untouched. */
                if (btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
                                               name))
                        goto out;

                /* Append after the refs that are already in the item. */
                append_at = btrfs_item_size(path->nodes[0], path->slots[0]);
                btrfs_extend_item(trans, path, ins_len);
                ret = 0;
        } else if (ret == -EOVERFLOW) {
                if (btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
                                               name))
                        ret = -EEXIST;
                else
                        ret = -EMLINK;
                goto out;
        } else if (ret < 0) {
                goto out;
        }

        leaf = path->nodes[0];
        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_ref);
        ref = (struct btrfs_inode_ref *)((unsigned long)ref + append_at);
        btrfs_set_inode_ref_name_len(leaf, ref, name->len);
        btrfs_set_inode_ref_index(leaf, ref, index);
        /* The name bytes follow the ref header directly. */
        write_extent_buffer(leaf, name->name, (unsigned long)(ref + 1),
                            name->len);
out:
        btrfs_free_path(path);

        if (ret != -EMLINK)
                return ret;

        /*
         * We ran out of space in the ref array.  Need to add an extended
         * ref, which is only possible with the incompat feature enabled.
         */
        if (!(btrfs_super_incompat_flags(fs_info->super_copy) &
              BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF))
                return ret;

        return btrfs_insert_inode_extref(trans, root, name, inode_objectid,
                                         ref_objectid, index);
}

/*
 * Insert an empty item, sized for a struct btrfs_inode_item, under the key
 * (@objectid, BTRFS_INODE_ITEM_KEY, 0) in @root.
 *
 * Returns whatever btrfs_insert_empty_item() returns.
 */
int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct btrfs_path *path, u64 objectid)
{
        struct btrfs_key key = {
                .objectid = objectid,
                .type = BTRFS_INODE_ITEM_KEY,
                .offset = 0,
        };

        return btrfs_insert_empty_item(trans, root, path, &key,
                                       sizeof(struct btrfs_inode_item));
}

int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
                       *root, struct btrfs_path *path,
                       struct btrfs_key *location, int mod)
{
        int ins_len = mod < 0 ? -1 : 0;
        int cow = mod != 0;
        int ret;
        int slot;
        struct extent_buffer *leaf;
        struct btrfs_key found_key;

        ret = btrfs_search_slot(trans, root, location, path, ins_len, cow);
        if (ret > 0 && location->type == BTRFS_ROOT_ITEM_KEY &&
            location->offset == (u64)-1 && path->slots[0] != 0) {
                slot = path->slots[0] - 1;
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
                if (found_key.objectid == location->objectid &&
                    found_key.type == location->type) {
                        path->slots[0]--;
                        return 0;
                }
        }
        return ret;
}

/*
 * Emit the truncate tracepoint matching @extent_type for the file extent item
 * @fi.  Nothing is traced when @inode is NULL.
 */
static inline void btrfs_trace_truncate(const struct btrfs_inode *inode,
                                        const struct extent_buffer *leaf,
                                        const struct btrfs_file_extent_item *fi,
                                        u64 offset, int extent_type, int slot)
{
        if (!inode)
                return;

        if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
                trace_btrfs_truncate_show_fi_regular(inode, leaf, fi, offset);
                return;
        }

        /* The inline variant additionally takes the slot of the item. */
        trace_btrfs_truncate_show_fi_inline(inode, leaf, fi, slot, offset);
}

/*
 * Remove inode items from a given root.
 *
 * @trans:              A transaction handle.
 * @root:               The root from which to remove items.
 * @control:            The btrfs_truncate_control to control how and what we
 *                      are truncating.
 *
 * The inode is identified by @control->ino. Remove all keys associated with it
 * from the given root that have a key with a type greater than or equal to
 * @control->min_type. When @control->min_type has a value of
 * BTRFS_EXTENT_DATA_KEY, only remove file extent items that have an offset
 * value greater than or equal to @control->new_size. If a file extent item
 * that starts before @control->new_size and ends after it is found, its length
 * is adjusted.
 *
 * @control->inode may be NULL unless @control->clear_extent_range is set.
 * @control->last_size and @control->sub_bytes are reset on entry and updated
 * while items are processed; @control->extents_found is incremented for every
 * file extent item that is processed.
 *
 * Returns: 0 on success, < 0 on error (-EAGAIN when the function backs off so
 * that the caller can end the transaction or redo its reservation) and
 * BTRFS_NEED_TRUNCATE_BLOCK when @control->min_type is BTRFS_EXTENT_DATA_KEY
 * and the caller must truncate the last block.
 */
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
                               struct btrfs_truncate_control *control)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        BTRFS_PATH_AUTO_FREE(path);
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
        struct btrfs_key key;
        struct btrfs_key found_key;
        u64 new_size = control->new_size;
        u64 extent_num_bytes = 0;
        u64 extent_offset = 0;
        u64 item_end = 0;
        u32 found_type = (u8)-1;
        int del_item;
        /* Run of adjacent slots queued for a single btrfs_del_items() call. */
        int pending_del_nr = 0;
        int pending_del_slot = 0;
        int extent_type = -1;
        int ret;
        u64 bytes_deleted = 0;
        bool be_nice = false;

        ASSERT(control->inode || !control->clear_extent_range);
        ASSERT(new_size == 0 || control->min_type == BTRFS_EXTENT_DATA_KEY);

        control->last_size = new_size;
        control->sub_bytes = 0;

        /*
         * For shareable roots we want to back off from time to time, this turns
         * out to be subvolume roots, reloc roots, and data reloc roots.
         */
        if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
                be_nice = true;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        path->reada = READA_BACK;

        /*
         * Search for the largest possible key of this inode; the items are
         * then visited from the highest key downwards.
         */
        key.objectid = control->ino;
        key.type = (u8)-1;
        key.offset = (u64)-1;

search_again:
        /*
         * With a 16K leaf size and 128MiB extents, you can actually queue up a
         * huge file in a single leaf.  Most of the time that bytes_deleted is
         * > 0, it will be huge by the time we get here
         */
        if (be_nice && bytes_deleted > SZ_32M &&
            btrfs_should_end_transaction(trans)) {
                ret = -EAGAIN;
                goto out;
        }

        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0)
                goto out;

        if (ret > 0) {
                ret = 0;
                /* There are no items in the tree for us to truncate, we're done */
                if (path->slots[0] == 0)
                        goto out;
                /* No exact match: step back to the item just before the key. */
                path->slots[0]--;
        }

        while (1) {
                u64 clear_start = 0, clear_len = 0, extent_start = 0;
                bool refill_delayed_refs_rsv = false;

                fi = NULL;
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                found_type = found_key.type;

                /* Walked past the first item of our inode. */
                if (found_key.objectid != control->ino)
                        break;

                if (found_type < control->min_type)
                        break;

                /*
                 * For file extents item_end becomes the offset of the last
                 * byte the extent covers; for other items it is the key
                 * offset.
                 */
                item_end = found_key.offset;
                if (found_type == BTRFS_EXTENT_DATA_KEY) {
                        fi = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_file_extent_item);
                        extent_type = btrfs_file_extent_type(leaf, fi);
                        if (extent_type != BTRFS_FILE_EXTENT_INLINE)
                                item_end +=
                                    btrfs_file_extent_num_bytes(leaf, fi);
                        else if (extent_type == BTRFS_FILE_EXTENT_INLINE)
                                item_end += btrfs_file_extent_ram_bytes(leaf, fi);

                        btrfs_trace_truncate(control->inode, leaf, fi,
                                             found_key.offset, extent_type,
                                             path->slots[0]);
                        item_end--;
                }
                /*
                 * Items of a type above min_type always go.  Items of exactly
                 * min_type go only if they start at or after new_size; one
                 * that merely overlaps new_size is kept and adjusted below.
                 */
                if (found_type > control->min_type) {
                        del_item = 1;
                } else {
                        if (item_end < new_size)
                                break;
                        if (found_key.offset >= new_size)
                                del_item = 1;
                        else
                                del_item = 0;
                }

                /* FIXME, shrink the extent if the ref count is only 1 */
                if (found_type != BTRFS_EXTENT_DATA_KEY)
                        goto delete;

                control->extents_found++;

                if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
                        u64 num_dec;

                        clear_start = found_key.offset;
                        extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
                        if (!del_item) {
                                /*
                                 * The extent straddles new_size: shorten it to
                                 * end at new_size rounded up to the sector
                                 * size.
                                 */
                                u64 orig_num_bytes =
                                        btrfs_file_extent_num_bytes(leaf, fi);
                                extent_num_bytes = ALIGN(new_size -
                                                found_key.offset,
                                                fs_info->sectorsize);
                                clear_start = ALIGN(new_size, fs_info->sectorsize);

                                btrfs_set_file_extent_num_bytes(leaf, fi,
                                                         extent_num_bytes);
                                num_dec = (orig_num_bytes - extent_num_bytes);
                                if (extent_start != 0)
                                        control->sub_bytes += num_dec;
                        } else {
                                /*
                                 * Whole item goes away: remember the extent's
                                 * disk size and data-ref offset for the
                                 * btrfs_free_extent() call below.
                                 */
                                extent_num_bytes =
                                        btrfs_file_extent_disk_num_bytes(leaf, fi);
                                extent_offset = found_key.offset -
                                        btrfs_file_extent_offset(leaf, fi);

                                /* FIXME blocksize != 4096 */
                                num_dec = btrfs_file_extent_num_bytes(leaf, fi);
                                if (extent_start != 0)
                                        control->sub_bytes += num_dec;
                        }
                        clear_len = num_dec;
                } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                        /*
                         * We can't truncate inline items that have had
                         * special encodings
                         */
                        if (!del_item &&
                            btrfs_file_extent_encryption(leaf, fi) == 0 &&
                            btrfs_file_extent_other_encoding(leaf, fi) == 0 &&
                            btrfs_file_extent_compression(leaf, fi) == 0) {
                                /* Plain inline data: shrink the item in place. */
                                u32 size = (u32)(new_size - found_key.offset);

                                btrfs_set_file_extent_ram_bytes(leaf, fi, size);
                                size = btrfs_file_extent_calc_inline_size(size);
                                btrfs_truncate_item(trans, path, size, 1);
                        } else if (!del_item) {
                                /*
                                 * We have to bail so the last_size is set to
                                 * just before this extent.
                                 */
                                ret = BTRFS_NEED_TRUNCATE_BLOCK;
                                break;
                        } else {
                                /*
                                 * Inline extents are special, we just treat
                                 * them as a full sector worth in the file
                                 * extent tree just for simplicity sake.
                                 */
                                clear_len = fs_info->sectorsize;
                        }

                        control->sub_bytes += item_end + 1 - new_size;
                }
delete:
                /*
                 * We only want to clear the file extent range if we're
                 * modifying the actual inode's mapping, which is just the
                 * normal truncate path.
                 */
                if (control->clear_extent_range) {
                        ret = btrfs_inode_clear_file_extent_range(control->inode,
                                                  clear_start, clear_len);
                        if (unlikely(ret)) {
                                btrfs_abort_transaction(trans, ret);
                                break;
                        }
                }

                if (del_item) {
                        /*
                         * Slots are visited in descending order, so a pending
                         * run can only grow at its low end.
                         */
                        ASSERT(!pending_del_nr ||
                               ((path->slots[0] + 1) == pending_del_slot));

                        control->last_size = found_key.offset;
                        if (!pending_del_nr) {
                                /* No pending yet, add ourselves */
                                pending_del_slot = path->slots[0];
                                pending_del_nr = 1;
                        } else if (path->slots[0] + 1 == pending_del_slot) {
                                /* Hop on the pending chunk */
                                pending_del_nr++;
                                pending_del_slot = path->slots[0];
                        }
                } else {
                        /* The item was adjusted, not deleted: nothing is left below it. */
                        control->last_size = new_size;
                        break;
                }

                /* Drop the reference on the data extent backing the deleted item. */
                if (del_item && extent_start != 0 && !control->skip_ref_updates) {
                        struct btrfs_ref ref = {
                                .action = BTRFS_DROP_DELAYED_REF,
                                .bytenr = extent_start,
                                .num_bytes = extent_num_bytes,
                                .owning_root = btrfs_root_id(root),
                                .ref_root = btrfs_header_owner(leaf),
                        };

                        bytes_deleted += extent_num_bytes;

                        btrfs_init_data_ref(&ref, control->ino, extent_offset,
                                            btrfs_root_id(root), false);
                        ret = btrfs_free_extent(trans, &ref);
                        if (unlikely(ret)) {
                                btrfs_abort_transaction(trans, ret);
                                break;
                        }
                        if (be_nice && btrfs_check_space_for_delayed_refs(fs_info))
                                refill_delayed_refs_rsv = true;
                }

                if (found_type == BTRFS_INODE_ITEM_KEY)
                        break;

                /*
                 * Flush the pending deletions and search again when we hit
                 * the start of the leaf, when the current slot is not the low
                 * end of the pending run, or when the delayed refs
                 * reservation needs a refill.  Otherwise just move to the
                 * previous slot.
                 */
                if (path->slots[0] == 0 ||
                    path->slots[0] != pending_del_slot ||
                    refill_delayed_refs_rsv) {
                        if (pending_del_nr) {
                                ret = btrfs_del_items(trans, root, path,
                                                pending_del_slot,
                                                pending_del_nr);
                                if (unlikely(ret)) {
                                        btrfs_abort_transaction(trans, ret);
                                        break;
                                }
                                pending_del_nr = 0;
                        }
                        btrfs_release_path(path);

                        /*
                         * We can generate a lot of delayed refs, so we need to
                         * throttle every once in a while and make sure we're
                         * adding enough space to keep up with the work we are
                         * generating.  Since we hold a transaction here we
                         * can't flush, and we don't want to FLUSH_LIMIT because
                         * we could have generated too many delayed refs to
                         * actually allocate, so just bail if we're short and
                         * let the normal reservation dance happen higher up.
                         */
                        if (refill_delayed_refs_rsv) {
                                ret = btrfs_delayed_refs_rsv_refill(fs_info,
                                                        BTRFS_RESERVE_NO_FLUSH);
                                if (ret) {
                                        ret = -EAGAIN;
                                        break;
                                }
                        }
                        goto search_again;
                } else {
                        path->slots[0]--;
                }
        }
out:
        /* Flush any deletions still queued, unless we are failing already. */
        if (ret >= 0 && pending_del_nr) {
                int ret2;

                ret2 = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr);
                if (unlikely(ret2)) {
                        btrfs_abort_transaction(trans, ret2);
                        ret = ret2;
                }
        }

        ASSERT(control->last_size >= new_size);
        /* On full success report exactly the size that was asked for. */
        if (!ret && control->last_size > new_size)
                control->last_size = new_size;

        return ret;
}