#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include "messages.h"
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "free-space-tree.h"
#include "transaction.h"
#include "block-group.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
#include "root-tree.h"
/*
 * Forward declaration: the definition lives further down in this file, but
 * __btrfs_remove_from_free_space_tree() and __btrfs_add_to_free_space_tree()
 * call it before that point.
 */
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path);
struct btrfs_root *btrfs_free_space_root(struct btrfs_block_group *block_group)
{
struct btrfs_key key = {
.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
.type = BTRFS_ROOT_ITEM_KEY,
.offset = 0,
};
if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
key.offset = block_group->global_root_id;
return btrfs_global_root(block_group->fs_info, &key);
}
/*
 * Compute the extent counts at which @cache switches between the extent and
 * the bitmap representation of its free space.
 *
 * bitmap_high_thresh is the number of bare items (free space extent items
 * carry no data) that take as much room as the bitmap items needed to cover
 * the whole block group. bitmap_low_thresh sits 100 extents below that, or at
 * 0 when the high threshold is 100 or less, which leaves a gap between the
 * two conversion points.
 *
 * A zero-length block group triggers a warning but is still processed.
 */
void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache)
{
	const size_t item_size = sizeof(struct btrfs_item);
	u32 range_per_bitmap;
	u64 nr_bitmaps;
	u64 bitmap_bytes;

	if (WARN_ON(cache->length == 0))
		btrfs_warn(cache->fs_info, "block group %llu length is zero",
			   cache->start);

	/* Bytes of the block group described by a single bitmap item. */
	range_per_bitmap = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
	/* Round up: a partially covered tail still needs its own bitmap. */
	nr_bitmaps = div_u64(cache->length + range_per_bitmap - 1, range_per_bitmap);
	bitmap_bytes = nr_bitmaps * (item_size + BTRFS_FREE_SPACE_BITMAP_SIZE);

	cache->bitmap_high_thresh = div_u64(bitmap_bytes, item_size);
	cache->bitmap_low_thresh = (cache->bitmap_high_thresh > 100) ?
				   cache->bitmap_high_thresh - 100 : 0;
}
static int add_new_free_space_info(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
struct btrfs_root *root = btrfs_free_space_root(block_group);
struct btrfs_free_space_info *info;
struct btrfs_key key;
struct extent_buffer *leaf;
int ret;
key.objectid = block_group->start;
key.type = BTRFS_FREE_SPACE_INFO_KEY;
key.offset = block_group->length;
ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
if (ret)
return ret;
leaf = path->nodes[0];
info = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_free_space_info);
btrfs_set_free_space_extent_count(leaf, info, 0);
btrfs_set_free_space_flags(leaf, info, 0);
btrfs_release_path(path);
return 0;
}
struct btrfs_free_space_info *btrfs_search_free_space_info(
struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path, int cow)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_root *root = btrfs_free_space_root(block_group);
struct btrfs_key key;
int ret;
key.objectid = block_group->start;
key.type = BTRFS_FREE_SPACE_INFO_KEY;
key.offset = block_group->length;
ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
if (ret < 0)
return ERR_PTR(ret);
if (ret != 0) {
btrfs_warn(fs_info, "missing free space info for %llu",
block_group->start);
DEBUG_WARN();
return ERR_PTR(-ENOENT);
}
return btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_free_space_info);
}
/*
 * Search for @key and position @p on the item right before the slot the
 * search ended at.
 *
 * Callers in this file pass a key that is not expected to exist (type and
 * offset set to their maximum values), so an exact match is treated as an
 * error, as is ending up in slot 0 where there is no previous item in the
 * leaf.
 *
 * Returns 0 with @p->slots[0] stepped back by one, -EIO for the two
 * unexpected cases above, or the negative error from btrfs_search_slot().
 */
static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_key *key, struct btrfs_path *p,
				  int ins_len, int cow)
{
	const int found = btrfs_search_slot(trans, root, key, p, ins_len, cow);

	if (found < 0)
		return found;

	if (unlikely(found == 0)) {
		DEBUG_WARN();
		return -EIO;
	}

	if (unlikely(p->slots[0] == 0)) {
		DEBUG_WARN("no previous slot found");
		return -EIO;
	}

	p->slots[0] -= 1;
	return 0;
}
/*
 * Number of bytes needed for a bitmap that has one bit for every
 * (1 << sectorsize_bits) bytes of @size, rounded up to a whole byte.
 */
static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
					 u64 size)
{
	const u64 nr_bits = size >> fs_info->sectorsize_bits;

	return DIV_ROUND_UP(nr_bits, BITS_PER_BYTE);
}
/*
 * Allocate a zeroed bitmap of at least @bitmap_size bytes, rounded up to a
 * multiple of sizeof(unsigned long).
 *
 * The allocation uses GFP_KERNEL but is wrapped in a memalloc_nofs_save() /
 * memalloc_nofs_restore() pair.
 *
 * Returns the buffer (to be released with kvfree()) or NULL on failure.
 */
static unsigned long *alloc_bitmap(u32 bitmap_size)
{
	const u32 alloc_size = round_up(bitmap_size, sizeof(unsigned long));
	unsigned int saved_flags;
	unsigned long *bitmap;

	saved_flags = memalloc_nofs_save();
	bitmap = kvzalloc(alloc_size, GFP_KERNEL);
	memalloc_nofs_restore(saved_flags);

	return bitmap;
}
/*
 * Set @len consecutive bits in @map, starting at bit index @start.
 *
 * The bitmap is walked through a u8 pointer, beginning at byte
 * BIT_BYTE(@start), so bits are placed byte by byte rather than word by word.
 * NOTE(review): presumably this is required because the buffer is later
 * copied verbatim into on-disk bitmap items - confirm against the on-disk
 * format.
 */
static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
{
u8 *p = ((u8 *)map) + BIT_BYTE(start);
/* Bit index one past the last bit to set. */
const unsigned int size = start + len;
/* Bits from @start up to the end of the first byte touched. */
int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
/*
 * Handle every byte that is filled up to its last bit. Only the first one
 * can be entered part-way through; all following bytes are set entirely.
 */
while (len - bits_to_set >= 0) {
*p |= mask_to_set;
len -= bits_to_set;
bits_to_set = BITS_PER_BYTE;
mask_to_set = ~0;
p++;
}
/* Remaining bits end inside the current byte: trim the mask at @size. */
if (len) {
mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
*p |= mask_to_set;
}
}
/*
 * Convert the free space items of @block_group from extent items to bitmap
 * items.
 *
 * Every BTRFS_FREE_SPACE_EXTENT_KEY item of the block group is recorded in an
 * in-memory bitmap and deleted, BTRFS_FREE_SPACE_USING_BITMAPS is set in the
 * free space info item, and the in-memory bitmap is then written back as
 * BTRFS_FREE_SPACE_BITMAP_KEY items.
 *
 * @trans:       transaction handle; aborted on every failure except the
 *               bitmap allocation
 * @block_group: block group to convert
 * @path:        caller-provided path, used as scratch
 *
 * Returns 0 on success or a negative errno. 0 is also returned, with nothing
 * converted, when the temporary bitmap cannot be allocated.
 */
EXPORT_FOR_TESTS
int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = btrfs_free_space_root(block_group);
struct btrfs_free_space_info *info;
struct btrfs_key key, found_key;
struct extent_buffer *leaf;
unsigned long *bitmap;
char *bitmap_cursor;
u64 start, end;
u64 bitmap_range, i;
u32 bitmap_size, flags, expected_extent_count;
u32 extent_count = 0;
int done = 0, nr;
int ret;
/* In-memory bitmap covering the whole block group. */
bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
bitmap = alloc_bitmap(bitmap_size);
/*
 * Nothing has been modified yet, so on allocation failure the block group
 * simply keeps its extent items and no error is reported.
 * NOTE(review): looks like a deliberate best-effort - confirm.
 */
if (unlikely(!bitmap))
return 0;
start = block_group->start;
end = btrfs_block_group_end(block_group);
/*
 * Largest possible key with an objectid inside the block group: the
 * "previous slot" search lands on the last item at or before it.
 */
key.objectid = end - 1;
key.type = (u8)-1;
key.offset = (u64)-1;
/* One iteration per leaf, until the free space info item is reached. */
while (!done) {
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
leaf = path->nodes[0];
nr = 0;
/*
 * From here on slots[0] is one past the item being looked at, so that
 * it ends up as the first slot to delete once the walk stops.
 */
path->slots[0]++;
/* Walk the leaf backwards, towards slot 0. */
while (path->slots[0] > 0) {
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
/* Reached the info item of this block group: it is kept. */
ASSERT(found_key.objectid == block_group->start);
ASSERT(found_key.offset == block_group->length);
done = 1;
break;
} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
u64 first, last;
ASSERT(found_key.objectid >= start);
ASSERT(found_key.objectid < end);
ASSERT(found_key.objectid + found_key.offset <= end);
/* Translate the byte range into bit indexes (sectorsize units). */
first = div_u64(found_key.objectid - start,
fs_info->sectorsize);
last = div_u64(found_key.objectid + found_key.offset - start,
fs_info->sectorsize);
le_bitmap_set(bitmap, first, last - first);
extent_count++;
nr++;
path->slots[0]--;
} else {
/* No other key type is expected here. */
ASSERT(0);
}
}
/* Delete the nr extent items collected from this leaf in one go. */
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_release_path(path);
}
info = btrfs_search_free_space_info(trans, block_group, path, 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
btrfs_abort_transaction(trans, ret);
goto out;
}
leaf = path->nodes[0];
/* Flip the on-disk flag and refresh the copy cached in the block group. */
flags = btrfs_free_space_flags(leaf, info);
flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
block_group->using_free_space_bitmaps = true;
block_group->using_free_space_bitmaps_cached = true;
btrfs_set_free_space_flags(leaf, info, flags);
expected_extent_count = btrfs_free_space_extent_count(leaf, info);
btrfs_release_path(path);
/* The number of items deleted must match the count stored in the info item. */
if (unlikely(extent_count != expected_extent_count)) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
expected_extent_count);
ret = -EIO;
btrfs_abort_transaction(trans, ret);
goto out;
}
/*
 * Write the in-memory bitmap back as bitmap items, each covering at most
 * bitmap_range bytes of the block group.
 */
bitmap_cursor = (char *)bitmap;
bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
i = start;
while (i < end) {
unsigned long ptr;
u64 extent_size;
u32 data_size;
/* The last bitmap may cover less than a full range. */
extent_size = min(end - i, bitmap_range);
data_size = free_space_bitmap_size(fs_info, extent_size);
key.objectid = i;
key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
key.offset = extent_size;
ret = btrfs_insert_empty_item(trans, root, path, &key,
data_size);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
leaf = path->nodes[0];
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
write_extent_buffer(leaf, bitmap_cursor, ptr,
data_size);
btrfs_release_path(path);
i += extent_size;
bitmap_cursor += data_size;
}
ret = 0;
out:
kvfree(bitmap);
return ret;
}
/*
 * Convert the free space items of @block_group from bitmap items to extent
 * items.
 *
 * Every BTRFS_FREE_SPACE_BITMAP_KEY item of the block group is copied into an
 * in-memory bitmap and deleted, BTRFS_FREE_SPACE_USING_BITMAPS is cleared in
 * the free space info item, and one BTRFS_FREE_SPACE_EXTENT_KEY item is
 * inserted for every run of set bits.
 *
 * @trans:       transaction handle; aborted on every failure except the
 *               bitmap allocation
 * @block_group: block group to convert
 * @path:        caller-provided path, used as scratch
 *
 * Returns 0 on success or a negative errno. 0 is also returned, with nothing
 * converted, when the temporary bitmap cannot be allocated.
 */
EXPORT_FOR_TESTS
int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = btrfs_free_space_root(block_group);
struct btrfs_free_space_info *info;
struct btrfs_key key, found_key;
struct extent_buffer *leaf;
unsigned long *bitmap;
u64 start, end;
u32 bitmap_size, flags, expected_extent_count;
unsigned long nrbits, start_bit, end_bit;
u32 extent_count = 0;
int done = 0, nr;
int ret;
/* In-memory bitmap covering the whole block group. */
bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
bitmap = alloc_bitmap(bitmap_size);
/*
 * Nothing has been modified yet, so on allocation failure the block group
 * simply keeps its bitmap items and no error is reported.
 * NOTE(review): looks like a deliberate best-effort - confirm.
 */
if (unlikely(!bitmap))
return 0;
start = block_group->start;
end = btrfs_block_group_end(block_group);
/*
 * Largest possible key with an objectid inside the block group: the
 * "previous slot" search lands on the last item at or before it.
 */
key.objectid = end - 1;
key.type = (u8)-1;
key.offset = (u64)-1;
/* One iteration per leaf, until the free space info item is reached. */
while (!done) {
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
leaf = path->nodes[0];
nr = 0;
/*
 * From here on slots[0] is one past the item being looked at, so that
 * it ends up as the first slot to delete once the walk stops.
 */
path->slots[0]++;
/* Walk the leaf backwards, towards slot 0. */
while (path->slots[0] > 0) {
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
/* Reached the info item of this block group: it is kept. */
ASSERT(found_key.objectid == block_group->start);
ASSERT(found_key.offset == block_group->length);
done = 1;
break;
} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
unsigned long ptr;
char *bitmap_cursor;
u32 bitmap_pos, data_size;
ASSERT(found_key.objectid >= start);
ASSERT(found_key.objectid < end);
ASSERT(found_key.objectid + found_key.offset <= end);
/* Byte offset of this item's data inside the in-memory bitmap. */
bitmap_pos = div_u64(found_key.objectid - start,
fs_info->sectorsize *
BITS_PER_BYTE);
bitmap_cursor = ((char *)bitmap) + bitmap_pos;
data_size = free_space_bitmap_size(fs_info,
found_key.offset);
/* Step onto the item before reading its data. */
path->slots[0]--;
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
read_extent_buffer(leaf, bitmap_cursor, ptr,
data_size);
nr++;
} else {
/* No other key type is expected here. */
ASSERT(0);
}
}
/* Delete the nr bitmap items collected from this leaf in one go. */
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_release_path(path);
}
info = btrfs_search_free_space_info(trans, block_group, path, 1);
if (IS_ERR(info)) {
ret = PTR_ERR(info);
btrfs_abort_transaction(trans, ret);
goto out;
}
leaf = path->nodes[0];
/* Flip the on-disk flag and refresh the copy cached in the block group. */
flags = btrfs_free_space_flags(leaf, info);
flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
block_group->using_free_space_bitmaps = false;
block_group->using_free_space_bitmaps_cached = true;
btrfs_set_free_space_flags(leaf, info, flags);
expected_extent_count = btrfs_free_space_extent_count(leaf, info);
btrfs_release_path(path);
/* Insert one extent item per run of consecutive set bits. */
nrbits = block_group->length >> fs_info->sectorsize_bits;
start_bit = find_next_bit_le(bitmap, nrbits, 0);
while (start_bit < nrbits) {
end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
ASSERT(start_bit < end_bit);
key.objectid = start + start_bit * fs_info->sectorsize;
key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
key.offset = (end_bit - start_bit) * fs_info->sectorsize;
/* Extent items carry no data: the key alone describes the range. */
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_release_path(path);
extent_count++;
start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
}
/* The number of runs found must match the count stored in the info item. */
if (unlikely(extent_count != expected_extent_count)) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
expected_extent_count);
ret = -EIO;
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = 0;
out:
kvfree(bitmap);
return ret;
}
/*
 * Add @new_extents (which may be negative) to the extent count stored in the
 * free space info item of @block_group and switch the on-disk representation
 * when the new count crosses a threshold:
 *
 *  - extents in use and count > bitmap_high_thresh: convert to bitmaps
 *  - bitmaps in use and count < bitmap_low_thresh:  convert to extents
 *
 * Nothing is done when @new_extents is 0.
 *
 * Returns 0 on success, or the error from the info item lookup or from the
 * conversion.
 */
static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
					  struct btrfs_block_group *block_group,
					  struct btrfs_path *path,
					  int new_extents)
{
	struct btrfs_free_space_info *info;
	u32 flags;
	u32 count;

	if (!new_extents)
		return 0;

	info = btrfs_search_free_space_info(trans, block_group, path, 1);
	if (IS_ERR(info))
		return PTR_ERR(info);

	flags = btrfs_free_space_flags(path->nodes[0], info);
	count = btrfs_free_space_extent_count(path->nodes[0], info) + new_extents;
	btrfs_set_free_space_extent_count(path->nodes[0], info, count);
	btrfs_release_path(path);

	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
		if (count < block_group->bitmap_low_thresh)
			return btrfs_convert_free_space_to_extents(trans, block_group,
								   path);
	} else {
		if (count > block_group->bitmap_high_thresh)
			return btrfs_convert_free_space_to_bitmaps(trans, block_group,
								   path);
	}

	return 0;
}
/*
 * Test the bit that describes byte @offset in the bitmap item @path currently
 * points at.
 *
 * The item must be a BTRFS_FREE_SPACE_BITMAP_KEY item whose range contains
 * @offset (both are asserted).
 *
 * Returns the result of extent_buffer_test_bit() for that bit.
 */
EXPORT_FOR_TESTS
bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group,
			       struct btrfs_path *path, u64 offset)
{
	struct extent_buffer *eb = path->nodes[0];
	struct btrfs_key key;
	u64 found_start, found_end;
	unsigned long item_off, bit_nr;

	btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);

	found_start = key.objectid;
	found_end = found_start + key.offset;
	ASSERT(offset >= found_start && offset < found_end);

	item_off = btrfs_item_ptr_offset(eb, path->slots[0]);
	/* Bit index = distance from the bitmap start in sectorsize units. */
	bit_nr = div_u64(offset - found_start, block_group->fs_info->sectorsize);

	return extent_buffer_test_bit(eb, item_off, bit_nr);
}
/*
 * Set or clear the bits for [*start, *start + *size) in the bitmap item @path
 * currently points at, limited to the part of the range that this bitmap
 * covers.
 *
 * On return *start has been advanced to the end of the processed part and
 * *size reduced by the same amount, so a caller can continue with the next
 * bitmap until *size reaches 0.
 *
 * @set_bits: true to set the bits, false to clear them
 */
static void free_space_modify_bits(struct btrfs_trans_handle *trans,
				   struct btrfs_block_group *block_group,
				   struct btrfs_path *path, u64 *start, u64 *size,
				   bool set_bits)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct extent_buffer *eb = path->nodes[0];
	struct btrfs_key key;
	u64 found_start, found_end;
	u64 end = *start + *size;
	unsigned long item_off, first_bit, last_bit;

	btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);

	found_start = key.objectid;
	found_end = found_start + key.offset;
	ASSERT(*start >= found_start && *start < found_end);
	ASSERT(end > found_start);

	/* Do not run past the end of this bitmap. */
	end = (end > found_end) ? found_end : end;

	item_off = btrfs_item_ptr_offset(eb, path->slots[0]);
	first_bit = (*start - found_start) >> fs_info->sectorsize_bits;
	last_bit = (end - found_start) >> fs_info->sectorsize_bits;

	if (set_bits)
		extent_buffer_bitmap_set(eb, item_off, first_bit,
					 last_bit - first_bit);
	else
		extent_buffer_bitmap_clear(eb, item_off, first_bit,
					   last_bit - first_bit);
	btrfs_mark_buffer_dirty(trans, eb);

	/* Report back how much of the range is still left to process. */
	*size -= end - *start;
	*start = end;
}
static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *p)
{
struct btrfs_key key;
if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
p->slots[0]++;
return 0;
}
btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
btrfs_release_path(p);
key.objectid += key.offset;
key.type = (u8)-1;
key.offset = (u64)-1;
return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
}
/*
 * Add or remove the range [@start, @start + @size) of @block_group when its
 * free space is stored as bitmaps.
 *
 * The bits of the range are set (@remove == false) or cleared (@remove ==
 * true) in every bitmap item the range overlaps. The bits right before and
 * right after the range are sampled to work out by how much the number of
 * free extents changes, and that delta is handed to
 * update_free_space_extent_count().
 *
 * Returns 0 on success or a negative errno.
 */
static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path,
u64 start, u64 size, bool remove)
{
struct btrfs_root *root = btrfs_free_space_root(block_group);
struct btrfs_key key;
u64 end = start + size;
u64 cur_start, cur_size;
bool prev_bit_set = false;
bool next_bit_set = false;
int new_extents;
int ret;
/*
 * Read the bit of the block right before the range, provided that block
 * still belongs to the block group.
 */
if (start > block_group->start) {
u64 prev_block = start - block_group->fs_info->sectorsize;
key.objectid = prev_block;
key.type = (u8)-1;
key.offset = (u64)-1;
ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
if (ret)
return ret;
prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block);
/*
 * The previous block may live in the bitmap before the one that holds
 * @start; if so, step forward to the bitmap containing @start.
 */
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (start >= key.objectid + key.offset) {
ret = free_space_next_bitmap(trans, root, path);
if (ret)
return ret;
}
} else {
/* Range begins at the block group start: no previous block to look at. */
key.objectid = start;
key.type = (u8)-1;
key.offset = (u64)-1;
ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
if (ret)
return ret;
}
/*
 * Walk all bitmaps overlapped by the range. Each call processes the part
 * covered by the current bitmap and advances cur_start / cur_size.
 */
cur_start = start;
cur_size = size;
while (1) {
free_space_modify_bits(trans, block_group, path, &cur_start,
&cur_size, !remove);
if (cur_size == 0)
break;
ret = free_space_next_bitmap(trans, root, path);
if (ret)
return ret;
}
/*
 * Read the bit of the block right after the range, provided that block
 * still belongs to the block group.
 */
if (end < btrfs_block_group_end(block_group)) {
/* The next block may live in the following bitmap. */
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (end >= key.objectid + key.offset) {
ret = free_space_next_bitmap(trans, root, path);
if (ret)
return ret;
}
next_bit_set = btrfs_free_space_test_bit(block_group, path, end);
}
if (remove) {
/* The extent containing the range goes away... */
new_extents = -1;
if (prev_bit_set) {
/* ...but free space is left over on the left. */
new_extents++;
}
if (next_bit_set) {
/* ...but free space is left over on the right. */
new_extents++;
}
} else {
/* The range becomes a new free extent... */
new_extents = 1;
if (prev_bit_set) {
/* ...unless it merges with a free neighbour on the left. */
new_extents--;
}
if (next_bit_set) {
/* ...unless it merges with a free neighbour on the right. */
new_extents--;
}
}
btrfs_release_path(path);
return update_free_space_extent_count(trans, block_group, path, new_extents);
}
/*
 * Remove the range [@start, @start + @size) of @block_group when its free
 * space is stored as extent items.
 *
 * The range must lie entirely inside one existing free space extent item
 * (asserted). That item is deleted and up to two new items are inserted for
 * whatever is left of it before @start and after the end of the range. The
 * resulting change in the number of extents (-1, 0 or +1) is handed to
 * update_free_space_extent_count().
 *
 * Returns 0 on success or a negative errno.
 */
static int remove_free_space_extent(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path,
u64 start, u64 size)
{
struct btrfs_root *root = btrfs_free_space_root(block_group);
struct btrfs_key key;
u64 found_start, found_end;
u64 end = start + size;
/* Starts at -1 for the item deleted below. */
int new_extents = -1;
int ret;
/* Find the last item whose objectid is <= @start. */
key.objectid = start;
key.type = (u8)-1;
key.offset = (u64)-1;
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (ret)
return ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
found_start = key.objectid;
found_end = key.objectid + key.offset;
ASSERT(start >= found_start && end <= found_end);
/* Delete the extent item that contains the range. */
ret = btrfs_del_item(trans, root, path);
if (ret)
return ret;
/* Re-add what is left of it in front of the range. */
if (start > found_start) {
key.objectid = found_start;
key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
key.offset = start - found_start;
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
if (ret)
return ret;
new_extents++;
}
/* Re-add what is left of it behind the range. */
if (end < found_end) {
key.objectid = end;
key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
key.offset = found_end - end;
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
if (ret)
return ret;
new_extents++;
}
btrfs_release_path(path);
return update_free_space_extent_count(trans, block_group, path, new_extents);
}
/*
 * Tell whether the free space of @bg is stored as bitmaps.
 *
 * The answer is cached in @bg: once using_free_space_bitmaps_cached is set
 * the cached value is returned without touching the tree. Otherwise the flags
 * of the free space info item are read (read-only search, no transaction)
 * and the cache is filled.
 *
 * Returns 1 if bitmaps are in use, 0 if not, or a negative errno if the info
 * item could not be looked up.
 */
static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path)
{
	if (!bg->using_free_space_bitmaps_cached) {
		struct btrfs_free_space_info *info;
		u32 info_flags;

		info = btrfs_search_free_space_info(NULL, bg, path, 0);
		if (IS_ERR(info))
			return PTR_ERR(info);

		info_flags = btrfs_free_space_flags(path->nodes[0], info);
		btrfs_release_path(path);

		bg->using_free_space_bitmaps =
			(info_flags & BTRFS_FREE_SPACE_USING_BITMAPS);
		bg->using_free_space_bitmaps_cached = true;
	}

	return bg->using_free_space_bitmaps;
}
/*
 * Remove the range [@start, @start + @size) from the free space tree of
 * @block_group.
 *
 * The block group's free space items are created first if that is still
 * pending, then the removal is dispatched to the bitmap or the extent
 * implementation depending on the format currently in use.
 *
 * The public wrapper in this file calls this with
 * block_group->free_space_lock held.
 *
 * Returns 0 on success or a negative errno.
 */
EXPORT_FOR_TESTS
int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path, u64 start, u64 size)
{
	int bitmaps;
	int ret;

	ret = __add_block_group_free_space(trans, block_group, path);
	if (ret)
		return ret;

	bitmaps = using_bitmaps(block_group, path);
	if (bitmaps < 0)
		return bitmaps;

	if (!bitmaps)
		return remove_free_space_extent(trans, block_group, path,
						start, size);

	return modify_free_space_bitmap(trans, block_group, path,
					start, size, true);
}
/*
 * Remove the range [@start, @start + @size) from the free space tree.
 *
 * Does nothing when the FREE_SPACE_TREE compat_ro feature is not enabled.
 * Otherwise the block group containing @start is looked up and the range is
 * removed under that block group's free_space_lock.
 *
 * The transaction is aborted on every failure.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT if no
 * block group contains @start, or the error from the removal itself.
 */
int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
				      u64 start, u64 size)
{
	BTRFS_PATH_AUTO_FREE(path);
	struct btrfs_block_group *bg;
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		btrfs_abort_transaction(trans, -ENOMEM);
		return -ENOMEM;
	}

	bg = btrfs_lookup_block_group(trans->fs_info, start);
	if (unlikely(!bg)) {
		DEBUG_WARN("no block group found for start=%llu", start);
		btrfs_abort_transaction(trans, -ENOENT);
		return -ENOENT;
	}

	mutex_lock(&bg->free_space_lock);
	ret = __btrfs_remove_from_free_space_tree(trans, bg, path, start, size);
	mutex_unlock(&bg->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	/* Drop the reference taken by the lookup. */
	btrfs_put_block_group(bg);
	return ret;
}
/*
 * Add the range [@start, @start + @size) of @block_group when its free space
 * is stored as extent items.
 *
 * If a free space extent ends exactly at @start and/or begins exactly at the
 * end of the range, it is deleted and absorbed into the new item, so adjacent
 * free extents are always merged. The resulting change in the number of
 * extents (+1, 0 or -1) is handed to update_free_space_extent_count().
 *
 * Returns 0 on success or a negative errno.
 */
static int add_free_space_extent(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path,
u64 start, u64 size)
{
struct btrfs_root *root = btrfs_free_space_root(block_group);
struct btrfs_key key, new_key;
u64 found_start, found_end;
u64 end = start + size;
/* Starts at +1 for the item inserted at the end. */
int new_extents = 1;
int ret;
/* Key of the item to insert; grown below if neighbours are merged in. */
new_key.objectid = start;
new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
new_key.offset = size;
/* Look for a neighbour on the left, unless the range starts the block group. */
if (start == block_group->start)
goto right;
key.objectid = start - 1;
key.type = (u8)-1;
key.offset = (u64)-1;
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (ret)
return ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Landed on the info item: there is no free extent to the left. */
if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
btrfs_release_path(path);
goto right;
}
found_start = key.objectid;
found_end = key.objectid + key.offset;
ASSERT(found_start >= block_group->start &&
found_end > block_group->start);
ASSERT(found_start < start && found_end <= start);
/* Left neighbour touches the range: delete it and absorb it. */
if (found_end == start) {
ret = btrfs_del_item(trans, root, path);
if (ret)
return ret;
new_key.objectid = found_start;
new_key.offset += key.offset;
new_extents--;
}
btrfs_release_path(path);
right:
/* Look for a neighbour on the right, unless the range ends the block group. */
if (end == btrfs_block_group_end(block_group))
goto insert;
key.objectid = end;
key.type = (u8)-1;
key.offset = (u64)-1;
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (ret)
return ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Landed on the info item: there is no free extent at or before @end. */
if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
btrfs_release_path(path);
goto insert;
}
found_start = key.objectid;
found_end = key.objectid + key.offset;
ASSERT(found_start >= block_group->start &&
found_end > block_group->start);
/* The item found must not overlap the range being added. */
ASSERT((found_start < start && found_end <= start) ||
(found_start >= end && found_end > end));
/* Right neighbour touches the range: delete it and absorb it. */
if (found_start == end) {
ret = btrfs_del_item(trans, root, path);
if (ret)
return ret;
new_key.offset += key.offset;
new_extents--;
}
btrfs_release_path(path);
insert:
/* Insert the (possibly merged) extent item; it carries no data. */
ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
if (ret)
return ret;
btrfs_release_path(path);
return update_free_space_extent_count(trans, block_group, path, new_extents);
}
/*
 * Add the range [@start, @start + @size) to the free space tree of
 * @block_group.
 *
 * The block group's free space items are created first if that is still
 * pending, then the addition is dispatched to the bitmap or the extent
 * implementation depending on the format currently in use.
 *
 * The callers in this file hold block_group->free_space_lock.
 *
 * Returns 0 on success or a negative errno.
 */
EXPORT_FOR_TESTS
int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_block_group *block_group,
				   struct btrfs_path *path, u64 start, u64 size)
{
	int bitmaps;
	int ret;

	ret = __add_block_group_free_space(trans, block_group, path);
	if (ret)
		return ret;

	bitmaps = using_bitmaps(block_group, path);
	if (bitmaps < 0)
		return bitmaps;

	if (!bitmaps)
		return add_free_space_extent(trans, block_group, path,
					     start, size);

	return modify_free_space_bitmap(trans, block_group, path,
					start, size, false);
}
/*
 * Add the range [@start, @start + @size) to the free space tree.
 *
 * Does nothing when the FREE_SPACE_TREE compat_ro feature is not enabled.
 * Otherwise the block group containing @start is looked up and the range is
 * added under that block group's free_space_lock.
 *
 * The transaction is aborted on every failure.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT if no
 * block group contains @start, or the error from the addition itself.
 */
int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
				 u64 start, u64 size)
{
	BTRFS_PATH_AUTO_FREE(path);
	struct btrfs_block_group *bg;
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		btrfs_abort_transaction(trans, -ENOMEM);
		return -ENOMEM;
	}

	bg = btrfs_lookup_block_group(trans->fs_info, start);
	if (unlikely(!bg)) {
		DEBUG_WARN("no block group found for start=%llu", start);
		btrfs_abort_transaction(trans, -ENOENT);
		return -ENOENT;
	}

	mutex_lock(&bg->free_space_lock);
	ret = __btrfs_add_to_free_space_tree(trans, bg, path, start, size);
	mutex_unlock(&bg->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	/* Drop the reference taken by the lookup. */
	btrfs_put_block_group(bg);
	return ret;
}
/*
 * Build the free space tree entries of @block_group from the extent tree.
 *
 * A free space info item is inserted first. The extent and metadata items
 * that fall inside the block group are then walked in key order, and every
 * gap between them, as well as the space after the last one, is added as
 * free space.
 *
 * Unlike most functions in this file, this one does not abort the
 * transaction itself; the callers in this file do so on error.
 *
 * Returns 0 on success, -ENOMEM if a path cannot be allocated, -EUCLEAN if
 * the extent root is missing, or another negative errno.
 */
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group)
{
struct btrfs_root *extent_root;
/* path walks the extent tree, path2 is used for the free space tree. */
BTRFS_PATH_AUTO_FREE(path);
BTRFS_PATH_AUTO_FREE(path2);
struct btrfs_key key;
u64 start, end;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path2 = btrfs_alloc_path();
if (!path2)
return -ENOMEM;
/* The extent tree is only scanned forwards. */
path->reada = READA_FORWARD;
ret = add_new_free_space_info(trans, block_group, path2);
if (ret)
return ret;
extent_root = btrfs_extent_root(trans->fs_info, block_group->start);
if (unlikely(!extent_root)) {
btrfs_err(trans->fs_info,
"missing extent root for block group at offset %llu",
block_group->start);
return -EUCLEAN;
}
mutex_lock(&block_group->free_space_lock);
/* Start at the first extent tree item at or after the block group start. */
key.objectid = block_group->start;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
if (ret < 0)
goto out_locked;
/*
 * NOTE(review): a positive return presumably means that no item at or
 * after the key exists. The loop below is then skipped and the whole block
 * group is added as free space - confirm against
 * btrfs_search_slot_for_read().
 */
/* start tracks the end of the last allocated extent seen so far. */
start = block_group->start;
end = btrfs_block_group_end(block_group);
while (ret == 0) {
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (key.type == BTRFS_EXTENT_ITEM_KEY ||
key.type == BTRFS_METADATA_ITEM_KEY) {
/* First extent beyond this block group: done. */
if (key.objectid >= end)
break;
/* Gap between the previous extent and this one is free. */
if (start < key.objectid) {
ret = __btrfs_add_to_free_space_tree(trans,
block_group,
path2, start,
key.objectid -
start);
if (ret)
goto out_locked;
}
start = key.objectid;
/*
 * The length of a metadata item is taken from nodesize rather than
 * from the key offset, which is only used for extent items.
 */
if (key.type == BTRFS_METADATA_ITEM_KEY)
start += trans->fs_info->nodesize;
else
start += key.offset;
} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
/* Item of a different block group: nothing more to scan. */
if (key.objectid != block_group->start)
break;
}
ret = btrfs_next_item(extent_root, path);
if (ret < 0)
goto out_locked;
}
/* Space after the last extent, up to the end of the block group. */
if (start < end) {
ret = __btrfs_add_to_free_space_tree(trans, block_group, path2,
start, end - start);
if (ret)
goto out_locked;
}
/* The loop may have ended with a positive ret, which is not an error. */
ret = 0;
out_locked:
mutex_unlock(&block_group->free_space_lock);
return ret;
}
/*
 * Create the free space tree and populate it for every block group.
 *
 * A new tree root is created and registered as a global root, each block
 * group in the block group cache is populated from the extent tree, the
 * FREE_SPACE_TREE and FREE_SPACE_TREE_VALID compat_ro flags are set and the
 * transaction is committed.
 *
 * BTRFS_FS_CREATING_FREE_SPACE_TREE is set while the tree is being built and
 * BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED until the commit has returned; both are
 * cleared on the error paths as well.
 *
 * Returns 0 on success or a negative errno. On failure after the transaction
 * has been started, the transaction is aborted and ended.
 */
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_root *free_space_root;
struct btrfs_block_group *block_group;
struct rb_node *node;
int ret;
trans = btrfs_start_transaction(tree_root, 0);
if (IS_ERR(trans))
return PTR_ERR(trans);
set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
free_space_root = btrfs_create_tree(trans,
BTRFS_FREE_SPACE_TREE_OBJECTID);
if (IS_ERR(free_space_root)) {
ret = PTR_ERR(free_space_root);
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out_clear;
}
ret = btrfs_global_root_insert(free_space_root);
if (unlikely(ret)) {
/* The root was not registered, so drop the reference to it here. */
btrfs_put_root(free_space_root);
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out_clear;
}
/* Populate the new tree for every block group in the cache tree. */
node = rb_first_cached(&fs_info->block_group_cache_tree);
while (node) {
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
ret = populate_free_space_tree(trans, block_group);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out_clear;
}
node = rb_next(node);
}
btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
ret = btrfs_commit_transaction(trans);
/* The UNTRUSTED bit is only dropped once the commit has returned. */
clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
return ret;
out_clear:
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
return ret;
}
static int clear_free_space_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct rb_node *node;
int nr;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = 0;
key.type = 0;
key.offset = 0;
while (1) {
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
return ret;
nr = btrfs_header_nritems(path->nodes[0]);
if (!nr)
break;
path->slots[0] = 0;
ret = btrfs_del_items(trans, root, path, 0, nr);
if (ret)
return ret;
btrfs_release_path(path);
}
node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
while (node) {
struct btrfs_block_group *bg;
bg = rb_entry(node, struct btrfs_block_group, cache_node);
clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
node = rb_next(node);
cond_resched();
}
return 0;
}
/*
 * Delete the free space tree.
 *
 * The FREE_SPACE_TREE and FREE_SPACE_TREE_VALID compat_ro flags are cleared,
 * all items are removed from the tree, its root item is deleted, the root is
 * unregistered and its root node is freed, and the transaction is committed.
 *
 * Returns 0 on success or a negative errno. On failure after the transaction
 * has been started, the transaction is aborted and ended.
 */
int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *tree_root = fs_info->tree_root;
/* Key of the free space tree root (offset 0). */
struct btrfs_key key = {
.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
.type = BTRFS_ROOT_ITEM_KEY,
.offset = 0,
};
struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
int ret;
trans = btrfs_start_transaction(tree_root, 0);
if (IS_ERR(trans))
return PTR_ERR(trans);
btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
/* Empty the tree before removing its root. */
ret = clear_free_space_tree(trans, free_space_root);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
ret = btrfs_del_root(trans, &free_space_root->root_key);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
btrfs_global_root_delete(free_space_root);
/* Take the root off the list it is linked on through dirty_list. */
spin_lock(&fs_info->trans_lock);
list_del(&free_space_root->dirty_list);
spin_unlock(&fs_info->trans_lock);
/* Clear the dirty state of the root node, then free its block. */
btrfs_tree_lock(free_space_root->node);
btrfs_clear_buffer_dirty(trans, free_space_root->node);
btrfs_tree_unlock(free_space_root->node);
ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
free_space_root->node, 0, 1);
/* The root reference is dropped whether or not freeing the block worked. */
btrfs_put_root(free_space_root);
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
return btrfs_commit_transaction(trans);
}
/*
 * Rebuild the free space tree from scratch.
 *
 * The existing tree is emptied and then repopulated block group by block
 * group from the extent tree. The work may be spread over several
 * transactions: after each block group the current transaction is ended and
 * a new one started if btrfs_should_end_transaction() asks for it.
 *
 * Returns 0 on success or a negative errno.
 *
 * NOTE(review): the error returns below leave
 * BTRFS_FS_CREATING_FREE_SPACE_TREE and BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED
 * set, unlike btrfs_create_free_space_tree() - confirm this is intended.
 */
int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_trans_handle *trans;
/* Key of the free space tree root (offset 0). */
struct btrfs_key key = {
.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
.type = BTRFS_ROOT_ITEM_KEY,
.offset = 0,
};
struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
struct rb_node *node;
int ret;
trans = btrfs_start_transaction(free_space_root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
/* Also clears BLOCK_GROUP_FLAG_FREE_SPACE_ADDED on every block group. */
ret = clear_free_space_tree(trans, free_space_root);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
node = rb_first_cached(&fs_info->block_group_cache_tree);
while (node) {
struct btrfs_block_group *block_group;
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
/*
 * Skip block groups whose free space was already added since the tree
 * was cleared; __add_block_group_free_space() sets this flag.
 */
if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
&block_group->runtime_flags))
goto next;
ret = populate_free_space_tree(trans, block_group);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
next:
/* Switch to a fresh transaction when asked to end the current one. */
if (btrfs_should_end_transaction(trans)) {
btrfs_end_transaction(trans);
trans = btrfs_start_transaction(free_space_root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
}
node = rb_next(node);
}
btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
ret = btrfs_commit_transaction(trans);
/* The UNTRUSTED bit is only dropped once the commit has returned. */
clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
return ret;
}
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
bool own_path = false;
int ret;
if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
&block_group->runtime_flags))
return 0;
set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
if (!path) {
path = btrfs_alloc_path();
if (unlikely(!path)) {
btrfs_abort_transaction(trans, -ENOMEM);
return -ENOMEM;
}
own_path = true;
}
ret = add_new_free_space_info(trans, block_group, path);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = __btrfs_add_to_free_space_tree(trans, block_group, path,
block_group->start, block_group->length);
if (ret)
btrfs_abort_transaction(trans, ret);
out:
if (own_path)
btrfs_free_path(path);
return ret;
}
/*
 * Create the free space tree entries of @block_group if that is still
 * pending.
 *
 * Does nothing when the FREE_SPACE_TREE compat_ro feature is not enabled.
 * Otherwise the work is done under block_group->free_space_lock with a
 * temporary path allocated by __add_block_group_free_space().
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans,
				     struct btrfs_block_group *block_group)
{
	int ret = 0;

	if (btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) {
		mutex_lock(&block_group->free_space_lock);
		ret = __add_block_group_free_space(trans, block_group, NULL);
		mutex_unlock(&block_group->free_space_lock);
	}

	return ret;
}
/*
 * Remove every free space tree item that belongs to @block_group: its free
 * space extent or bitmap items and, finally, its free space info item.
 *
 * Does nothing when the FREE_SPACE_TREE compat_ro feature is not enabled, or
 * when BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE is still set on the block group.
 *
 * The transaction is aborted on every failure.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group)
{
struct btrfs_root *root = btrfs_free_space_root(block_group);
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key, found_key;
struct extent_buffer *leaf;
u64 start, end;
int done = 0, nr;
int ret;
if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
return 0;
/*
 * The flag is only cleared by __add_block_group_free_space() right before
 * it inserts the items, so while it is set there is nothing to remove.
 */
if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
return 0;
}
path = btrfs_alloc_path();
if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
return ret;
}
start = block_group->start;
end = btrfs_block_group_end(block_group);
/*
 * Largest possible key with an objectid inside the block group: the
 * "previous slot" search lands on the last item at or before it.
 */
key.objectid = end - 1;
key.type = (u8)-1;
key.offset = (u64)-1;
/* One iteration per leaf, until the free space info item has been seen. */
while (!done) {
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
leaf = path->nodes[0];
nr = 0;
/*
 * From here on slots[0] is one past the item being looked at, so that
 * it ends up as the first slot to delete once the walk stops.
 */
path->slots[0]++;
/* Walk the leaf backwards, towards slot 0. */
while (path->slots[0] > 0) {
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
ASSERT(found_key.objectid == block_group->start);
ASSERT(found_key.offset == block_group->length);
/* The info item is the last one to go; it is deleted as well. */
done = 1;
nr++;
path->slots[0]--;
break;
} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
ASSERT(found_key.objectid >= start);
ASSERT(found_key.objectid < end);
ASSERT(found_key.objectid + found_key.offset <= end);
nr++;
path->slots[0]--;
} else {
/* No other key type is expected here. */
ASSERT(0);
}
}
/* Delete the nr items collected from this leaf in one go. */
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
btrfs_release_path(path);
}
return 0;
}
/*
 * Walk the bitmap items that follow the current position of @path and feed
 * every run of set bits to btrfs_add_new_free_space().
 *
 * Iteration stops at the end of the tree or at the next free space info
 * item.  A run of set bits may continue across consecutive bitmap items; a
 * run that is still open when iteration stops is closed at the end of the
 * block group.  Waiters on caching_ctl->wait are woken each time more than
 * CACHING_CTL_WAKE_UP bytes have been added since the previous wake-up.
 *
 * Returns 0 on success, a negative error from the tree walk or from
 * btrfs_add_new_free_space(), or -EIO when the number of runs found differs
 * from @expected_extent_count.
 */
static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *block_group = caching_ctl->block_group;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	const u64 end = btrfs_block_group_end(block_group);
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	/* Start of the currently open run; only meaningful while in_run. */
	u64 run_start = 0;
	u64 bytes_pending = 0;
	u32 nr_extents = 0;
	bool in_run = false;
	int ret;

	for (;;) {
		struct btrfs_key key;
		u64 bitmap_end;
		u64 cur;

		ret = btrfs_next_item(root, path);
		if (ret < 0)
			return ret;
		if (ret > 0)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		bitmap_end = key.objectid + key.offset;
		for (cur = key.objectid; cur < bitmap_end;
		     cur += fs_info->sectorsize) {
			const bool bit = btrfs_free_space_test_bit(block_group,
								   path, cur);
			u64 space_added;

			/* No transition, the current state simply continues. */
			if (bit == in_run)
				continue;

			if (bit) {
				/* 0 -> 1: a new free run starts here. */
				run_start = cur;
			} else {
				/* 1 -> 0: the open run ends right before cur. */
				ret = btrfs_add_new_free_space(block_group,
							       run_start, cur,
							       &space_added);
				if (ret)
					return ret;

				bytes_pending += space_added;
				if (bytes_pending > CACHING_CTL_WAKE_UP) {
					bytes_pending = 0;
					wake_up(&caching_ctl->wait);
				}
				nr_extents++;
			}
			in_run = bit;
		}
	}

	/* A run still open here extends to the end of the block group. */
	if (in_run) {
		ret = btrfs_add_new_free_space(block_group, run_start, end, NULL);
		if (ret)
			return ret;
		nr_extents++;
	}

	if (likely(nr_extents == expected_extent_count))
		return 0;

	btrfs_err(fs_info,
		  "incorrect extent count for %llu; counted %u, expected %u",
		  block_group->start, nr_extents,
		  expected_extent_count);
	DEBUG_WARN();
	return -EIO;
}
/*
 * Walk the free space extent items that follow the current position of
 * @path and pass each one to btrfs_add_new_free_space().
 *
 * Iteration stops at the end of the tree or at the next free space info
 * item.  Waiters on caching_ctl->wait are woken each time more than
 * CACHING_CTL_WAKE_UP bytes have been added since the previous wake-up.
 *
 * Returns 0 on success, a negative error from the tree walk or from
 * btrfs_add_new_free_space(), or -EIO when the number of extents found
 * differs from @expected_extent_count.
 */
static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *block_group = caching_ctl->block_group;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	const u64 end = btrfs_block_group_end(block_group);
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	u64 bytes_pending = 0;
	u32 nr_extents;
	int ret;

	/* nr_extents only advances after an extent was added successfully. */
	for (nr_extents = 0; ; nr_extents++) {
		struct btrfs_key key;
		u64 space_added;

		ret = btrfs_next_item(root, path);
		if (ret < 0)
			return ret;
		if (ret > 0)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		ret = btrfs_add_new_free_space(block_group, key.objectid,
					       key.objectid + key.offset,
					       &space_added);
		if (ret)
			return ret;

		bytes_pending += space_added;
		if (bytes_pending > CACHING_CTL_WAKE_UP) {
			bytes_pending = 0;
			wake_up(&caching_ctl->wait);
		}
	}

	if (likely(nr_extents == expected_extent_count))
		return 0;

	btrfs_err(fs_info,
		  "incorrect extent count for %llu; counted %u, expected %u",
		  block_group->start, nr_extents,
		  expected_extent_count);
	DEBUG_WARN();
	return -EIO;
}
/*
 * Load the free space of caching_ctl->block_group from the free space tree.
 *
 * The block group's free space info item is looked up with a path that
 * skips locking, searches the commit root and uses forward readahead.  The
 * item's flags decide whether the entries that follow are read as bitmaps
 * or as extents; the item's extent count is handed to the loader as the
 * expected number of extents.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
 * from the lookup or from the selected loader.
 */
int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
{
	struct btrfs_block_group *block_group = caching_ctl->block_group;
	struct btrfs_free_space_info *info;
	BTRFS_PATH_AUTO_FREE(path);
	u32 extent_count;
	u32 flags;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->skip_locking = true;
	path->search_commit_root = true;
	path->reada = READA_FORWARD;

	info = btrfs_search_free_space_info(NULL, block_group, path, 0);
	if (IS_ERR(info))
		return PTR_ERR(info);

	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
	flags = btrfs_free_space_flags(path->nodes[0], info);

	/* The loaders continue iterating from where the lookup left @path. */
	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
		return load_free_space_bitmaps(caching_ctl, path, extent_count);

	return load_free_space_extents(caching_ctl, path, extent_count);
}
/*
 * Delete the leading items of @fst_root whose key objectid is below
 * @first_bg_bytenr.
 *
 * Each pass searches for the all-zero key, counts how many items at the
 * start of the resulting leaf lie below @first_bg_bytenr and deletes them.
 * The loop ends once a pass finds nothing to delete or an error occurs.
 * All work runs in one transaction started here, which is ended before
 * returning.
 *
 * Returns 0 on success (and logs an info message), or a negative error.
 */
static int delete_orphan_free_space_entries(struct btrfs_root *fst_root,
					    struct btrfs_path *path,
					    u64 first_bg_bytenr)
{
	struct btrfs_trans_handle *trans = btrfs_start_transaction(fst_root, 1);
	int ret;

	if (IS_ERR(trans))
		return PTR_ERR(trans);

	for (;;) {
		struct btrfs_key key = { 0 };
		struct extent_buffer *leaf;
		int nr_orphans = 0;

		ret = btrfs_search_slot(trans, fst_root, &key, path, -1, 1);
		if (ret < 0)
			break;
		ASSERT(ret > 0);
		ret = 0;

		/* Count items from slot 0 that precede the first block group. */
		leaf = path->nodes[0];
		while (nr_orphans < btrfs_header_nritems(leaf)) {
			btrfs_item_key_to_cpu(leaf, &key, nr_orphans);
			if (key.objectid >= first_bg_bytenr)
				break;
			nr_orphans++;
		}

		/* Nothing left to delete in this leaf: finished. */
		if (nr_orphans == 0)
			break;

		ret = btrfs_del_items(trans, fst_root, path, 0, nr_orphans);
		if (ret < 0)
			break;

		btrfs_release_path(path);
	}

	btrfs_release_path(path);
	btrfs_end_transaction(trans);

	if (ret == 0)
		btrfs_info(fst_root->fs_info, "deleted orphan free space tree entries");

	return ret;
}
int btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info *fs_info)
{
BTRFS_PATH_AUTO_RELEASE(path);
struct btrfs_key key = {
.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
.type = BTRFS_ROOT_ITEM_KEY,
.offset = 0,
};
struct btrfs_root *root;
struct btrfs_block_group *bg;
u64 first_bg_bytenr;
int ret;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
return 0;
if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
return 0;
root = btrfs_global_root(fs_info, &key);
if (!root)
return 0;
key.objectid = 0;
key.type = 0;
key.offset = 0;
bg = btrfs_lookup_first_block_group(fs_info, 0);
if (unlikely(!bg)) {
btrfs_err(fs_info, "no block group found");
return -EUCLEAN;
}
first_bg_bytenr = bg->start;
btrfs_put_block_group(bg);
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0)
return ret;
ASSERT(ret > 0);
if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
return 0;
btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
if (key.objectid >= first_bg_bytenr)
return 0;
btrfs_release_path(&path);
return delete_orphan_free_space_entries(root, &path, first_bg_bytenr);
}