Diffstat (limited to 'fs/btrfs/block-group.c')
-rw-r--r--	fs/btrfs/block-group.c	259
1 file changed, 139 insertions(+), 120 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 82324c327a50..6e5dc68ff661 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -504,13 +504,20 @@ static void fragment_free_space(struct btrfs_block_group *block_group)
#endif
/*
- * This is only called by btrfs_cache_block_group, since we could have freed
- * extents we need to check the pinned_extents for any extents that can't be
- * used yet since their free space will be released as soon as the transaction
- * commits.
+ * Add a free space range to the in-memory free space cache of a block group.
+ * If the range contains super block locations, those locations are not added
+ * to the free space cache.
+ *
+ * @block_group: The target block group.
+ * @start: Start offset of the range.
+ * @end: End offset of the range (exclusive).
+ * @total_added_ret: Optional pointer to return the total amount of space
+ * added to the block group's free space cache.
+ *
+ * Returns 0 on success or < 0 on error.
*/
-int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end,
- u64 *total_added_ret)
+int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start,
+ u64 end, u64 *total_added_ret)
{
struct btrfs_fs_info *info = block_group->fs_info;
u64 extent_start, extent_end, size;
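
For reference, the renamed helper keeps the previous calling convention: the end
offset is exclusive and total_added_ret may be NULL when the caller does not
need the amount of space added. A minimal sketch of a call site, with
hypothetical local variable names (the real call sites appear later in this
diff):

	u64 added = 0;
	int ret;

	/* Add [cache->start, cache->start + cache->length), skipping any
	 * super block locations recorded in the excluded extents tree. */
	ret = btrfs_add_new_free_space(cache, cache->start,
				       cache->start + cache->length, &added);
	if (ret < 0)
		return ret;
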
@@ -520,11 +527,10 @@ int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
*total_added_ret = 0;
while (start < end) {
- ret = find_first_extent_bit(&info->excluded_extents, start,
- &extent_start, &extent_end,
- EXTENT_DIRTY | EXTENT_UPTODATE,
- NULL);
- if (ret)
+ if (!find_first_extent_bit(&info->excluded_extents, start,
+ &extent_start, &extent_end,
+ EXTENT_DIRTY | EXTENT_UPTODATE,
+ NULL))
break;
if (extent_start <= start) {
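
The hunk above also reflects a change in the return convention of
find_first_extent_bit(): the old code treated the returned int as "0 means a
matching range was found", while the new code expects a boolean that is true
when a range was found. The equivalent before/after pattern as a standalone
sketch, with hypothetical argument names:

	/* Old convention: returns 0 when a matching range was found. */
	ret = find_first_extent_bit(tree, start, &found_start, &found_end,
				    bits, NULL);
	if (ret)
		break;

	/* New convention: returns true when a matching range was found. */
	if (!find_first_extent_bit(tree, start, &found_start, &found_end,
				   bits, NULL))
		break;
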
@@ -799,8 +805,8 @@ next:
key.type == BTRFS_METADATA_ITEM_KEY) {
u64 space_added;
- ret = add_new_free_space(block_group, last, key.objectid,
- &space_added);
+ ret = btrfs_add_new_free_space(block_group, last,
+ key.objectid, &space_added);
if (ret)
goto out;
total_found += space_added;
@@ -821,14 +827,20 @@ next:
path->slots[0]++;
}
- ret = add_new_free_space(block_group, last,
- block_group->start + block_group->length,
- NULL);
+ ret = btrfs_add_new_free_space(block_group, last,
+ block_group->start + block_group->length,
+ NULL);
out:
btrfs_free_path(path);
return ret;
}
+static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg)
+{
+ clear_extent_bits(&bg->fs_info->excluded_extents, bg->start,
+ bg->start + bg->length - 1, EXTENT_UPTODATE);
+}
+
static noinline void caching_thread(struct btrfs_work *work)
{
struct btrfs_block_group *block_group;
@@ -923,7 +935,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
caching_ctl->block_group = cache;
refcount_set(&caching_ctl->count, 2);
atomic_set(&caching_ctl->progress, 0);
- btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
+ btrfs_init_work(&caching_ctl->work, caching_thread, NULL);
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
@@ -1274,7 +1286,7 @@ out:
/* Once for the lookup reference */
btrfs_put_block_group(block_group);
if (remove_rsv)
- btrfs_delayed_refs_rsv_release(fs_info, 1);
+ btrfs_dec_delayed_refs_rsv_bg_updates(fs_info);
btrfs_free_path(path);
return ret;
}
@@ -2098,8 +2110,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
cache->bytes_super += stripe_len;
- ret = btrfs_add_excluded_extent(fs_info, cache->start,
- stripe_len);
+ ret = set_extent_bit(&fs_info->excluded_extents, cache->start,
+ cache->start + stripe_len - 1,
+ EXTENT_UPTODATE, NULL);
if (ret)
return ret;
}
@@ -2125,8 +2138,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
cache->start + cache->length - logical[nr]);
cache->bytes_super += len;
- ret = btrfs_add_excluded_extent(fs_info, logical[nr],
- len);
+ ret = set_extent_bit(&fs_info->excluded_extents, logical[nr],
+ logical[nr] + len - 1,
+ EXTENT_UPTODATE, NULL);
if (ret) {
kfree(logical);
return ret;
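
Both hunks in exclude_super_stripes() open-code what the removed
btrfs_add_excluded_extent() call used to do: mark the byte range as
EXTENT_UPTODATE in fs_info->excluded_extents, with an inclusive end offset. A
hypothetical reconstruction of the dropped helper, based only on the
replacement calls above (the original definition may have differed):

	static int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
					     u64 start, u64 num_bytes)
	{
		u64 end = start + num_bytes - 1;

		return set_extent_bit(&fs_info->excluded_extents, start, end,
				      EXTENT_UPTODATE, NULL);
	}
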
@@ -2319,8 +2333,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
btrfs_free_excluded_extents(cache);
} else if (cache->used == 0) {
cache->cached = BTRFS_CACHE_FINISHED;
- ret = add_new_free_space(cache, cache->start,
- cache->start + cache->length, NULL);
+ ret = btrfs_add_new_free_space(cache, cache->start,
+ cache->start + cache->length, NULL);
btrfs_free_excluded_extents(cache);
if (ret)
goto error;
@@ -2587,7 +2601,7 @@ static int insert_dev_extent(struct btrfs_trans_handle *trans,
btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
btrfs_set_dev_extent_length(leaf, extent, num_bytes);
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
out:
btrfs_free_path(path);
return ret;
@@ -2695,7 +2709,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
/* Already aborted the transaction if it failed. */
next:
- btrfs_delayed_refs_rsv_release(fs_info, 1);
+ btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info);
list_del_init(&block_group->bg_list);
clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
}
@@ -2767,7 +2781,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
return ERR_PTR(ret);
}
- ret = add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL);
+ ret = btrfs_add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL);
btrfs_free_excluded_extents(cache);
if (ret) {
btrfs_put_block_group(cache);
@@ -2805,8 +2819,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
#endif
list_add_tail(&cache->bg_list, &trans->new_bgs);
- trans->delayed_ref_updates++;
- btrfs_update_delayed_refs_rsv(trans);
+ btrfs_inc_delayed_refs_rsv_bg_inserts(fs_info);
set_avail_alloc_bits(fs_info, type);
return cache;
@@ -3011,11 +3024,19 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
cache->global_root_id);
btrfs_set_stack_block_group_flags(&bgi, cache->flags);
write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
- btrfs_mark_buffer_dirty(leaf);
+ btrfs_mark_buffer_dirty(trans, leaf);
fail:
btrfs_release_path(path);
- /* We didn't update the block group item, need to revert @commit_used. */
- if (ret < 0) {
+ /*
+ * We didn't update the block group item, so we need to revert commit_used
+ * unless the block group item didn't exist yet - this is to prevent a race
+ * with a concurrent insertion of the block group item, through
+ * insert_block_group_item(), that happened just after we attempted the
+ * update. In that case we would reset commit_used to 0 just after the
+ * insertion set it to a value greater than 0 - if the block group later
+ * ends up with 0 used bytes, we would incorrectly skip its update.
+ */
+ if (ret < 0 && ret != -ENOENT) {
spin_lock(&cache->lock);
cache->commit_used = old_commit_used;
spin_unlock(&cache->lock);
@@ -3029,7 +3050,6 @@ static int cache_save_setup(struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
- struct btrfs_root *root = fs_info->tree_root;
struct inode *inode = NULL;
struct extent_changeset *data_reserved = NULL;
u64 alloc_hint = 0;
@@ -3081,7 +3101,7 @@ again:
* time.
*/
BTRFS_I(inode)->generation = 0;
- ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
+ ret = btrfs_update_inode(trans, BTRFS_I(inode));
if (ret) {
/*
* So theoretically we could recover from this, simply set the
@@ -3348,7 +3368,7 @@ again:
if (should_put)
btrfs_put_block_group(cache);
if (drop_reserve)
- btrfs_delayed_refs_rsv_release(fs_info, 1);
+ btrfs_dec_delayed_refs_rsv_bg_updates(fs_info);
/*
* Avoid blocking other tasks for too long. It might even save
* us from writing caches for block groups that are going to be
@@ -3452,8 +3472,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
cache_save_setup(cache, trans, path);
if (!ret)
- ret = btrfs_run_delayed_refs(trans,
- (unsigned long) -1);
+ ret = btrfs_run_delayed_refs(trans, U64_MAX);
if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
cache->io_ctl.inode = NULL;
@@ -3496,7 +3515,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
/* If its not on the io list, we need to put the block group */
if (should_put)
btrfs_put_block_group(cache);
- btrfs_delayed_refs_rsv_release(fs_info, 1);
+ btrfs_dec_delayed_refs_rsv_bg_updates(fs_info);
spin_lock(&cur_trans->dirty_bgs_lock);
}
spin_unlock(&cur_trans->dirty_bgs_lock);
@@ -3521,12 +3540,12 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, bool alloc)
{
struct btrfs_fs_info *info = trans->fs_info;
- struct btrfs_block_group *cache = NULL;
- u64 total = num_bytes;
+ struct btrfs_space_info *space_info;
+ struct btrfs_block_group *cache;
u64 old_val;
- u64 byte_in_group;
+ bool reclaim = false;
+ bool bg_already_dirty = true;
int factor;
- int ret = 0;
/* Block accounting for super block */
spin_lock(&info->delalloc_root_lock);
@@ -3538,97 +3557,86 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
btrfs_set_super_bytes_used(info->super_copy, old_val);
spin_unlock(&info->delalloc_root_lock);
- while (total) {
- struct btrfs_space_info *space_info;
- bool reclaim = false;
-
- cache = btrfs_lookup_block_group(info, bytenr);
- if (!cache) {
- ret = -ENOENT;
- break;
- }
- space_info = cache->space_info;
- factor = btrfs_bg_type_to_factor(cache->flags);
+ cache = btrfs_lookup_block_group(info, bytenr);
+ if (!cache)
+ return -ENOENT;
- /*
- * If this block group has free space cache written out, we
- * need to make sure to load it if we are removing space. This
- * is because we need the unpinning stage to actually add the
- * space back to the block group, otherwise we will leak space.
- */
- if (!alloc && !btrfs_block_group_done(cache))
- btrfs_cache_block_group(cache, true);
+ /* An extent cannot span multiple block groups. */
+ ASSERT(bytenr + num_bytes <= cache->start + cache->length);
- byte_in_group = bytenr - cache->start;
- WARN_ON(byte_in_group > cache->length);
+ space_info = cache->space_info;
+ factor = btrfs_bg_type_to_factor(cache->flags);
- spin_lock(&space_info->lock);
- spin_lock(&cache->lock);
+ /*
+ * If this block group has free space cache written out, we need to make
+ * sure to load it if we are removing space. This is because we need
+ * the unpinning stage to actually add the space back to the block group,
+ * otherwise we will leak space.
+ */
+ if (!alloc && !btrfs_block_group_done(cache))
+ btrfs_cache_block_group(cache, true);
- if (btrfs_test_opt(info, SPACE_CACHE) &&
- cache->disk_cache_state < BTRFS_DC_CLEAR)
- cache->disk_cache_state = BTRFS_DC_CLEAR;
+ spin_lock(&space_info->lock);
+ spin_lock(&cache->lock);
- old_val = cache->used;
- num_bytes = min(total, cache->length - byte_in_group);
- if (alloc) {
- old_val += num_bytes;
- cache->used = old_val;
- cache->reserved -= num_bytes;
- space_info->bytes_reserved -= num_bytes;
- space_info->bytes_used += num_bytes;
- space_info->disk_used += num_bytes * factor;
- spin_unlock(&cache->lock);
- spin_unlock(&space_info->lock);
- } else {
- old_val -= num_bytes;
- cache->used = old_val;
- cache->pinned += num_bytes;
- btrfs_space_info_update_bytes_pinned(info, space_info,
- num_bytes);
- space_info->bytes_used -= num_bytes;
- space_info->disk_used -= num_bytes * factor;
+ if (btrfs_test_opt(info, SPACE_CACHE) &&
+ cache->disk_cache_state < BTRFS_DC_CLEAR)
+ cache->disk_cache_state = BTRFS_DC_CLEAR;
- reclaim = should_reclaim_block_group(cache, num_bytes);
+ old_val = cache->used;
+ if (alloc) {
+ old_val += num_bytes;
+ cache->used = old_val;
+ cache->reserved -= num_bytes;
+ space_info->bytes_reserved -= num_bytes;
+ space_info->bytes_used += num_bytes;
+ space_info->disk_used += num_bytes * factor;
+ spin_unlock(&cache->lock);
+ spin_unlock(&space_info->lock);
+ } else {
+ old_val -= num_bytes;
+ cache->used = old_val;
+ cache->pinned += num_bytes;
+ btrfs_space_info_update_bytes_pinned(info, space_info, num_bytes);
+ space_info->bytes_used -= num_bytes;
+ space_info->disk_used -= num_bytes * factor;
- spin_unlock(&cache->lock);
- spin_unlock(&space_info->lock);
+ reclaim = should_reclaim_block_group(cache, num_bytes);
- set_extent_bit(&trans->transaction->pinned_extents,
- bytenr, bytenr + num_bytes - 1,
- EXTENT_DIRTY, NULL);
- }
+ spin_unlock(&cache->lock);
+ spin_unlock(&space_info->lock);
- spin_lock(&trans->transaction->dirty_bgs_lock);
- if (list_empty(&cache->dirty_list)) {
- list_add_tail(&cache->dirty_list,
- &trans->transaction->dirty_bgs);
- trans->delayed_ref_updates++;
- btrfs_get_block_group(cache);
- }
- spin_unlock(&trans->transaction->dirty_bgs_lock);
+ set_extent_bit(&trans->transaction->pinned_extents, bytenr,
+ bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
+ }
- /*
- * No longer have used bytes in this block group, queue it for
- * deletion. We do this after adding the block group to the
- * dirty list to avoid races between cleaner kthread and space
- * cache writeout.
- */
- if (!alloc && old_val == 0) {
- if (!btrfs_test_opt(info, DISCARD_ASYNC))
- btrfs_mark_bg_unused(cache);
- } else if (!alloc && reclaim) {
- btrfs_mark_bg_to_reclaim(cache);
- }
+ spin_lock(&trans->transaction->dirty_bgs_lock);
+ if (list_empty(&cache->dirty_list)) {
+ list_add_tail(&cache->dirty_list, &trans->transaction->dirty_bgs);
+ bg_already_dirty = false;
+ btrfs_get_block_group(cache);
+ }
+ spin_unlock(&trans->transaction->dirty_bgs_lock);
- btrfs_put_block_group(cache);
- total -= num_bytes;
- bytenr += num_bytes;
+ /*
+ * No longer have used bytes in this block group, queue it for deletion.
+ * We do this after adding the block group to the dirty list to avoid
+ * races between cleaner kthread and space cache writeout.
+ */
+ if (!alloc && old_val == 0) {
+ if (!btrfs_test_opt(info, DISCARD_ASYNC))
+ btrfs_mark_bg_unused(cache);
+ } else if (!alloc && reclaim) {
+ btrfs_mark_bg_to_reclaim(cache);
}
+ btrfs_put_block_group(cache);
+
/* Modified block groups are accounted for in the delayed_refs_rsv. */
- btrfs_update_delayed_refs_rsv(trans);
- return ret;
+ if (!bg_already_dirty)
+ btrfs_inc_delayed_refs_rsv_bg_updates(info);
+
+ return 0;
}
/*
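
Across the patch, the generic btrfs_delayed_refs_rsv_release(fs_info, 1) and
btrfs_update_delayed_refs_rsv(trans) calls are replaced by purpose-specific
counters for block group inserts and updates. A condensed sketch of how the
pairs line up in this diff, with the surrounding logic elided:

	/* Block group creation: charged in btrfs_make_block_group(),
	 * released in btrfs_create_pending_block_groups(). */
	btrfs_inc_delayed_refs_rsv_bg_inserts(fs_info);
	btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info);

	/* Block group item updates: charged when a block group is first
	 * added to the transaction's dirty list, released once it is
	 * written out or removed. */
	btrfs_inc_delayed_refs_rsv_bg_updates(fs_info);
	btrfs_dec_delayed_refs_rsv_bg_updates(fs_info);
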
@@ -4075,7 +4083,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
if (IS_ERR(ret_bg)) {
ret = PTR_ERR(ret_bg);
- } else if (from_extent_allocation) {
+ } else if (from_extent_allocation && (flags & BTRFS_BLOCK_GROUP_DATA)) {
/*
* New block group is likely to be used soon. Try to activate
* it now. Failure is OK for now.
@@ -4273,6 +4281,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
struct btrfs_caching_control *caching_ctl;
struct rb_node *n;
+ if (btrfs_is_zoned(info)) {
+ if (info->active_meta_bg) {
+ btrfs_put_block_group(info->active_meta_bg);
+ info->active_meta_bg = NULL;
+ }
+ if (info->active_system_bg) {
+ btrfs_put_block_group(info->active_system_bg);
+ info->active_system_bg = NULL;
+ }
+ }
+
write_lock(&info->block_group_cache_lock);
while (!list_empty(&info->caching_block_groups)) {
caching_ctl = list_entry(info->caching_block_groups.next,