Diffstat (limited to 'fs/btrfs/space-info.c')
 -rw-r--r--  fs/btrfs/space-info.c  149
 1 file changed, 84 insertions, 65 deletions
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 75e7fa337e66..571bb13587d5 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -345,8 +345,10 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
enum btrfs_reserve_flush_enum flush)
{
+ struct btrfs_space_info *data_sinfo;
u64 profile;
u64 avail;
+ u64 data_chunk_size;
int factor;
if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
@@ -364,6 +366,36 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
*/
factor = btrfs_bg_type_to_factor(profile);
avail = div_u64(avail, factor);
+ if (avail == 0)
+ return 0;
+
+ /*
+ * Calculate the data_chunk_size; space_info->chunk_size is the
+ * "optimal" chunk size based on the fs size. However, when we actually
+ * allocate the chunk we will strip this down further, making it no more
+ * than 10% of the disk or 1G, whichever is smaller.
+ */
+ data_sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
+ data_chunk_size = min(data_sinfo->chunk_size,
+ mult_perc(fs_info->fs_devices->total_rw_bytes, 10));
+ data_chunk_size = min_t(u64, data_chunk_size, SZ_1G);
+
+ /*
+ * Since data allocations immediately use block groups as part of the
+ * reservation, because we assume that data reservations will == actual
+ * usage, we could potentially overcommit and then immediately have that
+ * available space used by a data allocation, which could put us in a
+ * bind when we get close to filling the file system.
+ *
+ * To handle this simply remove the data_chunk_size from the available
+ * space. If we are relatively empty this won't affect our ability to
+ * overcommit much, and if we're very close to full it'll keep us from
+ * getting into a position where we've given ourselves very little
+ * metadata wiggle room.
+ */
+ if (avail <= data_chunk_size)
+ return 0;
+ avail -= data_chunk_size;
/*
* If we aren't flushing all things, let us overcommit up to
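
Put concretely, the hunk above computes data_chunk_size as the minimum of the space_info's chunk size, 10% of the writable device bytes, and 1 GiB, then withholds that amount from the overcommit headroom. A minimal standalone sketch of that arithmetic, using made-up sizes and plain C rather than the kernel structures:

	#include <stdint.h>
	#include <stdio.h>

	#define SZ_1G (1024ULL * 1024 * 1024)

	/* Hypothetical helper mirroring the clamp applied above: no more than
	 * 10% of the writable device space and no more than 1 GiB. */
	static uint64_t clamp_data_chunk_size(uint64_t chunk_size, uint64_t total_rw_bytes)
	{
		uint64_t size = chunk_size;

		if (size > total_rw_bytes / 10)
			size = total_rw_bytes / 10;
		if (size > SZ_1G)
			size = SZ_1G;
		return size;
	}

	int main(void)
	{
		/* Hypothetical numbers: 6 GiB unallocated (after the RAID factor),
		 * a 10 GiB "optimal" chunk size on a 100 GiB filesystem. */
		uint64_t avail = 6 * SZ_1G;
		uint64_t data_chunk = clamp_data_chunk_size(10 * SZ_1G, 100 * SZ_1G);

		/* Hold back one data chunk's worth so a data allocation cannot
		 * immediately consume the space we are overcommitting against. */
		avail = (avail <= data_chunk) ? 0 : avail - data_chunk;
		printf("headroom left for metadata overcommit: %llu bytes\n",
		       (unsigned long long)avail);
		return 0;
	}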
@@ -389,11 +421,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
return 0;
used = btrfs_space_info_used(space_info, true);
- if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) &&
- (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
- avail = 0;
- else
- avail = calc_available_free_space(fs_info, space_info, flush);
+ avail = calc_available_free_space(fs_info, space_info, flush);
if (used + bytes < space_info->total_bytes + avail)
return 1;
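
With the zoned metadata special case dropped, every profile now goes through calc_available_free_space() before the overcommit test. The test itself is unchanged; a standalone restatement of it (not the kernel function, just the inequality it evaluates):

	#include <stdbool.h>
	#include <stdint.h>

	/*
	 * A reservation of @bytes may overcommit as long as the bytes already
	 * used plus the request still fit within the allocated total plus the
	 * headroom we could still carve out of unallocated device space.
	 */
	static bool can_overcommit(uint64_t used, uint64_t bytes,
				   uint64_t total_bytes, uint64_t avail)
	{
		return used + bytes < total_bytes + avail;
	}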
@@ -510,6 +538,7 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
int dump_block_groups)
{
struct btrfs_block_group *cache;
+ u64 total_avail = 0;
int index = 0;
spin_lock(&info->lock);
@@ -523,18 +552,27 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
down_read(&info->groups_sem);
again:
list_for_each_entry(cache, &info->block_groups[index], list) {
+ u64 avail;
+
spin_lock(&cache->lock);
+ avail = cache->length - cache->used - cache->pinned -
+ cache->reserved - cache->delalloc_bytes -
+ cache->bytes_super - cache->zone_unusable;
btrfs_info(fs_info,
- "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu zone_unusable %s",
- cache->start, cache->length, cache->used, cache->pinned,
- cache->reserved, cache->zone_unusable,
- cache->ro ? "[readonly]" : "");
+"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s",
+ cache->start, cache->length, cache->used, cache->pinned,
+ cache->reserved, cache->delalloc_bytes,
+ cache->bytes_super, cache->zone_unusable,
+ avail, cache->ro ? "[readonly]" : "");
spin_unlock(&cache->lock);
btrfs_dump_free_space(cache, bytes);
+ total_avail += avail;
}
if (++index < BTRFS_NR_RAID_TYPES)
goto again;
up_read(&info->groups_sem);
+
+ btrfs_info(fs_info, "%llu bytes available across all block groups", total_avail);
}
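
The dump now also reports delalloc and super block bytes per block group, a per-group "available" figure, and a total of that figure across all groups. A hypothetical mirror of the subtraction being printed, with the counters pulled into a plain struct:

	#include <stdint.h>

	/* Everything in the group that is not already used, pinned, reserved,
	 * covered by delalloc reservations, consumed by super block mirrors,
	 * or unusable on a zoned device. */
	struct bg_counters {
		uint64_t length;
		uint64_t used;
		uint64_t pinned;
		uint64_t reserved;
		uint64_t delalloc_bytes;
		uint64_t bytes_super;
		uint64_t zone_unusable;
	};

	static uint64_t bg_available(const struct bg_counters *bg)
	{
		return bg->length - bg->used - bg->pinned - bg->reserved -
		       bg->delalloc_bytes - bg->bytes_super - bg->zone_unusable;
	}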
static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info,
@@ -550,18 +588,6 @@ static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info,
return nr;
}
-static inline u64 calc_delayed_refs_nr(const struct btrfs_fs_info *fs_info,
- u64 to_reclaim)
-{
- const u64 bytes = btrfs_calc_delayed_ref_bytes(fs_info, 1);
- u64 nr;
-
- nr = div64_u64(to_reclaim, bytes);
- if (!nr)
- nr = 1;
- return nr;
-}
-
#define EXTENT_SIZE_PER_ITEM SZ_256K
/*
@@ -715,9 +741,11 @@ static void flush_space(struct btrfs_fs_info *fs_info,
else
nr = -1;
- trans = btrfs_join_transaction(root);
+ trans = btrfs_join_transaction_nostart(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
+ if (ret == -ENOENT)
+ ret = 0;
break;
}
ret = btrfs_run_delayed_items_nr(trans, nr);
@@ -733,32 +761,21 @@ static void flush_space(struct btrfs_fs_info *fs_info,
break;
case FLUSH_DELAYED_REFS_NR:
case FLUSH_DELAYED_REFS:
- trans = btrfs_join_transaction(root);
+ trans = btrfs_join_transaction_nostart(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
+ if (ret == -ENOENT)
+ ret = 0;
break;
}
if (state == FLUSH_DELAYED_REFS_NR)
- nr = calc_delayed_refs_nr(fs_info, num_bytes);
+ btrfs_run_delayed_refs(trans, num_bytes);
else
- nr = 0;
- btrfs_run_delayed_refs(trans, nr);
+ btrfs_run_delayed_refs(trans, 0);
btrfs_end_transaction(trans);
break;
case ALLOC_CHUNK:
case ALLOC_CHUNK_FORCE:
- /*
- * For metadata space on zoned filesystem, reaching here means we
- * don't have enough space left in active_total_bytes. Try to
- * activate a block group first, because we may have inactive
- * block group already allocated.
- */
- ret = btrfs_zoned_activate_one_bg(fs_info, space_info, false);
- if (ret < 0)
- break;
- else if (ret == 1)
- break;
-
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
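
Two things change in the flush states above: the delayed item and delayed ref states now join an existing transaction without starting a new one (a missing transaction simply means there is nothing to flush, so -ENOENT is treated as success), and btrfs_run_delayed_refs() is handed the byte target directly, which is why the calc_delayed_refs_nr() helper that converted bytes into an item count could be deleted. For reference, a standalone sketch of the conversion the deleted helper performed (hypothetical values, not kernel code):

	#include <stdint.h>

	/* What calc_delayed_refs_nr() boiled down to: divide the byte target
	 * by the metadata cost of a single delayed ref and run at least one. */
	static uint64_t old_refs_to_run(uint64_t to_reclaim, uint64_t bytes_per_ref)
	{
		uint64_t nr = to_reclaim / bytes_per_ref;

		return nr ? nr : 1;
	}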
@@ -770,22 +787,6 @@ static void flush_space(struct btrfs_fs_info *fs_info,
CHUNK_ALLOC_FORCE);
btrfs_end_transaction(trans);
- /*
- * For metadata space on zoned filesystem, allocating a new chunk
- * is not enough. We still need to activate the block * group.
- * Active the newly allocated block group by (maybe) finishing
- * a block group.
- */
- if (ret == 1) {
- ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true);
- /*
- * Revert to the original ret regardless we could finish
- * one block group or not.
- */
- if (ret >= 0)
- ret = 1;
- }
-
if (ret > 0 || ret == -ENOSPC)
ret = 0;
break;
@@ -800,9 +801,18 @@ static void flush_space(struct btrfs_fs_info *fs_info,
break;
case COMMIT_TRANS:
ASSERT(current->journal_info == NULL);
- trans = btrfs_join_transaction(root);
+ /*
+ * We don't want to start a new transaction, just attach to the
+ * current one or wait for it to fully commit in case its commit is
+ * happening at the moment. Note: we don't use a nostart join
+ * because that does not wait for a transaction to fully commit
+ * (only for it to be unblocked, state TRANS_STATE_UNBLOCKED).
+ */
+ trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
+ if (ret == -ENOENT)
+ ret = 0;
break;
}
ret = btrfs_commit_transaction(trans);
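
The COMMIT_TRANS state now attaches to the current transaction with a barrier instead of joining: if a commit is already in flight it waits for it to finish completely (a nostart join would only wait for the transaction to become unblocked), and if no transaction exists there is nothing to commit, so -ENOENT is again treated as success. A rough sketch of that decision, with the transaction call stubbed out:

	#include <errno.h>

	/* Hypothetical shape of the error handling above; attach_ret stands in
	 * for the result of attaching to the running transaction. */
	static int commit_trans_for_flush(int attach_ret)
	{
		if (attach_ret == -ENOENT)
			return 0;	/* no running transaction, nothing to commit */
		if (attach_ret < 0)
			return attach_ret;
		/* ... otherwise commit the attached transaction and return its result ... */
		return 0;
	}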
@@ -987,7 +997,8 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
}
/*
- * maybe_fail_all_tickets - we've exhausted our flushing, start failing tickets
+ * We've exhausted our flushing; start failing tickets.
+ *
* @fs_info - fs_info for this fs
* @space_info - the space info we were flushing
*
@@ -1408,8 +1419,18 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
}
}
- /* Attempt to steal from the global rsv if we can. */
- if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
+ /*
+ * Attempt to steal from the global rsv if we can, except if the fs was
+ * turned into error mode due to a transaction abort when flushing space
+ * above, in that case fail with the abort error instead of returning
+ * success to the caller if we can steal from the global rsv - this is
+ * just to have the caller fail immediately instead of later when trying to
+ * modify the fs, making it easier to debug -ENOSPC problems.
+ */
+ if (BTRFS_FS_ERROR(fs_info)) {
+ ticket->error = BTRFS_FS_ERROR(fs_info);
+ remove_ticket(space_info, ticket);
+ } else if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
ticket->error = -ENOSPC;
remove_ticket(space_info, ticket);
}
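
The priority reclaim path now checks whether the filesystem is already in error state (for example after a transaction abort triggered by the flushing above) before trying the global reserve, so the caller sees the abort error immediately instead of an apparent success followed by a later failure. A compact restatement of that ordering as a hypothetical helper, not kernel code:

	#include <errno.h>
	#include <stdbool.h>

	/* fs_error is the filesystem-wide error (0 when healthy); stole is
	 * whether stealing from the global reserve satisfied the ticket. */
	static int finish_priority_ticket(int fs_error, bool stole)
	{
		if (fs_error)
			return fs_error;	/* surface the abort error right away */
		if (!stole)
			return -ENOSPC;		/* flushing and stealing both failed */
		return 0;			/* reservation satisfied from the global rsv */
	}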
@@ -1741,7 +1762,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
* Try to reserve metadata bytes from the block_rsv's space.
*
* @fs_info: the filesystem
- * @block_rsv: block_rsv we're allocating for
+ * @space_info: the space_info we're allocating for
* @orig_bytes: number of bytes we want
* @flush: whether or not we can flush to make our reservation
*
@@ -1753,21 +1774,19 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
* space already.
*/
int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv,
+ struct btrfs_space_info *space_info,
u64 orig_bytes,
enum btrfs_reserve_flush_enum flush)
{
int ret;
- ret = __reserve_bytes(fs_info, block_rsv->space_info, orig_bytes, flush);
+ ret = __reserve_bytes(fs_info, space_info, orig_bytes, flush);
if (ret == -ENOSPC) {
trace_btrfs_space_reservation(fs_info, "space_info:enospc",
- block_rsv->space_info->flags,
- orig_bytes, 1);
+ space_info->flags, orig_bytes, 1);
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
- btrfs_dump_space_info(fs_info, block_rsv->space_info,
- orig_bytes, 0);
+ btrfs_dump_space_info(fs_info, space_info, orig_bytes, 0);
}
return ret;
}