diff options
Diffstat (limited to 'fs/btrfs/space-info.c')
-rw-r--r-- | fs/btrfs/space-info.c | 149 |
1 files changed, 84 insertions, 65 deletions
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 75e7fa337e66..571bb13587d5 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -345,8 +345,10 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, enum btrfs_reserve_flush_enum flush) { + struct btrfs_space_info *data_sinfo; u64 profile; u64 avail; + u64 data_chunk_size; int factor; if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM) @@ -364,6 +366,36 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, */ factor = btrfs_bg_type_to_factor(profile); avail = div_u64(avail, factor); + if (avail == 0) + return 0; + + /* + * Calculate the data_chunk_size, space_info->chunk_size is the + * "optimal" chunk size based on the fs size. However when we actually + * allocate the chunk we will strip this down further, making it no more + * than 10% of the disk or 1G, whichever is smaller. + */ + data_sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); + data_chunk_size = min(data_sinfo->chunk_size, + mult_perc(fs_info->fs_devices->total_rw_bytes, 10)); + data_chunk_size = min_t(u64, data_chunk_size, SZ_1G); + + /* + * Since data allocations immediately use block groups as part of the + * reservation, because we assume that data reservations will == actual + * usage, we could potentially overcommit and then immediately have that + * available space used by a data allocation, which could put us in a + * bind when we get close to filling the file system. + * + * To handle this simply remove the data_chunk_size from the available + * space. If we are relatively empty this won't affect our ability to + * overcommit much, and if we're very close to full it'll keep us from + * getting into a position where we've given ourselves very little + * metadata wiggle room. 
+ */ + if (avail <= data_chunk_size) + return 0; + avail -= data_chunk_size; /* * If we aren't flushing all things, let us overcommit up to @@ -389,11 +421,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, return 0; used = btrfs_space_info_used(space_info, true); - if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) && - (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) - avail = 0; - else - avail = calc_available_free_space(fs_info, space_info, flush); + avail = calc_available_free_space(fs_info, space_info, flush); if (used + bytes < space_info->total_bytes + avail) return 1; @@ -510,6 +538,7 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info, int dump_block_groups) { struct btrfs_block_group *cache; + u64 total_avail = 0; int index = 0; spin_lock(&info->lock); @@ -523,18 +552,27 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info, down_read(&info->groups_sem); again: list_for_each_entry(cache, &info->block_groups[index], list) { + u64 avail; + spin_lock(&cache->lock); + avail = cache->length - cache->used - cache->pinned - + cache->reserved - cache->delalloc_bytes - + cache->bytes_super - cache->zone_unusable; btrfs_info(fs_info, - "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu zone_unusable %s", - cache->start, cache->length, cache->used, cache->pinned, - cache->reserved, cache->zone_unusable, - cache->ro ? "[readonly]" : ""); +"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s", + cache->start, cache->length, cache->used, cache->pinned, + cache->reserved, cache->delalloc_bytes, + cache->bytes_super, cache->zone_unusable, + avail, cache->ro ? 
"[readonly]" : ""); spin_unlock(&cache->lock); btrfs_dump_free_space(cache, bytes); + total_avail += avail; } if (++index < BTRFS_NR_RAID_TYPES) goto again; up_read(&info->groups_sem); + + btrfs_info(fs_info, "%llu bytes available across all block groups", total_avail); } static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info, @@ -550,18 +588,6 @@ static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info, return nr; } -static inline u64 calc_delayed_refs_nr(const struct btrfs_fs_info *fs_info, - u64 to_reclaim) -{ - const u64 bytes = btrfs_calc_delayed_ref_bytes(fs_info, 1); - u64 nr; - - nr = div64_u64(to_reclaim, bytes); - if (!nr) - nr = 1; - return nr; -} - #define EXTENT_SIZE_PER_ITEM SZ_256K /* @@ -715,9 +741,11 @@ static void flush_space(struct btrfs_fs_info *fs_info, else nr = -1; - trans = btrfs_join_transaction(root); + trans = btrfs_join_transaction_nostart(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); + if (ret == -ENOENT) + ret = 0; break; } ret = btrfs_run_delayed_items_nr(trans, nr); @@ -733,32 +761,21 @@ static void flush_space(struct btrfs_fs_info *fs_info, break; case FLUSH_DELAYED_REFS_NR: case FLUSH_DELAYED_REFS: - trans = btrfs_join_transaction(root); + trans = btrfs_join_transaction_nostart(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); + if (ret == -ENOENT) + ret = 0; break; } if (state == FLUSH_DELAYED_REFS_NR) - nr = calc_delayed_refs_nr(fs_info, num_bytes); + btrfs_run_delayed_refs(trans, num_bytes); else - nr = 0; - btrfs_run_delayed_refs(trans, nr); + btrfs_run_delayed_refs(trans, 0); btrfs_end_transaction(trans); break; case ALLOC_CHUNK: case ALLOC_CHUNK_FORCE: - /* - * For metadata space on zoned filesystem, reaching here means we - * don't have enough space left in active_total_bytes. Try to - * activate a block group first, because we may have inactive - * block group already allocated. 
- */ - ret = btrfs_zoned_activate_one_bg(fs_info, space_info, false); - if (ret < 0) - break; - else if (ret == 1) - break; - trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); @@ -770,22 +787,6 @@ static void flush_space(struct btrfs_fs_info *fs_info, CHUNK_ALLOC_FORCE); btrfs_end_transaction(trans); - /* - * For metadata space on zoned filesystem, allocating a new chunk - * is not enough. We still need to activate the block * group. - * Active the newly allocated block group by (maybe) finishing - * a block group. - */ - if (ret == 1) { - ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true); - /* - * Revert to the original ret regardless we could finish - * one block group or not. - */ - if (ret >= 0) - ret = 1; - } - if (ret > 0 || ret == -ENOSPC) ret = 0; break; @@ -800,9 +801,18 @@ static void flush_space(struct btrfs_fs_info *fs_info, break; case COMMIT_TRANS: ASSERT(current->journal_info == NULL); - trans = btrfs_join_transaction(root); + /* + * We don't want to start a new transaction, just attach to the + * current one or wait until it fully commits in case its commit is + * happening at the moment. Note: we don't use a nostart join + * because that does not wait for a transaction to fully commit + * (only for it to be unblocked, state TRANS_STATE_UNBLOCKED). + */ + trans = btrfs_attach_transaction_barrier(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); + if (ret == -ENOENT) + ret = 0; break; } ret = btrfs_commit_transaction(trans); @@ -987,7 +997,8 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info, } /* - * maybe_fail_all_tickets - we've exhausted our flushing, start failing tickets + * We've exhausted our flushing, start failing tickets. + * * @fs_info - fs_info for this fs * @space_info - the space info we were flushing * @@ -1408,8 +1419,18 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, } } - /* Attempt to steal from the global rsv if we can.
*/ - if (!steal_from_global_rsv(fs_info, space_info, ticket)) { + /* + * Attempt to steal from the global rsv if we can, except if the fs was + * turned into error mode due to a transaction abort when flushing space + * above, in that case fail with the abort error instead of returning + * success to the caller if we can steal from the global rsv - this is + * just to have the caller fail immediately instead of later when trying to + * modify the fs, making it easier to debug -ENOSPC problems. + */ + if (BTRFS_FS_ERROR(fs_info)) { + ticket->error = BTRFS_FS_ERROR(fs_info); + remove_ticket(space_info, ticket); + } else if (!steal_from_global_rsv(fs_info, space_info, ticket)) { ticket->error = -ENOSPC; remove_ticket(space_info, ticket); } @@ -1741,7 +1762,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, * Try to reserve metadata bytes from the block_rsv's space. * * @fs_info: the filesystem - * @block_rsv: block_rsv we're allocating for + * @space_info: the space_info we're allocating for * @orig_bytes: number of bytes we want * @flush: whether or not we can flush to make our reservation * @@ -1753,21 +1774,19 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, * space already. */ int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *block_rsv, + struct btrfs_space_info *space_info, u64 orig_bytes, enum btrfs_reserve_flush_enum flush) { int ret; - ret = __reserve_bytes(fs_info, block_rsv->space_info, orig_bytes, flush); + ret = __reserve_bytes(fs_info, space_info, orig_bytes, flush); if (ret == -ENOSPC) { trace_btrfs_space_reservation(fs_info, "space_info:enospc", - block_rsv->space_info->flags, - orig_bytes, 1); + space_info->flags, orig_bytes, 1); if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) - btrfs_dump_space_info(fs_info, block_rsv->space_info, - orig_bytes, 0); + btrfs_dump_space_info(fs_info, space_info, orig_bytes, 0); } return ret; } |