diff options
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 147 |
1 files changed, 102 insertions, 45 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 39c9677c47d5..b923d0cec61c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -190,7 +190,7 @@ static void process_one_folio(struct btrfs_fs_info *fs_info, btrfs_folio_clamp_clear_writeback(fs_info, folio, start, len); if (folio != locked_folio && (page_ops & PAGE_UNLOCK)) - btrfs_folio_end_writer_lock(fs_info, folio, start, len); + btrfs_folio_end_lock(fs_info, folio, start, len); } static void __process_folios_contig(struct address_space *mapping, @@ -262,22 +262,23 @@ static noinline int lock_delalloc_folios(struct inode *inode, for (i = 0; i < found_folios; i++) { struct folio *folio = fbatch.folios[i]; - u32 len = end + 1 - start; + u64 range_start; + u32 range_len; if (folio == locked_folio) continue; - if (btrfs_folio_start_writer_lock(fs_info, folio, start, - len)) - goto out; - + folio_lock(folio); if (!folio_test_dirty(folio) || folio->mapping != mapping) { - btrfs_folio_end_writer_lock(fs_info, folio, start, - len); + folio_unlock(folio); goto out; } + range_start = max_t(u64, folio_pos(folio), start); + range_len = min_t(u64, folio_pos(folio) + folio_size(folio), + end + 1) - range_start; + btrfs_folio_set_lock(fs_info, folio, range_start, range_len); - processed_end = folio_pos(folio) + folio_size(folio) - 1; + processed_end = range_start + range_len - 1; } folio_batch_release(&fbatch); cond_resched(); @@ -437,7 +438,7 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le if (!btrfs_is_subpage(fs_info, folio->mapping)) folio_unlock(folio); else - btrfs_subpage_end_reader(fs_info, folio, start, len); + btrfs_folio_end_lock(fs_info, folio, start, len); } /* @@ -494,7 +495,7 @@ static void begin_folio_read(struct btrfs_fs_info *fs_info, struct folio *folio) return; ASSERT(folio_test_private(folio)); - btrfs_subpage_start_reader(fs_info, folio, folio_pos(folio), PAGE_SIZE); + btrfs_folio_set_lock(fs_info, folio, folio_pos(folio), PAGE_SIZE); } /* @@ -785,7 +786,7 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl, } if (bio_ctrl->wbc) - wbc_account_cgroup_owner(bio_ctrl->wbc, &folio->page, + wbc_account_cgroup_owner(bio_ctrl->wbc, folio, len); size -= len; @@ -1101,6 +1102,45 @@ int btrfs_read_folio(struct file *file, struct folio *folio) return ret; } +static void set_delalloc_bitmap(struct folio *folio, unsigned long *delalloc_bitmap, + u64 start, u32 len) +{ + struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); + const u64 folio_start = folio_pos(folio); + unsigned int start_bit; + unsigned int nbits; + + ASSERT(start >= folio_start && start + len <= folio_start + PAGE_SIZE); + start_bit = (start - folio_start) >> fs_info->sectorsize_bits; + nbits = len >> fs_info->sectorsize_bits; + ASSERT(bitmap_test_range_all_zero(delalloc_bitmap, start_bit, nbits)); + bitmap_set(delalloc_bitmap, start_bit, nbits); +} + +static bool find_next_delalloc_bitmap(struct folio *folio, + unsigned long *delalloc_bitmap, u64 start, + u64 *found_start, u32 *found_len) +{ + struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); + const u64 folio_start = folio_pos(folio); + const unsigned int bitmap_size = fs_info->sectors_per_page; + unsigned int start_bit; + unsigned int first_zero; + unsigned int first_set; + + ASSERT(start >= folio_start && start < folio_start + PAGE_SIZE); + + start_bit = (start - folio_start) >> fs_info->sectorsize_bits; + first_set = find_next_bit(delalloc_bitmap, bitmap_size, start_bit); + if (first_set >= bitmap_size) + return false; + + *found_start = folio_start + (first_set << fs_info->sectorsize_bits); + first_zero = find_next_zero_bit(delalloc_bitmap, bitmap_size, first_set); + *found_len = (first_zero - first_set) << fs_info->sectorsize_bits; + return true; +} + /* * helper for extent_writepage(), doing all of the delayed allocation setup. * @@ -1120,6 +1160,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping); const u64 page_start = folio_pos(folio); const u64 page_end = page_start + folio_size(folio) - 1; + unsigned long delalloc_bitmap = 0; /* * Save the last found delalloc end. As the delalloc end can go beyond * page boundary, thus we cannot rely on subpage bitmap to locate the @@ -1130,6 +1171,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, u64 delalloc_end = page_end; u64 delalloc_to_write = 0; int ret = 0; + int bit; /* Save the dirty bitmap as our submission bitmap will be a subset of it. */ if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) { @@ -1139,6 +1181,12 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, bio_ctrl->submit_bitmap = 1; } + for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) { + u64 start = page_start + (bit << fs_info->sectorsize_bits); + + btrfs_folio_set_lock(fs_info, folio, start, fs_info->sectorsize); + } + /* Lock all (subpage) delalloc ranges inside the folio first. */ while (delalloc_start < page_end) { delalloc_end = page_end; @@ -1147,9 +1195,8 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, delalloc_start = delalloc_end + 1; continue; } - btrfs_folio_set_writer_lock(fs_info, folio, delalloc_start, - min(delalloc_end, page_end) + 1 - - delalloc_start); + set_delalloc_bitmap(folio, &delalloc_bitmap, delalloc_start, + min(delalloc_end, page_end) + 1 - delalloc_start); last_delalloc_end = delalloc_end; delalloc_start = delalloc_end + 1; } @@ -1174,7 +1221,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, found_len = last_delalloc_end + 1 - found_start; found = true; } else { - found = btrfs_subpage_find_writer_locked(fs_info, folio, + found = find_next_delalloc_bitmap(folio, &delalloc_bitmap, delalloc_start, &found_start, &found_len); } if (!found) @@ -1306,7 +1353,14 @@ static int submit_one_sector(struct btrfs_inode *inode, free_extent_map(em); em = NULL; - btrfs_set_range_writeback(inode, filepos, filepos + sectorsize - 1); + /* + * Although the PageDirty bit is cleared before entering this + * function, subpage dirty bit is not cleared. + * So clear subpage dirty bit here so next time we won't submit + * a folio for a range already written to disk. + */ + btrfs_folio_clear_dirty(fs_info, folio, filepos, sectorsize); + btrfs_folio_set_writeback(fs_info, folio, filepos, sectorsize); /* * Above call should set the whole folio with writeback flag, even * just for a single subpage sector. @@ -1315,13 +1369,6 @@ static int submit_one_sector(struct btrfs_inode *inode, */ ASSERT(folio_test_writeback(folio)); - /* - * Although the PageDirty bit is cleared before entering this - * function, subpage dirty bit is not cleared. - * So clear subpage dirty bit here so next time we won't submit - * folio for range already written to disk. - */ - btrfs_folio_clear_dirty(fs_info, folio, filepos, sectorsize); submit_extent_folio(bio_ctrl, disk_bytenr, folio, sectorsize, filepos - folio_pos(folio)); return 0; @@ -1390,8 +1437,6 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, goto out; submitted_io = true; } - - btrfs_folio_assert_not_dirty(fs_info, folio, start, len); out: /* * If we didn't submitted any sector (>= i_size), folio dirty get @@ -1475,7 +1520,7 @@ done: * Only unlock ranges that are submitted. As there can be some async * submitted ranges inside the folio. */ - btrfs_folio_end_writer_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap); + btrfs_folio_end_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap); ASSERT(ret <= 0); return ret; } @@ -1707,7 +1752,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb, ret = bio_add_folio(&bbio->bio, folio, eb->len, eb->start - folio_pos(folio)); ASSERT(ret); - wbc_account_cgroup_owner(wbc, folio_page(folio, 0), eb->len); + wbc_account_cgroup_owner(wbc, folio, eb->len); folio_unlock(folio); } else { int num_folios = num_extent_folios(eb); @@ -1721,8 +1766,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb, folio_start_writeback(folio); ret = bio_add_folio(&bbio->bio, folio, eb->folio_size, 0); ASSERT(ret); - wbc_account_cgroup_owner(wbc, folio_page(folio, 0), - eb->folio_size); + wbc_account_cgroup_owner(wbc, folio, eb->folio_size); wbc->nr_to_write -= folio_nr_pages(folio); folio_unlock(folio); } @@ -2115,7 +2159,27 @@ retry: continue; } - if (wbc->sync_mode != WB_SYNC_NONE) { + /* + * For subpage case, compression can lead to mixed + * writeback and dirty flags, e.g: + * 0 32K 64K 96K 128K + * | |//////||/////| |//| + * + * In above case, [32K, 96K) is asynchronously submitted + * for compression, and [124K, 128K) needs to be written back. + * + * If we didn't wait wrtiteback for page 64K, [128K, 128K) + * won't be submitted as the page still has writeback flag + * and will be skipped in the next check. + * + * This mixed writeback and dirty case is only possible for + * subpage case. + * + * TODO: Remove this check after migrating compression to + * regular submission. + */ + if (wbc->sync_mode != WB_SYNC_NONE || + btrfs_is_subpage(inode_to_fs_info(inode), mapping)) { if (folio_test_writeback(folio)) submit_write_bio(bio_ctrl, 0); folio_wait_writeback(folio); @@ -2200,7 +2264,7 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f u32 cur_len = cur_end + 1 - cur; struct folio *folio; - folio = __filemap_get_folio(mapping, cur >> PAGE_SHIFT, 0, 0); + folio = filemap_get_folio(mapping, cur >> PAGE_SHIFT); /* * This shouldn't happen, the pages are pinned and locked, this @@ -2233,7 +2297,7 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f cur, cur_len, !ret); mapping_set_error(mapping, ret); } - btrfs_folio_end_writer_lock(fs_info, folio, cur, cur_len); + btrfs_folio_end_lock(fs_info, folio, cur, cur_len); if (ret < 0) found_error = true; next_page: @@ -2317,7 +2381,7 @@ int extent_invalidate_folio(struct extent_io_tree *tree, * to drop the page. */ static bool try_release_extent_state(struct extent_io_tree *tree, - struct folio *folio, gfp_t mask) + struct folio *folio) { u64 start = folio_pos(folio); u64 end = start + PAGE_SIZE - 1; @@ -2428,7 +2492,7 @@ next: cond_resched(); } } - return try_release_extent_state(io_tree, folio, mask); + return try_release_extent_state(io_tree, folio); } static void __free_extent_buffer(struct extent_buffer *eb) @@ -2442,7 +2506,7 @@ static int extent_buffer_under_io(const struct extent_buffer *eb) test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); } -static bool folio_range_has_eb(struct btrfs_fs_info *fs_info, struct folio *folio) +static bool folio_range_has_eb(struct folio *folio) { struct btrfs_subpage *subpage; @@ -2452,12 +2516,6 @@ static bool folio_range_has_eb(struct btrfs_fs_info *fs_info, struct folio *foli subpage = folio_get_private(folio); if (atomic_read(&subpage->eb_refs)) return true; - /* - * Even there is no eb refs here, we may still have - * end_folio_read() call relying on page::private. - */ - if (atomic_read(&subpage->readers)) - return true; } return false; } @@ -2516,7 +2574,7 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo * We can only detach the folio private if there are no other ebs in the * page range and no unfinished IO. */ - if (!folio_range_has_eb(fs_info, folio)) + if (!folio_range_has_eb(folio)) btrfs_detach_subpage(fs_info, folio); spin_unlock(&folio->mapping->i_private_lock); @@ -3121,7 +3179,7 @@ out: } /* * Now all pages of that extent buffer is unmapped, set UNMAPPED flag, - * so it can be cleaned up without utlizing page->mapping. + * so it can be cleaned up without utilizing page->mapping. */ set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags); @@ -4221,7 +4279,6 @@ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, u64 owner_root, u64 gen, int level) { struct btrfs_tree_parent_check check = { - .has_first_key = 0, .level = level, .transid = gen }; |