diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 161 |
1 files changed, 97 insertions, 64 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 01c9e4f743ba..1ce13f69fbec 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -137,8 +137,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, new_size); } -static void ext4_invalidatepage(struct page *page, unsigned int offset, - unsigned int length); static int __ext4_journalled_writepage(struct page *page, unsigned int len); static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, int pextents); @@ -186,7 +184,7 @@ void ext4_evict_inode(struct inode *inode) * journal. So although mm thinks everything is clean and * ready for reaping the inode might still have some pages to * write in the running transaction or waiting to be - * checkpointed. Thus calling jbd2_journal_invalidatepage() + * checkpointed. Thus calling jbd2_journal_invalidate_folio() * (via truncate_inode_pages()) to discard these buffers can * cause data loss. Also even if we did not discard these * buffers, we would have no way to find them after the inode @@ -1571,16 +1569,18 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, break; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + struct folio *folio = page_folio(page); - BUG_ON(!PageLocked(page)); - BUG_ON(PageWriteback(page)); + BUG_ON(!folio_test_locked(folio)); + BUG_ON(folio_test_writeback(folio)); if (invalidate) { - if (page_mapped(page)) - clear_page_dirty_for_io(page); - block_invalidatepage(page, 0, PAGE_SIZE); - ClearPageUptodate(page); + if (folio_mapped(folio)) + folio_clear_dirty_for_io(folio); + block_invalidate_folio(folio, 0, + folio_size(folio)); + folio_clear_uptodate(folio); } - unlock_page(page); + folio_unlock(folio); } pagevec_release(&pvec); } @@ -1971,6 +1971,7 @@ out_no_pagelock: static int ext4_writepage(struct page *page, struct writeback_control *wbc) { + struct folio *folio = page_folio(page); int ret = 0; loff_t size; unsigned int len; @@ -1980,8 +1981,8 @@ static int ext4_writepage(struct page *page, bool keep_towrite = false; if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { - inode->i_mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); - unlock_page(page); + folio_invalidate(folio, 0, folio_size(folio)); + folio_unlock(folio); return -EIO; } @@ -1993,6 +1994,15 @@ static int ext4_writepage(struct page *page, else len = PAGE_SIZE; + /* Should never happen but for bugs in other kernel subsystems */ + if (!page_has_buffers(page)) { + ext4_warning_inode(inode, + "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + return 0; + } + page_bufs = page_buffers(page); /* * We cannot do block allocation or other extent handling in this @@ -2594,6 +2604,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) wait_on_page_writeback(page); BUG_ON(PageWriteback(page)); + /* + * Should never happen but for buggy code in + * other subsystems that call + * set_page_dirty() without properly warning + * the file system first. See [1] for more + * information. + * + * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz + */ + if (!page_has_buffers(page)) { + ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + continue; + } + if (mpd->map.m_len == 0) mpd->first_page = page->index; mpd->next_page = page->index + 1; @@ -3182,40 +3208,39 @@ static void ext4_readahead(struct readahead_control *rac) ext4_mpage_readpages(inode, rac, NULL); } -static void ext4_invalidatepage(struct page *page, unsigned int offset, - unsigned int length) +static void ext4_invalidate_folio(struct folio *folio, size_t offset, + size_t length) { - trace_ext4_invalidatepage(page, offset, length); + trace_ext4_invalidate_folio(folio, offset, length); /* No journalling happens on data buffers when this function is used */ - WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); + WARN_ON(folio_buffers(folio) && buffer_jbd(folio_buffers(folio))); - block_invalidatepage(page, offset, length); + block_invalidate_folio(folio, offset, length); } -static int __ext4_journalled_invalidatepage(struct page *page, - unsigned int offset, - unsigned int length) +static int __ext4_journalled_invalidate_folio(struct folio *folio, + size_t offset, size_t length) { - journal_t *journal = EXT4_JOURNAL(page->mapping->host); + journal_t *journal = EXT4_JOURNAL(folio->mapping->host); - trace_ext4_journalled_invalidatepage(page, offset, length); + trace_ext4_journalled_invalidate_folio(folio, offset, length); /* * If it's a full truncate we just forget about the pending dirtying */ - if (offset == 0 && length == PAGE_SIZE) - ClearPageChecked(page); + if (offset == 0 && length == folio_size(folio)) + folio_clear_checked(folio); - return jbd2_journal_invalidatepage(journal, page, offset, length); + return jbd2_journal_invalidate_folio(journal, folio, offset, length); } /* Wrapper for aops... */ -static void ext4_journalled_invalidatepage(struct page *page, - unsigned int offset, - unsigned int length) +static void ext4_journalled_invalidate_folio(struct folio *folio, + size_t offset, + size_t length) { - WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0); + WARN_ON(__ext4_journalled_invalidate_folio(folio, offset, length) < 0); } static int ext4_releasepage(struct page *page, gfp_t wait) @@ -3409,6 +3434,13 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (ret < 0) return ret; out: + /* + * When inline encryption is enabled, sometimes I/O to an encrypted file + * has to be broken up to guarantee DUN contiguity. Handle this by + * limiting the length of the mapping returned. + */ + map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len); + ext4_set_iomap(inode, iomap, &map, offset, length, flags); return 0; @@ -3541,29 +3573,32 @@ const struct iomap_ops ext4_iomap_report_ops = { }; /* - * Pages can be marked dirty completely asynchronously from ext4's journalling - * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do - * much here because ->set_page_dirty is called under VFS locks. The page is - * not necessarily locked. + * Whenever the folio is being dirtied, corresponding buffers should already + * be attached to the transaction (we take care of this in ext4_page_mkwrite() + * and ext4_write_begin()). However we cannot move buffers to dirty transaction + * lists here because ->dirty_folio is called under VFS locks and the folio + * is not necessarily locked. * - * We cannot just dirty the page and leave attached buffers clean, because the + * We cannot just dirty the folio and leave attached buffers clean, because the * buffers' dirty state is "definitive". We cannot just set the buffers dirty * or jbddirty because all the journalling code will explode. * - * So what we do is to mark the page "pending dirty" and next time writepage + * So what we do is to mark the folio "pending dirty" and next time writepage * is called, propagate that into the buffers appropriately. */ -static int ext4_journalled_set_page_dirty(struct page *page) +static bool ext4_journalled_dirty_folio(struct address_space *mapping, + struct folio *folio) { - SetPageChecked(page); - return __set_page_dirty_nobuffers(page); + WARN_ON_ONCE(!page_has_buffers(&folio->page)); + folio_set_checked(folio); + return filemap_dirty_folio(mapping, folio); } -static int ext4_set_page_dirty(struct page *page) +static bool ext4_dirty_folio(struct address_space *mapping, struct folio *folio) { - WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page)); - WARN_ON_ONCE(!page_has_buffers(page)); - return __set_page_dirty_buffers(page); + WARN_ON_ONCE(!folio_test_locked(folio) && !folio_test_dirty(folio)); + WARN_ON_ONCE(!folio_buffers(folio)); + return block_dirty_folio(mapping, folio); } static int ext4_iomap_swap_activate(struct swap_info_struct *sis, @@ -3580,9 +3615,9 @@ static const struct address_space_operations ext4_aops = { .writepages = ext4_writepages, .write_begin = ext4_write_begin, .write_end = ext4_write_end, - .set_page_dirty = ext4_set_page_dirty, + .dirty_folio = ext4_dirty_folio, .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, + .invalidate_folio = ext4_invalidate_folio, .releasepage = ext4_releasepage, .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, @@ -3598,9 +3633,9 @@ static const struct address_space_operations ext4_journalled_aops = { .writepages = ext4_writepages, .write_begin = ext4_write_begin, .write_end = ext4_journalled_write_end, - .set_page_dirty = ext4_journalled_set_page_dirty, + .dirty_folio = ext4_journalled_dirty_folio, .bmap = ext4_bmap, - .invalidatepage = ext4_journalled_invalidatepage, + .invalidate_folio = ext4_journalled_invalidate_folio, .releasepage = ext4_releasepage, .direct_IO = noop_direct_IO, .is_partially_uptodate = block_is_partially_uptodate, @@ -3615,9 +3650,9 @@ static const struct address_space_operations ext4_da_aops = { .writepages = ext4_writepages, .write_begin = ext4_da_write_begin, .write_end = ext4_da_write_end, - .set_page_dirty = ext4_set_page_dirty, + .dirty_folio = ext4_dirty_folio, .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, + .invalidate_folio = ext4_invalidate_folio, .releasepage = ext4_releasepage, .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, @@ -3629,9 +3664,8 @@ static const struct address_space_operations ext4_da_aops = { static const struct address_space_operations ext4_dax_aops = { .writepages = ext4_dax_writepages, .direct_IO = noop_direct_IO, - .set_page_dirty = __set_page_dirty_no_writeback, + .dirty_folio = noop_dirty_folio, .bmap = ext4_bmap, - .invalidatepage = noop_invalidatepage, .swap_activate = ext4_iomap_swap_activate, }; @@ -5204,13 +5238,12 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) } /* - * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate - * buffers that are attached to a page stradding i_size and are undergoing + * In data=journal mode ext4_journalled_invalidate_folio() may fail to invalidate + * buffers that are attached to a folio straddling i_size and are undergoing * commit. In that case we have to wait for commit to finish and try again. */ static void ext4_wait_for_tail_page_commit(struct inode *inode) { - struct page *page; unsigned offset; journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = 0; @@ -5218,25 +5251,25 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) offset = inode->i_size & (PAGE_SIZE - 1); /* - * If the page is fully truncated, we don't need to wait for any commit - * (and we even should not as __ext4_journalled_invalidatepage() may - * strip all buffers from the page but keep the page dirty which can then - * confuse e.g. concurrent ext4_writepage() seeing dirty page without + * If the folio is fully truncated, we don't need to wait for any commit + * (and we even should not as __ext4_journalled_invalidate_folio() may + * strip all buffers from the folio but keep the folio dirty which can then + * confuse e.g. concurrent ext4_writepage() seeing dirty folio without * buffers). Also we don't need to wait for any commit if all buffers in - * the page remain valid. This is most beneficial for the common case of + * the folio remain valid. This is most beneficial for the common case of * blocksize == PAGESIZE. */ if (!offset || offset > (PAGE_SIZE - i_blocksize(inode))) return; while (1) { - page = find_lock_page(inode->i_mapping, + struct folio *folio = filemap_lock_folio(inode->i_mapping, inode->i_size >> PAGE_SHIFT); - if (!page) + if (!folio) return; - ret = __ext4_journalled_invalidatepage(page, offset, - PAGE_SIZE - offset); - unlock_page(page); - put_page(page); + ret = __ext4_journalled_invalidate_folio(folio, offset, + folio_size(folio) - offset); + folio_unlock(folio); + folio_put(folio); if (ret != -EBUSY) return; commit_tid = 0; |