aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c374
1 files changed, 133 insertions, 241 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4fb521d91b06..588c353d2969 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -37,33 +37,30 @@
#include "file.h"
#include "super.h"
-/* simple helper to fault in pages and copy. This should go away
- * and be replaced with calls into generic code.
+/*
+ * Helper to fault in page and copy. This should go away and be replaced with
+ * calls into generic code.
*/
static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
- struct page **prepared_pages,
- struct iov_iter *i)
+ struct folio *folio, struct iov_iter *i)
{
size_t copied = 0;
size_t total_copied = 0;
- int pg = 0;
int offset = offset_in_page(pos);
while (write_bytes > 0) {
- size_t count = min_t(size_t,
- PAGE_SIZE - offset, write_bytes);
- struct page *page = prepared_pages[pg];
+ size_t count = min_t(size_t, PAGE_SIZE - offset, write_bytes);
/*
* Copy data from userspace to the current page
*/
- copied = copy_page_from_iter_atomic(page, offset, count, i);
+ copied = copy_folio_from_iter_atomic(folio, offset, count, i);
/* Flush processor's dcache for this page */
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
/*
* if we get a partial write, we can end up with
- * partially up to date pages. These add
+ * partially up to date page. These add
* a lot of complexity, so make sure they don't
* happen by forcing this copy to be retried.
*
@@ -71,7 +68,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
* back to page at a time copies after we return 0.
*/
if (unlikely(copied < count)) {
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
iov_iter_revert(i, copied);
copied = 0;
}
@@ -82,54 +79,44 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
write_bytes -= copied;
total_copied += copied;
offset += copied;
- if (offset == PAGE_SIZE) {
- pg++;
- offset = 0;
- }
}
return total_copied;
}
/*
- * unlocks pages after btrfs_file_write is done with them
+ * Unlock folio after btrfs_file_write() is done with it.
*/
-static void btrfs_drop_pages(struct btrfs_fs_info *fs_info,
- struct page **pages, size_t num_pages,
+static void btrfs_drop_folio(struct btrfs_fs_info *fs_info, struct folio *folio,
u64 pos, u64 copied)
{
- size_t i;
u64 block_start = round_down(pos, fs_info->sectorsize);
u64 block_len = round_up(pos + copied, fs_info->sectorsize) - block_start;
ASSERT(block_len <= U32_MAX);
- for (i = 0; i < num_pages; i++) {
- /* page checked is some magic around finding pages that
- * have been modified without going through btrfs_set_page_dirty
- * clear it here. There should be no need to mark the pages
- * accessed as prepare_pages should have marked them accessed
- * in prepare_pages via find_or_create_page()
- */
- btrfs_folio_clamp_clear_checked(fs_info, page_folio(pages[i]),
- block_start, block_len);
- unlock_page(pages[i]);
- put_page(pages[i]);
- }
+ /*
+ * Folio checked is some magic around finding folios that have been
+ * modified without going through btrfs_dirty_folio(). Clear it here.
+ * There should be no need to mark the pages accessed as
+ * prepare_one_folio() should have marked them accessed in
+ * prepare_one_folio() via find_or_create_page()
+ */
+ btrfs_folio_clamp_clear_checked(fs_info, folio, block_start, block_len);
+ folio_unlock(folio);
+ folio_put(folio);
}
/*
* After btrfs_copy_from_user(), update the following things for delalloc:
- * - Mark newly dirtied pages as DELALLOC in the io tree.
+ * - Mark newly dirtied folio as DELALLOC in the io tree.
* Used to advise which range is to be written back.
- * - Mark modified pages as Uptodate/Dirty and not needing COW fixup
+ * - Mark modified folio as Uptodate/Dirty and not needing COW fixup
* - Update inode size for past EOF write
*/
-int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
- size_t num_pages, loff_t pos, size_t write_bytes,
- struct extent_state **cached, bool noreserve)
+int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos,
+ size_t write_bytes, struct extent_state **cached, bool noreserve)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret = 0;
- int i;
u64 num_bytes;
u64 start_pos;
u64 end_of_last_block;
@@ -147,6 +134,8 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
num_bytes = round_up(write_bytes + pos - start_pos,
fs_info->sectorsize);
ASSERT(num_bytes <= U32_MAX);
+ ASSERT(folio_pos(folio) <= pos &&
+ folio_pos(folio) + folio_size(folio) >= pos + write_bytes);
end_of_last_block = start_pos + num_bytes - 1;
@@ -163,16 +152,9 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
if (ret)
return ret;
- for (i = 0; i < num_pages; i++) {
- struct page *p = pages[i];
-
- btrfs_folio_clamp_set_uptodate(fs_info, page_folio(p),
- start_pos, num_bytes);
- btrfs_folio_clamp_clear_checked(fs_info, page_folio(p),
- start_pos, num_bytes);
- btrfs_folio_clamp_set_dirty(fs_info, page_folio(p),
- start_pos, num_bytes);
- }
+ btrfs_folio_clamp_set_uptodate(fs_info, folio, start_pos, num_bytes);
+ btrfs_folio_clamp_clear_checked(fs_info, folio, start_pos, num_bytes);
+ btrfs_folio_clamp_set_dirty(fs_info, folio, start_pos, num_bytes);
/*
* we've only changed i_size in ram, and we haven't updated
@@ -851,53 +833,47 @@ out:
}
/*
- * on error we return an unlocked page and the error value
- * on success we return a locked page and 0
+ * On error return an unlocked folio and the error value
+ * On success return a locked folio and 0
*/
-static int prepare_uptodate_page(struct inode *inode,
- struct page *page, u64 pos,
- bool force_uptodate)
+static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64 pos,
+ u64 len, bool force_uptodate)
{
- struct folio *folio = page_folio(page);
+ u64 clamp_start = max_t(u64, pos, folio_pos(folio));
+ u64 clamp_end = min_t(u64, pos + len, folio_pos(folio) + folio_size(folio));
int ret = 0;
- if (((pos & (PAGE_SIZE - 1)) || force_uptodate) &&
- !PageUptodate(page)) {
- ret = btrfs_read_folio(NULL, folio);
- if (ret)
- return ret;
- lock_page(page);
- if (!PageUptodate(page)) {
- unlock_page(page);
- return -EIO;
- }
-
- /*
- * Since btrfs_read_folio() will unlock the folio before it
- * returns, there is a window where btrfs_release_folio() can be
- * called to release the page. Here we check both inode
- * mapping and PagePrivate() to make sure the page was not
- * released.
- *
- * The private flag check is essential for subpage as we need
- * to store extra bitmap using folio private.
- */
- if (page->mapping != inode->i_mapping || !folio_test_private(folio)) {
- unlock_page(page);
- return -EAGAIN;
- }
- }
- return 0;
-}
+ if (folio_test_uptodate(folio))
+ return 0;
-static fgf_t get_prepare_fgp_flags(bool nowait)
-{
- fgf_t fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
+ if (!force_uptodate &&
+ IS_ALIGNED(clamp_start, PAGE_SIZE) &&
+ IS_ALIGNED(clamp_end, PAGE_SIZE))
+ return 0;
- if (nowait)
- fgp_flags |= FGP_NOWAIT;
+ ret = btrfs_read_folio(NULL, folio);
+ if (ret)
+ return ret;
+ folio_lock(folio);
+ if (!folio_test_uptodate(folio)) {
+ folio_unlock(folio);
+ return -EIO;
+ }
- return fgp_flags;
+ /*
+ * Since btrfs_read_folio() will unlock the folio before it returns,
+ * there is a window where btrfs_release_folio() can be called to
+ * release the page. Here we check both inode mapping and page
+ * private to make sure the page was not released.
+ *
+ * The private flag check is essential for subpage as we need to store
+ * extra bitmap using folio private.
+ */
+ if (folio->mapping != inode->i_mapping || !folio_test_private(folio)) {
+ folio_unlock(folio);
+ return -EAGAIN;
+ }
+ return 0;
}
static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
@@ -914,89 +890,67 @@ static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
}
/*
- * this just gets pages into the page cache and locks them down.
+ * Get folio into the page cache and lock it.
*/
-static noinline int prepare_pages(struct inode *inode, struct page **pages,
- size_t num_pages, loff_t pos,
- size_t write_bytes, bool force_uptodate,
- bool nowait)
+static noinline int prepare_one_folio(struct inode *inode, struct folio **folio_ret,
+ loff_t pos, size_t write_bytes,
+ bool force_uptodate, bool nowait)
{
- int i;
unsigned long index = pos >> PAGE_SHIFT;
gfp_t mask = get_prepare_gfp_flags(inode, nowait);
- fgf_t fgp_flags = get_prepare_fgp_flags(nowait);
+ fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN);
+ struct folio *folio;
int ret = 0;
- int faili;
- for (i = 0; i < num_pages; i++) {
again:
- pages[i] = pagecache_get_page(inode->i_mapping, index + i,
- fgp_flags, mask | __GFP_WRITE);
- if (!pages[i]) {
- faili = i - 1;
- if (nowait)
- ret = -EAGAIN;
- else
- ret = -ENOMEM;
- goto fail;
- }
-
- ret = set_page_extent_mapped(pages[i]);
- if (ret < 0) {
- faili = i;
- goto fail;
- }
-
- if (i == 0)
- ret = prepare_uptodate_page(inode, pages[i], pos,
- force_uptodate);
- if (!ret && i == num_pages - 1)
- ret = prepare_uptodate_page(inode, pages[i],
- pos + write_bytes, false);
- if (ret) {
- put_page(pages[i]);
- if (!nowait && ret == -EAGAIN) {
- ret = 0;
- goto again;
- }
- faili = i - 1;
- goto fail;
+ folio = __filemap_get_folio(inode->i_mapping, index, fgp_flags, mask);
+ if (IS_ERR(folio)) {
+ if (nowait)
+ ret = -EAGAIN;
+ else
+ ret = PTR_ERR(folio);
+ return ret;
+ }
+ /* Only support page sized folio yet. */
+ ASSERT(folio_order(folio) == 0);
+ ret = set_folio_extent_mapped(folio);
+ if (ret < 0) {
+ folio_unlock(folio);
+ folio_put(folio);
+ return ret;
+ }
+ ret = prepare_uptodate_folio(inode, folio, pos, write_bytes, force_uptodate);
+ if (ret) {
+ /* The folio is already unlocked. */
+ folio_put(folio);
+ if (!nowait && ret == -EAGAIN) {
+ ret = 0;
+ goto again;
}
- wait_on_page_writeback(pages[i]);
+ return ret;
}
-
+ *folio_ret = folio;
return 0;
-fail:
- while (faili >= 0) {
- unlock_page(pages[faili]);
- put_page(pages[faili]);
- faili--;
- }
- return ret;
-
}
/*
- * This function locks the extent and properly waits for data=ordered extents
- * to finish before allowing the pages to be modified if need.
+ * Locks the extent and properly waits for data=ordered extents to finish
+ * before allowing the folios to be modified if need.
*
- * The return value:
+ * Return:
* 1 - the extent is locked
* 0 - the extent is not locked, and everything is OK
- * -EAGAIN - need re-prepare the pages
- * the other < 0 number - Something wrong happens
+ * -EAGAIN - need to prepare the folios again
*/
static noinline int
-lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
- size_t num_pages, loff_t pos,
- size_t write_bytes,
+lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio,
+ loff_t pos, size_t write_bytes,
u64 *lockstart, u64 *lockend, bool nowait,
struct extent_state **cached_state)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 start_pos;
u64 last_pos;
- int i;
int ret = 0;
start_pos = round_down(pos, fs_info->sectorsize);
@@ -1008,12 +962,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
if (nowait) {
if (!try_lock_extent(&inode->io_tree, start_pos, last_pos,
cached_state)) {
- for (i = 0; i < num_pages; i++) {
- unlock_page(pages[i]);
- put_page(pages[i]);
- pages[i] = NULL;
- }
-
+ folio_unlock(folio);
+ folio_put(folio);
return -EAGAIN;
}
} else {
@@ -1027,10 +977,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
ordered->file_offset <= last_pos) {
unlock_extent(&inode->io_tree, start_pos, last_pos,
cached_state);
- for (i = 0; i < num_pages; i++) {
- unlock_page(pages[i]);
- put_page(pages[i]);
- }
+ folio_unlock(folio);
+ folio_put(folio);
btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
return -EAGAIN;
@@ -1044,11 +992,10 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
}
/*
- * We should be called after prepare_pages() which should have locked
+ * We should be called after prepare_one_folio() which should have locked
* all pages in the range.
*/
- for (i = 0; i < num_pages; i++)
- WARN_ON(!PageLocked(pages[i]));
+ WARN_ON(!folio_test_locked(folio));
return ret;
}
@@ -1120,27 +1067,7 @@ void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
btrfs_drew_write_unlock(&inode->root->snapshot_lock);
}
-static void update_time_for_write(struct inode *inode)
-{
- struct timespec64 now, ts;
-
- if (IS_NOCMTIME(inode))
- return;
-
- now = current_time(inode);
- ts = inode_get_mtime(inode);
- if (!timespec64_equal(&ts, &now))
- inode_set_mtime_to_ts(inode, now);
-
- ts = inode_get_ctime(inode);
- if (!timespec64_equal(&ts, &now))
- inode_set_ctime_to_ts(inode, now);
-
- if (IS_I_VERSION(inode))
- inode_inc_iversion(inode);
-}
-
-int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count)
+int btrfs_write_check(struct kiocb *iocb, size_t count)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
@@ -1170,7 +1097,10 @@ int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count)
* need to start yet another transaction to update the inode as we will
* update the inode when we finish writing whatever data we write.
*/
- update_time_for_write(inode);
+ if (!IS_NOCMTIME(inode)) {
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
+ inode_inc_iversion(inode);
+ }
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
@@ -1192,20 +1122,17 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
loff_t pos;
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
- struct page **pages = NULL;
struct extent_changeset *data_reserved = NULL;
u64 release_bytes = 0;
u64 lockstart;
u64 lockend;
size_t num_written = 0;
- int nrptrs;
ssize_t ret;
- bool only_release_metadata = false;
- bool force_page_uptodate = false;
loff_t old_isize = i_size_read(inode);
unsigned int ilock_flags = 0;
const bool nowait = (iocb->ki_flags & IOCB_NOWAIT);
unsigned int bdp_flags = (nowait ? BDP_ASYNC : 0);
+ bool only_release_metadata = false;
if (nowait)
ilock_flags |= BTRFS_ILOCK_TRY;
@@ -1218,38 +1145,26 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
if (ret <= 0)
goto out;
- ret = btrfs_write_check(iocb, i, ret);
+ ret = btrfs_write_check(iocb, ret);
if (ret < 0)
goto out;
pos = iocb->ki_pos;
- nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
- PAGE_SIZE / (sizeof(struct page *)));
- nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
- nrptrs = max(nrptrs, 8);
- pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
- if (!pages) {
- ret = -ENOMEM;
- goto out;
- }
-
while (iov_iter_count(i) > 0) {
struct extent_state *cached_state = NULL;
size_t offset = offset_in_page(pos);
size_t sector_offset;
- size_t write_bytes = min(iov_iter_count(i),
- nrptrs * (size_t)PAGE_SIZE -
- offset);
- size_t num_pages;
+ size_t write_bytes = min(iov_iter_count(i), PAGE_SIZE - offset);
size_t reserve_bytes;
- size_t dirty_pages;
size_t copied;
size_t dirty_sectors;
size_t num_sectors;
+ struct folio *folio = NULL;
int extents_locked;
+ bool force_page_uptodate = false;
/*
- * Fault pages before locking them in prepare_pages
+ * Fault pages before locking them in prepare_one_folio()
* to avoid recursive lock
*/
if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) {
@@ -1288,8 +1203,6 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
only_release_metadata = true;
}
- num_pages = DIV_ROUND_UP(write_bytes + offset, PAGE_SIZE);
- WARN_ON(num_pages > nrptrs);
reserve_bytes = round_up(write_bytes + sector_offset,
fs_info->sectorsize);
WARN_ON(reserve_bytes == 0);
@@ -1317,23 +1230,17 @@ again:
break;
}
- /*
- * This is going to setup the pages array with the number of
- * pages we want, so we don't really need to worry about the
- * contents of pages from loop to loop
- */
- ret = prepare_pages(inode, pages, num_pages,
- pos, write_bytes, force_page_uptodate, false);
+ ret = prepare_one_folio(inode, &folio, pos, write_bytes,
+ force_page_uptodate, false);
if (ret) {
btrfs_delalloc_release_extents(BTRFS_I(inode),
reserve_bytes);
break;
}
- extents_locked = lock_and_cleanup_extent_if_need(
- BTRFS_I(inode), pages,
- num_pages, pos, write_bytes, &lockstart,
- &lockend, nowait, &cached_state);
+ extents_locked = lock_and_cleanup_extent_if_need(BTRFS_I(inode),
+ folio, pos, write_bytes, &lockstart,
+ &lockend, nowait, &cached_state);
if (extents_locked < 0) {
if (!nowait && extents_locked == -EAGAIN)
goto again;
@@ -1344,28 +1251,18 @@ again:
break;
}
- copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
+ copied = btrfs_copy_from_user(pos, write_bytes, folio, i);
num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
dirty_sectors = round_up(copied + sector_offset,
fs_info->sectorsize);
dirty_sectors = BTRFS_BYTES_TO_BLKS(fs_info, dirty_sectors);
- /*
- * if we have trouble faulting in the pages, fall
- * back to one page at a time
- */
- if (copied < write_bytes)
- nrptrs = 1;
-
if (copied == 0) {
force_page_uptodate = true;
dirty_sectors = 0;
- dirty_pages = 0;
} else {
force_page_uptodate = false;
- dirty_pages = DIV_ROUND_UP(copied + offset,
- PAGE_SIZE);
}
if (num_sectors > dirty_sectors) {
@@ -1375,13 +1272,10 @@ again:
btrfs_delalloc_release_metadata(BTRFS_I(inode),
release_bytes, true);
} else {
- u64 __pos;
-
- __pos = round_down(pos,
- fs_info->sectorsize) +
- (dirty_pages << PAGE_SHIFT);
+ u64 release_start = round_up(pos + copied,
+ fs_info->sectorsize);
btrfs_delalloc_release_space(BTRFS_I(inode),
- data_reserved, __pos,
+ data_reserved, release_start,
release_bytes, true);
}
}
@@ -1389,15 +1283,14 @@ again:
release_bytes = round_up(copied + sector_offset,
fs_info->sectorsize);
- ret = btrfs_dirty_pages(BTRFS_I(inode), pages,
- dirty_pages, pos, copied,
+ ret = btrfs_dirty_folio(BTRFS_I(inode), folio, pos, copied,
&cached_state, only_release_metadata);
/*
* If we have not locked the extent range, because the range's
* start offset is >= i_size, we might still have a non-NULL
* cached extent state, acquired while marking the extent range
- * as delalloc through btrfs_dirty_pages(). Therefore free any
+ * as delalloc through btrfs_dirty_page(). Therefore free any
* possible cached extent state to avoid a memory leak.
*/
if (extents_locked)
@@ -1408,7 +1301,7 @@ again:
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
if (ret) {
- btrfs_drop_pages(fs_info, pages, num_pages, pos, copied);
+ btrfs_drop_folio(fs_info, folio, pos, copied);
break;
}
@@ -1416,7 +1309,7 @@ again:
if (only_release_metadata)
btrfs_check_nocow_unlock(BTRFS_I(inode));
- btrfs_drop_pages(fs_info, pages, num_pages, pos, copied);
+ btrfs_drop_folio(fs_info, folio, pos, copied);
cond_resched();
@@ -1424,8 +1317,6 @@ again:
num_written += copied;
}
- kfree(pages);
-
if (release_bytes) {
if (only_release_metadata) {
btrfs_check_nocow_unlock(BTRFS_I(inode));
@@ -1470,7 +1361,7 @@ static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
if (ret || encoded->len == 0)
goto out;
- ret = btrfs_write_check(iocb, from, encoded->len);
+ ret = btrfs_write_check(iocb, encoded->len);
if (ret < 0)
goto out;
@@ -3802,6 +3693,7 @@ const struct file_operations btrfs_file_operations = {
.compat_ioctl = btrfs_compat_ioctl,
#endif
.remap_file_range = btrfs_remap_file_range,
+ .uring_cmd = btrfs_uring_cmd,
.fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC,
};