Diffstat (limited to 'fs/iomap/buffered-io.c')
 -rw-r--r--  fs/iomap/buffered-io.c  90
 1 file changed, 47 insertions(+), 43 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d42f01e0fc1c..955f19e27e47 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1350,40 +1350,12 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
return filemap_write_and_wait_range(mapping, i->pos, end);
}
-static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
- bool *range_dirty)
+static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
{
- const struct iomap *srcmap = iomap_iter_srcmap(iter);
loff_t pos = iter->pos;
loff_t length = iomap_length(iter);
loff_t written = 0;
- /*
- * We must zero subranges of unwritten mappings that might be dirty in
- * pagecache from previous writes. We only know whether the entire range
- * was clean or not, however, and dirty folios may have been written
- * back or reclaimed at any point after mapping lookup.
- *
- * The easiest way to deal with this is to flush pagecache to trigger
- * any pending unwritten conversions and then grab the updated extents
- * from the fs. The flush may change the current mapping, so mark it
- * stale for the iterator to remap it for the next pass to handle
- * properly.
- *
- * Note that holes are treated the same as unwritten because zero range
- * is (ab)used for partial folio zeroing in some cases. Hole backed
- * post-eof ranges can be dirtied via mapped write and the flush
- * triggers writeback time post-eof zeroing.
- */
- if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) {
- if (*range_dirty) {
- *range_dirty = false;
- return iomap_zero_iter_flush_and_stale(iter);
- }
- /* range is clean and already zeroed, nothing to do */
- return length;
- }
-
do {
struct folio *folio;
int status;
@@ -1397,6 +1369,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
if (iter->iomap.flags & IOMAP_F_STALE)
break;
+ /* warn about zeroing folios beyond eof that won't write back */
+ WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size);
offset = offset_in_folio(folio, pos);
if (bytes > folio_size(folio) - offset)
bytes = folio_size(folio) - offset;
@@ -1429,28 +1403,58 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
.len = len,
.flags = IOMAP_ZERO,
};
+ struct address_space *mapping = inode->i_mapping;
+ unsigned int blocksize = i_blocksize(inode);
+ unsigned int off = pos & (blocksize - 1);
+ loff_t plen = min_t(loff_t, len, blocksize - off);
int ret;
bool range_dirty;
/*
- * Zero range wants to skip pre-zeroed (i.e. unwritten) mappings, but
- * pagecache must be flushed to ensure stale data from previous
- * buffered writes is not exposed. A flush is only required for certain
- * types of mappings, but checking pagecache after mapping lookup is
- * racy with writeback and reclaim.
+ * Zero range can skip mappings that are zero on disk so long as
+ * pagecache is clean. If pagecache was dirty prior to zero range, the
+ * mapping converts on writeback completion and so must be zeroed.
*
- * Therefore, check the entire range first and pass along whether any
- * part of it is dirty. If so and an underlying mapping warrants it,
- * flush the cache at that point. This trades off the occasional false
- * positive (and spurious flush, if the dirty data and mapping don't
- * happen to overlap) for simplicity in handling a relatively uncommon
- * situation.
+ * The simplest way to deal with this across a range is to flush
+ * pagecache and process the updated mappings. To avoid excessive
+ * flushing on partial eof zeroing, special case it to zero the
+ * unaligned start portion if already dirty in pagecache.
+ */
+ if (off &&
+ filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) {
+ iter.len = plen;
+ while ((ret = iomap_iter(&iter, ops)) > 0)
+ iter.processed = iomap_zero_iter(&iter, did_zero);
+
+ iter.len = len - (iter.pos - pos);
+ if (ret || !iter.len)
+ return ret;
+ }
+
+ /*
+ * To avoid an unconditional flush, check pagecache state and only flush
+ * if dirty and the fs returns a mapping that might convert on
+ * writeback.
*/
range_dirty = filemap_range_needs_writeback(inode->i_mapping,
- pos, pos + len - 1);
+ iter.pos, iter.pos + iter.len - 1);
+ while ((ret = iomap_iter(&iter, ops)) > 0) {
+ const struct iomap *srcmap = iomap_iter_srcmap(&iter);
- while ((ret = iomap_iter(&iter, ops)) > 0)
- iter.processed = iomap_zero_iter(&iter, did_zero, &range_dirty);
+ if (srcmap->type == IOMAP_HOLE ||
+ srcmap->type == IOMAP_UNWRITTEN) {
+ loff_t proc = iomap_length(&iter);
+
+ if (range_dirty) {
+ range_dirty = false;
+ proc = iomap_zero_iter_flush_and_stale(&iter);
+ }
+ iter.processed = proc;
+ continue;
+ }
+
+ iter.processed = iomap_zero_iter(&iter, did_zero);
+ }
return ret;
}
EXPORT_SYMBOL_GPL(iomap_zero_range);
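
For context, iomap_zero_range() is the exported entry point a filesystem calls to zero the byte range [pos, pos + len) through pagecache, passing its own iomap_ops so the iterator can look up mappings. The sketch below is illustrative only and not part of this patch: my_zero_range() and my_buffered_iomap_ops are placeholder names for a hypothetical caller and its fs-provided ops; only the iomap_zero_range() signature is taken from the code above. With this change, such a caller zeroing a sub-block range at EOF over already-dirty pagecache no longer triggers a flush of the whole range; only the unaligned start portion is zeroed directly.

	/*
	 * Illustrative caller sketch (not from this patch): zero the byte
	 * range [pos, pos + len) of an inode, e.g. around truncate or hole
	 * punch, using the filesystem's own iomap_ops.
	 */
	static int my_zero_range(struct inode *inode, loff_t pos, loff_t len)
	{
		bool did_zero = false;

		/* my_buffered_iomap_ops stands in for the fs-provided ops */
		return iomap_zero_range(inode, pos, len, &did_zero,
					&my_buffered_iomap_ops);
	}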