7 files changed, 80 insertions, 80 deletions
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index c40e226053cc..7ac34550c403 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -67,7 +67,8 @@ static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_in
  * Decant the list of folios to read into a rolling buffer.
  */
 static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
-					struct folio_queue *folioq)
+					struct folio_queue *folioq,
+					struct folio_batch *put_batch)
 {
 	unsigned int order, nr;
 	size_t size = 0;
@@ -82,6 +83,9 @@ static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
 		order = folio_order(folio);
 		folioq->orders[i] = order;
 		size += PAGE_SIZE << order;
+
+		if (!folio_batch_add(put_batch, folio))
+			folio_batch_release(put_batch);
 	}
 
 	for (int i = nr; i < folioq_nr_slots(folioq); i++)
@@ -120,6 +124,9 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
 		 * that we will need to release later - but we don't want to do
 		 * that until after we've started the I/O.
 		 */
+		struct folio_batch put_batch;
+
+		folio_batch_init(&put_batch);
 		while (rreq->submitted < subreq->start + rsize) {
 			struct folio_queue *tail = rreq->buffer_tail, *new;
 			size_t added;
@@ -132,10 +139,11 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
 			new->prev = tail;
 			tail->next = new;
 			rreq->buffer_tail = new;
-			added = netfs_load_buffer_from_ra(rreq, new);
+			added = netfs_load_buffer_from_ra(rreq, new, &put_batch);
 			rreq->iter.count += added;
 			rreq->submitted += added;
 		}
+		folio_batch_release(&put_batch);
 	}
 
 	subreq->len = rsize;
@@ -348,6 +356,7 @@ static int netfs_wait_for_read(struct netfs_io_request *rreq)
 static int netfs_prime_buffer(struct netfs_io_request *rreq)
 {
 	struct folio_queue *folioq;
+	struct folio_batch put_batch;
 	size_t added;
 
 	folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
@@ -360,39 +369,14 @@ static int netfs_prime_buffer(struct netfs_io_request *rreq)
 	rreq->submitted = rreq->start;
 	iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0);
 
-	added = netfs_load_buffer_from_ra(rreq, folioq);
+	folio_batch_init(&put_batch);
+	added = netfs_load_buffer_from_ra(rreq, folioq, &put_batch);
+	folio_batch_release(&put_batch);
 	rreq->iter.count += added;
 	rreq->submitted += added;
 	return 0;
 }
 
-/*
- * Drop the ref on each folio that we inherited from the VM readahead code.  We
- * still have the folio locks to pin the page until we complete the I/O.
- *
- * Note that we can't just release the batch in each queue struct as we use the
- * occupancy count in other places.
- */
-static void netfs_put_ra_refs(struct folio_queue *folioq)
-{
-	struct folio_batch fbatch;
-
-	folio_batch_init(&fbatch);
-	while (folioq) {
-		for (unsigned int slot = 0; slot < folioq_count(folioq); slot++) {
-			struct folio *folio = folioq_folio(folioq, slot);
-			if (!folio)
-				continue;
-			trace_netfs_folio(folio, netfs_folio_trace_read_put);
-			if (!folio_batch_add(&fbatch, folio))
-				folio_batch_release(&fbatch);
-		}
-		folioq = folioq->next;
-	}
-
-	folio_batch_release(&fbatch);
-}
-
 /**
  * netfs_readahead - Helper to manage a read request
  * @ractl: The description of the readahead request
@@ -436,9 +420,6 @@ void netfs_readahead(struct readahead_control *ractl)
 		goto cleanup_free;
 	netfs_read_to_pagecache(rreq);
 
-	/* Release the folio refs whilst we're waiting for the I/O. */
-	netfs_put_ra_refs(rreq->buffer);
-
 	netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
 	return;
 
@@ -646,7 +627,7 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
 	if (unlikely(always_fill)) {
 		if (pos - offset + len <= i_size)
 			return false; /* Page entirely before EOF */
-		zero_user_segment(&folio->page, 0, plen);
+		folio_zero_segment(folio, 0, plen);
 		folio_mark_uptodate(folio);
 		return true;
 	}
@@ -665,7 +646,7 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
 
 	return false;
 zero_out:
-	zero_user_segments(&folio->page, 0, offset, offset + len, plen);
+	folio_zero_segments(folio, 0, offset, offset + len, plen);
 	return true;
 }
 
@@ -732,7 +713,7 @@ retry:
 	if (folio_test_uptodate(folio))
 		goto have_folio;
 
-	/* If the page is beyond the EOF, we want to clear it - unless it's
+	/* If the folio is beyond the EOF, we want to clear it - unless it's
 	 * within the cache granule containing the EOF, in which case we need
 	 * to preload the granule.
 	 */
@@ -792,7 +773,7 @@ error:
 EXPORT_SYMBOL(netfs_write_begin);
 
 /*
- * Preload the data into a page we're proposing to write into.
+ * Preload the data into a folio we're proposing to write into.
  */
 int netfs_prefetch_for_write(struct file *file, struct folio *folio,
 			     size_t offset, size_t len)
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index b3910dfcb56d..b4826360a411 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -83,13 +83,13 @@ static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
  * netfs_perform_write - Copy data into the pagecache.
  * @iocb: The operation parameters
  * @iter: The source buffer
- * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
+ * @netfs_group: Grouping for dirty folios (eg. ceph snaps).
  *
- * Copy data into pagecache pages attached to the inode specified by @iocb.
+ * Copy data into pagecache folios attached to the inode specified by @iocb.
  * The caller must hold appropriate inode locks.
  *
- * Dirty pages are tagged with a netfs_folio struct if they're not up to date
- * to indicate the range modified.  Dirty pages may also be tagged with a
+ * Dirty folios are tagged with a netfs_folio struct if they're not up to date
+ * to indicate the range modified.  Dirty folios may also be tagged with a
  * netfs-specific grouping such that data from an old group gets flushed before
  * a new one is started.
  */
@@ -223,11 +223,11 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 		 * we try to read it.
 		 */
 		if (fpos >= ctx->zero_point) {
-			zero_user_segment(&folio->page, 0, offset);
+			folio_zero_segment(folio, 0, offset);
 			copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
 			if (unlikely(copied == 0))
 				goto copy_failed;
-			zero_user_segment(&folio->page, offset + copied, flen);
+			folio_zero_segment(folio, offset + copied, flen);
 			__netfs_set_group(folio, netfs_group);
 			folio_mark_uptodate(folio);
 			trace_netfs_folio(folio, netfs_modify_and_clear);
@@ -407,7 +407,7 @@ EXPORT_SYMBOL(netfs_perform_write);
  * netfs_buffered_write_iter_locked - write data to a file
  * @iocb:	IO state structure (file, offset, etc.)
  * @from:	iov_iter with data to write
- * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
+ * @netfs_group: Grouping for dirty folios (eg. ceph snaps).
  *
  * This function does all the work needed for actually writing data to a
  * file. It does all basic checks, removes SUID from the file, updates
@@ -491,7 +491,9 @@ EXPORT_SYMBOL(netfs_file_write_iter);
 
 /*
  * Notification that a previously read-only page is about to become writable.
- * Note that the caller indicates a single page of a multipage folio.
+ * The caller indicates the precise page that needs to be written to, but
+ * we only track group on a per-folio basis, so we block more often than
+ * we might otherwise.
  */
 vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group)
 {
@@ -501,7 +503,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file_inode(file);
 	struct netfs_inode *ictx = netfs_inode(inode);
-	vm_fault_t ret = VM_FAULT_RETRY;
+	vm_fault_t ret = VM_FAULT_NOPAGE;
 	int err;
 
 	_enter("%lx", folio->index);
@@ -510,21 +512,15 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
 
 	if (folio_lock_killable(folio) < 0)
 		goto out;
-	if (folio->mapping != mapping) {
-		folio_unlock(folio);
-		ret = VM_FAULT_NOPAGE;
-		goto out;
-	}
-
-	if (folio_wait_writeback_killable(folio)) {
-		ret = VM_FAULT_LOCKED;
-		goto out;
-	}
+	if (folio->mapping != mapping)
+		goto unlock;
+	if (folio_wait_writeback_killable(folio) < 0)
+		goto unlock;
 
 	/* Can we see a streaming write here? */
 	if (WARN_ON(!folio_test_uptodate(folio))) {
-		ret = VM_FAULT_SIGBUS | VM_FAULT_LOCKED;
-		goto out;
+		ret = VM_FAULT_SIGBUS;
+		goto unlock;
 	}
 
 	group = netfs_folio_group(folio);
@@ -559,5 +555,8 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
 out:
 	sb_end_pagefault(inode->i_sb);
 	return ret;
+unlock:
+	folio_unlock(folio);
+	goto out;
 }
 EXPORT_SYMBOL(netfs_page_mkwrite);
diff --git a/fs/netfs/fscache_volume.c b/fs/netfs/fscache_volume.c
index cb75c07b5281..ced14ac78cc1 100644
--- a/fs/netfs/fscache_volume.c
+++ b/fs/netfs/fscache_volume.c
@@ -322,8 +322,7 @@ maybe_wait:
 	}
 	return;
 no_wait:
-	clear_bit_unlock(FSCACHE_VOLUME_CREATING, &volume->flags);
-	wake_up_bit(&volume->flags, FSCACHE_VOLUME_CREATING);
+	clear_and_wake_up_bit(FSCACHE_VOLUME_CREATING, &volume->flags);
 }
 
 /*
diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
index 21eab56ee2f9..2249ecd09d0a 100644
--- a/fs/netfs/locking.c
+++ b/fs/netfs/locking.c
@@ -109,6 +109,7 @@ int netfs_start_io_write(struct inode *inode)
 		up_write(&inode->i_rwsem);
 		return -ERESTARTSYS;
 	}
+	downgrade_write(&inode->i_rwsem);
 	return 0;
 }
 EXPORT_SYMBOL(netfs_start_io_write);
@@ -123,7 +124,7 @@ EXPORT_SYMBOL(netfs_start_io_write);
 void netfs_end_io_write(struct inode *inode)
 	__releases(inode->i_rwsem)
 {
-	up_write(&inode->i_rwsem);
+	up_read(&inode->i_rwsem);
 }
 EXPORT_SYMBOL(netfs_end_io_write);
 
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 63280791de3b..78fe5796b2b2 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -102,7 +102,7 @@ void netfs_clear_buffer(struct netfs_io_request *rreq)
 
 	while ((p = rreq->buffer)) {
 		rreq->buffer = p->next;
-		for (int slot = 0; slot < folioq_nr_slots(p); slot++) {
+		for (int slot = 0; slot < folioq_count(p); slot++) {
 			struct folio *folio = folioq_folio(p, slot);
 			if (!folio)
 				continue;
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index b18c65ba5580..3cbb289535a8 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -77,6 +77,8 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
 			folio_unlock(folio);
 		}
 	}
+
+	folioq_clear(folioq, slot);
 }
 
 /*
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 0929d9fd4ce7..bf6d507578e5 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -317,6 +317,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
 	struct netfs_io_stream *stream;
 	struct netfs_group *fgroup; /* TODO: Use this with ceph */
 	struct netfs_folio *finfo;
+	size_t iter_off = 0;
 	size_t fsize = folio_size(folio), flen = fsize, foff = 0;
 	loff_t fpos = folio_pos(folio), i_size;
 	bool to_eof = false, streamw = false;
@@ -472,7 +473,12 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
 		if (choose_s < 0)
 			break;
 		stream = &wreq->io_streams[choose_s];
-		wreq->io_iter.iov_offset = stream->submit_off;
+
+		/* Advance the iterator(s). */
+		if (stream->submit_off > iter_off) {
+			iov_iter_advance(&wreq->io_iter, stream->submit_off - iter_off);
+			iter_off = stream->submit_off;
+		}
 
 		atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
 		stream->submit_extendable_to = fsize - stream->submit_off;
@@ -487,8 +493,8 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
 			debug = true;
 	}
 
-	wreq->io_iter.iov_offset = 0;
-	iov_iter_advance(&wreq->io_iter, fsize);
+	if (fsize > iter_off)
+		iov_iter_advance(&wreq->io_iter, fsize - iter_off);
 	atomic64_set(&wreq->issued_to, fpos + fsize);
 
 	if (!debug)
@@ -503,6 +509,30 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
 }
 
 /*
+ * End the issuing of writes, letting the collector know we're done.
+ */
+static void netfs_end_issue_write(struct netfs_io_request *wreq)
+{
+	bool needs_poke = true;
+
+	smp_wmb(); /* Write subreq lists before ALL_QUEUED. */
+	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
+
+	for (int s = 0; s < NR_IO_STREAMS; s++) {
+		struct netfs_io_stream *stream = &wreq->io_streams[s];
+
+		if (!stream->active)
+			continue;
+		if (!list_empty(&stream->subrequests))
+			needs_poke = false;
+		netfs_issue_write(wreq, stream);
+	}
+
+	if (needs_poke)
+		netfs_wake_write_collector(wreq, false);
+}
+
+/*
  * Write some of the pending data back to the server
  */
 int netfs_writepages(struct address_space *mapping,
@@ -553,10 +583,7 @@ int netfs_writepages(struct address_space *mapping,
 			break;
 	} while ((folio = writeback_iter(mapping, wbc, folio, &error)));
 
-	for (int s = 0; s < NR_IO_STREAMS; s++)
-		netfs_issue_write(wreq, &wreq->io_streams[s]);
-	smp_wmb(); /* Write lists before ALL_QUEUED. */
-	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
+	netfs_end_issue_write(wreq);
 
 	mutex_unlock(&ictx->wb_lock);
 
@@ -644,10 +671,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
 	if (writethrough_cache)
 		netfs_write_folio(wreq, wbc, writethrough_cache);
 
-	netfs_issue_write(wreq, &wreq->io_streams[0]);
-	netfs_issue_write(wreq, &wreq->io_streams[1]);
-	smp_wmb(); /* Write lists before ALL_QUEUED. */
-	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
+	netfs_end_issue_write(wreq);
 
 	mutex_unlock(&ictx->wb_lock);
 
@@ -693,13 +717,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
 			break;
 	}
 
-	netfs_issue_write(wreq, upload);
-
-	smp_wmb(); /* Write lists before ALL_QUEUED. */
-	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
-	if (list_empty(&upload->subrequests))
-		netfs_wake_write_collector(wreq, false);
-
+	netfs_end_issue_write(wreq);
 	_leave(" = %d", error);
 	return error;
 }