aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/libxfs/xfs_ag.c75
-rw-r--r--fs/xfs/libxfs/xfs_ag.h11
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c2
-rw-r--r--fs/xfs/scrub/bmap_repair.c2
-rw-r--r--fs/xfs/scrub/repair.c8
-rw-r--r--fs/xfs/xfs_aops.c4
-rw-r--r--fs/xfs/xfs_bmap_util.c10
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_buf_item_recover.c70
-rw-r--r--fs/xfs/xfs_file.c146
-rw-r--r--fs/xfs/xfs_filestream.c99
-rw-r--r--fs/xfs/xfs_fsops.c20
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_inode.h5
-rw-r--r--fs/xfs/xfs_ioctl.c4
-rw-r--r--fs/xfs/xfs_iomap.c69
-rw-r--r--fs/xfs/xfs_log_recover.c7
-rw-r--r--fs/xfs/xfs_mount.c9
-rw-r--r--fs/xfs/xfs_trace.h15
19 files changed, 330 insertions, 230 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 5f0494702e0b..5ca8d0106827 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -185,17 +185,20 @@ out:
}
/*
- * Free up the per-ag resources associated with the mount structure.
+ * Free up the per-ag resources within the specified AG range.
*/
void
-xfs_free_perag(
- struct xfs_mount *mp)
+xfs_free_perag_range(
+ struct xfs_mount *mp,
+ xfs_agnumber_t first_agno,
+ xfs_agnumber_t end_agno)
+
{
- struct xfs_perag *pag;
xfs_agnumber_t agno;
- for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
- pag = xa_erase(&mp->m_perags, agno);
+ for (agno = first_agno; agno < end_agno; agno++) {
+ struct xfs_perag *pag = xa_erase(&mp->m_perags, agno);
+
ASSERT(pag);
XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
xfs_defer_drain_free(&pag->pag_intents_drain);
@@ -270,54 +273,37 @@ xfs_agino_range(
return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last);
}
-/*
- * Free perag within the specified AG range, it is only used to free unused
- * perags under the error handling path.
- */
-void
-xfs_free_unused_perag_range(
+int
+xfs_update_last_ag_size(
struct xfs_mount *mp,
- xfs_agnumber_t agstart,
- xfs_agnumber_t agend)
+ xfs_agnumber_t prev_agcount)
{
- struct xfs_perag *pag;
- xfs_agnumber_t index;
+ struct xfs_perag *pag = xfs_perag_grab(mp, prev_agcount - 1);
- for (index = agstart; index < agend; index++) {
- pag = xa_erase(&mp->m_perags, index);
- if (!pag)
- break;
- xfs_buf_cache_destroy(&pag->pag_bcache);
- xfs_defer_drain_free(&pag->pag_intents_drain);
- kfree(pag);
- }
+ if (!pag)
+ return -EFSCORRUPTED;
+ pag->block_count = __xfs_ag_block_count(mp, prev_agcount - 1,
+ mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks);
+ __xfs_agino_range(mp, pag->block_count, &pag->agino_min,
+ &pag->agino_max);
+ xfs_perag_rele(pag);
+ return 0;
}
int
xfs_initialize_perag(
struct xfs_mount *mp,
- xfs_agnumber_t agcount,
+ xfs_agnumber_t old_agcount,
+ xfs_agnumber_t new_agcount,
xfs_rfsblock_t dblocks,
xfs_agnumber_t *maxagi)
{
struct xfs_perag *pag;
xfs_agnumber_t index;
- xfs_agnumber_t first_initialised = NULLAGNUMBER;
int error;
- /*
- * Walk the current per-ag tree so we don't try to initialise AGs
- * that already exist (growfs case). Allocate and insert all the
- * AGs we don't find ready for initialisation.
- */
- for (index = 0; index < agcount; index++) {
- pag = xfs_perag_get(mp, index);
- if (pag) {
- xfs_perag_put(pag);
- continue;
- }
-
- pag = kzalloc(sizeof(*pag), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ for (index = old_agcount; index < new_agcount; index++) {
+ pag = kzalloc(sizeof(*pag), GFP_KERNEL);
if (!pag) {
error = -ENOMEM;
goto out_unwind_new_pags;
@@ -353,21 +339,17 @@ xfs_initialize_perag(
/* Active ref owned by mount indicates AG is online. */
atomic_set(&pag->pag_active_ref, 1);
- /* first new pag is fully initialized */
- if (first_initialised == NULLAGNUMBER)
- first_initialised = index;
-
/*
* Pre-calculated geometry
*/
- pag->block_count = __xfs_ag_block_count(mp, index, agcount,
+ pag->block_count = __xfs_ag_block_count(mp, index, new_agcount,
dblocks);
pag->min_block = XFS_AGFL_BLOCK(mp);
__xfs_agino_range(mp, pag->block_count, &pag->agino_min,
&pag->agino_max);
}
- index = xfs_set_inode_alloc(mp, agcount);
+ index = xfs_set_inode_alloc(mp, new_agcount);
if (maxagi)
*maxagi = index;
@@ -381,8 +363,7 @@ out_remove_pag:
out_free_pag:
kfree(pag);
out_unwind_new_pags:
- /* unwind any prior newly initialized pags */
- xfs_free_unused_perag_range(mp, first_initialised, agcount);
+ xfs_free_perag_range(mp, old_agcount, index);
return error;
}
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index d9cccd093b60..9edfe0e96439 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -144,12 +144,13 @@ __XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA)
__XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES)
__XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET)
-void xfs_free_unused_perag_range(struct xfs_mount *mp, xfs_agnumber_t agstart,
- xfs_agnumber_t agend);
-int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
- xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi);
+int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t old_agcount,
+ xfs_agnumber_t agcount, xfs_rfsblock_t dcount,
+ xfs_agnumber_t *maxagi);
+void xfs_free_perag_range(struct xfs_mount *mp, xfs_agnumber_t first_agno,
+ xfs_agnumber_t end_agno);
int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
-void xfs_free_perag(struct xfs_mount *mp);
+int xfs_update_last_ag_size(struct xfs_mount *mp, xfs_agnumber_t prev_agcount);
/* Passive AG references */
struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 04f64cf9777e..22bdbb3e9980 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1923,7 +1923,7 @@ restart:
error = -EFSCORRUPTED;
goto error0;
}
- if (flen < bestrlen)
+ if (flen <= bestrlen)
break;
busy = xfs_alloc_compute_aligned(args, fbno, flen,
&rbno, &rlen, &busy_gen);
diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c
index 49dc38acc66b..4505f4829d53 100644
--- a/fs/xfs/scrub/bmap_repair.c
+++ b/fs/xfs/scrub/bmap_repair.c
@@ -801,7 +801,7 @@ xrep_bmap(
{
struct xrep_bmap *rb;
char *descr;
- unsigned int max_bmbt_recs;
+ xfs_extnum_t max_bmbt_recs;
bool large_extcount;
int error = 0;
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 67478294f11a..155bbaaa496e 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -1084,9 +1084,11 @@ xrep_metadata_inode_forks(
return error;
/* Make sure the attr fork looks ok before we delete it. */
- error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA);
- if (error)
- return error;
+ if (xfs_inode_hasattr(sc->ip)) {
+ error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA);
+ if (error)
+ return error;
+ }
/* Clear the reflink flag since metadata never shares. */
if (xfs_is_reflink_inode(sc->ip)) {
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 6dead20338e2..559a3a577097 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -116,7 +116,7 @@ xfs_end_ioend(
if (unlikely(error)) {
if (ioend->io_flags & IOMAP_F_SHARED) {
xfs_reflink_cancel_cow_range(ip, offset, size, true);
- xfs_bmap_punch_delalloc_range(ip, offset,
+ xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset,
offset + size);
}
goto done;
@@ -456,7 +456,7 @@ xfs_discard_folio(
* byte of the next folio. Hence the end offset is only dependent on the
* folio itself and not the start offset that is passed in.
*/
- xfs_bmap_punch_delalloc_range(ip, pos,
+ xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos,
folio_pos(folio) + folio_size(folio));
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 053d567c9108..4719ec90029c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -442,11 +442,12 @@ out_unlock_iolock:
void
xfs_bmap_punch_delalloc_range(
struct xfs_inode *ip,
+ int whichfork,
xfs_off_t start_byte,
xfs_off_t end_byte)
{
struct xfs_mount *mp = ip->i_mount;
- struct xfs_ifork *ifp = &ip->i_df;
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte);
xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte);
struct xfs_bmbt_irec got, del;
@@ -474,11 +475,14 @@ xfs_bmap_punch_delalloc_range(
continue;
}
- xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, &got, &del);
+ xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del);
if (!xfs_iext_get_extent(ifp, &icur, &got))
break;
}
+ if (whichfork == XFS_COW_FORK && !ifp->if_bytes)
+ xfs_inode_clear_cowblocks_tag(ip);
+
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
@@ -580,7 +584,7 @@ xfs_free_eofblocks(
*/
if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
if (ip->i_delayed_blks) {
- xfs_bmap_punch_delalloc_range(ip,
+ xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK,
round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
LLONG_MAX);
}
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index eb0895bfb9da..b29760d36e1a 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -30,7 +30,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
}
#endif /* CONFIG_XFS_RT */
-void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
+void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, int whichfork,
xfs_off_t start_byte, xfs_off_t end_byte);
struct kgetbmap {
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 09e893cf563c..5180cbf5a90b 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -22,6 +22,9 @@
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_quota.h"
+#include "xfs_alloc.h"
+#include "xfs_ag.h"
+#include "xfs_sb.h"
/*
* This is the number of entries in the l_buf_cancel_table used during
@@ -685,6 +688,67 @@ xlog_recover_do_inode_buffer(
}
/*
+ * Update the in-memory superblock and perag structures from the primary SB
+ * buffer.
+ *
+ * This is required because transactions running after growfs may require the
+ * updated values to be set in a previous fully commit transaction.
+ */
+static int
+xlog_recover_do_primary_sb_buffer(
+ struct xfs_mount *mp,
+ struct xlog_recover_item *item,
+ struct xfs_buf *bp,
+ struct xfs_buf_log_format *buf_f,
+ xfs_lsn_t current_lsn)
+{
+ struct xfs_dsb *dsb = bp->b_addr;
+ xfs_agnumber_t orig_agcount = mp->m_sb.sb_agcount;
+ int error;
+
+ xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
+
+ if (orig_agcount == 0) {
+ xfs_alert(mp, "Trying to grow file system without AGs");
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Update the in-core super block from the freshly recovered on-disk one.
+ */
+ xfs_sb_from_disk(&mp->m_sb, dsb);
+
+ if (mp->m_sb.sb_agcount < orig_agcount) {
+ xfs_alert(mp, "Shrinking AG count in log recovery not supported");
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Growfs can also grow the last existing AG. In this case we also need
+ * to update the length in the in-core perag structure and values
+ * depending on it.
+ */
+ error = xfs_update_last_ag_size(mp, orig_agcount);
+ if (error)
+ return error;
+
+ /*
+ * Initialize the new perags, and also update various block and inode
+ * allocator setting based off the number of AGs or total blocks.
+ * Because of the latter this also needs to happen if the agcount did
+ * not change.
+ */
+ error = xfs_initialize_perag(mp, orig_agcount, mp->m_sb.sb_agcount,
+ mp->m_sb.sb_dblocks, &mp->m_maxagi);
+ if (error) {
+ xfs_warn(mp, "Failed recovery per-ag init: %d", error);
+ return error;
+ }
+ mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+ return 0;
+}
+
+/*
* V5 filesystems know the age of the buffer on disk being recovered. We can
* have newer objects on disk than we are replaying, and so for these cases we
* don't want to replay the current change as that will make the buffer contents
@@ -967,6 +1031,12 @@ xlog_recover_buf_commit_pass2(
dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
if (!dirty)
goto out_release;
+ } else if ((xfs_blft_from_flags(buf_f) & XFS_BLFT_SB_BUF) &&
+ xfs_buf_daddr(bp) == 0) {
+ error = xlog_recover_do_primary_sb_buffer(mp, item, bp, buf_f,
+ current_lsn);
+ if (error)
+ goto out_release;
} else {
xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 412b1d71b52b..b19916b11fd5 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -348,9 +348,82 @@ xfs_file_splice_read(
}
/*
+ * Take care of zeroing post-EOF blocks when they might exist.
+ *
+ * Returns 0 if successfully, a negative error for a failure, or 1 if this
+ * function dropped the iolock and reacquired it exclusively and the caller
+ * needs to restart the write sanity checks.
+ */
+static ssize_t
+xfs_file_write_zero_eof(
+ struct kiocb *iocb,
+ struct iov_iter *from,
+ unsigned int *iolock,
+ size_t count,
+ bool *drained_dio)
+{
+ struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host);
+ loff_t isize;
+ int error;
+
+ /*
+ * We need to serialise against EOF updates that occur in IO completions
+ * here. We want to make sure that nobody is changing the size while
+ * we do this check until we have placed an IO barrier (i.e. hold
+ * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
+ * spinlock effectively forms a memory barrier once we have
+ * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
+ * hence be able to correctly determine if we need to run zeroing.
+ */
+ spin_lock(&ip->i_flags_lock);
+ isize = i_size_read(VFS_I(ip));
+ if (iocb->ki_pos <= isize) {
+ spin_unlock(&ip->i_flags_lock);
+ return 0;
+ }
+ spin_unlock(&ip->i_flags_lock);
+
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+
+ if (!*drained_dio) {
+ /*
+ * If zeroing is needed and we are currently holding the iolock
+ * shared, we need to update it to exclusive which implies
+ * having to redo all checks before.
+ */
+ if (*iolock == XFS_IOLOCK_SHARED) {
+ xfs_iunlock(ip, *iolock);
+ *iolock = XFS_IOLOCK_EXCL;
+ xfs_ilock(ip, *iolock);
+ iov_iter_reexpand(from, count);
+ }
+
+ /*
+ * We now have an IO submission barrier in place, but AIO can do
+ * EOF updates during IO completion and hence we now need to
+ * wait for all of them to drain. Non-AIO DIO will have drained
+ * before we are given the XFS_IOLOCK_EXCL, and so for most
+ * cases this wait is a no-op.
+ */
+ inode_dio_wait(VFS_I(ip));
+ *drained_dio = true;
+ return 1;
+ }
+
+ trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
+
+ xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+ error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
+ xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+
+ return error;
+}
+
+/*
* Common pre-write limit and setup checks.
*
- * Called with the iolocked held either shared and exclusive according to
+ * Called with the iolock held either shared and exclusive according to
* @iolock, and returns with it held. Might upgrade the iolock to exclusive
* if called for a direct write beyond i_size.
*/
@@ -360,13 +433,10 @@ xfs_file_write_checks(
struct iov_iter *from,
unsigned int *iolock)
{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- ssize_t error = 0;
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
size_t count = iov_iter_count(from);
bool drained_dio = false;
- loff_t isize;
+ ssize_t error;
restart:
error = generic_write_checks(iocb, from);
@@ -389,7 +459,7 @@ restart:
* exclusively.
*/
if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
- xfs_iunlock(ip, *iolock);
+ xfs_iunlock(XFS_I(inode), *iolock);
*iolock = XFS_IOLOCK_EXCL;
error = xfs_ilock_iocb(iocb, *iolock);
if (error) {
@@ -400,64 +470,24 @@ restart:
}
/*
- * If the offset is beyond the size of the file, we need to zero any
+ * If the offset is beyond the size of the file, we need to zero all
* blocks that fall between the existing EOF and the start of this
- * write. If zeroing is needed and we are currently holding the iolock
- * shared, we need to update it to exclusive which implies having to
- * redo all checks before.
+ * write.
*
- * We need to serialise against EOF updates that occur in IO completions
- * here. We want to make sure that nobody is changing the size while we
- * do this check until we have placed an IO barrier (i.e. hold the
- * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
- * spinlock effectively forms a memory barrier once we have the
- * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
- * hence be able to correctly determine if we need to run zeroing.
- *
- * We can do an unlocked check here safely as IO completion can only
- * extend EOF. Truncate is locked out at this point, so the EOF can
- * not move backwards, only forwards. Hence we only need to take the
- * slow path and spin locks when we are at or beyond the current EOF.
+ * We can do an unlocked check for i_size here safely as I/O completion
+ * can only extend EOF. Truncate is locked out at this point, so the
+ * EOF can not move backwards, only forwards. Hence we only need to take
+ * the slow path when we are at or beyond the current EOF.
*/
- if (iocb->ki_pos <= i_size_read(inode))
- goto out;
-
- spin_lock(&ip->i_flags_lock);
- isize = i_size_read(inode);
- if (iocb->ki_pos > isize) {
- spin_unlock(&ip->i_flags_lock);
-
- if (iocb->ki_flags & IOCB_NOWAIT)
- return -EAGAIN;
-
- if (!drained_dio) {
- if (*iolock == XFS_IOLOCK_SHARED) {
- xfs_iunlock(ip, *iolock);
- *iolock = XFS_IOLOCK_EXCL;
- xfs_ilock(ip, *iolock);
- iov_iter_reexpand(from, count);
- }
- /*
- * We now have an IO submission barrier in place, but
- * AIO can do EOF updates during IO completion and hence
- * we now need to wait for all of them to drain. Non-AIO
- * DIO will have drained before we are given the
- * XFS_IOLOCK_EXCL, and so for most cases this wait is a
- * no-op.
- */
- inode_dio_wait(inode);
- drained_dio = true;
+ if (iocb->ki_pos > i_size_read(inode)) {
+ error = xfs_file_write_zero_eof(iocb, from, iolock, count,
+ &drained_dio);
+ if (error == 1)
goto restart;
- }
-
- trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
- error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
if (error)
return error;
- } else
- spin_unlock(&ip->i_flags_lock);
+ }
-out:
return kiocb_modified(iocb);
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index e3aaa0555597..290ba8887d29 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -64,25 +64,31 @@ xfs_filestream_pick_ag(
struct xfs_perag *pag;
struct xfs_perag *max_pag = NULL;
xfs_extlen_t minlen = *longest;
- xfs_extlen_t free = 0, minfree, maxfree = 0;
+ xfs_extlen_t minfree, maxfree = 0;
xfs_agnumber_t agno;
bool first_pass = true;
- int err;
/* 2% of an AG's blocks must be free for it to be chosen. */
minfree = mp->m_sb.sb_agblocks / 50;
restart:
for_each_perag_wrap(mp, start_agno, agno, pag) {
+ int err;
+
trace_xfs_filestream_scan(pag, pino);
+
*longest = 0;
err = xfs_bmap_longest_free_extent(pag, NULL, longest);
if (err) {
- if (err != -EAGAIN)
- break;
- /* Couldn't lock the AGF, skip this AG. */
- err = 0;
- continue;
+ if (err == -EAGAIN) {
+ /* Couldn't lock the AGF, skip this AG. */
+ err = 0;
+ continue;
+ }
+ xfs_perag_rele(pag);
+ if (max_pag)
+ xfs_perag_rele(max_pag);
+ return err;
}
/* Keep track of the AG with the most free blocks. */
@@ -107,8 +113,9 @@ restart:
!(flags & XFS_PICK_USERDATA) ||
(flags & XFS_PICK_LOWSPACE))) {
/* Break out, retaining the reference on the AG. */
- free = pag->pagf_freeblks;
- break;
+ if (max_pag)
+ xfs_perag_rele(max_pag);
+ goto done;
}
}
@@ -116,57 +123,47 @@ restart:
atomic_dec(&pag->pagf_fstrms);
}
- if (err) {
- xfs_perag_rele(pag);
- if (max_pag)
- xfs_perag_rele(max_pag);
- return err;
+ /*
+ * Allow a second pass to give xfs_bmap_longest_free_extent() another
+ * attempt at locking AGFs that it might have skipped over before we
+ * fail.
+ */
+ if (first_pass) {
+ first_pass = false;
+ goto restart;
}
- if (!pag) {
- /*
- * Allow a second pass to give xfs_bmap_longest_free_extent()
- * another attempt at locking AGFs that it might have skipped
- * over before we fail.
- */
- if (first_pass) {
- first_pass = false;
- goto restart;
- }
+ /*
+ * We must be low on data space, so run a final lowspace optimised
+ * selection pass if we haven't already.
+ */
+ if (!(flags & XFS_PICK_LOWSPACE)) {
+ flags |= XFS_PICK_LOWSPACE;
+ goto restart;
+ }
- /*
- * We must be low on data space, so run a final lowspace
- * optimised selection pass if we haven't already.
- */
- if (!(flags & XFS_PICK_LOWSPACE)) {
- flags |= XFS_PICK_LOWSPACE;
- goto restart;
+ /*
+ * No unassociated AGs are available, so select the AG with the most
+ * free space, regardless of whether it's already in use by another
+ * filestream. It none suit, just use whatever AG we can grab.
+ */
+ if (!max_pag) {
+ for_each_perag_wrap(args->mp, 0, start_agno, pag) {
+ max_pag = pag;
+ break;
}
- /*
- * No unassociated AGs are available, so select the AG with the
- * most free space, regardless of whether it's already in use by
- * another filestream. It none suit, just use whatever AG we can
- * grab.
- */
- if (!max_pag) {
- for_each_perag_wrap(args->mp, 0, start_agno, args->pag)
- break;
- atomic_inc(&args->pag->pagf_fstrms);
- *longest = 0;
- } else {
- pag = max_pag;
- free = maxfree;
- atomic_inc(&pag->pagf_fstrms);
- }
- } else if (max_pag) {
- xfs_perag_rele(max_pag);
+ /* Bail if there are no AGs at all to select from. */
+ if (!max_pag)
+ return -ENOSPC;
}
- trace_xfs_filestream_pick(pag, pino, free);
+ pag = max_pag;
+ atomic_inc(&pag->pagf_fstrms);
+done:
+ trace_xfs_filestream_pick(pag, pino);
args->pag = pag;
return 0;
-
}
static struct xfs_inode *
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 3643cc843f62..b247d895c276 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -87,6 +87,7 @@ xfs_growfs_data_private(
struct xfs_mount *mp, /* mount point for filesystem */
struct xfs_growfs_data *in) /* growfs data input struct */
{
+ xfs_agnumber_t oagcount = mp->m_sb.sb_agcount;
struct xfs_buf *bp;
int error;
xfs_agnumber_t nagcount;
@@ -94,7 +95,6 @@ xfs_growfs_data_private(
xfs_rfsblock_t nb, nb_div, nb_mod;
int64_t delta;
bool lastag_extended = false;
- xfs_agnumber_t oagcount;
struct xfs_trans *tp;
struct aghdr_init_data id = {};
struct xfs_perag *last_pag;
@@ -138,16 +138,14 @@ xfs_growfs_data_private(
if (delta == 0)
return 0;
- oagcount = mp->m_sb.sb_agcount;
- /* allocate the new per-ag structures */
- if (nagcount > oagcount) {
- error = xfs_initialize_perag(mp, nagcount, nb, &nagimax);
- if (error)
- return error;
- } else if (nagcount < oagcount) {
- /* TODO: shrinking the entire AGs hasn't yet completed */
+ /* TODO: shrinking the entire AGs hasn't yet completed */
+ if (nagcount < oagcount)
return -EINVAL;
- }
+
+ /* allocate the new per-ag structures */
+ error = xfs_initialize_perag(mp, oagcount, nagcount, nb, &nagimax);
+ if (error)
+ return error;
if (delta > 0)
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
@@ -231,7 +229,7 @@ out_trans_cancel:
xfs_trans_cancel(tp);
out_free_unused_perag:
if (nagcount > oagcount)
- xfs_free_unused_perag_range(mp, oagcount, nagcount);
+ xfs_free_perag_range(mp, oagcount, nagcount);
return error;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index bcc277fc0a83..19dcb569a3e7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1409,7 +1409,7 @@ xfs_inactive(
if (S_ISREG(VFS_I(ip)->i_mode) &&
(ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 ||
- ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
+ xfs_inode_has_filedata(ip)))
truncate = 1;
if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 97ed912306fd..03944b6c5fba 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -292,6 +292,11 @@ static inline bool xfs_is_cow_inode(struct xfs_inode *ip)
return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip);
}
+static inline bool xfs_inode_has_filedata(const struct xfs_inode *ip)
+{
+ return ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0;
+}
+
/*
* Check if an inode has any data in the COW fork. This might be often false
* even for inodes with the reflink flag when there is no pending COW operation.
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index a20d426ef021..2567fd2a0994 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -481,7 +481,7 @@ xfs_ioctl_setattr_xflags(
if (rtflag != XFS_IS_REALTIME_INODE(ip)) {
/* Can't change realtime flag if any extents are allocated. */
- if (ip->i_df.if_nextents || ip->i_delayed_blks)
+ if (xfs_inode_has_filedata(ip))
return -EINVAL;
/*
@@ -602,7 +602,7 @@ xfs_ioctl_setattr_check_extsize(
if (!fa->fsx_valid)
return 0;
- if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents &&
+ if (S_ISREG(VFS_I(ip)->i_mode) && xfs_inode_has_filedata(ip) &&
XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize)
return -EINVAL;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 1e11f48814c0..86da16f54be9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -707,7 +707,7 @@ imap_needs_cow(
return false;
/* when zeroing we don't have to COW holes or unwritten extents */
- if (flags & IOMAP_ZERO) {
+ if (flags & (IOMAP_UNSHARE | IOMAP_ZERO)) {
if (!nimaps ||
imap->br_startblock == HOLESTARTBLOCK ||
imap->br_state == XFS_EXT_UNWRITTEN)
@@ -975,6 +975,7 @@ xfs_buffered_write_iomap_begin(
int allocfork = XFS_DATA_FORK;
int error = 0;
unsigned int lockmode = XFS_ILOCK_EXCL;
+ unsigned int iomap_flags = 0;
u64 seq;
if (xfs_is_shutdown(mp))
@@ -1145,6 +1146,11 @@ xfs_buffered_write_iomap_begin(
}
}
+ /*
+ * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+ * them out if the write happens to fail.
+ */
+ iomap_flags |= IOMAP_F_NEW;
if (allocfork == XFS_COW_FORK) {
error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
end_fsb - offset_fsb, prealloc_blocks, &cmap,
@@ -1162,19 +1168,11 @@ xfs_buffered_write_iomap_begin(
if (error)
goto out_unlock;
- /*
- * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
- * them out if the write happens to fail.
- */
- seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
- xfs_iunlock(ip, lockmode);
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);
-
found_imap:
- seq = xfs_iomap_inode_sequence(ip, 0);
+ seq = xfs_iomap_inode_sequence(ip, iomap_flags);
xfs_iunlock(ip, lockmode);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq);
convert_delay:
xfs_iunlock(ip, lockmode);
@@ -1188,20 +1186,20 @@ convert_delay:
return 0;
found_cow:
- seq = xfs_iomap_inode_sequence(ip, 0);
if (imap.br_startoff <= offset_fsb) {
- error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0,
+ xfs_iomap_inode_sequence(ip, 0));
if (error)
goto out_unlock;
- seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
- xfs_iunlock(ip, lockmode);
- return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
- IOMAP_F_SHARED, seq);
+ } else {
+ xfs_trim_extent(&cmap, offset_fsb,
+ imap.br_startoff - offset_fsb);
}
- xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
+ iomap_flags |= IOMAP_F_SHARED;
+ seq = xfs_iomap_inode_sequence(ip, iomap_flags);
xfs_iunlock(ip, lockmode);
- return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, iomap_flags, seq);
out_unlock:
xfs_iunlock(ip, lockmode);
@@ -1215,7 +1213,10 @@ xfs_buffered_write_delalloc_punch(
loff_t length,
struct iomap *iomap)
{
- xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, offset + length);
+ xfs_bmap_punch_delalloc_range(XFS_I(inode),
+ (iomap->flags & IOMAP_F_SHARED) ?
+ XFS_COW_FORK : XFS_DATA_FORK,
+ offset, offset + length);
}
static int
@@ -1227,8 +1228,30 @@ xfs_buffered_write_iomap_end(
unsigned flags,
struct iomap *iomap)
{
- iomap_file_buffered_write_punch_delalloc(inode, offset, length, written,
- flags, iomap, &xfs_buffered_write_delalloc_punch);
+ loff_t start_byte, end_byte;
+
+ /* If we didn't reserve the blocks, we're not allowed to punch them. */
+ if (iomap->type != IOMAP_DELALLOC || !(iomap->flags & IOMAP_F_NEW))
+ return 0;
+
+ /* Nothing to do if we've written the entire delalloc extent */
+ start_byte = iomap_last_written_block(inode, offset, written);
+ end_byte = round_up(offset + length, i_blocksize(inode));
+ if (start_byte >= end_byte)
+ return 0;
+
+ /* For zeroing operations the callers already hold invalidate_lock. */
+ if (flags & (IOMAP_UNSHARE | IOMAP_ZERO)) {
+ rwsem_assert_held_write(&inode->i_mapping->invalidate_lock);
+ iomap_write_delalloc_release(inode, start_byte, end_byte, flags,
+ iomap, xfs_buffered_write_delalloc_punch);
+ } else {
+ filemap_invalidate_lock(inode->i_mapping);
+ iomap_write_delalloc_release(inode, start_byte, end_byte, flags,
+ iomap, xfs_buffered_write_delalloc_punch);
+ filemap_invalidate_unlock(inode->i_mapping);
+ }
+
return 0;
}
@@ -1435,6 +1458,8 @@ xfs_zero_range(
{
struct inode *inode = VFS_I(ip);
+ xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
+
if (IS_DAX(inode))
return dax_zero_range(inode, pos, len, did_zero,
&xfs_dax_write_iomap_ops);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a13bf53fea49..704aaadb61cf 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3393,13 +3393,6 @@ xlog_do_recover(
/* re-initialise in-core superblock and geometry structures */
mp->m_features |= xfs_sb_version_to_features(sbp);
xfs_reinit_percpu_counters(mp);
- error = xfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks,
- &mp->m_maxagi);
- if (error) {
- xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
- return error;
- }
- mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
/* Normal transactions can now occur */
clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 1fdd79c5bfa0..25bbcc3f4ee0 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -810,8 +810,8 @@ xfs_mountfs(
/*
* Allocate and initialize the per-ag data.
*/
- error = xfs_initialize_perag(mp, sbp->sb_agcount, mp->m_sb.sb_dblocks,
- &mp->m_maxagi);
+ error = xfs_initialize_perag(mp, 0, sbp->sb_agcount,
+ mp->m_sb.sb_dblocks, &mp->m_maxagi);
if (error) {
xfs_warn(mp, "Failed per-ag init: %d", error);
goto out_free_dir;
@@ -1048,7 +1048,7 @@ xfs_mountfs(
xfs_buftarg_drain(mp->m_logdev_targp);
xfs_buftarg_drain(mp->m_ddev_targp);
out_free_perag:
- xfs_free_perag(mp);
+ xfs_free_perag_range(mp, 0, mp->m_sb.sb_agcount);
out_free_dir:
xfs_da_unmount(mp);
out_remove_uuid:
@@ -1129,8 +1129,7 @@ xfs_unmountfs(
xfs_errortag_clearall(mp);
#endif
shrinker_free(mp->m_inodegc_shrinker);
- xfs_free_perag(mp);
-
+ xfs_free_perag_range(mp, 0, mp->m_sb.sb_agcount);
xfs_errortag_del(mp);
xfs_error_sysfs_del(mp);
xchk_stats_unregister(mp->m_scrub_stats);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ee9f0b1f548d..fcb2bad4f76e 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -691,8 +691,8 @@ DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
TRACE_EVENT(xfs_filestream_pick,
- TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino, xfs_extlen_t free),
- TP_ARGS(pag, ino, free),
+ TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino),
+ TP_ARGS(pag, ino),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
@@ -703,14 +703,9 @@ TRACE_EVENT(xfs_filestream_pick,
TP_fast_assign(
__entry->dev = pag->pag_mount->m_super->s_dev;
__entry->ino = ino;
- if (pag) {
- __entry->agno = pag->pag_agno;
- __entry->streams = atomic_read(&pag->pagf_fstrms);
- } else {
- __entry->agno = NULLAGNUMBER;
- __entry->streams = 0;
- }
- __entry->free = free;
+ __entry->agno = pag->pag_agno;
+ __entry->streams = atomic_read(&pag->pagf_fstrms);
+ __entry->free = pag->pagf_freeblks;
),
TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d",
MAJOR(__entry->dev), MINOR(__entry->dev),