diff options
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/libxfs/xfs_ag.c | 75 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ag.h | 11 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c | 2 | ||||
-rw-r--r-- | fs/xfs/scrub/bmap_repair.c | 2 | ||||
-rw-r--r-- | fs/xfs/scrub/repair.c | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_buf_item_recover.c | 70 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 146 | ||||
-rw-r--r-- | fs/xfs/xfs_filestream.c | 99 | ||||
-rw-r--r-- | fs/xfs/xfs_fsops.c | 20 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 69 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 15 |
19 files changed, 330 insertions, 230 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 5f0494702e0b..5ca8d0106827 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -185,17 +185,20 @@ out: } /* - * Free up the per-ag resources associated with the mount structure. + * Free up the per-ag resources within the specified AG range. */ void -xfs_free_perag( - struct xfs_mount *mp) +xfs_free_perag_range( + struct xfs_mount *mp, + xfs_agnumber_t first_agno, + xfs_agnumber_t end_agno) + { - struct xfs_perag *pag; xfs_agnumber_t agno; - for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { - pag = xa_erase(&mp->m_perags, agno); + for (agno = first_agno; agno < end_agno; agno++) { + struct xfs_perag *pag = xa_erase(&mp->m_perags, agno); + ASSERT(pag); XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); xfs_defer_drain_free(&pag->pag_intents_drain); @@ -270,54 +273,37 @@ xfs_agino_range( return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last); } -/* - * Free perag within the specified AG range, it is only used to free unused - * perags under the error handling path. - */ -void -xfs_free_unused_perag_range( +int +xfs_update_last_ag_size( struct xfs_mount *mp, - xfs_agnumber_t agstart, - xfs_agnumber_t agend) + xfs_agnumber_t prev_agcount) { - struct xfs_perag *pag; - xfs_agnumber_t index; + struct xfs_perag *pag = xfs_perag_grab(mp, prev_agcount - 1); - for (index = agstart; index < agend; index++) { - pag = xa_erase(&mp->m_perags, index); - if (!pag) - break; - xfs_buf_cache_destroy(&pag->pag_bcache); - xfs_defer_drain_free(&pag->pag_intents_drain); - kfree(pag); - } + if (!pag) + return -EFSCORRUPTED; + pag->block_count = __xfs_ag_block_count(mp, prev_agcount - 1, + mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks); + __xfs_agino_range(mp, pag->block_count, &pag->agino_min, + &pag->agino_max); + xfs_perag_rele(pag); + return 0; } int xfs_initialize_perag( struct xfs_mount *mp, - xfs_agnumber_t agcount, + xfs_agnumber_t old_agcount, + xfs_agnumber_t new_agcount, xfs_rfsblock_t dblocks, xfs_agnumber_t *maxagi) { struct xfs_perag *pag; xfs_agnumber_t index; - xfs_agnumber_t first_initialised = NULLAGNUMBER; int error; - /* - * Walk the current per-ag tree so we don't try to initialise AGs - * that already exist (growfs case). Allocate and insert all the - * AGs we don't find ready for initialisation. - */ - for (index = 0; index < agcount; index++) { - pag = xfs_perag_get(mp, index); - if (pag) { - xfs_perag_put(pag); - continue; - } - - pag = kzalloc(sizeof(*pag), GFP_KERNEL | __GFP_RETRY_MAYFAIL); + for (index = old_agcount; index < new_agcount; index++) { + pag = kzalloc(sizeof(*pag), GFP_KERNEL); if (!pag) { error = -ENOMEM; goto out_unwind_new_pags; @@ -353,21 +339,17 @@ xfs_initialize_perag( /* Active ref owned by mount indicates AG is online. */ atomic_set(&pag->pag_active_ref, 1); - /* first new pag is fully initialized */ - if (first_initialised == NULLAGNUMBER) - first_initialised = index; - /* * Pre-calculated geometry */ - pag->block_count = __xfs_ag_block_count(mp, index, agcount, + pag->block_count = __xfs_ag_block_count(mp, index, new_agcount, dblocks); pag->min_block = XFS_AGFL_BLOCK(mp); __xfs_agino_range(mp, pag->block_count, &pag->agino_min, &pag->agino_max); } - index = xfs_set_inode_alloc(mp, agcount); + index = xfs_set_inode_alloc(mp, new_agcount); if (maxagi) *maxagi = index; @@ -381,8 +363,7 @@ out_remove_pag: out_free_pag: kfree(pag); out_unwind_new_pags: - /* unwind any prior newly initialized pags */ - xfs_free_unused_perag_range(mp, first_initialised, agcount); + xfs_free_perag_range(mp, old_agcount, index); return error; } diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index d9cccd093b60..9edfe0e96439 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -144,12 +144,13 @@ __XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA) __XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES) __XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET) -void xfs_free_unused_perag_range(struct xfs_mount *mp, xfs_agnumber_t agstart, - xfs_agnumber_t agend); -int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount, - xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi); +int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t old_agcount, + xfs_agnumber_t agcount, xfs_rfsblock_t dcount, + xfs_agnumber_t *maxagi); +void xfs_free_perag_range(struct xfs_mount *mp, xfs_agnumber_t first_agno, + xfs_agnumber_t end_agno); int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno); -void xfs_free_perag(struct xfs_mount *mp); +int xfs_update_last_ag_size(struct xfs_mount *mp, xfs_agnumber_t prev_agcount); /* Passive AG references */ struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 04f64cf9777e..22bdbb3e9980 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1923,7 +1923,7 @@ restart: error = -EFSCORRUPTED; goto error0; } - if (flen < bestrlen) + if (flen <= bestrlen) break; busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen, &busy_gen); diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c index 49dc38acc66b..4505f4829d53 100644 --- a/fs/xfs/scrub/bmap_repair.c +++ b/fs/xfs/scrub/bmap_repair.c @@ -801,7 +801,7 @@ xrep_bmap( { struct xrep_bmap *rb; char *descr; - unsigned int max_bmbt_recs; + xfs_extnum_t max_bmbt_recs; bool large_extcount; int error = 0; diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 67478294f11a..155bbaaa496e 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -1084,9 +1084,11 @@ xrep_metadata_inode_forks( return error; /* Make sure the attr fork looks ok before we delete it. */ - error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA); - if (error) - return error; + if (xfs_inode_hasattr(sc->ip)) { + error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA); + if (error) + return error; + } /* Clear the reflink flag since metadata never shares. */ if (xfs_is_reflink_inode(sc->ip)) { diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 6dead20338e2..559a3a577097 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -116,7 +116,7 @@ xfs_end_ioend( if (unlikely(error)) { if (ioend->io_flags & IOMAP_F_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); - xfs_bmap_punch_delalloc_range(ip, offset, + xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset, offset + size); } goto done; @@ -456,7 +456,7 @@ xfs_discard_folio( * byte of the next folio. Hence the end offset is only dependent on the * folio itself and not the start offset that is passed in. */ - xfs_bmap_punch_delalloc_range(ip, pos, + xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos, folio_pos(folio) + folio_size(folio)); } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 053d567c9108..4719ec90029c 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -442,11 +442,12 @@ out_unlock_iolock: void xfs_bmap_punch_delalloc_range( struct xfs_inode *ip, + int whichfork, xfs_off_t start_byte, xfs_off_t end_byte) { struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp = &ip->i_df; + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte); xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); struct xfs_bmbt_irec got, del; @@ -474,11 +475,14 @@ xfs_bmap_punch_delalloc_range( continue; } - xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, &got, &del); + xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del); if (!xfs_iext_get_extent(ifp, &icur, &got)) break; } + if (whichfork == XFS_COW_FORK && !ifp->if_bytes) + xfs_inode_clear_cowblocks_tag(ip); + out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); } @@ -580,7 +584,7 @@ xfs_free_eofblocks( */ if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) { if (ip->i_delayed_blks) { - xfs_bmap_punch_delalloc_range(ip, + xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize), LLONG_MAX); } diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index eb0895bfb9da..b29760d36e1a 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -30,7 +30,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap) } #endif /* CONFIG_XFS_RT */ -void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, +void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, int whichfork, xfs_off_t start_byte, xfs_off_t end_byte); struct kgetbmap { diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index 09e893cf563c..5180cbf5a90b 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -22,6 +22,9 @@ #include "xfs_inode.h" #include "xfs_dir2.h" #include "xfs_quota.h" +#include "xfs_alloc.h" +#include "xfs_ag.h" +#include "xfs_sb.h" /* * This is the number of entries in the l_buf_cancel_table used during @@ -685,6 +688,67 @@ xlog_recover_do_inode_buffer( } /* + * Update the in-memory superblock and perag structures from the primary SB + * buffer. + * + * This is required because transactions running after growfs may require the + * updated values to be set in a previous fully commit transaction. + */ +static int +xlog_recover_do_primary_sb_buffer( + struct xfs_mount *mp, + struct xlog_recover_item *item, + struct xfs_buf *bp, + struct xfs_buf_log_format *buf_f, + xfs_lsn_t current_lsn) +{ + struct xfs_dsb *dsb = bp->b_addr; + xfs_agnumber_t orig_agcount = mp->m_sb.sb_agcount; + int error; + + xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn); + + if (orig_agcount == 0) { + xfs_alert(mp, "Trying to grow file system without AGs"); + return -EFSCORRUPTED; + } + + /* + * Update the in-core super block from the freshly recovered on-disk one. + */ + xfs_sb_from_disk(&mp->m_sb, dsb); + + if (mp->m_sb.sb_agcount < orig_agcount) { + xfs_alert(mp, "Shrinking AG count in log recovery not supported"); + return -EFSCORRUPTED; + } + + /* + * Growfs can also grow the last existing AG. In this case we also need + * to update the length in the in-core perag structure and values + * depending on it. + */ + error = xfs_update_last_ag_size(mp, orig_agcount); + if (error) + return error; + + /* + * Initialize the new perags, and also update various block and inode + * allocator setting based off the number of AGs or total blocks. + * Because of the latter this also needs to happen if the agcount did + * not change. + */ + error = xfs_initialize_perag(mp, orig_agcount, mp->m_sb.sb_agcount, + mp->m_sb.sb_dblocks, &mp->m_maxagi); + if (error) { + xfs_warn(mp, "Failed recovery per-ag init: %d", error); + return error; + } + mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); + return 0; +} + +/* * V5 filesystems know the age of the buffer on disk being recovered. We can * have newer objects on disk than we are replaying, and so for these cases we * don't want to replay the current change as that will make the buffer contents @@ -967,6 +1031,12 @@ xlog_recover_buf_commit_pass2( dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); if (!dirty) goto out_release; + } else if ((xfs_blft_from_flags(buf_f) & XFS_BLFT_SB_BUF) && + xfs_buf_daddr(bp) == 0) { + error = xlog_recover_do_primary_sb_buffer(mp, item, bp, buf_f, + current_lsn); + if (error) + goto out_release; } else { xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn); } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 412b1d71b52b..b19916b11fd5 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -348,9 +348,82 @@ xfs_file_splice_read( } /* + * Take care of zeroing post-EOF blocks when they might exist. + * + * Returns 0 if successfully, a negative error for a failure, or 1 if this + * function dropped the iolock and reacquired it exclusively and the caller + * needs to restart the write sanity checks. + */ +static ssize_t +xfs_file_write_zero_eof( + struct kiocb *iocb, + struct iov_iter *from, + unsigned int *iolock, + size_t count, + bool *drained_dio) +{ + struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host); + loff_t isize; + int error; + + /* + * We need to serialise against EOF updates that occur in IO completions + * here. We want to make sure that nobody is changing the size while + * we do this check until we have placed an IO barrier (i.e. hold + * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The + * spinlock effectively forms a memory barrier once we have + * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and + * hence be able to correctly determine if we need to run zeroing. + */ + spin_lock(&ip->i_flags_lock); + isize = i_size_read(VFS_I(ip)); + if (iocb->ki_pos <= isize) { + spin_unlock(&ip->i_flags_lock); + return 0; + } + spin_unlock(&ip->i_flags_lock); + + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + + if (!*drained_dio) { + /* + * If zeroing is needed and we are currently holding the iolock + * shared, we need to update it to exclusive which implies + * having to redo all checks before. + */ + if (*iolock == XFS_IOLOCK_SHARED) { + xfs_iunlock(ip, *iolock); + *iolock = XFS_IOLOCK_EXCL; + xfs_ilock(ip, *iolock); + iov_iter_reexpand(from, count); + } + + /* + * We now have an IO submission barrier in place, but AIO can do + * EOF updates during IO completion and hence we now need to + * wait for all of them to drain. Non-AIO DIO will have drained + * before we are given the XFS_IOLOCK_EXCL, and so for most + * cases this wait is a no-op. + */ + inode_dio_wait(VFS_I(ip)); + *drained_dio = true; + return 1; + } + + trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize); + + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL); + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); + + return error; +} + +/* * Common pre-write limit and setup checks. * - * Called with the iolocked held either shared and exclusive according to + * Called with the iolock held either shared and exclusive according to * @iolock, and returns with it held. Might upgrade the iolock to exclusive * if called for a direct write beyond i_size. */ @@ -360,13 +433,10 @@ xfs_file_write_checks( struct iov_iter *from, unsigned int *iolock) { - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - ssize_t error = 0; + struct inode *inode = iocb->ki_filp->f_mapping->host; size_t count = iov_iter_count(from); bool drained_dio = false; - loff_t isize; + ssize_t error; restart: error = generic_write_checks(iocb, from); @@ -389,7 +459,7 @@ restart: * exclusively. */ if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) { - xfs_iunlock(ip, *iolock); + xfs_iunlock(XFS_I(inode), *iolock); *iolock = XFS_IOLOCK_EXCL; error = xfs_ilock_iocb(iocb, *iolock); if (error) { @@ -400,64 +470,24 @@ restart: } /* - * If the offset is beyond the size of the file, we need to zero any + * If the offset is beyond the size of the file, we need to zero all * blocks that fall between the existing EOF and the start of this - * write. If zeroing is needed and we are currently holding the iolock - * shared, we need to update it to exclusive which implies having to - * redo all checks before. + * write. * - * We need to serialise against EOF updates that occur in IO completions - * here. We want to make sure that nobody is changing the size while we - * do this check until we have placed an IO barrier (i.e. hold the - * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The - * spinlock effectively forms a memory barrier once we have the - * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and - * hence be able to correctly determine if we need to run zeroing. - * - * We can do an unlocked check here safely as IO completion can only - * extend EOF. Truncate is locked out at this point, so the EOF can - * not move backwards, only forwards. Hence we only need to take the - * slow path and spin locks when we are at or beyond the current EOF. + * We can do an unlocked check for i_size here safely as I/O completion + * can only extend EOF. Truncate is locked out at this point, so the + * EOF can not move backwards, only forwards. Hence we only need to take + * the slow path when we are at or beyond the current EOF. */ - if (iocb->ki_pos <= i_size_read(inode)) - goto out; - - spin_lock(&ip->i_flags_lock); - isize = i_size_read(inode); - if (iocb->ki_pos > isize) { - spin_unlock(&ip->i_flags_lock); - - if (iocb->ki_flags & IOCB_NOWAIT) - return -EAGAIN; - - if (!drained_dio) { - if (*iolock == XFS_IOLOCK_SHARED) { - xfs_iunlock(ip, *iolock); - *iolock = XFS_IOLOCK_EXCL; - xfs_ilock(ip, *iolock); - iov_iter_reexpand(from, count); - } - /* - * We now have an IO submission barrier in place, but - * AIO can do EOF updates during IO completion and hence - * we now need to wait for all of them to drain. Non-AIO - * DIO will have drained before we are given the - * XFS_IOLOCK_EXCL, and so for most cases this wait is a - * no-op. - */ - inode_dio_wait(inode); - drained_dio = true; + if (iocb->ki_pos > i_size_read(inode)) { + error = xfs_file_write_zero_eof(iocb, from, iolock, count, + &drained_dio); + if (error == 1) goto restart; - } - - trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize); - error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL); if (error) return error; - } else - spin_unlock(&ip->i_flags_lock); + } -out: return kiocb_modified(iocb); } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index e3aaa0555597..290ba8887d29 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -64,25 +64,31 @@ xfs_filestream_pick_ag( struct xfs_perag *pag; struct xfs_perag *max_pag = NULL; xfs_extlen_t minlen = *longest; - xfs_extlen_t free = 0, minfree, maxfree = 0; + xfs_extlen_t minfree, maxfree = 0; xfs_agnumber_t agno; bool first_pass = true; - int err; /* 2% of an AG's blocks must be free for it to be chosen. */ minfree = mp->m_sb.sb_agblocks / 50; restart: for_each_perag_wrap(mp, start_agno, agno, pag) { + int err; + trace_xfs_filestream_scan(pag, pino); + *longest = 0; err = xfs_bmap_longest_free_extent(pag, NULL, longest); if (err) { - if (err != -EAGAIN) - break; - /* Couldn't lock the AGF, skip this AG. */ - err = 0; - continue; + if (err == -EAGAIN) { + /* Couldn't lock the AGF, skip this AG. */ + err = 0; + continue; + } + xfs_perag_rele(pag); + if (max_pag) + xfs_perag_rele(max_pag); + return err; } /* Keep track of the AG with the most free blocks. */ @@ -107,8 +113,9 @@ restart: !(flags & XFS_PICK_USERDATA) || (flags & XFS_PICK_LOWSPACE))) { /* Break out, retaining the reference on the AG. */ - free = pag->pagf_freeblks; - break; + if (max_pag) + xfs_perag_rele(max_pag); + goto done; } } @@ -116,57 +123,47 @@ restart: atomic_dec(&pag->pagf_fstrms); } - if (err) { - xfs_perag_rele(pag); - if (max_pag) - xfs_perag_rele(max_pag); - return err; + /* + * Allow a second pass to give xfs_bmap_longest_free_extent() another + * attempt at locking AGFs that it might have skipped over before we + * fail. + */ + if (first_pass) { + first_pass = false; + goto restart; } - if (!pag) { - /* - * Allow a second pass to give xfs_bmap_longest_free_extent() - * another attempt at locking AGFs that it might have skipped - * over before we fail. - */ - if (first_pass) { - first_pass = false; - goto restart; - } + /* + * We must be low on data space, so run a final lowspace optimised + * selection pass if we haven't already. + */ + if (!(flags & XFS_PICK_LOWSPACE)) { + flags |= XFS_PICK_LOWSPACE; + goto restart; + } - /* - * We must be low on data space, so run a final lowspace - * optimised selection pass if we haven't already. - */ - if (!(flags & XFS_PICK_LOWSPACE)) { - flags |= XFS_PICK_LOWSPACE; - goto restart; + /* + * No unassociated AGs are available, so select the AG with the most + * free space, regardless of whether it's already in use by another + * filestream. It none suit, just use whatever AG we can grab. + */ + if (!max_pag) { + for_each_perag_wrap(args->mp, 0, start_agno, pag) { + max_pag = pag; + break; } - /* - * No unassociated AGs are available, so select the AG with the - * most free space, regardless of whether it's already in use by - * another filestream. It none suit, just use whatever AG we can - * grab. - */ - if (!max_pag) { - for_each_perag_wrap(args->mp, 0, start_agno, args->pag) - break; - atomic_inc(&args->pag->pagf_fstrms); - *longest = 0; - } else { - pag = max_pag; - free = maxfree; - atomic_inc(&pag->pagf_fstrms); - } - } else if (max_pag) { - xfs_perag_rele(max_pag); + /* Bail if there are no AGs at all to select from. */ + if (!max_pag) + return -ENOSPC; } - trace_xfs_filestream_pick(pag, pino, free); + pag = max_pag; + atomic_inc(&pag->pagf_fstrms); +done: + trace_xfs_filestream_pick(pag, pino); args->pag = pag; return 0; - } static struct xfs_inode * diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 3643cc843f62..b247d895c276 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -87,6 +87,7 @@ xfs_growfs_data_private( struct xfs_mount *mp, /* mount point for filesystem */ struct xfs_growfs_data *in) /* growfs data input struct */ { + xfs_agnumber_t oagcount = mp->m_sb.sb_agcount; struct xfs_buf *bp; int error; xfs_agnumber_t nagcount; @@ -94,7 +95,6 @@ xfs_growfs_data_private( xfs_rfsblock_t nb, nb_div, nb_mod; int64_t delta; bool lastag_extended = false; - xfs_agnumber_t oagcount; struct xfs_trans *tp; struct aghdr_init_data id = {}; struct xfs_perag *last_pag; @@ -138,16 +138,14 @@ xfs_growfs_data_private( if (delta == 0) return 0; - oagcount = mp->m_sb.sb_agcount; - /* allocate the new per-ag structures */ - if (nagcount > oagcount) { - error = xfs_initialize_perag(mp, nagcount, nb, &nagimax); - if (error) - return error; - } else if (nagcount < oagcount) { - /* TODO: shrinking the entire AGs hasn't yet completed */ + /* TODO: shrinking the entire AGs hasn't yet completed */ + if (nagcount < oagcount) return -EINVAL; - } + + /* allocate the new per-ag structures */ + error = xfs_initialize_perag(mp, oagcount, nagcount, nb, &nagimax); + if (error) + return error; if (delta > 0) error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, @@ -231,7 +229,7 @@ out_trans_cancel: xfs_trans_cancel(tp); out_free_unused_perag: if (nagcount > oagcount) - xfs_free_unused_perag_range(mp, oagcount, nagcount); + xfs_free_perag_range(mp, oagcount, nagcount); return error; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bcc277fc0a83..19dcb569a3e7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1409,7 +1409,7 @@ xfs_inactive( if (S_ISREG(VFS_I(ip)->i_mode) && (ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 || - ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0)) + xfs_inode_has_filedata(ip))) truncate = 1; if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) { diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 97ed912306fd..03944b6c5fba 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -292,6 +292,11 @@ static inline bool xfs_is_cow_inode(struct xfs_inode *ip) return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip); } +static inline bool xfs_inode_has_filedata(const struct xfs_inode *ip) +{ + return ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0; +} + /* * Check if an inode has any data in the COW fork. This might be often false * even for inodes with the reflink flag when there is no pending COW operation. diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a20d426ef021..2567fd2a0994 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -481,7 +481,7 @@ xfs_ioctl_setattr_xflags( if (rtflag != XFS_IS_REALTIME_INODE(ip)) { /* Can't change realtime flag if any extents are allocated. */ - if (ip->i_df.if_nextents || ip->i_delayed_blks) + if (xfs_inode_has_filedata(ip)) return -EINVAL; /* @@ -602,7 +602,7 @@ xfs_ioctl_setattr_check_extsize( if (!fa->fsx_valid) return 0; - if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents && + if (S_ISREG(VFS_I(ip)->i_mode) && xfs_inode_has_filedata(ip) && XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize) return -EINVAL; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1e11f48814c0..86da16f54be9 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -707,7 +707,7 @@ imap_needs_cow( return false; /* when zeroing we don't have to COW holes or unwritten extents */ - if (flags & IOMAP_ZERO) { + if (flags & (IOMAP_UNSHARE | IOMAP_ZERO)) { if (!nimaps || imap->br_startblock == HOLESTARTBLOCK || imap->br_state == XFS_EXT_UNWRITTEN) @@ -975,6 +975,7 @@ xfs_buffered_write_iomap_begin( int allocfork = XFS_DATA_FORK; int error = 0; unsigned int lockmode = XFS_ILOCK_EXCL; + unsigned int iomap_flags = 0; u64 seq; if (xfs_is_shutdown(mp)) @@ -1145,6 +1146,11 @@ xfs_buffered_write_iomap_begin( } } + /* + * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch + * them out if the write happens to fail. + */ + iomap_flags |= IOMAP_F_NEW; if (allocfork == XFS_COW_FORK) { error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, end_fsb - offset_fsb, prealloc_blocks, &cmap, @@ -1162,19 +1168,11 @@ xfs_buffered_write_iomap_begin( if (error) goto out_unlock; - /* - * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch - * them out if the write happens to fail. - */ - seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW); - xfs_iunlock(ip, lockmode); trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq); - found_imap: - seq = xfs_iomap_inode_sequence(ip, 0); + seq = xfs_iomap_inode_sequence(ip, iomap_flags); xfs_iunlock(ip, lockmode); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq); convert_delay: xfs_iunlock(ip, lockmode); @@ -1188,20 +1186,20 @@ convert_delay: return 0; found_cow: - seq = xfs_iomap_inode_sequence(ip, 0); if (imap.br_startoff <= offset_fsb) { - error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, + xfs_iomap_inode_sequence(ip, 0)); if (error) goto out_unlock; - seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); - xfs_iunlock(ip, lockmode); - return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, - IOMAP_F_SHARED, seq); + } else { + xfs_trim_extent(&cmap, offset_fsb, + imap.br_startoff - offset_fsb); } - xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb); + iomap_flags |= IOMAP_F_SHARED; + seq = xfs_iomap_inode_sequence(ip, iomap_flags); xfs_iunlock(ip, lockmode); - return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, iomap_flags, seq); out_unlock: xfs_iunlock(ip, lockmode); @@ -1215,7 +1213,10 @@ xfs_buffered_write_delalloc_punch( loff_t length, struct iomap *iomap) { - xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, offset + length); + xfs_bmap_punch_delalloc_range(XFS_I(inode), + (iomap->flags & IOMAP_F_SHARED) ? + XFS_COW_FORK : XFS_DATA_FORK, + offset, offset + length); } static int @@ -1227,8 +1228,30 @@ xfs_buffered_write_iomap_end( unsigned flags, struct iomap *iomap) { - iomap_file_buffered_write_punch_delalloc(inode, offset, length, written, - flags, iomap, &xfs_buffered_write_delalloc_punch); + loff_t start_byte, end_byte; + + /* If we didn't reserve the blocks, we're not allowed to punch them. */ + if (iomap->type != IOMAP_DELALLOC || !(iomap->flags & IOMAP_F_NEW)) + return 0; + + /* Nothing to do if we've written the entire delalloc extent */ + start_byte = iomap_last_written_block(inode, offset, written); + end_byte = round_up(offset + length, i_blocksize(inode)); + if (start_byte >= end_byte) + return 0; + + /* For zeroing operations the callers already hold invalidate_lock. */ + if (flags & (IOMAP_UNSHARE | IOMAP_ZERO)) { + rwsem_assert_held_write(&inode->i_mapping->invalidate_lock); + iomap_write_delalloc_release(inode, start_byte, end_byte, flags, + iomap, xfs_buffered_write_delalloc_punch); + } else { + filemap_invalidate_lock(inode->i_mapping); + iomap_write_delalloc_release(inode, start_byte, end_byte, flags, + iomap, xfs_buffered_write_delalloc_punch); + filemap_invalidate_unlock(inode->i_mapping); + } + return 0; } @@ -1435,6 +1458,8 @@ xfs_zero_range( { struct inode *inode = VFS_I(ip); + xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); + if (IS_DAX(inode)) return dax_zero_range(inode, pos, len, did_zero, &xfs_dax_write_iomap_ops); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index a13bf53fea49..704aaadb61cf 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3393,13 +3393,6 @@ xlog_do_recover( /* re-initialise in-core superblock and geometry structures */ mp->m_features |= xfs_sb_version_to_features(sbp); xfs_reinit_percpu_counters(mp); - error = xfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks, - &mp->m_maxagi); - if (error) { - xfs_warn(mp, "Failed post-recovery per-ag init: %d", error); - return error; - } - mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); /* Normal transactions can now occur */ clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 1fdd79c5bfa0..25bbcc3f4ee0 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -810,8 +810,8 @@ xfs_mountfs( /* * Allocate and initialize the per-ag data. */ - error = xfs_initialize_perag(mp, sbp->sb_agcount, mp->m_sb.sb_dblocks, - &mp->m_maxagi); + error = xfs_initialize_perag(mp, 0, sbp->sb_agcount, + mp->m_sb.sb_dblocks, &mp->m_maxagi); if (error) { xfs_warn(mp, "Failed per-ag init: %d", error); goto out_free_dir; @@ -1048,7 +1048,7 @@ xfs_mountfs( xfs_buftarg_drain(mp->m_logdev_targp); xfs_buftarg_drain(mp->m_ddev_targp); out_free_perag: - xfs_free_perag(mp); + xfs_free_perag_range(mp, 0, mp->m_sb.sb_agcount); out_free_dir: xfs_da_unmount(mp); out_remove_uuid: @@ -1129,8 +1129,7 @@ xfs_unmountfs( xfs_errortag_clearall(mp); #endif shrinker_free(mp->m_inodegc_shrinker); - xfs_free_perag(mp); - + xfs_free_perag_range(mp, 0, mp->m_sb.sb_agcount); xfs_errortag_del(mp); xfs_error_sysfs_del(mp); xchk_stats_unregister(mp->m_scrub_stats); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ee9f0b1f548d..fcb2bad4f76e 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -691,8 +691,8 @@ DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup); DEFINE_FILESTREAM_EVENT(xfs_filestream_scan); TRACE_EVENT(xfs_filestream_pick, - TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino, xfs_extlen_t free), - TP_ARGS(pag, ino, free), + TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino), + TP_ARGS(pag, ino), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) @@ -703,14 +703,9 @@ TRACE_EVENT(xfs_filestream_pick, TP_fast_assign( __entry->dev = pag->pag_mount->m_super->s_dev; __entry->ino = ino; - if (pag) { - __entry->agno = pag->pag_agno; - __entry->streams = atomic_read(&pag->pagf_fstrms); - } else { - __entry->agno = NULLAGNUMBER; - __entry->streams = 0; - } - __entry->free = free; + __entry->agno = pag->pag_agno; + __entry->streams = atomic_read(&pag->pagf_fstrms); + __entry->free = pag->pagf_freeblks; ), TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d", MAJOR(__entry->dev), MINOR(__entry->dev), |