Diffstat (limited to 'fs/xfs')
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c            |   6
-rw-r--r--  fs/xfs/libxfs/xfs_btree.c           |  33
-rw-r--r--  fs/xfs/libxfs/xfs_btree.h           |   2
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.c          |  16
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc_btree.c    |   4
-rw-r--r--  fs/xfs/libxfs/xfs_rtgroup.c         |   2
-rw-r--r--  fs/xfs/libxfs/xfs_sb.c              |  18
-rw-r--r--  fs/xfs/libxfs/xfs_symlink_remote.c  |   4
-rw-r--r--  fs/xfs/scrub/agheader.c             |  77
-rw-r--r--  fs/xfs/scrub/agheader_repair.c      |   6
-rw-r--r--  fs/xfs/scrub/fscounters.c           |   2
-rw-r--r--  fs/xfs/scrub/health.c               |  57
-rw-r--r--  fs/xfs/scrub/ialloc.c               |   4
-rw-r--r--  fs/xfs/scrub/metapath.c             |  68
-rw-r--r--  fs/xfs/scrub/refcount.c             |   2
-rw-r--r--  fs/xfs/scrub/scrub.h                |   6
-rw-r--r--  fs/xfs/scrub/symlink_repair.c       |   3
-rw-r--r--  fs/xfs/scrub/tempfile.c             |  22
-rw-r--r--  fs/xfs/scrub/trace.h                |   2
-rw-r--r--  fs/xfs/xfs_attr_list.c              |   3
-rw-r--r--  fs/xfs/xfs_bmap_util.c              |   2
-rw-r--r--  fs/xfs/xfs_dquot.c                  | 195
-rw-r--r--  fs/xfs/xfs_dquot.h                  |   6
-rw-r--r--  fs/xfs/xfs_dquot_item.c             |  51
-rw-r--r--  fs/xfs/xfs_dquot_item.h             |   7
-rw-r--r--  fs/xfs/xfs_exchrange.c              |  14
-rw-r--r--  fs/xfs/xfs_file.c                   |   8
-rw-r--r--  fs/xfs/xfs_fsmap.c                  |  38
-rw-r--r--  fs/xfs/xfs_inode.h                  |   2
-rw-r--r--  fs/xfs/xfs_log.c                    |  11
-rw-r--r--  fs/xfs/xfs_log_cil.c                |   5
-rw-r--r--  fs/xfs/xfs_log_priv.h               |   1
-rw-r--r--  fs/xfs/xfs_qm.c                     | 102
-rw-r--r--  fs/xfs/xfs_qm.h                     |   1
-rw-r--r--  fs/xfs/xfs_qm_syscalls.c            |  13
-rw-r--r--  fs/xfs/xfs_quota.h                  |   5
-rw-r--r--  fs/xfs/xfs_rtalloc.c                |   2
-rw-r--r--  fs/xfs/xfs_trans.c                  |  56
-rw-r--r--  fs/xfs/xfs_trans_ail.c              |   2
-rw-r--r--  fs/xfs/xfs_trans_dquot.c            |  31
40 files changed, 603 insertions(+), 286 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 9052839305e2..5255f93bae31 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -2620,8 +2620,7 @@ xfs_bmap_add_extent_hole_delay(
*/
if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
left.br_startoff + left.br_blockcount == new->br_startoff &&
- left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
- xfs_bmap_same_rtgroup(ip, whichfork, &left, new))
+ left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
state |= BMAP_LEFT_CONTIG;
if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
@@ -2629,8 +2628,7 @@ xfs_bmap_add_extent_hole_delay(
new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
(!(state & BMAP_LEFT_CONTIG) ||
(left.br_blockcount + new->br_blockcount +
- right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)) &&
- xfs_bmap_same_rtgroup(ip, whichfork, new, &right))
+ right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)))
state |= BMAP_RIGHT_CONTIG;
/*
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 2b5fc5fd1643..68ee1c299c25 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -3557,14 +3557,31 @@ xfs_btree_insrec(
xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
/*
- * If we just inserted into a new tree block, we have to
- * recalculate nkey here because nkey is out of date.
+ * Update btree keys to reflect the newly added record or keyptr.
+ * There are three cases here to be aware of. Normally, all we have to
+ * do is walk towards the root, updating keys as necessary.
*
- * Otherwise we're just updating an existing block (having shoved
- * some records into the new tree block), so use the regular key
- * update mechanism.
+ * If the caller had us target a full block for the insertion, we dealt
+ * with that by calling the _make_block_unfull function. If the
+ * "make unfull" function splits the block, it'll hand us back the key
+ * and pointer of the new block. We haven't yet added the new block to
+ * the next level up, so if we decide to add the new record to the new
+ * block (bp->b_bn != old_bn), we have to update the caller's pointer
+ * so that the caller adds the new block with the correct key.
+ *
+ * However, there is a third possibility-- if the selected block is the
+ * root block of an inode-rooted btree and cannot be expanded further,
+ * the "make unfull" function moves the root block contents to a new
+ * block and updates the root block to point to the new block. In this
+ * case, no block pointer is passed back because the block has already
+ * been added to the btree. In this case, we need to use the regular
+ * key update function, just like the first case. This is critical for
+ * overlapping btrees, because the high key must be updated to reflect
+ * the entire tree, not just the subtree accessible through the first
+ * child of the root (which is now two levels down from the root).
*/
- if (bp && xfs_buf_daddr(bp) != old_bn) {
+ if (!xfs_btree_ptr_is_null(cur, &nptr) &&
+ bp && xfs_buf_daddr(bp) != old_bn) {
xfs_btree_get_keys(cur, block, lkey);
} else if (xfs_btree_needs_key_update(cur, optr)) {
error = xfs_btree_update_keys(cur, level);
@@ -5144,7 +5161,7 @@ xfs_btree_count_blocks_helper(
int level,
void *data)
{
- xfs_extlen_t *blocks = data;
+ xfs_filblks_t *blocks = data;
(*blocks)++;
return 0;
@@ -5154,7 +5171,7 @@ xfs_btree_count_blocks_helper(
int
xfs_btree_count_blocks(
struct xfs_btree_cur *cur,
- xfs_extlen_t *blocks)
+ xfs_filblks_t *blocks)
{
*blocks = 0;
return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
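The hunks above widen xfs_btree_count_blocks()'s counter from xfs_extlen_t (32-bit) to xfs_filblks_t (64-bit), so a sufficiently large btree can no longer wrap the count. A minimal standalone sketch of the truncation being avoided, using local typedefs that mirror the widths in xfs_types.h:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t xfs_extlen_t;	/* 32 bits, as in fs/xfs/libxfs/xfs_types.h */
typedef uint64_t xfs_filblks_t;	/* 64 bits, as in fs/xfs/libxfs/xfs_types.h */

int main(void)
{
	xfs_filblks_t blocks = 5ULL << 30;		/* 5Gi blocks visited */
	xfs_extlen_t narrow = (xfs_extlen_t)blocks;	/* silently wraps */

	printf("wide=%llu narrow=%u\n", (unsigned long long)blocks, narrow);
	return 0;
}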
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 3b739459ebb0..c5bff273cae2 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -484,7 +484,7 @@ typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
xfs_btree_visit_blocks_fn fn, unsigned int flags, void *data);
-int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks);
+int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_filblks_t *blocks);
union xfs_btree_rec *xfs_btree_rec_addr(struct xfs_btree_cur *cur, int n,
struct xfs_btree_block *block);
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 8b84e2cf711b..f3a840a425f5 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -853,7 +853,8 @@ sparse_alloc:
* the end of the AG.
*/
args.min_agbno = args.mp->m_sb.sb_inoalignmt;
- args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
+ args.max_agbno = round_down(xfs_ag_block_count(args.mp,
+ pag_agno(pag)),
args.mp->m_sb.sb_inoalignmt) -
igeo->ialloc_blks;
@@ -2349,9 +2350,9 @@ xfs_difree(
return -EINVAL;
}
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
- if (agbno >= mp->m_sb.sb_agblocks) {
- xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
- __func__, agbno, mp->m_sb.sb_agblocks);
+ if (agbno >= xfs_ag_block_count(mp, pag_agno(pag))) {
+ xfs_warn(mp, "%s: agbno >= xfs_ag_block_count (%d >= %d).",
+ __func__, agbno, xfs_ag_block_count(mp, pag_agno(pag)));
ASSERT(0);
return -EINVAL;
}
@@ -2474,7 +2475,7 @@ xfs_imap(
*/
agino = XFS_INO_TO_AGINO(mp, ino);
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
- if (agbno >= mp->m_sb.sb_agblocks ||
+ if (agbno >= xfs_ag_block_count(mp, pag_agno(pag)) ||
ino != xfs_agino_to_ino(pag, agino)) {
error = -EINVAL;
#ifdef DEBUG
@@ -2484,11 +2485,12 @@ xfs_imap(
*/
if (flags & XFS_IGET_UNTRUSTED)
return error;
- if (agbno >= mp->m_sb.sb_agblocks) {
+ if (agbno >= xfs_ag_block_count(mp, pag_agno(pag))) {
xfs_alert(mp,
"%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
__func__, (unsigned long long)agbno,
- (unsigned long)mp->m_sb.sb_agblocks);
+ (unsigned long)xfs_ag_block_count(mp,
+ pag_agno(pag)));
}
if (ino != xfs_agino_to_ino(pag, agino)) {
xfs_alert(mp,
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 9b34896dd1a3..6f270d8f4270 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -744,6 +744,7 @@ xfs_finobt_count_blocks(
{
struct xfs_buf *agbp = NULL;
struct xfs_btree_cur *cur;
+ xfs_filblks_t blocks;
int error;
error = xfs_ialloc_read_agi(pag, tp, 0, &agbp);
@@ -751,9 +752,10 @@ xfs_finobt_count_blocks(
return error;
cur = xfs_finobt_init_cursor(pag, tp, agbp);
- error = xfs_btree_count_blocks(cur, tree_blocks);
+ error = xfs_btree_count_blocks(cur, &blocks);
xfs_btree_del_cursor(cur, error);
xfs_trans_brelse(tp, agbp);
+ *tree_blocks = blocks;
return error;
}
diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c
index e74bb059f24f..4f3bfc884aff 100644
--- a/fs/xfs/libxfs/xfs_rtgroup.c
+++ b/fs/xfs/libxfs/xfs_rtgroup.c
@@ -496,7 +496,7 @@ xfs_rtginode_create(
error = xfs_metadir_create(&upd, S_IFREG);
if (error)
- return error;
+ goto out_cancel;
xfs_rtginode_lockdep_setup(upd.ip, rtg_rgno(rtg), type);
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index e81b240b7158..3b5623611eba 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -326,13 +326,6 @@ xfs_validate_sb_write(
* the kernel cannot support since we checked for unsupported bits in
* the read verifier, which means that memory is corrupt.
*/
- if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) {
- xfs_warn(mp,
-"Corruption detected in superblock compatible features (0x%x)!",
- (sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN));
- return -EFSCORRUPTED;
- }
-
if (!xfs_is_readonly(mp) &&
xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
xfs_alert(mp,
@@ -501,12 +494,13 @@ xfs_validate_sb_common(
return -EINVAL;
}
- if (!sbp->sb_spino_align ||
- sbp->sb_spino_align > sbp->sb_inoalignmt ||
- (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) {
+ if (sbp->sb_spino_align &&
+ (sbp->sb_spino_align > sbp->sb_inoalignmt ||
+ (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0)) {
xfs_warn(mp,
- "Sparse inode alignment (%u) is invalid.",
- sbp->sb_spino_align);
+"Sparse inode alignment (%u) is invalid, must be integer factor of (%u).",
+ sbp->sb_spino_align,
+ sbp->sb_inoalignmt);
return -EINVAL;
}
} else if (sbp->sb_spino_align) {
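The relaxed validation above reduces to a three-part predicate: an unset sb_spino_align is now accepted, while a nonzero value must still be no larger than, and an integer factor of, sb_inoalignmt. A hedged restatement as a standalone helper (hypothetical name, not part of the patch):

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical helper restating the new sb_spino_align rule. */
static bool spino_align_valid(uint32_t spino_align, uint32_t inoalignmt)
{
	if (spino_align == 0)
		return true;	/* sparse inode alignment not set */
	return spino_align <= inoalignmt &&
	       inoalignmt % spino_align == 0;
}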
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index f228127a88ff..fb47a76ead18 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -92,8 +92,10 @@ xfs_symlink_verify(
struct xfs_mount *mp = bp->b_mount;
struct xfs_dsymlink_hdr *dsl = bp->b_addr;
+ /* no verification of non-crc buffers */
if (!xfs_has_crc(mp))
- return __this_address;
+ return NULL;
+
if (!xfs_verify_magic(bp, dsl->sl_magic))
return __this_address;
if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid))
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 61f80a6410c7..9f8c312dfd3c 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -60,6 +60,32 @@ xchk_superblock_xref(
}
/*
+ * Calculate the ondisk superblock size in bytes given the feature set of the
+ * mounted filesystem (aka the primary sb). This is subtlely different from
+ * the logic in xfs_repair, which computes the size of a secondary sb given the
+ * featureset listed in the secondary sb.
+ */
+STATIC size_t
+xchk_superblock_ondisk_size(
+ struct xfs_mount *mp)
+{
+ if (xfs_has_metadir(mp))
+ return offsetofend(struct xfs_dsb, sb_pad);
+ if (xfs_has_metauuid(mp))
+ return offsetofend(struct xfs_dsb, sb_meta_uuid);
+ if (xfs_has_crc(mp))
+ return offsetofend(struct xfs_dsb, sb_lsn);
+ if (xfs_sb_version_hasmorebits(&mp->m_sb))
+ return offsetofend(struct xfs_dsb, sb_bad_features2);
+ if (xfs_has_logv2(mp))
+ return offsetofend(struct xfs_dsb, sb_logsunit);
+ if (xfs_has_sector(mp))
+ return offsetofend(struct xfs_dsb, sb_logsectsize);
+ /* only support dirv2 or more recent */
+ return offsetofend(struct xfs_dsb, sb_dirblklog);
+}
+
+/*
* Scrub the filesystem superblock.
*
* Note: We do /not/ attempt to check AG 0's superblock. Mount is
@@ -75,6 +101,7 @@ xchk_superblock(
struct xfs_buf *bp;
struct xfs_dsb *sb;
struct xfs_perag *pag;
+ size_t sblen;
xfs_agnumber_t agno;
uint32_t v2_ok;
__be32 features_mask;
@@ -145,8 +172,11 @@ xchk_superblock(
xchk_block_set_preen(sc, bp);
if (xfs_has_metadir(sc->mp)) {
- if (sb->sb_metadirino != cpu_to_be64(mp->m_sb.sb_metadirino))
- xchk_block_set_preen(sc, bp);
+ if (sb->sb_rbmino != cpu_to_be64(0))
+ xchk_block_set_corrupt(sc, bp);
+
+ if (sb->sb_rsumino != cpu_to_be64(0))
+ xchk_block_set_corrupt(sc, bp);
} else {
if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino))
xchk_block_set_preen(sc, bp);
@@ -229,7 +259,13 @@ xchk_superblock(
* sb_icount, sb_ifree, sb_fdblocks, sb_frexents
*/
- if (!xfs_has_metadir(mp)) {
+ if (xfs_has_metadir(mp)) {
+ if (sb->sb_uquotino != cpu_to_be64(0))
+ xchk_block_set_corrupt(sc, bp);
+
+ if (sb->sb_gquotino != cpu_to_be64(0))
+ xchk_block_set_preen(sc, bp);
+ } else {
if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino))
xchk_block_set_preen(sc, bp);
@@ -281,15 +317,8 @@ xchk_superblock(
if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok)))
xchk_block_set_corrupt(sc, bp);
- if (xfs_has_metadir(mp)) {
- if (sb->sb_rgblklog != mp->m_sb.sb_rgblklog)
- xchk_block_set_corrupt(sc, bp);
- if (memchr_inv(sb->sb_pad, 0, sizeof(sb->sb_pad)))
- xchk_block_set_preen(sc, bp);
- } else {
- if (sb->sb_features2 != sb->sb_bad_features2)
- xchk_block_set_preen(sc, bp);
- }
+ if (sb->sb_features2 != sb->sb_bad_features2)
+ xchk_block_set_preen(sc, bp);
}
/* Check sb_features2 flags that are set at mkfs time. */
@@ -351,7 +380,10 @@ xchk_superblock(
if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align))
xchk_block_set_corrupt(sc, bp);
- if (!xfs_has_metadir(mp)) {
+ if (xfs_has_metadir(mp)) {
+ if (sb->sb_pquotino != cpu_to_be64(0))
+ xchk_block_set_corrupt(sc, bp);
+ } else {
if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino))
xchk_block_set_preen(sc, bp);
}
@@ -366,16 +398,25 @@ xchk_superblock(
}
if (xfs_has_metadir(mp)) {
+ if (sb->sb_metadirino != cpu_to_be64(mp->m_sb.sb_metadirino))
+ xchk_block_set_preen(sc, bp);
+
if (sb->sb_rgcount != cpu_to_be32(mp->m_sb.sb_rgcount))
xchk_block_set_corrupt(sc, bp);
if (sb->sb_rgextents != cpu_to_be32(mp->m_sb.sb_rgextents))
xchk_block_set_corrupt(sc, bp);
+
+ if (sb->sb_rgblklog != mp->m_sb.sb_rgblklog)
+ xchk_block_set_corrupt(sc, bp);
+
+ if (memchr_inv(sb->sb_pad, 0, sizeof(sb->sb_pad)))
+ xchk_block_set_corrupt(sc, bp);
}
/* Everything else must be zero. */
- if (memchr_inv(sb + 1, 0,
- BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
+ sblen = xchk_superblock_ondisk_size(mp);
+ if (memchr_inv((char *)sb + sblen, 0, BBTOB(bp->b_length) - sblen))
xchk_block_set_corrupt(sc, bp);
xchk_superblock_xref(sc, bp);
@@ -458,7 +499,7 @@ xchk_agf_xref_btreeblks(
{
struct xfs_agf *agf = sc->sa.agf_bp->b_addr;
struct xfs_mount *mp = sc->mp;
- xfs_agblock_t blocks;
+ xfs_filblks_t blocks;
xfs_agblock_t btreeblks;
int error;
@@ -507,7 +548,7 @@ xchk_agf_xref_refcblks(
struct xfs_scrub *sc)
{
struct xfs_agf *agf = sc->sa.agf_bp->b_addr;
- xfs_agblock_t blocks;
+ xfs_filblks_t blocks;
int error;
if (!sc->sa.refc_cur)
@@ -840,7 +881,7 @@ xchk_agi_xref_fiblocks(
struct xfs_scrub *sc)
{
struct xfs_agi *agi = sc->sa.agi_bp->b_addr;
- xfs_agblock_t blocks;
+ xfs_filblks_t blocks;
int error = 0;
if (!xfs_has_inobtcounts(sc->mp))
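xchk_superblock_ondisk_size() above leans on the kernel's offsetofend() macro, which evaluates to the byte offset just past a struct member, so each feature tier maps to the end of its last valid on-disk field. A standalone sketch of the idiom with a made-up struct (not the real xfs_dsb layout):

#include <stddef.h>
#include <stdint.h>

#ifndef offsetofend
#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
#endif

/* Hypothetical on-disk header with feature-gated trailing fields. */
struct demo_dsb {
	uint32_t	magic;
	uint32_t	dirblklog;	/* last field in the oldest layout */
	uint64_t	lsn;		/* only valid when CRCs are enabled */
};

/* Oldest layout ends just past dirblklog; the CRC layout ends past lsn. */
static const size_t demo_min_size = offsetofend(struct demo_dsb, dirblklog);
static const size_t demo_crc_size = offsetofend(struct demo_dsb, lsn);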
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 0fad0baaba2f..b45d2b32051a 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -256,7 +256,7 @@ xrep_agf_calc_from_btrees(
struct xfs_agf *agf = agf_bp->b_addr;
struct xfs_mount *mp = sc->mp;
xfs_agblock_t btreeblks;
- xfs_agblock_t blocks;
+ xfs_filblks_t blocks;
int error;
/* Update the AGF counters from the bnobt. */
@@ -946,7 +946,7 @@ xrep_agi_calc_from_btrees(
if (error)
goto err;
if (xfs_has_inobtcounts(mp)) {
- xfs_agblock_t blocks;
+ xfs_filblks_t blocks;
error = xfs_btree_count_blocks(cur, &blocks);
if (error)
@@ -959,7 +959,7 @@ xrep_agi_calc_from_btrees(
agi->agi_freecount = cpu_to_be32(freecount);
if (xfs_has_finobt(mp) && xfs_has_inobtcounts(mp)) {
- xfs_agblock_t blocks;
+ xfs_filblks_t blocks;
cur = xfs_finobt_init_cursor(sc->sa.pag, sc->tp, agi_bp);
error = xfs_btree_count_blocks(cur, &blocks);
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 4a50f8e00040..ca23cf4db6c5 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -261,7 +261,7 @@ xchk_fscount_btreeblks(
struct xchk_fscounters *fsc,
xfs_agnumber_t agno)
{
- xfs_extlen_t blocks;
+ xfs_filblks_t blocks;
int error;
error = xchk_ag_init_existing(sc, agno, &sc->sa);
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index ce86bdad37fa..ccc6ca5934ca 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -71,7 +71,8 @@
/* Map our scrub type to a sick mask and a set of health update functions. */
enum xchk_health_group {
- XHG_FS = 1,
+ XHG_NONE = 1,
+ XHG_FS,
XHG_AG,
XHG_INO,
XHG_RTGROUP,
@@ -83,6 +84,7 @@ struct xchk_health_map {
};
static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
+ [XFS_SCRUB_TYPE_PROBE] = { XHG_NONE, 0 },
[XFS_SCRUB_TYPE_SB] = { XHG_AG, XFS_SICK_AG_SB },
[XFS_SCRUB_TYPE_AGF] = { XHG_AG, XFS_SICK_AG_AGF },
[XFS_SCRUB_TYPE_AGFL] = { XHG_AG, XFS_SICK_AG_AGFL },
@@ -133,7 +135,7 @@ xchk_mark_healthy_if_clean(
{
if (!(sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
XFS_SCRUB_OFLAG_XCORRUPT)))
- sc->sick_mask |= mask;
+ sc->healthy_mask |= mask;
}
/*
@@ -189,6 +191,7 @@ xchk_update_health(
{
struct xfs_perag *pag;
struct xfs_rtgroup *rtg;
+ unsigned int mask = sc->sick_mask;
bool bad;
/*
@@ -203,50 +206,56 @@ xchk_update_health(
return;
}
- if (!sc->sick_mask)
- return;
-
bad = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
XFS_SCRUB_OFLAG_XCORRUPT));
+ if (!bad)
+ mask |= sc->healthy_mask;
switch (type_to_health_flag[sc->sm->sm_type].group) {
+ case XHG_NONE:
+ break;
case XHG_AG:
+ if (!mask)
+ return;
pag = xfs_perag_get(sc->mp, sc->sm->sm_agno);
if (bad)
- xfs_group_mark_corrupt(pag_group(pag), sc->sick_mask);
+ xfs_group_mark_corrupt(pag_group(pag), mask);
else
- xfs_group_mark_healthy(pag_group(pag), sc->sick_mask);
+ xfs_group_mark_healthy(pag_group(pag), mask);
xfs_perag_put(pag);
break;
case XHG_INO:
if (!sc->ip)
return;
- if (bad) {
- unsigned int mask = sc->sick_mask;
-
- /*
- * If we're coming in for repairs then we don't want
- * sickness flags to propagate to the incore health
- * status if the inode gets inactivated before we can
- * fix it.
- */
- if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
- mask |= XFS_SICK_INO_FORGET;
+ /*
+ * If we're coming in for repairs then we don't want sickness
+ * flags to propagate to the incore health status if the inode
+ * gets inactivated before we can fix it.
+ */
+ if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+ mask |= XFS_SICK_INO_FORGET;
+ if (!mask)
+ return;
+ if (bad)
xfs_inode_mark_corrupt(sc->ip, mask);
- } else
- xfs_inode_mark_healthy(sc->ip, sc->sick_mask);
+ else
+ xfs_inode_mark_healthy(sc->ip, mask);
break;
case XHG_FS:
+ if (!mask)
+ return;
if (bad)
- xfs_fs_mark_corrupt(sc->mp, sc->sick_mask);
+ xfs_fs_mark_corrupt(sc->mp, mask);
else
- xfs_fs_mark_healthy(sc->mp, sc->sick_mask);
+ xfs_fs_mark_healthy(sc->mp, mask);
break;
case XHG_RTGROUP:
+ if (!mask)
+ return;
rtg = xfs_rtgroup_get(sc->mp, sc->sm->sm_agno);
if (bad)
- xfs_group_mark_corrupt(rtg_group(rtg), sc->sick_mask);
+ xfs_group_mark_corrupt(rtg_group(rtg), mask);
else
- xfs_group_mark_healthy(rtg_group(rtg), sc->sick_mask);
+ xfs_group_mark_healthy(rtg_group(rtg), mask);
xfs_rtgroup_put(rtg);
break;
default:
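The restructured xchk_update_health() above boils down to one rule: the sick mask is always applied, and the new healthy mask is folded in only when the scan found no (x)corruption, so ZAPPED-style flags are cleared exclusively on a clean pass. A hedged restatement (hypothetical helper, not in the patch):

#include <stdbool.h>

static unsigned int xchk_health_update_mask(bool bad, unsigned int sick_mask,
		unsigned int healthy_mask)
{
	unsigned int mask = sick_mask;

	if (!bad)
		mask |= healthy_mask;	/* clean scan: also clear these */
	return mask;			/* caller skips the update if zero */
}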
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index abad54c3621d..4dc7c83dc08a 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -650,8 +650,8 @@ xchk_iallocbt_xref_rmap_btreeblks(
struct xfs_scrub *sc)
{
xfs_filblks_t blocks;
- xfs_extlen_t inobt_blocks = 0;
- xfs_extlen_t finobt_blocks = 0;
+ xfs_filblks_t inobt_blocks = 0;
+ xfs_filblks_t finobt_blocks = 0;
int error;
if (!sc->sa.ino_cur || !sc->sa.rmap_cur ||
diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c
index b78db6513465..c678cba1ffc3 100644
--- a/fs/xfs/scrub/metapath.c
+++ b/fs/xfs/scrub/metapath.c
@@ -171,23 +171,13 @@ static int
xchk_setup_metapath_quotadir(
struct xfs_scrub *sc)
{
- struct xfs_trans *tp;
- struct xfs_inode *dp = NULL;
- int error;
-
- error = xfs_trans_alloc_empty(sc->mp, &tp);
- if (error)
- return error;
+ struct xfs_quotainfo *qi = sc->mp->m_quotainfo;
- error = xfs_dqinode_load_parent(tp, &dp);
- xfs_trans_cancel(tp);
- if (error)
- return error;
+ if (!qi || !qi->qi_dirip)
+ return -ENOENT;
- error = xchk_setup_metapath_scan(sc, sc->mp->m_metadirip,
- kasprintf(GFP_KERNEL, "quota"), dp);
- xfs_irele(dp);
- return error;
+ return xchk_setup_metapath_scan(sc, sc->mp->m_metadirip,
+ kstrdup("quota", GFP_KERNEL), qi->qi_dirip);
}
/* Scan a quota inode under the /quota directory. */
@@ -196,37 +186,31 @@ xchk_setup_metapath_dqinode(
struct xfs_scrub *sc,
xfs_dqtype_t type)
{
- struct xfs_trans *tp = NULL;
- struct xfs_inode *dp = NULL;
+ struct xfs_quotainfo *qi = sc->mp->m_quotainfo;
struct xfs_inode *ip = NULL;
- const char *path;
- int error;
-
- error = xfs_trans_alloc_empty(sc->mp, &tp);
- if (error)
- return error;
- error = xfs_dqinode_load_parent(tp, &dp);
- if (error)
- goto out_cancel;
-
- error = xfs_dqinode_load(tp, dp, type, &ip);
- if (error)
- goto out_dp;
-
- xfs_trans_cancel(tp);
- tp = NULL;
+ if (!qi)
+ return -ENOENT;
- path = kasprintf(GFP_KERNEL, "%s", xfs_dqinode_path(type));
- error = xchk_setup_metapath_scan(sc, dp, path, ip);
+ switch (type) {
+ case XFS_DQTYPE_USER:
+ ip = qi->qi_uquotaip;
+ break;
+ case XFS_DQTYPE_GROUP:
+ ip = qi->qi_gquotaip;
+ break;
+ case XFS_DQTYPE_PROJ:
+ ip = qi->qi_pquotaip;
+ break;
+ default:
+ ASSERT(0);
+ return -EINVAL;
+ }
+ if (!ip)
+ return -ENOENT;
- xfs_irele(ip);
-out_dp:
- xfs_irele(dp);
-out_cancel:
- if (tp)
- xfs_trans_cancel(tp);
- return error;
+ return xchk_setup_metapath_scan(sc, qi->qi_dirip,
+ kstrdup(xfs_dqinode_path(type), GFP_KERNEL), ip);
}
#else
# define xchk_setup_metapath_quotadir(...) (-ENOENT)
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 2b6be75e9424..1c5e45cc6419 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -491,7 +491,7 @@ xchk_refcount_xref_rmap(
struct xfs_scrub *sc,
xfs_filblks_t cow_blocks)
{
- xfs_extlen_t refcbt_blocks = 0;
+ xfs_filblks_t refcbt_blocks = 0;
xfs_filblks_t blocks;
int error;
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index a7fda3e2b013..5dbbe93cb49b 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -184,6 +184,12 @@ struct xfs_scrub {
*/
unsigned int sick_mask;
+ /*
+ * Clear these XFS_SICK_* flags but only if the scan is ok. Useful for
+ * removing ZAPPED flags after a repair.
+ */
+ unsigned int healthy_mask;
+
/* next time we want to cond_resched() */
struct xchk_relax relax;
diff --git a/fs/xfs/scrub/symlink_repair.c b/fs/xfs/scrub/symlink_repair.c
index d015a86ef460..953ce7be78dc 100644
--- a/fs/xfs/scrub/symlink_repair.c
+++ b/fs/xfs/scrub/symlink_repair.c
@@ -36,6 +36,7 @@
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/reap.h"
+#include "scrub/health.h"
/*
* Symbolic Link Repair
@@ -233,7 +234,7 @@ xrep_symlink_salvage(
* target zapped flag.
*/
if (buflen == 0) {
- sc->sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
+ xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_SYMLINK_ZAPPED);
sprintf(target_buf, DUMMY_TARGET);
}
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
index 4b7f7860e37e..2d7ca7e1bbca 100644
--- a/fs/xfs/scrub/tempfile.c
+++ b/fs/xfs/scrub/tempfile.c
@@ -184,11 +184,18 @@ out_release_dquots:
}
/*
+ * Move sc->tempip from the regular directory tree to the metadata directory
+ * tree if sc->ip is part of the metadata directory tree and tempip has an
+ * eligible file mode.
+ *
* Temporary files have to be created before we even know which inode we're
* going to scrub, so we assume that they will be part of the regular directory
* tree. If it turns out that we're actually scrubbing a file from the
* metadata directory tree, we have to subtract the temp file from the root
- * dquots and detach the dquots.
+ * dquots and detach the dquots prior to setting the METADATA iflag. However,
+ * the scrub setup functions grab sc->ip and create sc->tempip before we
+ * actually get around to checking if the file mode is the right type for the
+ * scrubber.
*/
int
xrep_tempfile_adjust_directory_tree(
@@ -204,6 +211,9 @@ xrep_tempfile_adjust_directory_tree(
if (!sc->ip || !xfs_is_metadir_inode(sc->ip))
return 0;
+ if (!S_ISDIR(VFS_I(sc->tempip)->i_mode) &&
+ !S_ISREG(VFS_I(sc->tempip)->i_mode))
+ return 0;
xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
@@ -223,6 +233,7 @@ xrep_tempfile_adjust_directory_tree(
if (error)
goto out_ilock;
+ xfs_iflags_set(sc->tempip, XFS_IRECOVERY);
xfs_qm_dqdetach(sc->tempip);
out_ilock:
xrep_tempfile_iunlock(sc);
@@ -246,6 +257,8 @@ xrep_tempfile_remove_metadir(
ASSERT(sc->tp == NULL);
+ xfs_iflags_clear(sc->tempip, XFS_IRECOVERY);
+
xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
@@ -945,10 +958,13 @@ xrep_is_tempfile(
/*
* Files in the metadata directory tree also have S_PRIVATE set and
- * IOP_XATTR unset, so we must distinguish them separately.
+ * IOP_XATTR unset, so we must distinguish them separately. We (ab)use
+ * the IRECOVERY flag to mark temporary metadir inodes knowing that the
+ * end of log recovery clears IRECOVERY, so the only ones that can
+ * exist during online repair are the ones we create.
*/
if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA))
- return false;
+ return __xfs_iflags_test(ip, XFS_IRECOVERY);
if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
return true;
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 9b38f5ad1eaf..d2ae7e93acb0 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -605,7 +605,7 @@ TRACE_EVENT(xchk_ifork_btree_op_error,
TP_fast_assign(
xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level);
__entry->dev = sc->mp->m_super->s_dev;
- __entry->ino = sc->ip->i_ino;
+ __entry->ino = cur->bc_ino.ip->i_ino;
__entry->whichfork = cur->bc_ino.whichfork;
__entry->type = sc->sm->sm_type;
__assign_str(name);
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 7db386304875..379b48d015d2 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -114,7 +114,8 @@ xfs_attr_shortform_list(
* It didn't all fit, so we have to sort everything on hashval.
*/
sbsize = sf->count * sizeof(*sbuf);
- sbp = sbuf = kmalloc(sbsize, GFP_KERNEL | __GFP_NOFAIL);
+ sbp = sbuf = kmalloc(sbsize,
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
/*
* Scan the attribute list for the rest of the entries, storing
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index a59bbe767a7d..0836fea2d6d8 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -103,7 +103,7 @@ xfs_bmap_count_blocks(
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
struct xfs_btree_cur *cur;
- xfs_extlen_t btblocks = 0;
+ xfs_filblks_t btblocks = 0;
int error;
*nextents = 0;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index ff982d983989..f11d475898f2 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -69,6 +69,30 @@ xfs_dquot_mark_sick(
}
/*
+ * Detach the dquot buffer if it's still attached, because we can get called
+ * through dqpurge after a log shutdown. Caller must hold the dqflock or have
+ * otherwise isolated the dquot.
+ */
+void
+xfs_dquot_detach_buf(
+ struct xfs_dquot *dqp)
+{
+ struct xfs_dq_logitem *qlip = &dqp->q_logitem;
+ struct xfs_buf *bp = NULL;
+
+ spin_lock(&qlip->qli_lock);
+ if (qlip->qli_item.li_buf) {
+ bp = qlip->qli_item.li_buf;
+ qlip->qli_item.li_buf = NULL;
+ }
+ spin_unlock(&qlip->qli_lock);
+ if (bp) {
+ list_del_init(&qlip->qli_item.li_bio_list);
+ xfs_buf_rele(bp);
+ }
+}
+
+/*
* This is called to free all the memory associated with a dquot
*/
void
@@ -76,6 +100,7 @@ xfs_qm_dqdestroy(
struct xfs_dquot *dqp)
{
ASSERT(list_empty(&dqp->q_lru));
+ ASSERT(dqp->q_logitem.qli_item.li_buf == NULL);
kvfree(dqp->q_logitem.qli_item.li_lv_shadow);
mutex_destroy(&dqp->q_qlock);
@@ -1142,9 +1167,11 @@ static void
xfs_qm_dqflush_done(
struct xfs_log_item *lip)
{
- struct xfs_dq_logitem *qip = (struct xfs_dq_logitem *)lip;
- struct xfs_dquot *dqp = qip->qli_dquot;
+ struct xfs_dq_logitem *qlip =
+ container_of(lip, struct xfs_dq_logitem, qli_item);
+ struct xfs_dquot *dqp = qlip->qli_dquot;
struct xfs_ail *ailp = lip->li_ailp;
+ struct xfs_buf *bp = NULL;
xfs_lsn_t tail_lsn;
/*
@@ -1156,12 +1183,12 @@ xfs_qm_dqflush_done(
* holding the lock before removing the dquot from the AIL.
*/
if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) &&
- ((lip->li_lsn == qip->qli_flush_lsn) ||
+ (lip->li_lsn == qlip->qli_flush_lsn ||
test_bit(XFS_LI_FAILED, &lip->li_flags))) {
spin_lock(&ailp->ail_lock);
xfs_clear_li_failed(lip);
- if (lip->li_lsn == qip->qli_flush_lsn) {
+ if (lip->li_lsn == qlip->qli_flush_lsn) {
/* xfs_ail_update_finish() drops the AIL lock */
tail_lsn = xfs_ail_delete_one(ailp, lip);
xfs_ail_update_finish(ailp, tail_lsn);
@@ -1171,6 +1198,20 @@ xfs_qm_dqflush_done(
}
/*
+ * If this dquot hasn't been dirtied since initiating the last dqflush,
+ * release the buffer reference. We already unlinked this dquot item
+ * from the buffer.
+ */
+ spin_lock(&qlip->qli_lock);
+ if (!qlip->qli_dirty) {
+ bp = lip->li_buf;
+ lip->li_buf = NULL;
+ }
+ spin_unlock(&qlip->qli_lock);
+ if (bp)
+ xfs_buf_rele(bp);
+
+ /*
* Release the dq's flush lock since we're done with it.
*/
xfs_dqfunlock(dqp);
@@ -1196,7 +1237,7 @@ xfs_buf_dquot_io_fail(
spin_lock(&bp->b_mount->m_ail->ail_lock);
list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
- xfs_set_li_failed(lip, bp);
+ set_bit(XFS_LI_FAILED, &lip->li_flags);
spin_unlock(&bp->b_mount->m_ail->ail_lock);
}
@@ -1239,6 +1280,111 @@ xfs_qm_dqflush_check(
}
/*
+ * Get the buffer containing the on-disk dquot.
+ *
+ * Requires dquot flush lock, will clear the dirty flag, delete the quota log
+ * item from the AIL, and shut down the system if something goes wrong.
+ */
+static int
+xfs_dquot_read_buf(
+ struct xfs_trans *tp,
+ struct xfs_dquot *dqp,
+ struct xfs_buf **bpp)
+{
+ struct xfs_mount *mp = dqp->q_mount;
+ struct xfs_buf *bp = NULL;
+ int error;
+
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
+ mp->m_quotainfo->qi_dqchunklen, 0,
+ &bp, &xfs_dquot_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_dquot_mark_sick(dqp);
+ if (error)
+ goto out_abort;
+
+ *bpp = bp;
+ return 0;
+
+out_abort:
+ dqp->q_flags &= ~XFS_DQFLAG_DIRTY;
+ xfs_trans_ail_delete(&dqp->q_logitem.qli_item, 0);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ return error;
+}
+
+/*
+ * Attach a dquot buffer to this dquot to avoid allocating a buffer during a
+ * dqflush, since dqflush can be called from reclaim context.
+ */
+int
+xfs_dquot_attach_buf(
+ struct xfs_trans *tp,
+ struct xfs_dquot *dqp)
+{
+ struct xfs_dq_logitem *qlip = &dqp->q_logitem;
+ struct xfs_log_item *lip = &qlip->qli_item;
+ int error;
+
+ spin_lock(&qlip->qli_lock);
+ if (!lip->li_buf) {
+ struct xfs_buf *bp = NULL;
+
+ spin_unlock(&qlip->qli_lock);
+ error = xfs_dquot_read_buf(tp, dqp, &bp);
+ if (error)
+ return error;
+
+ /*
+ * Attach the dquot to the buffer so that the AIL does not have
+ * to read the dquot buffer to push this item.
+ */
+ xfs_buf_hold(bp);
+ spin_lock(&qlip->qli_lock);
+ lip->li_buf = bp;
+ xfs_trans_brelse(tp, bp);
+ }
+ qlip->qli_dirty = true;
+ spin_unlock(&qlip->qli_lock);
+
+ return 0;
+}
+
+/*
+ * Get a new reference to the dquot buffer attached to this dquot for a dqflush
+ * operation.
+ *
+ * Returns 0 and a NULL bp if none was attached to the dquot; 0 and a locked
+ * bp; or -EAGAIN if the buffer could not be locked.
+ */
+int
+xfs_dquot_use_attached_buf(
+ struct xfs_dquot *dqp,
+ struct xfs_buf **bpp)
+{
+ struct xfs_buf *bp = dqp->q_logitem.qli_item.li_buf;
+
+ /*
+ * A NULL buffer can happen if the dquot dirty flag was set but the
+ * filesystem shut down before transaction commit happened. In that
+ * case we're not going to flush anyway.
+ */
+ if (!bp) {
+ ASSERT(xfs_is_shutdown(dqp->q_mount));
+
+ *bpp = NULL;
+ return 0;
+ }
+
+ if (!xfs_buf_trylock(bp))
+ return -EAGAIN;
+
+ xfs_buf_hold(bp);
+ *bpp = bp;
+ return 0;
+}
+
+/*
* Write a modified dquot to disk.
* The dquot must be locked and the flush lock too taken by caller.
* The flush lock will not be unlocked until the dquot reaches the disk,
@@ -1249,11 +1395,11 @@ xfs_qm_dqflush_check(
int
xfs_qm_dqflush(
struct xfs_dquot *dqp,
- struct xfs_buf **bpp)
+ struct xfs_buf *bp)
{
struct xfs_mount *mp = dqp->q_mount;
- struct xfs_log_item *lip = &dqp->q_logitem.qli_item;
- struct xfs_buf *bp;
+ struct xfs_dq_logitem *qlip = &dqp->q_logitem;
+ struct xfs_log_item *lip = &qlip->qli_item;
struct xfs_dqblk *dqblk;
xfs_failaddr_t fa;
int error;
@@ -1263,28 +1409,12 @@ xfs_qm_dqflush(
trace_xfs_dqflush(dqp);
- *bpp = NULL;
-
xfs_qm_dqunpin_wait(dqp);
- /*
- * Get the buffer containing the on-disk dquot
- */
- error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK,
- &bp, &xfs_dquot_buf_ops);
- if (error == -EAGAIN)
- goto out_unlock;
- if (xfs_metadata_is_sick(error))
- xfs_dquot_mark_sick(dqp);
- if (error)
- goto out_abort;
-
fa = xfs_qm_dqflush_check(dqp);
if (fa) {
xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
dqp->q_id, fa);
- xfs_buf_relse(bp);
xfs_dquot_mark_sick(dqp);
error = -EFSCORRUPTED;
goto out_abort;
@@ -1299,8 +1429,15 @@ xfs_qm_dqflush(
*/
dqp->q_flags &= ~XFS_DQFLAG_DIRTY;
- xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
- &dqp->q_logitem.qli_item.li_lsn);
+ /*
+ * We hold the dquot lock, so nobody can dirty it while we're
+ * scheduling the write out. Clear the dirty-since-flush flag.
+ */
+ spin_lock(&qlip->qli_lock);
+ qlip->qli_dirty = false;
+ spin_unlock(&qlip->qli_lock);
+
+ xfs_trans_ail_copy_lsn(mp->m_ail, &qlip->qli_flush_lsn, &lip->li_lsn);
/*
* copy the lsn into the on-disk dquot now while we have the in memory
@@ -1312,7 +1449,7 @@ xfs_qm_dqflush(
* of a dquot without an up-to-date CRC getting to disk.
*/
if (xfs_has_crc(mp)) {
- dqblk->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
+ dqblk->dd_lsn = cpu_to_be64(lip->li_lsn);
xfs_update_cksum((char *)dqblk, sizeof(struct xfs_dqblk),
XFS_DQUOT_CRC_OFF);
}
@@ -1322,7 +1459,7 @@ xfs_qm_dqflush(
* the AIL and release the flush lock once the dquot is synced to disk.
*/
bp->b_flags |= _XBF_DQUOTS;
- list_add_tail(&dqp->q_logitem.qli_item.li_bio_list, &bp->b_li_list);
+ list_add_tail(&lip->li_bio_list, &bp->b_li_list);
/*
* If the buffer is pinned then push on the log so we won't
@@ -1334,14 +1471,12 @@ xfs_qm_dqflush(
}
trace_xfs_dqflush_done(dqp);
- *bpp = bp;
return 0;
out_abort:
dqp->q_flags &= ~XFS_DQFLAG_DIRTY;
xfs_trans_ail_delete(lip, 0);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-out_unlock:
xfs_dqfunlock(dqp);
return error;
}
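Taken together, the three new helpers move buffer acquisition out of xfs_qm_dqflush(): the buffer is attached (and held via the log item) at precommit time, handed back under trylock semantics when a flush is attempted, and detached once the dquot is purged. A hedged sketch of the calling convention implied by the diff (back_off() and skip_flush() are placeholders; error handling abbreviated):

/* Precommit context, where blocking I/O is still allowed: */
error = xfs_dquot_attach_buf(tp, dqp);		/* holds bp on the log item */

/* Reclaim/AIL-push context, where allocation is not allowed: */
error = xfs_dquot_use_attached_buf(dqp, &bp);
if (error == -EAGAIN) {				/* buffer lock contended */
	back_off();
} else if (!bp) {				/* only after a shutdown */
	skip_flush();
} else {
	error = xfs_qm_dqflush(dqp, bp);	/* drops dqflock on error */
	if (!error)
		xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);
}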
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index d73d179df009..c617bac75361 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -214,7 +214,7 @@ void xfs_dquot_to_disk(struct xfs_disk_dquot *ddqp, struct xfs_dquot *dqp);
#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->q_flags & XFS_DQFLAG_DIRTY)
void xfs_qm_dqdestroy(struct xfs_dquot *dqp);
-int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf **bpp);
+int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf *bp);
void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp);
void xfs_qm_adjust_dqtimers(struct xfs_dquot *d);
void xfs_qm_adjust_dqlimits(struct xfs_dquot *d);
@@ -237,6 +237,10 @@ void xfs_dqlockn(struct xfs_dqtrx *q);
void xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
+int xfs_dquot_attach_buf(struct xfs_trans *tp, struct xfs_dquot *dqp);
+int xfs_dquot_use_attached_buf(struct xfs_dquot *dqp, struct xfs_buf **bpp);
+void xfs_dquot_detach_buf(struct xfs_dquot *dqp);
+
static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
{
xfs_dqlock(dqp);
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 7d19091215b0..271b195ebb93 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -123,8 +123,9 @@ xfs_qm_dquot_logitem_push(
__releases(&lip->li_ailp->ail_lock)
__acquires(&lip->li_ailp->ail_lock)
{
- struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
- struct xfs_buf *bp = lip->li_buf;
+ struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
+ struct xfs_dquot *dqp = qlip->qli_dquot;
+ struct xfs_buf *bp;
uint rval = XFS_ITEM_SUCCESS;
int error;
@@ -155,14 +156,25 @@ xfs_qm_dquot_logitem_push(
spin_unlock(&lip->li_ailp->ail_lock);
- error = xfs_qm_dqflush(dqp, &bp);
+ error = xfs_dquot_use_attached_buf(dqp, &bp);
+ if (error == -EAGAIN) {
+ xfs_dqfunlock(dqp);
+ rval = XFS_ITEM_LOCKED;
+ goto out_relock_ail;
+ }
+
+ /*
+ * dqflush completes dqflock on error, and the delwri ioend does it on
+ * success.
+ */
+ error = xfs_qm_dqflush(dqp, bp);
if (!error) {
if (!xfs_buf_delwri_queue(bp, buffer_list))
rval = XFS_ITEM_FLUSHING;
- xfs_buf_relse(bp);
- } else if (error == -EAGAIN)
- rval = XFS_ITEM_LOCKED;
+ }
+ xfs_buf_relse(bp);
+out_relock_ail:
spin_lock(&lip->li_ailp->ail_lock);
out_unlock:
xfs_dqunlock(dqp);
@@ -195,12 +207,10 @@ xfs_qm_dquot_logitem_committing(
}
#ifdef DEBUG_EXPENSIVE
-static int
-xfs_qm_dquot_logitem_precommit(
- struct xfs_trans *tp,
- struct xfs_log_item *lip)
+static void
+xfs_qm_dquot_logitem_precommit_check(
+ struct xfs_dquot *dqp)
{
- struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
struct xfs_mount *mp = dqp->q_mount;
struct xfs_disk_dquot ddq = { };
xfs_failaddr_t fa;
@@ -216,13 +226,24 @@ xfs_qm_dquot_logitem_precommit(
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
ASSERT(fa == NULL);
}
-
- return 0;
}
#else
-# define xfs_qm_dquot_logitem_precommit NULL
+# define xfs_qm_dquot_logitem_precommit_check(...) ((void)0)
#endif
+static int
+xfs_qm_dquot_logitem_precommit(
+ struct xfs_trans *tp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
+ struct xfs_dquot *dqp = qlip->qli_dquot;
+
+ xfs_qm_dquot_logitem_precommit_check(dqp);
+
+ return xfs_dquot_attach_buf(tp, dqp);
+}
+
static const struct xfs_item_ops xfs_dquot_item_ops = {
.iop_size = xfs_qm_dquot_logitem_size,
.iop_precommit = xfs_qm_dquot_logitem_precommit,
@@ -247,5 +268,7 @@ xfs_qm_dquot_logitem_init(
xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
&xfs_dquot_item_ops);
+ spin_lock_init(&lp->qli_lock);
lp->qli_dquot = dqp;
+ lp->qli_dirty = false;
}
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
index 794710c24474..d66e52807d76 100644
--- a/fs/xfs/xfs_dquot_item.h
+++ b/fs/xfs/xfs_dquot_item.h
@@ -14,6 +14,13 @@ struct xfs_dq_logitem {
struct xfs_log_item qli_item; /* common portion */
struct xfs_dquot *qli_dquot; /* dquot ptr */
xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
+
+ /*
+ * We use this spinlock to coordinate access to the li_buf pointer in
+ * the log item and the qli_dirty flag.
+ */
+ spinlock_t qli_lock;
+ bool qli_dirty; /* dirtied since last flush? */
};
void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp);
diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c
index 9ab05ad224d1..265c42449893 100644
--- a/fs/xfs/xfs_exchrange.c
+++ b/fs/xfs/xfs_exchrange.c
@@ -854,7 +854,7 @@ xfs_ioc_start_commit(
struct xfs_commit_range __user *argp)
{
struct xfs_commit_range args = { };
- struct timespec64 ts;
+ struct kstat kstat = { };
struct xfs_commit_range_fresh *kern_f;
struct xfs_commit_range_fresh __user *user_f;
struct inode *inode2 = file_inode(file);
@@ -871,12 +871,12 @@ xfs_ioc_start_commit(
memcpy(&kern_f->fsid, ip2->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
xfs_ilock(ip2, lockflags);
- ts = inode_get_ctime(inode2);
- kern_f->file2_ctime = ts.tv_sec;
- kern_f->file2_ctime_nsec = ts.tv_nsec;
- ts = inode_get_mtime(inode2);
- kern_f->file2_mtime = ts.tv_sec;
- kern_f->file2_mtime_nsec = ts.tv_nsec;
+ /* Force writing of a distinct ctime if any writes happen. */
+ fill_mg_cmtime(&kstat, STATX_CTIME | STATX_MTIME, inode2);
+ kern_f->file2_ctime = kstat.ctime.tv_sec;
+ kern_f->file2_ctime_nsec = kstat.ctime.tv_nsec;
+ kern_f->file2_mtime = kstat.mtime.tv_sec;
+ kern_f->file2_mtime_nsec = kstat.mtime.tv_nsec;
kern_f->file2_ino = ip2->i_ino;
kern_f->file2_gen = inode2->i_generation;
kern_f->magic = XCR_FRESH_MAGIC;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 4a0b7de4f7ae..9a435b1ff264 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1242,6 +1242,14 @@ out_unlock:
xfs_iunlock2_remapping(src, dest);
if (ret)
trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
+ /*
+ * If the caller did not set CAN_SHORTEN, then it is not prepared to
+ * handle partial results -- either the whole remap succeeds, or we
+ * must say why it did not. In this case, any error should be returned
+ * to the caller.
+ */
+ if (ret && remapped < len && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
+ return ret;
return remapped > 0 ? remapped : ret;
}
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 82f2e0dd2249..3290dd8524a6 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -163,7 +163,8 @@ struct xfs_getfsmap_info {
xfs_daddr_t next_daddr; /* next daddr we expect */
/* daddr of low fsmap key when we're using the rtbitmap */
xfs_daddr_t low_daddr;
- xfs_daddr_t end_daddr; /* daddr of high fsmap key */
+ /* daddr of high fsmap key, or the last daddr on the device */
+ xfs_daddr_t end_daddr;
u64 missing_owner; /* owner of holes */
u32 dev; /* device id */
/*
@@ -387,8 +388,8 @@ xfs_getfsmap_group_helper(
* we calculated from userspace's high key to synthesize the record.
* Note that if the btree query found a mapping, there won't be a gap.
*/
- if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL)
- frec->start_daddr = info->end_daddr;
+ if (info->last)
+ frec->start_daddr = info->end_daddr + 1;
else
frec->start_daddr = xfs_gbno_to_daddr(xg, startblock);
@@ -736,11 +737,10 @@ xfs_getfsmap_rtdev_rtbitmap_helper(
* we calculated from userspace's high key to synthesize the record.
* Note that if the btree query found a mapping, there won't be a gap.
*/
- if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL) {
- frec.start_daddr = info->end_daddr;
- } else {
+ if (info->last)
+ frec.start_daddr = info->end_daddr + 1;
+ else
frec.start_daddr = xfs_rtb_to_daddr(mp, start_rtb);
- }
frec.len_daddr = XFS_FSB_TO_BB(mp, rtbcount);
return xfs_getfsmap_helper(tp, info, &frec);
@@ -933,7 +933,10 @@ xfs_getfsmap(
struct xfs_trans *tp = NULL;
struct xfs_fsmap dkeys[2]; /* per-dev keys */
struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS];
- struct xfs_getfsmap_info info = { NULL };
+ struct xfs_getfsmap_info info = {
+ .fsmap_recs = fsmap_recs,
+ .head = head,
+ };
bool use_rmap;
int i;
int error = 0;
@@ -998,9 +1001,6 @@ xfs_getfsmap(
info.next_daddr = head->fmh_keys[0].fmr_physical +
head->fmh_keys[0].fmr_length;
- info.end_daddr = XFS_BUF_DADDR_NULL;
- info.fsmap_recs = fsmap_recs;
- info.head = head;
/* For each device we support... */
for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
@@ -1013,17 +1013,23 @@ xfs_getfsmap(
break;
/*
- * If this device number matches the high key, we have
- * to pass the high key to the handler to limit the
- * query results. If the device number exceeds the
- * low key, zero out the low key so that we get
- * everything from the beginning.
+ * If this device number matches the high key, we have to pass
+ * the high key to the handler to limit the query results, and
+ * set the end_daddr so that we can synthesize records at the
+ * end of the query range or device.
*/
if (handlers[i].dev == head->fmh_keys[1].fmr_device) {
dkeys[1] = head->fmh_keys[1];
info.end_daddr = min(handlers[i].nr_sectors - 1,
dkeys[1].fmr_physical);
+ } else {
+ info.end_daddr = handlers[i].nr_sectors - 1;
}
+
+ /*
+ * If the device number exceeds the low key, zero out the low
+ * key so that we get everything from the beginning.
+ */
if (handlers[i].dev > head->fmh_keys[0].fmr_device)
memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
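One behavioral note on the hunks above: info.end_daddr is now initialized for every device, not just the one matching the high key, and synthesized trailing records begin one daddr past the covered range. A comment sketch of the resulting bounds logic, condensed from the diff:

/*
 * info.end_daddr = (dev == high key's device)
 *         ? min(device end, high key's fmr_physical)
 *         : device end;
 *
 * A gap record synthesized after the last mapping then starts at
 * info.end_daddr + 1, so the XFS_BUF_DADDR_NULL sentinel is gone.
 */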
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index b0de3d924d4c..1648dc5a8068 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -231,7 +231,7 @@ xfs_iflags_clear(xfs_inode_t *ip, unsigned long flags)
}
static inline int
-__xfs_iflags_test(xfs_inode_t *ip, unsigned long flags)
+__xfs_iflags_test(const struct xfs_inode *ip, unsigned long flags)
{
return (ip->i_flags & flags);
}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 26b2f5887b88..05daad8a8d34 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3456,6 +3456,16 @@ xlog_force_shutdown(
return false;
/*
+ * Ensure that there is only ever one log shutdown being processed.
+ * If we allow the log force below on a second pass after shutting
+ * down the log, we risk deadlocking the CIL push as it may require
+ * locks on objects the current shutdown context holds (e.g. taking
+ * buffer locks to abort buffers on last unpin of buf log items).
+ */
+ if (test_and_set_bit(XLOG_SHUTDOWN_STARTED, &log->l_opstate))
+ return false;
+
+ /*
* Flush all the completed transactions to disk before marking the log
* being shut down. We need to do this first as shutting down the log
* before the force will prevent the log force from flushing the iclogs
@@ -3487,6 +3497,7 @@ xlog_force_shutdown(
spin_lock(&log->l_icloglock);
if (test_and_set_bit(XLOG_IO_ERROR, &log->l_opstate)) {
spin_unlock(&log->l_icloglock);
+ ASSERT(0);
return false;
}
spin_unlock(&log->l_icloglock);
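The new XLOG_SHUTDOWN_STARTED bit makes the shutdown sequence single-entry: test_and_set_bit() atomically returns the old value, so exactly one caller sees it clear and proceeds with the log force. A standalone C11 analogue of that one-shot exclusion (stdatomic, not the kernel bitops API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_flag shutdown_started = ATOMIC_FLAG_INIT;

/* True only for the single caller that wins the race. */
static bool start_shutdown_once(void)
{
	return !atomic_flag_test_and_set(&shutdown_started);
}

int main(void)
{
	printf("%d %d\n", start_shutdown_once(), start_shutdown_once());
	return 0;	/* prints "1 0": only the first caller proceeds */
}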
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 2e9157b650e6..1ca406ec1b40 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -171,11 +171,8 @@ xlog_cil_insert_pcp_aggregate(
*/
for_each_cpu(cpu, &ctx->cil_pcpmask) {
struct xlog_cil_pcp *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
- int old = READ_ONCE(cilpcp->space_used);
- while (!try_cmpxchg(&cilpcp->space_used, &old, 0))
- ;
- count += old;
+ count += xchg(&cilpcp->space_used, 0);
}
atomic_add(count, &ctx->space_used);
}
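The deleted loop open-coded an atomic read-and-zero with a cmpxchg retry; xchg() performs the same swap unconditionally in a single primitive. A standalone C11 illustration of the equivalence (stdatomic, not the kernel API):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic int space_used;

int main(void)
{
	int old, count;

	/* Old pattern: retry until the compare-exchange lands. */
	atomic_store(&space_used, 42);
	old = atomic_load(&space_used);
	while (!atomic_compare_exchange_weak(&space_used, &old, 0))
		;

	/* New pattern: one unconditional exchange, same effect. */
	atomic_store(&space_used, 42);
	count = atomic_exchange(&space_used, 0);

	printf("old=%d count=%d\n", old, count);	/* both drain 42 */
	return 0;
}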
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index b8778a4fd6b6..f3d78869e5e5 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -458,6 +458,7 @@ struct xlog {
#define XLOG_IO_ERROR 2 /* log hit an I/O error, and being
shutdown */
#define XLOG_TAIL_WARN 3 /* log tail verify warning issued */
+#define XLOG_SHUTDOWN_STARTED 4 /* xlog_force_shutdown() exclusion */
static inline bool
xlog_recovery_needed(struct xlog *log)
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 61ee110b47d7..dc8b1010d4d3 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -148,17 +148,29 @@ xfs_qm_dqpurge(
* We don't care about getting disk errors here. We need
* to purge this dquot anyway, so we go ahead regardless.
*/
- error = xfs_qm_dqflush(dqp, &bp);
+ error = xfs_dquot_use_attached_buf(dqp, &bp);
+ if (error == -EAGAIN) {
+ xfs_dqfunlock(dqp);
+ dqp->q_flags &= ~XFS_DQFLAG_FREEING;
+ goto out_unlock;
+ }
+ if (!bp)
+ goto out_funlock;
+
+ /*
+ * dqflush completes dqflock on error, and the bwrite ioend
+ * does it on success.
+ */
+ error = xfs_qm_dqflush(dqp, bp);
if (!error) {
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
- } else if (error == -EAGAIN) {
- dqp->q_flags &= ~XFS_DQFLAG_FREEING;
- goto out_unlock;
}
xfs_dqflock(dqp);
}
+ xfs_dquot_detach_buf(dqp);
+out_funlock:
ASSERT(atomic_read(&dqp->q_pincount) == 0);
ASSERT(xlog_is_shutdown(dqp->q_logitem.qli_item.li_log) ||
!test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags));
@@ -241,6 +253,10 @@ xfs_qm_destroy_quotainos(
xfs_irele(qi->qi_pquotaip);
qi->qi_pquotaip = NULL;
}
+ if (qi->qi_dirip) {
+ xfs_irele(qi->qi_dirip);
+ qi->qi_dirip = NULL;
+ }
}
/*
@@ -490,7 +506,17 @@ xfs_qm_dquot_isolate(
/* we have to drop the LRU lock to flush the dquot */
spin_unlock(&lru->lock);
- error = xfs_qm_dqflush(dqp, &bp);
+ error = xfs_dquot_use_attached_buf(dqp, &bp);
+ if (!bp || error == -EAGAIN) {
+ xfs_dqfunlock(dqp);
+ goto out_unlock_dirty;
+ }
+
+ /*
+ * dqflush completes dqflock on error, and the delwri ioend
+ * does it on success.
+ */
+ error = xfs_qm_dqflush(dqp, bp);
if (error)
goto out_unlock_dirty;
@@ -498,6 +524,8 @@ xfs_qm_dquot_isolate(
xfs_buf_relse(bp);
goto out_unlock_dirty;
}
+
+ xfs_dquot_detach_buf(dqp);
xfs_dqfunlock(dqp);
/*
@@ -646,8 +674,7 @@ xfs_qm_init_timelimits(
static int
xfs_qm_load_metadir_qinos(
struct xfs_mount *mp,
- struct xfs_quotainfo *qi,
- struct xfs_inode **dpp)
+ struct xfs_quotainfo *qi)
{
struct xfs_trans *tp;
int error;
@@ -656,7 +683,7 @@ xfs_qm_load_metadir_qinos(
if (error)
return error;
- error = xfs_dqinode_load_parent(tp, dpp);
+ error = xfs_dqinode_load_parent(tp, &qi->qi_dirip);
if (error == -ENOENT) {
/* no quota dir directory, but we'll create one later */
error = 0;
@@ -666,21 +693,21 @@ xfs_qm_load_metadir_qinos(
goto out_trans;
if (XFS_IS_UQUOTA_ON(mp)) {
- error = xfs_dqinode_load(tp, *dpp, XFS_DQTYPE_USER,
+ error = xfs_dqinode_load(tp, qi->qi_dirip, XFS_DQTYPE_USER,
&qi->qi_uquotaip);
if (error && error != -ENOENT)
goto out_trans;
}
if (XFS_IS_GQUOTA_ON(mp)) {
- error = xfs_dqinode_load(tp, *dpp, XFS_DQTYPE_GROUP,
+ error = xfs_dqinode_load(tp, qi->qi_dirip, XFS_DQTYPE_GROUP,
&qi->qi_gquotaip);
if (error && error != -ENOENT)
goto out_trans;
}
if (XFS_IS_PQUOTA_ON(mp)) {
- error = xfs_dqinode_load(tp, *dpp, XFS_DQTYPE_PROJ,
+ error = xfs_dqinode_load(tp, qi->qi_dirip, XFS_DQTYPE_PROJ,
&qi->qi_pquotaip);
if (error && error != -ENOENT)
goto out_trans;
@@ -696,34 +723,40 @@ out_trans:
STATIC int
xfs_qm_create_metadir_qinos(
struct xfs_mount *mp,
- struct xfs_quotainfo *qi,
- struct xfs_inode **dpp)
+ struct xfs_quotainfo *qi)
{
int error;
- if (!*dpp) {
- error = xfs_dqinode_mkdir_parent(mp, dpp);
+ if (!qi->qi_dirip) {
+ error = xfs_dqinode_mkdir_parent(mp, &qi->qi_dirip);
if (error && error != -EEXIST)
return error;
+ /*
+ * If the /quotas dirent points to an inode that isn't
+ * loadable, qi_dirip will be NULL but mkdir_parent will return
+ * -EEXIST. In this case the metadir is corrupt, so bail out.
+ */
+ if (XFS_IS_CORRUPT(mp, qi->qi_dirip == NULL))
+ return -EFSCORRUPTED;
}
if (XFS_IS_UQUOTA_ON(mp) && !qi->qi_uquotaip) {
- error = xfs_dqinode_metadir_create(*dpp, XFS_DQTYPE_USER,
- &qi->qi_uquotaip);
+ error = xfs_dqinode_metadir_create(qi->qi_dirip,
+ XFS_DQTYPE_USER, &qi->qi_uquotaip);
if (error)
return error;
}
if (XFS_IS_GQUOTA_ON(mp) && !qi->qi_gquotaip) {
- error = xfs_dqinode_metadir_create(*dpp, XFS_DQTYPE_GROUP,
- &qi->qi_gquotaip);
+ error = xfs_dqinode_metadir_create(qi->qi_dirip,
+ XFS_DQTYPE_GROUP, &qi->qi_gquotaip);
if (error)
return error;
}
if (XFS_IS_PQUOTA_ON(mp) && !qi->qi_pquotaip) {
- error = xfs_dqinode_metadir_create(*dpp, XFS_DQTYPE_PROJ,
- &qi->qi_pquotaip);
+ error = xfs_dqinode_metadir_create(qi->qi_dirip,
+ XFS_DQTYPE_PROJ, &qi->qi_pquotaip);
if (error)
return error;
}
@@ -768,7 +801,6 @@ xfs_qm_init_metadir_qinos(
struct xfs_mount *mp)
{
struct xfs_quotainfo *qi = mp->m_quotainfo;
- struct xfs_inode *dp = NULL;
int error;
if (!xfs_has_quota(mp)) {
@@ -777,20 +809,22 @@ xfs_qm_init_metadir_qinos(
return error;
}
- error = xfs_qm_load_metadir_qinos(mp, qi, &dp);
+ error = xfs_qm_load_metadir_qinos(mp, qi);
if (error)
goto out_err;
- error = xfs_qm_create_metadir_qinos(mp, qi, &dp);
+ error = xfs_qm_create_metadir_qinos(mp, qi);
if (error)
goto out_err;
- xfs_irele(dp);
+ /* The only user of the quota dir inode is online fsck */
+#if !IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB)
+ xfs_irele(qi->qi_dirip);
+ qi->qi_dirip = NULL;
+#endif
return 0;
out_err:
xfs_qm_destroy_quotainos(mp->m_quotainfo);
- if (dp)
- xfs_irele(dp);
return error;
}
@@ -1304,6 +1338,10 @@ xfs_qm_quotacheck_dqadjust(
return error;
}
+ error = xfs_dquot_attach_buf(NULL, dqp);
+ if (error)
+ return error;
+
trace_xfs_dqadjust(dqp);
/*
@@ -1486,11 +1524,17 @@ xfs_qm_flush_one(
goto out_unlock;
}
- error = xfs_qm_dqflush(dqp, &bp);
+ error = xfs_dquot_use_attached_buf(dqp, &bp);
if (error)
goto out_unlock;
+ if (!bp) {
+ error = -EFSCORRUPTED;
+ goto out_unlock;
+ }
- xfs_buf_delwri_queue(bp, buffer_list);
+ error = xfs_qm_dqflush(dqp, bp);
+ if (!error)
+ xfs_buf_delwri_queue(bp, buffer_list);
xfs_buf_relse(bp);
out_unlock:
xfs_dqunlock(dqp);
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index e919c7f62f57..35b64bc3a7a8 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -55,6 +55,7 @@ struct xfs_quotainfo {
struct xfs_inode *qi_uquotaip; /* user quota inode */
struct xfs_inode *qi_gquotaip; /* group quota inode */
struct xfs_inode *qi_pquotaip; /* project quota inode */
+ struct xfs_inode *qi_dirip; /* quota metadir */
struct list_lru qi_lru;
int qi_dquots;
struct mutex qi_quotaofflock;/* to serialize quotaoff */
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 4eda50ae2d1c..0c78f30fa4a3 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -427,19 +427,6 @@ xfs_qm_scall_getquota_fill_qc(
dst->d_ino_timer = 0;
dst->d_rt_spc_timer = 0;
}
-
-#ifdef DEBUG
- if (xfs_dquot_is_enforced(dqp) && dqp->q_id != 0) {
- if ((dst->d_space > dst->d_spc_softlimit) &&
- (dst->d_spc_softlimit > 0)) {
- ASSERT(dst->d_spc_timer != 0);
- }
- if ((dst->d_ino_count > dqp->q_ino.softlimit) &&
- (dqp->q_ino.softlimit > 0)) {
- ASSERT(dst->d_ino_timer != 0);
- }
- }
-#endif
}
/* Return the quota information for the dquot matching id. */
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index fa1317cc396c..d7565462af3d 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -101,7 +101,8 @@ extern void xfs_trans_free_dqinfo(struct xfs_trans *);
extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *,
uint, int64_t);
extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *);
-extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
+void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *tp,
+ bool already_locked);
int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, struct xfs_inode *ip,
int64_t dblocks, int64_t rblocks, bool force);
extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
@@ -173,7 +174,7 @@ static inline void xfs_trans_mod_dquot_byino(struct xfs_trans *tp,
{
}
#define xfs_trans_apply_dquot_deltas(tp)
-#define xfs_trans_unreserve_and_mod_dquots(tp)
+#define xfs_trans_unreserve_and_mod_dquots(tp, a)
static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
struct xfs_inode *ip, int64_t dblocks, int64_t rblocks,
bool force)
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 0cb534d71119..fcfa6e0eb3ad 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1827,7 +1827,7 @@ xfs_rtallocate_rtg(
* For an allocation to an empty file at offset 0, pick an extent that
* will space things out in the rt area.
*/
- if (bno_hint)
+ if (bno_hint != NULLFSBLOCK)
start = xfs_rtb_to_rtx(args.mp, bno_hint);
else if (!xfs_has_rtgroups(args.mp) && initial_user_data)
start = xfs_rtpick_extent(args.rtg, tp, maxlen);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 30fbed27cf05..4cd25717c9d1 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -860,29 +860,17 @@ __xfs_trans_commit(
trace_xfs_trans_commit(tp, _RET_IP_);
- error = xfs_trans_run_precommits(tp);
- if (error) {
- if (tp->t_flags & XFS_TRANS_PERM_LOG_RES)
- xfs_defer_cancel(tp);
- goto out_unreserve;
- }
-
/*
- * Finish deferred items on final commit. Only permanent transactions
- * should ever have deferred ops.
+ * Commit per-transaction changes that are not already tracked through
+ * log items. This can add dirty log items to the transaction.
*/
- WARN_ON_ONCE(!list_empty(&tp->t_dfops) &&
- !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
- if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
- error = xfs_defer_finish_noroll(&tp);
- if (error)
- goto out_unreserve;
+ if (tp->t_flags & XFS_TRANS_SB_DIRTY)
+ xfs_trans_apply_sb_deltas(tp);
+ xfs_trans_apply_dquot_deltas(tp);
- /* Run precommits from final tx in defer chain. */
- error = xfs_trans_run_precommits(tp);
- if (error)
- goto out_unreserve;
- }
+ error = xfs_trans_run_precommits(tp);
+ if (error)
+ goto out_unreserve;
/*
* If there is nothing to be logged by the transaction,
@@ -907,13 +895,6 @@ __xfs_trans_commit(
ASSERT(tp->t_ticket != NULL);
- /*
- * If we need to update the superblock, then do it now.
- */
- if (tp->t_flags & XFS_TRANS_SB_DIRTY)
- xfs_trans_apply_sb_deltas(tp);
- xfs_trans_apply_dquot_deltas(tp);
-
xlog_cil_commit(log, tp, &commit_seq, regrant);
xfs_trans_free(tp);
@@ -939,7 +920,7 @@ out_unreserve:
* the dqinfo portion to be. All that means is that we have some
* (non-persistent) quota reservations that need to be unreserved.
*/
- xfs_trans_unreserve_and_mod_dquots(tp);
+ xfs_trans_unreserve_and_mod_dquots(tp, true);
if (tp->t_ticket) {
if (regrant && !xlog_is_shutdown(log))
xfs_log_ticket_regrant(log, tp->t_ticket);
@@ -958,6 +939,20 @@ int
xfs_trans_commit(
struct xfs_trans *tp)
{
+ /*
+ * Finish deferred items on final commit. Only permanent transactions
+ * should ever have deferred ops.
+ */
+ WARN_ON_ONCE(!list_empty(&tp->t_dfops) &&
+ !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
+ if (tp->t_flags & XFS_TRANS_PERM_LOG_RES) {
+ int error = xfs_defer_finish_noroll(&tp);
+ if (error) {
+ xfs_trans_cancel(tp);
+ return error;
+ }
+ }
+
return __xfs_trans_commit(tp, false);
}
@@ -1019,7 +1014,7 @@ xfs_trans_cancel(
}
#endif
xfs_trans_unreserve_and_mod_sb(tp);
- xfs_trans_unreserve_and_mod_dquots(tp);
+ xfs_trans_unreserve_and_mod_dquots(tp, false);
if (tp->t_ticket) {
xfs_log_ticket_ungrant(log, tp->t_ticket);
@@ -1435,5 +1430,8 @@ done:
out_cancel:
xfs_trans_cancel(tp);
+ xfs_iunlock(dp, XFS_ILOCK_EXCL);
+ if (dp != ip)
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
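After these hunks, deferred-op finishing lives in the outer xfs_trans_commit() and the per-transaction superblock/dquot deltas are applied before the precommit pass, so precommits always see the final dirty state. A comment sketch of the resulting ordering (function bodies elided):

/*
 * xfs_trans_commit(tp)
 *     if (XFS_TRANS_PERM_LOG_RES)
 *         xfs_defer_finish_noroll(&tp)    // failure => xfs_trans_cancel()
 *     __xfs_trans_commit(tp, false)
 *         xfs_trans_apply_sb_deltas(tp)   // if XFS_TRANS_SB_DIRTY
 *         xfs_trans_apply_dquot_deltas(tp)
 *         xfs_trans_run_precommits(tp)    // may add dirty log items
 *         xlog_cil_commit(...)
 */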
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 8ede9d099d1f..f56d62dced97 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -360,7 +360,7 @@ xfsaild_resubmit_item(
/* protected by ail_lock */
list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
- if (bp->b_flags & _XBF_INODES)
+ if (bp->b_flags & (_XBF_INODES | _XBF_DQUOTS))
clear_bit(XFS_LI_FAILED, &lip->li_flags);
else
xfs_clear_li_failed(lip);
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 481ba3dc9f19..713b6d243e56 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -606,6 +606,24 @@ xfs_trans_apply_dquot_deltas(
ASSERT(dqp->q_blk.reserved >= dqp->q_blk.count);
ASSERT(dqp->q_ino.reserved >= dqp->q_ino.count);
ASSERT(dqp->q_rtb.reserved >= dqp->q_rtb.count);
+
+ /*
+ * We've applied the count changes and given back
+ * whatever reservation we didn't use. Zero out the
+ * dqtrx fields.
+ */
+ qtrx->qt_blk_res = 0;
+ qtrx->qt_bcount_delta = 0;
+ qtrx->qt_delbcnt_delta = 0;
+
+ qtrx->qt_rtblk_res = 0;
+ qtrx->qt_rtblk_res_used = 0;
+ qtrx->qt_rtbcount_delta = 0;
+ qtrx->qt_delrtb_delta = 0;
+
+ qtrx->qt_ino_res = 0;
+ qtrx->qt_ino_res_used = 0;
+ qtrx->qt_icount_delta = 0;
}
}
}
@@ -642,7 +660,8 @@ xfs_trans_unreserve_and_mod_dquots_hook(
*/
void
xfs_trans_unreserve_and_mod_dquots(
- struct xfs_trans *tp)
+ struct xfs_trans *tp,
+ bool already_locked)
{
int i, j;
struct xfs_dquot *dqp;
@@ -671,10 +690,12 @@ xfs_trans_unreserve_and_mod_dquots(
* about the number of blocks used field, or deltas.
* Also we don't bother to zero the fields.
*/
- locked = false;
+ locked = already_locked;
if (qtrx->qt_blk_res) {
- xfs_dqlock(dqp);
- locked = true;
+ if (!locked) {
+ xfs_dqlock(dqp);
+ locked = true;
+ }
dqp->q_blk.reserved -=
(xfs_qcnt_t)qtrx->qt_blk_res;
}
@@ -695,7 +716,7 @@ xfs_trans_unreserve_and_mod_dquots(
dqp->q_rtb.reserved -=
(xfs_qcnt_t)qtrx->qt_rtblk_res;
}
- if (locked)
+ if (locked && !already_locked)
xfs_dqunlock(dqp);
}
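The new already_locked flag turns the lock handling here into a balanced take/drop: the commit failure path above passes true (the dquot deltas were already applied with the dquots locked), while xfs_trans_cancel() passes false, so this function only takes a lock it doesn't hold and only drops a lock it took. A hedged sketch of the idiom, with names from the diff:

/* Only lock what we don't hold; only drop what we took here. */
bool locked = already_locked;

if (qtrx->qt_blk_res && !locked) {
	xfs_dqlock(dqp);
	locked = true;
}
/* ... back out qt_blk_res / qt_ino_res / qt_rtblk_res reservations ... */
if (locked && !already_locked)
	xfs_dqunlock(dqp);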