diff options
Diffstat (limited to 'fs/xfs/libxfs')
50 files changed, 4083 insertions, 939 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 5ca8d0106827..b59cb461e096 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -30,86 +30,7 @@ #include "xfs_trace.h" #include "xfs_inode.h" #include "xfs_icache.h" - - -/* - * Passive reference counting access wrappers to the perag structures. If the - * per-ag structure is to be freed, the freeing code is responsible for cleaning - * up objects with passive references before freeing the structure. This is - * things like cached buffers. - */ -struct xfs_perag * -xfs_perag_get( - struct xfs_mount *mp, - xfs_agnumber_t agno) -{ - struct xfs_perag *pag; - - rcu_read_lock(); - pag = xa_load(&mp->m_perags, agno); - if (pag) { - trace_xfs_perag_get(pag, _RET_IP_); - ASSERT(atomic_read(&pag->pag_ref) >= 0); - atomic_inc(&pag->pag_ref); - } - rcu_read_unlock(); - return pag; -} - -/* Get a passive reference to the given perag. */ -struct xfs_perag * -xfs_perag_hold( - struct xfs_perag *pag) -{ - ASSERT(atomic_read(&pag->pag_ref) > 0 || - atomic_read(&pag->pag_active_ref) > 0); - - trace_xfs_perag_hold(pag, _RET_IP_); - atomic_inc(&pag->pag_ref); - return pag; -} - -void -xfs_perag_put( - struct xfs_perag *pag) -{ - trace_xfs_perag_put(pag, _RET_IP_); - ASSERT(atomic_read(&pag->pag_ref) > 0); - atomic_dec(&pag->pag_ref); -} - -/* - * Active references for perag structures. This is for short term access to the - * per ag structures for walking trees or accessing state. If an AG is being - * shrunk or is offline, then this will fail to find that AG and return NULL - * instead. - */ -struct xfs_perag * -xfs_perag_grab( - struct xfs_mount *mp, - xfs_agnumber_t agno) -{ - struct xfs_perag *pag; - - rcu_read_lock(); - pag = xa_load(&mp->m_perags, agno); - if (pag) { - trace_xfs_perag_grab(pag, _RET_IP_); - if (!atomic_inc_not_zero(&pag->pag_active_ref)) - pag = NULL; - } - rcu_read_unlock(); - return pag; -} - -void -xfs_perag_rele( - struct xfs_perag *pag) -{ - trace_xfs_perag_rele(pag, _RET_IP_); - if (atomic_dec_and_test(&pag->pag_active_ref)) - wake_up(&pag->pag_active_wq); -} +#include "xfs_group.h" /* * xfs_initialize_perag_data @@ -184,6 +105,18 @@ out: return error; } +static void +xfs_perag_uninit( + struct xfs_group *xg) +{ +#ifdef __KERNEL__ + struct xfs_perag *pag = to_perag(xg); + + cancel_delayed_work_sync(&pag->pag_blockgc_work); + xfs_buf_cache_destroy(&pag->pag_bcache); +#endif +} + /* * Free up the per-ag resources within the specified AG range. */ @@ -196,22 +129,8 @@ xfs_free_perag_range( { xfs_agnumber_t agno; - for (agno = first_agno; agno < end_agno; agno++) { - struct xfs_perag *pag = xa_erase(&mp->m_perags, agno); - - ASSERT(pag); - XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); - xfs_defer_drain_free(&pag->pag_intents_drain); - - cancel_delayed_work_sync(&pag->pag_blockgc_work); - xfs_buf_cache_destroy(&pag->pag_bcache); - - /* drop the mount's active reference */ - xfs_perag_rele(pag); - XFS_IS_CORRUPT(pag->pag_mount, - atomic_read(&pag->pag_active_ref) != 0); - kfree_rcu_mightsleep(pag); - } + for (agno = first_agno; agno < end_agno; agno++) + xfs_group_free(mp, agno, XG_TYPE_AG, xfs_perag_uninit); } /* Find the size of the AG, in blocks. */ @@ -273,6 +192,10 @@ xfs_agino_range( return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last); } +/* + * Update the perag of the previous tail AG if it has been changed during + * recovery (i.e. recovery of a growfs). + */ int xfs_update_last_ag_size( struct xfs_mount *mp, @@ -282,88 +205,88 @@ xfs_update_last_ag_size( if (!pag) return -EFSCORRUPTED; - pag->block_count = __xfs_ag_block_count(mp, prev_agcount - 1, - mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks); - __xfs_agino_range(mp, pag->block_count, &pag->agino_min, + pag_group(pag)->xg_block_count = __xfs_ag_block_count(mp, + prev_agcount - 1, mp->m_sb.sb_agcount, + mp->m_sb.sb_dblocks); + __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min, &pag->agino_max); xfs_perag_rele(pag); return 0; } -int -xfs_initialize_perag( +static int +xfs_perag_alloc( struct xfs_mount *mp, - xfs_agnumber_t old_agcount, - xfs_agnumber_t new_agcount, - xfs_rfsblock_t dblocks, - xfs_agnumber_t *maxagi) + xfs_agnumber_t index, + xfs_agnumber_t agcount, + xfs_rfsblock_t dblocks) { struct xfs_perag *pag; - xfs_agnumber_t index; int error; - for (index = old_agcount; index < new_agcount; index++) { - pag = kzalloc(sizeof(*pag), GFP_KERNEL); - if (!pag) { - error = -ENOMEM; - goto out_unwind_new_pags; - } - pag->pag_agno = index; - pag->pag_mount = mp; - - error = xa_insert(&mp->m_perags, index, pag, GFP_KERNEL); - if (error) { - WARN_ON_ONCE(error == -EBUSY); - goto out_free_pag; - } + pag = kzalloc(sizeof(*pag), GFP_KERNEL); + if (!pag) + return -ENOMEM; #ifdef __KERNEL__ - /* Place kernel structure only init below this point. */ - spin_lock_init(&pag->pag_ici_lock); - spin_lock_init(&pag->pagb_lock); - spin_lock_init(&pag->pag_state_lock); - INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker); - INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); - xfs_defer_drain_init(&pag->pag_intents_drain); - init_waitqueue_head(&pag->pagb_wait); - init_waitqueue_head(&pag->pag_active_wq); - pag->pagb_count = 0; - pag->pagb_tree = RB_ROOT; - xfs_hooks_init(&pag->pag_rmap_update_hooks); + /* Place kernel structure only init below this point. */ + spin_lock_init(&pag->pag_ici_lock); + INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker); + INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); #endif /* __KERNEL__ */ - error = xfs_buf_cache_init(&pag->pag_bcache); - if (error) - goto out_remove_pag; - - /* Active ref owned by mount indicates AG is online. */ - atomic_set(&pag->pag_active_ref, 1); + error = xfs_buf_cache_init(&pag->pag_bcache); + if (error) + goto out_free_perag; - /* - * Pre-calculated geometry - */ - pag->block_count = __xfs_ag_block_count(mp, index, new_agcount, + /* + * Pre-calculated geometry + */ + pag_group(pag)->xg_block_count = __xfs_ag_block_count(mp, index, agcount, dblocks); - pag->min_block = XFS_AGFL_BLOCK(mp); - __xfs_agino_range(mp, pag->block_count, &pag->agino_min, - &pag->agino_max); - } + pag_group(pag)->xg_min_gbno = XFS_AGFL_BLOCK(mp) + 1; + __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min, + &pag->agino_max); - index = xfs_set_inode_alloc(mp, new_agcount); + error = xfs_group_insert(mp, pag_group(pag), index, XG_TYPE_AG); + if (error) + goto out_buf_cache_destroy; - if (maxagi) - *maxagi = index; + return 0; + +out_buf_cache_destroy: + xfs_buf_cache_destroy(&pag->pag_bcache); +out_free_perag: + kfree(pag); + return error; +} +int +xfs_initialize_perag( + struct xfs_mount *mp, + xfs_agnumber_t orig_agcount, + xfs_agnumber_t new_agcount, + xfs_rfsblock_t dblocks, + xfs_agnumber_t *maxagi) +{ + xfs_agnumber_t index; + int error; + + if (orig_agcount >= new_agcount) + return 0; + + for (index = orig_agcount; index < new_agcount; index++) { + error = xfs_perag_alloc(mp, index, new_agcount, dblocks); + if (error) + goto out_unwind_new_pags; + } + + *maxagi = xfs_set_inode_alloc(mp, new_agcount); mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); return 0; -out_remove_pag: - xfs_defer_drain_free(&pag->pag_intents_drain); - pag = xa_erase(&mp->m_perags, index); -out_free_pag: - kfree(pag); out_unwind_new_pags: - xfs_free_perag_range(mp, old_agcount, index); + xfs_free_perag_range(mp, orig_agcount, index); return error; } @@ -818,7 +741,7 @@ xfs_ag_shrink_space( struct xfs_trans **tpp, xfs_extlen_t delta) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_alloc_arg args = { .tp = *tpp, .mp = mp, @@ -835,7 +758,7 @@ xfs_ag_shrink_space( xfs_agblock_t aglen; int error, err2; - ASSERT(pag->pag_agno == mp->m_sb.sb_agcount - 1); + ASSERT(pag_agno(pag) == mp->m_sb.sb_agcount - 1); error = xfs_ialloc_read_agi(pag, *tpp, 0, &agibp); if (error) return error; @@ -872,7 +795,7 @@ xfs_ag_shrink_space( /* internal log shouldn't also show up in the free space btrees */ error = xfs_alloc_vextent_exact_bno(&args, - XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta)); + xfs_agbno_to_fsb(pag, aglen - delta)); if (!error && args.agbno == NULLAGBLOCK) error = -ENOSPC; @@ -931,9 +854,9 @@ xfs_ag_shrink_space( } /* Update perag geometry */ - pag->block_count -= delta; - __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min, - &pag->agino_max); + pag_group(pag)->xg_block_count -= delta; + __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min, + &pag->agino_max); xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH); xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH); @@ -958,12 +881,13 @@ xfs_ag_extend_space( struct xfs_trans *tp, xfs_extlen_t len) { + struct xfs_mount *mp = pag_mount(pag); struct xfs_buf *bp; struct xfs_agi *agi; struct xfs_agf *agf; int error; - ASSERT(pag->pag_agno == pag->pag_mount->m_sb.sb_agcount - 1); + ASSERT(pag_agno(pag) == mp->m_sb.sb_agcount - 1); error = xfs_ialloc_read_agi(pag, tp, 0, &bp); if (error) @@ -1002,9 +926,9 @@ xfs_ag_extend_space( return error; /* Update perag geometry */ - pag->block_count = be32_to_cpu(agf->agf_length); - __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min, - &pag->agino_max); + pag_group(pag)->xg_block_count = be32_to_cpu(agf->agf_length); + __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min, + &pag->agino_max); return 0; } @@ -1031,7 +955,7 @@ xfs_ag_get_geometry( /* Fill out form. */ memset(ageo, 0, sizeof(*ageo)); - ageo->ag_number = pag->pag_agno; + ageo->ag_number = pag_agno(pag); agi = agi_bp->b_addr; ageo->ag_icount = be32_to_cpu(agi->agi_count); diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 9edfe0e96439..1f24cfa27321 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -7,6 +7,8 @@ #ifndef __LIBXFS_AG_H #define __LIBXFS_AG_H 1 +#include "xfs_group.h" + struct xfs_mount; struct xfs_trans; struct xfs_perag; @@ -30,11 +32,7 @@ struct xfs_ag_resv { * performance of allocation group selection. */ struct xfs_perag { - struct xfs_mount *pag_mount; /* owner filesystem */ - xfs_agnumber_t pag_agno; /* AG this structure belongs to */ - atomic_t pag_ref; /* passive reference count */ - atomic_t pag_active_ref; /* active reference count */ - wait_queue_head_t pag_active_wq;/* woken active_ref falls to zero */ + struct xfs_group pag_group; unsigned long pag_opstate; uint8_t pagf_bno_level; /* # of levels in bno btree */ uint8_t pagf_cnt_level; /* # of levels in cnt btree */ @@ -55,7 +53,6 @@ struct xfs_perag { xfs_agino_t pagl_leftrec; xfs_agino_t pagl_rightrec; - int pagb_count; /* pagb slots in use */ uint8_t pagf_refcount_level; /* recount btree height */ /* Blocks reserved for all kinds of metadata. */ @@ -64,21 +61,12 @@ struct xfs_perag { struct xfs_ag_resv pag_rmapbt_resv; /* Precalculated geometry info */ - xfs_agblock_t block_count; - xfs_agblock_t min_block; xfs_agino_t agino_min; xfs_agino_t agino_max; #ifdef __KERNEL__ /* -- kernel only structures below this line -- */ - /* - * Bitsets of per-ag metadata that have been checked and/or are sick. - * Callers should hold pag_state_lock before accessing this field. - */ - uint16_t pag_checked; - uint16_t pag_sick; - #ifdef CONFIG_XFS_ONLINE_REPAIR /* * Alternate btree heights so that online repair won't trip the write @@ -90,13 +78,6 @@ struct xfs_perag { uint8_t pagf_repair_rmap_level; #endif - spinlock_t pag_state_lock; - - spinlock_t pagb_lock; /* lock for pagb_tree */ - struct rb_root pagb_tree; /* ordered tree of busy extents */ - unsigned int pagb_gen; /* generation count for pagb_tree */ - wait_queue_head_t pagb_wait; /* woken when pagb_gen changes */ - atomic_t pagf_fstrms; /* # of filestreams active in this AG */ spinlock_t pag_ici_lock; /* incore inode cache lock */ @@ -108,21 +89,29 @@ struct xfs_perag { /* background prealloc block trimming */ struct delayed_work pag_blockgc_work; - - /* - * We use xfs_drain to track the number of deferred log intent items - * that have been queued (but not yet processed) so that waiters (e.g. - * scrub) will not lock resources when other threads are in the middle - * of processing a chain of intent items only to find momentary - * inconsistencies. - */ - struct xfs_defer_drain pag_intents_drain; - - /* Hook to feed rmapbt updates to an active online repair. */ - struct xfs_hooks pag_rmap_update_hooks; #endif /* __KERNEL__ */ }; +static inline struct xfs_perag *to_perag(struct xfs_group *xg) +{ + return container_of(xg, struct xfs_perag, pag_group); +} + +static inline struct xfs_group *pag_group(struct xfs_perag *pag) +{ + return &pag->pag_group; +} + +static inline struct xfs_mount *pag_mount(const struct xfs_perag *pag) +{ + return pag->pag_group.xg_mount; +} + +static inline xfs_agnumber_t pag_agno(const struct xfs_perag *pag) +{ + return pag->pag_group.xg_gno; +} + /* * Per-AG operational state. These are atomic flag bits. */ @@ -144,8 +133,8 @@ __XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA) __XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES) __XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET) -int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t old_agcount, - xfs_agnumber_t agcount, xfs_rfsblock_t dcount, +int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t orig_agcount, + xfs_agnumber_t new_agcount, xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi); void xfs_free_perag_range(struct xfs_mount *mp, xfs_agnumber_t first_agno, xfs_agnumber_t end_agno); @@ -153,13 +142,71 @@ int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno); int xfs_update_last_ag_size(struct xfs_mount *mp, xfs_agnumber_t prev_agcount); /* Passive AG references */ -struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); -struct xfs_perag *xfs_perag_hold(struct xfs_perag *pag); -void xfs_perag_put(struct xfs_perag *pag); +static inline struct xfs_perag * +xfs_perag_get( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + return to_perag(xfs_group_get(mp, agno, XG_TYPE_AG)); +} + +static inline struct xfs_perag * +xfs_perag_hold( + struct xfs_perag *pag) +{ + return to_perag(xfs_group_hold(pag_group(pag))); +} + +static inline void +xfs_perag_put( + struct xfs_perag *pag) +{ + xfs_group_put(pag_group(pag)); +} /* Active AG references */ -struct xfs_perag *xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t); -void xfs_perag_rele(struct xfs_perag *pag); +static inline struct xfs_perag * +xfs_perag_grab( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + return to_perag(xfs_group_grab(mp, agno, XG_TYPE_AG)); +} + +static inline void +xfs_perag_rele( + struct xfs_perag *pag) +{ + xfs_group_rele(pag_group(pag)); +} + +static inline struct xfs_perag * +xfs_perag_next_range( + struct xfs_mount *mp, + struct xfs_perag *pag, + xfs_agnumber_t start_agno, + xfs_agnumber_t end_agno) +{ + return to_perag(xfs_group_next_range(mp, pag ? pag_group(pag) : NULL, + start_agno, end_agno, XG_TYPE_AG)); +} + +static inline struct xfs_perag * +xfs_perag_next_from( + struct xfs_mount *mp, + struct xfs_perag *pag, + xfs_agnumber_t start_agno) +{ + return xfs_perag_next_range(mp, pag, start_agno, mp->m_sb.sb_agcount - 1); +} + +static inline struct xfs_perag * +xfs_perag_next( + struct xfs_mount *mp, + struct xfs_perag *pag) +{ + return xfs_perag_next_from(mp, pag, 0); +} /* * Per-ag geometry infomation and validation @@ -171,11 +218,7 @@ void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, static inline bool xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno) { - if (agbno >= pag->block_count) - return false; - if (agbno <= pag->min_block) - return false; - return true; + return xfs_verify_gbno(pag_group(pag), agbno); } static inline bool @@ -184,13 +227,7 @@ xfs_verify_agbext( xfs_agblock_t agbno, xfs_agblock_t len) { - if (agbno + len <= agbno) - return false; - - if (!xfs_verify_agbno(pag, agbno)) - return false; - - return xfs_verify_agbno(pag, agbno + len - 1); + return xfs_verify_gbext(pag_group(pag), agbno, len); } /* @@ -226,40 +263,6 @@ xfs_ag_contains_log(struct xfs_mount *mp, xfs_agnumber_t agno) agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); } -/* - * Perag iteration APIs - */ -static inline struct xfs_perag * -xfs_perag_next( - struct xfs_perag *pag, - xfs_agnumber_t *agno, - xfs_agnumber_t end_agno) -{ - struct xfs_mount *mp = pag->pag_mount; - - *agno = pag->pag_agno + 1; - xfs_perag_rele(pag); - while (*agno <= end_agno) { - pag = xfs_perag_grab(mp, *agno); - if (pag) - return pag; - (*agno)++; - } - return NULL; -} - -#define for_each_perag_range(mp, agno, end_agno, pag) \ - for ((pag) = xfs_perag_grab((mp), (agno)); \ - (pag) != NULL; \ - (pag) = xfs_perag_next((pag), &(agno), (end_agno))) - -#define for_each_perag_from(mp, agno, pag) \ - for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag)) - -#define for_each_perag(mp, agno, pag) \ - (agno) = 0; \ - for_each_perag_from((mp), (agno), (pag)) - static inline struct xfs_perag * xfs_perag_next_wrap( struct xfs_perag *pag, @@ -268,9 +271,9 @@ xfs_perag_next_wrap( xfs_agnumber_t restart_agno, xfs_agnumber_t wrap_agno) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); - *agno = pag->pag_agno + 1; + *agno = pag_agno(pag) + 1; xfs_perag_rele(pag); while (*agno != stop_agno) { if (*agno >= wrap_agno) { @@ -332,4 +335,28 @@ int xfs_ag_extend_space(struct xfs_perag *pag, struct xfs_trans *tp, xfs_extlen_t len); int xfs_ag_get_geometry(struct xfs_perag *pag, struct xfs_ag_geometry *ageo); +static inline xfs_fsblock_t +xfs_agbno_to_fsb( + struct xfs_perag *pag, + xfs_agblock_t agbno) +{ + return XFS_AGB_TO_FSB(pag_mount(pag), pag_agno(pag), agbno); +} + +static inline xfs_daddr_t +xfs_agbno_to_daddr( + struct xfs_perag *pag, + xfs_agblock_t agbno) +{ + return XFS_AGB_TO_DADDR(pag_mount(pag), pag_agno(pag), agbno); +} + +static inline xfs_ino_t +xfs_agino_to_ino( + struct xfs_perag *pag, + xfs_agino_t agino) +{ + return XFS_AGINO_TO_INO(pag_mount(pag), pag_agno(pag), agino); +} + #endif /* __LIBXFS_AG_H */ diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 216423df939e..f5d853089019 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -70,6 +70,7 @@ xfs_ag_resv_critical( struct xfs_perag *pag, enum xfs_ag_resv_type type) { + struct xfs_mount *mp = pag_mount(pag); xfs_extlen_t avail; xfs_extlen_t orig; @@ -92,8 +93,8 @@ xfs_ag_resv_critical( /* Critically low if less than 10% or max btree height remains. */ return XFS_TEST_ERROR(avail < orig / 10 || - avail < pag->pag_mount->m_agbtree_maxlevels, - pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL); + avail < mp->m_agbtree_maxlevels, + mp, XFS_ERRTAG_AG_RESV_CRITICAL); } /* @@ -137,8 +138,8 @@ __xfs_ag_resv_free( trace_xfs_ag_resv_free(pag, type, 0); resv = xfs_perag_resv(pag, type); - if (pag->pag_agno == 0) - pag->pag_mount->m_ag_max_usable += resv->ar_asked; + if (pag_agno(pag) == 0) + pag_mount(pag)->m_ag_max_usable += resv->ar_asked; /* * RMAPBT blocks come from the AGFL and AGFL blocks are always * considered "free", so whatever was reserved at mount time must be @@ -148,7 +149,7 @@ __xfs_ag_resv_free( oldresv = resv->ar_orig_reserved; else oldresv = resv->ar_reserved; - xfs_add_fdblocks(pag->pag_mount, oldresv); + xfs_add_fdblocks(pag_mount(pag), oldresv); resv->ar_reserved = 0; resv->ar_asked = 0; resv->ar_orig_reserved = 0; @@ -170,7 +171,7 @@ __xfs_ag_resv_init( xfs_extlen_t ask, xfs_extlen_t used) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_ag_resv *resv; int error; xfs_extlen_t hidden_space; @@ -206,11 +207,10 @@ __xfs_ag_resv_init( else error = xfs_dec_fdblocks(mp, hidden_space, true); if (error) { - trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, - error, _RET_IP_); + trace_xfs_ag_resv_init_error(pag, error, _RET_IP_); xfs_warn(mp, "Per-AG reservation for AG %u failed. Filesystem may run out of space.", - pag->pag_agno); + pag_agno(pag)); return error; } @@ -220,7 +220,7 @@ __xfs_ag_resv_init( * counter, we only make the adjustment for AG 0. This assumes that * there aren't any AGs hungrier for per-AG reservation than AG 0. */ - if (pag->pag_agno == 0) + if (pag_agno(pag) == 0) mp->m_ag_max_usable -= ask; resv = xfs_perag_resv(pag, type); @@ -238,7 +238,7 @@ xfs_ag_resv_init( struct xfs_perag *pag, struct xfs_trans *tp) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); xfs_extlen_t ask; xfs_extlen_t used; int error = 0, error2; diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 22bdbb3e9980..3d33e17f2e5c 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -275,7 +275,7 @@ xfs_alloc_complain_bad_rec( xfs_warn(mp, "%sbt record corruption in AG %d detected at %pS!", - cur->bc_ops->name, cur->bc_ag.pag->pag_agno, fa); + cur->bc_ops->name, cur->bc_group->xg_gno, fa); xfs_warn(mp, "start block 0x%x block count 0x%x", irec->ar_startblock, irec->ar_blockcount); @@ -303,7 +303,7 @@ xfs_alloc_get_rec( return error; xfs_alloc_btrec_to_irec(rec, &irec); - fa = xfs_alloc_check_irec(cur->bc_ag.pag, &irec); + fa = xfs_alloc_check_irec(to_perag(cur->bc_group), &irec); if (fa) return xfs_alloc_complain_bad_rec(cur, fa, &irec); @@ -331,7 +331,8 @@ xfs_alloc_compute_aligned( bool busy; /* Trim busy sections out of found extent */ - busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen); + busy = xfs_extent_busy_trim(pag_group(args->pag), args->minlen, + args->maxlen, &bno, &len, busy_gen); /* * If we have a largish extent that happens to start before min_agbno, @@ -539,7 +540,7 @@ static int xfs_alloc_fixup_longest( struct xfs_btree_cur *cnt_cur) { - struct xfs_perag *pag = cnt_cur->bc_ag.pag; + struct xfs_perag *pag = to_perag(cnt_cur->bc_group); struct xfs_buf *bp = cnt_cur->bc_ag.agbp; struct xfs_agf *agf = bp->b_addr; xfs_extlen_t longest = 0; @@ -799,7 +800,7 @@ xfs_agfl_verify( * use it by using uncached buffers that don't have the perag attached * so we can detect and avoid this problem. */ - if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) + if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != pag_agno((bp->b_pag))) return __this_address; for (i = 0; i < xfs_agfl_size(mp); i++) { @@ -879,13 +880,12 @@ xfs_alloc_read_agfl( struct xfs_trans *tp, struct xfs_buf **bpp) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_buf *bp; int error; - error = xfs_trans_read_buf( - mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)), + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGFL_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); if (xfs_metadata_is_sick(error)) xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL); @@ -1252,14 +1252,14 @@ xfs_alloc_ag_vextent_small( if (fbno == NULLAGBLOCK) goto out; - xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1, + xfs_extent_busy_reuse(pag_group(args->pag), fbno, 1, (args->datatype & XFS_ALLOC_NOBUSY)); if (args->datatype & XFS_ALLOC_USERDATA) { struct xfs_buf *bp; error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp, - XFS_AGB_TO_DADDR(args->mp, args->agno, fbno), + xfs_agbno_to_daddr(args->pag, fbno), args->mp->m_bsize, 0, &bp); if (error) goto error; @@ -1365,7 +1365,8 @@ xfs_alloc_ag_vextent_exact( */ tbno = fbno; tlen = flen; - xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen); + xfs_extent_busy_trim(pag_group(args->pag), args->minlen, args->maxlen, + &tbno, &tlen, &busy_gen); /* * Give up if the start of the extent is busy, or the freespace isn't @@ -1758,8 +1759,9 @@ restart: * the allocation can be retried. */ trace_xfs_alloc_near_busy(args); - error = xfs_extent_busy_flush(args->tp, args->pag, - acur.busy_gen, alloc_flags); + error = xfs_extent_busy_flush(args->tp, + pag_group(args->pag), acur.busy_gen, + alloc_flags); if (error) goto out; @@ -1874,8 +1876,9 @@ restart: * the allocation can be retried. */ trace_xfs_alloc_size_busy(args); - error = xfs_extent_busy_flush(args->tp, args->pag, - busy_gen, alloc_flags); + error = xfs_extent_busy_flush(args->tp, + pag_group(args->pag), busy_gen, + alloc_flags); if (error) goto error0; @@ -1973,8 +1976,9 @@ restart: * the allocation can be retried. */ trace_xfs_alloc_size_busy(args); - error = xfs_extent_busy_flush(args->tp, args->pag, - busy_gen, alloc_flags); + error = xfs_extent_busy_flush(args->tp, + pag_group(args->pag), busy_gen, + alloc_flags); if (error) goto error0; @@ -2037,7 +2041,6 @@ int xfs_free_ag_extent( struct xfs_trans *tp, struct xfs_buf *agbp, - xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, @@ -2358,19 +2361,19 @@ xfs_free_ag_extent( * Update the freespace totals in the ag and superblock. */ error = xfs_alloc_update_counters(tp, agbp, len); - xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len); + xfs_ag_resv_free_extent(pag, type, tp, len); if (error) goto error0; XFS_STATS_INC(mp, xs_freex); XFS_STATS_ADD(mp, xs_freeb, len); - trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright); + trace_xfs_free_extent(pag, bno, len, type, haveleft, haveright); return 0; error0: - trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1); + trace_xfs_free_extent(pag, bno, len, type, -1, -1); if (bno_cur) xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); if (cnt_cur) @@ -2429,7 +2432,7 @@ xfs_alloc_longest_free_extent( * reservations and AGFL rules in place, we can return this extent. */ if (pag->pagf_longest > delta) - return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable, + return min_t(xfs_extlen_t, pag_mount(pag)->m_ag_max_usable, pag->pagf_longest - delta); /* Otherwise, let the caller try for 1 block if there's space. */ @@ -2612,7 +2615,7 @@ xfs_agfl_reset( xfs_warn(mp, "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. " "Please unmount and run xfs_repair.", - pag->pag_agno, pag->pagf_flcount); + pag_agno(pag), pag->pagf_flcount); agf->agf_flfirst = 0; agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1); @@ -2645,8 +2648,17 @@ xfs_defer_extent_free( ASSERT(!isnullstartblock(bno)); ASSERT(!(free_flags & ~XFS_FREE_EXTENT_ALL_FLAGS)); - if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) - return -EFSCORRUPTED; + if (free_flags & XFS_FREE_EXTENT_REALTIME) { + if (type != XFS_AG_RESV_NONE) { + ASSERT(type == XFS_AG_RESV_NONE); + return -EFSCORRUPTED; + } + if (XFS_IS_CORRUPT(mp, !xfs_verify_rtbext(mp, bno, len))) + return -EFSCORRUPTED; + } else { + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) + return -EFSCORRUPTED; + } xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); @@ -2655,6 +2667,8 @@ xfs_defer_extent_free( xefi->xefi_agresv = type; if (free_flags & XFS_FREE_EXTENT_SKIP_DISCARD) xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD; + if (free_flags & XFS_FREE_EXTENT_REALTIME) + xefi->xefi_flags |= XFS_EFI_REALTIME; if (oinfo) { ASSERT(oinfo->oi_offset == 0); @@ -2934,9 +2948,8 @@ xfs_alloc_fix_freelist( * Deferring the free disconnects freeing up the AGFL slot from * freeing the block. */ - error = xfs_free_extent_later(tp, - XFS_AGB_TO_FSB(mp, args->agno, bno), 1, - &targs.oinfo, XFS_AG_RESV_AGFL, 0); + error = xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, bno), + 1, &targs.oinfo, XFS_AG_RESV_AGFL, 0); if (error) goto out_agbp_relse; } @@ -3156,8 +3169,6 @@ xfs_alloc_put_freelist( logflags |= XFS_AGF_BTREEBLKS; } - xfs_alloc_log_agf(tp, agbp, logflags); - ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)); agfl_bno = xfs_buf_to_agfl_bno(agflbp); @@ -3190,7 +3201,7 @@ xfs_validate_ag_length( * use it by using uncached buffers that don't have the perag attached * so we can detect and avoid this problem. */ - if (bp->b_pag && seqno != bp->b_pag->pag_agno) + if (bp->b_pag && seqno != pag_agno(bp->b_pag)) return __this_address; /* @@ -3359,13 +3370,13 @@ xfs_read_agf( int flags, struct xfs_buf **agfbpp) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); int error; - trace_xfs_read_agf(pag->pag_mount, pag->pag_agno); + trace_xfs_read_agf(pag); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)), + XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGF_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops); if (xfs_metadata_is_sick(error)) xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF); @@ -3388,12 +3399,13 @@ xfs_alloc_read_agf( int flags, struct xfs_buf **agfbpp) { + struct xfs_mount *mp = pag_mount(pag); struct xfs_buf *agfbp; struct xfs_agf *agf; int error; int allocbt_blks; - trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno); + trace_xfs_alloc_read_agf(pag); /* We don't support trylock when freeing. */ ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) != @@ -3414,7 +3426,7 @@ xfs_alloc_read_agf( pag->pagf_cnt_level = be32_to_cpu(agf->agf_cnt_level); pag->pagf_rmap_level = be32_to_cpu(agf->agf_rmap_level); pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); - if (xfs_agfl_needs_reset(pag->pag_mount, agf)) + if (xfs_agfl_needs_reset(mp, agf)) set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); else clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); @@ -3427,16 +3439,15 @@ xfs_alloc_read_agf( * counter only tracks non-root blocks. */ allocbt_blks = pag->pagf_btreeblks; - if (xfs_has_rmapbt(pag->pag_mount)) + if (xfs_has_rmapbt(mp)) allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1; if (allocbt_blks > 0) - atomic64_add(allocbt_blks, - &pag->pag_mount->m_allocbt_blks); + atomic64_add(allocbt_blks, &mp->m_allocbt_blks); set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); } #ifdef DEBUG - else if (!xfs_is_shutdown(pag->pag_mount)) { + else if (!xfs_is_shutdown(mp)) { ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); @@ -3597,7 +3608,7 @@ xfs_alloc_vextent_finish( goto out_drop_perag; } - args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); + args->fsbno = xfs_agbno_to_fsb(args->pag, args->agbno); ASSERT(args->len >= args->minlen); ASSERT(args->len <= args->maxlen); @@ -3618,8 +3629,8 @@ xfs_alloc_vextent_finish( if (error) goto out_drop_perag; - ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno, - args->len)); + ASSERT(!xfs_extent_busy_search(pag_group(args->pag), + args->agbno, args->len)); } xfs_ag_resv_alloc_extent(args->pag, args->resv, args); @@ -3649,21 +3660,20 @@ xfs_alloc_vextent_this_ag( struct xfs_alloc_arg *args, xfs_agnumber_t agno) { - struct xfs_mount *mp = args->mp; xfs_agnumber_t minimum_agno; uint32_t alloc_flags = 0; int error; ASSERT(args->pag != NULL); - ASSERT(args->pag->pag_agno == agno); + ASSERT(pag_agno(args->pag) == agno); args->agno = agno; args->agbno = 0; trace_xfs_alloc_vextent_this_ag(args); - error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0), - &minimum_agno); + error = xfs_alloc_vextent_check_args(args, + xfs_agbno_to_fsb(args->pag, 0), &minimum_agno); if (error) { if (error == -ENOSPC) return 0; @@ -3868,7 +3878,7 @@ xfs_alloc_vextent_exact_bno( int error; ASSERT(args->pag != NULL); - ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target)); + ASSERT(pag_agno(args->pag) == XFS_FSB_TO_AGNO(mp, target)); args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); @@ -3907,7 +3917,7 @@ xfs_alloc_vextent_near_bno( int error; if (!needs_perag) - ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target)); + ASSERT(pag_agno(args->pag) == XFS_FSB_TO_AGNO(mp, target)); args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); @@ -3944,7 +3954,7 @@ xfs_free_extent_fix_freelist( memset(&args, 0, sizeof(struct xfs_alloc_arg)); args.tp = tp; args.mp = tp->t_mountp; - args.agno = pag->pag_agno; + args.agno = pag_agno(pag); args.pag = pag; /* @@ -4012,14 +4022,13 @@ __xfs_free_extent( goto err_release; } - error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo, - type); + error = xfs_free_ag_extent(tp, agbp, agbno, len, oinfo, type); if (error) goto err_release; if (skip_discard) busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD; - xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags); + xfs_extent_busy_insert(tp, pag_group(pag), agbno, len, busy_flags); return 0; err_release: @@ -4044,7 +4053,7 @@ xfs_alloc_query_range_helper( xfs_failaddr_t fa; xfs_alloc_btrec_to_irec(rec, &irec); - fa = xfs_alloc_check_irec(cur->bc_ag.pag, &irec); + fa = xfs_alloc_check_irec(to_perag(cur->bc_group), &irec); if (fa) return xfs_alloc_complain_bad_rec(cur, fa, &irec); diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 0165452e7cd0..50ef79a1ed41 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -79,9 +79,8 @@ int xfs_alloc_put_freelist(struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agfbp, struct xfs_buf *agflbp, xfs_agblock_t bno, int btreeblk); int xfs_free_ag_extent(struct xfs_trans *tp, struct xfs_buf *agbp, - xfs_agnumber_t agno, xfs_agblock_t bno, - xfs_extlen_t len, const struct xfs_owner_info *oinfo, - enum xfs_ag_resv_type type); + xfs_agblock_t bno, xfs_extlen_t len, + const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type); /* * Compute and fill in value of m_alloc_maxlevels. @@ -238,7 +237,11 @@ int xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, /* Don't issue a discard for the blocks freed. */ #define XFS_FREE_EXTENT_SKIP_DISCARD (1U << 0) -#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD) +/* Free blocks on the realtime device. */ +#define XFS_FREE_EXTENT_REALTIME (1U << 1) + +#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD | \ + XFS_FREE_EXTENT_REALTIME) /* * List of extents to be free "later". @@ -249,7 +252,7 @@ struct xfs_extent_free_item { uint64_t xefi_owner; xfs_fsblock_t xefi_startblock;/* starting fs block number */ xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ - struct xfs_perag *xefi_pag; + struct xfs_group *xefi_group; unsigned int xefi_flags; enum xfs_ag_resv_type xefi_agresv; }; @@ -258,6 +261,12 @@ struct xfs_extent_free_item { #define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ #define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ #define XFS_EFI_CANCELLED (1U << 3) /* dont actually free the space */ +#define XFS_EFI_REALTIME (1U << 4) /* freeing realtime extent */ + +static inline bool xfs_efi_is_realtime(const struct xfs_extent_free_item *xefi) +{ + return xefi->xefi_flags & XFS_EFI_REALTIME; +} struct xfs_alloc_autoreap { struct xfs_defer_pending *dfp; diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index aada676eee51..a4ac37ba5d51 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -28,7 +28,7 @@ xfs_bnobt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_bnobt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp, - cur->bc_ag.pag); + to_perag(cur->bc_group)); } STATIC struct xfs_btree_cur * @@ -36,29 +36,29 @@ xfs_cntbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_cntbt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp, - cur->bc_ag.pag); + to_perag(cur->bc_group)); } - STATIC void xfs_allocbt_set_root( struct xfs_btree_cur *cur, const union xfs_btree_ptr *ptr, int inc) { - struct xfs_buf *agbp = cur->bc_ag.agbp; - struct xfs_agf *agf = agbp->b_addr; + struct xfs_perag *pag = to_perag(cur->bc_group); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; ASSERT(ptr->s != 0); if (xfs_btree_is_bno(cur->bc_ops)) { agf->agf_bno_root = ptr->s; be32_add_cpu(&agf->agf_bno_level, inc); - cur->bc_ag.pag->pagf_bno_level += inc; + pag->pagf_bno_level += inc; } else { agf->agf_cnt_root = ptr->s; be32_add_cpu(&agf->agf_cnt_level, inc); - cur->bc_ag.pag->pagf_cnt_level += inc; + pag->pagf_cnt_level += inc; } xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); @@ -75,7 +75,7 @@ xfs_allocbt_alloc_block( xfs_agblock_t bno; /* Allocate the new block from the freelist. If we can't, give up. */ - error = xfs_alloc_get_freelist(cur->bc_ag.pag, cur->bc_tp, + error = xfs_alloc_get_freelist(to_perag(cur->bc_group), cur->bc_tp, cur->bc_ag.agbp, &bno, 1); if (error) return error; @@ -86,7 +86,7 @@ xfs_allocbt_alloc_block( } atomic64_inc(&cur->bc_mp->m_allocbt_blks); - xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.pag, bno, 1, false); + xfs_extent_busy_reuse(cur->bc_group, bno, 1, false); new->s = cpu_to_be32(bno); @@ -104,13 +104,13 @@ xfs_allocbt_free_block( int error; bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp)); - error = xfs_alloc_put_freelist(cur->bc_ag.pag, cur->bc_tp, agbp, NULL, - bno, 1); + error = xfs_alloc_put_freelist(to_perag(cur->bc_group), cur->bc_tp, + agbp, NULL, bno, 1); if (error) return error; atomic64_dec(&cur->bc_mp->m_allocbt_blks); - xfs_extent_busy_insert(cur->bc_tp, agbp->b_pag, bno, 1, + xfs_extent_busy_insert(cur->bc_tp, pag_group(agbp->b_pag), bno, 1, XFS_EXTENT_BUSY_SKIP_DISCARD); return 0; } @@ -178,7 +178,7 @@ xfs_allocbt_init_ptr_from_cur( { struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agf->agf_seqno)); if (xfs_btree_is_bno(cur->bc_ops)) ptr->s = agf->agf_bno_root; @@ -492,7 +492,7 @@ xfs_bnobt_init_cursor( cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bnobt_ops, mp->m_alloc_maxlevels, xfs_allocbt_cur_cache); - cur->bc_ag.pag = xfs_perag_hold(pag); + cur->bc_group = xfs_group_hold(pag_group(pag)); cur->bc_ag.agbp = agbp; if (agbp) { struct xfs_agf *agf = agbp->b_addr; @@ -518,7 +518,7 @@ xfs_cntbt_init_cursor( cur = xfs_btree_alloc_cursor(mp, tp, &xfs_cntbt_ops, mp->m_alloc_maxlevels, xfs_allocbt_cur_cache); - cur->bc_ag.pag = xfs_perag_hold(pag); + cur->bc_group = xfs_group_hold(pag_group(pag)); cur->bc_ag.agbp = agbp; if (agbp) { struct xfs_agf *agf = agbp->b_addr; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index c63da14eee04..17875ad865f5 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1004,7 +1004,10 @@ xfs_attr_add_fork( unsigned int blks; /* space reservation */ int error; /* error return value */ - ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); + if (xfs_is_metadir_inode(ip)) + ASSERT(XFS_IS_DQDETACHED(ip)); + else + ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); blks = XFS_ADDAFORK_SPACE_RES(mp); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 36dd08d13293..9052839305e2 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -40,6 +40,7 @@ #include "xfs_bmap_item.h" #include "xfs_symlink_remote.h" #include "xfs_inode_util.h" +#include "xfs_rtgroup.h" struct kmem_cache *xfs_bmap_intent_cache; @@ -1042,7 +1043,10 @@ xfs_bmap_add_attrfork( int error; /* error return value */ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); - ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); + if (xfs_is_metadir_inode(ip)) + ASSERT(XFS_IS_DQDETACHED(ip)); + else + ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); ASSERT(!xfs_inode_has_attr_fork(ip)); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -1423,6 +1427,24 @@ xfs_bmap_last_offset( * Extent tree manipulation functions used during allocation. */ +static inline bool +xfs_bmap_same_rtgroup( + struct xfs_inode *ip, + int whichfork, + struct xfs_bmbt_irec *left, + struct xfs_bmbt_irec *right) +{ + struct xfs_mount *mp = ip->i_mount; + + if (xfs_ifork_is_realtime(ip, whichfork) && xfs_has_rtgroups(mp)) { + if (xfs_rtb_to_rgno(mp, left->br_startblock) != + xfs_rtb_to_rgno(mp, right->br_startblock)) + return false; + } + + return true; +} + /* * Convert a delayed allocation to a real allocation. */ @@ -1492,7 +1514,8 @@ xfs_bmap_add_extent_delay_real( LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && LEFT.br_state == new->br_state && - LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) + LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN && + xfs_bmap_same_rtgroup(bma->ip, whichfork, &LEFT, new)) state |= BMAP_LEFT_CONTIG; /* @@ -1516,7 +1539,8 @@ xfs_bmap_add_extent_delay_real( (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING) || LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount - <= XFS_MAX_BMBT_EXTLEN)) + <= XFS_MAX_BMBT_EXTLEN) && + xfs_bmap_same_rtgroup(bma->ip, whichfork, new, &RIGHT)) state |= BMAP_RIGHT_CONTIG; error = 0; @@ -2061,7 +2085,8 @@ xfs_bmap_add_extent_unwritten_real( LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && LEFT.br_state == new->br_state && - LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) + LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN && + xfs_bmap_same_rtgroup(ip, whichfork, &LEFT, new)) state |= BMAP_LEFT_CONTIG; /* @@ -2085,7 +2110,8 @@ xfs_bmap_add_extent_unwritten_real( (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING) || LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount - <= XFS_MAX_BMBT_EXTLEN)) + <= XFS_MAX_BMBT_EXTLEN) && + xfs_bmap_same_rtgroup(ip, whichfork, new, &RIGHT)) state |= BMAP_RIGHT_CONTIG; /* @@ -2594,7 +2620,8 @@ xfs_bmap_add_extent_hole_delay( */ if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && left.br_startoff + left.br_blockcount == new->br_startoff && - left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) + left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN && + xfs_bmap_same_rtgroup(ip, whichfork, &left, new)) state |= BMAP_LEFT_CONTIG; if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && @@ -2602,7 +2629,8 @@ xfs_bmap_add_extent_hole_delay( new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN && (!(state & BMAP_LEFT_CONTIG) || (left.br_blockcount + new->br_blockcount + - right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))) + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)) && + xfs_bmap_same_rtgroup(ip, whichfork, new, &right)) state |= BMAP_RIGHT_CONTIG; /* @@ -2745,7 +2773,8 @@ xfs_bmap_add_extent_hole_real( left.br_startoff + left.br_blockcount == new->br_startoff && left.br_startblock + left.br_blockcount == new->br_startblock && left.br_state == new->br_state && - left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) + left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN && + xfs_bmap_same_rtgroup(ip, whichfork, &left, new)) state |= BMAP_LEFT_CONTIG; if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && @@ -2755,7 +2784,8 @@ xfs_bmap_add_extent_hole_real( new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN && (!(state & BMAP_LEFT_CONTIG) || left.br_blockcount + new->br_blockcount + - right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)) + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN) && + xfs_bmap_same_rtgroup(ip, whichfork, new, &right)) state |= BMAP_RIGHT_CONTIG; error = 0; @@ -3121,8 +3151,15 @@ xfs_bmap_adjacent_valid( struct xfs_mount *mp = ap->ip->i_mount; if (XFS_IS_REALTIME_INODE(ap->ip) && - (ap->datatype & XFS_ALLOC_USERDATA)) - return x < mp->m_sb.sb_rblocks; + (ap->datatype & XFS_ALLOC_USERDATA)) { + if (!xfs_has_rtgroups(mp)) + return x < mp->m_sb.sb_rblocks; + + return xfs_rtb_to_rgno(mp, x) == xfs_rtb_to_rgno(mp, y) && + xfs_rtb_to_rgno(mp, x) < mp->m_sb.sb_rgcount && + xfs_rtb_to_rtx(mp, x) < mp->m_sb.sb_rgextents; + + } return XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && @@ -3280,7 +3317,7 @@ xfs_bmap_longest_free_extent( } longest = xfs_alloc_longest_free_extent(pag, - xfs_alloc_min_freelist(pag->pag_mount, pag), + xfs_alloc_min_freelist(pag_mount(pag), pag), xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE)); if (*blen < longest) *blen = longest; @@ -4091,7 +4128,7 @@ retry: fdblocks = indlen; if (XFS_IS_REALTIME_INODE(ip)) { - error = xfs_dec_frextents(mp, xfs_rtb_to_rtx(mp, alen)); + error = xfs_dec_frextents(mp, xfs_blen_to_rtbxlen(mp, alen)); if (error) goto out_unreserve_quota; } else { @@ -4126,7 +4163,7 @@ retry: out_unreserve_frextents: if (XFS_IS_REALTIME_INODE(ip)) - xfs_add_frextents(mp, xfs_rtb_to_rtx(mp, alen)); + xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, alen)); out_unreserve_quota: if (XFS_IS_QUOTA_ON(mp)) xfs_quota_unreserve_blkres(ip, alen); @@ -5034,7 +5071,7 @@ xfs_bmap_del_extent_delay( fdblocks = da_diff; if (isrt) - xfs_add_frextents(mp, xfs_rtb_to_rtx(mp, del->br_blockcount)); + xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, del->br_blockcount)); else fdblocks += del->br_blockcount; @@ -5113,6 +5150,34 @@ xfs_bmap_del_extent_cow( ip->i_delayed_blks -= del->br_blockcount; } +static int +xfs_bmap_free_rtblocks( + struct xfs_trans *tp, + struct xfs_bmbt_irec *del) +{ + struct xfs_rtgroup *rtg; + int error; + + rtg = xfs_rtgroup_grab(tp->t_mountp, 0); + if (!rtg) + return -EIO; + + /* + * Ensure the bitmap and summary inodes are locked and joined to the + * transaction before modifying them. + */ + if (!(tp->t_flags & XFS_TRANS_RTBITMAP_LOCKED)) { + tp->t_flags |= XFS_TRANS_RTBITMAP_LOCKED; + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP); + xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_BITMAP); + } + + error = xfs_rtfree_blocks(tp, rtg, del->br_startblock, + del->br_blockcount); + xfs_rtgroup_rele(rtg); + return error; +} + /* * Called by xfs_bmapi to update file extent records and the btree * after removing space. @@ -5325,20 +5390,12 @@ xfs_bmap_del_extent_real( * If we need to, add to list of extents to delete. */ if (!(bflags & XFS_BMAPI_REMAP)) { + bool isrt = xfs_ifork_is_realtime(ip, whichfork); + if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { xfs_refcount_decrease_extent(tp, del); - } else if (xfs_ifork_is_realtime(ip, whichfork)) { - /* - * Ensure the bitmap and summary inodes are locked - * and joined to the transaction before modifying them. - */ - if (!(tp->t_flags & XFS_TRANS_RTBITMAP_LOCKED)) { - tp->t_flags |= XFS_TRANS_RTBITMAP_LOCKED; - xfs_rtbitmap_lock(mp); - xfs_rtbitmap_trans_join(tp); - } - error = xfs_rtfree_blocks(tp, del->br_startblock, - del->br_blockcount); + } else if (isrt && !xfs_has_rtgroups(mp)) { + error = xfs_bmap_free_rtblocks(tp, del); } else { unsigned int efi_flags = 0; @@ -5346,6 +5403,19 @@ xfs_bmap_del_extent_real( del->br_state == XFS_EXT_UNWRITTEN) efi_flags |= XFS_FREE_EXTENT_SKIP_DISCARD; + /* + * Historically, we did not use EFIs to free realtime + * extents. However, when reverse mapping is enabled, + * we must maintain the same order of operations as the + * data device, which is: Remove the file mapping, + * remove the reverse mapping, and then free the + * blocks. Reflink for realtime volumes requires the + * same sort of ordering. Both features rely on + * rtgroups, so let's gate rt EFI usage on rtgroups. + */ + if (isrt) + efi_flags |= XFS_FREE_EXTENT_REALTIME; + error = xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, XFS_AG_RESV_NONE, efi_flags); @@ -5694,6 +5764,8 @@ xfs_bunmapi( */ STATIC bool xfs_bmse_can_merge( + struct xfs_inode *ip, + int whichfork, struct xfs_bmbt_irec *left, /* preceding extent */ struct xfs_bmbt_irec *got, /* current extent to shift */ xfs_fileoff_t shift) /* shift fsb */ @@ -5709,7 +5781,8 @@ xfs_bmse_can_merge( if ((left->br_startoff + left->br_blockcount != startoff) || (left->br_startblock + left->br_blockcount != got->br_startblock) || (left->br_state != got->br_state) || - (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN)) + (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN) || + !xfs_bmap_same_rtgroup(ip, whichfork, left, got)) return false; return true; @@ -5745,7 +5818,7 @@ xfs_bmse_merge( blockcount = left->br_blockcount + got->br_blockcount; xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); - ASSERT(xfs_bmse_can_merge(left, got, shift)); + ASSERT(xfs_bmse_can_merge(ip, whichfork, left, got, shift)); new = *left; new.br_blockcount = blockcount; @@ -5907,7 +5980,8 @@ xfs_bmap_collapse_extents( goto del_cursor; } - if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) { + if (xfs_bmse_can_merge(ip, whichfork, &prev, &got, + offset_shift_fsb)) { error = xfs_bmse_merge(tp, ip, whichfork, offset_shift_fsb, &icur, &got, &prev, cur, &logflags); @@ -6043,7 +6117,8 @@ xfs_bmap_insert_extents( * never find mergeable extents in this scenario. Check anyways * and warn if we encounter two extents that could be one. */ - if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb)) + if (xfs_bmse_can_merge(ip, whichfork, &got, &next, + offset_shift_fsb)) WARN_ON_ONCE(1); } diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 7592d46e97c6..4b721d935994 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -248,7 +248,7 @@ struct xfs_bmap_intent { enum xfs_bmap_intent_type bi_type; int bi_whichfork; struct xfs_inode *bi_owner; - struct xfs_perag *bi_pag; + struct xfs_group *bi_group; struct xfs_bmbt_irec bi_bmap; }; diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index a5c4af148853..2b5fc5fd1643 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -225,7 +225,7 @@ __xfs_btree_check_agblock( struct xfs_buf *bp) { struct xfs_mount *mp = cur->bc_mp; - struct xfs_perag *pag = cur->bc_ag.pag; + struct xfs_perag *pag = to_perag(cur->bc_group); xfs_failaddr_t fa; xfs_agblock_t agbno; @@ -331,7 +331,7 @@ __xfs_btree_check_ptr( return -EFSCORRUPTED; break; case XFS_BTREE_TYPE_AG: - if (!xfs_verify_agbno(cur->bc_ag.pag, + if (!xfs_verify_agbno(to_perag(cur->bc_group), be32_to_cpu((&ptr->s)[index]))) return -EFSCORRUPTED; break; @@ -372,7 +372,7 @@ xfs_btree_check_ptr( case XFS_BTREE_TYPE_AG: xfs_err(cur->bc_mp, "AG %u: Corrupt %sbt pointer at level %d index %d.", - cur->bc_ag.pag->pag_agno, cur->bc_ops->name, + cur->bc_group->xg_gno, cur->bc_ops->name, level, index); break; } @@ -523,20 +523,8 @@ xfs_btree_del_cursor( ASSERT(!xfs_btree_is_bmap(cur->bc_ops) || cur->bc_bmap.allocated == 0 || xfs_is_shutdown(cur->bc_mp) || error != 0); - switch (cur->bc_ops->type) { - case XFS_BTREE_TYPE_AG: - if (cur->bc_ag.pag) - xfs_perag_put(cur->bc_ag.pag); - break; - case XFS_BTREE_TYPE_INODE: - /* nothing to do */ - break; - case XFS_BTREE_TYPE_MEM: - if (cur->bc_mem.pag) - xfs_perag_put(cur->bc_mem.pag); - break; - } - + if (cur->bc_group) + xfs_group_put(cur->bc_group); kmem_cache_free(cur->bc_cache, cur); } @@ -1017,22 +1005,22 @@ xfs_btree_readahead_agblock( struct xfs_btree_block *block) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno; + struct xfs_perag *pag = to_perag(cur->bc_group); xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); int rval = 0; if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { xfs_buf_readahead(mp->m_ddev_targp, - XFS_AGB_TO_DADDR(mp, agno, left), - mp->m_bsize, cur->bc_ops->buf_ops); + xfs_agbno_to_daddr(pag, left), mp->m_bsize, + cur->bc_ops->buf_ops); rval++; } if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { xfs_buf_readahead(mp->m_ddev_targp, - XFS_AGB_TO_DADDR(mp, agno, right), - mp->m_bsize, cur->bc_ops->buf_ops); + xfs_agbno_to_daddr(pag, right), mp->m_bsize, + cur->bc_ops->buf_ops); rval++; } @@ -1091,7 +1079,7 @@ xfs_btree_ptr_to_daddr( switch (cur->bc_ops->type) { case XFS_BTREE_TYPE_AG: - *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno, + *daddr = xfs_agbno_to_daddr(to_perag(cur->bc_group), be32_to_cpu(ptr->s)); break; case XFS_BTREE_TYPE_INODE: @@ -1313,7 +1301,7 @@ xfs_btree_owner( case XFS_BTREE_TYPE_INODE: return cur->bc_ino.ip->i_ino; case XFS_BTREE_TYPE_AG: - return cur->bc_ag.pag->pag_agno; + return cur->bc_group->xg_gno; default: ASSERT(0); return 0; @@ -4745,7 +4733,7 @@ xfs_btree_agblock_v5hdr_verify( return __this_address; if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp))) return __this_address; - if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) + if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag_agno(pag)) return __this_address; return NULL; } diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 10b7ddc3b2b3..3b739459ebb0 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -254,6 +254,7 @@ struct xfs_btree_cur union xfs_btree_irec bc_rec; /* current insert/search record value */ uint8_t bc_nlevels; /* number of levels in the tree */ uint8_t bc_maxlevels; /* maximum levels for this btree type */ + struct xfs_group *bc_group; /* per-type information */ union { @@ -264,13 +265,11 @@ struct xfs_btree_cur struct xbtree_ifakeroot *ifake; /* for staging cursor */ } bc_ino; struct { - struct xfs_perag *pag; struct xfs_buf *agbp; struct xbtree_afakeroot *afake; /* for staging cursor */ } bc_ag; struct { struct xfbtree *xfbtree; - struct xfs_perag *pag; } bc_mem; }; diff --git a/fs/xfs/libxfs/xfs_btree_mem.c b/fs/xfs/libxfs/xfs_btree_mem.c index 036061fe32cc..df3d613675a1 100644 --- a/fs/xfs/libxfs/xfs_btree_mem.c +++ b/fs/xfs/libxfs/xfs_btree_mem.c @@ -57,10 +57,8 @@ xfbtree_dup_cursor( ncur->bc_flags = cur->bc_flags; ncur->bc_nlevels = cur->bc_nlevels; ncur->bc_mem.xfbtree = cur->bc_mem.xfbtree; - - if (cur->bc_mem.pag) - ncur->bc_mem.pag = xfs_perag_hold(cur->bc_mem.pag); - + if (cur->bc_group) + ncur->bc_group = xfs_group_hold(cur->bc_group); return ncur; } diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 2cd212ad2c1d..5b377cbbb1f7 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -846,6 +846,12 @@ xfs_defer_add( ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); + if (!ops->finish_item) { + ASSERT(ops->finish_item != NULL); + xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE); + return NULL; + } + dfp = xfs_defer_find_last(tp, ops); if (!dfp || !xfs_defer_can_append(dfp, ops)) dfp = xfs_defer_alloc(&tp->t_dfops, ops); diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index 8b338031e487..ec51b8465e61 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -71,6 +71,7 @@ extern const struct xfs_defer_op_type xfs_refcount_update_defer_type; extern const struct xfs_defer_op_type xfs_rmap_update_defer_type; extern const struct xfs_defer_op_type xfs_extent_free_defer_type; extern const struct xfs_defer_op_type xfs_agfl_free_defer_type; +extern const struct xfs_defer_op_type xfs_rtextent_free_defer_type; extern const struct xfs_defer_op_type xfs_attr_defer_type; extern const struct xfs_defer_op_type xfs_exchmaps_defer_type; diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 15a362e2f5ea..dceef2abd4e2 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -16,6 +16,9 @@ #include "xfs_trans.h" #include "xfs_qm.h" #include "xfs_error.h" +#include "xfs_health.h" +#include "xfs_metadir.h" +#include "xfs_metafile.h" int xfs_calc_dquots_per_chunk( @@ -323,3 +326,190 @@ xfs_dquot_to_disk_ts( return cpu_to_be32(t); } + +inline unsigned int +xfs_dqinode_sick_mask(xfs_dqtype_t type) +{ + switch (type) { + case XFS_DQTYPE_USER: + return XFS_SICK_FS_UQUOTA; + case XFS_DQTYPE_GROUP: + return XFS_SICK_FS_GQUOTA; + case XFS_DQTYPE_PROJ: + return XFS_SICK_FS_PQUOTA; + } + + ASSERT(0); + return 0; +} + +/* + * Load the inode for a given type of quota, assuming that the sb fields have + * been sorted out. This is not true when switching quota types on a V4 + * filesystem, so do not use this function for that. If metadir is enabled, + * @dp must be the /quota metadir. + * + * Returns -ENOENT if the quota inode field is NULLFSINO; 0 and an inode on + * success; or a negative errno. + */ +int +xfs_dqinode_load( + struct xfs_trans *tp, + struct xfs_inode *dp, + xfs_dqtype_t type, + struct xfs_inode **ipp) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_inode *ip; + enum xfs_metafile_type metafile_type = xfs_dqinode_metafile_type(type); + int error; + + if (!xfs_has_metadir(mp)) { + xfs_ino_t ino; + + switch (type) { + case XFS_DQTYPE_USER: + ino = mp->m_sb.sb_uquotino; + break; + case XFS_DQTYPE_GROUP: + ino = mp->m_sb.sb_gquotino; + break; + case XFS_DQTYPE_PROJ: + ino = mp->m_sb.sb_pquotino; + break; + default: + ASSERT(0); + return -EFSCORRUPTED; + } + + /* Should have set 0 to NULLFSINO when loading superblock */ + if (ino == NULLFSINO) + return -ENOENT; + + error = xfs_trans_metafile_iget(tp, ino, metafile_type, &ip); + } else { + error = xfs_metadir_load(tp, dp, xfs_dqinode_path(type), + metafile_type, &ip); + if (error == -ENOENT) + return error; + } + if (error) { + if (xfs_metadata_is_sick(error)) + xfs_fs_mark_sick(mp, xfs_dqinode_sick_mask(type)); + return error; + } + + if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && + ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) { + xfs_irele(ip); + xfs_fs_mark_sick(mp, xfs_dqinode_sick_mask(type)); + return -EFSCORRUPTED; + } + + if (XFS_IS_CORRUPT(mp, ip->i_projid != 0)) { + xfs_irele(ip); + xfs_fs_mark_sick(mp, xfs_dqinode_sick_mask(type)); + return -EFSCORRUPTED; + } + + *ipp = ip; + return 0; +} + +/* Create a metadata directory quota inode. */ +int +xfs_dqinode_metadir_create( + struct xfs_inode *dp, + xfs_dqtype_t type, + struct xfs_inode **ipp) +{ + struct xfs_metadir_update upd = { + .dp = dp, + .metafile_type = xfs_dqinode_metafile_type(type), + .path = xfs_dqinode_path(type), + }; + int error; + + error = xfs_metadir_start_create(&upd); + if (error) + return error; + + error = xfs_metadir_create(&upd, S_IFREG); + if (error) + return error; + + xfs_trans_log_inode(upd.tp, upd.ip, XFS_ILOG_CORE); + + error = xfs_metadir_commit(&upd); + if (error) + return error; + + xfs_finish_inode_setup(upd.ip); + *ipp = upd.ip; + return 0; +} + +#ifndef __KERNEL__ +/* Link a metadata directory quota inode. */ +int +xfs_dqinode_metadir_link( + struct xfs_inode *dp, + xfs_dqtype_t type, + struct xfs_inode *ip) +{ + struct xfs_metadir_update upd = { + .dp = dp, + .metafile_type = xfs_dqinode_metafile_type(type), + .path = xfs_dqinode_path(type), + .ip = ip, + }; + int error; + + error = xfs_metadir_start_link(&upd); + if (error) + return error; + + error = xfs_metadir_link(&upd); + if (error) + return error; + + xfs_trans_log_inode(upd.tp, upd.ip, XFS_ILOG_CORE); + + return xfs_metadir_commit(&upd); +} +#endif /* __KERNEL__ */ + +/* Create the parent directory for all quota inodes and load it. */ +int +xfs_dqinode_mkdir_parent( + struct xfs_mount *mp, + struct xfs_inode **dpp) +{ + if (!mp->m_metadirip) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; + } + + return xfs_metadir_mkdir(mp->m_metadirip, "quota", dpp); +} + +/* + * Load the parent directory of all quota inodes. Pass the inode to the caller + * because quota functions (e.g. QUOTARM) can be called on the quota files even + * if quotas are not enabled. + */ +int +xfs_dqinode_load_parent( + struct xfs_trans *tp, + struct xfs_inode **dpp) +{ + struct xfs_mount *mp = tp->t_mountp; + + if (!mp->m_metadirip) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; + } + + return xfs_metadir_load(tp, mp->m_metadirip, "quota", XFS_METAFILE_DIR, + dpp); +} diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index e1bfee0c3b1a..4d47a3e723aa 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -174,6 +174,14 @@ typedef struct xfs_sb { xfs_lsn_t sb_lsn; /* last write sequence */ uuid_t sb_meta_uuid; /* metadata file system unique id */ + xfs_ino_t sb_metadirino; /* metadata directory tree root */ + + xfs_rgnumber_t sb_rgcount; /* number of realtime groups */ + xfs_rtxlen_t sb_rgextents; /* size of a realtime group in rtx */ + + uint8_t sb_rgblklog; /* rt group number shift */ + uint8_t sb_pad[7]; /* zeroes */ + /* must be padded to 64 bit alignment */ } xfs_sb_t; @@ -259,7 +267,19 @@ struct xfs_dsb { __be64 sb_lsn; /* last write sequence */ uuid_t sb_meta_uuid; /* metadata file system unique id */ - /* must be padded to 64 bit alignment */ + __be64 sb_metadirino; /* metadata directory tree root */ + __be32 sb_rgcount; /* # of realtime groups */ + __be32 sb_rgextents; /* size of rtgroup in rtx */ + + __u8 sb_rgblklog; /* rt group number shift */ + __u8 sb_pad[7]; /* zeroes */ + + /* + * The size of this structure must be padded to 64 bit alignment. + * + * NOTE: Don't forget to update secondary_sb_whack in xfs_repair when + * adding new fields here. + */ }; #define XFS_SB_CRC_OFF offsetof(struct xfs_dsb, sb_crc) @@ -278,7 +298,7 @@ struct xfs_dsb { #define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) -static inline bool xfs_sb_is_v5(struct xfs_sb *sbp) +static inline bool xfs_sb_is_v5(const struct xfs_sb *sbp) { return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; } @@ -287,12 +307,12 @@ static inline bool xfs_sb_is_v5(struct xfs_sb *sbp) * Detect a mismatched features2 field. Older kernels read/wrote * this into the wrong slot, so to be safe we keep them in sync. */ -static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp) +static inline bool xfs_sb_has_mismatched_features2(const struct xfs_sb *sbp) { return sbp->sb_bad_features2 != sbp->sb_features2; } -static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp) +static inline bool xfs_sb_version_hasmorebits(const struct xfs_sb *sbp) { return xfs_sb_is_v5(sbp) || (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); @@ -342,8 +362,8 @@ static inline void xfs_sb_version_addprojid32(struct xfs_sb *sbp) #define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL static inline bool xfs_sb_has_compat_feature( - struct xfs_sb *sbp, - uint32_t feature) + const struct xfs_sb *sbp, + uint32_t feature) { return (sbp->sb_features_compat & feature) != 0; } @@ -360,8 +380,8 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL static inline bool xfs_sb_has_ro_compat_feature( - struct xfs_sb *sbp, - uint32_t feature) + const struct xfs_sb *sbp, + uint32_t feature) { return (sbp->sb_features_ro_compat & feature) != 0; } @@ -374,6 +394,7 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */ #define XFS_SB_FEAT_INCOMPAT_EXCHRANGE (1 << 6) /* exchangerange supported */ #define XFS_SB_FEAT_INCOMPAT_PARENT (1 << 7) /* parent pointers */ +#define XFS_SB_FEAT_INCOMPAT_METADIR (1 << 8) /* metadata dir tree */ #define XFS_SB_FEAT_INCOMPAT_ALL \ (XFS_SB_FEAT_INCOMPAT_FTYPE | \ XFS_SB_FEAT_INCOMPAT_SPINODES | \ @@ -382,13 +403,14 @@ xfs_sb_has_ro_compat_feature( XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR | \ XFS_SB_FEAT_INCOMPAT_NREXT64 | \ XFS_SB_FEAT_INCOMPAT_EXCHRANGE | \ - XFS_SB_FEAT_INCOMPAT_PARENT) + XFS_SB_FEAT_INCOMPAT_PARENT | \ + XFS_SB_FEAT_INCOMPAT_METADIR) #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL static inline bool xfs_sb_has_incompat_feature( - struct xfs_sb *sbp, - uint32_t feature) + const struct xfs_sb *sbp, + uint32_t feature) { return (sbp->sb_features_incompat & feature) != 0; } @@ -399,8 +421,8 @@ xfs_sb_has_incompat_feature( #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL static inline bool xfs_sb_has_incompat_log_feature( - struct xfs_sb *sbp, - uint32_t feature) + const struct xfs_sb *sbp, + uint32_t feature) { return (sbp->sb_features_log_incompat & feature) != 0; } @@ -420,7 +442,7 @@ xfs_sb_add_incompat_log_features( sbp->sb_features_log_incompat |= features; } -static inline bool xfs_sb_version_haslogxattrs(struct xfs_sb *sbp) +static inline bool xfs_sb_version_haslogxattrs(const struct xfs_sb *sbp) { return xfs_sb_is_v5(sbp) && (sbp->sb_features_log_incompat & XFS_SB_FEAT_INCOMPAT_LOG_XATTRS); @@ -694,21 +716,58 @@ struct xfs_agfl { /* * Realtime bitmap information is accessed by the word, which is currently - * stored in host-endian format. + * stored in host-endian format. Starting with the realtime groups feature, + * the words are stored in be32 ondisk. */ union xfs_rtword_raw { __u32 old; + __be32 rtg; }; /* * Realtime summary counts are accessed by the word, which is currently - * stored in host-endian format. + * stored in host-endian format. Starting with the realtime groups feature, + * the words are stored in be32 ondisk. */ union xfs_suminfo_raw { __u32 old; + __be32 rtg; }; /* + * Realtime allocation groups break the rt section into multiple pieces that + * could be locked independently. Realtime block group numbers are 32-bit + * quantities. Block numbers within a group are also 32-bit quantities, but + * the upper bit must never be set. rtgroup 0 might have a superblock in it, + * so the minimum size of an rtgroup is 2 rtx. + */ +#define XFS_MAX_RGBLOCKS ((xfs_rgblock_t)(1U << 31) - 1) +#define XFS_MIN_RGEXTENTS ((xfs_rtxlen_t)2) +#define XFS_MAX_RGNUMBER ((xfs_rgnumber_t)(-1U)) + +#define XFS_RTSB_MAGIC 0x46726F67 /* 'Frog' */ + +/* + * Realtime superblock - on disk version. Must be padded to 64 bit alignment. + * The first block of the realtime volume contains this superblock. + */ +struct xfs_rtsb { + __be32 rsb_magicnum; /* magic number == XFS_RTSB_MAGIC */ + __le32 rsb_crc; /* superblock crc */ + + __be32 rsb_pad; /* zero */ + unsigned char rsb_fname[XFSLABEL_MAX]; /* file system name */ + + uuid_t rsb_uuid; /* user-visible file system unique id */ + uuid_t rsb_meta_uuid; /* metadata file system unique id */ + + /* must be padded to 64 bit alignment */ +}; + +#define XFS_RTSB_CRC_OFF offsetof(struct xfs_rtsb, rsb_crc) +#define XFS_RTSB_DADDR ((xfs_daddr_t)0) /* daddr in rt section */ + +/* * XFS Timestamps * ============== * @@ -790,6 +849,27 @@ static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds) return (time64_t)ondisk_seconds - XFS_BIGTIME_EPOCH_OFFSET; } +enum xfs_metafile_type { + XFS_METAFILE_UNKNOWN, /* unknown */ + XFS_METAFILE_DIR, /* metadir directory */ + XFS_METAFILE_USRQUOTA, /* user quota */ + XFS_METAFILE_GRPQUOTA, /* group quota */ + XFS_METAFILE_PRJQUOTA, /* project quota */ + XFS_METAFILE_RTBITMAP, /* rt bitmap */ + XFS_METAFILE_RTSUMMARY, /* rt summary */ + + XFS_METAFILE_MAX +} __packed; + +#define XFS_METAFILE_TYPE_STR \ + { XFS_METAFILE_UNKNOWN, "unknown" }, \ + { XFS_METAFILE_DIR, "dir" }, \ + { XFS_METAFILE_USRQUOTA, "usrquota" }, \ + { XFS_METAFILE_GRPQUOTA, "grpquota" }, \ + { XFS_METAFILE_PRJQUOTA, "prjquota" }, \ + { XFS_METAFILE_RTBITMAP, "rtbitmap" }, \ + { XFS_METAFILE_RTSUMMARY, "rtsummary" } + /* * On-disk inode structure. * @@ -812,7 +892,7 @@ struct xfs_dinode { __be16 di_mode; /* mode and type of file */ __u8 di_version; /* inode version */ __u8 di_format; /* format of di_c data */ - __be16 di_onlink; /* old number of links to file */ + __be16 di_metatype; /* XFS_METAFILE_*; was di_onlink */ __be32 di_uid; /* owner's user id */ __be32 di_gid; /* owner's group id */ __be32 di_nlink; /* number of links to file */ @@ -1088,21 +1168,60 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) * Values for di_flags2 These start by being exposed to userspace in the upper * 16 bits of the XFS_XFLAG_s range. */ -#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */ -#define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ -#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ -#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */ -#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */ +/* use DAX for this inode */ +#define XFS_DIFLAG2_DAX_BIT 0 + +/* file's blocks may be shared */ +#define XFS_DIFLAG2_REFLINK_BIT 1 -#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) -#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) -#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) -#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT) -#define XFS_DIFLAG2_NREXT64 (1 << XFS_DIFLAG2_NREXT64_BIT) +/* copy on write extent size hint */ +#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 + +/* big timestamps */ +#define XFS_DIFLAG2_BIGTIME_BIT 3 + +/* large extent counters */ +#define XFS_DIFLAG2_NREXT64_BIT 4 + +/* + * The inode contains filesystem metadata and can be found through the metadata + * directory tree. Metadata inodes must satisfy the following constraints: + * + * - V5 filesystem (and ftype) are enabled; + * - The only valid modes are regular files and directories; + * - The access bits must be zero; + * - DMAPI event and state masks are zero; + * - The user and group IDs must be zero; + * - The project ID can be used as a u32 annotation; + * - The immutable, sync, noatime, nodump, nodefrag flags must be set. + * - The dax flag must not be set. + * - Directories must have nosymlinks set. + * + * These requirements are chosen defensively to minimize the ability of + * userspace to read or modify the contents, should a metadata file ever + * escape to userspace. + * + * There are further constraints on the directory tree itself: + * + * - Metadata inodes must never be resolvable through the root directory; + * - They must never be accessed by userspace; + * - Metadata directory entries must have correct ftype. + * + * Superblock-rooted metadata files must have the METADATA iflag set even + * though they do not have a parent directory. + */ +#define XFS_DIFLAG2_METADATA_BIT 5 + +#define XFS_DIFLAG2_DAX (1ULL << XFS_DIFLAG2_DAX_BIT) +#define XFS_DIFLAG2_REFLINK (1ULL << XFS_DIFLAG2_REFLINK_BIT) +#define XFS_DIFLAG2_COWEXTSIZE (1ULL << XFS_DIFLAG2_COWEXTSIZE_BIT) +#define XFS_DIFLAG2_BIGTIME (1ULL << XFS_DIFLAG2_BIGTIME_BIT) +#define XFS_DIFLAG2_NREXT64 (1ULL << XFS_DIFLAG2_NREXT64_BIT) +#define XFS_DIFLAG2_METADATA (1ULL << XFS_DIFLAG2_METADATA_BIT) #define XFS_DIFLAG2_ANY \ (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ - XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64) + XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADATA) static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip) { @@ -1117,6 +1236,12 @@ static inline bool xfs_dinode_has_large_extent_counts( (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_NREXT64)); } +static inline bool xfs_dinode_is_metadir(const struct xfs_dinode *dip) +{ + return dip->di_version >= 3 && + (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA)); +} + /* * Inode number format: * low inopblog bits - offset in block @@ -1165,6 +1290,24 @@ static inline bool xfs_dinode_has_large_extent_counts( #define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4kB */ /* + * RT bit manipulation macros. + */ +#define XFS_RTBITMAP_MAGIC 0x424D505A /* BMPZ */ +#define XFS_RTSUMMARY_MAGIC 0x53554D59 /* SUMY */ + +struct xfs_rtbuf_blkinfo { + __be32 rt_magic; /* validity check on block */ + __be32 rt_crc; /* CRC of block */ + __be64 rt_owner; /* inode that owns the block */ + __be64 rt_blkno; /* first block of the buffer */ + __be64 rt_lsn; /* sequence number of last write */ + uuid_t rt_uuid; /* filesystem we belong to */ +}; + +#define XFS_RTBUF_CRC_OFF \ + offsetof(struct xfs_rtbuf_blkinfo, rt_crc) + +/* * Dquot and dquot block format definitions */ #define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */ diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 860284064c5a..41ce4d3d650e 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -187,7 +187,9 @@ struct xfs_fsop_geom { __u32 logsunit; /* log stripe unit, bytes */ uint32_t sick; /* o: unhealthy fs & rt metadata */ uint32_t checked; /* o: checked fs & rt metadata */ - __u64 reserved[17]; /* reserved space */ + __u32 rgextents; /* rt extents in a realtime group */ + __u32 rgcount; /* number of realtime groups */ + __u64 reserved[16]; /* reserved space */ }; #define XFS_FSOP_GEOM_SICK_COUNTERS (1 << 0) /* summary counters */ @@ -198,6 +200,8 @@ struct xfs_fsop_geom { #define XFS_FSOP_GEOM_SICK_RT_SUMMARY (1 << 5) /* realtime summary */ #define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */ #define XFS_FSOP_GEOM_SICK_NLINKS (1 << 7) /* inode link counts */ +#define XFS_FSOP_GEOM_SICK_METADIR (1 << 8) /* metadata directory */ +#define XFS_FSOP_GEOM_SICK_METAPATH (1 << 9) /* metadir tree path */ /* Output for XFS_FS_COUNTS */ typedef struct xfs_fsop_counts { @@ -242,6 +246,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_NREXT64 (1 << 23) /* large extent counters */ #define XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE (1 << 24) /* exchange range */ #define XFS_FSOP_GEOM_FLAGS_PARENT (1 << 25) /* linux parent pointers */ +#define XFS_FSOP_GEOM_FLAGS_METADIR (1 << 26) /* metadata directories */ /* * Minimum and maximum sizes need for growth checks. @@ -489,9 +494,17 @@ struct xfs_bulk_ireq { */ #define XFS_BULK_IREQ_NREXT64 (1U << 2) +/* + * Allow bulkstat to return information about metadata directories. This + * enables xfs_scrub to find them for scanning, as they are otherwise ordinary + * directories. + */ +#define XFS_BULK_IREQ_METADIR (1U << 3) + #define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \ XFS_BULK_IREQ_SPECIAL | \ - XFS_BULK_IREQ_NREXT64) + XFS_BULK_IREQ_NREXT64 | \ + XFS_BULK_IREQ_METADIR) /* Operate on the root directory inode. */ #define XFS_BULK_IREQ_SPECIAL_ROOT (1) @@ -722,9 +735,11 @@ struct xfs_scrub_metadata { #define XFS_SCRUB_TYPE_NLINKS 26 /* inode link counts */ #define XFS_SCRUB_TYPE_HEALTHY 27 /* everything checked out ok */ #define XFS_SCRUB_TYPE_DIRTREE 28 /* directory tree structure */ +#define XFS_SCRUB_TYPE_METAPATH 29 /* metadata directory tree paths */ +#define XFS_SCRUB_TYPE_RGSUPER 30 /* realtime superblock */ /* Number of scrub subcommands. */ -#define XFS_SCRUB_TYPE_NR 29 +#define XFS_SCRUB_TYPE_NR 31 /* * This special type code only applies to the vectored scrub implementation. @@ -803,6 +818,22 @@ struct xfs_scrub_vec_head { #define XFS_SCRUB_VEC_FLAGS_ALL (0) /* + * i: sm_ino values for XFS_SCRUB_TYPE_METAPATH to select a metadata file for + * path checking. + */ +#define XFS_SCRUB_METAPATH_PROBE (0) /* do we have a metapath scrubber? */ +#define XFS_SCRUB_METAPATH_RTDIR (1) /* rtrgroups metadir */ +#define XFS_SCRUB_METAPATH_RTBITMAP (2) /* per-rtg bitmap */ +#define XFS_SCRUB_METAPATH_RTSUMMARY (3) /* per-rtg summary */ +#define XFS_SCRUB_METAPATH_QUOTADIR (4) /* quota metadir */ +#define XFS_SCRUB_METAPATH_USRQUOTA (5) /* user quota */ +#define XFS_SCRUB_METAPATH_GRPQUOTA (6) /* group quota */ +#define XFS_SCRUB_METAPATH_PRJQUOTA (7) /* project quota */ + +/* Number of metapath sm_ino values */ +#define XFS_SCRUB_METAPATH_NR (8) + +/* * ioctl limits */ #ifdef XATTR_LIST_MAX @@ -949,6 +980,21 @@ struct xfs_getparents_by_handle { }; /* + * Output for XFS_IOC_RTGROUP_GEOMETRY + */ +struct xfs_rtgroup_geometry { + __u32 rg_number; /* i/o: rtgroup number */ + __u32 rg_length; /* o: length in blocks */ + __u32 rg_sick; /* o: sick things in ag */ + __u32 rg_checked; /* o: checked metadata in ag */ + __u32 rg_flags; /* i/o: flags for this ag */ + __u32 rg_reserved[27]; /* o: zero */ +}; +#define XFS_RTGROUP_GEOM_SICK_SUPER (1U << 0) /* superblock */ +#define XFS_RTGROUP_GEOM_SICK_BITMAP (1U << 1) /* rtbitmap */ +#define XFS_RTGROUP_GEOM_SICK_SUMMARY (1U << 2) /* rtsummary */ + +/* * ioctl commands that are used by Linux filesystems */ #define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS @@ -986,6 +1032,7 @@ struct xfs_getparents_by_handle { #define XFS_IOC_GETPARENTS _IOWR('X', 62, struct xfs_getparents) #define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle) #define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head) +#define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry) /* * ioctl commands that replace IRIX syssgi()'s diff --git a/fs/xfs/libxfs/xfs_group.c b/fs/xfs/libxfs/xfs_group.c new file mode 100644 index 000000000000..e9d76bcdc820 --- /dev/null +++ b/fs/xfs/libxfs/xfs_group.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018 Red Hat, Inc. + */ + +#include "xfs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_extent_busy.h" +#include "xfs_group.h" + +/* + * Groups can have passive and active references. + * + * For passive references the code freeing a group is responsible for cleaning + * up objects that hold the passive references (e.g. cached buffers). + * Routines manipulating passive references are xfs_group_get, xfs_group_hold + * and xfs_group_put. + * + * Active references are for short term access to the group for walking trees or + * accessing state. If a group is being shrunk or offlined, the lookup will fail + * to find that group and return NULL instead. + * Routines manipulating active references are xfs_group_grab and + * xfs_group_rele. + */ + +struct xfs_group * +xfs_group_get( + struct xfs_mount *mp, + uint32_t index, + enum xfs_group_type type) +{ + struct xfs_group *xg; + + rcu_read_lock(); + xg = xa_load(&mp->m_groups[type].xa, index); + if (xg) { + trace_xfs_group_get(xg, _RET_IP_); + ASSERT(atomic_read(&xg->xg_ref) >= 0); + atomic_inc(&xg->xg_ref); + } + rcu_read_unlock(); + return xg; +} + +struct xfs_group * +xfs_group_hold( + struct xfs_group *xg) +{ + ASSERT(atomic_read(&xg->xg_ref) > 0 || + atomic_read(&xg->xg_active_ref) > 0); + + trace_xfs_group_hold(xg, _RET_IP_); + atomic_inc(&xg->xg_ref); + return xg; +} + +void +xfs_group_put( + struct xfs_group *xg) +{ + trace_xfs_group_put(xg, _RET_IP_); + + ASSERT(atomic_read(&xg->xg_ref) > 0); + atomic_dec(&xg->xg_ref); +} + +struct xfs_group * +xfs_group_grab( + struct xfs_mount *mp, + uint32_t index, + enum xfs_group_type type) +{ + struct xfs_group *xg; + + rcu_read_lock(); + xg = xa_load(&mp->m_groups[type].xa, index); + if (xg) { + trace_xfs_group_grab(xg, _RET_IP_); + if (!atomic_inc_not_zero(&xg->xg_active_ref)) + xg = NULL; + } + rcu_read_unlock(); + return xg; +} + +/* + * Iterate to the next group. To start the iteration at @start_index, a %NULL + * @xg is passed, else the previous group returned from this function. The + * caller should break out of the loop when this returns %NULL. If the caller + * wants to break out of a loop that did not finish it needs to release the + * active reference to @xg using xfs_group_rele() itself. + */ +struct xfs_group * +xfs_group_next_range( + struct xfs_mount *mp, + struct xfs_group *xg, + uint32_t start_index, + uint32_t end_index, + enum xfs_group_type type) +{ + uint32_t index = start_index; + + if (xg) { + index = xg->xg_gno + 1; + xfs_group_rele(xg); + } + if (index > end_index) + return NULL; + return xfs_group_grab(mp, index, type); +} + +/* + * Find the next group after @xg, or the first group if @xg is NULL. + */ +struct xfs_group * +xfs_group_grab_next_mark( + struct xfs_mount *mp, + struct xfs_group *xg, + xa_mark_t mark, + enum xfs_group_type type) +{ + unsigned long index = 0; + + if (xg) { + index = xg->xg_gno + 1; + xfs_group_rele(xg); + } + + rcu_read_lock(); + xg = xa_find(&mp->m_groups[type].xa, &index, ULONG_MAX, mark); + if (xg) { + trace_xfs_group_grab_next_tag(xg, _RET_IP_); + if (!atomic_inc_not_zero(&xg->xg_active_ref)) + xg = NULL; + } + rcu_read_unlock(); + return xg; +} + +void +xfs_group_rele( + struct xfs_group *xg) +{ + trace_xfs_group_rele(xg, _RET_IP_); + atomic_dec(&xg->xg_active_ref); +} + +void +xfs_group_free( + struct xfs_mount *mp, + uint32_t index, + enum xfs_group_type type, + void (*uninit)(struct xfs_group *xg)) +{ + struct xfs_group *xg = xa_erase(&mp->m_groups[type].xa, index); + + XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_ref) != 0); + + xfs_defer_drain_free(&xg->xg_intents_drain); +#ifdef __KERNEL__ + kfree(xg->xg_busy_extents); +#endif + + if (uninit) + uninit(xg); + + /* drop the mount's active reference */ + xfs_group_rele(xg); + XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_active_ref) != 0); + kfree_rcu_mightsleep(xg); +} + +int +xfs_group_insert( + struct xfs_mount *mp, + struct xfs_group *xg, + uint32_t index, + enum xfs_group_type type) +{ + int error; + + xg->xg_mount = mp; + xg->xg_gno = index; + xg->xg_type = type; + +#ifdef __KERNEL__ + xg->xg_busy_extents = xfs_extent_busy_alloc(); + if (!xg->xg_busy_extents) + return -ENOMEM; + spin_lock_init(&xg->xg_state_lock); + xfs_hooks_init(&xg->xg_rmap_update_hooks); +#endif + xfs_defer_drain_init(&xg->xg_intents_drain); + + /* Active ref owned by mount indicates group is online. */ + atomic_set(&xg->xg_active_ref, 1); + + error = xa_insert(&mp->m_groups[type].xa, index, xg, GFP_KERNEL); + if (error) { + WARN_ON_ONCE(error == -EBUSY); + goto out_drain; + } + + return 0; +out_drain: + xfs_defer_drain_free(&xg->xg_intents_drain); +#ifdef __KERNEL__ + kfree(xg->xg_busy_extents); +#endif + return error; +} + +struct xfs_group * +xfs_group_get_by_fsb( + struct xfs_mount *mp, + xfs_fsblock_t fsbno, + enum xfs_group_type type) +{ + return xfs_group_get(mp, xfs_fsb_to_gno(mp, fsbno, type), type); +} diff --git a/fs/xfs/libxfs/xfs_group.h b/fs/xfs/libxfs/xfs_group.h new file mode 100644 index 000000000000..242b05627c7a --- /dev/null +++ b/fs/xfs/libxfs/xfs_group.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2018 Red Hat, Inc. + */ +#ifndef __LIBXFS_GROUP_H +#define __LIBXFS_GROUP_H 1 + +struct xfs_group { + struct xfs_mount *xg_mount; + uint32_t xg_gno; + enum xfs_group_type xg_type; + atomic_t xg_ref; /* passive reference count */ + atomic_t xg_active_ref; /* active reference count */ + + /* Precalculated geometry info */ + uint32_t xg_block_count; /* max usable gbno */ + uint32_t xg_min_gbno; /* min usable gbno */ + +#ifdef __KERNEL__ + /* -- kernel only structures below this line -- */ + + /* + * Track freed but not yet committed extents. + */ + struct xfs_extent_busy_tree *xg_busy_extents; + + /* + * Bitsets of per-ag metadata that have been checked and/or are sick. + * Callers should hold xg_state_lock before accessing this field. + */ + uint16_t xg_checked; + uint16_t xg_sick; + spinlock_t xg_state_lock; + + /* + * We use xfs_drain to track the number of deferred log intent items + * that have been queued (but not yet processed) so that waiters (e.g. + * scrub) will not lock resources when other threads are in the middle + * of processing a chain of intent items only to find momentary + * inconsistencies. + */ + struct xfs_defer_drain xg_intents_drain; + + /* + * Hook to feed rmapbt updates to an active online repair. + */ + struct xfs_hooks xg_rmap_update_hooks; +#endif /* __KERNEL__ */ +}; + +struct xfs_group *xfs_group_get(struct xfs_mount *mp, uint32_t index, + enum xfs_group_type type); +struct xfs_group *xfs_group_get_by_fsb(struct xfs_mount *mp, + xfs_fsblock_t fsbno, enum xfs_group_type type); +struct xfs_group *xfs_group_hold(struct xfs_group *xg); +void xfs_group_put(struct xfs_group *xg); + +struct xfs_group *xfs_group_grab(struct xfs_mount *mp, uint32_t index, + enum xfs_group_type type); +struct xfs_group *xfs_group_next_range(struct xfs_mount *mp, + struct xfs_group *xg, uint32_t start_index, uint32_t end_index, + enum xfs_group_type type); +struct xfs_group *xfs_group_grab_next_mark(struct xfs_mount *mp, + struct xfs_group *xg, xa_mark_t mark, enum xfs_group_type type); +void xfs_group_rele(struct xfs_group *xg); + +void xfs_group_free(struct xfs_mount *mp, uint32_t index, + enum xfs_group_type type, void (*uninit)(struct xfs_group *xg)); +int xfs_group_insert(struct xfs_mount *mp, struct xfs_group *xg, + uint32_t index, enum xfs_group_type); + +#define xfs_group_set_mark(_xg, _mark) \ + xa_set_mark(&(_xg)->xg_mount->m_groups[(_xg)->xg_type].xa, \ + (_xg)->xg_gno, (_mark)) +#define xfs_group_clear_mark(_xg, _mark) \ + xa_clear_mark(&(_xg)->xg_mount->m_groups[(_xg)->xg_type].xa, \ + (_xg)->xg_gno, (_mark)) +#define xfs_group_marked(_mp, _type, _mark) \ + xa_marked(&(_mp)->m_groups[(_type)].xa, (_mark)) + +static inline xfs_agblock_t +xfs_group_max_blocks( + struct xfs_group *xg) +{ + return xg->xg_mount->m_groups[xg->xg_type].blocks; +} + +static inline xfs_fsblock_t +xfs_group_start_fsb( + struct xfs_group *xg) +{ + return ((xfs_fsblock_t)xg->xg_gno) << + xg->xg_mount->m_groups[xg->xg_type].blklog; +} + +static inline xfs_fsblock_t +xfs_gbno_to_fsb( + struct xfs_group *xg, + xfs_agblock_t gbno) +{ + return xfs_group_start_fsb(xg) | gbno; +} + +static inline xfs_daddr_t +xfs_gbno_to_daddr( + struct xfs_group *xg, + xfs_agblock_t gbno) +{ + struct xfs_mount *mp = xg->xg_mount; + uint32_t blocks = mp->m_groups[xg->xg_type].blocks; + + return XFS_FSB_TO_BB(mp, (xfs_fsblock_t)xg->xg_gno * blocks + gbno); +} + +static inline uint32_t +xfs_fsb_to_gno( + struct xfs_mount *mp, + xfs_fsblock_t fsbno, + enum xfs_group_type type) +{ + if (!mp->m_groups[type].blklog) + return 0; + return fsbno >> mp->m_groups[type].blklog; +} + +static inline xfs_agblock_t +xfs_fsb_to_gbno( + struct xfs_mount *mp, + xfs_fsblock_t fsbno, + enum xfs_group_type type) +{ + return fsbno & mp->m_groups[type].blkmask; +} + +static inline bool +xfs_verify_gbno( + struct xfs_group *xg, + uint32_t gbno) +{ + if (gbno >= xg->xg_block_count) + return false; + if (gbno < xg->xg_min_gbno) + return false; + return true; +} + +static inline bool +xfs_verify_gbext( + struct xfs_group *xg, + uint32_t gbno, + uint32_t glen) +{ + uint32_t end; + + if (!xfs_verify_gbno(xg, gbno)) + return false; + if (glen == 0 || check_add_overflow(gbno, glen - 1, &end)) + return false; + if (!xfs_verify_gbno(xg, end)) + return false; + return true; +} + +#endif /* __LIBXFS_GROUP_H */ diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index b0edb4288e59..d34986ac18c3 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -6,6 +6,8 @@ #ifndef __XFS_HEALTH_H__ #define __XFS_HEALTH_H__ +struct xfs_group; + /* * In-Core Filesystem Health Assessments * ===================================== @@ -52,6 +54,7 @@ struct xfs_inode; struct xfs_fsop_geom; struct xfs_btree_cur; struct xfs_da_args; +struct xfs_rtgroup; /* Observable health issues for metadata spanning the entire filesystem. */ #define XFS_SICK_FS_COUNTERS (1 << 0) /* summary counters */ @@ -60,10 +63,13 @@ struct xfs_da_args; #define XFS_SICK_FS_PQUOTA (1 << 3) /* project quota */ #define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */ #define XFS_SICK_FS_NLINKS (1 << 5) /* inode link counts */ +#define XFS_SICK_FS_METADIR (1 << 6) /* metadata directory tree */ +#define XFS_SICK_FS_METAPATH (1 << 7) /* metadata directory tree path */ -/* Observable health issues for realtime volume metadata. */ -#define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */ -#define XFS_SICK_RT_SUMMARY (1 << 1) /* realtime summary */ +/* Observable health issues for realtime group metadata. */ +#define XFS_SICK_RG_SUPER (1 << 0) /* rt group superblock */ +#define XFS_SICK_RG_BITMAP (1 << 1) /* rt group bitmap */ +#define XFS_SICK_RG_SUMMARY (1 << 2) /* rt groups summary */ /* Observable health issues for AG metadata. */ #define XFS_SICK_AG_SB (1 << 0) /* superblock */ @@ -103,10 +109,13 @@ struct xfs_da_args; XFS_SICK_FS_GQUOTA | \ XFS_SICK_FS_PQUOTA | \ XFS_SICK_FS_QUOTACHECK | \ - XFS_SICK_FS_NLINKS) + XFS_SICK_FS_NLINKS | \ + XFS_SICK_FS_METADIR | \ + XFS_SICK_FS_METAPATH) -#define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \ - XFS_SICK_RT_SUMMARY) +#define XFS_SICK_RG_PRIMARY (XFS_SICK_RG_SUPER | \ + XFS_SICK_RG_BITMAP | \ + XFS_SICK_RG_SUMMARY) #define XFS_SICK_AG_PRIMARY (XFS_SICK_AG_SB | \ XFS_SICK_AG_AGF | \ @@ -136,26 +145,26 @@ struct xfs_da_args; /* Secondary state related to (but not primary evidence of) health problems. */ #define XFS_SICK_FS_SECONDARY (0) -#define XFS_SICK_RT_SECONDARY (0) +#define XFS_SICK_RG_SECONDARY (0) #define XFS_SICK_AG_SECONDARY (0) #define XFS_SICK_INO_SECONDARY (XFS_SICK_INO_FORGET) /* Evidence of health problems elsewhere. */ #define XFS_SICK_FS_INDIRECT (0) -#define XFS_SICK_RT_INDIRECT (0) +#define XFS_SICK_RG_INDIRECT (0) #define XFS_SICK_AG_INDIRECT (XFS_SICK_AG_INODES) #define XFS_SICK_INO_INDIRECT (0) /* All health masks. */ -#define XFS_SICK_FS_ALL (XFS_SICK_FS_PRIMARY | \ +#define XFS_SICK_FS_ALL (XFS_SICK_FS_PRIMARY | \ XFS_SICK_FS_SECONDARY | \ XFS_SICK_FS_INDIRECT) -#define XFS_SICK_RT_ALL (XFS_SICK_RT_PRIMARY | \ - XFS_SICK_RT_SECONDARY | \ - XFS_SICK_RT_INDIRECT) +#define XFS_SICK_RG_ALL (XFS_SICK_RG_PRIMARY | \ + XFS_SICK_RG_SECONDARY | \ + XFS_SICK_RG_INDIRECT) -#define XFS_SICK_AG_ALL (XFS_SICK_AG_PRIMARY | \ +#define XFS_SICK_AG_ALL (XFS_SICK_AG_PRIMARY | \ XFS_SICK_AG_SECONDARY | \ XFS_SICK_AG_INDIRECT) @@ -189,18 +198,17 @@ void xfs_fs_mark_healthy(struct xfs_mount *mp, unsigned int mask); void xfs_fs_measure_sickness(struct xfs_mount *mp, unsigned int *sick, unsigned int *checked); -void xfs_rt_mark_sick(struct xfs_mount *mp, unsigned int mask); -void xfs_rt_mark_corrupt(struct xfs_mount *mp, unsigned int mask); -void xfs_rt_mark_healthy(struct xfs_mount *mp, unsigned int mask); -void xfs_rt_measure_sickness(struct xfs_mount *mp, unsigned int *sick, - unsigned int *checked); +void xfs_rgno_mark_sick(struct xfs_mount *mp, xfs_rgnumber_t rgno, + unsigned int mask); void xfs_agno_mark_sick(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int mask); -void xfs_ag_mark_sick(struct xfs_perag *pag, unsigned int mask); -void xfs_ag_mark_corrupt(struct xfs_perag *pag, unsigned int mask); -void xfs_ag_mark_healthy(struct xfs_perag *pag, unsigned int mask); -void xfs_ag_measure_sickness(struct xfs_perag *pag, unsigned int *sick, +void xfs_group_mark_sick(struct xfs_group *xg, unsigned int mask); +#define xfs_ag_mark_sick(pag, mask) \ + xfs_group_mark_sick(pag_group(pag), (mask)) +void xfs_group_mark_corrupt(struct xfs_group *xg, unsigned int mask); +void xfs_group_mark_healthy(struct xfs_group *xg, unsigned int mask); +void xfs_group_measure_sickness(struct xfs_group *xg, unsigned int *sick, unsigned int *checked); void xfs_inode_mark_sick(struct xfs_inode *ip, unsigned int mask); @@ -227,22 +235,25 @@ xfs_fs_has_sickness(struct xfs_mount *mp, unsigned int mask) } static inline bool -xfs_rt_has_sickness(struct xfs_mount *mp, unsigned int mask) +xfs_group_has_sickness( + struct xfs_group *xg, + unsigned int mask) { - unsigned int sick, checked; + unsigned int sick, checked; - xfs_rt_measure_sickness(mp, &sick, &checked); + xfs_group_measure_sickness(xg, &sick, &checked); return sick & mask; } -static inline bool -xfs_ag_has_sickness(struct xfs_perag *pag, unsigned int mask) -{ - unsigned int sick, checked; +#define xfs_ag_has_sickness(pag, mask) \ + xfs_group_has_sickness(pag_group(pag), (mask)) +#define xfs_ag_is_healthy(pag) \ + (!xfs_ag_has_sickness((pag), UINT_MAX)) - xfs_ag_measure_sickness(pag, &sick, &checked); - return sick & mask; -} +#define xfs_rtgroup_has_sickness(rtg, mask) \ + xfs_group_has_sickness(rtg_group(rtg), (mask)) +#define xfs_rtgroup_is_healthy(rtg) \ + (!xfs_rtgroup_has_sickness((rtg), UINT_MAX)) static inline bool xfs_inode_has_sickness(struct xfs_inode *ip, unsigned int mask) @@ -260,18 +271,6 @@ xfs_fs_is_healthy(struct xfs_mount *mp) } static inline bool -xfs_rt_is_healthy(struct xfs_mount *mp) -{ - return !xfs_rt_has_sickness(mp, -1U); -} - -static inline bool -xfs_ag_is_healthy(struct xfs_perag *pag) -{ - return !xfs_ag_has_sickness(pag, -1U); -} - -static inline bool xfs_inode_is_healthy(struct xfs_inode *ip) { return !xfs_inode_has_sickness(ip, -1U); @@ -279,6 +278,8 @@ xfs_inode_is_healthy(struct xfs_inode *ip) void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo); void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo); +void xfs_rtgroup_geom_health(struct xfs_rtgroup *rtg, + struct xfs_rtgroup_geometry *rgeo); void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs); #define xfs_metadata_is_sick(error) \ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 271855227514..8b84e2cf711b 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -142,7 +142,7 @@ xfs_inobt_complain_bad_rec( xfs_warn(mp, "%sbt record corruption in AG %d detected at %pS!", - cur->bc_ops->name, cur->bc_ag.pag->pag_agno, fa); + cur->bc_ops->name, cur->bc_group->xg_gno, fa); xfs_warn(mp, "start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x", irec->ir_startino, irec->ir_count, irec->ir_freecount, @@ -170,7 +170,7 @@ xfs_inobt_get_rec( return error; xfs_inobt_btrec_to_irec(mp, rec, irec); - fa = xfs_inobt_check_irec(cur->bc_ag.pag, irec); + fa = xfs_inobt_check_irec(to_perag(cur->bc_group), irec); if (fa) return xfs_inobt_complain_bad_rec(cur, fa, irec); @@ -275,8 +275,10 @@ xfs_check_agi_freecount( } } while (i == 1); - if (!xfs_is_shutdown(cur->bc_mp)) - ASSERT(freecount == cur->bc_ag.pag->pagi_freecount); + if (!xfs_is_shutdown(cur->bc_mp)) { + ASSERT(freecount == + to_perag(cur->bc_group)->pagi_freecount); + } } return 0; } @@ -551,7 +553,7 @@ xfs_inobt_insert_sprec( struct xfs_buf *agbp, struct xfs_inobt_rec_incore *nrec) /* in/out: new/merged rec. */ { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_btree_cur *cur; int error; int i; @@ -606,15 +608,12 @@ xfs_inobt_insert_sprec( goto error; } - trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino, - rec.ir_holemask, nrec->ir_startino, - nrec->ir_holemask); + trace_xfs_irec_merge_pre(pag, &rec, nrec); /* merge to nrec to output the updated record */ __xfs_inobt_rec_merge(nrec, &rec); - trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino, - nrec->ir_holemask); + trace_xfs_irec_merge_post(pag, nrec); error = xfs_inobt_rec_check_count(mp, nrec); if (error) @@ -648,7 +647,7 @@ xfs_finobt_insert_sprec( struct xfs_buf *agbp, struct xfs_inobt_rec_incore *nrec) /* in/out: new rec. */ { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_btree_cur *cur; int error; int i; @@ -768,8 +767,7 @@ xfs_ialloc_ag_alloc( /* Allow space for the inode btree to split. */ args.minleft = igeo->inobt_maxlevels; error = xfs_alloc_vextent_exact_bno(&args, - XFS_AGB_TO_FSB(args.mp, pag->pag_agno, - args.agbno)); + xfs_agbno_to_fsb(pag, args.agbno)); if (error) return error; @@ -811,8 +809,8 @@ xfs_ialloc_ag_alloc( */ args.minleft = igeo->inobt_maxlevels; error = xfs_alloc_vextent_near_bno(&args, - XFS_AGB_TO_FSB(args.mp, pag->pag_agno, - be32_to_cpu(agi->agi_root))); + xfs_agbno_to_fsb(pag, + be32_to_cpu(agi->agi_root))); if (error) return error; } @@ -824,8 +822,8 @@ xfs_ialloc_ag_alloc( if (isaligned && args.fsbno == NULLFSBLOCK) { args.alignment = igeo->cluster_align; error = xfs_alloc_vextent_near_bno(&args, - XFS_AGB_TO_FSB(args.mp, pag->pag_agno, - be32_to_cpu(agi->agi_root))); + xfs_agbno_to_fsb(pag, + be32_to_cpu(agi->agi_root))); if (error) return error; } @@ -860,8 +858,8 @@ sparse_alloc: igeo->ialloc_blks; error = xfs_alloc_vextent_near_bno(&args, - XFS_AGB_TO_FSB(args.mp, pag->pag_agno, - be32_to_cpu(agi->agi_root))); + xfs_agbno_to_fsb(pag, + be32_to_cpu(agi->agi_root))); if (error) return error; @@ -884,7 +882,7 @@ sparse_alloc: * rather than a linear progression to prevent the next generation * number from being easily guessable. */ - error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag->pag_agno, + error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag_agno(pag), args.agbno, args.len, get_random_u32()); if (error) @@ -915,8 +913,7 @@ sparse_alloc: if (error == -EFSCORRUPTED) { xfs_alert(args.mp, "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u", - XFS_AGINO_TO_INO(args.mp, pag->pag_agno, - rec.ir_startino), + xfs_agino_to_ino(pag, rec.ir_startino), rec.ir_holemask, rec.ir_count); xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE); } @@ -1076,7 +1073,7 @@ xfs_dialloc_check_ino( if (error) return -EAGAIN; - error = xfs_imap_to_bp(pag->pag_mount, tp, &imap, &bp); + error = xfs_imap_to_bp(pag_mount(pag), tp, &imap, &bp); if (error) return -EAGAIN; @@ -1127,7 +1124,7 @@ xfs_dialloc_ag_inobt( /* * If in the same AG as the parent, try to get near the parent. */ - if (pagno == pag->pag_agno) { + if (pagno == pag_agno(pag)) { int doneleft; /* done, to the left */ int doneright; /* done, to the right */ @@ -1335,7 +1332,7 @@ alloc_inode: ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % XFS_INODES_PER_CHUNK) == 0); - ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset); + ino = xfs_agino_to_ino(pag, rec.ir_startino + offset); if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) { error = xfs_dialloc_check_ino(pag, tp, ino); @@ -1604,7 +1601,7 @@ xfs_dialloc_ag( * parent. If so, find the closest available inode to the parent. If * not, consider the agi hint or find the first free inode in the AG. */ - if (pag->pag_agno == pagno) + if (pag_agno(pag) == pagno) error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); else error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); @@ -1616,7 +1613,7 @@ xfs_dialloc_ag( ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % XFS_INODES_PER_CHUNK) == 0); - ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset); + ino = xfs_agino_to_ino(pag, rec.ir_startino + offset); if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) { error = xfs_dialloc_check_ino(pag, tp, ino); @@ -1845,6 +1842,40 @@ out_release: } /* + * Pick an AG for the new inode. + * + * Directories, symlinks, and regular files frequently allocate at least one + * block, so factor that potential expansion when we examine whether an AG has + * enough space for file creation. Try to keep metadata files all in the same + * AG. + */ +static inline xfs_agnumber_t +xfs_dialloc_pick_ag( + struct xfs_mount *mp, + struct xfs_inode *dp, + umode_t mode) +{ + xfs_agnumber_t start_agno; + + if (!dp) + return 0; + if (xfs_is_metadir_inode(dp)) { + if (mp->m_sb.sb_logstart) + return XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); + return 0; + } + + if (S_ISDIR(mode)) + return (atomic_inc_return(&mp->m_agirotor) - 1) % mp->m_maxagi; + + start_agno = XFS_INO_TO_AGNO(mp, dp->i_ino); + if (start_agno >= mp->m_maxagi) + start_agno = 0; + + return start_agno; +} + +/* * Allocate an on-disk inode. * * Mode is used to tell whether the new inode is a directory and hence where to @@ -1859,31 +1890,19 @@ xfs_dialloc( xfs_ino_t *new_ino) { struct xfs_mount *mp = (*tpp)->t_mountp; + struct xfs_perag *pag; + struct xfs_ino_geometry *igeo = M_IGEO(mp); + xfs_ino_t ino = NULLFSINO; xfs_ino_t parent = args->pip ? args->pip->i_ino : 0; - umode_t mode = args->mode & S_IFMT; xfs_agnumber_t agno; - int error = 0; xfs_agnumber_t start_agno; - struct xfs_perag *pag; - struct xfs_ino_geometry *igeo = M_IGEO(mp); + umode_t mode = args->mode & S_IFMT; bool ok_alloc = true; bool low_space = false; int flags; - xfs_ino_t ino = NULLFSINO; + int error = 0; - /* - * Directories, symlinks, and regular files frequently allocate at least - * one block, so factor that potential expansion when we examine whether - * an AG has enough space for file creation. - */ - if (S_ISDIR(mode)) - start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) % - mp->m_maxagi; - else { - start_agno = XFS_INO_TO_AGNO(mp, parent); - if (start_agno >= mp->m_maxagi) - start_agno = 0; - } + start_agno = xfs_dialloc_pick_ag(mp, args->pip, mode); /* * If we have already hit the ceiling of inode blocks then clear @@ -1974,7 +1993,7 @@ retry: static int xfs_difree_inode_chunk( struct xfs_trans *tp, - xfs_agnumber_t agno, + struct xfs_perag *pag, struct xfs_inobt_rec_incore *rec) { struct xfs_mount *mp = tp->t_mountp; @@ -1988,8 +2007,7 @@ xfs_difree_inode_chunk( if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ - return xfs_free_extent_later(tp, - XFS_AGB_TO_FSB(mp, agno, sagbno), + return xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, sagbno), M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE, 0); } @@ -2035,9 +2053,9 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); - error = xfs_free_extent_later(tp, - XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, - &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE, 0); + error = xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, agbno), + contigblk, &XFS_RMAP_OINFO_INODES, + XFS_AG_RESV_NONE, 0); if (error) return error; @@ -2059,7 +2077,7 @@ xfs_difree_inobt( struct xfs_icluster *xic, struct xfs_inobt_rec_incore *orec) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_agi *agi = agbp->b_addr; struct xfs_btree_cur *cur; struct xfs_inobt_rec_incore rec; @@ -2124,8 +2142,7 @@ xfs_difree_inobt( if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE && mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) { xic->deleted = true; - xic->first_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, - rec.ir_startino); + xic->first_ino = xfs_agino_to_ino(pag, rec.ir_startino); xic->alloc = xfs_inobt_irec_to_allocmask(&rec); /* @@ -2148,7 +2165,7 @@ xfs_difree_inobt( goto error0; } - error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec); + error = xfs_difree_inode_chunk(tp, pag, &rec); if (error) goto error0; } else { @@ -2194,7 +2211,7 @@ xfs_difree_finobt( xfs_agino_t agino, struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_btree_cur *cur; struct xfs_inobt_rec_incore rec; int offset = agino - ibtrec->ir_startino; @@ -2317,17 +2334,17 @@ xfs_difree( /* * Break up inode number into its components. */ - if (pag->pag_agno != XFS_INO_TO_AGNO(mp, inode)) { - xfs_warn(mp, "%s: agno != pag->pag_agno (%d != %d).", - __func__, XFS_INO_TO_AGNO(mp, inode), pag->pag_agno); + if (pag_agno(pag) != XFS_INO_TO_AGNO(mp, inode)) { + xfs_warn(mp, "%s: agno != pag_agno(pag) (%d != %d).", + __func__, XFS_INO_TO_AGNO(mp, inode), pag_agno(pag)); ASSERT(0); return -EINVAL; } agino = XFS_INO_TO_AGINO(mp, inode); - if (inode != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { - xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", + if (inode != xfs_agino_to_ino(pag, agino)) { + xfs_warn(mp, "%s: inode != xfs_agino_to_ino() (%llu != %llu).", __func__, (unsigned long long)inode, - (unsigned long long)XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)); + (unsigned long long)xfs_agino_to_ino(pag, agino)); ASSERT(0); return -EINVAL; } @@ -2380,7 +2397,7 @@ xfs_imap_lookup( xfs_agblock_t *offset_agbno, int flags) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_inobt_rec_incore rec; struct xfs_btree_cur *cur; struct xfs_buf *agbp; @@ -2391,7 +2408,7 @@ xfs_imap_lookup( if (error) { xfs_alert(mp, "%s: xfs_ialloc_read_agi() returned error %d, agno %d", - __func__, error, pag->pag_agno); + __func__, error, pag_agno(pag)); return error; } @@ -2441,7 +2458,7 @@ xfs_imap( struct xfs_imap *imap, /* location map structure */ uint flags) /* flags for inode btree lookup */ { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); xfs_agblock_t agbno; /* block number of inode in the alloc group */ xfs_agino_t agino; /* inode number within alloc group */ xfs_agblock_t chunk_agbno; /* first block in inode chunk */ @@ -2458,7 +2475,7 @@ xfs_imap( agino = XFS_INO_TO_AGINO(mp, ino); agbno = XFS_AGINO_TO_AGBNO(mp, agino); if (agbno >= mp->m_sb.sb_agblocks || - ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { + ino != xfs_agino_to_ino(pag, agino)) { error = -EINVAL; #ifdef DEBUG /* @@ -2473,11 +2490,11 @@ xfs_imap( __func__, (unsigned long long)agbno, (unsigned long)mp->m_sb.sb_agblocks); } - if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { + if (ino != xfs_agino_to_ino(pag, agino)) { xfs_alert(mp, - "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", + "%s: ino (0x%llx) != xfs_agino_to_ino() (0x%llx)", __func__, ino, - XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)); + xfs_agino_to_ino(pag, agino)); } xfs_stack_trace(); #endif /* DEBUG */ @@ -2507,7 +2524,7 @@ xfs_imap( offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); - imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, agbno); + imap->im_blkno = xfs_agbno_to_daddr(pag, agbno); imap->im_len = XFS_FSB_TO_BB(mp, 1); imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); @@ -2537,7 +2554,7 @@ out_map: offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + XFS_INO_TO_OFFSET(mp, ino); - imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, cluster_agbno); + imap->im_blkno = xfs_agbno_to_daddr(pag, cluster_agbno); imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster); imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); @@ -2733,13 +2750,13 @@ xfs_read_agi( xfs_buf_flags_t flags, struct xfs_buf **agibpp) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); int error; - trace_xfs_read_agi(pag->pag_mount, pag->pag_agno); + trace_xfs_read_agi(pag); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGI_DADDR(mp)), + XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGI_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), flags, agibpp, &xfs_agi_buf_ops); if (xfs_metadata_is_sick(error)) xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); @@ -2767,7 +2784,7 @@ xfs_ialloc_read_agi( struct xfs_agi *agi; int error; - trace_xfs_ialloc_read_agi(pag->pag_mount, pag->pag_agno); + trace_xfs_ialloc_read_agi(pag); error = xfs_read_agi(pag, tp, (flags & XFS_IALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, @@ -2787,7 +2804,7 @@ xfs_ialloc_read_agi( * we are in the middle of a forced shutdown. */ ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || - xfs_is_shutdown(pag->pag_mount)); + xfs_is_shutdown(pag_mount(pag))); if (agibpp) *agibpp = agibp; else @@ -2887,7 +2904,7 @@ xfs_ialloc_count_inodes_rec( xfs_failaddr_t fa; xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec); - fa = xfs_inobt_check_irec(cur->bc_ag.pag, &irec); + fa = xfs_inobt_check_irec(to_perag(cur->bc_group), &irec); if (fa) return xfs_inobt_complain_bad_rec(cur, fa, &irec); @@ -3126,13 +3143,13 @@ xfs_ialloc_check_shrink( int has; int error; - if (!xfs_has_sparseinodes(pag->pag_mount)) + if (!xfs_has_sparseinodes(pag_mount(pag))) return 0; cur = xfs_inobt_init_cursor(pag, tp, agibp); /* Look up the inobt record that would correspond to the new EOFS. */ - agino = XFS_AGB_TO_AGINO(pag->pag_mount, new_length); + agino = XFS_AGB_TO_AGINO(pag_mount(pag), new_length); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has); if (error || !has) goto out; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 401b42d52af6..9b34896dd1a3 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -37,7 +37,7 @@ STATIC struct xfs_btree_cur * xfs_inobt_dup_cursor( struct xfs_btree_cur *cur) { - return xfs_inobt_init_cursor(cur->bc_ag.pag, cur->bc_tp, + return xfs_inobt_init_cursor(to_perag(cur->bc_group), cur->bc_tp, cur->bc_ag.agbp); } @@ -45,7 +45,7 @@ STATIC struct xfs_btree_cur * xfs_finobt_dup_cursor( struct xfs_btree_cur *cur) { - return xfs_finobt_init_cursor(cur->bc_ag.pag, cur->bc_tp, + return xfs_finobt_init_cursor(to_perag(cur->bc_group), cur->bc_tp, cur->bc_ag.agbp); } @@ -112,7 +112,7 @@ __xfs_inobt_alloc_block( memset(&args, 0, sizeof(args)); args.tp = cur->bc_tp; args.mp = cur->bc_mp; - args.pag = cur->bc_ag.pag; + args.pag = to_perag(cur->bc_group); args.oinfo = XFS_RMAP_OINFO_INOBT; args.minlen = 1; args.maxlen = 1; @@ -120,7 +120,7 @@ __xfs_inobt_alloc_block( args.resv = resv; error = xfs_alloc_vextent_near_bno(&args, - XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno, sbno)); + xfs_agbno_to_fsb(args.pag, sbno)); if (error) return error; @@ -248,7 +248,7 @@ xfs_inobt_init_ptr_from_cur( { struct xfs_agi *agi = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agi->agi_seqno)); + ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agi->agi_seqno)); ptr->s = agi->agi_root; } @@ -260,7 +260,8 @@ xfs_finobt_init_ptr_from_cur( { struct xfs_agi *agi = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agi->agi_seqno)); + ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agi->agi_seqno)); + ptr->s = agi->agi_free_root; } @@ -478,12 +479,12 @@ xfs_inobt_init_cursor( struct xfs_trans *tp, struct xfs_buf *agbp) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_btree_cur *cur; cur = xfs_btree_alloc_cursor(mp, tp, &xfs_inobt_ops, M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache); - cur->bc_ag.pag = xfs_perag_hold(pag); + cur->bc_group = xfs_group_hold(pag_group(pag)); cur->bc_ag.agbp = agbp; if (agbp) { struct xfs_agi *agi = agbp->b_addr; @@ -504,12 +505,12 @@ xfs_finobt_init_cursor( struct xfs_trans *tp, struct xfs_buf *agbp) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_btree_cur *cur; cur = xfs_btree_alloc_cursor(mp, tp, &xfs_finobt_ops, M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache); - cur->bc_ag.pag = xfs_perag_hold(pag); + cur->bc_group = xfs_group_hold(pag_group(pag)); cur->bc_ag.agbp = agbp; if (agbp) { struct xfs_agi *agi = agbp->b_addr; @@ -715,8 +716,8 @@ static xfs_extlen_t xfs_inobt_max_size( struct xfs_perag *pag) { - struct xfs_mount *mp = pag->pag_mount; - xfs_agblock_t agblocks = pag->block_count; + struct xfs_mount *mp = pag_mount(pag); + xfs_agblock_t agblocks = pag_group(pag)->xg_block_count; /* Bail out if we're uninitialized, which can happen in mkfs. */ if (M_IGEO(mp)->inobt_mxr[0] == 0) @@ -727,7 +728,7 @@ xfs_inobt_max_size( * never be available for the kinds of things that would require btree * expansion. We therefore can pretend the space isn't there. */ - if (xfs_ag_contains_log(mp, pag->pag_agno)) + if (xfs_ag_contains_log(mp, pag_agno(pag))) agblocks -= mp->m_sb.sb_logblocks; return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, @@ -791,10 +792,10 @@ xfs_finobt_calc_reserves( xfs_extlen_t tree_len = 0; int error; - if (!xfs_has_finobt(pag->pag_mount)) + if (!xfs_has_finobt(pag_mount(pag))) return 0; - if (xfs_has_inobtcounts(pag->pag_mount)) + if (xfs_has_inobtcounts(pag_mount(pag))) error = xfs_finobt_read_blocks(pag, tp, &tree_len); else error = xfs_finobt_count_blocks(pag, tp, &tree_len); diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 79babeac9d75..424861fbf1bd 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -19,6 +19,7 @@ #include "xfs_ialloc.h" #include "xfs_dir2.h" #include "xfs_health.h" +#include "xfs_metafile.h" #include <linux/iversion.h> @@ -209,12 +210,15 @@ xfs_inode_from_disk( * They will also be unconditionally written back to disk as v2 inodes. */ if (unlikely(from->di_version == 1)) { - set_nlink(inode, be16_to_cpu(from->di_onlink)); + /* di_metatype used to be di_onlink */ + set_nlink(inode, be16_to_cpu(from->di_metatype)); ip->i_projid = 0; } else { set_nlink(inode, be32_to_cpu(from->di_nlink)); ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | be16_to_cpu(from->di_projid_lo); + if (xfs_dinode_is_metadir(from)) + ip->i_metatype = be16_to_cpu(from->di_metatype); } i_uid_write(inode, be32_to_cpu(from->di_uid)); @@ -315,7 +319,10 @@ xfs_inode_to_disk( struct inode *inode = VFS_I(ip); to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); - to->di_onlink = 0; + if (xfs_is_metadir_inode(ip)) + to->di_metatype = cpu_to_be16(ip->i_metatype); + else + to->di_metatype = 0; to->di_format = xfs_ifork_format(&ip->i_df); to->di_uid = cpu_to_be32(i_uid_read(inode)); @@ -483,6 +490,69 @@ xfs_dinode_verify_nrext64( return NULL; } +/* + * Validate all the picky requirements we have for a file that claims to be + * filesystem metadata. + */ +xfs_failaddr_t +xfs_dinode_verify_metadir( + struct xfs_mount *mp, + struct xfs_dinode *dip, + uint16_t mode, + uint16_t flags, + uint64_t flags2) +{ + if (!xfs_has_metadir(mp)) + return __this_address; + + /* V5 filesystem only */ + if (dip->di_version < 3) + return __this_address; + + if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX) + return __this_address; + + /* V3 inode fields that are always zero */ + if ((flags2 & XFS_DIFLAG2_NREXT64) && dip->di_nrext64_pad) + return __this_address; + if (!(flags2 & XFS_DIFLAG2_NREXT64) && dip->di_flushiter) + return __this_address; + + /* Metadata files can only be directories or regular files */ + if (!S_ISDIR(mode) && !S_ISREG(mode)) + return __this_address; + + /* They must have zero access permissions */ + if (mode & 0777) + return __this_address; + + /* DMAPI event and state masks are zero */ + if (dip->di_dmevmask || dip->di_dmstate) + return __this_address; + + /* + * User and group IDs must be zero. The project ID is used for + * grouping inodes. Metadata inodes are never accounted to quotas. + */ + if (dip->di_uid || dip->di_gid) + return __this_address; + + /* Mandatory inode flags must be set */ + if (S_ISDIR(mode)) { + if ((flags & XFS_METADIR_DIFLAGS) != XFS_METADIR_DIFLAGS) + return __this_address; + } else { + if ((flags & XFS_METAFILE_DIFLAGS) != XFS_METAFILE_DIFLAGS) + return __this_address; + } + + /* dax flags2 must not be set */ + if (flags2 & XFS_DIFLAG2_DAX) + return __this_address; + + return NULL; +} + xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, @@ -523,8 +593,11 @@ xfs_dinode_verify( * di_nlink==0 on a V1 inode. V2/3 inodes would get written out with * di_onlink==0, so we can check that. */ - if (dip->di_version >= 2) { - if (dip->di_onlink) + if (dip->di_version == 2) { + if (dip->di_metatype) + return __this_address; + } else if (dip->di_version >= 3) { + if (!xfs_dinode_is_metadir(dip) && dip->di_metatype) return __this_address; } @@ -546,7 +619,8 @@ xfs_dinode_verify( if (dip->di_nlink) return __this_address; } else { - if (dip->di_onlink) + /* di_metatype used to be di_onlink */ + if (dip->di_metatype) return __this_address; } } @@ -663,6 +737,12 @@ xfs_dinode_verify( !xfs_has_bigtime(mp)) return __this_address; + if (flags2 & XFS_DIFLAG2_METADATA) { + fa = xfs_dinode_verify_metadir(mp, dip, mode, flags, flags2); + if (fa) + return fa; + } + return NULL; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 585ed5a110af..8d43d2641c73 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -28,6 +28,9 @@ int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, struct xfs_dinode *dip); +xfs_failaddr_t xfs_dinode_verify_metadir(struct xfs_mount *mp, + struct xfs_dinode *dip, uint16_t mode, uint16_t flags, + uint64_t flags2); xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp, uint32_t extsize, uint16_t mode, uint16_t flags); xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c index cc38e1c3c3e1..deb0b7c00a1f 100644 --- a/fs/xfs/libxfs/xfs_inode_util.c +++ b/fs/xfs/libxfs/xfs_inode_util.c @@ -224,6 +224,8 @@ xfs_inode_inherit_flags2( } if (pip->i_diflags2 & XFS_DIFLAG2_DAX) ip->i_diflags2 |= XFS_DIFLAG2_DAX; + if (xfs_is_metadir_inode(pip)) + ip->i_diflags2 |= XFS_DIFLAG2_METADATA; /* Don't let invalid cowextsize hints propagate. */ failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, @@ -442,8 +444,8 @@ xfs_iunlink_update_bucket( ASSERT(xfs_verify_agino_or_null(pag, new_agino)); old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); - trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index, - old_value, new_agino); + trace_xfs_iunlink_update_bucket(pag, bucket_index, old_value, + new_agino); /* * We should never find the head of the list already set to the value diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 3e6682ed656b..15dec19b6c32 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -248,6 +248,8 @@ typedef struct xfs_trans_header { #define XFS_LI_ATTRD 0x1247 /* attr set/remove done */ #define XFS_LI_XMI 0x1248 /* mapping exchange intent */ #define XFS_LI_XMD 0x1249 /* mapping exchange done */ +#define XFS_LI_EFI_RT 0x124a /* realtime extent free intent */ +#define XFS_LI_EFD_RT 0x124b /* realtime extent free done */ #define XFS_LI_TYPE_DESC \ { XFS_LI_EFI, "XFS_LI_EFI" }, \ @@ -267,7 +269,9 @@ typedef struct xfs_trans_header { { XFS_LI_ATTRI, "XFS_LI_ATTRI" }, \ { XFS_LI_ATTRD, "XFS_LI_ATTRD" }, \ { XFS_LI_XMI, "XFS_LI_XMI" }, \ - { XFS_LI_XMD, "XFS_LI_XMD" } + { XFS_LI_XMD, "XFS_LI_XMD" }, \ + { XFS_LI_EFI_RT, "XFS_LI_EFI_RT" }, \ + { XFS_LI_EFD_RT, "XFS_LI_EFD_RT" } /* * Inode Log Item Format definitions. @@ -404,7 +408,7 @@ struct xfs_log_dinode { uint16_t di_mode; /* mode and type of file */ int8_t di_version; /* inode version */ int8_t di_format; /* format of di_c data */ - uint8_t di_pad3[2]; /* unused in v2/3 inodes */ + uint16_t di_metatype; /* metadata type, if DIFLAG2_METADATA */ uint32_t di_uid; /* owner's user id */ uint32_t di_gid; /* owner's group id */ uint32_t di_nlink; /* number of links to file */ diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 521d327e4c89..5397a8ff004d 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -77,6 +77,8 @@ extern const struct xlog_recover_item_ops xlog_attri_item_ops; extern const struct xlog_recover_item_ops xlog_attrd_item_ops; extern const struct xlog_recover_item_ops xlog_xmi_item_ops; extern const struct xlog_recover_item_ops xlog_xmd_item_ops; +extern const struct xlog_recover_item_ops xlog_rtefi_item_ops; +extern const struct xlog_recover_item_ops xlog_rtefd_item_ops; /* * Macros, structures, prototypes for internal log manager use. diff --git a/fs/xfs/libxfs/xfs_metadir.c b/fs/xfs/libxfs/xfs_metadir.c new file mode 100644 index 000000000000..bae7377c0f22 --- /dev/null +++ b/fs/xfs/libxfs/xfs_metadir.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_trans.h" +#include "xfs_metafile.h" +#include "xfs_metadir.h" +#include "xfs_trace.h" +#include "xfs_inode.h" +#include "xfs_quota.h" +#include "xfs_ialloc.h" +#include "xfs_bmap_btree.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_trans_space.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_parent.h" +#include "xfs_health.h" + +/* + * Metadata Directory Tree + * ======================= + * + * These functions provide an abstraction layer for looking up, creating, and + * deleting metadata inodes that live within a special metadata directory tree. + * + * This code does not manage the five existing metadata inodes: real time + * bitmap & summary; and the user, group, and quotas. All other metadata + * inodes must use only the xfs_meta{dir,file}_* functions. + * + * Callers wishing to create or hardlink a metadata inode must create an + * xfs_metadir_update structure, call the appropriate xfs_metadir* function, + * and then call xfs_metadir_commit or xfs_metadir_cancel to commit or cancel + * the update. Files in the metadata directory tree currently cannot be + * unlinked. + * + * When the metadir feature is enabled, all metadata inodes must have the + * "metadata" inode flag set to prevent them from being exposed to the outside + * world. + * + * Callers must take the ILOCK of any inode in the metadata directory tree to + * synchronize access to that inode. It is never necessary to take the IOLOCK + * or the MMAPLOCK since metadata inodes must not be exposed to user space. + */ + +static inline void +xfs_metadir_set_xname( + struct xfs_name *xname, + const char *path, + unsigned char ftype) +{ + xname->name = (const unsigned char *)path; + xname->len = strlen(path); + xname->type = ftype; +} + +/* + * Given a parent directory @dp and a metadata inode path component @xname, + * Look up the inode number in the directory, returning it in @ino. + * @xname.type must match the directory entry's ftype. + * + * Caller must hold ILOCK_EXCL. + */ +static inline int +xfs_metadir_lookup( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_name *xname, + xfs_ino_t *ino) +{ + struct xfs_mount *mp = dp->i_mount; + struct xfs_da_args args = { + .trans = tp, + .dp = dp, + .geo = mp->m_dir_geo, + .name = xname->name, + .namelen = xname->len, + .hashval = xfs_dir2_hashname(mp, xname), + .whichfork = XFS_DATA_FORK, + .op_flags = XFS_DA_OP_OKNOENT, + .owner = dp->i_ino, + }; + int error; + + if (!S_ISDIR(VFS_I(dp)->i_mode)) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; + } + if (xfs_is_shutdown(mp)) + return -EIO; + + error = xfs_dir_lookup_args(&args); + if (error) + return error; + + if (!xfs_verify_ino(mp, args.inumber)) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; + } + if (xname->type != XFS_DIR3_FT_UNKNOWN && xname->type != args.filetype) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; + } + + trace_xfs_metadir_lookup(dp, xname, args.inumber); + *ino = args.inumber; + return 0; +} + +/* + * Look up and read a metadata inode from the metadata directory. If the path + * component doesn't exist, return -ENOENT. + */ +int +xfs_metadir_load( + struct xfs_trans *tp, + struct xfs_inode *dp, + const char *path, + enum xfs_metafile_type metafile_type, + struct xfs_inode **ipp) +{ + struct xfs_name xname; + xfs_ino_t ino; + int error; + + xfs_metadir_set_xname(&xname, path, XFS_DIR3_FT_UNKNOWN); + + xfs_ilock(dp, XFS_ILOCK_EXCL); + error = xfs_metadir_lookup(tp, dp, &xname, &ino); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + if (error) + return error; + return xfs_trans_metafile_iget(tp, ino, metafile_type, ipp); +} + +/* + * Unlock and release resources after committing (or cancelling) a metadata + * directory tree operation. The caller retains its reference to @upd->ip + * and must release it explicitly. + */ +static inline void +xfs_metadir_teardown( + struct xfs_metadir_update *upd, + int error) +{ + trace_xfs_metadir_teardown(upd, error); + + if (upd->ppargs) { + xfs_parent_finish(upd->dp->i_mount, upd->ppargs); + upd->ppargs = NULL; + } + + if (upd->ip) { + if (upd->ip_locked) + xfs_iunlock(upd->ip, XFS_ILOCK_EXCL); + upd->ip_locked = false; + } + + if (upd->dp_locked) + xfs_iunlock(upd->dp, XFS_ILOCK_EXCL); + upd->dp_locked = false; +} + +/* + * Begin the process of creating a metadata file by allocating transactions + * and taking whatever resources we're going to need. + */ +int +xfs_metadir_start_create( + struct xfs_metadir_update *upd) +{ + struct xfs_mount *mp = upd->dp->i_mount; + int error; + + ASSERT(upd->dp != NULL); + ASSERT(upd->ip == NULL); + ASSERT(xfs_has_metadir(mp)); + ASSERT(upd->metafile_type != XFS_METAFILE_UNKNOWN); + + error = xfs_parent_start(mp, &upd->ppargs); + if (error) + return error; + + /* + * If we ever need the ability to create rt metadata files on a + * pre-metadir filesystem, we'll need to dqattach the parent here. + * Currently we assume that mkfs will create the files and quotacheck + * will account for them. + */ + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create, + xfs_create_space_res(mp, MAXNAMELEN), 0, 0, &upd->tp); + if (error) + goto out_teardown; + + /* + * Lock the parent directory if there is one. We can't ijoin it to + * the transaction until after the child file has been created. + */ + xfs_ilock(upd->dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); + upd->dp_locked = true; + + trace_xfs_metadir_start_create(upd); + return 0; +out_teardown: + xfs_metadir_teardown(upd, error); + return error; +} + +/* + * Create a metadata inode with the given @mode, and insert it into the + * metadata directory tree at the given @upd->path. The path up to the final + * component must already exist. The final path component must not exist. + * + * The new metadata inode will be attached to the update structure @upd->ip, + * with the ILOCK held until the caller releases it. + * + * NOTE: This function may return a new inode to the caller even if it returns + * a negative error code. If an inode is passed back, the caller must finish + * setting up the inode before releasing it. + */ +int +xfs_metadir_create( + struct xfs_metadir_update *upd, + umode_t mode) +{ + struct xfs_icreate_args args = { + .pip = upd->dp, + .mode = mode, + }; + struct xfs_name xname; + struct xfs_dir_update du = { + .dp = upd->dp, + .name = &xname, + .ppargs = upd->ppargs, + }; + struct xfs_mount *mp = upd->dp->i_mount; + xfs_ino_t ino; + unsigned int resblks; + int error; + + xfs_assert_ilocked(upd->dp, XFS_ILOCK_EXCL); + + /* Check that the name does not already exist in the directory. */ + xfs_metadir_set_xname(&xname, upd->path, XFS_DIR3_FT_UNKNOWN); + error = xfs_metadir_lookup(upd->tp, upd->dp, &xname, &ino); + switch (error) { + case -ENOENT: + break; + case 0: + error = -EEXIST; + fallthrough; + default: + return error; + } + + /* + * A newly created regular or special file just has one directory + * entry pointing to them, but a directory also the "." entry + * pointing to itself. + */ + error = xfs_dialloc(&upd->tp, &args, &ino); + if (error) + return error; + error = xfs_icreate(upd->tp, ino, &args, &upd->ip); + if (error) + return error; + du.ip = upd->ip; + xfs_metafile_set_iflag(upd->tp, upd->ip, upd->metafile_type); + upd->ip_locked = true; + + /* + * Join the directory inode to the transaction. We do not do it + * earlier because xfs_dialloc rolls the transaction. + */ + xfs_trans_ijoin(upd->tp, upd->dp, 0); + + /* Create the entry. */ + if (S_ISDIR(args.mode)) + resblks = xfs_mkdir_space_res(mp, xname.len); + else + resblks = xfs_create_space_res(mp, xname.len); + xname.type = xfs_mode_to_ftype(args.mode); + + trace_xfs_metadir_try_create(upd); + + error = xfs_dir_create_child(upd->tp, resblks, &du); + if (error) + return error; + + /* Metadir files are not accounted to quota. */ + + trace_xfs_metadir_create(upd); + + return 0; +} + +#ifndef __KERNEL__ +/* + * Begin the process of linking a metadata file by allocating transactions + * and locking whatever resources we're going to need. + */ +int +xfs_metadir_start_link( + struct xfs_metadir_update *upd) +{ + struct xfs_mount *mp = upd->dp->i_mount; + unsigned int resblks; + int nospace_error = 0; + int error; + + ASSERT(upd->dp != NULL); + ASSERT(upd->ip != NULL); + ASSERT(xfs_has_metadir(mp)); + + error = xfs_parent_start(mp, &upd->ppargs); + if (error) + return error; + + resblks = xfs_link_space_res(mp, MAXNAMELEN); + error = xfs_trans_alloc_dir(upd->dp, &M_RES(mp)->tr_link, upd->ip, + &resblks, &upd->tp, &nospace_error); + if (error) + goto out_teardown; + if (!resblks) { + /* We don't allow reservationless updates. */ + xfs_trans_cancel(upd->tp); + upd->tp = NULL; + xfs_iunlock(upd->dp, XFS_ILOCK_EXCL); + xfs_iunlock(upd->ip, XFS_ILOCK_EXCL); + error = nospace_error; + goto out_teardown; + } + + upd->dp_locked = true; + upd->ip_locked = true; + + trace_xfs_metadir_start_link(upd); + return 0; +out_teardown: + xfs_metadir_teardown(upd, error); + return error; +} + +/* + * Link the metadata directory given by @path to the inode @upd->ip. + * The path (up to the final component) must already exist, but the final + * component must not already exist. + */ +int +xfs_metadir_link( + struct xfs_metadir_update *upd) +{ + struct xfs_name xname; + struct xfs_dir_update du = { + .dp = upd->dp, + .name = &xname, + .ip = upd->ip, + .ppargs = upd->ppargs, + }; + struct xfs_mount *mp = upd->dp->i_mount; + xfs_ino_t ino; + unsigned int resblks; + int error; + + xfs_assert_ilocked(upd->dp, XFS_ILOCK_EXCL); + xfs_assert_ilocked(upd->ip, XFS_ILOCK_EXCL); + + /* Look up the name in the current directory. */ + xfs_metadir_set_xname(&xname, upd->path, + xfs_mode_to_ftype(VFS_I(upd->ip)->i_mode)); + error = xfs_metadir_lookup(upd->tp, upd->dp, &xname, &ino); + switch (error) { + case -ENOENT: + break; + case 0: + error = -EEXIST; + fallthrough; + default: + return error; + } + + resblks = xfs_link_space_res(mp, xname.len); + error = xfs_dir_add_child(upd->tp, resblks, &du); + if (error) + return error; + + trace_xfs_metadir_link(upd); + + return 0; +} +#endif /* ! __KERNEL__ */ + +/* Commit a metadir update and unlock/drop all resources. */ +int +xfs_metadir_commit( + struct xfs_metadir_update *upd) +{ + int error; + + trace_xfs_metadir_commit(upd); + + error = xfs_trans_commit(upd->tp); + upd->tp = NULL; + + xfs_metadir_teardown(upd, error); + return error; +} + +/* Cancel a metadir update and unlock/drop all resources. */ +void +xfs_metadir_cancel( + struct xfs_metadir_update *upd, + int error) +{ + trace_xfs_metadir_cancel(upd); + + xfs_trans_cancel(upd->tp); + upd->tp = NULL; + + xfs_metadir_teardown(upd, error); +} + +/* Create a metadata for the last component of the path. */ +int +xfs_metadir_mkdir( + struct xfs_inode *dp, + const char *path, + struct xfs_inode **ipp) +{ + struct xfs_metadir_update upd = { + .dp = dp, + .path = path, + .metafile_type = XFS_METAFILE_DIR, + }; + int error; + + if (xfs_is_shutdown(dp->i_mount)) + return -EIO; + + /* Allocate a transaction to create the last directory. */ + error = xfs_metadir_start_create(&upd); + if (error) + return error; + + /* Create the subdirectory and take our reference. */ + error = xfs_metadir_create(&upd, S_IFDIR); + if (error) + goto out_cancel; + + error = xfs_metadir_commit(&upd); + if (error) + goto out_irele; + + xfs_finish_inode_setup(upd.ip); + *ipp = upd.ip; + return 0; + +out_cancel: + xfs_metadir_cancel(&upd, error); +out_irele: + /* Have to finish setting up the inode to ensure it's deleted. */ + if (upd.ip) { + xfs_finish_inode_setup(upd.ip); + xfs_irele(upd.ip); + } + return error; +} diff --git a/fs/xfs/libxfs/xfs_metadir.h b/fs/xfs/libxfs/xfs_metadir.h new file mode 100644 index 000000000000..bfecac7d3d14 --- /dev/null +++ b/fs/xfs/libxfs/xfs_metadir.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_METADIR_H__ +#define __XFS_METADIR_H__ + +/* Cleanup widget for metadata inode creation and deletion. */ +struct xfs_metadir_update { + /* Parent directory */ + struct xfs_inode *dp; + + /* Path to metadata file */ + const char *path; + + /* Parent pointer update context */ + struct xfs_parent_args *ppargs; + + /* Child metadata file */ + struct xfs_inode *ip; + + struct xfs_trans *tp; + + enum xfs_metafile_type metafile_type; + + unsigned int dp_locked:1; + unsigned int ip_locked:1; +}; + +int xfs_metadir_load(struct xfs_trans *tp, struct xfs_inode *dp, + const char *path, enum xfs_metafile_type metafile_type, + struct xfs_inode **ipp); + +int xfs_metadir_start_create(struct xfs_metadir_update *upd); +int xfs_metadir_create(struct xfs_metadir_update *upd, umode_t mode); + +int xfs_metadir_start_link(struct xfs_metadir_update *upd); +int xfs_metadir_link(struct xfs_metadir_update *upd); + +int xfs_metadir_commit(struct xfs_metadir_update *upd); +void xfs_metadir_cancel(struct xfs_metadir_update *upd, int error); + +int xfs_metadir_mkdir(struct xfs_inode *dp, const char *path, + struct xfs_inode **ipp); + +#endif /* __XFS_METADIR_H__ */ diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c new file mode 100644 index 000000000000..adeb25d1a444 --- /dev/null +++ b/fs/xfs/libxfs/xfs_metafile.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_trans.h" +#include "xfs_metafile.h" +#include "xfs_trace.h" +#include "xfs_inode.h" + +/* Set up an inode to be recognized as a metadata directory inode. */ +void +xfs_metafile_set_iflag( + struct xfs_trans *tp, + struct xfs_inode *ip, + enum xfs_metafile_type metafile_type) +{ + VFS_I(ip)->i_mode &= ~0777; + VFS_I(ip)->i_uid = GLOBAL_ROOT_UID; + VFS_I(ip)->i_gid = GLOBAL_ROOT_GID; + if (S_ISDIR(VFS_I(ip)->i_mode)) + ip->i_diflags |= XFS_METADIR_DIFLAGS; + else + ip->i_diflags |= XFS_METAFILE_DIFLAGS; + ip->i_diflags2 &= ~XFS_DIFLAG2_DAX; + ip->i_diflags2 |= XFS_DIFLAG2_METADATA; + ip->i_metatype = metafile_type; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); +} + +/* Clear the metadata directory inode flag. */ +void +xfs_metafile_clear_iflag( + struct xfs_trans *tp, + struct xfs_inode *ip) +{ + ASSERT(xfs_is_metadir_inode(ip)); + ASSERT(VFS_I(ip)->i_nlink == 0); + + ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); +} diff --git a/fs/xfs/libxfs/xfs_metafile.h b/fs/xfs/libxfs/xfs_metafile.h new file mode 100644 index 000000000000..acec400123db --- /dev/null +++ b/fs/xfs/libxfs/xfs_metafile.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_METAFILE_H__ +#define __XFS_METAFILE_H__ + +/* All metadata files must have these flags set. */ +#define XFS_METAFILE_DIFLAGS (XFS_DIFLAG_IMMUTABLE | \ + XFS_DIFLAG_SYNC | \ + XFS_DIFLAG_NOATIME | \ + XFS_DIFLAG_NODUMP | \ + XFS_DIFLAG_NODEFRAG) + +/* All metadata directories must have these flags set. */ +#define XFS_METADIR_DIFLAGS (XFS_METAFILE_DIFLAGS | \ + XFS_DIFLAG_NOSYMLINKS) + +void xfs_metafile_set_iflag(struct xfs_trans *tp, struct xfs_inode *ip, + enum xfs_metafile_type metafile_type); +void xfs_metafile_clear_iflag(struct xfs_trans *tp, struct xfs_inode *ip); + +/* Code specific to kernel/userspace; must be provided externally. */ + +int xfs_trans_metafile_iget(struct xfs_trans *tp, xfs_ino_t ino, + enum xfs_metafile_type metafile_type, struct xfs_inode **ipp); +int xfs_metafile_iget(struct xfs_mount *mp, xfs_ino_t ino, + enum xfs_metafile_type metafile_type, struct xfs_inode **ipp); + +#endif /* __XFS_METAFILE_H__ */ diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h index 23c133fd36f5..ad0dedf00f18 100644 --- a/fs/xfs/libxfs/xfs_ondisk.h +++ b/fs/xfs/libxfs/xfs_ondisk.h @@ -19,40 +19,46 @@ static_assert((value) == (expected), \ "XFS: value of " #value " is wrong, expected " #expected) +#define XFS_CHECK_SB_OFFSET(field, offset) \ + XFS_CHECK_OFFSET(struct xfs_dsb, field, offset); \ + XFS_CHECK_OFFSET(struct xfs_sb, field, offset); + static inline void __init xfs_check_ondisk_structs(void) { - /* ag/file structures */ + /* file structures */ XFS_CHECK_STRUCT_SIZE(struct xfs_acl, 4); XFS_CHECK_STRUCT_SIZE(struct xfs_acl_entry, 12); - XFS_CHECK_STRUCT_SIZE(struct xfs_agf, 224); - XFS_CHECK_STRUCT_SIZE(struct xfs_agfl, 36); - XFS_CHECK_STRUCT_SIZE(struct xfs_agi, 344); XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_key, 8); XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_rec, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_bmdr_block, 4); - XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_shdr, 48); - XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_lhdr, 64); - XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block, 72); XFS_CHECK_STRUCT_SIZE(struct xfs_dinode, 176); XFS_CHECK_STRUCT_SIZE(struct xfs_disk_dquot, 104); XFS_CHECK_STRUCT_SIZE(struct xfs_dqblk, 136); - XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 264); XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56); + XFS_CHECK_STRUCT_SIZE(xfs_timestamp_t, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_legacy_timestamp, 8); + + /* space btrees */ + XFS_CHECK_STRUCT_SIZE(struct xfs_agf, 224); + XFS_CHECK_STRUCT_SIZE(struct xfs_agfl, 36); + XFS_CHECK_STRUCT_SIZE(struct xfs_agi, 344); + XFS_CHECK_STRUCT_SIZE(struct xfs_alloc_rec, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block, 72); + XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_lhdr, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_shdr, 48); XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4); XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_key, 4); XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_rec, 12); XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key, 20); XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec, 24); - XFS_CHECK_STRUCT_SIZE(xfs_timestamp_t, 8); - XFS_CHECK_STRUCT_SIZE(struct xfs_legacy_timestamp, 8); XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t, 8); XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4); - XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8); XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4); XFS_CHECK_STRUCT_SIZE(xfs_refcount_ptr_t, 4); XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t, 4); + XFS_CHECK_STRUCT_SIZE(xfs_bmdr_key_t, 8); /* dir/attr trees */ XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80); @@ -67,33 +73,34 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free_hdr, 64); XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf, 64); XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf_hdr, 64); - XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_entry_t, 8); - XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_hdr_t, 32); - XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_map_t, 4); - XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_local_t, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_entry, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_hdr, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_map, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_name_local, 4); /* realtime structures */ + XFS_CHECK_STRUCT_SIZE(struct xfs_rtsb, 56); XFS_CHECK_STRUCT_SIZE(union xfs_rtword_raw, 4); XFS_CHECK_STRUCT_SIZE(union xfs_suminfo_raw, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_rtbuf_blkinfo, 48); /* - * m68k has problems with xfs_attr_leaf_name_remote_t, but we pad it to - * 4 bytes anyway so it's not obviously a problem. Hence for the moment - * we don't check this structure. This can be re-instated when the attr - * definitions are updated to use c99 VLA definitions. + * m68k has problems with struct xfs_attr_leaf_name_remote, but we pad + * it to 4 bytes anyway so it's not obviously a problem. Hence for the + * moment we don't check this structure. This can be re-instated when + * the attr definitions are updated to use c99 VLA definitions. * - XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_remote_t, 12); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_name_remote, 12); */ - XFS_CHECK_OFFSET(struct xfs_dsb, sb_crc, 224); - XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, valuelen, 0); - XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, namelen, 2); - XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, nameval, 3); - XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, valueblk, 0); - XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, valuelen, 4); - XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, namelen, 8); - XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, name, 9); - XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t, 32); + XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_local, valuelen, 0); + XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_local, namelen, 2); + XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_local, nameval, 3); + XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_remote, valueblk, 0); + XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_remote, valuelen, 4); + XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_remote, namelen, 8); + XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_remote, name, 9); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leafblock, 32); XFS_CHECK_STRUCT_SIZE(struct xfs_attr_sf_hdr, 4); XFS_CHECK_OFFSET(struct xfs_attr_sf_hdr, totsize, 0); XFS_CHECK_OFFSET(struct xfs_attr_sf_hdr, count, 2); @@ -101,27 +108,41 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, valuelen, 1); XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, flags, 2); XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, nameval, 3); - XFS_CHECK_STRUCT_SIZE(xfs_da_blkinfo_t, 12); - XFS_CHECK_STRUCT_SIZE(xfs_da_intnode_t, 16); - XFS_CHECK_STRUCT_SIZE(xfs_da_node_entry_t, 8); - XFS_CHECK_STRUCT_SIZE(xfs_da_node_hdr_t, 16); - XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_free_t, 4); - XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_hdr_t, 16); - XFS_CHECK_OFFSET(xfs_dir2_data_unused_t, freetag, 0); - XFS_CHECK_OFFSET(xfs_dir2_data_unused_t, length, 2); - XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_hdr_t, 16); - XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_t, 16); - XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_entry_t, 8); - XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_hdr_t, 16); - XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_t, 16); - XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_tail_t, 4); - XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_entry_t, 3); - XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, namelen, 0); - XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, offset, 1); - XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, name, 3); - XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_hdr_t, 10); + XFS_CHECK_STRUCT_SIZE(struct xfs_da_blkinfo, 12); + XFS_CHECK_STRUCT_SIZE(struct xfs_da_intnode, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_da_node_entry, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_da_node_hdr, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_hdr, 16); + XFS_CHECK_OFFSET(struct xfs_dir2_data_unused, freetag, 0); + XFS_CHECK_OFFSET(struct xfs_dir2_data_unused, length, 2); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free_hdr, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_entry, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_hdr, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_tail, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_entry, 3); + XFS_CHECK_OFFSET(struct xfs_dir2_sf_entry, namelen, 0); + XFS_CHECK_OFFSET(struct xfs_dir2_sf_entry, offset, 1); + XFS_CHECK_OFFSET(struct xfs_dir2_sf_entry, name, 3); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_hdr, 10); XFS_CHECK_STRUCT_SIZE(struct xfs_parent_rec, 12); + /* ondisk dir/attr structures from xfs/122 */ + XFS_CHECK_STRUCT_SIZE(struct xfs_attr_sf_entry, 3); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_hdr, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_unused, 6); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free_hdr, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_entry, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_hdr, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_tail, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_entry, 3); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_hdr, 10); + /* log structures */ XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88); XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24); @@ -157,6 +178,11 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16); XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16); + /* ondisk log structures from xfs/122 */ + XFS_CHECK_STRUCT_SIZE(struct xfs_unmount_log_format, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_xmd_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_xmi_log_format, 88); + /* parent pointer ioctls */ XFS_CHECK_STRUCT_SIZE(struct xfs_getparents_rec, 32); XFS_CHECK_STRUCT_SIZE(struct xfs_getparents, 40); @@ -201,6 +227,70 @@ xfs_check_ondisk_structs(void) XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MIN << XFS_DQ_BIGTIME_SHIFT, 4); XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MAX << XFS_DQ_BIGTIME_SHIFT, 16299260424LL); + + /* superblock field checks we got from xfs/122 */ + XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 288); + XFS_CHECK_STRUCT_SIZE(struct xfs_sb, 288); + XFS_CHECK_SB_OFFSET(sb_magicnum, 0); + XFS_CHECK_SB_OFFSET(sb_blocksize, 4); + XFS_CHECK_SB_OFFSET(sb_dblocks, 8); + XFS_CHECK_SB_OFFSET(sb_rblocks, 16); + XFS_CHECK_SB_OFFSET(sb_rextents, 24); + XFS_CHECK_SB_OFFSET(sb_uuid, 32); + XFS_CHECK_SB_OFFSET(sb_logstart, 48); + XFS_CHECK_SB_OFFSET(sb_rootino, 56); + XFS_CHECK_SB_OFFSET(sb_rbmino, 64); + XFS_CHECK_SB_OFFSET(sb_rsumino, 72); + XFS_CHECK_SB_OFFSET(sb_rextsize, 80); + XFS_CHECK_SB_OFFSET(sb_agblocks, 84); + XFS_CHECK_SB_OFFSET(sb_agcount, 88); + XFS_CHECK_SB_OFFSET(sb_rbmblocks, 92); + XFS_CHECK_SB_OFFSET(sb_logblocks, 96); + XFS_CHECK_SB_OFFSET(sb_versionnum, 100); + XFS_CHECK_SB_OFFSET(sb_sectsize, 102); + XFS_CHECK_SB_OFFSET(sb_inodesize, 104); + XFS_CHECK_SB_OFFSET(sb_inopblock, 106); + XFS_CHECK_SB_OFFSET(sb_blocklog, 120); + XFS_CHECK_SB_OFFSET(sb_fname[12], 120); + XFS_CHECK_SB_OFFSET(sb_sectlog, 121); + XFS_CHECK_SB_OFFSET(sb_inodelog, 122); + XFS_CHECK_SB_OFFSET(sb_inopblog, 123); + XFS_CHECK_SB_OFFSET(sb_agblklog, 124); + XFS_CHECK_SB_OFFSET(sb_rextslog, 125); + XFS_CHECK_SB_OFFSET(sb_inprogress, 126); + XFS_CHECK_SB_OFFSET(sb_imax_pct, 127); + XFS_CHECK_SB_OFFSET(sb_icount, 128); + XFS_CHECK_SB_OFFSET(sb_ifree, 136); + XFS_CHECK_SB_OFFSET(sb_fdblocks, 144); + XFS_CHECK_SB_OFFSET(sb_frextents, 152); + XFS_CHECK_SB_OFFSET(sb_uquotino, 160); + XFS_CHECK_SB_OFFSET(sb_gquotino, 168); + XFS_CHECK_SB_OFFSET(sb_qflags, 176); + XFS_CHECK_SB_OFFSET(sb_flags, 178); + XFS_CHECK_SB_OFFSET(sb_shared_vn, 179); + XFS_CHECK_SB_OFFSET(sb_inoalignmt, 180); + XFS_CHECK_SB_OFFSET(sb_unit, 184); + XFS_CHECK_SB_OFFSET(sb_width, 188); + XFS_CHECK_SB_OFFSET(sb_dirblklog, 192); + XFS_CHECK_SB_OFFSET(sb_logsectlog, 193); + XFS_CHECK_SB_OFFSET(sb_logsectsize, 194); + XFS_CHECK_SB_OFFSET(sb_logsunit, 196); + XFS_CHECK_SB_OFFSET(sb_features2, 200); + XFS_CHECK_SB_OFFSET(sb_bad_features2, 204); + XFS_CHECK_SB_OFFSET(sb_features_compat, 208); + XFS_CHECK_SB_OFFSET(sb_features_ro_compat, 212); + XFS_CHECK_SB_OFFSET(sb_features_incompat, 216); + XFS_CHECK_SB_OFFSET(sb_features_log_incompat, 220); + XFS_CHECK_SB_OFFSET(sb_crc, 224); + XFS_CHECK_SB_OFFSET(sb_spino_align, 228); + XFS_CHECK_SB_OFFSET(sb_pquotino, 232); + XFS_CHECK_SB_OFFSET(sb_lsn, 240); + XFS_CHECK_SB_OFFSET(sb_meta_uuid, 248); + XFS_CHECK_SB_OFFSET(sb_metadirino, 264); + XFS_CHECK_SB_OFFSET(sb_rgcount, 272); + XFS_CHECK_SB_OFFSET(sb_rgextents, 276); + XFS_CHECK_SB_OFFSET(sb_rgblklog, 280); + XFS_CHECK_SB_OFFSET(sb_pad, 281); } #endif /* __XFS_ONDISK_H */ diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index fb05f44f6c75..763d941a8420 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -143,4 +143,47 @@ time64_t xfs_dquot_from_disk_ts(struct xfs_disk_dquot *ddq, __be32 dtimer); __be32 xfs_dquot_to_disk_ts(struct xfs_dquot *ddq, time64_t timer); +static inline const char * +xfs_dqinode_path(xfs_dqtype_t type) +{ + switch (type) { + case XFS_DQTYPE_USER: + return "user"; + case XFS_DQTYPE_GROUP: + return "group"; + case XFS_DQTYPE_PROJ: + return "project"; + } + + ASSERT(0); + return NULL; +} + +static inline enum xfs_metafile_type +xfs_dqinode_metafile_type(xfs_dqtype_t type) +{ + switch (type) { + case XFS_DQTYPE_USER: + return XFS_METAFILE_USRQUOTA; + case XFS_DQTYPE_GROUP: + return XFS_METAFILE_GRPQUOTA; + case XFS_DQTYPE_PROJ: + return XFS_METAFILE_PRJQUOTA; + } + + ASSERT(0); + return XFS_METAFILE_UNKNOWN; +} + +unsigned int xfs_dqinode_sick_mask(xfs_dqtype_t type); + +int xfs_dqinode_load(struct xfs_trans *tp, struct xfs_inode *dp, + xfs_dqtype_t type, struct xfs_inode **ipp); +int xfs_dqinode_metadir_create(struct xfs_inode *dp, xfs_dqtype_t type, + struct xfs_inode **ipp); +int xfs_dqinode_metadir_link(struct xfs_inode *dp, xfs_dqtype_t type, + struct xfs_inode *ip); +int xfs_dqinode_mkdir_parent(struct xfs_mount *mp, struct xfs_inode **dpp); +int xfs_dqinode_load_parent(struct xfs_trans *tp, struct xfs_inode **dpp); + #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 198b84117df1..2dbab68b4fe6 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -154,7 +154,7 @@ xfs_refcount_complain_bad_rec( xfs_warn(mp, "Refcount BTree record corruption in AG %d detected at %pS!", - cur->bc_ag.pag->pag_agno, fa); + cur->bc_group->xg_gno, fa); xfs_warn(mp, "Start block 0x%x, block count 0x%x, references 0x%x", irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount); @@ -180,7 +180,7 @@ xfs_refcount_get_rec( return error; xfs_refcount_btrec_to_irec(rec, irec); - fa = xfs_refcount_check_irec(cur->bc_ag.pag, irec); + fa = xfs_refcount_check_irec(to_perag(cur->bc_group), irec); if (fa) return xfs_refcount_complain_bad_rec(cur, fa, irec); @@ -1154,8 +1154,7 @@ xfs_refcount_adjust_extents( goto out_error; } } else { - fsbno = XFS_AGB_TO_FSB(cur->bc_mp, - cur->bc_ag.pag->pag_agno, + fsbno = xfs_agbno_to_fsb(to_perag(cur->bc_group), tmp.rc_startblock); error = xfs_free_extent_later(cur->bc_tp, fsbno, tmp.rc_blockcount, NULL, @@ -1217,8 +1216,7 @@ xfs_refcount_adjust_extents( } goto advloop; } else { - fsbno = XFS_AGB_TO_FSB(cur->bc_mp, - cur->bc_ag.pag->pag_agno, + fsbno = xfs_agbno_to_fsb(to_perag(cur->bc_group), ext.rc_startblock); error = xfs_free_extent_later(cur->bc_tp, fsbno, ext.rc_blockcount, NULL, @@ -1312,7 +1310,7 @@ xfs_refcount_continue_op( xfs_agblock_t new_agbno) { struct xfs_mount *mp = cur->bc_mp; - struct xfs_perag *pag = cur->bc_ag.pag; + struct xfs_perag *pag = to_perag(cur->bc_group); if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, ri->ri_blockcount))) { @@ -1320,10 +1318,10 @@ xfs_refcount_continue_op( return -EFSCORRUPTED; } - ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + ri->ri_startblock = xfs_agbno_to_fsb(pag, new_agbno); ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount)); - ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock)); + ASSERT(pag_agno(pag) == XFS_FSB_TO_AGNO(mp, ri->ri_startblock)); return 0; } @@ -1360,7 +1358,7 @@ xfs_refcount_finish_one( * If we haven't gotten a cursor or the cursor AG doesn't match * the startblock, get one now. */ - if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) { + if (rcur != NULL && rcur->bc_group != ri->ri_group) { nr_ops = rcur->bc_refc.nr_ops; shape_changes = rcur->bc_refc.shape_changes; xfs_btree_del_cursor(rcur, 0); @@ -1368,13 +1366,14 @@ xfs_refcount_finish_one( *pcur = NULL; } if (rcur == NULL) { - error = xfs_alloc_read_agf(ri->ri_pag, tp, + struct xfs_perag *pag = to_perag(ri->ri_group); + + error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_FREEING, &agbp); if (error) return error; - *pcur = rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, - ri->ri_pag); + *pcur = rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); rcur->bc_refc.nr_ops = nr_ops; rcur->bc_refc.shape_changes = shape_changes; } @@ -1880,7 +1879,8 @@ xfs_refcount_recover_extent( INIT_LIST_HEAD(&rr->rr_list); xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); - if (xfs_refcount_check_irec(cur->bc_ag.pag, &rr->rr_rrec) != NULL || + if (xfs_refcount_check_irec(to_perag(cur->bc_group), &rr->rr_rrec) != + NULL || XFS_IS_CORRUPT(cur->bc_mp, rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { xfs_btree_mark_sick(cur); @@ -1956,8 +1956,7 @@ xfs_refcount_recover_cow_leftovers( goto out_free; /* Free the orphan record */ - fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, - rr->rr_rrec.rc_startblock); + fsb = xfs_agbno_to_fsb(pag, rr->rr_rrec.rc_startblock); xfs_refcount_free_cow_extent(tp, fsb, rr->rr_rrec.rc_blockcount); @@ -2029,7 +2028,7 @@ xfs_refcount_query_range_helper( xfs_failaddr_t fa; xfs_refcount_btrec_to_irec(rec, &irec); - fa = xfs_refcount_check_irec(cur->bc_ag.pag, &irec); + fa = xfs_refcount_check_irec(to_perag(cur->bc_group), &irec); if (fa) return xfs_refcount_complain_bad_rec(cur, fa, &irec); diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 68acb0b1b4a8..62d78afcf1f3 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -56,7 +56,7 @@ enum xfs_refcount_intent_type { struct xfs_refcount_intent { struct list_head ri_list; - struct xfs_perag *ri_pag; + struct xfs_group *ri_group; enum xfs_refcount_intent_type ri_type; xfs_extlen_t ri_blockcount; xfs_fsblock_t ri_startblock; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 795928d1a66d..54505fee1852 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -30,7 +30,7 @@ xfs_refcountbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_ag.agbp, cur->bc_ag.pag); + cur->bc_ag.agbp, to_perag(cur->bc_group)); } STATIC void @@ -68,21 +68,20 @@ xfs_refcountbt_alloc_block( memset(&args, 0, sizeof(args)); args.tp = cur->bc_tp; args.mp = cur->bc_mp; - args.pag = cur->bc_ag.pag; + args.pag = to_perag(cur->bc_group); args.oinfo = XFS_RMAP_OINFO_REFC; args.minlen = args.maxlen = args.prod = 1; args.resv = XFS_AG_RESV_METADATA; error = xfs_alloc_vextent_near_bno(&args, - XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno, - xfs_refc_block(args.mp))); + xfs_agbno_to_fsb(args.pag, xfs_refc_block(args.mp))); if (error) goto out_error; if (args.fsbno == NULLFSBLOCK) { *stat = 0; return 0; } - ASSERT(args.agno == cur->bc_ag.pag->pag_agno); + ASSERT(args.agno == cur->bc_group->xg_gno); ASSERT(args.len == 1); new->s = cpu_to_be32(args.agbno); @@ -170,7 +169,7 @@ xfs_refcountbt_init_ptr_from_cur( { struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agf->agf_seqno)); ptr->s = agf->agf_refcount_root; } @@ -362,11 +361,11 @@ xfs_refcountbt_init_cursor( { struct xfs_btree_cur *cur; - ASSERT(pag->pag_agno < mp->m_sb.sb_agcount); + ASSERT(pag_agno(pag) < mp->m_sb.sb_agcount); cur = xfs_btree_alloc_cursor(mp, tp, &xfs_refcountbt_ops, mp->m_refc_maxlevels, xfs_refcountbt_cur_cache); - cur->bc_ag.pag = xfs_perag_hold(pag); + cur->bc_group = xfs_group_hold(pag_group(pag)); cur->bc_refc.nr_ops = 0; cur->bc_refc.shape_changes = 0; cur->bc_ag.agbp = agbp; @@ -515,7 +514,7 @@ xfs_refcountbt_calc_reserves( * never be available for the kinds of things that would require btree * expansion. We therefore can pretend the space isn't there. */ - if (xfs_ag_contains_log(mp, pag->pag_agno)) + if (xfs_ag_contains_log(mp, pag_agno(pag))) agblocks -= mp->m_sb.sb_logblocks; *ask += xfs_refcountbt_max_size(mp, agblocks); diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 6ef4687b3aba..d0df68dc3131 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -213,7 +213,7 @@ xfs_rmap_check_irec( struct xfs_perag *pag, const struct xfs_rmap_irec *irec) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); bool is_inode; bool is_unwritten; bool is_bmbt; @@ -269,9 +269,7 @@ xfs_rmap_check_btrec( struct xfs_btree_cur *cur, const struct xfs_rmap_irec *irec) { - if (xfs_btree_is_mem_rmap(cur->bc_ops)) - return xfs_rmap_check_irec(cur->bc_mem.pag, irec); - return xfs_rmap_check_irec(cur->bc_ag.pag, irec); + return xfs_rmap_check_irec(to_perag(cur->bc_group), irec); } static inline int @@ -288,7 +286,7 @@ xfs_rmap_complain_bad_rec( else xfs_warn(mp, "Reverse Mapping BTree record corruption in AG %d detected at %pS!", - cur->bc_ag.pag->pag_agno, fa); + cur->bc_group->xg_gno, fa); xfs_warn(mp, "Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x", irec->rm_owner, irec->rm_flags, irec->rm_startblock, @@ -835,7 +833,7 @@ xfs_rmap_hook_enable(void) static inline void xfs_rmap_update_hook( struct xfs_trans *tp, - struct xfs_perag *pag, + struct xfs_group *xg, enum xfs_rmap_intent_type op, xfs_agblock_t startblock, xfs_extlen_t blockcount, @@ -850,27 +848,27 @@ xfs_rmap_update_hook( .oinfo = *oinfo, /* struct copy */ }; - if (pag) - xfs_hooks_call(&pag->pag_rmap_update_hooks, op, &p); + if (xg) + xfs_hooks_call(&xg->xg_rmap_update_hooks, op, &p); } } /* Call the specified function during a reverse mapping update. */ int xfs_rmap_hook_add( - struct xfs_perag *pag, + struct xfs_group *xg, struct xfs_rmap_hook *hook) { - return xfs_hooks_add(&pag->pag_rmap_update_hooks, &hook->rmap_hook); + return xfs_hooks_add(&xg->xg_rmap_update_hooks, &hook->rmap_hook); } /* Stop calling the specified function during a reverse mapping update. */ void xfs_rmap_hook_del( - struct xfs_perag *pag, + struct xfs_group *xg, struct xfs_rmap_hook *hook) { - xfs_hooks_del(&pag->pag_rmap_update_hooks, &hook->rmap_hook); + xfs_hooks_del(&xg->xg_rmap_update_hooks, &hook->rmap_hook); } /* Configure rmap update hook functions. */ @@ -905,7 +903,8 @@ xfs_rmap_free( return 0; cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); - xfs_rmap_update_hook(tp, pag, XFS_RMAP_UNMAP, bno, len, false, oinfo); + xfs_rmap_update_hook(tp, pag_group(pag), XFS_RMAP_UNMAP, bno, len, + false, oinfo); error = xfs_rmap_unmap(cur, bno, len, false, oinfo); xfs_btree_del_cursor(cur, error); @@ -1149,7 +1148,8 @@ xfs_rmap_alloc( return 0; cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); - xfs_rmap_update_hook(tp, pag, XFS_RMAP_MAP, bno, len, false, oinfo); + xfs_rmap_update_hook(tp, pag_group(pag), XFS_RMAP_MAP, bno, len, false, + oinfo); error = xfs_rmap_map(cur, bno, len, false, oinfo); xfs_btree_del_cursor(cur, error); @@ -2586,28 +2586,30 @@ xfs_rmap_finish_one( * If we haven't gotten a cursor or the cursor AG doesn't match * the startblock, get one now. */ - if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) { + if (rcur != NULL && rcur->bc_group != ri->ri_group) { xfs_btree_del_cursor(rcur, 0); rcur = NULL; *pcur = NULL; } if (rcur == NULL) { + struct xfs_perag *pag = to_perag(ri->ri_group); + /* * Refresh the freelist before we start changing the * rmapbt, because a shape change could cause us to * allocate blocks. */ - error = xfs_free_extent_fix_freelist(tp, ri->ri_pag, &agbp); + error = xfs_free_extent_fix_freelist(tp, pag, &agbp); if (error) { - xfs_ag_mark_sick(ri->ri_pag, XFS_SICK_AG_AGFL); + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL); return error; } if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) { - xfs_ag_mark_sick(ri->ri_pag, XFS_SICK_AG_AGFL); + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL); return -EFSCORRUPTED; } - *pcur = rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, ri->ri_pag); + *pcur = rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); } xfs_rmap_ino_owner(&oinfo, ri->ri_owner, ri->ri_whichfork, @@ -2620,7 +2622,7 @@ xfs_rmap_finish_one( if (error) return error; - xfs_rmap_update_hook(tp, ri->ri_pag, ri->ri_type, bno, + xfs_rmap_update_hook(tp, ri->ri_group, ri->ri_type, bno, ri->ri_bmap.br_blockcount, unwritten, &oinfo); return 0; } diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index b783dd4dd95d..96b4321d8310 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -173,7 +173,7 @@ struct xfs_rmap_intent { int ri_whichfork; uint64_t ri_owner; struct xfs_bmbt_irec ri_bmap; - struct xfs_perag *ri_pag; + struct xfs_group *ri_group; }; /* functions for updating the rmapbt based on bmbt map/unmap operations */ @@ -264,8 +264,8 @@ struct xfs_rmap_hook { void xfs_rmap_hook_disable(void); void xfs_rmap_hook_enable(void); -int xfs_rmap_hook_add(struct xfs_perag *pag, struct xfs_rmap_hook *hook); -void xfs_rmap_hook_del(struct xfs_perag *pag, struct xfs_rmap_hook *hook); +int xfs_rmap_hook_add(struct xfs_group *xg, struct xfs_rmap_hook *hook); +void xfs_rmap_hook_del(struct xfs_group *xg, struct xfs_rmap_hook *hook); void xfs_rmap_hook_setup(struct xfs_rmap_hook *hook, notifier_fn_t mod_fn); #endif diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index ac2f1f499b76..2cab694ac58a 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -57,7 +57,7 @@ xfs_rmapbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_ag.agbp, cur->bc_ag.pag); + cur->bc_ag.agbp, to_perag(cur->bc_group)); } STATIC void @@ -66,14 +66,15 @@ xfs_rmapbt_set_root( const union xfs_btree_ptr *ptr, int inc) { - struct xfs_buf *agbp = cur->bc_ag.agbp; - struct xfs_agf *agf = agbp->b_addr; + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; + struct xfs_perag *pag = to_perag(cur->bc_group); ASSERT(ptr->s != 0); agf->agf_rmap_root = ptr->s; be32_add_cpu(&agf->agf_rmap_level, inc); - cur->bc_ag.pag->pagf_rmap_level += inc; + pag->pagf_rmap_level += inc; xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); } @@ -87,7 +88,7 @@ xfs_rmapbt_alloc_block( { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; - struct xfs_perag *pag = cur->bc_ag.pag; + struct xfs_perag *pag = to_perag(cur->bc_group); struct xfs_alloc_arg args = { .len = 1 }; int error; xfs_agblock_t bno; @@ -102,7 +103,7 @@ xfs_rmapbt_alloc_block( return 0; } - xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false); + xfs_extent_busy_reuse(pag_group(pag), bno, 1, false); new->s = cpu_to_be32(bno); be32_add_cpu(&agf->agf_rmap_blocks, 1); @@ -125,7 +126,7 @@ xfs_rmapbt_free_block( { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; - struct xfs_perag *pag = cur->bc_ag.pag; + struct xfs_perag *pag = to_perag(cur->bc_group); xfs_agblock_t bno; int error; @@ -136,7 +137,7 @@ xfs_rmapbt_free_block( if (error) return error; - xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1, + xfs_extent_busy_insert(cur->bc_tp, pag_group(pag), bno, 1, XFS_EXTENT_BUSY_SKIP_DISCARD); xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1); @@ -227,7 +228,7 @@ xfs_rmapbt_init_ptr_from_cur( { struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agf->agf_seqno)); ptr->s = agf->agf_rmap_root; } @@ -538,7 +539,7 @@ xfs_rmapbt_init_cursor( cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rmapbt_ops, mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache); - cur->bc_ag.pag = xfs_perag_hold(pag); + cur->bc_group = xfs_group_hold(pag_group(pag)); cur->bc_ag.agbp = agbp; if (agbp) { struct xfs_agf *agf = agbp->b_addr; @@ -647,14 +648,13 @@ xfs_rmapbt_mem_cursor( struct xfbtree *xfbt) { struct xfs_btree_cur *cur; - struct xfs_mount *mp = pag->pag_mount; - cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rmapbt_mem_ops, + cur = xfs_btree_alloc_cursor(pag_mount(pag), tp, &xfs_rmapbt_mem_ops, xfs_rmapbt_maxlevels_ondisk(), xfs_rmapbt_cur_cache); cur->bc_mem.xfbtree = xfbt; cur->bc_nlevels = xfbt->nlevels; - cur->bc_mem.pag = xfs_perag_hold(pag); + cur->bc_group = xfs_group_hold(pag_group(pag)); return cur; } @@ -863,7 +863,7 @@ xfs_rmapbt_calc_reserves( * never be available for the kinds of things that would require btree * expansion. We therefore can pretend the space isn't there. */ - if (xfs_ag_contains_log(mp, pag->pag_agno)) + if (xfs_ag_contains_log(mp, pag_agno(pag))) agblocks -= mp->m_sb.sb_logblocks; /* Reserve 1% of the AG or enough for 1 block per record. */ diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 27a4472402ba..4ddfb7e395b3 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -20,28 +20,87 @@ #include "xfs_error.h" #include "xfs_rtbitmap.h" #include "xfs_health.h" +#include "xfs_sb.h" +#include "xfs_errortag.h" +#include "xfs_log.h" +#include "xfs_buf_item.h" +#include "xfs_extent_busy.h" /* * Realtime allocator bitmap functions shared with userspace. */ -/* - * Real time buffers need verifiers to avoid runtime warnings during IO. - * We don't have anything to verify, however, so these are just dummy - * operations. - */ +static xfs_failaddr_t +xfs_rtbuf_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_mount; + struct xfs_rtbuf_blkinfo *hdr = bp->b_addr; + + if (!xfs_verify_magic(bp, hdr->rt_magic)) + return __this_address; + if (!xfs_has_rtgroups(mp)) + return __this_address; + if (!xfs_has_crc(mp)) + return __this_address; + if (!uuid_equal(&hdr->rt_uuid, &mp->m_sb.sb_meta_uuid)) + return __this_address; + if (hdr->rt_blkno != cpu_to_be64(xfs_buf_daddr(bp))) + return __this_address; + return NULL; +} + static void xfs_rtbuf_verify_read( - struct xfs_buf *bp) + struct xfs_buf *bp) { + struct xfs_mount *mp = bp->b_mount; + struct xfs_rtbuf_blkinfo *hdr = bp->b_addr; + xfs_failaddr_t fa; + + if (!xfs_has_rtgroups(mp)) + return; + + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr->rt_lsn))) { + fa = __this_address; + goto fail; + } + + if (!xfs_buf_verify_cksum(bp, XFS_RTBUF_CRC_OFF)) { + fa = __this_address; + goto fail; + } + + fa = xfs_rtbuf_verify(bp); + if (fa) + goto fail; + return; +fail: + xfs_verifier_error(bp, -EFSCORRUPTED, fa); } static void xfs_rtbuf_verify_write( struct xfs_buf *bp) { - return; + struct xfs_mount *mp = bp->b_mount; + struct xfs_rtbuf_blkinfo *hdr = bp->b_addr; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; + + if (!xfs_has_rtgroups(mp)) + return; + + fa = xfs_rtbuf_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + return; + } + + if (bip) + hdr->rt_lsn = cpu_to_be64(bip->bli_item.li_lsn); + xfs_buf_update_cksum(bp, XFS_RTBUF_CRC_OFF); } const struct xfs_buf_ops xfs_rtbuf_ops = { @@ -50,6 +109,22 @@ const struct xfs_buf_ops xfs_rtbuf_ops = { .verify_write = xfs_rtbuf_verify_write, }; +const struct xfs_buf_ops xfs_rtbitmap_buf_ops = { + .name = "xfs_rtbitmap", + .magic = { 0, cpu_to_be32(XFS_RTBITMAP_MAGIC) }, + .verify_read = xfs_rtbuf_verify_read, + .verify_write = xfs_rtbuf_verify_write, + .verify_struct = xfs_rtbuf_verify, +}; + +const struct xfs_buf_ops xfs_rtsummary_buf_ops = { + .name = "xfs_rtsummary", + .magic = { 0, cpu_to_be32(XFS_RTSUMMARY_MAGIC) }, + .verify_read = xfs_rtbuf_verify_read, + .verify_write = xfs_rtbuf_verify_write, + .verify_struct = xfs_rtbuf_verify, +}; + /* Release cached rt bitmap and summary buffers. */ void xfs_rtbuf_cache_relse( @@ -75,28 +150,31 @@ static int xfs_rtbuf_get( struct xfs_rtalloc_args *args, xfs_fileoff_t block, /* block number in bitmap or summary */ - int issum) /* is summary not bitmap */ + enum xfs_rtg_inodes type) { + struct xfs_inode *ip = args->rtg->rtg_inodes[type]; struct xfs_mount *mp = args->mp; struct xfs_buf **cbpp; /* cached block buffer */ xfs_fileoff_t *coffp; /* cached block number */ struct xfs_buf *bp; /* block buffer, result */ - struct xfs_inode *ip; /* bitmap or summary inode */ struct xfs_bmbt_irec map; - enum xfs_blft type; + enum xfs_blft buf_type; int nmap = 1; int error; - if (issum) { + switch (type) { + case XFS_RTGI_SUMMARY: cbpp = &args->sumbp; coffp = &args->sumoff; - ip = mp->m_rsumip; - type = XFS_BLFT_RTSUMMARY_BUF; - } else { + buf_type = XFS_BLFT_RTSUMMARY_BUF; + break; + case XFS_RTGI_BITMAP: cbpp = &args->rbmbp; coffp = &args->rbmoff; - ip = mp->m_rbmip; - type = XFS_BLFT_RTBITMAP_BUF; + buf_type = XFS_BLFT_RTBITMAP_BUF; + break; + default: + return -EINVAL; } /* @@ -119,22 +197,32 @@ xfs_rtbuf_get( return error; if (XFS_IS_CORRUPT(mp, nmap == 0 || !xfs_bmap_is_written_extent(&map))) { - xfs_rt_mark_sick(mp, issum ? XFS_SICK_RT_SUMMARY : - XFS_SICK_RT_BITMAP); + xfs_rtginode_mark_sick(args->rtg, type); return -EFSCORRUPTED; } ASSERT(map.br_startblock != NULLFSBLOCK); error = xfs_trans_read_buf(mp, args->tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, map.br_startblock), - mp->m_bsize, 0, &bp, &xfs_rtbuf_ops); + mp->m_bsize, 0, &bp, + xfs_rtblock_ops(mp, type)); if (xfs_metadata_is_sick(error)) - xfs_rt_mark_sick(mp, issum ? XFS_SICK_RT_SUMMARY : - XFS_SICK_RT_BITMAP); + xfs_rtginode_mark_sick(args->rtg, type); if (error) return error; - xfs_trans_buf_set_type(args->tp, bp, type); + if (xfs_has_rtgroups(mp)) { + struct xfs_rtbuf_blkinfo *hdr = bp->b_addr; + + if (hdr->rt_owner != cpu_to_be64(ip->i_ino)) { + xfs_buf_mark_corrupt(bp); + xfs_trans_brelse(args->tp, bp); + xfs_rtginode_mark_sick(args->rtg, type); + return -EFSCORRUPTED; + } + } + + xfs_trans_buf_set_type(args->tp, bp, buf_type); *cbpp = bp; *coffp = block; return 0; @@ -148,11 +236,11 @@ xfs_rtbitmap_read_buf( struct xfs_mount *mp = args->mp; if (XFS_IS_CORRUPT(mp, block >= mp->m_sb.sb_rbmblocks)) { - xfs_rt_mark_sick(mp, XFS_SICK_RT_BITMAP); + xfs_rtginode_mark_sick(args->rtg, XFS_RTGI_BITMAP); return -EFSCORRUPTED; } - return xfs_rtbuf_get(args, block, 0); + return xfs_rtbuf_get(args, block, XFS_RTGI_BITMAP); } int @@ -163,10 +251,10 @@ xfs_rtsummary_read_buf( struct xfs_mount *mp = args->mp; if (XFS_IS_CORRUPT(mp, block >= mp->m_rsumblocks)) { - xfs_rt_mark_sick(args->mp, XFS_SICK_RT_SUMMARY); + xfs_rtginode_mark_sick(args->rtg, XFS_RTGI_SUMMARY); return -EFSCORRUPTED; } - return xfs_rtbuf_get(args, block, 1); + return xfs_rtbuf_get(args, block, XFS_RTGI_SUMMARY); } /* @@ -503,6 +591,7 @@ xfs_rtmodify_summary( { struct xfs_mount *mp = args->mp; xfs_rtsumoff_t so = xfs_rtsumoffs(mp, log, bbno); + uint8_t *rsum_cache = args->rtg->rtg_rsum_cache; unsigned int infoword; xfs_suminfo_t val; int error; @@ -514,11 +603,11 @@ xfs_rtmodify_summary( infoword = xfs_rtsumoffs_to_infoword(mp, so); val = xfs_suminfo_add(args, infoword, delta); - if (mp->m_rsum_cache) { - if (val == 0 && log + 1 == mp->m_rsum_cache[bbno]) - mp->m_rsum_cache[bbno] = log; - if (val != 0 && log >= mp->m_rsum_cache[bbno]) - mp->m_rsum_cache[bbno] = log + 1; + if (rsum_cache) { + if (val == 0 && log + 1 == rsum_cache[bbno]) + rsum_cache[bbno] = log; + if (val != 0 && log >= rsum_cache[bbno]) + rsum_cache[bbno] = log + 1; } xfs_trans_log_rtsummary(args, infoword); @@ -737,7 +826,7 @@ xfs_rtfree_range( /* * Find the next allocated block (end of allocated extent). */ - error = xfs_rtfind_forw(args, end, mp->m_sb.sb_rextents - 1, + error = xfs_rtfind_forw(args, end, args->rtg->rtg_extents - 1, &postblock); if (error) return error; @@ -961,19 +1050,25 @@ xfs_rtcheck_alloc_range( int xfs_rtfree_extent( struct xfs_trans *tp, /* transaction pointer */ + struct xfs_rtgroup *rtg, xfs_rtxnum_t start, /* starting rtext number to free */ xfs_rtxlen_t len) /* length of extent freed */ { struct xfs_mount *mp = tp->t_mountp; + struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP]; struct xfs_rtalloc_args args = { .mp = mp, .tp = tp, + .rtg = rtg, }; int error; struct timespec64 atime; - ASSERT(mp->m_rbmip->i_itemp != NULL); - xfs_assert_ilocked(mp->m_rbmip, XFS_ILOCK_EXCL); + ASSERT(rbmip->i_itemp != NULL); + xfs_assert_ilocked(rbmip, XFS_ILOCK_EXCL); + + if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FREE_EXTENT)) + return -EIO; error = xfs_rtcheck_alloc_range(&args, start, len); if (error) @@ -990,19 +1085,21 @@ xfs_rtfree_extent( * Mark more blocks free in the superblock. */ xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len); + /* * If we've now freed all the blocks, reset the file sequence - * number to 0. + * number to 0 for pre-RTG file systems. */ - if (tp->t_frextents_delta + mp->m_sb.sb_frextents == + if (!xfs_has_rtgroups(mp) && + tp->t_frextents_delta + mp->m_sb.sb_frextents == mp->m_sb.sb_rextents) { - if (!(mp->m_rbmip->i_diflags & XFS_DIFLAG_NEWRTBM)) - mp->m_rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM; + if (!(rbmip->i_diflags & XFS_DIFLAG_NEWRTBM)) + rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM; - atime = inode_get_atime(VFS_I(mp->m_rbmip)); + atime = inode_get_atime(VFS_I(rbmip)); atime.tv_sec = 0; - inode_set_atime_to_ts(VFS_I(mp->m_rbmip), atime); - xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); + inode_set_atime_to_ts(VFS_I(rbmip), atime); + xfs_trans_log_inode(tp, rbmip, XFS_ILOG_CORE); } error = 0; out: @@ -1018,15 +1115,17 @@ out: int xfs_rtfree_blocks( struct xfs_trans *tp, + struct xfs_rtgroup *rtg, xfs_fsblock_t rtbno, xfs_filblks_t rtlen) { struct xfs_mount *mp = tp->t_mountp; xfs_extlen_t mod; + int error; ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN); - mod = xfs_rtb_to_rtxoff(mp, rtlen); + mod = xfs_blen_to_rtxoff(mp, rtlen); if (mod) { ASSERT(mod == 0); return -EIO; @@ -1038,21 +1137,31 @@ xfs_rtfree_blocks( return -EIO; } - return xfs_rtfree_extent(tp, xfs_rtb_to_rtx(mp, rtbno), - xfs_rtb_to_rtx(mp, rtlen)); + error = xfs_rtfree_extent(tp, rtg, xfs_rtb_to_rtx(mp, rtbno), + xfs_extlen_to_rtxlen(mp, rtlen)); + if (error) + return error; + + if (xfs_has_rtgroups(mp)) + xfs_extent_busy_insert(tp, rtg_group(rtg), + xfs_rtb_to_rgbno(mp, rtbno), rtlen, 0); + + return 0; } /* Find all the free records within a given range. */ int xfs_rtalloc_query_range( - struct xfs_mount *mp, + struct xfs_rtgroup *rtg, struct xfs_trans *tp, xfs_rtxnum_t start, xfs_rtxnum_t end, xfs_rtalloc_query_range_fn fn, void *priv) { + struct xfs_mount *mp = rtg_mount(rtg); struct xfs_rtalloc_args args = { + .rtg = rtg, .mp = mp, .tp = tp, }; @@ -1060,10 +1169,10 @@ xfs_rtalloc_query_range( if (start > end) return -EINVAL; - if (start == end || start >= mp->m_sb.sb_rextents) + if (start == end || start >= rtg->rtg_extents) return 0; - end = min(end, mp->m_sb.sb_rextents - 1); + end = min(end, rtg->rtg_extents - 1); /* Iterate the bitmap, looking for discrepancies. */ while (start <= end) { @@ -1086,7 +1195,7 @@ xfs_rtalloc_query_range( rec.ar_startext = start; rec.ar_extcount = rtend - start + 1; - error = fn(mp, tp, &rec, priv); + error = fn(rtg, tp, &rec, priv); if (error) break; } @@ -1101,26 +1210,27 @@ xfs_rtalloc_query_range( /* Find all the free records. */ int xfs_rtalloc_query_all( - struct xfs_mount *mp, + struct xfs_rtgroup *rtg, struct xfs_trans *tp, xfs_rtalloc_query_range_fn fn, void *priv) { - return xfs_rtalloc_query_range(mp, tp, 0, mp->m_sb.sb_rextents - 1, fn, + return xfs_rtalloc_query_range(rtg, tp, 0, rtg->rtg_extents - 1, fn, priv); } /* Is the given extent all free? */ int xfs_rtalloc_extent_is_free( - struct xfs_mount *mp, + struct xfs_rtgroup *rtg, struct xfs_trans *tp, xfs_rtxnum_t start, xfs_rtxlen_t len, bool *is_free) { struct xfs_rtalloc_args args = { - .mp = mp, + .mp = rtg_mount(rtg), + .rtg = rtg, .tp = tp, }; xfs_rtxnum_t end; @@ -1136,88 +1246,71 @@ xfs_rtalloc_extent_is_free( return 0; } +/* Compute the number of rt extents tracked by a single bitmap block. */ +xfs_rtxnum_t +xfs_rtbitmap_rtx_per_rbmblock( + struct xfs_mount *mp) +{ + unsigned int rbmblock_bytes = mp->m_sb.sb_blocksize; + + if (xfs_has_rtgroups(mp)) + rbmblock_bytes -= sizeof(struct xfs_rtbuf_blkinfo); + + return rbmblock_bytes * NBBY; +} + /* * Compute the number of rtbitmap blocks needed to track the given number of rt * extents. */ xfs_filblks_t -xfs_rtbitmap_blockcount( +xfs_rtbitmap_blockcount_len( struct xfs_mount *mp, xfs_rtbxlen_t rtextents) { - return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize); + return howmany_64(rtextents, xfs_rtbitmap_rtx_per_rbmblock(mp)); } -/* Compute the number of rtsummary blocks needed to track the given rt space. */ -xfs_filblks_t -xfs_rtsummary_blockcount( - struct xfs_mount *mp, - unsigned int rsumlevels, - xfs_extlen_t rbmblocks) +/* How many rt extents does each rtbitmap file track? */ +static inline xfs_rtbxlen_t +xfs_rtbitmap_bitcount( + struct xfs_mount *mp) { - unsigned long long rsumwords; + if (!mp->m_sb.sb_rextents) + return 0; - rsumwords = (unsigned long long)rsumlevels * rbmblocks; - return XFS_B_TO_FSB(mp, rsumwords << XFS_WORDLOG); -} + /* rtgroup size can be nonzero even if rextents is zero */ + if (xfs_has_rtgroups(mp)) + return mp->m_sb.sb_rgextents; -/* Lock both realtime free space metadata inodes for a freespace update. */ -void -xfs_rtbitmap_lock( - struct xfs_mount *mp) -{ - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP); - xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM); + return mp->m_sb.sb_rextents; } /* - * Join both realtime free space metadata inodes to the transaction. The - * ILOCKs will be released on transaction commit. + * Compute the number of rtbitmap blocks used for a given file system. */ -void -xfs_rtbitmap_trans_join( - struct xfs_trans *tp) -{ - xfs_trans_ijoin(tp, tp->t_mountp->m_rbmip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, tp->t_mountp->m_rsumip, XFS_ILOCK_EXCL); -} - -/* Unlock both realtime free space metadata inodes after a freespace update. */ -void -xfs_rtbitmap_unlock( +xfs_filblks_t +xfs_rtbitmap_blockcount( struct xfs_mount *mp) { - xfs_iunlock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM); - xfs_iunlock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP); + return xfs_rtbitmap_blockcount_len(mp, xfs_rtbitmap_bitcount(mp)); } /* - * Lock the realtime free space metadata inodes for a freespace scan. Callers - * must walk metadata blocks in order of increasing file offset. + * Compute the geometry of the rtsummary file needed to track the given rt + * space. */ -void -xfs_rtbitmap_lock_shared( - struct xfs_mount *mp, - unsigned int rbmlock_flags) -{ - if (rbmlock_flags & XFS_RBMLOCK_BITMAP) - xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); - - if (rbmlock_flags & XFS_RBMLOCK_SUMMARY) - xfs_ilock(mp->m_rsumip, XFS_ILOCK_SHARED | XFS_ILOCK_RTSUM); -} - -/* Unlock the realtime free space metadata inodes after a freespace scan. */ -void -xfs_rtbitmap_unlock_shared( +xfs_filblks_t +xfs_rtsummary_blockcount( struct xfs_mount *mp, - unsigned int rbmlock_flags) + unsigned int *rsumlevels) { - if (rbmlock_flags & XFS_RBMLOCK_SUMMARY) - xfs_iunlock(mp->m_rsumip, XFS_ILOCK_SHARED | XFS_ILOCK_RTSUM); + xfs_rtbxlen_t rextents = xfs_rtbitmap_bitcount(mp); + unsigned long long rsumwords; - if (rbmlock_flags & XFS_RBMLOCK_BITMAP) - xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); + *rsumlevels = xfs_compute_rextslog(rextents) + 1; + rsumwords = xfs_rtbitmap_blockcount_len(mp, rextents) * (*rsumlevels); + return howmany_64(rsumwords, mp->m_blockwsize); } static int @@ -1260,21 +1353,26 @@ out_trans_cancel: /* Get a buffer for the block. */ static int xfs_rtfile_initialize_block( - struct xfs_inode *ip, + struct xfs_rtgroup *rtg, + enum xfs_rtg_inodes type, xfs_fsblock_t fsbno, void *data) { - struct xfs_mount *mp = ip->i_mount; + struct xfs_mount *mp = rtg_mount(rtg); + struct xfs_inode *ip = rtg->rtg_inodes[type]; struct xfs_trans *tp; struct xfs_buf *bp; + void *bufdata; const size_t copylen = mp->m_blockwsize << XFS_WORDLOG; enum xfs_blft buf_type; int error; - if (ip == mp->m_rsumip) + if (type == XFS_RTGI_BITMAP) + buf_type = XFS_BLFT_RTBITMAP_BUF; + else if (type == XFS_RTGI_SUMMARY) buf_type = XFS_BLFT_RTSUMMARY_BUF; else - buf_type = XFS_BLFT_RTBITMAP_BUF; + return -EINVAL; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero, 0, 0, 0, &tp); if (error) @@ -1288,13 +1386,30 @@ xfs_rtfile_initialize_block( xfs_trans_cancel(tp); return error; } + bufdata = bp->b_addr; xfs_trans_buf_set_type(tp, bp, buf_type); - bp->b_ops = &xfs_rtbuf_ops; + bp->b_ops = xfs_rtblock_ops(mp, type); + + if (xfs_has_rtgroups(mp)) { + struct xfs_rtbuf_blkinfo *hdr = bp->b_addr; + + if (type == XFS_RTGI_BITMAP) + hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC); + else + hdr->rt_magic = cpu_to_be32(XFS_RTSUMMARY_MAGIC); + hdr->rt_owner = cpu_to_be64(ip->i_ino); + hdr->rt_blkno = cpu_to_be64(XFS_FSB_TO_DADDR(mp, fsbno)); + hdr->rt_lsn = 0; + uuid_copy(&hdr->rt_uuid, &mp->m_sb.sb_meta_uuid); + + bufdata += sizeof(*hdr); + } + if (data) - memcpy(bp->b_addr, data, copylen); + memcpy(bufdata, data, copylen); else - memset(bp->b_addr, 0, copylen); + memset(bufdata, 0, copylen); xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); return xfs_trans_commit(tp); } @@ -1306,12 +1421,13 @@ xfs_rtfile_initialize_block( */ int xfs_rtfile_initialize_blocks( - struct xfs_inode *ip, /* inode (bitmap/summary) */ + struct xfs_rtgroup *rtg, + enum xfs_rtg_inodes type, xfs_fileoff_t offset_fsb, /* offset to start from */ xfs_fileoff_t end_fsb, /* offset to allocate to */ void *data) /* data to fill the blocks */ { - struct xfs_mount *mp = ip->i_mount; + struct xfs_mount *mp = rtg_mount(rtg); const size_t copylen = mp->m_blockwsize << XFS_WORDLOG; while (offset_fsb < end_fsb) { @@ -1319,8 +1435,8 @@ xfs_rtfile_initialize_blocks( xfs_filblks_t i; int error; - error = xfs_rtfile_alloc_blocks(ip, offset_fsb, - end_fsb - offset_fsb, &map); + error = xfs_rtfile_alloc_blocks(rtg->rtg_inodes[type], + offset_fsb, end_fsb - offset_fsb, &map); if (error) return error; @@ -1330,7 +1446,7 @@ xfs_rtfile_initialize_blocks( * Do this one block per transaction, to keep it simple. */ for (i = 0; i < map.br_blockcount; i++) { - error = xfs_rtfile_initialize_block(ip, + error = xfs_rtfile_initialize_block(rtg, type, map.br_startblock + i, data); if (error) return error; @@ -1343,3 +1459,35 @@ xfs_rtfile_initialize_blocks( return 0; } + +int +xfs_rtbitmap_create( + struct xfs_rtgroup *rtg, + struct xfs_inode *ip, + struct xfs_trans *tp, + bool init) +{ + struct xfs_mount *mp = rtg_mount(rtg); + + ip->i_disk_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize; + if (init && !xfs_has_rtgroups(mp)) { + ip->i_diflags |= XFS_DIFLAG_NEWRTBM; + inode_set_atime(VFS_I(ip), 0, 0); + } + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + return 0; +} + +int +xfs_rtsummary_create( + struct xfs_rtgroup *rtg, + struct xfs_inode *ip, + struct xfs_trans *tp, + bool init) +{ + struct xfs_mount *mp = rtg_mount(rtg); + + ip->i_disk_size = mp->m_rsumblocks * mp->m_sb.sb_blocksize; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h index 140513d1d6bc..16563a44bd13 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.h +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -6,7 +6,10 @@ #ifndef __XFS_RTBITMAP_H__ #define __XFS_RTBITMAP_H__ +#include "xfs_rtgroup.h" + struct xfs_rtalloc_args { + struct xfs_rtgroup *rtg; struct xfs_mount *mp; struct xfs_trans *tp; @@ -19,13 +22,37 @@ struct xfs_rtalloc_args { static inline xfs_rtblock_t xfs_rtx_to_rtb( - struct xfs_mount *mp, + struct xfs_rtgroup *rtg, xfs_rtxnum_t rtx) { + struct xfs_mount *mp = rtg_mount(rtg); + xfs_rtblock_t start = xfs_group_start_fsb(rtg_group(rtg)); + + if (mp->m_rtxblklog >= 0) + return start + (rtx << mp->m_rtxblklog); + return start + (rtx * mp->m_sb.sb_rextsize); +} + +/* Convert an rgbno into an rt extent number. */ +static inline xfs_rtxnum_t +xfs_rgbno_to_rtx( + struct xfs_mount *mp, + xfs_rgblock_t rgbno) +{ + if (likely(mp->m_rtxblklog >= 0)) + return rgbno >> mp->m_rtxblklog; + return rgbno / mp->m_sb.sb_rextsize; +} + +static inline uint64_t +xfs_rtbxlen_to_blen( + struct xfs_mount *mp, + xfs_rtbxlen_t rtbxlen) +{ if (mp->m_rtxblklog >= 0) - return rtx << mp->m_rtxblklog; + return rtbxlen << mp->m_rtxblklog; - return rtx * mp->m_sb.sb_rextsize; + return rtbxlen * mp->m_sb.sb_rextsize; } static inline xfs_extlen_t @@ -62,15 +89,49 @@ xfs_extlen_to_rtxlen( return len / mp->m_sb.sb_rextsize; } +/* Convert an rt block count into an rt extent count. */ +static inline xfs_rtbxlen_t +xfs_blen_to_rtbxlen( + struct xfs_mount *mp, + uint64_t blen) +{ + if (likely(mp->m_rtxblklog >= 0)) + return blen >> mp->m_rtxblklog; + + return div_u64(blen, mp->m_sb.sb_rextsize); +} + +/* Return the offset of a file block length within an rt extent. */ +static inline xfs_extlen_t +xfs_blen_to_rtxoff( + struct xfs_mount *mp, + xfs_filblks_t blen) +{ + if (likely(mp->m_rtxblklog >= 0)) + return blen & mp->m_rtxblkmask; + + return do_div(blen, mp->m_sb.sb_rextsize); +} + +/* Round this block count up to the nearest rt extent size. */ +static inline xfs_filblks_t +xfs_blen_roundup_rtx( + struct xfs_mount *mp, + xfs_filblks_t blen) +{ + return roundup_64(blen, mp->m_sb.sb_rextsize); +} + /* Convert an rt block number into an rt extent number. */ static inline xfs_rtxnum_t xfs_rtb_to_rtx( struct xfs_mount *mp, xfs_rtblock_t rtbno) { + /* open-coded 64-bit masking operation */ + rtbno &= mp->m_groups[XG_TYPE_RTG].blkmask; if (likely(mp->m_rtxblklog >= 0)) return rtbno >> mp->m_rtxblklog; - return div_u64(rtbno, mp->m_sb.sb_rextsize); } @@ -80,48 +141,29 @@ xfs_rtb_to_rtxoff( struct xfs_mount *mp, xfs_rtblock_t rtbno) { + /* open-coded 64-bit masking operation */ + rtbno &= mp->m_groups[XG_TYPE_RTG].blkmask; if (likely(mp->m_rtxblklog >= 0)) return rtbno & mp->m_rtxblkmask; - return do_div(rtbno, mp->m_sb.sb_rextsize); } -/* - * Convert an rt block number into an rt extent number, rounding up to the next - * rt extent if the rt block is not aligned to an rt extent boundary. - */ -static inline xfs_rtxnum_t -xfs_rtb_to_rtxup( - struct xfs_mount *mp, - xfs_rtblock_t rtbno) -{ - if (likely(mp->m_rtxblklog >= 0)) { - if (rtbno & mp->m_rtxblkmask) - return (rtbno >> mp->m_rtxblklog) + 1; - return rtbno >> mp->m_rtxblklog; - } - - if (do_div(rtbno, mp->m_sb.sb_rextsize)) - rtbno++; - return rtbno; -} - -/* Round this rtblock up to the nearest rt extent size. */ +/* Round this file block offset up to the nearest rt extent size. */ static inline xfs_rtblock_t -xfs_rtb_roundup_rtx( +xfs_fileoff_roundup_rtx( struct xfs_mount *mp, - xfs_rtblock_t rtbno) + xfs_fileoff_t off) { - return roundup_64(rtbno, mp->m_sb.sb_rextsize); + return roundup_64(off, mp->m_sb.sb_rextsize); } -/* Round this rtblock down to the nearest rt extent size. */ +/* Round this file block offset down to the nearest rt extent size. */ static inline xfs_rtblock_t -xfs_rtb_rounddown_rtx( +xfs_fileoff_rounddown_rtx( struct xfs_mount *mp, - xfs_rtblock_t rtbno) + xfs_fileoff_t off) { - return rounddown_64(rtbno, mp->m_sb.sb_rextsize); + return rounddown_64(off, mp->m_sb.sb_rextsize); } /* Convert an rt extent number to a file block offset in the rt bitmap file. */ @@ -130,6 +172,9 @@ xfs_rtx_to_rbmblock( struct xfs_mount *mp, xfs_rtxnum_t rtx) { + if (xfs_has_rtgroups(mp)) + return div_u64(rtx, mp->m_rtx_per_rbmblock); + return rtx >> mp->m_blkbit_log; } @@ -139,6 +184,13 @@ xfs_rtx_to_rbmword( struct xfs_mount *mp, xfs_rtxnum_t rtx) { + if (xfs_has_rtgroups(mp)) { + unsigned int mod; + + div_u64_rem(rtx >> XFS_NBWORDLOG, mp->m_blockwsize, &mod); + return mod; + } + return (rtx >> XFS_NBWORDLOG) & (mp->m_blockwsize - 1); } @@ -148,6 +200,9 @@ xfs_rbmblock_to_rtx( struct xfs_mount *mp, xfs_fileoff_t rbmoff) { + if (xfs_has_rtgroups(mp)) + return rbmoff * mp->m_rtx_per_rbmblock; + return rbmoff << mp->m_blkbit_log; } @@ -157,7 +212,14 @@ xfs_rbmblock_wordptr( struct xfs_rtalloc_args *args, unsigned int index) { - union xfs_rtword_raw *words = args->rbmbp->b_addr; + struct xfs_mount *mp = args->mp; + union xfs_rtword_raw *words; + struct xfs_rtbuf_blkinfo *hdr = args->rbmbp->b_addr; + + if (xfs_has_rtgroups(mp)) + words = (union xfs_rtword_raw *)(hdr + 1); + else + words = args->rbmbp->b_addr; return words + index; } @@ -170,6 +232,8 @@ xfs_rtbitmap_getword( { union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index); + if (xfs_has_rtgroups(args->mp)) + return be32_to_cpu(word->rtg); return word->old; } @@ -182,7 +246,10 @@ xfs_rtbitmap_setword( { union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index); - word->old = value; + if (xfs_has_rtgroups(args->mp)) + word->rtg = cpu_to_be32(value); + else + word->old = value; } /* @@ -207,6 +274,9 @@ xfs_rtsumoffs_to_block( struct xfs_mount *mp, xfs_rtsumoff_t rsumoff) { + if (xfs_has_rtgroups(mp)) + return rsumoff / mp->m_blockwsize; + return XFS_B_TO_FSBT(mp, rsumoff * sizeof(xfs_suminfo_t)); } @@ -221,6 +291,9 @@ xfs_rtsumoffs_to_infoword( { unsigned int mask = mp->m_blockmask >> XFS_SUMINFOLOG; + if (xfs_has_rtgroups(mp)) + return rsumoff % mp->m_blockwsize; + return rsumoff & mask; } @@ -230,7 +303,13 @@ xfs_rsumblock_infoptr( struct xfs_rtalloc_args *args, unsigned int index) { - union xfs_suminfo_raw *info = args->sumbp->b_addr; + union xfs_suminfo_raw *info; + struct xfs_rtbuf_blkinfo *hdr = args->sumbp->b_addr; + + if (xfs_has_rtgroups(args->mp)) + info = (union xfs_suminfo_raw *)(hdr + 1); + else + info = args->sumbp->b_addr; return info + index; } @@ -243,6 +322,8 @@ xfs_suminfo_get( { union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index); + if (xfs_has_rtgroups(args->mp)) + return be32_to_cpu(info->rtg); return info->old; } @@ -255,10 +336,28 @@ xfs_suminfo_add( { union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index); + if (xfs_has_rtgroups(args->mp)) { + be32_add_cpu(&info->rtg, delta); + return be32_to_cpu(info->rtg); + } + info->old += delta; return info->old; } +static inline const struct xfs_buf_ops * +xfs_rtblock_ops( + struct xfs_mount *mp, + enum xfs_rtg_inodes type) +{ + if (xfs_has_rtgroups(mp)) { + if (type == XFS_RTGI_SUMMARY) + return &xfs_rtsummary_buf_ops; + return &xfs_rtbitmap_buf_ops; + } + return &xfs_rtbuf_ops; +} + /* * Functions for walking free space rtextents in the realtime bitmap. */ @@ -268,7 +367,7 @@ struct xfs_rtalloc_rec { }; typedef int (*xfs_rtalloc_query_range_fn)( - struct xfs_mount *mp, + struct xfs_rtgroup *rtg, struct xfs_trans *tp, const struct xfs_rtalloc_rec *rec, void *priv); @@ -291,53 +390,43 @@ int xfs_rtmodify_summary(struct xfs_rtalloc_args *args, int log, xfs_fileoff_t bbno, int delta); int xfs_rtfree_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxlen_t len); -int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp, +int xfs_rtalloc_query_range(struct xfs_rtgroup *rtg, struct xfs_trans *tp, xfs_rtxnum_t start, xfs_rtxnum_t end, xfs_rtalloc_query_range_fn fn, void *priv); -int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtalloc_query_range_fn fn, - void *priv); -int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtxnum_t start, xfs_rtxlen_t len, - bool *is_free); -/* - * Free an extent in the realtime subvolume. Length is expressed in - * realtime extents, as is the block number. - */ -int /* error */ -xfs_rtfree_extent( - struct xfs_trans *tp, /* transaction pointer */ - xfs_rtxnum_t start, /* starting rtext number to free */ - xfs_rtxlen_t len); /* length of extent freed */ - +int xfs_rtalloc_query_all(struct xfs_rtgroup *rtg, struct xfs_trans *tp, + xfs_rtalloc_query_range_fn fn, void *priv); +int xfs_rtalloc_extent_is_free(struct xfs_rtgroup *rtg, struct xfs_trans *tp, + xfs_rtxnum_t start, xfs_rtxlen_t len, bool *is_free); +int xfs_rtfree_extent(struct xfs_trans *tp, struct xfs_rtgroup *rtg, + xfs_rtxnum_t start, xfs_rtxlen_t len); /* Same as above, but in units of rt blocks. */ -int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, - xfs_filblks_t rtlen); +int xfs_rtfree_blocks(struct xfs_trans *tp, struct xfs_rtgroup *rtg, + xfs_fsblock_t rtbno, xfs_filblks_t rtlen); -xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t - rtextents); +xfs_rtxnum_t xfs_rtbitmap_rtx_per_rbmblock(struct xfs_mount *mp); +xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp); +xfs_filblks_t xfs_rtbitmap_blockcount_len(struct xfs_mount *mp, + xfs_rtbxlen_t rtextents); xfs_filblks_t xfs_rtsummary_blockcount(struct xfs_mount *mp, - unsigned int rsumlevels, xfs_extlen_t rbmblocks); - -int xfs_rtfile_initialize_blocks(struct xfs_inode *ip, - xfs_fileoff_t offset_fsb, xfs_fileoff_t end_fsb, void *data); + unsigned int *rsumlevels); -void xfs_rtbitmap_lock(struct xfs_mount *mp); -void xfs_rtbitmap_unlock(struct xfs_mount *mp); -void xfs_rtbitmap_trans_join(struct xfs_trans *tp); +int xfs_rtfile_initialize_blocks(struct xfs_rtgroup *rtg, + enum xfs_rtg_inodes type, xfs_fileoff_t offset_fsb, + xfs_fileoff_t end_fsb, void *data); +int xfs_rtbitmap_create(struct xfs_rtgroup *rtg, struct xfs_inode *ip, + struct xfs_trans *tp, bool init); +int xfs_rtsummary_create(struct xfs_rtgroup *rtg, struct xfs_inode *ip, + struct xfs_trans *tp, bool init); -/* Lock the rt bitmap inode in shared mode */ -#define XFS_RBMLOCK_BITMAP (1U << 0) -/* Lock the rt summary inode in shared mode */ -#define XFS_RBMLOCK_SUMMARY (1U << 1) - -void xfs_rtbitmap_lock_shared(struct xfs_mount *mp, - unsigned int rbmlock_flags); -void xfs_rtbitmap_unlock_shared(struct xfs_mount *mp, - unsigned int rbmlock_flags); #else /* CONFIG_XFS_RT */ # define xfs_rtfree_extent(t,b,l) (-ENOSYS) -# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) + +static inline int xfs_rtfree_blocks(struct xfs_trans *tp, + struct xfs_rtgroup *rtg, xfs_fsblock_t rtbno, + xfs_filblks_t rtlen) +{ + return -ENOSYS; +} # define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS) # define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) # define xfs_rtbitmap_read_buf(a,b) (-ENOSYS) @@ -345,17 +434,11 @@ void xfs_rtbitmap_unlock_shared(struct xfs_mount *mp, # define xfs_rtbuf_cache_relse(a) (0) # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) static inline xfs_filblks_t -xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents) +xfs_rtbitmap_blockcount_len(struct xfs_mount *mp, xfs_rtbxlen_t rtextents) { /* shut up gcc */ return 0; } -# define xfs_rtsummary_blockcount(mp, l, b) (0) -# define xfs_rtbitmap_lock(mp) do { } while (0) -# define xfs_rtbitmap_trans_join(tp) do { } while (0) -# define xfs_rtbitmap_unlock(mp) do { } while (0) -# define xfs_rtbitmap_lock_shared(mp, lf) do { } while (0) -# define xfs_rtbitmap_unlock_shared(mp, lf) do { } while (0) #endif /* CONFIG_XFS_RT */ #endif /* __XFS_RTBITMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c new file mode 100644 index 000000000000..e74bb059f24f --- /dev/null +++ b/fs/xfs/libxfs/xfs_rtgroup.c @@ -0,0 +1,697 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_rmap_btree.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h" +#include "xfs_rmap.h" +#include "xfs_ag.h" +#include "xfs_ag_resv.h" +#include "xfs_health.h" +#include "xfs_error.h" +#include "xfs_bmap.h" +#include "xfs_defer.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_trace.h" +#include "xfs_inode.h" +#include "xfs_icache.h" +#include "xfs_buf_item.h" +#include "xfs_rtgroup.h" +#include "xfs_rtbitmap.h" +#include "xfs_metafile.h" +#include "xfs_metadir.h" + +/* Find the first usable fsblock in this rtgroup. */ +static inline uint32_t +xfs_rtgroup_min_block( + struct xfs_mount *mp, + xfs_rgnumber_t rgno) +{ + if (xfs_has_rtsb(mp) && rgno == 0) + return mp->m_sb.sb_rextsize; + + return 0; +} + +/* Precompute this group's geometry */ +void +xfs_rtgroup_calc_geometry( + struct xfs_mount *mp, + struct xfs_rtgroup *rtg, + xfs_rgnumber_t rgno, + xfs_rgnumber_t rgcount, + xfs_rtbxlen_t rextents) +{ + rtg->rtg_extents = __xfs_rtgroup_extents(mp, rgno, rgcount, rextents); + rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize; + rtg_group(rtg)->xg_min_gbno = xfs_rtgroup_min_block(mp, rgno); +} + +int +xfs_rtgroup_alloc( + struct xfs_mount *mp, + xfs_rgnumber_t rgno, + xfs_rgnumber_t rgcount, + xfs_rtbxlen_t rextents) +{ + struct xfs_rtgroup *rtg; + int error; + + rtg = kzalloc(sizeof(struct xfs_rtgroup), GFP_KERNEL); + if (!rtg) + return -ENOMEM; + + xfs_rtgroup_calc_geometry(mp, rtg, rgno, rgcount, rextents); + + error = xfs_group_insert(mp, rtg_group(rtg), rgno, XG_TYPE_RTG); + if (error) + goto out_free_rtg; + return 0; + +out_free_rtg: + kfree(rtg); + return error; +} + +void +xfs_rtgroup_free( + struct xfs_mount *mp, + xfs_rgnumber_t rgno) +{ + xfs_group_free(mp, rgno, XG_TYPE_RTG, NULL); +} + +/* Free a range of incore rtgroup objects. */ +void +xfs_free_rtgroups( + struct xfs_mount *mp, + xfs_rgnumber_t first_rgno, + xfs_rgnumber_t end_rgno) +{ + xfs_rgnumber_t rgno; + + for (rgno = first_rgno; rgno < end_rgno; rgno++) + xfs_rtgroup_free(mp, rgno); +} + +/* Initialize some range of incore rtgroup objects. */ +int +xfs_initialize_rtgroups( + struct xfs_mount *mp, + xfs_rgnumber_t first_rgno, + xfs_rgnumber_t end_rgno, + xfs_rtbxlen_t rextents) +{ + xfs_rgnumber_t index; + int error; + + if (first_rgno >= end_rgno) + return 0; + + for (index = first_rgno; index < end_rgno; index++) { + error = xfs_rtgroup_alloc(mp, index, end_rgno, rextents); + if (error) + goto out_unwind_new_rtgs; + } + + return 0; + +out_unwind_new_rtgs: + xfs_free_rtgroups(mp, first_rgno, index); + return error; +} + +/* Compute the number of rt extents in this realtime group. */ +xfs_rtxnum_t +__xfs_rtgroup_extents( + struct xfs_mount *mp, + xfs_rgnumber_t rgno, + xfs_rgnumber_t rgcount, + xfs_rtbxlen_t rextents) +{ + ASSERT(rgno < rgcount); + if (rgno == rgcount - 1) + return rextents - ((xfs_rtxnum_t)rgno * mp->m_sb.sb_rgextents); + + ASSERT(xfs_has_rtgroups(mp)); + return mp->m_sb.sb_rgextents; +} + +xfs_rtxnum_t +xfs_rtgroup_extents( + struct xfs_mount *mp, + xfs_rgnumber_t rgno) +{ + return __xfs_rtgroup_extents(mp, rgno, mp->m_sb.sb_rgcount, + mp->m_sb.sb_rextents); +} + +/* + * Update the rt extent count of the previous tail rtgroup if it changed during + * recovery (i.e. recovery of a growfs). + */ +int +xfs_update_last_rtgroup_size( + struct xfs_mount *mp, + xfs_rgnumber_t prev_rgcount) +{ + struct xfs_rtgroup *rtg; + + ASSERT(prev_rgcount > 0); + + rtg = xfs_rtgroup_grab(mp, prev_rgcount - 1); + if (!rtg) + return -EFSCORRUPTED; + rtg->rtg_extents = __xfs_rtgroup_extents(mp, prev_rgcount - 1, + mp->m_sb.sb_rgcount, mp->m_sb.sb_rextents); + rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize; + xfs_rtgroup_rele(rtg); + return 0; +} + +/* Lock metadata inodes associated with this rt group. */ +void +xfs_rtgroup_lock( + struct xfs_rtgroup *rtg, + unsigned int rtglock_flags) +{ + ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS)); + ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) || + !(rtglock_flags & XFS_RTGLOCK_BITMAP)); + + if (rtglock_flags & XFS_RTGLOCK_BITMAP) { + /* + * Lock both realtime free space metadata inodes for a freespace + * update. + */ + xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL); + xfs_ilock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL); + } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) { + xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED); + } +} + +/* Unlock metadata inodes associated with this rt group. */ +void +xfs_rtgroup_unlock( + struct xfs_rtgroup *rtg, + unsigned int rtglock_flags) +{ + ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS)); + ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) || + !(rtglock_flags & XFS_RTGLOCK_BITMAP)); + + if (rtglock_flags & XFS_RTGLOCK_BITMAP) { + xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL); + xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL); + } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) { + xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED); + } +} + +/* + * Join realtime group metadata inodes to the transaction. The ILOCKs will be + * released on transaction commit. + */ +void +xfs_rtgroup_trans_join( + struct xfs_trans *tp, + struct xfs_rtgroup *rtg, + unsigned int rtglock_flags) +{ + ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS)); + ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED)); + + if (rtglock_flags & XFS_RTGLOCK_BITMAP) { + xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_BITMAP], + XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_SUMMARY], + XFS_ILOCK_EXCL); + } +} + +/* Retrieve rt group geometry. */ +int +xfs_rtgroup_get_geometry( + struct xfs_rtgroup *rtg, + struct xfs_rtgroup_geometry *rgeo) +{ + /* Fill out form. */ + memset(rgeo, 0, sizeof(*rgeo)); + rgeo->rg_number = rtg_rgno(rtg); + rgeo->rg_length = rtg_group(rtg)->xg_block_count; + xfs_rtgroup_geom_health(rtg, rgeo); + return 0; +} + +#ifdef CONFIG_PROVE_LOCKING +static struct lock_class_key xfs_rtginode_lock_class; + +static int +xfs_rtginode_ilock_cmp_fn( + const struct lockdep_map *m1, + const struct lockdep_map *m2) +{ + const struct xfs_inode *ip1 = + container_of(m1, struct xfs_inode, i_lock.dep_map); + const struct xfs_inode *ip2 = + container_of(m2, struct xfs_inode, i_lock.dep_map); + + if (ip1->i_projid < ip2->i_projid) + return -1; + if (ip1->i_projid > ip2->i_projid) + return 1; + return 0; +} + +static inline void +xfs_rtginode_ilock_print_fn( + const struct lockdep_map *m) +{ + const struct xfs_inode *ip = + container_of(m, struct xfs_inode, i_lock.dep_map); + + printk(KERN_CONT " rgno=%u", ip->i_projid); +} + +/* + * Most of the time each of the RTG inode locks are only taken one at a time. + * But when committing deferred ops, more than one of a kind can be taken. + * However, deferred rt ops will be committed in rgno order so there is no + * potential for deadlocks. The code here is needed to tell lockdep about this + * order. + */ +static inline void +xfs_rtginode_lockdep_setup( + struct xfs_inode *ip, + xfs_rgnumber_t rgno, + enum xfs_rtg_inodes type) +{ + lockdep_set_class_and_subclass(&ip->i_lock, &xfs_rtginode_lock_class, + type); + lock_set_cmp_fn(&ip->i_lock, xfs_rtginode_ilock_cmp_fn, + xfs_rtginode_ilock_print_fn); +} +#else +#define xfs_rtginode_lockdep_setup(ip, rgno, type) do { } while (0) +#endif /* CONFIG_PROVE_LOCKING */ + +struct xfs_rtginode_ops { + const char *name; /* short name */ + + enum xfs_metafile_type metafile_type; + + unsigned int sick; /* rtgroup sickness flag */ + + /* Does the fs have this feature? */ + bool (*enabled)(struct xfs_mount *mp); + + /* Create this rtgroup metadata inode and initialize it. */ + int (*create)(struct xfs_rtgroup *rtg, + struct xfs_inode *ip, + struct xfs_trans *tp, + bool init); +}; + +static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = { + [XFS_RTGI_BITMAP] = { + .name = "bitmap", + .metafile_type = XFS_METAFILE_RTBITMAP, + .sick = XFS_SICK_RG_BITMAP, + .create = xfs_rtbitmap_create, + }, + [XFS_RTGI_SUMMARY] = { + .name = "summary", + .metafile_type = XFS_METAFILE_RTSUMMARY, + .sick = XFS_SICK_RG_SUMMARY, + .create = xfs_rtsummary_create, + }, +}; + +/* Return the shortname of this rtgroup inode. */ +const char * +xfs_rtginode_name( + enum xfs_rtg_inodes type) +{ + return xfs_rtginode_ops[type].name; +} + +/* Return the metafile type of this rtgroup inode. */ +enum xfs_metafile_type +xfs_rtginode_metafile_type( + enum xfs_rtg_inodes type) +{ + return xfs_rtginode_ops[type].metafile_type; +} + +/* Should this rtgroup inode be present? */ +bool +xfs_rtginode_enabled( + struct xfs_rtgroup *rtg, + enum xfs_rtg_inodes type) +{ + const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type]; + + if (!ops->enabled) + return true; + return ops->enabled(rtg_mount(rtg)); +} + +/* Mark an rtgroup inode sick */ +void +xfs_rtginode_mark_sick( + struct xfs_rtgroup *rtg, + enum xfs_rtg_inodes type) +{ + const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type]; + + xfs_group_mark_sick(rtg_group(rtg), ops->sick); +} + +/* Load and existing rtgroup inode into the rtgroup structure. */ +int +xfs_rtginode_load( + struct xfs_rtgroup *rtg, + enum xfs_rtg_inodes type, + struct xfs_trans *tp) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_inode *ip; + const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type]; + int error; + + if (!xfs_rtginode_enabled(rtg, type)) + return 0; + + if (!xfs_has_rtgroups(mp)) { + xfs_ino_t ino; + + switch (type) { + case XFS_RTGI_BITMAP: + ino = mp->m_sb.sb_rbmino; + break; + case XFS_RTGI_SUMMARY: + ino = mp->m_sb.sb_rsumino; + break; + default: + /* None of the other types exist on !rtgroups */ + return 0; + } + + error = xfs_trans_metafile_iget(tp, ino, ops->metafile_type, + &ip); + } else { + const char *path; + + if (!mp->m_rtdirip) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; + } + + path = xfs_rtginode_path(rtg_rgno(rtg), type); + if (!path) + return -ENOMEM; + error = xfs_metadir_load(tp, mp->m_rtdirip, path, + ops->metafile_type, &ip); + kfree(path); + } + + if (error) { + if (xfs_metadata_is_sick(error)) + xfs_rtginode_mark_sick(rtg, type); + return error; + } + + if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && + ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) { + xfs_irele(ip); + xfs_rtginode_mark_sick(rtg, type); + return -EFSCORRUPTED; + } + + if (XFS_IS_CORRUPT(mp, ip->i_projid != rtg_rgno(rtg))) { + xfs_irele(ip); + xfs_rtginode_mark_sick(rtg, type); + return -EFSCORRUPTED; + } + + xfs_rtginode_lockdep_setup(ip, rtg_rgno(rtg), type); + rtg->rtg_inodes[type] = ip; + return 0; +} + +/* Release an rtgroup metadata inode. */ +void +xfs_rtginode_irele( + struct xfs_inode **ipp) +{ + if (*ipp) + xfs_irele(*ipp); + *ipp = NULL; +} + +/* Add a metadata inode for a realtime rmap btree. */ +int +xfs_rtginode_create( + struct xfs_rtgroup *rtg, + enum xfs_rtg_inodes type, + bool init) +{ + const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type]; + struct xfs_mount *mp = rtg_mount(rtg); + struct xfs_metadir_update upd = { + .dp = mp->m_rtdirip, + .metafile_type = ops->metafile_type, + }; + int error; + + if (!xfs_rtginode_enabled(rtg, type)) + return 0; + + if (!mp->m_rtdirip) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; + } + + upd.path = xfs_rtginode_path(rtg_rgno(rtg), type); + if (!upd.path) + return -ENOMEM; + + error = xfs_metadir_start_create(&upd); + if (error) + goto out_path; + + error = xfs_metadir_create(&upd, S_IFREG); + if (error) + return error; + + xfs_rtginode_lockdep_setup(upd.ip, rtg_rgno(rtg), type); + + upd.ip->i_projid = rtg_rgno(rtg); + error = ops->create(rtg, upd.ip, upd.tp, init); + if (error) + goto out_cancel; + + error = xfs_metadir_commit(&upd); + if (error) + goto out_path; + + kfree(upd.path); + xfs_finish_inode_setup(upd.ip); + rtg->rtg_inodes[type] = upd.ip; + return 0; + +out_cancel: + xfs_metadir_cancel(&upd, error); + /* Have to finish setting up the inode to ensure it's deleted. */ + if (upd.ip) { + xfs_finish_inode_setup(upd.ip); + xfs_irele(upd.ip); + } +out_path: + kfree(upd.path); + return error; +} + +/* Create the parent directory for all rtgroup inodes and load it. */ +int +xfs_rtginode_mkdir_parent( + struct xfs_mount *mp) +{ + if (!mp->m_metadirip) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; + } + + return xfs_metadir_mkdir(mp->m_metadirip, "rtgroups", &mp->m_rtdirip); +} + +/* Load the parent directory of all rtgroup inodes. */ +int +xfs_rtginode_load_parent( + struct xfs_trans *tp) +{ + struct xfs_mount *mp = tp->t_mountp; + + if (!mp->m_metadirip) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; + } + + return xfs_metadir_load(tp, mp->m_metadirip, "rtgroups", + XFS_METAFILE_DIR, &mp->m_rtdirip); +} + +/* Check superblock fields for a read or a write. */ +static xfs_failaddr_t +xfs_rtsb_verify_common( + struct xfs_buf *bp) +{ + struct xfs_rtsb *rsb = bp->b_addr; + + if (!xfs_verify_magic(bp, rsb->rsb_magicnum)) + return __this_address; + if (rsb->rsb_pad) + return __this_address; + + /* Everything to the end of the fs block must be zero */ + if (memchr_inv(rsb + 1, 0, BBTOB(bp->b_length) - sizeof(*rsb))) + return __this_address; + + return NULL; +} + +/* Check superblock fields for a read or revalidation. */ +static inline xfs_failaddr_t +xfs_rtsb_verify_all( + struct xfs_buf *bp) +{ + struct xfs_rtsb *rsb = bp->b_addr; + struct xfs_mount *mp = bp->b_mount; + xfs_failaddr_t fa; + + fa = xfs_rtsb_verify_common(bp); + if (fa) + return fa; + + if (memcmp(&rsb->rsb_fname, &mp->m_sb.sb_fname, XFSLABEL_MAX)) + return __this_address; + if (!uuid_equal(&rsb->rsb_uuid, &mp->m_sb.sb_uuid)) + return __this_address; + if (!uuid_equal(&rsb->rsb_meta_uuid, &mp->m_sb.sb_meta_uuid)) + return __this_address; + + return NULL; +} + +static void +xfs_rtsb_read_verify( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + if (!xfs_buf_verify_cksum(bp, XFS_RTSB_CRC_OFF)) { + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + return; + } + + fa = xfs_rtsb_verify_all(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); +} + +static void +xfs_rtsb_write_verify( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = xfs_rtsb_verify_common(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + return; + } + + xfs_buf_update_cksum(bp, XFS_RTSB_CRC_OFF); +} + +const struct xfs_buf_ops xfs_rtsb_buf_ops = { + .name = "xfs_rtsb", + .magic = { 0, cpu_to_be32(XFS_RTSB_MAGIC) }, + .verify_read = xfs_rtsb_read_verify, + .verify_write = xfs_rtsb_write_verify, + .verify_struct = xfs_rtsb_verify_all, +}; + +/* Update a realtime superblock from the primary fs super */ +void +xfs_update_rtsb( + struct xfs_buf *rtsb_bp, + const struct xfs_buf *sb_bp) +{ + const struct xfs_dsb *dsb = sb_bp->b_addr; + struct xfs_rtsb *rsb = rtsb_bp->b_addr; + const uuid_t *meta_uuid; + + rsb->rsb_magicnum = cpu_to_be32(XFS_RTSB_MAGIC); + + rsb->rsb_pad = 0; + memcpy(&rsb->rsb_fname, &dsb->sb_fname, XFSLABEL_MAX); + + memcpy(&rsb->rsb_uuid, &dsb->sb_uuid, sizeof(rsb->rsb_uuid)); + + /* + * The metadata uuid is the fs uuid if the metauuid feature is not + * enabled. + */ + if (dsb->sb_features_incompat & + cpu_to_be32(XFS_SB_FEAT_INCOMPAT_META_UUID)) + meta_uuid = &dsb->sb_meta_uuid; + else + meta_uuid = &dsb->sb_uuid; + memcpy(&rsb->rsb_meta_uuid, meta_uuid, sizeof(rsb->rsb_meta_uuid)); +} + +/* + * Update the realtime superblock from a filesystem superblock and log it to + * the given transaction. + */ +struct xfs_buf * +xfs_log_rtsb( + struct xfs_trans *tp, + const struct xfs_buf *sb_bp) +{ + struct xfs_buf *rtsb_bp; + + if (!xfs_has_rtsb(tp->t_mountp)) + return NULL; + + rtsb_bp = xfs_trans_getrtsb(tp); + if (!rtsb_bp) { + /* + * It's possible for the rtgroups feature to be enabled but + * there is no incore rt superblock buffer if the rt geometry + * was specified at mkfs time but the rt section has not yet + * been attached. In this case, rblocks must be zero. + */ + ASSERT(tp->t_mountp->m_sb.sb_rblocks == 0); + return NULL; + } + + xfs_update_rtsb(rtsb_bp, sb_bp); + xfs_trans_ordered_buf(tp, rtsb_bp); + return rtsb_bp; +} diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h new file mode 100644 index 000000000000..7e7e491ff06f --- /dev/null +++ b/fs/xfs/libxfs/xfs_rtgroup.h @@ -0,0 +1,284 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __LIBXFS_RTGROUP_H +#define __LIBXFS_RTGROUP_H 1 + +#include "xfs_group.h" + +struct xfs_mount; +struct xfs_trans; + +enum xfs_rtg_inodes { + XFS_RTGI_BITMAP, /* allocation bitmap */ + XFS_RTGI_SUMMARY, /* allocation summary */ + + XFS_RTGI_MAX, +}; + +#ifdef MAX_LOCKDEP_SUBCLASSES +static_assert(XFS_RTGI_MAX <= MAX_LOCKDEP_SUBCLASSES); +#endif + +/* + * Realtime group incore structure, similar to the per-AG structure. + */ +struct xfs_rtgroup { + struct xfs_group rtg_group; + + /* per-rtgroup metadata inodes */ + struct xfs_inode *rtg_inodes[XFS_RTGI_MAX]; + + /* Number of blocks in this group */ + xfs_rtxnum_t rtg_extents; + + /* + * Cache of rt summary level per bitmap block with the invariant that + * rtg_rsum_cache[bbno] > the maximum i for which rsum[i][bbno] != 0, + * or 0 if rsum[i][bbno] == 0 for all i. + * + * Reads and writes are serialized by the rsumip inode lock. + */ + uint8_t *rtg_rsum_cache; +}; + +static inline struct xfs_rtgroup *to_rtg(struct xfs_group *xg) +{ + return container_of(xg, struct xfs_rtgroup, rtg_group); +} + +static inline struct xfs_group *rtg_group(struct xfs_rtgroup *rtg) +{ + return &rtg->rtg_group; +} + +static inline struct xfs_mount *rtg_mount(const struct xfs_rtgroup *rtg) +{ + return rtg->rtg_group.xg_mount; +} + +static inline xfs_rgnumber_t rtg_rgno(const struct xfs_rtgroup *rtg) +{ + return rtg->rtg_group.xg_gno; +} + +/* Passive rtgroup references */ +static inline struct xfs_rtgroup * +xfs_rtgroup_get( + struct xfs_mount *mp, + xfs_rgnumber_t rgno) +{ + return to_rtg(xfs_group_get(mp, rgno, XG_TYPE_RTG)); +} + +static inline struct xfs_rtgroup * +xfs_rtgroup_hold( + struct xfs_rtgroup *rtg) +{ + return to_rtg(xfs_group_hold(rtg_group(rtg))); +} + +static inline void +xfs_rtgroup_put( + struct xfs_rtgroup *rtg) +{ + xfs_group_put(rtg_group(rtg)); +} + +/* Active rtgroup references */ +static inline struct xfs_rtgroup * +xfs_rtgroup_grab( + struct xfs_mount *mp, + xfs_rgnumber_t rgno) +{ + return to_rtg(xfs_group_grab(mp, rgno, XG_TYPE_RTG)); +} + +static inline void +xfs_rtgroup_rele( + struct xfs_rtgroup *rtg) +{ + xfs_group_rele(rtg_group(rtg)); +} + +static inline struct xfs_rtgroup * +xfs_rtgroup_next_range( + struct xfs_mount *mp, + struct xfs_rtgroup *rtg, + xfs_rgnumber_t start_rgno, + xfs_rgnumber_t end_rgno) +{ + return to_rtg(xfs_group_next_range(mp, rtg ? rtg_group(rtg) : NULL, + start_rgno, end_rgno, XG_TYPE_RTG)); +} + +static inline struct xfs_rtgroup * +xfs_rtgroup_next( + struct xfs_mount *mp, + struct xfs_rtgroup *rtg) +{ + return xfs_rtgroup_next_range(mp, rtg, 0, mp->m_sb.sb_rgcount - 1); +} + +static inline xfs_rtblock_t +xfs_rgbno_to_rtb( + struct xfs_rtgroup *rtg, + xfs_rgblock_t rgbno) +{ + return xfs_gbno_to_fsb(rtg_group(rtg), rgbno); +} + +static inline xfs_rgnumber_t +xfs_rtb_to_rgno( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + return xfs_fsb_to_gno(mp, rtbno, XG_TYPE_RTG); +} + +static inline xfs_rgblock_t +xfs_rtb_to_rgbno( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + return xfs_fsb_to_gbno(mp, rtbno, XG_TYPE_RTG); +} + +/* Is rtbno the start of a RT group? */ +static inline bool +xfs_rtbno_is_group_start( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + return (rtbno & mp->m_groups[XG_TYPE_RTG].blkmask) == 0; +} + +/* Convert an rtgroups rt extent number into an rgbno. */ +static inline xfs_rgblock_t +xfs_rtx_to_rgbno( + struct xfs_rtgroup *rtg, + xfs_rtxnum_t rtx) +{ + struct xfs_mount *mp = rtg_mount(rtg); + + if (likely(mp->m_rtxblklog >= 0)) + return rtx << mp->m_rtxblklog; + return rtx * mp->m_sb.sb_rextsize; +} + +static inline xfs_daddr_t +xfs_rtb_to_daddr( + struct xfs_mount *mp, + xfs_rtblock_t rtbno) +{ + struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG]; + xfs_rgnumber_t rgno = xfs_rtb_to_rgno(mp, rtbno); + uint64_t start_bno = (xfs_rtblock_t)rgno * g->blocks; + + return XFS_FSB_TO_BB(mp, start_bno + (rtbno & g->blkmask)); +} + +static inline xfs_rtblock_t +xfs_daddr_to_rtb( + struct xfs_mount *mp, + xfs_daddr_t daddr) +{ + xfs_rfsblock_t bno = XFS_BB_TO_FSBT(mp, daddr); + + if (xfs_has_rtgroups(mp)) { + struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG]; + xfs_rgnumber_t rgno; + uint32_t rgbno; + + rgno = div_u64_rem(bno, g->blocks, &rgbno); + return ((xfs_rtblock_t)rgno << g->blklog) + rgbno; + } + + return bno; +} + +#ifdef CONFIG_XFS_RT +int xfs_rtgroup_alloc(struct xfs_mount *mp, xfs_rgnumber_t rgno, + xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents); +void xfs_rtgroup_free(struct xfs_mount *mp, xfs_rgnumber_t rgno); + +void xfs_free_rtgroups(struct xfs_mount *mp, xfs_rgnumber_t first_rgno, + xfs_rgnumber_t end_rgno); +int xfs_initialize_rtgroups(struct xfs_mount *mp, xfs_rgnumber_t first_rgno, + xfs_rgnumber_t end_rgno, xfs_rtbxlen_t rextents); + +xfs_rtxnum_t __xfs_rtgroup_extents(struct xfs_mount *mp, xfs_rgnumber_t rgno, + xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents); +xfs_rtxnum_t xfs_rtgroup_extents(struct xfs_mount *mp, xfs_rgnumber_t rgno); +void xfs_rtgroup_calc_geometry(struct xfs_mount *mp, struct xfs_rtgroup *rtg, + xfs_rgnumber_t rgno, xfs_rgnumber_t rgcount, + xfs_rtbxlen_t rextents); + +int xfs_update_last_rtgroup_size(struct xfs_mount *mp, + xfs_rgnumber_t prev_rgcount); + +/* Lock the rt bitmap inode in exclusive mode */ +#define XFS_RTGLOCK_BITMAP (1U << 0) +/* Lock the rt bitmap inode in shared mode */ +#define XFS_RTGLOCK_BITMAP_SHARED (1U << 1) + +#define XFS_RTGLOCK_ALL_FLAGS (XFS_RTGLOCK_BITMAP | \ + XFS_RTGLOCK_BITMAP_SHARED) + +void xfs_rtgroup_lock(struct xfs_rtgroup *rtg, unsigned int rtglock_flags); +void xfs_rtgroup_unlock(struct xfs_rtgroup *rtg, unsigned int rtglock_flags); +void xfs_rtgroup_trans_join(struct xfs_trans *tp, struct xfs_rtgroup *rtg, + unsigned int rtglock_flags); + +int xfs_rtgroup_get_geometry(struct xfs_rtgroup *rtg, + struct xfs_rtgroup_geometry *rgeo); + +int xfs_rtginode_mkdir_parent(struct xfs_mount *mp); +int xfs_rtginode_load_parent(struct xfs_trans *tp); + +const char *xfs_rtginode_name(enum xfs_rtg_inodes type); +enum xfs_metafile_type xfs_rtginode_metafile_type(enum xfs_rtg_inodes type); +bool xfs_rtginode_enabled(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type); +void xfs_rtginode_mark_sick(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type); +int xfs_rtginode_load(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type, + struct xfs_trans *tp); +int xfs_rtginode_create(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type, + bool init); +void xfs_rtginode_irele(struct xfs_inode **ipp); + +static inline const char *xfs_rtginode_path(xfs_rgnumber_t rgno, + enum xfs_rtg_inodes type) +{ + return kasprintf(GFP_KERNEL, "%u.%s", rgno, xfs_rtginode_name(type)); +} + +void xfs_update_rtsb(struct xfs_buf *rtsb_bp, + const struct xfs_buf *sb_bp); +struct xfs_buf *xfs_log_rtsb(struct xfs_trans *tp, + const struct xfs_buf *sb_bp); +#else +static inline void xfs_free_rtgroups(struct xfs_mount *mp, + xfs_rgnumber_t first_rgno, xfs_rgnumber_t end_rgno) +{ +} + +static inline int xfs_initialize_rtgroups(struct xfs_mount *mp, + xfs_rgnumber_t first_rgno, xfs_rgnumber_t end_rgno, + xfs_rtbxlen_t rextents) +{ + return 0; +} + +# define xfs_rtgroup_extents(mp, rgno) (0) +# define xfs_update_last_rtgroup_size(mp, rgno) (-EOPNOTSUPP) +# define xfs_rtgroup_lock(rtg, gf) ((void)0) +# define xfs_rtgroup_unlock(rtg, gf) ((void)0) +# define xfs_rtgroup_trans_join(tp, rtg, gf) ((void)0) +# define xfs_update_rtsb(bp, sb_bp) ((void)0) +# define xfs_log_rtsb(tp, sb_bp) (NULL) +# define xfs_rtgroup_get_geometry(rtg, rgeo) (-EOPNOTSUPP) +#endif /* CONFIG_XFS_RT */ + +#endif /* __LIBXFS_RTGROUP_H */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index d95409f3cba6..e81b240b7158 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -27,6 +27,7 @@ #include "xfs_ag.h" #include "xfs_rtbitmap.h" #include "xfs_exchrange.h" +#include "xfs_rtgroup.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -180,6 +181,8 @@ xfs_sb_version_to_features( features |= XFS_FEAT_EXCHANGE_RANGE; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_PARENT) features |= XFS_FEAT_PARENT; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) + features |= XFS_FEAT_METADIR; return features; } @@ -232,11 +235,37 @@ xfs_validate_sb_read( return 0; } +/* Return the number of extents covered by a single rt bitmap file */ +static xfs_rtbxlen_t +xfs_extents_per_rbm( + struct xfs_sb *sbp) +{ + if (xfs_sb_is_v5(sbp) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) + return sbp->sb_rgextents; + return sbp->sb_rextents; +} + +/* + * Return the payload size of a single rt bitmap block (without the metadata + * header if any). + */ +static inline unsigned int +xfs_rtbmblock_size( + struct xfs_sb *sbp) +{ + if (xfs_sb_is_v5(sbp) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) + return sbp->sb_blocksize - sizeof(struct xfs_rtbuf_blkinfo); + return sbp->sb_blocksize; +} + static uint64_t -xfs_sb_calc_rbmblocks( +xfs_expected_rbmblocks( struct xfs_sb *sbp) { - return howmany_64(sbp->sb_rextents, NBBY * sbp->sb_blocksize); + return howmany_64(xfs_extents_per_rbm(sbp), + NBBY * xfs_rtbmblock_size(sbp)); } /* Validate the realtime geometry */ @@ -258,7 +287,7 @@ xfs_validate_rt_geometry( if (sbp->sb_rextents == 0 || sbp->sb_rextents != div_u64(sbp->sb_rblocks, sbp->sb_rextsize) || sbp->sb_rextslog != xfs_compute_rextslog(sbp->sb_rextents) || - sbp->sb_rbmblocks != xfs_sb_calc_rbmblocks(sbp)) + sbp->sb_rbmblocks != xfs_expected_rbmblocks(sbp)) return false; return true; @@ -339,6 +368,78 @@ xfs_validate_sb_write( return 0; } +int +xfs_compute_rgblklog( + xfs_rtxlen_t rgextents, + xfs_rgblock_t rextsize) +{ + uint64_t rgblocks = (uint64_t)rgextents * rextsize; + + return xfs_highbit64(rgblocks - 1) + 1; +} + +static int +xfs_validate_sb_rtgroups( + struct xfs_mount *mp, + struct xfs_sb *sbp) +{ + uint64_t groups; + int rgblklog; + + if (sbp->sb_rextsize == 0) { + xfs_warn(mp, +"Realtime extent size must not be zero."); + return -EINVAL; + } + + if (sbp->sb_rgextents > XFS_MAX_RGBLOCKS / sbp->sb_rextsize) { + xfs_warn(mp, +"Realtime group size (%u) must be less than %u rt extents.", + sbp->sb_rgextents, + XFS_MAX_RGBLOCKS / sbp->sb_rextsize); + return -EINVAL; + } + + if (sbp->sb_rgextents < XFS_MIN_RGEXTENTS) { + xfs_warn(mp, +"Realtime group size (%u) must be at least %u rt extents.", + sbp->sb_rgextents, XFS_MIN_RGEXTENTS); + return -EINVAL; + } + + if (sbp->sb_rgcount > XFS_MAX_RGNUMBER) { + xfs_warn(mp, +"Realtime groups (%u) must be less than %u.", + sbp->sb_rgcount, XFS_MAX_RGNUMBER); + return -EINVAL; + } + + groups = howmany_64(sbp->sb_rextents, sbp->sb_rgextents); + if (groups != sbp->sb_rgcount) { + xfs_warn(mp, +"Realtime groups (%u) do not cover the entire rt section; need (%llu) groups.", + sbp->sb_rgcount, groups); + return -EINVAL; + } + + /* Exchange-range is required for fsr to work on realtime files */ + if (!(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE)) { + xfs_warn(mp, +"Realtime groups feature requires exchange-range support."); + return -EINVAL; + } + + rgblklog = xfs_compute_rgblklog(sbp->sb_rgextents, sbp->sb_rextsize); + if (sbp->sb_rgblklog != rgblklog) { + xfs_warn(mp, +"Realtime group log (%d) does not match expected value (%d).", + sbp->sb_rgblklog, rgblklog); + return -EINVAL; + } + + return 0; +} + /* Check the validity of the SB. */ STATIC int xfs_validate_sb_common( @@ -350,6 +451,7 @@ xfs_validate_sb_common( uint32_t agcount = 0; uint32_t rem; bool has_dalign; + int error; if (!xfs_verify_magic(bp, dsb->sb_magicnum)) { xfs_warn(mp, @@ -398,6 +500,32 @@ xfs_validate_sb_common( sbp->sb_inoalignmt, align); return -EINVAL; } + + if (!sbp->sb_spino_align || + sbp->sb_spino_align > sbp->sb_inoalignmt || + (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) { + xfs_warn(mp, + "Sparse inode alignment (%u) is invalid.", + sbp->sb_spino_align); + return -EINVAL; + } + } else if (sbp->sb_spino_align) { + xfs_warn(mp, + "Sparse inode alignment (%u) should be zero.", + sbp->sb_spino_align); + return -EINVAL; + } + + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) { + if (memchr_inv(sbp->sb_pad, 0, sizeof(sbp->sb_pad))) { + xfs_warn(mp, +"Metadir superblock padding fields must be zero."); + return -EINVAL; + } + + error = xfs_validate_sb_rtgroups(mp, sbp); + if (error) + return error; } } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { @@ -566,6 +694,14 @@ xfs_validate_sb_common( void xfs_sb_quota_from_disk(struct xfs_sb *sbp) { + if (xfs_sb_is_v5(sbp) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) { + sbp->sb_uquotino = NULLFSINO; + sbp->sb_gquotino = NULLFSINO; + sbp->sb_pquotino = NULLFSINO; + return; + } + /* * older mkfs doesn't initialize quota inodes to NULLFSINO. This * leads to in-core values having two different values for a quota @@ -689,6 +825,20 @@ __xfs_sb_from_disk( /* Convert on-disk flags to in-memory flags? */ if (convert_xquota) xfs_sb_quota_from_disk(to); + + if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) { + to->sb_metadirino = be64_to_cpu(from->sb_metadirino); + to->sb_rgblklog = from->sb_rgblklog; + memcpy(to->sb_pad, from->sb_pad, sizeof(to->sb_pad)); + to->sb_rgcount = be32_to_cpu(from->sb_rgcount); + to->sb_rgextents = be32_to_cpu(from->sb_rgextents); + to->sb_rbmino = NULLFSINO; + to->sb_rsumino = NULLFSINO; + } else { + to->sb_metadirino = NULLFSINO; + to->sb_rgcount = 1; + to->sb_rgextents = 0; + } } void @@ -706,6 +856,15 @@ xfs_sb_quota_to_disk( { uint16_t qflags = from->sb_qflags; + if (xfs_sb_is_v5(from) && + (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) { + to->sb_qflags = cpu_to_be16(from->sb_qflags); + to->sb_uquotino = cpu_to_be64(0); + to->sb_gquotino = cpu_to_be64(0); + to->sb_pquotino = cpu_to_be64(0); + return; + } + to->sb_uquotino = cpu_to_be64(from->sb_uquotino); /* @@ -836,6 +995,16 @@ xfs_sb_to_disk( to->sb_lsn = cpu_to_be64(from->sb_lsn); if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); + + if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) { + to->sb_metadirino = cpu_to_be64(from->sb_metadirino); + to->sb_rgblklog = from->sb_rgblklog; + memset(to->sb_pad, 0, sizeof(to->sb_pad)); + to->sb_rgcount = cpu_to_be32(from->sb_rgcount); + to->sb_rgextents = cpu_to_be32(from->sb_rgextents); + to->sb_rbmino = cpu_to_be64(0); + to->sb_rsumino = cpu_to_be64(0); + } } /* @@ -965,13 +1134,43 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { .verify_write = xfs_sb_write_verify, }; +/* Compute cached rt geometry from the incore sb. */ void -xfs_mount_sb_set_rextsize( +xfs_sb_mount_rextsize( struct xfs_mount *mp, struct xfs_sb *sbp) { + struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG]; + mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize); mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize); + + if (xfs_sb_is_v5(sbp) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) { + rgs->blocks = sbp->sb_rgextents * sbp->sb_rextsize; + rgs->blklog = mp->m_sb.sb_rgblklog; + rgs->blkmask = xfs_mask32lo(mp->m_sb.sb_rgblklog); + } else { + rgs->blocks = 0; + rgs->blklog = 0; + rgs->blkmask = (uint64_t)-1; + } +} + +/* Update incore sb rt extent size, then recompute the cached rt geometry. */ +void +xfs_mount_sb_set_rextsize( + struct xfs_mount *mp, + struct xfs_sb *sbp, + xfs_agblock_t rextsize) +{ + sbp->sb_rextsize = rextsize; + if (xfs_sb_is_v5(sbp) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) + sbp->sb_rgblklog = xfs_compute_rgblklog(sbp->sb_rgextents, + rextsize); + + xfs_sb_mount_rextsize(mp, sbp); } /* @@ -988,6 +1187,8 @@ xfs_sb_mount_common( struct xfs_mount *mp, struct xfs_sb *sbp) { + struct xfs_groups *ags = &mp->m_groups[XG_TYPE_AG]; + mp->m_agfrotor = 0; atomic_set(&mp->m_agirotor, 0); mp->m_maxagi = mp->m_sb.sb_agcount; @@ -996,9 +1197,14 @@ xfs_sb_mount_common( mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; mp->m_blockmask = sbp->sb_blocksize - 1; - mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; - mp->m_blockwmask = mp->m_blockwsize - 1; - xfs_mount_sb_set_rextsize(mp, sbp); + mp->m_blockwsize = xfs_rtbmblock_size(sbp) >> XFS_WORDLOG; + mp->m_rtx_per_rbmblock = mp->m_blockwsize << XFS_NBWORDLOG; + + ags->blocks = mp->m_sb.sb_agblocks; + ags->blklog = mp->m_sb.sb_agblklog; + ags->blkmask = xfs_mask32lo(mp->m_sb.sb_agblklog); + + xfs_sb_mount_rextsize(mp, sbp); mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, true); mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, false); @@ -1045,11 +1251,6 @@ xfs_log_sb( * reservations that have been taken out percpu counters. If we have an * unclean shutdown, this will be corrected by log recovery rebuilding * the counters from the AGF block counts. - * - * Do not update sb_frextents here because it is not part of the lazy - * sb counters, despite having a percpu counter. It is always kept - * consistent with the ondisk rtbitmap by xfs_trans_apply_sb_deltas() - * and hence we don't need have to update it here. */ if (xfs_has_lazysbcount(mp)) { mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount); @@ -1060,6 +1261,16 @@ xfs_log_sb( percpu_counter_sum_positive(&mp->m_fdblocks); } + /* + * sb_frextents was added to the lazy sb counters when the rt groups + * feature was introduced. This counter can go negative due to the way + * we handle nearly-lockless reservations, so we must use the _positive + * variant here to avoid writing out nonsense frextents. + */ + if (xfs_has_rtgroups(mp)) + mp->m_sb.sb_frextents = + percpu_counter_sum_positive(&mp->m_frextents); + xfs_sb_to_disk(bp->b_addr, &mp->m_sb); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1); @@ -1109,18 +1320,17 @@ int xfs_update_secondary_sbs( struct xfs_mount *mp) { - struct xfs_perag *pag; - xfs_agnumber_t agno = 1; + struct xfs_perag *pag = NULL; int saved_error = 0; int error = 0; LIST_HEAD (buffer_list); /* update secondary superblocks. */ - for_each_perag_from(mp, agno, pag) { + while ((pag = xfs_perag_next_from(mp, pag, 1))) { struct xfs_buf *bp; error = xfs_buf_get(mp->m_ddev_targp, - XFS_AG_DADDR(mp, pag->pag_agno, XFS_SB_DADDR), + XFS_AG_DADDR(mp, pag_agno(pag), XFS_SB_DADDR), XFS_FSS_TO_BB(mp, 1), &bp); /* * If we get an error reading or writing alternate superblocks, @@ -1132,7 +1342,7 @@ xfs_update_secondary_sbs( if (error) { xfs_warn(mp, "error allocating secondary superblock for ag %d", - pag->pag_agno); + pag_agno(pag)); if (!saved_error) saved_error = error; continue; @@ -1146,26 +1356,22 @@ xfs_update_secondary_sbs( xfs_buf_relse(bp); /* don't hold too many buffers at once */ - if (agno % 16) + if (pag_agno(pag) % 16) continue; error = xfs_buf_delwri_submit(&buffer_list); if (error) { xfs_warn(mp, "write error %d updating a secondary superblock near ag %d", - error, pag->pag_agno); + error, pag_agno(pag)); if (!saved_error) saved_error = error; continue; } } error = xfs_buf_delwri_submit(&buffer_list); - if (error) { - xfs_warn(mp, - "write error %d updating a secondary superblock near ag %d", - error, agno); - } - + if (error) + xfs_warn(mp, "error %d writing secondary superblocks", error); return saved_error ? saved_error : error; } @@ -1175,10 +1381,12 @@ xfs_update_secondary_sbs( */ int xfs_sync_sb_buf( - struct xfs_mount *mp) + struct xfs_mount *mp, + bool update_rtsb) { struct xfs_trans *tp; struct xfs_buf *bp; + struct xfs_buf *rtsb_bp = NULL; int error; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, 0, &tp); @@ -1188,6 +1396,11 @@ xfs_sync_sb_buf( bp = xfs_trans_getsb(tp); xfs_log_sb(tp); xfs_trans_bhold(tp, bp); + if (update_rtsb) { + rtsb_bp = xfs_log_rtsb(tp, bp); + if (rtsb_bp) + xfs_trans_bhold(tp, rtsb_bp); + } xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); if (error) @@ -1196,7 +1409,11 @@ xfs_sync_sb_buf( * write out the sb buffer to get the changes to disk */ error = xfs_bwrite(bp); + if (!error && rtsb_bp) + error = xfs_bwrite(rtsb_bp); out: + if (rtsb_bp) + xfs_buf_relse(rtsb_bp); xfs_buf_relse(bp); return error; } @@ -1283,6 +1500,8 @@ xfs_fs_geometry( geo->flags |= XFS_FSOP_GEOM_FLAGS_NREXT64; if (xfs_has_exchange_range(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE; + if (xfs_has_metadir(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_METADIR; geo->rtsectsize = sbp->sb_blocksize; geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); @@ -1298,6 +1517,11 @@ xfs_fs_geometry( return; geo->version = XFS_FSOP_GEOM_VERSION_V5; + + if (xfs_has_rtgroups(mp)) { + geo->rgcount = sbp->sb_rgcount; + geo->rgextents = sbp->sb_rgextents; + } } /* Read a secondary superblock. */ diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 885c83755991..34d0dd374e9b 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -15,10 +15,11 @@ struct xfs_perag; extern void xfs_log_sb(struct xfs_trans *tp); extern int xfs_sync_sb(struct xfs_mount *mp, bool wait); -extern int xfs_sync_sb_buf(struct xfs_mount *mp); +extern int xfs_sync_sb_buf(struct xfs_mount *mp, bool update_rtsb); extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp); +void xfs_sb_mount_rextsize(struct xfs_mount *mp, struct xfs_sb *sbp); void xfs_mount_sb_set_rextsize(struct xfs_mount *mp, - struct xfs_sb *sbp); + struct xfs_sb *sbp, xfs_agblock_t rextsize); extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); @@ -43,5 +44,6 @@ bool xfs_validate_stripe_geometry(struct xfs_mount *mp, bool xfs_validate_rt_geometry(struct xfs_sb *sbp); uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); +int xfs_compute_rgblklog(xfs_rtxlen_t rgextents, xfs_rgblock_t rextsize); #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 33b84a3a83ff..e7efdb9ceaf3 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -38,7 +38,10 @@ extern const struct xfs_buf_ops xfs_inode_buf_ops; extern const struct xfs_buf_ops xfs_inode_buf_ra_ops; extern const struct xfs_buf_ops xfs_refcountbt_buf_ops; extern const struct xfs_buf_ops xfs_rmapbt_buf_ops; +extern const struct xfs_buf_ops xfs_rtbitmap_buf_ops; +extern const struct xfs_buf_ops xfs_rtsummary_buf_ops; extern const struct xfs_buf_ops xfs_rtbuf_ops; +extern const struct xfs_buf_ops xfs_rtsb_buf_ops; extern const struct xfs_buf_ops xfs_sb_buf_ops; extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; extern const struct xfs_buf_ops xfs_symlink_buf_ops; @@ -157,6 +160,7 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, #define XFS_TRANS_SB_RBLOCKS 0x00000800 #define XFS_TRANS_SB_REXTENTS 0x00001000 #define XFS_TRANS_SB_REXTSLOG 0x00002000 +#define XFS_TRANS_SB_RGCOUNT 0x00004000 /* * Here we centralize the specification of XFS meta-data buffer reference count diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index 3c40f37e82c7..c962ad64b0c1 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -62,12 +62,12 @@ xfs_trans_ichgtime( ASSERT(tp); xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); - tv = current_time(inode); + /* If the mtime changes, then ctime must also change */ + ASSERT(flags & XFS_ICHGTIME_CHG); + tv = inode_set_ctime_current(inode); if (flags & XFS_ICHGTIME_MOD) inode_set_mtime_to_ts(inode, tv); - if (flags & XFS_ICHGTIME_CHG) - inode_set_ctime_to_ts(inode, tv); if (flags & XFS_ICHGTIME_ACCESS) inode_set_atime_to_ts(inode, tv); if (flags & XFS_ICHGTIME_CREATE) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 1a7f95bcf069..bab402340b5d 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -224,7 +224,7 @@ xfs_rtalloc_block_count( xfs_rtxlen_t rtxlen; rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN); - rtbmp_blocks = xfs_rtbitmap_blockcount(mp, rtxlen); + rtbmp_blocks = xfs_rtbitmap_blockcount_len(mp, rtxlen); return (rtbmp_blocks + 1) * num_ops; } diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index c299b16c9365..1faf04204c5d 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -12,6 +12,8 @@ #include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_ag.h" +#include "xfs_rtbitmap.h" +#include "xfs_rtgroup.h" /* @@ -111,7 +113,7 @@ xfs_verify_ino( /* Is this an internal inode number? */ inline bool -xfs_internal_inum( +xfs_is_sb_inum( struct xfs_mount *mp, xfs_ino_t ino) { @@ -129,24 +131,42 @@ xfs_verify_dir_ino( struct xfs_mount *mp, xfs_ino_t ino) { - if (xfs_internal_inum(mp, ino)) + if (xfs_is_sb_inum(mp, ino)) return false; return xfs_verify_ino(mp, ino); } /* - * Verify that an realtime block number pointer doesn't point off the - * end of the realtime device. + * Verify that a realtime block number pointer neither points outside the + * allocatable areas of the rtgroup nor off the end of the realtime + * device. */ inline bool xfs_verify_rtbno( struct xfs_mount *mp, xfs_rtblock_t rtbno) { + if (xfs_has_rtgroups(mp)) { + xfs_rgnumber_t rgno = xfs_rtb_to_rgno(mp, rtbno); + xfs_rtxnum_t rtx = xfs_rtb_to_rtx(mp, rtbno); + + if (rgno >= mp->m_sb.sb_rgcount) + return false; + if (rtx >= xfs_rtgroup_extents(mp, rgno)) + return false; + if (xfs_has_rtsb(mp) && rgno == 0 && rtx == 0) + return false; + return true; + } + return rtbno < mp->m_sb.sb_rblocks; } -/* Verify that a realtime device extent is fully contained inside the volume. */ +/* + * Verify that an allocated realtime device extent neither points outside + * allocatable areas of the rtgroup, across an rtgroup boundary, nor off the + * end of the realtime device. + */ bool xfs_verify_rtbext( struct xfs_mount *mp, @@ -159,7 +179,14 @@ xfs_verify_rtbext( if (!xfs_verify_rtbno(mp, rtbno)) return false; - return xfs_verify_rtbno(mp, rtbno + len - 1); + if (!xfs_verify_rtbno(mp, rtbno + len - 1)) + return false; + + if (xfs_has_rtgroups(mp) && + xfs_rtb_to_rgno(mp, rtbno) != xfs_rtb_to_rgno(mp, rtbno + len - 1)) + return false; + + return true; } /* Calculate the range of valid icount values. */ @@ -170,13 +197,12 @@ xfs_icount_range( unsigned long long *max) { unsigned long long nr_inos = 0; - struct xfs_perag *pag; - xfs_agnumber_t agno; + struct xfs_perag *pag = NULL; /* root, rtbitmap, rtsum all live in the first chunk */ *min = XFS_INODES_PER_CHUNK; - for_each_perag(mp, agno, pag) + while ((pag = xfs_perag_next(mp, pag))) nr_inos += pag->agino_max - pag->agino_min + 1; *max = nr_inos; } diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index a8cd44d03ef6..bf33c2b1e43e 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -9,10 +9,12 @@ typedef uint32_t prid_t; /* project ID */ typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */ +typedef uint32_t xfs_rgblock_t; /* blockno in realtime group */ typedef uint32_t xfs_agino_t; /* inode # within allocation grp */ typedef uint32_t xfs_extlen_t; /* extent length in blocks */ typedef uint32_t xfs_rtxlen_t; /* file extent length in rtextents */ typedef uint32_t xfs_agnumber_t; /* allocation group number */ +typedef uint32_t xfs_rgnumber_t; /* realtime group number */ typedef uint64_t xfs_extnum_t; /* # of extents in a file */ typedef uint32_t xfs_aextnum_t; /* # extents in an attribute fork */ typedef int64_t xfs_fsize_t; /* bytes in a file */ @@ -53,7 +55,9 @@ typedef void * xfs_failaddr_t; #define NULLFILEOFF ((xfs_fileoff_t)-1) #define NULLAGBLOCK ((xfs_agblock_t)-1) +#define NULLRGBLOCK ((xfs_rgblock_t)-1) #define NULLAGNUMBER ((xfs_agnumber_t)-1) +#define NULLRGNUMBER ((xfs_rgnumber_t)-1) #define NULLCOMMITLSN ((xfs_lsn_t)-1) @@ -212,6 +216,16 @@ enum xbtree_recpacking { XBTREE_RECPACKING_FULL, }; +enum xfs_group_type { + XG_TYPE_AG, + XG_TYPE_RTG, + XG_TYPE_MAX, +} __packed; + +#define XG_TYPE_STRINGS \ + { XG_TYPE_AG, "ag" }, \ + { XG_TYPE_RTG, "rtg" } + /* * Type verifier functions */ @@ -222,7 +236,7 @@ bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno, xfs_fsblock_t len); bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino); -bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino); +bool xfs_is_sb_inum(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); bool xfs_verify_rtbext(struct xfs_mount *mp, xfs_rtblock_t rtbno, |