Diffstat (limited to 'fs/xfs/xfs_discard.c')
 -rw-r--r--  fs/xfs/xfs_discard.c | 308
 1 file changed, 250 insertions, 58 deletions
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index d8c4a5dcca7a..c4bd145f5ec1 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -21,6 +21,7 @@
 #include "xfs_ag.h"
 #include "xfs_health.h"
 #include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
 
 /*
  * Notes on an efficient, low latency fstrim algorithm
@@ -72,6 +73,8 @@
  * extent search so that it overlaps in flight discard IO.
  */
 
+#define XFS_DISCARD_MAX_EXAMINE	(100)
+
 struct workqueue_struct *xfs_discard_wq;
 
 static void
@@ -81,7 +84,7 @@ xfs_discard_endio_work(
 	struct xfs_busy_extents	*extents =
 		container_of(work, struct xfs_busy_extents, endio_work);
 
-	xfs_extent_busy_clear(extents->mount, &extents->extent_list, false);
+	xfs_extent_busy_clear(&extents->extent_list, false);
 	kfree(extents->owner);
 }
 
@@ -100,6 +103,24 @@ xfs_discard_endio(
 	bio_put(bio);
 }
 
+static inline struct block_device *
+xfs_group_bdev(
+	const struct xfs_group	*xg)
+{
+	struct xfs_mount	*mp = xg->xg_mount;
+
+	switch (xg->xg_type) {
+	case XG_TYPE_AG:
+		return mp->m_ddev_targp->bt_bdev;
+	case XG_TYPE_RTG:
+		return mp->m_rtdev_targp->bt_bdev;
+	default:
+		ASSERT(0);
+		break;
+	}
+	return NULL;
+}
+
 /*
  * Walk the discard list and issue discards on all the busy extents in the
  * list. We plug and chain the bios so that we only need a single completion
@@ -117,11 +138,11 @@ xfs_discard_extents(
 
 	blk_start_plug(&plug);
 	list_for_each_entry(busyp, &extents->extent_list, list) {
-		trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
-				busyp->length);
+		trace_xfs_discard_extent(busyp->group, busyp->bno,
+				busyp->length);
 
-		error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
-				XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
+		error = __blkdev_issue_discard(xfs_group_bdev(busyp->group),
+				xfs_gbno_to_daddr(busyp->group, busyp->bno),
 				XFS_FSB_TO_BB(mp, busyp->length), GFP_KERNEL,
 				&bio);
 		if (error && error != -EOPNOTSUPP) {
@@ -160,13 +181,13 @@ xfs_trim_gather_extents(
 	struct xfs_trim_cur	*tcur,
 	struct xfs_busy_extents	*extents)
 {
-	struct xfs_mount	*mp = pag->pag_mount;
+	struct xfs_mount	*mp = pag_mount(pag);
 	struct xfs_trans	*tp;
 	struct xfs_btree_cur	*cur;
 	struct xfs_buf		*agbp;
 	int			error;
 	int			i;
-	int			batch = 100;
+	int			batch = XFS_DISCARD_MAX_EXAMINE;
 
 	/*
 	 * Force out the log. This means any transactions that might have freed
@@ -239,11 +260,11 @@ xfs_trim_gather_extents(
 		 * overlapping ranges for now.
 		 */
 		if (fbno + flen < tcur->start) {
-			trace_xfs_discard_exclude(mp, pag->pag_agno, fbno, flen);
+			trace_xfs_discard_exclude(pag_group(pag), fbno, flen);
 			goto next_extent;
 		}
 		if (fbno > tcur->end) {
-			trace_xfs_discard_exclude(mp, pag->pag_agno, fbno, flen);
+			trace_xfs_discard_exclude(pag_group(pag), fbno, flen);
 			if (tcur->by_bno) {
 				tcur->count = 0;
 				break;
@@ -261,7 +282,7 @@ xfs_trim_gather_extents(
 
 		/* Too small? Give up. */
 		if (flen < tcur->minlen) {
-			trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno, flen);
+			trace_xfs_discard_toosmall(pag_group(pag), fbno, flen);
 			if (tcur->by_bno)
 				goto next_extent;
 			tcur->count = 0;
@@ -272,12 +293,12 @@ xfs_trim_gather_extents(
 		 * If any blocks in the range are still busy, skip the
 		 * discard and try again the next time.
 		 */
-		if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
-			trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen);
+		if (xfs_extent_busy_search(pag_group(pag), fbno, flen)) {
+			trace_xfs_discard_busy(pag_group(pag), fbno, flen);
 			goto next_extent;
 		}
 
-		xfs_extent_busy_insert_discard(pag, fbno, flen,
+		xfs_extent_busy_insert_discard(pag_group(pag), fbno, flen,
 				&extents->extent_list);
 next_extent:
 		if (tcur->by_bno)
@@ -301,7 +322,7 @@ next_extent:
 	 * we aren't going to issue a discard on them any more.
 	 */
 	if (error)
-		xfs_extent_busy_clear(mp, &extents->extent_list, false);
+		xfs_extent_busy_clear(&extents->extent_list, false);
out_del_cursor:
 	xfs_btree_del_cursor(cur, error);
out_trans_cancel:
@@ -335,7 +356,7 @@ xfs_trim_perag_extents(
 	};
 	int			error = 0;
 
-	if (start != 0 || end != pag->block_count)
+	if (start != 0 || end != pag_group(pag)->xg_block_count)
 		tcur.by_bno = true;
 
 	do {
@@ -347,7 +368,6 @@ xfs_trim_perag_extents(
 			break;
 		}
 
-		extents->mount = pag->pag_mount;
 		extents->owner = extents;
 		INIT_LIST_HEAD(&extents->extent_list);
 
@@ -367,7 +387,7 @@ xfs_trim_perag_extents(
 		 * list after this function call, as it may have been freed by
 		 * the time control returns to us.
 		 */
-		error = xfs_discard_extents(pag->pag_mount, extents);
+		error = xfs_discard_extents(pag_mount(pag), extents);
 		if (error)
 			break;
 
@@ -389,8 +409,8 @@ xfs_trim_datadev_extents(
 {
 	xfs_agnumber_t		start_agno, end_agno;
 	xfs_agblock_t		start_agbno, end_agbno;
+	struct xfs_perag	*pag = NULL;
 	xfs_daddr_t		ddev_end;
-	struct xfs_perag	*pag;
 	int			last_error = 0, error;
 
 	ddev_end = min_t(xfs_daddr_t, end,
@@ -401,10 +421,10 @@ xfs_trim_datadev_extents(
 	end_agno = xfs_daddr_to_agno(mp, ddev_end);
 	end_agbno = xfs_daddr_to_agbno(mp, ddev_end);
 
-	for_each_perag_range(mp, start_agno, end_agno, pag) {
-		xfs_agblock_t	agend = pag->block_count;
+	while ((pag = xfs_perag_next_range(mp, pag, start_agno, end_agno))) {
+		xfs_agblock_t	agend = pag_group(pag)->xg_block_count;
 
-		if (start_agno == end_agno)
+		if (pag_agno(pag) == end_agno)
 			agend = end_agbno;
 		error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen);
 		if (error)
@@ -479,7 +499,7 @@ xfs_discard_rtdev_extents(
 		trace_xfs_discard_rtextent(mp, busyp->bno, busyp->length);
 
 		error = __blkdev_issue_discard(bdev,
-				XFS_FSB_TO_BB(mp, busyp->bno),
+				xfs_rtb_to_daddr(mp, busyp->bno),
 				XFS_FSB_TO_BB(mp, busyp->length),
 				GFP_NOFS, &bio);
 		if (error)
@@ -506,7 +526,7 @@ xfs_discard_rtdev_extents(
 
 static int
 xfs_trim_gather_rtextent(
-	struct xfs_mount		*mp,
+	struct xfs_rtgroup		*rtg,
 	struct xfs_trans		*tp,
 	const struct xfs_rtalloc_rec	*rec,
 	void				*priv)
@@ -525,12 +545,12 @@ xfs_trim_gather_rtextent(
 		return -ECANCELED;
 	}
 
-	rbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
-	rlen = xfs_rtx_to_rtb(mp, rec->ar_extcount);
+	rbno = xfs_rtx_to_rtb(rtg, rec->ar_startext);
+	rlen = xfs_rtbxlen_to_blen(rtg_mount(rtg), rec->ar_extcount);
 
 	/* Ignore too small. */
 	if (rlen < tr->minlen_fsb) {
-		trace_xfs_discard_rttoosmall(mp, rbno, rlen);
+		trace_xfs_discard_rttoosmall(rtg_mount(rtg), rbno, rlen);
 		return 0;
 	}
 
@@ -547,70 +567,185 @@ xfs_trim_gather_rtextent(
 	return 0;
 }
 
+/* Trim extents on an !rtgroups realtime device */
 static int
-xfs_trim_rtdev_extents(
-	struct xfs_mount	*mp,
-	xfs_daddr_t		start,
-	xfs_daddr_t		end,
+xfs_trim_rtextents(
+	struct xfs_rtgroup	*rtg,
+	xfs_rtxnum_t		low,
+	xfs_rtxnum_t		high,
 	xfs_daddr_t		minlen)
 {
+	struct xfs_mount	*mp = rtg_mount(rtg);
 	struct xfs_trim_rtdev	tr = {
 		.minlen_fsb	= XFS_BB_TO_FSB(mp, minlen),
+		.extent_list	= LIST_HEAD_INIT(tr.extent_list),
 	};
-	xfs_rtxnum_t		low, high;
 	struct xfs_trans	*tp;
-	xfs_daddr_t		rtdev_daddr;
 	int			error;
 
-	INIT_LIST_HEAD(&tr.extent_list);
-
-	/* Shift the start and end downwards to match the rt device. */
-	rtdev_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
-	if (start > rtdev_daddr)
-		start -= rtdev_daddr;
-	else
-		start = 0;
-
-	if (end <= rtdev_daddr)
-		return 0;
-	end -= rtdev_daddr;
-
 	error = xfs_trans_alloc_empty(mp, &tp);
 	if (error)
 		return error;
 
-	end = min_t(xfs_daddr_t, end,
-			XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks) - 1);
-
-	/* Convert the rt blocks to rt extents */
-	low = xfs_rtb_to_rtxup(mp, XFS_BB_TO_FSB(mp, start));
-	high = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSBT(mp, end));
-
 	/*
 	 * Walk the free ranges between low and high. The query_range function
 	 * trims the extents returned.
 	 */
 	do {
-		tr.stop_rtx = low + (mp->m_sb.sb_blocksize * NBBY);
-		xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);
-		error = xfs_rtalloc_query_range(mp, tp, low, high,
+		tr.stop_rtx = low + xfs_rtbitmap_rtx_per_rbmblock(mp);
+		xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+		error = xfs_rtalloc_query_range(rtg, tp, low, high,
 				xfs_trim_gather_rtextent, &tr);
 
 		if (error == -ECANCELED)
 			error = 0;
 		if (error) {
-			xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
+			xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
 			xfs_discard_free_rtdev_extents(&tr);
 			break;
 		}
 
 		if (list_empty(&tr.extent_list)) {
-			xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
+			xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
 			break;
 		}
 
 		error = xfs_discard_rtdev_extents(mp, &tr);
-		xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
+		xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+		if (error)
+			break;
+
+		low = tr.restart_rtx;
+	} while (!xfs_trim_should_stop() && low <= high);
+
+	xfs_trans_cancel(tp);
+	return error;
+}
+
+struct xfs_trim_rtgroup {
+	/* list of rtgroup extents to free */
+	struct xfs_busy_extents	*extents;
+
+	/* minimum length that caller allows us to trim */
+	xfs_rtblock_t		minlen_fsb;
+
+	/* restart point for the rtbitmap walk */
+	xfs_rtxnum_t		restart_rtx;
+
+	/* number of extents to examine before stopping to issue discard ios */
+	int			batch;
+
+	/* number of extents queued for discard */
+	int			queued;
+};
+
+static int
+xfs_trim_gather_rtgroup_extent(
+	struct xfs_rtgroup		*rtg,
+	struct xfs_trans		*tp,
+	const struct xfs_rtalloc_rec	*rec,
+	void				*priv)
+{
+	struct xfs_trim_rtgroup		*tr = priv;
+	xfs_rgblock_t			rgbno;
+	xfs_extlen_t			len;
+
+	if (--tr->batch <= 0) {
+		/*
+		 * If we've checked a large number of extents, update the
+		 * cursor to point at this extent so we restart the next batch
+		 * from this extent.
+		 */
+		tr->restart_rtx = rec->ar_startext;
+		return -ECANCELED;
+	}
+
+	rgbno = xfs_rtx_to_rgbno(rtg, rec->ar_startext);
+	len = xfs_rtxlen_to_extlen(rtg_mount(rtg), rec->ar_extcount);
+
+	/* Ignore too small. */
+	if (len < tr->minlen_fsb) {
+		trace_xfs_discard_toosmall(rtg_group(rtg), rgbno, len);
+		return 0;
+	}
+
+	/*
+	 * If any blocks in the range are still busy, skip the discard and try
+	 * again the next time.
+	 */
+	if (xfs_extent_busy_search(rtg_group(rtg), rgbno, len)) {
+		trace_xfs_discard_busy(rtg_group(rtg), rgbno, len);
+		return 0;
+	}
+
+	xfs_extent_busy_insert_discard(rtg_group(rtg), rgbno, len,
+			&tr->extents->extent_list);
+
+	tr->queued++;
+	tr->restart_rtx = rec->ar_startext + rec->ar_extcount;
+	return 0;
+}
+
+/* Trim extents in this rtgroup using the busy extent machinery. */
+static int
+xfs_trim_rtgroup_extents(
+	struct xfs_rtgroup	*rtg,
+	xfs_rtxnum_t		low,
+	xfs_rtxnum_t		high,
+	xfs_daddr_t		minlen)
+{
+	struct xfs_mount	*mp = rtg_mount(rtg);
+	struct xfs_trim_rtgroup	tr = {
+		.minlen_fsb	= XFS_BB_TO_FSB(mp, minlen),
+	};
+	struct xfs_trans	*tp;
+	int			error;
+
+	error = xfs_trans_alloc_empty(mp, &tp);
+	if (error)
+		return error;
+
+	/*
+	 * Walk the free ranges between low and high. The query_range function
+	 * trims the extents returned.
+	 */
+	do {
+		tr.extents = kzalloc(sizeof(*tr.extents), GFP_KERNEL);
+		if (!tr.extents) {
+			error = -ENOMEM;
+			break;
+		}
+
+		tr.queued = 0;
+		tr.batch = XFS_DISCARD_MAX_EXAMINE;
+		tr.extents->owner = tr.extents;
+		INIT_LIST_HEAD(&tr.extents->extent_list);
+
+		xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+		error = xfs_rtalloc_query_range(rtg, tp, low, high,
+				xfs_trim_gather_rtgroup_extent, &tr);
+		xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+		if (error == -ECANCELED)
+			error = 0;
+		if (error) {
+			kfree(tr.extents);
+			break;
+		}
+
+		if (!tr.queued)
+			break;
+
+		/*
+		 * We hand the extent list to the discard function here so the
+		 * discarded extents can be removed from the busy extent list.
+		 * This allows the discards to run asynchronously with
+		 * gathering the next round of extents to discard.
+		 *
+		 * However, we must ensure that we do not reference the extent
+		 * list after this function call, as it may have been freed by
+		 * the time control returns to us.
+		 */
+		error = xfs_discard_extents(rtg_mount(rtg), tr.extents);
 		if (error)
 			break;
 
@@ -620,6 +755,63 @@ xfs_trim_rtdev_extents(
 	xfs_trans_cancel(tp);
 	return error;
 }
+
+static int
+xfs_trim_rtdev_extents(
+	struct xfs_mount	*mp,
+	xfs_daddr_t		start,
+	xfs_daddr_t		end,
+	xfs_daddr_t		minlen)
+{
+	xfs_rtblock_t		start_rtbno, end_rtbno;
+	xfs_rtxnum_t		start_rtx, end_rtx;
+	xfs_rgnumber_t		start_rgno, end_rgno;
+	xfs_daddr_t		daddr_offset;
+	int			last_error = 0, error;
+	struct xfs_rtgroup	*rtg = NULL;
+
+	/* Shift the start and end downwards to match the rt device. */
+	daddr_offset = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+	if (start > daddr_offset)
+		start -= daddr_offset;
+	else
+		start = 0;
+	start_rtbno = xfs_daddr_to_rtb(mp, start);
+	start_rtx = xfs_rtb_to_rtx(mp, start_rtbno);
+	start_rgno = xfs_rtb_to_rgno(mp, start_rtbno);
+
+	if (end <= daddr_offset)
+		return 0;
+	else
+		end -= daddr_offset;
+	end_rtbno = xfs_daddr_to_rtb(mp, end);
+	end_rtx = xfs_rtb_to_rtx(mp, end_rtbno + mp->m_sb.sb_rextsize - 1);
+	end_rgno = xfs_rtb_to_rgno(mp, end_rtbno);
+
+	while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rgno, end_rgno))) {
+		xfs_rtxnum_t	rtg_end = rtg->rtg_extents;
+
+		if (rtg_rgno(rtg) == end_rgno)
+			rtg_end = min(rtg_end, end_rtx);
+
+		if (xfs_has_rtgroups(mp))
+			error = xfs_trim_rtgroup_extents(rtg, start_rtx,
+					rtg_end, minlen);
+		else
+			error = xfs_trim_rtextents(rtg, start_rtx, rtg_end,
+					minlen);
+		if (error)
+			last_error = error;
+
+		if (xfs_trim_should_stop()) {
+			xfs_rtgroup_rele(rtg);
+			break;
+		}
+		start_rtx = 0;
+	}
+
+	return last_error;
+}
 #else
 # define xfs_trim_rtdev_extents(...)	(-EOPNOTSUPP)
 #endif /* CONFIG_XFS_RT */
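
Note: the new xfs_group_bdev() helper added by this patch picks the block device by group type, so one discard path serves both data-device AGs and realtime groups. The following is a minimal standalone sketch of that tagged dispatch pattern, not kernel code; every name in it is an illustrative stand-in for the real xfs_group fields.

	#include <assert.h>
	#include <stdio.h>

	enum xg_type { XG_TYPE_AG, XG_TYPE_RTG };	/* data AG vs. rt group */

	struct xgroup {
		enum xg_type	xg_type;
	};

	/* pick the backing device by group type, as xfs_group_bdev() does */
	static const char *
	xgroup_bdev(const struct xgroup *xg)
	{
		switch (xg->xg_type) {
		case XG_TYPE_AG:
			return "data device";	/* mp->m_ddev_targp->bt_bdev */
		case XG_TYPE_RTG:
			return "rt device";	/* mp->m_rtdev_targp->bt_bdev */
		default:
			assert(0);		/* mirrors ASSERT(0) above */
			return NULL;
		}
	}

	int main(void)
	{
		struct xgroup ag = { .xg_type = XG_TYPE_AG };
		struct xgroup rtg = { .xg_type = XG_TYPE_RTG };

		printf("AG busy extents discard on the %s\n", xgroup_bdev(&ag));
		printf("rtgroup busy extents discard on the %s\n", xgroup_bdev(&rtg));
		return 0;
	}

This is why xfs_discard_extents() can drop the mp->m_ddev_targp reference: the group carried by each busy extent now knows which device it belongs to.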
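The loop shape shared by xfs_trim_perag_extents() and the new xfs_trim_rtgroup_extents() is: examine at most XFS_DISCARD_MAX_EXAMINE free-space records while holding the free-space lock, queue the trimmable ones on a busy-extent list, drop the lock, hand the list to xfs_discard_extents(), then restart the walk from a saved cursor. Below is a userspace sketch of just that control flow, under the stated assumptions; the counters and printf "IO" stand in for the btree/rtbitmap walk and the discard bios.

	#include <stdio.h>

	#define MAX_EXAMINE	4	/* stand-in for XFS_DISCARD_MAX_EXAMINE */
	#define NR_FREE		10	/* pretend number of free-space records */

	int main(void)
	{
		int next = 0;	/* restart cursor (tcur->start / tr.restart_rtx) */

		while (next < NR_FREE) {
			int batch = MAX_EXAMINE;
			int queued = 0;

			/* gather phase: the kernel holds the free-space lock here */
			while (next < NR_FREE && batch-- > 0) {
				printf("  queue free extent %d for discard\n", next);
				queued++;
				next++;	/* becomes the restart point */
			}
			/* lock is dropped here, before any discard IO is issued */

			if (!queued)
				break;

			/* issue phase: discards overlap the next gather pass */
			printf("issue %d discards, restart cursor at %d\n",
					queued, next);
		}
		return 0;
	}

Because the lock is released before the discards are issued, the next gather batch can overlap in-flight discard IO, which is the low-latency goal described in the "Notes on an efficient, low latency fstrim algorithm" comment at the top of the file.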