Diffstat (limited to 'fs/erofs/zdata.c')
-rw-r--r--	fs/erofs/zdata.c	221
1 file changed, 167 insertions(+), 54 deletions(-)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index a569ff9dfd04..01f147505487 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -44,12 +44,15 @@ __Z_EROFS_BVSET(z_erofs_bvset_inline, Z_EROFS_INLINE_BVECS);
* A: Field should be accessed / updated atomically for parallelized code.
*/
struct z_erofs_pcluster {
- struct erofs_workgroup obj;
struct mutex lock;
+ struct lockref lockref;
/* A: point to next chained pcluster or TAILs */
z_erofs_next_pcluster_t next;
+ /* I: start block address of this pcluster */
+ erofs_off_t index;
+
/* L: the maximum decompression size of this round */
unsigned int length;
@@ -108,7 +111,7 @@ struct z_erofs_decompressqueue {
static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
{
- return !pcl->obj.index;
+ return !pcl->index;
}
static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
@@ -116,7 +119,6 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT;
}
-#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo)
{
return fo->mapping == MNGD_MAPPING(sbi);
@@ -548,7 +550,7 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
if (READ_ONCE(pcl->compressed_bvecs[i].page))
continue;
- page = find_get_page(mc, pcl->obj.index + i);
+ page = find_get_page(mc, pcl->index + i);
if (!page) {
/* I/O is needed, not possible to decompress directly */
standalone = false;
@@ -564,13 +566,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
continue;
set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
}
- spin_lock(&pcl->obj.lockref.lock);
+ spin_lock(&pcl->lockref.lock);
if (!pcl->compressed_bvecs[i].page) {
pcl->compressed_bvecs[i].page = page ? page : newpage;
- spin_unlock(&pcl->obj.lockref.lock);
+ spin_unlock(&pcl->lockref.lock);
continue;
}
- spin_unlock(&pcl->obj.lockref.lock);
+ spin_unlock(&pcl->lockref.lock);
if (page)
put_page(page);
@@ -587,11 +589,9 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
}
/* (erofs_shrinker) disconnect cached encoded data from pclusters */
-int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
- struct erofs_workgroup *grp)
+static int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
+ struct z_erofs_pcluster *pcl)
{
- struct z_erofs_pcluster *const pcl =
- container_of(grp, struct z_erofs_pcluster, obj);
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
struct folio *folio;
int i;
@@ -626,8 +626,8 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
return true;
ret = false;
- spin_lock(&pcl->obj.lockref.lock);
- if (pcl->obj.lockref.count <= 0) {
+ spin_lock(&pcl->lockref.lock);
+ if (pcl->lockref.count <= 0) {
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
for (; bvec < end; ++bvec) {
if (bvec->page && page_folio(bvec->page) == folio) {
@@ -638,7 +638,7 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
}
}
}
- spin_unlock(&pcl->obj.lockref.lock);
+ spin_unlock(&pcl->lockref.lock);
return ret;
}
@@ -689,15 +689,15 @@ static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
if (exclusive) {
/* give priority for inplace I/O to use file pages first */
- spin_lock(&pcl->obj.lockref.lock);
+ spin_lock(&pcl->lockref.lock);
while (fe->icur > 0) {
if (pcl->compressed_bvecs[--fe->icur].page)
continue;
pcl->compressed_bvecs[fe->icur] = *bvec;
- spin_unlock(&pcl->obj.lockref.lock);
+ spin_unlock(&pcl->lockref.lock);
return 0;
}
- spin_unlock(&pcl->obj.lockref.lock);
+ spin_unlock(&pcl->lockref.lock);
/* otherwise, check if it can be used as a bvpage */
if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
@@ -710,13 +710,30 @@ static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
return ret;
}
+static bool z_erofs_get_pcluster(struct z_erofs_pcluster *pcl)
+{
+ if (lockref_get_not_zero(&pcl->lockref))
+ return true;
+
+ spin_lock(&pcl->lockref.lock);
+ if (__lockref_is_dead(&pcl->lockref)) {
+ spin_unlock(&pcl->lockref.lock);
+ return false;
+ }
+
+ if (!pcl->lockref.count++)
+ atomic_long_dec(&erofs_global_shrink_cnt);
+ spin_unlock(&pcl->lockref.lock);
+ return true;
+}
+
static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
{
struct erofs_map_blocks *map = &fe->map;
struct super_block *sb = fe->inode->i_sb;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
bool ztailpacking = map->m_flags & EROFS_MAP_META;
- struct z_erofs_pcluster *pcl;
- struct erofs_workgroup *grp;
+ struct z_erofs_pcluster *pcl, *pre;
int err;
if (!(map->m_flags & EROFS_MAP_ENCODED) ||
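
The new z_erofs_get_pcluster() above follows the usual lockref tryget idiom: lockref_get_not_zero() is the lockless fast path, and the locked slow path rejects dead pclusters and, on a 0 -> 1 transition, pulls the object back off the global shrink count. A minimal sketch of the same idiom, with made-up names (cached_obj, nr_zero_ref_objects) that are illustrative only, not part of this patch:

#include <linux/atomic.h>
#include <linux/lockref.h>
#include <linux/rcupdate.h>
#include <linux/xarray.h>

/* number of reapable (count == 0) objects; stands in for erofs_global_shrink_cnt */
static atomic_long_t nr_zero_ref_objects;

struct cached_obj {
	struct lockref lockref;	/* count == 0: reapable; count < 0: dead */
	unsigned long index;	/* slot in the owning XArray */
	struct rcu_head rcu;	/* for RCU-deferred freeing */
};

static bool cached_obj_tryget(struct cached_obj *o)
{
	if (lockref_get_not_zero(&o->lockref))
		return true;		/* fast path: cmpxchg, no spinlock */

	spin_lock(&o->lockref.lock);
	if (__lockref_is_dead(&o->lockref)) {
		spin_unlock(&o->lockref.lock);
		return false;		/* already being torn down */
	}
	if (!o->lockref.count++)	/* 0 -> 1: revive, hide from reaper */
		atomic_long_dec(&nr_zero_ref_objects);
	spin_unlock(&o->lockref.lock);
	return true;
}
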
@@ -730,8 +747,8 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
if (IS_ERR(pcl))
return PTR_ERR(pcl);
- spin_lock_init(&pcl->obj.lockref.lock);
- pcl->obj.lockref.count = 1; /* one ref for this request */
+ spin_lock_init(&pcl->lockref.lock);
+ pcl->lockref.count = 1; /* one ref for this request */
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = 0;
pcl->partial = true;
@@ -749,19 +766,26 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
DBG_BUGON(!mutex_trylock(&pcl->lock));
if (ztailpacking) {
- pcl->obj.index = 0; /* which indicates ztailpacking */
+ pcl->index = 0; /* which indicates ztailpacking */
} else {
- pcl->obj.index = erofs_blknr(sb, map->m_pa);
-
- grp = erofs_insert_workgroup(fe->inode->i_sb, &pcl->obj);
- if (IS_ERR(grp)) {
- err = PTR_ERR(grp);
- goto err_out;
+ pcl->index = erofs_blknr(sb, map->m_pa);
+ while (1) {
+ xa_lock(&sbi->managed_pslots);
+ pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->index,
+ NULL, pcl, GFP_KERNEL);
+ if (!pre || xa_is_err(pre) || z_erofs_get_pcluster(pre)) {
+ xa_unlock(&sbi->managed_pslots);
+ break;
+ }
+ /* try to legitimize the current in-tree one */
+ xa_unlock(&sbi->managed_pslots);
+ cond_resched();
}
-
- if (grp != &pcl->obj) {
- fe->pcl = container_of(grp,
- struct z_erofs_pcluster, obj);
+ if (xa_is_err(pre)) {
+ err = xa_err(pre);
+ goto err_out;
+ } else if (pre) {
+ fe->pcl = pre;
err = -EEXIST;
goto err_out;
}
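
With erofs_insert_workgroup() gone, registration races through managed_pslots directly: __xa_cmpxchg() under xa_lock() either installs the fresh pcluster or returns the incumbent, and a failed tryget on the incumbent means it is mid-teardown, so the loop waits for the shrinker to erase it. A condensed sketch of this insert-or-lookup pattern, reusing the illustrative cached_obj helpers from the previous sketch:

#include <linux/err.h>
#include <linux/sched.h>

/* Install @new at @index, or take a reference on a live incumbent. */
static struct cached_obj *obj_insert(struct xarray *xa, unsigned long index,
				     struct cached_obj *new)
{
	struct cached_obj *old;

	while (1) {
		xa_lock(xa);
		old = __xa_cmpxchg(xa, index, NULL, new, GFP_KERNEL);
		if (!old || xa_is_err(old) || cached_obj_tryget(old)) {
			xa_unlock(xa);
			break;
		}
		/* incumbent is dying but not yet erased; wait and retry */
		xa_unlock(xa);
		cond_resched();
	}
	if (xa_is_err(old))
		return ERR_PTR(xa_err(old));
	return old ?: new;	/* non-NULL @old is the -EEXIST case */
}
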
@@ -781,7 +805,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
struct erofs_map_blocks *map = &fe->map;
struct super_block *sb = fe->inode->i_sb;
erofs_blk_t blknr = erofs_blknr(sb, map->m_pa);
- struct erofs_workgroup *grp = NULL;
+ struct z_erofs_pcluster *pcl = NULL;
int ret;
DBG_BUGON(fe->pcl);
@@ -789,14 +813,23 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
if (!(map->m_flags & EROFS_MAP_META)) {
- grp = erofs_find_workgroup(sb, blknr);
+ while (1) {
+ rcu_read_lock();
+ pcl = xa_load(&EROFS_SB(sb)->managed_pslots, blknr);
+ if (!pcl || z_erofs_get_pcluster(pcl)) {
+ DBG_BUGON(pcl && blknr != pcl->index);
+ rcu_read_unlock();
+ break;
+ }
+ rcu_read_unlock();
+ }
} else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
- if (grp) {
- fe->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+ if (pcl) {
+ fe->pcl = pcl;
ret = -EEXIST;
} else {
ret = z_erofs_register_pcluster(fe);
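
Lookup is the RCU mirror of registration: because frees are deferred through call_rcu(), xa_load() under rcu_read_lock() can safely dereference the entry, and the same tryget-or-retry dance handles entries caught mid-teardown. Sketched with the illustrative helpers above:

/* RCU-protected lookup; retries while a dying entry is still in the tree. */
static struct cached_obj *obj_find(struct xarray *xa, unsigned long index)
{
	struct cached_obj *o;

	while (1) {
		rcu_read_lock();	/* keeps @o allocated across the tryget */
		o = xa_load(xa, index);
		if (!o || cached_obj_tryget(o)) {
			rcu_read_unlock();
			return o;	/* NULL, or a live reference */
		}
		rcu_read_unlock();	/* dead entry: spin until it is erased */
	}
}
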
@@ -851,12 +884,87 @@ static void z_erofs_rcu_callback(struct rcu_head *head)
struct z_erofs_pcluster, rcu));
}
-void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
+static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
+ struct z_erofs_pcluster *pcl)
{
- struct z_erofs_pcluster *const pcl =
- container_of(grp, struct z_erofs_pcluster, obj);
+ if (pcl->lockref.count)
+ return false;
- call_rcu(&pcl->rcu, z_erofs_rcu_callback);
+ /*
+ * Note that all cached folios should be detached before being deleted
+ * from the XArray. Otherwise, some folios could still be attached to
+ * the orphan old pcluster when the new one is available in the tree.
+ */
+ if (erofs_try_to_free_all_cached_folios(sbi, pcl))
+ return false;
+
+ /*
+ * It's impossible to fail after the pcluster is frozen, but in order
+ * to avoid some race conditions, add a DBG_BUGON to observe this.
+ */
+ DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->index) != pcl);
+
+ lockref_mark_dead(&pcl->lockref);
+ return true;
+}
+
+static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
+ struct z_erofs_pcluster *pcl)
+{
+ bool free;
+
+ spin_lock(&pcl->lockref.lock);
+ free = __erofs_try_to_release_pcluster(sbi, pcl);
+ spin_unlock(&pcl->lockref.lock);
+ if (free) {
+ atomic_long_dec(&erofs_global_shrink_cnt);
+ call_rcu(&pcl->rcu, z_erofs_rcu_callback);
+ }
+ return free;
+}
+
+unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi,
+ unsigned long nr_shrink)
+{
+ struct z_erofs_pcluster *pcl;
+ unsigned int freed = 0;
+ unsigned long index;
+
+ xa_lock(&sbi->managed_pslots);
+ xa_for_each(&sbi->managed_pslots, index, pcl) {
+ /* try to shrink each valid pcluster */
+ if (!erofs_try_to_release_pcluster(sbi, pcl))
+ continue;
+ xa_unlock(&sbi->managed_pslots);
+
+ ++freed;
+ if (!--nr_shrink)
+ return freed;
+ xa_lock(&sbi->managed_pslots);
+ }
+ xa_unlock(&sbi->managed_pslots);
+ return freed;
+}
+
+static void z_erofs_put_pcluster(struct erofs_sb_info *sbi,
+ struct z_erofs_pcluster *pcl, bool try_free)
+{
+ bool free = false;
+
+ if (lockref_put_or_lock(&pcl->lockref))
+ return;
+
+ DBG_BUGON(__lockref_is_dead(&pcl->lockref));
+ if (!--pcl->lockref.count) {
+ if (try_free && xa_trylock(&sbi->managed_pslots)) {
+ free = __erofs_try_to_release_pcluster(sbi, pcl);
+ xa_unlock(&sbi->managed_pslots);
+ }
+ atomic_long_add(!free, &erofs_global_shrink_cnt);
+ }
+ spin_unlock(&pcl->lockref.lock);
+ if (free)
+ call_rcu(&pcl->rcu, z_erofs_rcu_callback);
}
static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
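
z_erofs_put_pcluster() closes the loop: lockref_put_or_lock() drops all but the last reference without touching the XArray, and the final put either frees the pcluster immediately (if managed_pslots can be trylocked) or leaves it at count 0 for z_erofs_shrink_scan() to reap later. A simplified sketch of that put path, again with illustrative names and without the cached-folio detach step the real code performs before erasing:

/* Drop a reference; opportunistically free on the last put. */
static void obj_put(struct xarray *xa, struct cached_obj *o, bool try_free)
{
	bool free = false;

	if (lockref_put_or_lock(&o->lockref))
		return;			/* fast path: count was > 1 */

	/* slow path: o->lockref.lock is held and count was 1 */
	if (!--o->lockref.count) {
		if (try_free && xa_trylock(xa)) {
			free = __xa_erase(xa, o->index) == o;
			if (free)
				lockref_mark_dead(&o->lockref);
			xa_unlock(xa);
		}
		/* not freed here: account it as reapable for the shrinker */
		atomic_long_add(!free, &nr_zero_ref_objects);
	}
	spin_unlock(&o->lockref.lock);
	if (free)
		kfree_rcu(o, rcu);	/* zdata.c uses call_rcu() for its slab */
}
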
@@ -877,7 +985,7 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
* any longer if the pcluster isn't hosted by ourselves.
*/
if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE)
- erofs_workgroup_put(&pcl->obj);
+ z_erofs_put_pcluster(EROFS_I_SB(fe->inode), pcl, false);
fe->pcl = NULL;
}
@@ -1179,6 +1287,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
int i, j, jtop, err2;
struct page *page;
bool overlapped;
+ bool try_free = true;
mutex_lock(&pcl->lock);
be->nr_pages = PAGE_ALIGN(pcl->length + pcl->pageofs_out) >> PAGE_SHIFT;
@@ -1236,9 +1345,12 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
/* managed folios are still left in compressed_bvecs[] */
for (i = 0; i < pclusterpages; ++i) {
page = be->compressed_pages[i];
- if (!page ||
- erofs_folio_is_managed(sbi, page_folio(page)))
+ if (!page)
+ continue;
+ if (erofs_folio_is_managed(sbi, page_folio(page))) {
+ try_free = false;
continue;
+ }
(void)z_erofs_put_shortlivedpage(be->pagepool, page);
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
}
@@ -1284,6 +1396,11 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
/* pcluster lock MUST be taken before the following line */
WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL);
mutex_unlock(&pcl->lock);
+
+ if (z_erofs_is_inline_pcluster(pcl))
+ z_erofs_free_pcluster(pcl);
+ else
+ z_erofs_put_pcluster(sbi, pcl, try_free);
return err;
}
@@ -1306,10 +1423,6 @@ static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
owned = READ_ONCE(be.pcl->next);
err = z_erofs_decompress_pcluster(&be, err) ?: err;
- if (z_erofs_is_inline_pcluster(be.pcl))
- z_erofs_free_pcluster(be.pcl);
- else
- erofs_workgroup_put(&be.pcl->obj);
}
return err;
}
@@ -1391,9 +1504,9 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
bvec->bv_offset = 0;
bvec->bv_len = PAGE_SIZE;
repeat:
- spin_lock(&pcl->obj.lockref.lock);
+ spin_lock(&pcl->lockref.lock);
zbv = pcl->compressed_bvecs[nr];
- spin_unlock(&pcl->obj.lockref.lock);
+ spin_unlock(&pcl->lockref.lock);
if (!zbv.page)
goto out_allocfolio;
@@ -1455,23 +1568,23 @@ repeat:
folio_put(folio);
out_allocfolio:
page = __erofs_allocpage(&f->pagepool, gfp, true);
- spin_lock(&pcl->obj.lockref.lock);
+ spin_lock(&pcl->lockref.lock);
if (unlikely(pcl->compressed_bvecs[nr].page != zbv.page)) {
if (page)
erofs_pagepool_add(&f->pagepool, page);
- spin_unlock(&pcl->obj.lockref.lock);
+ spin_unlock(&pcl->lockref.lock);
cond_resched();
goto repeat;
}
pcl->compressed_bvecs[nr].page = page ? page : ERR_PTR(-ENOMEM);
- spin_unlock(&pcl->obj.lockref.lock);
+ spin_unlock(&pcl->lockref.lock);
bvec->bv_page = page;
if (!page)
return;
folio = page_folio(page);
out_tocache:
if (!tocache || bs != PAGE_SIZE ||
- filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp)) {
+ filemap_add_folio(mc, folio, pcl->index + nr, gfp)) {
/* turn into a temporary shortlived folio (1 ref) */
folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
return;
@@ -1603,7 +1716,7 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
/* no device id here, thus it will always succeed */
mdev = (struct erofs_map_dev) {
- .m_pa = erofs_pos(sb, pcl->obj.index),
+ .m_pa = erofs_pos(sb, pcl->index),
};
(void)erofs_map_dev(sb, &mdev);