aboutsummaryrefslogtreecommitdiff
path: root/mm/pgtable-generic.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/pgtable-generic.c')
-rw-r--r--mm/pgtable-generic.c41
1 files changed, 34 insertions, 7 deletions
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index a78a4adf711a..5297dcc38c37 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -305,8 +305,8 @@ nomap:
return NULL;
}
-pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
- unsigned long addr, spinlock_t **ptlp)
+pte_t *pte_offset_map_ro_nolock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, spinlock_t **ptlp)
{
pmd_t pmdval;
pte_t *pte;
@@ -317,6 +317,19 @@ pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
return pte;
}
+pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, pmd_t *pmdvalp,
+ spinlock_t **ptlp)
+{
+ pte_t *pte;
+
+ VM_WARN_ON_ONCE(!pmdvalp);
+ pte = __pte_offset_map(pmd, addr, pmdvalp);
+ if (likely(pte))
+ *ptlp = pte_lockptr(mm, pmdvalp);
+ return pte;
+}
+
/*
* pte_offset_map_lock(mm, pmd, addr, ptlp), and its internal implementation
* __pte_offset_map_lock() below, is usually called with the pmd pointer for
@@ -347,14 +360,28 @@ pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
* and disconnected table. Until pte_unmap(pte) unmaps and rcu_read_unlock()s
* afterwards.
*
- * pte_offset_map_nolock(mm, pmd, addr, ptlp), above, is like pte_offset_map();
+ * pte_offset_map_ro_nolock(mm, pmd, addr, ptlp), above, is like pte_offset_map();
* but when successful, it also outputs a pointer to the spinlock in ptlp - as
* pte_offset_map_lock() does, but in this case without locking it. This helps
* the caller to avoid a later pte_lockptr(mm, *pmd), which might by that time
- * act on a changed *pmd: pte_offset_map_nolock() provides the correct spinlock
- * pointer for the page table that it returns. In principle, the caller should
- * recheck *pmd once the lock is taken; in practice, no callsite needs that -
- * either the mmap_lock for write, or pte_same() check on contents, is enough.
+ * act on a changed *pmd: pte_offset_map_ro_nolock() provides the correct spinlock
+ * pointer for the page table that it returns. Even after grabbing the spinlock,
+ * we might be looking either at a page table that is still mapped or one that
+ * was unmapped and is about to get freed. But for R/O access this is sufficient.
+ * So it is only applicable for read-only cases where any modification operations
+ * to the page table are not allowed even if the corresponding spinlock is held
+ * afterwards.
+ *
+ * pte_offset_map_rw_nolock(mm, pmd, addr, pmdvalp, ptlp), above, is like
+ * pte_offset_map_ro_nolock(); but when successful, it also outputs the pdmval.
+ * It is applicable for may-write cases where any modification operations to the
+ * page table may happen after the corresponding spinlock is held afterwards.
+ * But the users should make sure the page table is stable like checking pte_same()
+ * or checking pmd_same() by using the output pmdval before performing the write
+ * operations.
+ *
+ * Note: "RO" / "RW" expresses the intended semantics, not that the *kmap* will
+ * be read-only/read-write protected.
*
* Note that free_pgtables(), used after unmapping detached vmas, or when
* exiting the whole mm, does not take page table lock before freeing a page