Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe: "A smaller set of patches, nothing stands out as being particularly major this cycle. The biggest item would be the new HIP09 HW support from HNS, otherwise it was pretty quiet for new work here: - Driver bug fixes and updates: bnxt_re, cxgb4, rxe, hns, i40iw, cxgb4, mlx4 and mlx5 - Bug fixes and polishing for the new rts ULP - Cleanup of uverbs checking for allowed driver operations - Use sysfs_emit all over the place - Lots of bug fixes and clarity improvements for hns - hip09 support for hns - NDR and 50/100Gb signaling rates - Remove dma_virt_ops and go back to using the IB DMA wrappers - mlx5 optimizations for contiguous DMA regions" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (147 commits) RDMA/cma: Don't overwrite sgid_attr after device is released RDMA/mlx5: Fix MR cache memory leak RDMA/rxe: Use acquire/release for memory ordering RDMA/hns: Simplify AEQE process for different types of queue RDMA/hns: Fix inaccurate prints RDMA/hns: Fix incorrect symbol types RDMA/hns: Clear redundant variable initialization RDMA/hns: Fix coding style issues RDMA/hns: Remove unnecessary access right set during INIT2INIT RDMA/hns: WARN_ON if get a reserved sl from users RDMA/hns: Avoid filling sl in high 3 bits of vlan_id RDMA/hns: Do shift on traffic class when using RoCEv2 RDMA/hns: Normalization the judgment of some features RDMA/hns: Limit the length of data copied between kernel and userspace RDMA/mlx4: Remove bogus dev_base_lock usage RDMA/uverbs: Fix incorrect variable type RDMA/core: Do not indicate device ready when device enablement fails RDMA/core: Clean up cq pool mechanism RDMA/core: Update kernel documentation for ib_create_named_qp() MAINTAINERS: SOFT-ROCE: Change Zhu Yanjun's email address ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2020-12-16 13:42:26 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2020-12-16 13:42:26 -0800
commit: 009bd55dfcc857d8b00a5bbb17a8db060317af6f (patch)
tree: 3a623fc690ea03bd76630c5bcc003324136ae0f6 /drivers/infiniband/hw/mlx5/mem.c
parent: 60f7c503d971a731ee3c4f884a9f2e80d476730d (diff)
parent: e246b7c035d74abfb3507fa10082d0c42cc016c3 (diff)
download: linux-009bd55dfcc857d8b00a5bbb17a8db060317af6f.tar.gz
linux-009bd55dfcc857d8b00a5bbb17a8db060317af6f.tar.bz2
linux-009bd55dfcc857d8b00a5bbb17a8db060317af6f.zip
1 files changed, 48 insertions, 144 deletions
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 13de3d2edd34..844545064c9e 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -36,161 +36,65 @@
 #include "mlx5_ib.h"
 #include <linux/jiffies.h>
 
-/* @umem: umem object to scan
- * @addr: ib virtual address requested by the user
- * @max_page_shift: high limit for page_shift - 0 means no limit
- * @count: number of PAGE_SIZE pages covered by umem
- * @shift: page shift for the compound pages found in the region
- * @ncont: number of compund pages
- * @order: log2 of the number of compound pages
+/*
+ * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
+ * filled in the pas array.
  */
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
-			unsigned long max_page_shift,
-			int *count, int *shift,
-			int *ncont, int *order)
+void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
+			  u64 access_flags)
 {
-	unsigned long tmp;
-	unsigned long m;
-	u64 base = ~0, p = 0;
-	u64 len, pfn;
-	int i = 0;
-	struct scatterlist *sg;
-	int entry;
-
-	addr = addr >> PAGE_SHIFT;
-	tmp = (unsigned long)addr;
-	m = find_first_bit(&tmp, BITS_PER_LONG);
-	if (max_page_shift)
-		m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);
-
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		len = sg_dma_len(sg) >> PAGE_SHIFT;
-		pfn = sg_dma_address(sg) >> PAGE_SHIFT;
-		if (base + p != pfn) {
-			/* If either the offset or the new
-			 * base are unaligned update m
-			 */
-			tmp = (unsigned long)(pfn | p);
-			if (!IS_ALIGNED(tmp, 1 << m))
-				m = find_first_bit(&tmp, BITS_PER_LONG);
-
-			base = pfn;
-			p = 0;
-		}
-
-		p += len;
-		i += len;
-	}
-
-	if (i) {
-		m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
-
-		if (order)
-			*order = ilog2(roundup_pow_of_two(i) >> m);
-
-		*ncont = DIV_ROUND_UP(i, (1 << m));
-	} else {
-		m  = 0;
+	struct ib_block_iter biter;
 
-		if (order)
-			*order = 0;
-
-		*ncont = 0;
+	rdma_umem_for_each_dma_block (umem, &biter, page_size) {
+		*pas = cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+				   access_flags);
+		pas++;
 	}
-	*shift = PAGE_SHIFT + m;
-	*count = i;
 }
 
 /*
- * Populate the given array with bus addresses from the umem.
- *
- * dev - mlx5_ib device
- * umem - umem to use to fill the pages
- * page_shift - determines the page size used in the resulting array
- * offset - offset into the umem to start from,
- *          only implemented for ODP umems
- * num_pages - total number of pages to fill
- * pas - bus addresses array to fill
- * access_flags - access flags to set on all present pages.
-		  use enum mlx5_ib_mtt_access_flags for this.
+ * Compute the page shift and page_offset for mailboxes that use a quantized
+ * page_offset. The granulatity of the page offset scales according to page
+ * size.
  */
-void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			    int page_shift, size_t offset, size_t num_pages,
-			    __be64 *pas, int access_flags)
+unsigned long __mlx5_umem_find_best_quantized_pgoff(
+	struct ib_umem *umem, unsigned long pgsz_bitmap,
+	unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale,
+	unsigned int *page_offset_quantized)
 {
-	int shift = page_shift - PAGE_SHIFT;
-	int mask = (1 << shift) - 1;
-	int i, k, idx;
-	u64 cur = 0;
-	u64 base;
-	int len;
-	struct scatterlist *sg;
-	int entry;
-
-	i = 0;
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		len = sg_dma_len(sg) >> PAGE_SHIFT;
-		base = sg_dma_address(sg);
-
-		/* Skip elements below offset */
-		if (i + len < offset << shift) {
-			i += len;
-			continue;
-		}
-
-		/* Skip pages below offset */
-		if (i < offset << shift) {
-			k = (offset << shift) - i;
-			i = offset << shift;
-		} else {
-			k = 0;
-		}
-
-		for (; k < len; k++) {
-			if (!(i & mask)) {
-				cur = base + (k << PAGE_SHIFT);
-				cur |= access_flags;
-				idx = (i >> shift) - offset;
-
-				pas[idx] = cpu_to_be64(cur);
-				mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
-					    i >> shift, be64_to_cpu(pas[idx]));
-			}
-			i++;
-
-			/* Stop after num_pages reached */
-			if (i >> shift >= offset + num_pages)
-				return;
-		}
+	const u64 page_offset_mask = (1UL << page_offset_bits) - 1;
+	unsigned long page_size;
+	u64 page_offset;
+
+	page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask);
+	if (!page_size)
+		return 0;
+
+	/*
+	 * page size is the largest possible page size.
+	 *
+	 * Reduce the page_size, and thus the page_offset and quanta, until the
+	 * page_offset fits into the mailbox field. Once page_size < scale this
+	 * loop is guaranteed to terminate.
+	 */
+	page_offset = ib_umem_dma_offset(umem, page_size);
+	while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) {
+		page_size /= 2;
+		page_offset = ib_umem_dma_offset(umem, page_size);
 	}
-}
 
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			  int page_shift, __be64 *pas, int access_flags)
-{
-	return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
-				      ib_umem_num_dma_blocks(umem, PAGE_SIZE),
-				      pas, access_flags);
-}
-int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
-{
-	u64 page_size;
-	u64 page_mask;
-	u64 off_size;
-	u64 off_mask;
-	u64 buf_off;
-
-	page_size = (u64)1 << page_shift;
-	page_mask = page_size - 1;
-	buf_off = addr & page_mask;
-	off_size = page_size >> 6;
-	off_mask = off_size - 1;
-
-	if (buf_off & off_mask)
-		return -EINVAL;
-
-	*offset = buf_off >> ilog2(off_size);
-	return 0;
+	/*
+	 * The address is not aligned, or otherwise cannot be represented by the
+	 * page_offset.
+	 */
+	if (!(pgsz_bitmap & page_size))
+		return 0;
+
+	*page_offset_quantized =
+		(unsigned long)page_offset / (page_size / scale);
+	if (WARN_ON(*page_offset_quantized > page_offset_mask))
+		return 0;
+	return page_size;
 }
 
 #define WR_ID_BF 0xBF
author	Linus Torvalds <torvalds@linux-foundation.org>	2020-12-16 13:42:26 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2020-12-16 13:42:26 -0800
commit	009bd55dfcc857d8b00a5bbb17a8db060317af6f (patch)
tree	3a623fc690ea03bd76630c5bcc003324136ae0f6 /drivers/infiniband/hw/mlx5/mem.c
parent	60f7c503d971a731ee3c4f884a9f2e80d476730d (diff)
parent	e246b7c035d74abfb3507fa10082d0c42cc016c3 (diff)
download	linux-009bd55dfcc857d8b00a5bbb17a8db060317af6f.tar.gz linux-009bd55dfcc857d8b00a5bbb17a8db060317af6f.tar.bz2 linux-009bd55dfcc857d8b00a5bbb17a8db060317af6f.zip