diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 89547c8bb82d85081ff2ade82acfe3587c13cb6e..1ad0cab94491f88d4ccd31464fcdb3053b20666d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -158,83 +158,101 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap)
 	kfree(bitmap->table);
 }
 
-void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
-		       struct hns_roce_buf *buf)
+void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf)
 {
-	int i;
-	struct device *dev = hr_dev->dev;
+	struct hns_roce_buf_list *trunks;
+	u32 i;
 
-	if (buf->nbufs == 1) {
-		dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map);
-	} else {
-		for (i = 0; i < buf->nbufs; ++i)
-			if (buf->page_list[i].buf)
-				dma_free_coherent(dev, 1 << buf->page_shift,
-						  buf->page_list[i].buf,
-						  buf->page_list[i].map);
-		kfree(buf->page_list);
+	if (!buf)
+		return;
+
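+	/* Detach the trunk list before freeing its entries */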
+	trunks = buf->trunk_list;
+	if (trunks) {
+		buf->trunk_list = NULL;
+		for (i = 0; i < buf->ntrunks; i++)
+			dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift,
+					  trunks[i].buf, trunks[i].map);
+
+		kfree(trunks);
 	}
+
+	kfree(buf);
 }
 EXPORT_SYMBOL_GPL(hns_roce_buf_free);
 
-int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
-		       struct hns_roce_buf *buf, u32 page_shift)
+/*
+ * Allocate the DMA buffer for storing ROCEE table entries
+ *
+ * @size: required size
+ * @page_shift: the unit size in a contiguous DMA address range
+ * @flags: HNS_ROCE_BUF_ flags to control the allocation flow.
+ */
+struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
+					u32 page_shift, u32 flags)
 {
-	int i = 0;
-	dma_addr_t t;
-	struct device *dev = hr_dev->dev;
-	u32 page_size = 1 << page_shift;
-	u32 order;
-
-	/* buf for SQ/RQ both at lease one page, SQ + RQ is 2 pages */
-	if (size <= max_direct) {
-		buf->nbufs = 1;
-		/* Npages calculated by page_size */
-		order = get_order(size);
-		if (order <= page_shift - PAGE_SHIFT)
-			order = 0;
-		else
-			order -= page_shift - PAGE_SHIFT;
-		buf->npages = 1 << order;
-		buf->page_shift = page_shift;
-		/* MTT PA must be recorded in 4k alignment, t is 4k aligned */
-		buf->direct.buf = dma_zalloc_coherent(dev,
-						      size, &t, GFP_KERNEL);
-		if (!buf->direct.buf)
-			return -ENOMEM;
-
-		buf->direct.map = t;
-
-		while (t & ((1 << buf->page_shift) - 1)) {
-			--buf->page_shift;
-			buf->npages *= 2;
-		}
+	u32 trunk_size, page_size, alloced_size;
+	struct hns_roce_buf_list *trunks;
+	struct hns_roce_buf *buf;
+	gfp_t gfp_flags;
+	u32 ntrunk, i;
+
+	/* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */
+	if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT))
+		return ERR_PTR(-EINVAL);
+
+	gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? GFP_ATOMIC : GFP_KERNEL;
+	buf = kzalloc(sizeof(*buf), gfp_flags);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	buf->page_shift = page_shift;
+	page_size = 1 << buf->page_shift;
+
+	/* Calculate the trunk size and count from size and page_shift */
+	if (flags & HNS_ROCE_BUF_DIRECT) {
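+		/*
+		 * In direct mode the whole buffer must be contiguous, so use a
+		 * single trunk whose size is the buffer size rounded up to a
+		 * power of two (at least one system page).
+		 */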
+		buf->trunk_shift = order_base_2(ALIGN(size, PAGE_SIZE));
+		ntrunk = 1;
 	} else {
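+		/*
+		 * Otherwise each trunk covers one buffer page (at least one
+		 * system page), and enough trunks are used to hold @size bytes.
+		 */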
-		buf->nbufs = (size + page_size - 1) / page_size;
-		buf->npages = buf->nbufs;
-		buf->page_shift = page_shift;
-		buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
-					 GFP_KERNEL);
-		if (!buf->page_list)
-			return -ENOMEM;
-
-		for (i = 0; i < buf->nbufs; ++i) {
-			buf->page_list[i].buf = dma_zalloc_coherent(dev,
-								  page_size, &t,
-								  GFP_KERNEL);
-			if (!buf->page_list[i].buf)
-				goto err_free;
-
-			buf->page_list[i].map = t;
-		}
+		buf->trunk_shift = order_base_2(ALIGN(page_size, PAGE_SIZE));
+		ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift);
 	}
 
-	return 0;
+	trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags);
+	if (!trunks) {
+		kfree(buf);
+		return ERR_PTR(-ENOMEM);
+	}
 
-err_free:
-	hns_roce_buf_free(hr_dev, size, buf);
-	return -ENOMEM;
+	trunk_size = 1 << buf->trunk_shift;
+	alloced_size = 0;
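+	/* Allocate trunks one by one and stop at the first failure */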
+	for (i = 0; i < ntrunk; i++) {
+		trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size,
+						   &trunks[i].map, gfp_flags);
+		if (!trunks[i].buf)
+			break;
+
+		alloced_size += trunk_size;
+	}
+
+	buf->ntrunks = i;
+
+	/* In NOFAIL mode, it only fails when the allocated size is 0 */
+	if ((flags & HNS_ROCE_BUF_NOFAIL) ? i == 0 : i != ntrunk) {
+		for (i = 0; i < buf->ntrunks; i++)
+			dma_free_coherent(hr_dev->dev, trunk_size,
+					  trunks[i].buf, trunks[i].map);
+
+		kfree(trunks);
+		kfree(buf);
+		return ERR_PTR(-ENOMEM);
+	}
+
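+	/*
+	 * Count only the pages backed by allocated trunks; in NOFAIL mode this
+	 * may be fewer than requested.
+	 */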
+	buf->npages = DIV_ROUND_UP(alloced_size, page_size);
+	buf->trunk_list = trunks;
+
+	return buf;
 }
+EXPORT_SYMBOL_GPL(hns_roce_buf_alloc);
 
 int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
 			   int buf_cnt, int start, struct hns_roce_buf *buf)
@@ -245,80 +263,46 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
 	end = start + buf_cnt;
 	if (end > buf->npages) {
 		dev_err(hr_dev->dev,
-			"Invalid kmem region,offset 0x%x plus buf_cnt 0x%x larger than total 0x%x!\n",
+			"failed to check kmem bufs, end %d + %d total %u!\n",
 			start, buf_cnt, buf->npages);
 		return -EINVAL;
 	}
 
 	total = 0;
 	for (i = start; i < end; i++)
-		if (buf->nbufs == 1)
-			bufs[total++] = buf->direct.map +
-					(i << buf->page_shift);
-		else
-			bufs[total++] = buf->page_list[i].map;
+		bufs[total++] = hns_roce_buf_page(buf, i);
 
 	return total;
 }
+EXPORT_SYMBOL_GPL(hns_roce_get_kmem_bufs);
 
 int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
 			   int buf_cnt, int start, struct ib_umem *umem,
-			   int page_shift)
+			   unsigned int page_shift)
 {
-	struct scatterlist *sg;
-	int npage_per_buf;
-	int npage_per_sg;
-	dma_addr_t addr;
-	int n, entry;
-	int idx, end;
-	int npage;
-	int total;
-
-	if (page_shift < PAGE_SHIFT || page_shift > umem->page_shift) {
-		dev_err(hr_dev->dev, "Invalid page shift %d, umem shift %d!\n",
-			page_shift, umem->page_shift);
+	struct ib_block_iter biter;
+	int total = 0;
+	int idx = 0;
+	u64 addr;
+
+	if (page_shift < HNS_HW_PAGE_SHIFT) {
+		dev_err(hr_dev->dev, "failed to check umem page shift %u!\n",
+			page_shift);
 		return -EINVAL;
 	}
 
 	/* convert system page cnt to hw page cnt */
-	npage_per_buf = (1 << (page_shift - PAGE_SHIFT));
-	total = DIV_ROUND_UP(ib_umem_page_count(umem), npage_per_buf);
-	end = start + buf_cnt;
-	if (end > total) {
-		dev_err(hr_dev->dev,
-			"Invalid umem region,offset 0x%x plus buf_cnt 0x%x larger than total 0x%x!\n",
-			start, buf_cnt, total);
-		return -EINVAL;
-	}
-
-	idx = 0;
-	npage = 0;
-	total = 0;
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		npage_per_sg = sg_dma_len(sg) >> PAGE_SHIFT;
-		for (n = 0; n < npage_per_sg; n++) {
-			if (!(npage % npage_per_buf)) {
-				addr = sg_dma_address(sg) +
-					(n << umem->page_shift);
-				if (addr & ((1 << page_shift) - 1)) {
-					dev_err(hr_dev->dev,
-						"Umem addr not align to page_shift %d!\n",
-						page_shift);
-					return -ENOBUFS;
-				}
-
-				/* get buf addr between start and end */
-				if (start <= idx && idx < end) {
-					bufs[total++] = addr;
-					if (total >= buf_cnt)
-						goto done;
-				}
-
-				idx++;
-			}
-			npage++;
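+	/*
+	 * Walk the umem in (1 << page_shift)-sized blocks; each iteration
+	 * yields the DMA address of one hw page.
+	 */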
+	rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
+			    1 << page_shift) {
+		addr = rdma_block_iter_dma_address(&biter);
+		if (idx >= start) {
+			bufs[total++] = addr;
+			if (total >= buf_cnt)
+				goto done;
 		}
+		idx++;
 	}
+
 done:
 	return total;
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 75afed8ee7a624a323e87886b74008e041372339..5420bec32da881bf1b1b947f6b42917478f37821 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -275,29 +275,32 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
 static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev,
 				    struct hns_roce_cq_buf *buf, u32 nent)
 {
-	int ret;
 	u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
+	struct hns_roce_buf *kbuf;
+	int ret;
 
-	ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz,
-				 (1 << page_shift) * 2, &buf->hr_buf,
-				 page_shift);
-	if (ret)
+	kbuf = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz,
+				  page_shift, 0);
+	if (IS_ERR(kbuf)) {
+		ret = -ENOMEM;
 		goto out;
+	}
 
+	buf->hr_buf = kbuf;
 	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
 		buf->hr_mtt.mtt_type = MTT_TYPE_CQE;
 	else
 		buf->hr_mtt.mtt_type = MTT_TYPE_WQE;
 
-	ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages,
-				buf->hr_buf.page_shift, &buf->hr_mtt);
+	ret = hns_roce_mtt_init(hr_dev, kbuf->npages, kbuf->page_shift,
+				&buf->hr_mtt);
 	if (ret) {
 		dev_err(hr_dev->dev, "hns_roce_mtt_init error(%d) for kernel create cq.\n",
 			ret);
 		goto err_buf;
 	}
 
-	ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf);
+	ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, buf->hr_buf);
 	if (ret) {
 		dev_err(hr_dev->dev, "hns_roce_ib_umem_write_mtt error(%d) for kernel create cq.\n",
 			ret);
@@ -310,8 +313,7 @@ static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev,
 	hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt);
 
 err_buf:
-	hns_roce_buf_free(hr_dev, nent * hr_dev->caps.cq_entry_sz,
-			  &buf->hr_buf);
+	hns_roce_buf_free(hr_dev, buf->hr_buf);
 out:
 	return ret;
 }
@@ -319,8 +321,7 @@ static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev,
 static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev,
 				    struct hns_roce_cq_buf *buf, int cqe)
 {
-	hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz,
-			  &buf->hr_buf);
+	hns_roce_buf_free(hr_dev, buf->hr_buf);
 }
 
 static int create_user_cq(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 0b1d3a5e4ca510fe0b86a0ba8456426ff6dd5d71..ff76c0bcd1a68a1054c18d146abff08a023bfca2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -318,6 +318,10 @@ static inline void hns_roce_inc_rdma_hw_stats(struct ib_device *dev, int stats)
 
 #define HNS_ROCE_MTT_ENTRY_PER_SEG		8
 
+/* The minimum page size is 4K for hardware */
+#define HNS_HW_PAGE_SHIFT			12
+#define HNS_HW_PAGE_SIZE			(1 << HNS_HW_PAGE_SHIFT)
+
 #define PAGE_ADDR_SHIFT				12
 
 #define HNS_ROCE_IS_RESETTING			1
@@ -511,12 +515,27 @@ struct hns_roce_buf_list {
 	dma_addr_t	map;
 };
 
+/*
+ * %HNS_ROCE_BUF_DIRECT indicates that all of the memory must be in a
+ * contiguous DMA address range.
+ *
+ * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep.
+ *
+ * %HNS_ROCE_BUF_NOFAIL indicates that the allocation fails only when the
+ * allocated size is zero; it may succeed with less memory than the required
+ * size.
+ */
+enum {
+	HNS_ROCE_BUF_DIRECT = BIT(0),
+	HNS_ROCE_BUF_NOSLEEP = BIT(1),
+	HNS_ROCE_BUF_NOFAIL = BIT(2),
+};
+
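+/*
+ * A buffer is organized as an array of trunks, where each trunk is a
+ * contiguous DMA region of (1 << trunk_shift) bytes holding one or more
+ * (1 << page_shift)-sized hw pages.
+ */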
 struct hns_roce_buf {
-	struct hns_roce_buf_list	direct;
-	struct hns_roce_buf_list	*page_list;
-	int				nbufs;
+	struct hns_roce_buf_list	*trunk_list;
+	u32				ntrunks;
 	u32				npages;
-	int				page_shift;
+	unsigned int			trunk_shift;
+	unsigned int			page_shift;
 };
 
 struct hns_roce_db_pgdir {
@@ -548,7 +567,7 @@ struct hns_roce_db {
 };
 
 struct hns_roce_cq_buf {
-	struct hns_roce_buf hr_buf;
+	struct hns_roce_buf *hr_buf;
 	struct hns_roce_mtt hr_mtt;
 };
 
@@ -587,7 +606,7 @@ struct hns_roce_cq {
 };
 
 struct hns_roce_idx_que {
-	struct hns_roce_buf		idx_buf;
+	struct hns_roce_buf		*idx_buf;
 	int				entry_sz;
 	u32				buf_size;
 	struct ib_umem			*umem;
@@ -608,7 +627,7 @@ struct hns_roce_srq {
 	refcount_t		refcount;
 	struct completion	free;
 
-	struct hns_roce_buf	buf;
+	struct hns_roce_buf    *buf;
 	u64		       *wrid;
 	struct ib_umem	       *umem;
 	struct hns_roce_mtt	mtt;
@@ -754,7 +773,7 @@ enum hns_roce_qp_dfx_cnt {
 
 struct hns_roce_qp {
 	struct ib_qp		ibqp;
-	struct hns_roce_buf	hr_buf;
+	struct hns_roce_buf	*hr_buf;
 	struct hns_roce_wq	rq;
 	struct hns_roce_db	rdb;
 	struct hns_roce_db	sdb;
@@ -1305,15 +1324,23 @@ static inline struct hns_roce_qp
 				 qpn & (hr_dev->caps.num_qps - 1));
 }
 
-static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset)
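+/*
+ * The high bits of a buffer offset (offset >> trunk_shift) select the trunk,
+ * and the low bits locate the data within that trunk.
+ */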
+static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf,
+					unsigned int offset)
+{
+	return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) +
+			(offset & ((1 << buf->trunk_shift) - 1));
+}
+
+static inline dma_addr_t hns_roce_buf_dma_addr(struct hns_roce_buf *buf,
+					       unsigned int offset)
 {
-	u32 page_size = 1 << buf->page_shift;
+	return buf->trunk_list[offset >> buf->trunk_shift].map +
+			(offset & ((1 << buf->trunk_shift) - 1));
+}
 
-	if (buf->nbufs == 1)
-		return (char *)(buf->direct.buf) + offset;
-	else
-		return (char *)(buf->page_list[offset >> buf->page_shift].buf) +
-		       (offset & (page_size - 1));
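+/* DMA address of the idx-th (1 << page_shift)-sized page in the buffer */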
+static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx)
+{
+	return hns_roce_buf_dma_addr(buf, idx << buf->page_shift);
 }
 
 static inline u8 to_rdma_port_num(u8 phy_port_num)
@@ -1425,10 +1452,9 @@ struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
 				struct ib_udata *udata);
 int hns_roce_dealloc_mw(struct ib_mw *ibmw);
 
-void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
-		       struct hns_roce_buf *buf);
-int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
-		       struct hns_roce_buf *buf, u32 page_shift);
+void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf);
+struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
+					u32 page_shift, u32 flags);
 
 int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
 			       struct hns_roce_mtt *mtt, struct ib_umem *umem);
@@ -1443,7 +1469,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
 			   int buf_cnt, int start, struct hns_roce_buf *buf);
 int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
 			   int buf_cnt, int start, struct ib_umem *umem,
-			   int page_shift);
+			   unsigned int page_shift);
 
 struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
 				   struct ib_srq_init_attr *srq_init_attr,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 6bcde55548f48c794369810e3a37a6c4a71e9282..b8c5502f8fa89c8c1093722f644c4406d7a8bb6b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2004,7 +2004,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 
 static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
 {
-	return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
+	return hns_roce_buf_offset(hr_cq->hr_buf.hr_buf,
 				   n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
 }
 
@@ -3680,7 +3680,6 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq)
 	struct device *dev = &hr_dev->pdev->dev;
 	u32 cqe_cnt_ori;
 	u32 cqe_cnt_cur;
-	u32 cq_buf_size;
 	int wait_time = 0;
 	int ret = 0;
 
@@ -3714,11 +3713,8 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq)
 
 	if (ibcq->uobject)
 		ib_umem_release(hr_cq->umem);
-	else {
-		/* Free the buff of stored cq */
-		cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz;
-		hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf);
-	}
+	else
+		hns_roce_buf_free(hr_dev, hr_cq->hr_buf.hr_buf);
 
 	kfree(hr_cq);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 2852f97b987cbafc017f7c9d7b1d82b723243619..70f1ae3b60570d8281535903e0e9bcd52a5b429b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -149,7 +149,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr,
 		num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE;
 	extend_sge_num = valid_num_sge - num_in_wqe;
 	sg = wr->sg_list + num_in_wqe;
-	shift = qp->hr_buf.page_shift;
+	shift = qp->hr_buf->page_shift;
 
 	/*
 	 * Check whether wr->num_sge sges are in the same page. If not, we
@@ -3001,7 +3001,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
 
 static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
 {
-	return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
+	return hns_roce_buf_offset(hr_cq->hr_buf.hr_buf,
 				   n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
 }
 
@@ -3021,7 +3021,7 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
 
 static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
 {
-	return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift);
+	return hns_roce_buf_offset(srq->buf, n << srq->wqe_shift);
 }
 
 static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
@@ -7051,7 +7051,7 @@ static void fill_idx_queue(struct hns_roce_idx_que *idx_que,
 {
 	unsigned int *addr;
 
-	addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf,
+	addr = (unsigned int *)hns_roce_buf_offset(idx_que->idx_buf,
 		cur_idx * idx_que->entry_sz);
 	*addr = wqe_idx;
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index ce5c0544fc1b353a3c4b38eb56326c68acce3e10..bef0ceb24b72db372abbb5679a83854b33cf3633 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -908,13 +908,9 @@ int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
 	if (!page_list)
 		return -ENOMEM;
 
-	for (i = 0; i < buf->npages; ++i) {
-		if (buf->nbufs == 1)
-			page_list[i] = buf->direct.map + (i << buf->page_shift);
-		else
-			page_list[i] = buf->page_list[i].map;
+	for (i = 0; i < buf->npages; ++i)
+		page_list[i] = hns_roce_buf_page(buf, i);
 
-	}
 	ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);
 
 	kfree(page_list);
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index ecfae6e7c6033f2bff9029fad51a60d5747eb305..62fe471a81ca7a6e3b04da640768041c7595ce80 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -400,7 +400,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev,
 	}
 
 	max_cnt = max(1U, cap->max_recv_sge);
-	hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt);
+	hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt) + hr_qp->rq.rsv_sge;
 
 	if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
 		hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
@@ -847,7 +847,7 @@ static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 					page_shift);
 		else
 			buf_count = hns_roce_get_kmem_bufs(hr_dev, buf_list[i],
-					r->count, r->offset, &hr_qp->hr_buf);
+					r->count, r->offset, hr_qp->hr_buf);
 
 		if (buf_count != r->count) {
 			dev_err(hr_dev->dev, "Failed to get %s WQE buf, expect %d = %d.\n",
@@ -878,7 +878,7 @@ static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 
 static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 			struct ib_qp_init_attr *init_attr,
-			struct ib_udata *udata, unsigned long addr)
+			struct ib_uobject *uobject, unsigned long addr)
 {
 	u32 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
 	bool is_rq_buf_inline;
@@ -894,21 +894,26 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		}
 	}
 
-	if (hr_qp->ibqp.pd->uobject->context) {
-		hr_qp->umem = ib_umem_get(hr_qp->ibqp.pd->uobject->context, addr, hr_qp->buff_size, 0, 0);
+	if (uobject) {
+		hr_qp->umem = ib_umem_get(uobject->context, addr,
+					  hr_qp->buff_size, 0, 0);
 		if (IS_ERR(hr_qp->umem)) {
 			ret = PTR_ERR(hr_qp->umem);
 			goto err_inline;
 		}
 	} else {
-		ret = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size,
-					 (1 << page_shift) * 2,
-					 &hr_qp->hr_buf, page_shift);
-		if (ret)
+		struct hns_roce_buf *kmem;
+
+		kmem = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, page_shift,
+					  0);
+		if (IS_ERR(kmem)) {
+			ret = PTR_ERR(kmem);
 			goto err_inline;
+		}
+		hr_qp->hr_buf = kmem;
 	}
 
-	ret = map_wqe_buf(hr_dev, hr_qp, page_shift, udata);
+	ret = map_wqe_buf(hr_dev, hr_qp, page_shift, !!uobject);
 	if (ret)
 		goto err_alloc;
 
@@ -919,11 +924,12 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		hns_roce_free_recv_inline_buffer(hr_qp);
 
 err_alloc:
-	if (udata) {
+	if (uobject) {
 		ib_umem_release(hr_qp->umem);
 		hr_qp->umem = NULL;
 	} else {
-		hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+		hns_roce_buf_free(hr_dev, hr_qp->hr_buf);
+		hr_qp->hr_buf = NULL;
 	}
 
 	dev_err(hr_dev->dev, "Failed to alloc WQE buffer, ret %d.\n", ret);
@@ -937,11 +943,11 @@ static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 	if (hr_qp->umem) {
 		ib_umem_release(hr_qp->umem);
 		hr_qp->umem = NULL;
+	} else {
+		hns_roce_buf_free(hr_dev, hr_qp->hr_buf);
+		hr_qp->hr_buf = NULL;
 	}
 
-	if (hr_qp->hr_buf.nbufs > 0)
-		hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
-
 	if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
 	     hr_qp->rq.wqe_cnt)
 		hns_roce_free_recv_inline_buffer(hr_qp);
@@ -1104,7 +1110,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 		}
 	}
 
-	ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr);
+	ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, ib_pd->uobject,
+			   ucmd.buf_addr);
 	if (ret) {
 		dev_err(hr_dev->dev, "Failed to alloc QP buffer\n");
 		goto err_db;
@@ -1532,8 +1539,7 @@ EXPORT_SYMBOL_GPL(hns_roce_unlock_cqs);
 
 static void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
 {
-
-	return hns_roce_buf_offset(&hr_qp->hr_buf, offset);
+	return hns_roce_buf_offset(hr_qp->hr_buf, offset);
 }
 
 void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
@@ -1550,7 +1556,7 @@ EXPORT_SYMBOL_GPL(get_send_wqe);
 
 void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n)
 {
-	return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset +
+	return hns_roce_buf_offset(hr_qp->hr_buf, hr_qp->sge.offset +
 					(n << hr_qp->sge.sge_shift));
 }
 EXPORT_SYMBOL_GPL(get_send_extend_sge);
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 9d4aec18be0fcddc39f096436de35463ce792be6..630bf17c281c9b5a0c514452f23fb1e8cc183e52 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -310,6 +310,7 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
 	struct hns_roce_idx_que *idx_que = &srq->idx_que;
+	struct hns_roce_buf *kbuf;
 	u32 bitmap_num;
 	int i;
 
@@ -324,12 +325,13 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
 
 	idx_que->buf_size = srq->max * idx_que->entry_sz;
 
-	if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2,
-			       &idx_que->idx_buf, page_shift)) {
+	kbuf = hns_roce_buf_alloc(hr_dev, idx_que->buf_size, page_shift, 0);
+	if (IS_ERR(kbuf)) {
 		kfree(idx_que->bitmap);
 		return -ENOMEM;
 	}
 
+	idx_que->idx_buf = kbuf;
 	for (i = 0; i < bitmap_num; i++)
 		idx_que->bitmap[i] = ~(0UL);
 
@@ -341,17 +343,19 @@ static int create_kernel_srq(struct ib_pd *pd, struct hns_roce_srq *srq,
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
 	u32 page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+	struct hns_roce_buf *kbuf;
 	int ret;
 
-	if (hns_roce_buf_alloc(hr_dev, srq_buf_size, (1 << page_shift) * 2,
-			       &srq->buf, page_shift))
+	kbuf = hns_roce_buf_alloc(hr_dev, srq_buf_size, page_shift, 0);
+	if (IS_ERR(kbuf))
 		return -ENOMEM;
 
+	srq->buf = kbuf;
 	srq->head = 0;
 	srq->tail = srq->max - 1;
 	srq->wqe_ctr = 0;
 
-	ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift,
+	ret = hns_roce_mtt_init(hr_dev, kbuf->npages, kbuf->page_shift,
 				&srq->mtt);
 	if (ret) {
 		dev_err(hr_dev->dev, "Mtt init error(%d) when create srq.\n",
@@ -359,7 +363,7 @@ static int create_kernel_srq(struct ib_pd *pd, struct hns_roce_srq *srq,
 		goto err_kernel_buf;
 	}
 
-	ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
+	ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, srq->buf);
 	if (ret)
 		goto err_kernel_srq_mtt;
 
@@ -371,8 +375,8 @@ static int create_kernel_srq(struct ib_pd *pd, struct hns_roce_srq *srq,
 	}
 
 	/* Init mtt table for idx_que */
-	ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
-				srq->idx_que.idx_buf.page_shift,
+	ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf->npages,
+				srq->idx_que.idx_buf->page_shift,
 				&srq->idx_que.mtt);
 	if (ret) {
 		dev_err(hr_dev->dev, "Kernel mtt init error(%d) for idx que.\n",
@@ -381,7 +385,7 @@ static int create_kernel_srq(struct ib_pd *pd, struct hns_roce_srq *srq,
 	}
 	/* Write buffer address into the mtt table */
 	ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
-				     &srq->idx_que.idx_buf);
+				     srq->idx_que.idx_buf);
 	if (ret) {
 		dev_err(hr_dev->dev, "Write mtt error(%d) for idx que.\n", ret);
 		goto err_kernel_idx_buf;
@@ -398,15 +402,14 @@ static int create_kernel_srq(struct ib_pd *pd, struct hns_roce_srq *srq,
 	hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
 
 err_kernel_create_idx:
-	hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
-			  &srq->idx_que.idx_buf);
+	hns_roce_buf_free(hr_dev, srq->idx_que.idx_buf);
 	kfree(srq->idx_que.bitmap);
 
 err_kernel_srq_mtt:
 	hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
 
 err_kernel_buf:
-	hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+	hns_roce_buf_free(hr_dev, srq->buf);
 
 	return ret;
 }
@@ -425,10 +428,10 @@ static void destroy_kernel_srq(struct hns_roce_dev *hr_dev,
 {
 	kfree(srq->wrid);
 	hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
-	hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, &srq->idx_que.idx_buf);
+	hns_roce_buf_free(hr_dev, srq->idx_que.idx_buf);
 	kfree(srq->idx_que.bitmap);
 	hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
-	hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+	hns_roce_buf_free(hr_dev, srq->buf);
 }
 
 static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq,
@@ -564,8 +567,7 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq)
 		ib_umem_release(srq->umem);
 	} else {
 		kfree(srq->wrid);
-		hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift,
-				  &srq->buf);
+		hns_roce_buf_free(hr_dev, srq->buf);
 	}
 
 	kfree(srq);
diff --git a/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c b/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c
index 8c2de43bcf14b2bb23bd832e5be458913d8f9201..bb8c4d7d2449acc67a82839faea9c8ccf84cc685 100644
--- a/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c
+++ b/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c
@@ -145,7 +145,7 @@ void rdfx_cp_sq_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
 	memcpy(dfx_qp_buf, dfx_hns_wqe_sge,
 		2 * sizeof(struct hns_roce_v2_wqe_data_seg));
 	dfx_qp_buf = hns_roce_buf_offset(rdfx_qp->buf, qp->sge.offset);
-	dfx_hns_wqe_sge = hns_roce_buf_offset(&qp->hr_buf, qp->sge.offset);
+	dfx_hns_wqe_sge = hns_roce_buf_offset(qp->hr_buf, qp->sge.offset);
 	rdfx_change_sq_buf(wr, atomic_en, dfx_qp_buf,
 			   dfx_hns_wqe_sge, sq, hr_dev, qp);
 }
diff --git a/drivers/infiniband/hw/hns/roce-customer/rdfx_intf.c b/drivers/infiniband/hw/hns/roce-customer/rdfx_intf.c
index 9a84e7ce6417022ad5ba0a45c7ebdedc4b9c7869..d658621d93a2284cdfc74990c0f45944710e6a17 100644
--- a/drivers/infiniband/hw/hns/roce-customer/rdfx_intf.c
+++ b/drivers/infiniband/hw/hns/roce-customer/rdfx_intf.c
@@ -23,17 +23,17 @@ struct rdfx_cq_info *rdfx_find_rdfx_cq(struct rdfx_info *rdfx,
 static void rdfx_v2_free_cqe_dma_buf(struct rdfx_cq_info *rdfx_cq)
 {
 	struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)rdfx_cq->priv;
-	u32 size = (rdfx_cq->cq_depth) * hr_dev->caps.cq_entry_sz;
 
-	hns_roce_buf_free(hr_dev, size, (struct hns_roce_buf *)rdfx_cq->buf);
+	hns_roce_buf_free(hr_dev, (struct hns_roce_buf *)rdfx_cq->buf);
+	rdfx_cq->buf = NULL;
 }
 
 static void rdfx_v2_free_wqe_dma_buf(struct rdfx_qp_info *rdfx_qp)
 {
 	struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)rdfx_qp->priv;
-	u32 size = rdfx_qp->buf_size;
 
-	hns_roce_buf_free(hr_dev, size, (struct hns_roce_buf *)rdfx_qp->buf);
+	hns_roce_buf_free(hr_dev, (struct hns_roce_buf *)rdfx_qp->buf);
+	rdfx_qp->buf = NULL;
 }
 
 void qp_release(struct kref *ref)
@@ -41,7 +41,6 @@ void qp_release(struct kref *ref)
 	struct rdfx_qp_info *rdfx_qp =
 		container_of(ref, struct rdfx_qp_info, cnt);
 	rdfx_v2_free_wqe_dma_buf(rdfx_qp);
-	kfree(rdfx_qp->buf);
 	kfree(rdfx_qp);
 }
 EXPORT_SYMBOL_GPL(qp_release);
@@ -51,7 +50,6 @@ void cq_release(struct kref *ref)
 	struct rdfx_cq_info *rdfx_cq =
 		container_of(ref, struct rdfx_cq_info, cnt);
 	rdfx_v2_free_cqe_dma_buf(rdfx_cq);
-	kfree(rdfx_cq->buf);
 	kfree(rdfx_cq);
 }
 EXPORT_SYMBOL_GPL(cq_release);
@@ -647,21 +645,16 @@ void rdfx_alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 	unsigned long flags;
 	u32 page_shift;
 	int cq_entries;
-	int ret;
 
 	cq_entries = hr_cq->cq_depth;
 
-	dfx_cq_buf = kzalloc(sizeof(struct hns_roce_buf), GFP_KERNEL);
-	if (ZERO_OR_NULL_PTR(dfx_cq_buf))
-		return;
-
 	page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
-
-	ret = hns_roce_buf_alloc(hr_dev, cq_entries * hr_dev->caps.cq_entry_sz,
-				 (1 << page_shift) * 2, dfx_cq_buf, page_shift);
-	if (ret) {
+	dfx_cq_buf = hns_roce_buf_alloc(hr_dev,
+					cq_entries * hr_dev->caps.cq_entry_sz,
+					page_shift, 0);
+	if (IS_ERR(dfx_cq_buf)) {
 		dev_err(hr_dev->dev, "hns_roce_dfx_buf_alloc error!\n");
-		goto err_dfx_buf;
+		return;
 	}
 
 #ifdef CONFIG_INFINIBAND_HNS_DFX_ENHANCE
@@ -690,10 +683,7 @@ void rdfx_alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 	return;
 
 err_buf:
-	hns_roce_buf_free(hr_dev, cq_entries * hr_dev->caps.cq_entry_sz,
-		dfx_cq_buf);
-err_dfx_buf:
-	kfree(dfx_cq_buf);
+	hns_roce_buf_free(hr_dev, dfx_cq_buf);
 }
 
 void rdfx_free_cq_buff(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
@@ -711,15 +701,10 @@ void rdfx_alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 	u32 page_shift = 0;
 	unsigned long flags;
 
-	dfx_qp_buf = kzalloc(sizeof(struct hns_roce_buf), GFP_KERNEL);
-	if (ZERO_OR_NULL_PTR(dfx_qp_buf))
-		return;
-
 	page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
-
-	if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, (1 << page_shift) * 2,
-		dfx_qp_buf, page_shift)) {
-		kfree(dfx_qp_buf);
+	dfx_qp_buf = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, page_shift,
+					0);
+	if (IS_ERR(dfx_qp_buf)) {
 		dev_err(hr_dev->dev, "alloc dfx qp 0x%lx buff failed!\n",
 			hr_qp->qpn);
 		return;
@@ -727,8 +712,7 @@ void rdfx_alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
 
 	rdfx_qp = kzalloc(sizeof(*rdfx_qp), GFP_KERNEL);
 	if (ZERO_OR_NULL_PTR(rdfx_qp)) {
-		hns_roce_buf_free(hr_dev, hr_qp->buff_size, dfx_qp_buf);
-		kfree(dfx_qp_buf);
+		hns_roce_buf_free(hr_dev, dfx_qp_buf);
 		return;
 	}