Linux-RDMA Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/2] rds: MR(Memory Region) related patches
@ 2020-10-19 21:48 Manjunath Patil
  2020-10-19 21:48 ` [PATCH 1/2] rds: track memory region (MR) usage in kernel Manjunath Patil
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Manjunath Patil @ 2020-10-19 21:48 UTC (permalink / raw)
  To: santosh.shilimkar
  Cc: netdev, linux-rdma, rama.nichanamatlu, manjunath.b.patil

This patchset adds functionality to track MR (Memory Region) usage by
RDS applications.

Manjunath Patil (2):
  rds: track memory region (MR) usage in kernel
  rds: add functionality to print MR related information

 include/uapi/linux/rds.h | 13 ++++++++++++-
 net/rds/af_rds.c         | 42 ++++++++++++++++++++++++++++++++++++++++
 net/rds/ib.c             |  1 +
 net/rds/rdma.c           | 29 ++++++++++++++++++++-------
 net/rds/rds.h            | 10 +++++++++-
 5 files changed, 86 insertions(+), 9 deletions(-)

-- 
2.27.0.112.g101b320


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] rds: track memory region (MR) usage in kernel
  2020-10-19 21:48 [PATCH 0/2] rds: MR(Memory Region) related patches Manjunath Patil
@ 2020-10-19 21:48 ` Manjunath Patil
  2020-10-19 21:48 ` [PATCH 2/2] rds: add functionality to print MR related information Manjunath Patil
  2020-10-27 14:18 ` [PATCH 0/2] rds: MR(Memory Region) related patches Manjunath Patil
  2 siblings, 0 replies; 4+ messages in thread
From: Manjunath Patil @ 2020-10-19 21:48 UTC (permalink / raw)
  To: santosh.shilimkar
  Cc: netdev, linux-rdma, rama.nichanamatlu, manjunath.b.patil

Excessive MR utilization by certain RDS applications can starve other
RDS applications of MRs. Tracking MR usage by RDS applications is
therefore beneficial.

The collected data is intended to be exported to userspace through the
rds-info interface.

Signed-off-by: Manjunath Patil <manjunath.b.patil@oracle.com>
Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
---
 net/rds/af_rds.c |  4 ++++
 net/rds/rdma.c   | 29 ++++++++++++++++++++++-------
 net/rds/rds.h    |  6 ++++++
 3 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 1a5bf3fa4578..e291095e5224 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -688,6 +688,10 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
 	rs->rs_rx_traces = 0;
 	rs->rs_tos = 0;
 	rs->rs_conn = NULL;
+	rs->rs_pid = current->pid;
+	get_task_comm(rs->rs_comm, current);
+	atomic64_set(&rs->rs_mr_gets, 0);
+	atomic64_set(&rs->rs_mr_puts, 0);
 
 	spin_lock_bh(&rds_sock_lock);
 	list_add_tail(&rs->rs_item, &rds_sock_list);
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 585e6b3b69ce..a1ae7b5ea3b2 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -44,6 +44,23 @@
  *  - an rdma is an mlock, apply rlimit?
  */
 
+static inline void mr_stats_update_gets(struct rds_sock *rs)
+{
+	atomic64_inc(&rs->rs_mr_gets);
+}
+
+static inline void mr_stats_update_puts(struct rds_sock *rs)
+{
+	atomic64_inc(&rs->rs_mr_puts);
+}
+
+static inline void rds_rb_erase(struct rds_sock *rs, struct rds_mr *mr)
+{
+	rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
+	RB_CLEAR_NODE(&mr->r_rb_node);
+	mr_stats_update_puts(rs);
+}
+
 /*
  * get the number of pages by looking at the page indices that the start and
  * end addresses fall in.
@@ -106,7 +123,7 @@ static void rds_destroy_mr(struct rds_mr *mr)
 
 	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	if (!RB_EMPTY_NODE(&mr->r_rb_node))
-		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
+		rds_rb_erase(rs, mr);
 	trans_private = mr->r_trans_private;
 	mr->r_trans_private = NULL;
 	spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
@@ -137,8 +154,7 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
 		mr = rb_entry(node, struct rds_mr, r_rb_node);
 		if (mr->r_trans == rs->rs_transport)
 			mr->r_invalidate = 0;
-		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
-		RB_CLEAR_NODE(&mr->r_rb_node);
+		rds_rb_erase(rs, mr);
 		spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
 		rds_destroy_mr(mr);
 		rds_mr_put(mr);
@@ -337,6 +353,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 	 * reference count. */
 	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	found = rds_mr_tree_walk(&rs->rs_rdma_keys, mr->r_key, mr);
+	mr_stats_update_gets(rs);
 	spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
 
 	BUG_ON(found && found != mr);
@@ -424,8 +441,7 @@ int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen)
 	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	mr = rds_mr_tree_walk(&rs->rs_rdma_keys, rds_rdma_cookie_key(args.cookie), NULL);
 	if (mr) {
-		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
-		RB_CLEAR_NODE(&mr->r_rb_node);
+		rds_rb_erase(rs, mr);
 		if (args.flags & RDS_RDMA_INVALIDATE)
 			mr->r_invalidate = 1;
 	}
@@ -465,8 +481,7 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
 	}
 
 	if (mr->r_use_once || force) {
-		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
-		RB_CLEAR_NODE(&mr->r_rb_node);
+		rds_rb_erase(rs, mr);
 		zot_me = 1;
 	}
 	spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index e4a603523083..5e61868e1799 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -654,6 +654,12 @@ struct rds_sock {
 	spinlock_t		rs_rdma_lock;
 	struct rb_root		rs_rdma_keys;
 
+	/* per rds_sock MR stats */
+	pid_t                   rs_pid;
+	char                    rs_comm[TASK_COMM_LEN];
+	atomic64_t              rs_mr_gets;
+	atomic64_t              rs_mr_puts;
+
 	/* Socket options - in case there will be more */
 	unsigned char		rs_recverr,
 				rs_cong_monitor;
-- 
2.27.0.112.g101b320


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 2/2] rds: add functionality to print MR related information
  2020-10-19 21:48 [PATCH 0/2] rds: MR(Memory Region) related patches Manjunath Patil
  2020-10-19 21:48 ` [PATCH 1/2] rds: track memory region (MR) usage in kernel Manjunath Patil
@ 2020-10-19 21:48 ` Manjunath Patil
  2020-10-27 14:18 ` [PATCH 0/2] rds: MR(Memory Region) related patches Manjunath Patil
  2 siblings, 0 replies; 4+ messages in thread
From: Manjunath Patil @ 2020-10-19 21:48 UTC (permalink / raw)
  To: santosh.shilimkar
  Cc: netdev, linux-rdma, rama.nichanamatlu, manjunath.b.patil

RDS keeps its own limited pool of MRs (Memory Regions taken from the IB
device for RDMA operations), which is shared by RDS applications. With
this patch, the applications and their MR usage can be printed from
userspace using the 'rds-info -m' command. This helps in tracking the
limited MRs.

MR-related information is stored in rds_sock. This patch exposes that
information to userspace through the rds-info command. Access is
restricted to callers with the CAP_NET_ADMIN capability.

sample output:
 # rds-info -m

RDS MRs:
Program          PID    MR-gets    MR-puts    MR-inuse   <IP,port,ToS>
rds-stress       17743  28468      28464      4          <192.168.18..
rds-stress       17744  19385      19381      4          <192.168.18..

Signed-off-by: Manjunath Patil <manjunath.b.patil@oracle.com>
Reviewed-by: Ka-cheong Poon <ka-cheong.poon@oracle.com>
---
 include/uapi/linux/rds.h | 13 ++++++++++++-
 net/rds/af_rds.c         | 38 ++++++++++++++++++++++++++++++++++++++
 net/rds/ib.c             |  1 +
 net/rds/rds.h            |  4 +++-
 4 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index cba368e55863..a6e8e28d95fb 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -134,8 +134,9 @@ typedef __u8	rds_tos_t;
 #define RDS6_INFO_SOCKETS		10015
 #define RDS6_INFO_TCP_SOCKETS		10016
 #define RDS6_INFO_IB_CONNECTIONS	10017
+#define RDS_INFO_MRS			10018
 
-#define RDS_INFO_LAST			10017
+#define RDS_INFO_LAST			10018
 
 struct rds_info_counter {
 	__u8	name[32];
@@ -270,6 +271,16 @@ struct rds6_info_rdma_connection {
 	__u32		cache_allocs;
 };
 
+struct rds_info_mr {
+	__u32		pid;
+	__u8		comm[TASK_COMM_LEN];
+	__u64		gets;
+	__u64		puts;
+	struct in6_addr	laddr;
+	__be16		lport;
+	__u8		tos;
+} __attribute__((packed));
+
 /* RDS message Receive Path Latency points */
 enum rds_message_rxpath_latency {
 	RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index e291095e5224..c81acf1a9457 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -486,6 +486,7 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
 			  char __user *optval, int __user *optlen)
 {
 	struct rds_sock *rs = rds_sk_to_rs(sock->sk);
+	struct net *net = sock_net(sock->sk);
 	int ret = -ENOPROTOOPT, len;
 	int trans;
 
@@ -499,6 +500,11 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
 
 	switch (optname) {
 	case RDS_INFO_FIRST ... RDS_INFO_LAST:
+		if (optname == RDS_INFO_MRS &&
+		    !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+			ret = -EACCES;
+			break;
+		}
 		ret = rds_info_getsockopt(sock, optname, optval,
 					  optlen);
 		break;
@@ -878,6 +884,38 @@ static void rds6_sock_info(struct socket *sock, unsigned int len,
 }
 #endif
 
+void rds_info_mrs(struct socket *sock, unsigned int len,
+		  struct rds_info_iterator *iter,
+		  struct rds_info_lengths *lens)
+{
+	struct rds_sock *rs;
+	struct rds_info_mr mr_info;
+	unsigned int total = 0;
+
+	len /= sizeof(mr_info);
+
+	spin_lock_bh(&rds_sock_lock);
+	list_for_each_entry(rs, &rds_sock_list, rs_item) {
+		total++;
+		if (total <= len) {
+			memset(&mr_info, 0, sizeof(mr_info));
+			mr_info.pid = rs->rs_pid;
+			strncpy(mr_info.comm, rs->rs_comm, TASK_COMM_LEN);
+			mr_info.gets = atomic64_read(&rs->rs_mr_gets);
+			mr_info.puts = atomic64_read(&rs->rs_mr_puts);
+			mr_info.laddr = rs->rs_bound_addr;
+			mr_info.lport = rs->rs_bound_port;
+			mr_info.tos = rs->rs_tos;
+			rds_info_copy(iter, &mr_info, sizeof(mr_info));
+		}
+	}
+	spin_unlock_bh(&rds_sock_lock);
+
+	lens->nr = total;
+	lens->each = sizeof(mr_info);
+}
+EXPORT_SYMBOL_GPL(rds_info_mrs);
+
 static void rds_exit(void)
 {
 	sock_unregister(rds_family_ops.family);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a792d8a3872a..48476ae95da9 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -599,6 +599,7 @@ int rds_ib_init(void)
 	rds_trans_register(&rds_ib_transport);
 
 	rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
+	rds_info_register_func(RDS_INFO_MRS, rds_info_mrs);
 #if IS_ENABLED(CONFIG_IPV6)
 	rds_info_register_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info);
 #endif
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 5e61868e1799..dd42bc95bbeb 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -746,7 +746,9 @@ static inline void __rds_wake_sk_sleep(struct sock *sk)
 		wake_up(waitq);
 }
 extern wait_queue_head_t rds_poll_waitq;
-
+void rds_info_mrs(struct socket *sock, unsigned int len,
+		  struct rds_info_iterator *iter,
+		  struct rds_info_lengths *lens);
 
 /* bind.c */
 int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-- 
2.27.0.112.g101b320


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 0/2] rds: MR(Memory Region) related patches
  2020-10-19 21:48 [PATCH 0/2] rds: MR(Memory Region) related patches Manjunath Patil
  2020-10-19 21:48 ` [PATCH 1/2] rds: track memory region (MR) usage in kernel Manjunath Patil
  2020-10-19 21:48 ` [PATCH 2/2] rds: add functionality to print MR related information Manjunath Patil
@ 2020-10-27 14:18 ` Manjunath Patil
  2 siblings, 0 replies; 4+ messages in thread
From: Manjunath Patil @ 2020-10-27 14:18 UTC (permalink / raw)
  To: santosh.shilimkar; +Cc: netdev, linux-rdma, rama.nichanamatlu

ping!

On 10/19/2020 2:48 PM, Manjunath Patil wrote:
> This patchset intends to add functionality to track MR usages by RDS
> applications.
>
> Manjunath Patil (2):
>    rds: track memory region (MR) usage in kernel
>    rds: add functionality to print MR related information
>
>   include/uapi/linux/rds.h | 13 ++++++++++++-
>   net/rds/af_rds.c         | 42 ++++++++++++++++++++++++++++++++++++++++
>   net/rds/ib.c             |  1 +
>   net/rds/rdma.c           | 29 ++++++++++++++++++++-------
>   net/rds/rds.h            | 10 +++++++++-
>   5 files changed, 86 insertions(+), 9 deletions(-)
>


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, back to index

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-19 21:48 [PATCH 0/2] rds: MR(Memory Region) related patches Manjunath Patil
2020-10-19 21:48 ` [PATCH 1/2] rds: track memory region (MR) usage in kernel Manjunath Patil
2020-10-19 21:48 ` [PATCH 2/2] rds: add functionality to print MR related information Manjunath Patil
2020-10-27 14:18 ` [PATCH 0/2] rds: MR(Memory Region) related patches Manjunath Patil

Linux-RDMA Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-rdma/0 linux-rdma/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-rdma linux-rdma/ https://lore.kernel.org/linux-rdma \
		linux-rdma@vger.kernel.org
	public-inbox-index linux-rdma

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-rdma


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git