All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V2] libibverbs: Allow arbitrary int values for MTU
@ 2013-07-02 12:31 Jeff Squyres
       [not found] ` <1372768306-6786-1-git-send-email-jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jeff Squyres @ 2013-07-02 12:31 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Jeff Squyres

(Previous patch did not include updates for the man pages)

Keep IBV_MTU_* enums values as they are, but pass MTU values around as
a struct containing a single int.  

Per lengthy discusson on the linux-rdma list, this patch introdces a
source code incompatibility.  Although legacy applications can
continue to use the enum values, they will need to be updated to use
the struct.  Newer applications are encouraged to use arbitrary int
values, not the MTU enums (e.g., 1024, 1500, 9000).

Signed-off-by: Jeff Squyres <jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
---
 Makefile.am                |  3 +-
 examples/devinfo.c         | 20 +++----------
 examples/pingpong.c        | 12 --------
 examples/pingpong.h        |  1 -
 examples/rc_pingpong.c     | 10 +++----
 examples/srq_pingpong.c    | 10 +++----
 examples/uc_pingpong.c     | 10 +++----
 examples/ud_pingpong.c     |  2 +-
 include/infiniband/verbs.h | 61 +++++++++++++++++++++++++++++++++++++--
 man/ibv_modify_qp.3        |  2 +-
 man/ibv_mtu_to_num.3       | 71 ++++++++++++++++++++++++++++++++++++++++++++++
 man/ibv_query_port.3       |  4 +--
 man/ibv_query_qp.3         |  2 +-
 src/cmd.c                  |  8 +++---
 src/marshall.c             |  2 +-
 15 files changed, 160 insertions(+), 58 deletions(-)
 create mode 100644 man/ibv_mtu_to_num.3

diff --git a/Makefile.am b/Makefile.am
index 40e83be..1159e55 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -54,7 +54,8 @@ man_MANS = man/ibv_asyncwatch.1 man/ibv_devices.1 man/ibv_devinfo.1	\
     man/ibv_post_srq_recv.3 man/ibv_query_device.3 man/ibv_query_gid.3	\
     man/ibv_query_pkey.3 man/ibv_query_port.3 man/ibv_query_qp.3	\
     man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3		\
-    man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3
+    man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3  \
+    man/ibv_mtu_to_num.3
 
 DEBIAN = debian/changelog debian/compat debian/control debian/copyright \
     debian/ibverbs-utils.install debian/libibverbs1.install \
diff --git a/examples/devinfo.c b/examples/devinfo.c
index ff078e4..e8fb27e 100644
--- a/examples/devinfo.c
+++ b/examples/devinfo.c
@@ -111,18 +111,6 @@ static const char *atomic_cap_str(enum ibv_atomic_cap atom_cap)
 	}
 }
 
-static const char *mtu_str(enum ibv_mtu max_mtu)
-{
-	switch (max_mtu) {
-	case IBV_MTU_256:  return "256";
-	case IBV_MTU_512:  return "512";
-	case IBV_MTU_1024: return "1024";
-	case IBV_MTU_2048: return "2048";
-	case IBV_MTU_4096: return "4096";
-	default:           return "invalid MTU";
-	}
-}
-
 static const char *width_str(uint8_t width)
 {
 	switch (width) {
@@ -301,10 +289,10 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 		printf("\t\tport:\t%d\n", port);
 		printf("\t\t\tstate:\t\t\t%s (%d)\n",
 		       port_state_str(port_attr.state), port_attr.state);
-		printf("\t\t\tmax_mtu:\t\t%s (%d)\n",
-		       mtu_str(port_attr.max_mtu), port_attr.max_mtu);
-		printf("\t\t\tactive_mtu:\t\t%s (%d)\n",
-		       mtu_str(port_attr.active_mtu), port_attr.active_mtu);
+		printf("\t\t\tmax_mtu:\t\t%d (%d)\n",
+		       ibv_mtu_to_num(port_attr.max_mtu), port_attr.max_mtu.mtu);
+		printf("\t\t\tactive_mtu:\t\t%d (%d)\n",
+			ibv_mtu_to_num(port_attr.active_mtu), port_attr.active_mtu.mtu);
 		printf("\t\t\tsm_lid:\t\t\t%d\n", port_attr.sm_lid);
 		printf("\t\t\tport_lid:\t\t%d\n", port_attr.lid);
 		printf("\t\t\tport_lmc:\t\t0x%02x\n", port_attr.lmc);
diff --git a/examples/pingpong.c b/examples/pingpong.c
index 90732ef..d1c22c9 100644
--- a/examples/pingpong.c
+++ b/examples/pingpong.c
@@ -36,18 +36,6 @@
 #include <stdio.h>
 #include <string.h>
 
-enum ibv_mtu pp_mtu_to_enum(int mtu)
-{
-	switch (mtu) {
-	case 256:  return IBV_MTU_256;
-	case 512:  return IBV_MTU_512;
-	case 1024: return IBV_MTU_1024;
-	case 2048: return IBV_MTU_2048;
-	case 4096: return IBV_MTU_4096;
-	default:   return -1;
-	}
-}
-
 uint16_t pp_get_local_lid(struct ibv_context *context, int port)
 {
 	struct ibv_port_attr attr;
diff --git a/examples/pingpong.h b/examples/pingpong.h
index 9cdc03e..91d217b 100644
--- a/examples/pingpong.h
+++ b/examples/pingpong.h
@@ -35,7 +35,6 @@
 
 #include <infiniband/verbs.h>
 
-enum ibv_mtu pp_mtu_to_enum(int mtu);
 uint16_t pp_get_local_lid(struct ibv_context *context, int port);
 int pp_get_port_info(struct ibv_context *context, int port,
 		     struct ibv_port_attr *attr);
diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c
index 15494a1..a7e1836 100644
--- a/examples/rc_pingpong.c
+++ b/examples/rc_pingpong.c
@@ -78,7 +78,7 @@ struct pingpong_dest {
 };
 
 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
-			  enum ibv_mtu mtu, int sl,
+			  struct ibv_mtu_t mtu, int sl,
 			  struct pingpong_dest *dest, int sgid_idx)
 {
 	struct ibv_qp_attr attr = {
@@ -209,7 +209,7 @@ out:
 }
 
 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
-						 int ib_port, enum ibv_mtu mtu,
+						 int ib_port, struct ibv_mtu_t mtu,
 						 int port, int sl,
 						 const struct pingpong_dest *my_dest,
 						 int sgid_idx)
@@ -547,7 +547,7 @@ int main(int argc, char *argv[])
 	int                      port = 18515;
 	int                      ib_port = 1;
 	int                      size = 4096;
-	enum ibv_mtu		 mtu = IBV_MTU_1024;
+	struct ibv_mtu_t	 mtu = num_to_ibv_mtu(1024);
 	int                      rx_depth = 500;
 	int                      iters = 1000;
 	int                      use_event = 0;
@@ -608,8 +608,8 @@ int main(int argc, char *argv[])
 			break;
 
 		case 'm':
-			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
-			if (mtu < 0) {
+			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
+			if (mtu.mtu < 0) {
 				usage(argv[0]);
 				return 1;
 			}
diff --git a/examples/srq_pingpong.c b/examples/srq_pingpong.c
index 6e00f8c..9173274 100644
--- a/examples/srq_pingpong.c
+++ b/examples/srq_pingpong.c
@@ -81,7 +81,7 @@ struct pingpong_dest {
 	union ibv_gid gid;
 };
 
-static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
+static int pp_connect_ctx(struct pingpong_context *ctx, int port, struct ibv_mtu_t mtu,
 			  int sl, const struct pingpong_dest *my_dest,
 			  const struct pingpong_dest *dest, int sgid_idx)
 {
@@ -229,7 +229,7 @@ out:
 }
 
 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
-						 int ib_port, enum ibv_mtu mtu,
+						 int ib_port, struct ibv_mtu_t mtu,
 						 int port, int sl,
 						 const struct pingpong_dest *my_dest,
 						 int sgid_idx)
@@ -620,7 +620,7 @@ int main(int argc, char *argv[])
 	int                      port = 18515;
 	int                      ib_port = 1;
 	int                      size = 4096;
-	enum ibv_mtu		 mtu = IBV_MTU_1024;
+	struct ibv_mtu_t	 mtu = num_to_ibv_mtu(1024);
 	int                      num_qp = 16;
 	int                      rx_depth = 500;
 	int                      iters = 1000;
@@ -685,8 +685,8 @@ int main(int argc, char *argv[])
 			break;
 
 		case 'm':
-			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
-			if (mtu < 0) {
+			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
+			if (mtu.mtu < 0) {
 				usage(argv[0]);
 				return 1;
 			}
diff --git a/examples/uc_pingpong.c b/examples/uc_pingpong.c
index 52c6c28..6b6bc85 100644
--- a/examples/uc_pingpong.c
+++ b/examples/uc_pingpong.c
@@ -78,7 +78,7 @@ struct pingpong_dest {
 };
 
 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
-			  enum ibv_mtu mtu, int sl,
+			  struct ibv_mtu_t mtu, int sl,
 			  struct pingpong_dest *dest, int sgid_idx)
 {
 	struct ibv_qp_attr attr = {
@@ -197,7 +197,7 @@ out:
 }
 
 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
-						 int ib_port, enum ibv_mtu mtu,
+						 int ib_port, struct ibv_mtu_t mtu,
 						 int port, int sl,
 						 const struct pingpong_dest *my_dest,
 						 int sgid_idx)
@@ -535,7 +535,7 @@ int main(int argc, char *argv[])
 	int                      port = 18515;
 	int                      ib_port = 1;
 	int                      size = 4096;
-	enum ibv_mtu		 mtu = IBV_MTU_1024;
+	struct ibv_mtu_t	 mtu = num_to_ibv_mtu(1024);
 	int                      rx_depth = 500;
 	int                      iters = 1000;
 	int                      use_event = 0;
@@ -596,8 +596,8 @@ int main(int argc, char *argv[])
 			break;
 
 		case 'm':
-			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
-			if (mtu < 0) {
+			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
+			if (mtu.mtu < 0) {
 				usage(argv[0]);
 				return 1;
 			}
diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
index 21c551d..5a0656f 100644
--- a/examples/ud_pingpong.c
+++ b/examples/ud_pingpong.c
@@ -332,7 +332,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 			fprintf(stderr, "Unable to query port info for port %d\n", port);
 			goto clean_device;
 		}
-		mtu = 1 << (port_info.active_mtu + 7);
+		mtu = ibv_mtu_to_num(port_info.active_mtu);
 		if (size > mtu) {
 			fprintf(stderr, "Requested size larger than port MTU (%d)\n", mtu);
 			goto clean_device;
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index 4b1ab57..0545a8c 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -144,6 +144,10 @@ struct ibv_device_attr {
 	uint8_t			phys_port_cnt;
 };
 
+/*
+ * Symbolic enum names for MTU values are preserved for backwards
+ * compatibility.
+ */
 enum ibv_mtu {
 	IBV_MTU_256  = 1,
 	IBV_MTU_512  = 2,
@@ -152,6 +156,16 @@ enum ibv_mtu {
 	IBV_MTU_4096 = 5
 };
 
+/*
+ * ibv_mtu_t is an encoded integer type that represents an MTU value.
+ * If the value is <= IBV_MTU_4096, it is treated as one of the
+ * IBV_MTU_* enum values.  Otherwise, it is treated as its integer
+ * value.
+ */
+struct ibv_mtu_t {
+	int mtu;
+};
+
 enum ibv_port_state {
 	IBV_PORT_NOP		= 0,
 	IBV_PORT_DOWN		= 1,
@@ -169,8 +183,8 @@ enum {
 
 struct ibv_port_attr {
 	enum ibv_port_state	state;
-	enum ibv_mtu		max_mtu;
-	enum ibv_mtu		active_mtu;
+	struct ibv_mtu_t	max_mtu;
+	struct ibv_mtu_t	active_mtu;
 	int			gid_tbl_len;
 	uint32_t		port_cap_flags;
 	uint32_t		max_msg_sz;
@@ -485,7 +499,7 @@ enum ibv_mig_state {
 struct ibv_qp_attr {
 	enum ibv_qp_state	qp_state;
 	enum ibv_qp_state	cur_qp_state;
-	enum ibv_mtu		path_mtu;
+	struct ibv_mtu_t	path_mtu;
 	enum ibv_mig_state	path_mig_state;
 	uint32_t		qkey;
 	uint32_t		rq_psn;
@@ -1138,6 +1152,47 @@ const char *ibv_port_state_str(enum ibv_port_state port_state);
  */
 const char *ibv_event_type_str(enum ibv_event_type event);
 
+/**
+ * num_to_ibv_mtu - Convert an integer to its corresponding encoded
+ * ibv_mtu_t value.  If an integer value corresponding to an IBV_MTU_*
+ * enum value is passed, return the enum value (e.g., 1024 ->
+ * IBV_MTU_1024).  Otherwise, just return the value (e.g., 1500 ->
+ * 1500).
+ */
+static inline struct ibv_mtu_t num_to_ibv_mtu(int num)
+{
+	struct ibv_mtu_t mtu;
+
+	switch (num) {
+	case 256:  mtu.mtu = IBV_MTU_256;  break;
+	case 512:  mtu.mtu = IBV_MTU_512;  break;
+	case 1024: mtu.mtu = IBV_MTU_1024; break;
+	case 2048: mtu.mtu = IBV_MTU_2048; break;
+	case 4096: mtu.mtu = IBV_MTU_4096; break;
+	default:   mtu.mtu = num;          break;
+	}
+
+	return mtu;
+}
+
+/**
+ * ibv_mtu_to_num - Convert an encoded ibv_mtu_t value to its
+ * corresponding integer value.  If an enum ibv_mtu value is passed,
+ * return its integer value (e.g., IBV_MTU_1024 -> 1024).  Otherwise,
+ * just return the value (e.g., 1500 -> 1500).
+ */
+static inline int ibv_mtu_to_num(struct ibv_mtu_t mtu)
+{
+	switch (mtu.mtu) {
+	case IBV_MTU_256:  return 256;
+	case IBV_MTU_512:  return 512;
+	case IBV_MTU_1024: return 1024;
+	case IBV_MTU_2048: return 2048;
+	case IBV_MTU_4096: return 4096;
+	default:           return mtu.mtu;
+	}
+}
+
 END_C_DECLS
 
 #  undef __attribute_const
diff --git a/man/ibv_modify_qp.3 b/man/ibv_modify_qp.3
index cb3faaa..dd93674 100644
--- a/man/ibv_modify_qp.3
+++ b/man/ibv_modify_qp.3
@@ -25,7 +25,7 @@ struct ibv_qp_attr {
 .in +8
 enum ibv_qp_state       qp_state;               /* Move the QP to this state */
 enum ibv_qp_state       cur_qp_state;           /* Assume this is the current QP state */
-enum ibv_mtu            path_mtu;               /* Path MTU (valid only for RC/UC QPs) */
+struct ibv_mtu_t        path_mtu;               /* Path MTU (valid only for RC/UC QPs) */
 enum ibv_mig_state      path_mig_state;         /* Path migration state (valid if HCA supports APM) */
 uint32_t                qkey;                   /* Q_Key for the QP (valid only for UD QPs) */
 uint32_t                rq_psn;                 /* PSN for receive queue (valid only for RC/UC QPs) */
diff --git a/man/ibv_mtu_to_num.3 b/man/ibv_mtu_to_num.3
new file mode 100644
index 0000000..ddb9bd0
--- /dev/null
+++ b/man/ibv_mtu_to_num.3
@@ -0,0 +1,71 @@
+.\" -*- nroff -*-
+.\"
+.TH IBV_MTU_TO_NUM 3 2013-06-20 libibverbs "Libibverbs Programmer's Manual"
+.SH "NAME"
+.nf
+ibv_mtu_to_num \- convert encoded MTU value to integer
+.sp
+num_to_ibv_mtu \- convert integer to encoded MTU value
+.SH "SYNOPSIS"
+.nf
+.B #include <infiniband/verbs.h>
+.sp
+.BI "int ibv_mtu_to_num(struct ibv_mtu_t " "mtu" ");
+.sp
+.BI "struct ibv_mtu_t num_to_ibv_mtu(int " "num" ");
+.fi
+.SH "DESCRIPTION"
+.PP
+The
+.I struct ibv_mtu_t
+type is an encoded integer used to represent MTU values in order to
+preserve backwards compatibility.  When the value of an
+.I struct ibv_mtu_t
+variable is <=
+.BR IBV_MTU_4096\fR,
+it is treated as the corresponding
+.B IBV_MTU_*
+enum value.  Otherwise, it is treated as its integer value.
+.PP
+MTU values less than the value of the enum
+.B IBV_MTU_4096
+(i.e., 5) cannot be represented.
+.PP
+.B ibv_mtu_to_num()
+converts the encoded MTU value
+.I mtu
+to a plain integer value.  For example, if
+.I mtu
+contains the value
+.BR IBV_MTU_1024\fR,
+then the value 1024 will be returned.  Likewise, if
+.I mtu
+contains the value 1500, then 1500 will be returned.
+.PP
+.B num_to_ibv_mtu()
+converts the integer
+.I num
+to its corresponding encoded
+.I struct ibv_mtu_t
+value.  For example, if
+.I num
+is 1024, then a
+.I struct ibv_mtu_t
+containing the value
+.B IBV_MTU_1024
+will be returned.  Likewise, if
+.I num
+is 1500, then a
+.I struct ibv_mtu_t
+containing the value 1500 will be returned.
+.SH "RETURN VALUE"
+.B ibv_mtu_to_num()
+returns an integer MTU value.
+.PP
+.B num_to_ibv_mtu()
+returns an encoded MTU value.
+.SH "SEE ALSO"
+.BR ibv_query_port (3)
+.SH "AUTHORS"
+.TP
+Jeff Squyres <jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
diff --git a/man/ibv_query_port.3 b/man/ibv_query_port.3
index 9bedd90..5620049 100644
--- a/man/ibv_query_port.3
+++ b/man/ibv_query_port.3
@@ -26,8 +26,8 @@ is an ibv_port_attr struct, as defined in <infiniband/verbs.h>.
 struct ibv_port_attr {
 .in +8
 enum ibv_port_state     state;          /* Logical port state */
-enum ibv_mtu            max_mtu;        /* Max MTU supported by port */
-enum ibv_mtu            active_mtu;     /* Actual MTU */
+struct ibv_mtu_t        max_mtu;        /* Max MTU supported by port */
+struct ibv_mtu_t        active_mtu;     /* Actual MTU */
 int                     gid_tbl_len;    /* Length of source GID table */
 uint32_t                port_cap_flags; /* Port capabilities */
 uint32_t                max_msg_sz;     /* Maximum message size */
diff --git a/man/ibv_query_qp.3 b/man/ibv_query_qp.3
index 3893ec8..fbf7ec3 100644
--- a/man/ibv_query_qp.3
+++ b/man/ibv_query_qp.3
@@ -30,7 +30,7 @@ struct ibv_qp_attr {
 .in +8
 enum ibv_qp_state       qp_state;            /* Current QP state */
 enum ibv_qp_state       cur_qp_state;        /* Current QP state - irrelevant for ibv_query_qp */
-enum ibv_mtu            path_mtu;            /* Path MTU (valid only for RC/UC QPs) */
+struct ibv_mtu_t        path_mtu;            /* Path MTU (valid only for RC/UC QPs) */
 enum ibv_mig_state      path_mig_state;      /* Path migration state (valid if HCA supports APM) */
 uint32_t                qkey;                /* Q_Key of the QP (valid only for UD QPs) */
 uint32_t                rq_psn;              /* PSN for receive queue (valid only for RC/UC QPs) */
diff --git a/src/cmd.c b/src/cmd.c
index 9789092..13fac0d 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -180,8 +180,8 @@ int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
 
 	port_attr->state      	   = resp.state;
-	port_attr->max_mtu         = resp.max_mtu;
-	port_attr->active_mtu      = resp.active_mtu;
+	port_attr->max_mtu.mtu     = resp.max_mtu;
+	port_attr->active_mtu.mtu  = resp.active_mtu;
 	port_attr->gid_tbl_len     = resp.gid_tbl_len;
 	port_attr->port_cap_flags  = resp.port_cap_flags;
 	port_attr->max_msg_sz      = resp.max_msg_sz;
@@ -678,7 +678,7 @@ int ibv_cmd_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	attr->alt_pkey_index                = resp.alt_pkey_index;
 	attr->qp_state                      = resp.qp_state;
 	attr->cur_qp_state                  = resp.cur_qp_state;
-	attr->path_mtu                      = resp.path_mtu;
+	attr->path_mtu.mtu                  = resp.path_mtu;
 	attr->path_mig_state                = resp.path_mig_state;
 	attr->sq_draining                   = resp.sq_draining;
 	attr->max_rd_atomic                 = resp.max_rd_atomic;
@@ -752,7 +752,7 @@ int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 	cmd->alt_pkey_index 	 = attr->alt_pkey_index;
 	cmd->qp_state 		 = attr->qp_state;
 	cmd->cur_qp_state 	 = attr->cur_qp_state;
-	cmd->path_mtu 		 = attr->path_mtu;
+	cmd->path_mtu		 = attr->path_mtu.mtu;
 	cmd->path_mig_state 	 = attr->path_mig_state;
 	cmd->en_sqd_async_notify = attr->en_sqd_async_notify;
 	cmd->max_rd_atomic 	 = attr->max_rd_atomic;
diff --git a/src/marshall.c b/src/marshall.c
index 577b4b1..a3595dc 100644
--- a/src/marshall.c
+++ b/src/marshall.c
@@ -59,7 +59,7 @@ void ibv_copy_qp_attr_from_kern(struct ibv_qp_attr *dst,
 				struct ibv_kern_qp_attr *src)
 {
 	dst->cur_qp_state = src->cur_qp_state;
-	dst->path_mtu = src->path_mtu;
+	dst->path_mtu.mtu = src->path_mtu;
 	dst->path_mig_state = src->path_mig_state;
 	dst->qkey = src->qkey;
 	dst->rq_psn = src->rq_psn;
-- 
1.8.2.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found] ` <1372768306-6786-1-git-send-email-jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
@ 2013-07-04  0:36   ` Jeff Squyres (jsquyres)
  2013-07-05 19:11   ` Roland Dreier
  1 sibling, 0 replies; 47+ messages in thread
From: Jeff Squyres (jsquyres) @ 2013-07-04  0:36 UTC (permalink / raw)
  To: <linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>

Bump.

On Jul 2, 2013, at 8:31 AM, Jeff Squyres <jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:

> (Previous patch did not include updates for the man pages)
> 
> Keep IBV_MTU_* enums values as they are, but pass MTU values around as
> a struct containing a single int.  
> 
> Per lengthy discusson on the linux-rdma list, this patch introdces a
> source code incompatibility.  Although legacy applications can
> continue to use the enum values, they will need to be updated to use
> the struct.  Newer applications are encouraged to use arbitrary int
> values, not the MTU enums (e.g., 1024, 1500, 9000).
> 
> Signed-off-by: Jeff Squyres <jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
> ---
> Makefile.am                |  3 +-
> examples/devinfo.c         | 20 +++----------
> examples/pingpong.c        | 12 --------
> examples/pingpong.h        |  1 -
> examples/rc_pingpong.c     | 10 +++----
> examples/srq_pingpong.c    | 10 +++----
> examples/uc_pingpong.c     | 10 +++----
> examples/ud_pingpong.c     |  2 +-
> include/infiniband/verbs.h | 61 +++++++++++++++++++++++++++++++++++++--
> man/ibv_modify_qp.3        |  2 +-
> man/ibv_mtu_to_num.3       | 71 ++++++++++++++++++++++++++++++++++++++++++++++
> man/ibv_query_port.3       |  4 +--
> man/ibv_query_qp.3         |  2 +-
> src/cmd.c                  |  8 +++---
> src/marshall.c             |  2 +-
> 15 files changed, 160 insertions(+), 58 deletions(-)
> create mode 100644 man/ibv_mtu_to_num.3
> 
> diff --git a/Makefile.am b/Makefile.am
> index 40e83be..1159e55 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -54,7 +54,8 @@ man_MANS = man/ibv_asyncwatch.1 man/ibv_devices.1 man/ibv_devinfo.1	\
>     man/ibv_post_srq_recv.3 man/ibv_query_device.3 man/ibv_query_gid.3	\
>     man/ibv_query_pkey.3 man/ibv_query_port.3 man/ibv_query_qp.3	\
>     man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3		\
> -    man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3
> +    man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3  \
> +    man/ibv_mtu_to_num.3
> 
> DEBIAN = debian/changelog debian/compat debian/control debian/copyright \
>     debian/ibverbs-utils.install debian/libibverbs1.install \
> diff --git a/examples/devinfo.c b/examples/devinfo.c
> index ff078e4..e8fb27e 100644
> --- a/examples/devinfo.c
> +++ b/examples/devinfo.c
> @@ -111,18 +111,6 @@ static const char *atomic_cap_str(enum ibv_atomic_cap atom_cap)
> 	}
> }
> 
> -static const char *mtu_str(enum ibv_mtu max_mtu)
> -{
> -	switch (max_mtu) {
> -	case IBV_MTU_256:  return "256";
> -	case IBV_MTU_512:  return "512";
> -	case IBV_MTU_1024: return "1024";
> -	case IBV_MTU_2048: return "2048";
> -	case IBV_MTU_4096: return "4096";
> -	default:           return "invalid MTU";
> -	}
> -}
> -
> static const char *width_str(uint8_t width)
> {
> 	switch (width) {
> @@ -301,10 +289,10 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
> 		printf("\t\tport:\t%d\n", port);
> 		printf("\t\t\tstate:\t\t\t%s (%d)\n",
> 		       port_state_str(port_attr.state), port_attr.state);
> -		printf("\t\t\tmax_mtu:\t\t%s (%d)\n",
> -		       mtu_str(port_attr.max_mtu), port_attr.max_mtu);
> -		printf("\t\t\tactive_mtu:\t\t%s (%d)\n",
> -		       mtu_str(port_attr.active_mtu), port_attr.active_mtu);
> +		printf("\t\t\tmax_mtu:\t\t%d (%d)\n",
> +		       ibv_mtu_to_num(port_attr.max_mtu), port_attr.max_mtu.mtu);
> +		printf("\t\t\tactive_mtu:\t\t%d (%d)\n",
> +			ibv_mtu_to_num(port_attr.active_mtu), port_attr.active_mtu.mtu);
> 		printf("\t\t\tsm_lid:\t\t\t%d\n", port_attr.sm_lid);
> 		printf("\t\t\tport_lid:\t\t%d\n", port_attr.lid);
> 		printf("\t\t\tport_lmc:\t\t0x%02x\n", port_attr.lmc);
> diff --git a/examples/pingpong.c b/examples/pingpong.c
> index 90732ef..d1c22c9 100644
> --- a/examples/pingpong.c
> +++ b/examples/pingpong.c
> @@ -36,18 +36,6 @@
> #include <stdio.h>
> #include <string.h>
> 
> -enum ibv_mtu pp_mtu_to_enum(int mtu)
> -{
> -	switch (mtu) {
> -	case 256:  return IBV_MTU_256;
> -	case 512:  return IBV_MTU_512;
> -	case 1024: return IBV_MTU_1024;
> -	case 2048: return IBV_MTU_2048;
> -	case 4096: return IBV_MTU_4096;
> -	default:   return -1;
> -	}
> -}
> -
> uint16_t pp_get_local_lid(struct ibv_context *context, int port)
> {
> 	struct ibv_port_attr attr;
> diff --git a/examples/pingpong.h b/examples/pingpong.h
> index 9cdc03e..91d217b 100644
> --- a/examples/pingpong.h
> +++ b/examples/pingpong.h
> @@ -35,7 +35,6 @@
> 
> #include <infiniband/verbs.h>
> 
> -enum ibv_mtu pp_mtu_to_enum(int mtu);
> uint16_t pp_get_local_lid(struct ibv_context *context, int port);
> int pp_get_port_info(struct ibv_context *context, int port,
> 		     struct ibv_port_attr *attr);
> diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c
> index 15494a1..a7e1836 100644
> --- a/examples/rc_pingpong.c
> +++ b/examples/rc_pingpong.c
> @@ -78,7 +78,7 @@ struct pingpong_dest {
> };
> 
> static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
> -			  enum ibv_mtu mtu, int sl,
> +			  struct ibv_mtu_t mtu, int sl,
> 			  struct pingpong_dest *dest, int sgid_idx)
> {
> 	struct ibv_qp_attr attr = {
> @@ -209,7 +209,7 @@ out:
> }
> 
> static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
> -						 int ib_port, enum ibv_mtu mtu,
> +						 int ib_port, struct ibv_mtu_t mtu,
> 						 int port, int sl,
> 						 const struct pingpong_dest *my_dest,
> 						 int sgid_idx)
> @@ -547,7 +547,7 @@ int main(int argc, char *argv[])
> 	int                      port = 18515;
> 	int                      ib_port = 1;
> 	int                      size = 4096;
> -	enum ibv_mtu		 mtu = IBV_MTU_1024;
> +	struct ibv_mtu_t	 mtu = num_to_ibv_mtu(1024);
> 	int                      rx_depth = 500;
> 	int                      iters = 1000;
> 	int                      use_event = 0;
> @@ -608,8 +608,8 @@ int main(int argc, char *argv[])
> 			break;
> 
> 		case 'm':
> -			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
> -			if (mtu < 0) {
> +			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
> +			if (mtu.mtu < 0) {
> 				usage(argv[0]);
> 				return 1;
> 			}
> diff --git a/examples/srq_pingpong.c b/examples/srq_pingpong.c
> index 6e00f8c..9173274 100644
> --- a/examples/srq_pingpong.c
> +++ b/examples/srq_pingpong.c
> @@ -81,7 +81,7 @@ struct pingpong_dest {
> 	union ibv_gid gid;
> };
> 
> -static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
> +static int pp_connect_ctx(struct pingpong_context *ctx, int port, struct ibv_mtu_t mtu,
> 			  int sl, const struct pingpong_dest *my_dest,
> 			  const struct pingpong_dest *dest, int sgid_idx)
> {
> @@ -229,7 +229,7 @@ out:
> }
> 
> static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
> -						 int ib_port, enum ibv_mtu mtu,
> +						 int ib_port, struct ibv_mtu_t mtu,
> 						 int port, int sl,
> 						 const struct pingpong_dest *my_dest,
> 						 int sgid_idx)
> @@ -620,7 +620,7 @@ int main(int argc, char *argv[])
> 	int                      port = 18515;
> 	int                      ib_port = 1;
> 	int                      size = 4096;
> -	enum ibv_mtu		 mtu = IBV_MTU_1024;
> +	struct ibv_mtu_t	 mtu = num_to_ibv_mtu(1024);
> 	int                      num_qp = 16;
> 	int                      rx_depth = 500;
> 	int                      iters = 1000;
> @@ -685,8 +685,8 @@ int main(int argc, char *argv[])
> 			break;
> 
> 		case 'm':
> -			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
> -			if (mtu < 0) {
> +			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
> +			if (mtu.mtu < 0) {
> 				usage(argv[0]);
> 				return 1;
> 			}
> diff --git a/examples/uc_pingpong.c b/examples/uc_pingpong.c
> index 52c6c28..6b6bc85 100644
> --- a/examples/uc_pingpong.c
> +++ b/examples/uc_pingpong.c
> @@ -78,7 +78,7 @@ struct pingpong_dest {
> };
> 
> static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
> -			  enum ibv_mtu mtu, int sl,
> +			  struct ibv_mtu_t mtu, int sl,
> 			  struct pingpong_dest *dest, int sgid_idx)
> {
> 	struct ibv_qp_attr attr = {
> @@ -197,7 +197,7 @@ out:
> }
> 
> static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
> -						 int ib_port, enum ibv_mtu mtu,
> +						 int ib_port, struct ibv_mtu_t mtu,
> 						 int port, int sl,
> 						 const struct pingpong_dest *my_dest,
> 						 int sgid_idx)
> @@ -535,7 +535,7 @@ int main(int argc, char *argv[])
> 	int                      port = 18515;
> 	int                      ib_port = 1;
> 	int                      size = 4096;
> -	enum ibv_mtu		 mtu = IBV_MTU_1024;
> +	struct ibv_mtu_t	 mtu = num_to_ibv_mtu(1024);
> 	int                      rx_depth = 500;
> 	int                      iters = 1000;
> 	int                      use_event = 0;
> @@ -596,8 +596,8 @@ int main(int argc, char *argv[])
> 			break;
> 
> 		case 'm':
> -			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
> -			if (mtu < 0) {
> +			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
> +			if (mtu.mtu < 0) {
> 				usage(argv[0]);
> 				return 1;
> 			}
> diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
> index 21c551d..5a0656f 100644
> --- a/examples/ud_pingpong.c
> +++ b/examples/ud_pingpong.c
> @@ -332,7 +332,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
> 			fprintf(stderr, "Unable to query port info for port %d\n", port);
> 			goto clean_device;
> 		}
> -		mtu = 1 << (port_info.active_mtu + 7);
> +		mtu = ibv_mtu_to_num(port_info.active_mtu);
> 		if (size > mtu) {
> 			fprintf(stderr, "Requested size larger than port MTU (%d)\n", mtu);
> 			goto clean_device;
> diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
> index 4b1ab57..0545a8c 100644
> --- a/include/infiniband/verbs.h
> +++ b/include/infiniband/verbs.h
> @@ -144,6 +144,10 @@ struct ibv_device_attr {
> 	uint8_t			phys_port_cnt;
> };
> 
> +/*
> + * Symbolic enum names for MTU values are preserved for backwards
> + * compatibility.
> + */
> enum ibv_mtu {
> 	IBV_MTU_256  = 1,
> 	IBV_MTU_512  = 2,
> @@ -152,6 +156,16 @@ enum ibv_mtu {
> 	IBV_MTU_4096 = 5
> };
> 
> +/*
> + * ibv_mtu_t is an encoded integer type that represents an MTU value.
> + * If the value is <= IBV_MTU_4096, it is treated as one of the
> + * IBV_MTU_* enum values.  Otherwise, it is treated as its integer
> + * value.
> + */
> +struct ibv_mtu_t {
> +	int mtu;
> +};
> +
> enum ibv_port_state {
> 	IBV_PORT_NOP		= 0,
> 	IBV_PORT_DOWN		= 1,
> @@ -169,8 +183,8 @@ enum {
> 
> struct ibv_port_attr {
> 	enum ibv_port_state	state;
> -	enum ibv_mtu		max_mtu;
> -	enum ibv_mtu		active_mtu;
> +	struct ibv_mtu_t	max_mtu;
> +	struct ibv_mtu_t	active_mtu;
> 	int			gid_tbl_len;
> 	uint32_t		port_cap_flags;
> 	uint32_t		max_msg_sz;
> @@ -485,7 +499,7 @@ enum ibv_mig_state {
> struct ibv_qp_attr {
> 	enum ibv_qp_state	qp_state;
> 	enum ibv_qp_state	cur_qp_state;
> -	enum ibv_mtu		path_mtu;
> +	struct ibv_mtu_t	path_mtu;
> 	enum ibv_mig_state	path_mig_state;
> 	uint32_t		qkey;
> 	uint32_t		rq_psn;
> @@ -1138,6 +1152,47 @@ const char *ibv_port_state_str(enum ibv_port_state port_state);
>  */
> const char *ibv_event_type_str(enum ibv_event_type event);
> 
> +/**
> + * num_to_ibv_mtu - Convert an integer to its corresponding encoded
> + * ibv_mtu_t value.  If an integer value corresponding to an IBV_MTU_*
> + * enum value is passed, return the enum value (e.g., 1024 ->
> + * IBV_MTU_1024).  Otherwise, just return the value (e.g., 1500 ->
> + * 1500).
> + */
> +static inline struct ibv_mtu_t num_to_ibv_mtu(int num)
> +{
> +	struct ibv_mtu_t mtu;
> +
> +	switch (num) {
> +	case 256:  mtu.mtu = IBV_MTU_256;  break;
> +	case 512:  mtu.mtu = IBV_MTU_512;  break;
> +	case 1024: mtu.mtu = IBV_MTU_1024; break;
> +	case 2048: mtu.mtu = IBV_MTU_2048; break;
> +	case 4096: mtu.mtu = IBV_MTU_4096; break;
> +	default:   mtu.mtu = num;          break;
> +	}
> +
> +	return mtu;
> +}
> +
> +/**
> + * ibv_mtu_to_num - Convert an encoded ibv_mtu_t value to its
> + * corresponding integer value.  If an enum ibv_mtu value is passed,
> + * return its integer value (e.g., IBV_MTU_1024 -> 1024).  Otherwise,
> + * just return the value (e.g., 1500 -> 1500).
> + */
> +static inline int ibv_mtu_to_num(struct ibv_mtu_t mtu)
> +{
> +	switch (mtu.mtu) {
> +	case IBV_MTU_256:  return 256;
> +	case IBV_MTU_512:  return 512;
> +	case IBV_MTU_1024: return 1024;
> +	case IBV_MTU_2048: return 2048;
> +	case IBV_MTU_4096: return 4096;
> +	default:           return mtu.mtu;
> +	}
> +}
> +
> END_C_DECLS
> 
> #  undef __attribute_const
> diff --git a/man/ibv_modify_qp.3 b/man/ibv_modify_qp.3
> index cb3faaa..dd93674 100644
> --- a/man/ibv_modify_qp.3
> +++ b/man/ibv_modify_qp.3
> @@ -25,7 +25,7 @@ struct ibv_qp_attr {
> .in +8
> enum ibv_qp_state       qp_state;               /* Move the QP to this state */
> enum ibv_qp_state       cur_qp_state;           /* Assume this is the current QP state */
> -enum ibv_mtu            path_mtu;               /* Path MTU (valid only for RC/UC QPs) */
> +struct ibv_mtu_t        path_mtu;               /* Path MTU (valid only for RC/UC QPs) */
> enum ibv_mig_state      path_mig_state;         /* Path migration state (valid if HCA supports APM) */
> uint32_t                qkey;                   /* Q_Key for the QP (valid only for UD QPs) */
> uint32_t                rq_psn;                 /* PSN for receive queue (valid only for RC/UC QPs) */
> diff --git a/man/ibv_mtu_to_num.3 b/man/ibv_mtu_to_num.3
> new file mode 100644
> index 0000000..ddb9bd0
> --- /dev/null
> +++ b/man/ibv_mtu_to_num.3
> @@ -0,0 +1,71 @@
> +.\" -*- nroff -*-
> +.\"
> +.TH IBV_MTU_TO_NUM 3 2013-06-20 libibverbs "Libibverbs Programmer's Manual"
> +.SH "NAME"
> +.nf
> +ibv_mtu_to_num \- convert encoded MTU value to integer
> +.sp
> +num_to_ibv_mtu \- convert integer to encoded MTU value
> +.SH "SYNOPSIS"
> +.nf
> +.B #include <infiniband/verbs.h>
> +.sp
> +.BI "int ibv_mtu_to_num(struct ibv_mtu_t " "mtu" ");
> +.sp
> +.BI "struct ibv_mtu_t num_to_ibv_mtu(int " "num" ");
> +.fi
> +.SH "DESCRIPTION"
> +.PP
> +The
> +.I struct ibv_mtu_t
> +type is an encoded integer used to represent MTU values in order to
> +preserve backwards compatibility.  When the value of an
> +.I struct ibv_mtu_t
> +variable is <=
> +.BR IBV_MTU_4096\fR,
> +it is treated as the corresponding
> +.B IBV_MTU_*
> +enum value.  Otherwise, it is treated as its integer value.
> +.PP
> +MTU values less than the value of the enum
> +.B IBV_MTU_4096
> +(i.e., 5) cannot be represented.
> +.PP
> +.B ibv_mtu_to_num()
> +converts the encoded MTU value
> +.I mtu
> +to a plain integer value.  For example, if
> +.I mtu
> +contains the value
> +.BR IBV_MTU_1024\fR,
> +then the value 1024 will be returned.  Likewise, if
> +.I mtu
> +contains the value 1500, then 1500 will be returned.
> +.PP
> +.B num_to_ibv_mtu()
> +converts the integer
> +.I num
> +to its corresponding encoded
> +.I struct ibv_mtu_t
> +value.  For example, if
> +.I num
> +is 1024, then a
> +.I struct ibv_mtu_t
> +containing the value
> +.B IBV_MTU_1024
> +will be returned.  Likewise, if
> +.I num
> +is 1500, then a
> +.I struct ibv_mtu_t
> +containing the value 1500 will be returned.
> +.SH "RETURN VALUE"
> +.B ibv_mtu_to_num()
> +returns an integer MTU value.
> +.PP
> +.B num_to_ibv_mtu()
> +returns an encoded MTU value.
> +.SH "SEE ALSO"
> +.BR ibv_query_port (3)
> +.SH "AUTHORS"
> +.TP
> +Jeff Squyres <jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
> diff --git a/man/ibv_query_port.3 b/man/ibv_query_port.3
> index 9bedd90..5620049 100644
> --- a/man/ibv_query_port.3
> +++ b/man/ibv_query_port.3
> @@ -26,8 +26,8 @@ is an ibv_port_attr struct, as defined in <infiniband/verbs.h>.
> struct ibv_port_attr {
> .in +8
> enum ibv_port_state     state;          /* Logical port state */
> -enum ibv_mtu            max_mtu;        /* Max MTU supported by port */
> -enum ibv_mtu            active_mtu;     /* Actual MTU */
> +struct ibv_mtu_t        max_mtu;        /* Max MTU supported by port */
> +struct ibv_mtu_t        active_mtu;     /* Actual MTU */
> int                     gid_tbl_len;    /* Length of source GID table */
> uint32_t                port_cap_flags; /* Port capabilities */
> uint32_t                max_msg_sz;     /* Maximum message size */
> diff --git a/man/ibv_query_qp.3 b/man/ibv_query_qp.3
> index 3893ec8..fbf7ec3 100644
> --- a/man/ibv_query_qp.3
> +++ b/man/ibv_query_qp.3
> @@ -30,7 +30,7 @@ struct ibv_qp_attr {
> .in +8
> enum ibv_qp_state       qp_state;            /* Current QP state */
> enum ibv_qp_state       cur_qp_state;        /* Current QP state - irrelevant for ibv_query_qp */
> -enum ibv_mtu            path_mtu;            /* Path MTU (valid only for RC/UC QPs) */
> +struct ibv_mtu_t        path_mtu;            /* Path MTU (valid only for RC/UC QPs) */
> enum ibv_mig_state      path_mig_state;      /* Path migration state (valid if HCA supports APM) */
> uint32_t                qkey;                /* Q_Key of the QP (valid only for UD QPs) */
> uint32_t                rq_psn;              /* PSN for receive queue (valid only for RC/UC QPs) */
> diff --git a/src/cmd.c b/src/cmd.c
> index 9789092..13fac0d 100644
> --- a/src/cmd.c
> +++ b/src/cmd.c
> @@ -180,8 +180,8 @@ int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
> 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
> 
> 	port_attr->state      	   = resp.state;
> -	port_attr->max_mtu         = resp.max_mtu;
> -	port_attr->active_mtu      = resp.active_mtu;
> +	port_attr->max_mtu.mtu     = resp.max_mtu;
> +	port_attr->active_mtu.mtu  = resp.active_mtu;
> 	port_attr->gid_tbl_len     = resp.gid_tbl_len;
> 	port_attr->port_cap_flags  = resp.port_cap_flags;
> 	port_attr->max_msg_sz      = resp.max_msg_sz;
> @@ -678,7 +678,7 @@ int ibv_cmd_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
> 	attr->alt_pkey_index                = resp.alt_pkey_index;
> 	attr->qp_state                      = resp.qp_state;
> 	attr->cur_qp_state                  = resp.cur_qp_state;
> -	attr->path_mtu                      = resp.path_mtu;
> +	attr->path_mtu.mtu                  = resp.path_mtu;
> 	attr->path_mig_state                = resp.path_mig_state;
> 	attr->sq_draining                   = resp.sq_draining;
> 	attr->max_rd_atomic                 = resp.max_rd_atomic;
> @@ -752,7 +752,7 @@ int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
> 	cmd->alt_pkey_index 	 = attr->alt_pkey_index;
> 	cmd->qp_state 		 = attr->qp_state;
> 	cmd->cur_qp_state 	 = attr->cur_qp_state;
> -	cmd->path_mtu 		 = attr->path_mtu;
> +	cmd->path_mtu		 = attr->path_mtu.mtu;
> 	cmd->path_mig_state 	 = attr->path_mig_state;
> 	cmd->en_sqd_async_notify = attr->en_sqd_async_notify;
> 	cmd->max_rd_atomic 	 = attr->max_rd_atomic;
> diff --git a/src/marshall.c b/src/marshall.c
> index 577b4b1..a3595dc 100644
> --- a/src/marshall.c
> +++ b/src/marshall.c
> @@ -59,7 +59,7 @@ void ibv_copy_qp_attr_from_kern(struct ibv_qp_attr *dst,
> 				struct ibv_kern_qp_attr *src)
> {
> 	dst->cur_qp_state = src->cur_qp_state;
> -	dst->path_mtu = src->path_mtu;
> +	dst->path_mtu.mtu = src->path_mtu;
> 	dst->path_mig_state = src->path_mig_state;
> 	dst->qkey = src->qkey;
> 	dst->rq_psn = src->rq_psn;
> -- 
> 1.8.2.1
> 


-- 
Jeff Squyres
jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found] ` <1372768306-6786-1-git-send-email-jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
  2013-07-04  0:36   ` Jeff Squyres (jsquyres)
@ 2013-07-05 19:11   ` Roland Dreier
       [not found]     ` <CAL1RGDVU9otyMtoit1PJR5JkVy4XvU=4xjL1rN1QB7pq0rVcxA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  1 sibling, 1 reply; 47+ messages in thread
From: Roland Dreier @ 2013-07-05 19:11 UTC (permalink / raw)
  To: Jeff Squyres; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Tue, Jul 2, 2013 at 5:31 AM, Jeff Squyres <jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
> Keep IBV_MTU_* enums values as they are, but pass MTU values around as
> a struct containing a single int.
>
> Per lengthy discusson on the linux-rdma list, this patch introdces a
> source code incompatibility.  Although legacy applications can
> continue to use the enum values, they will need to be updated to use
> the struct.  Newer applications are encouraged to use arbitrary int
> values, not the MTU enums (e.g., 1024, 1500, 9000).

So what happens if I have an old application binary, and I run against
a new libibverbs without recompiling?

Also it seems that I'm forced to change my source code to be able to
compile against new libibverbs?

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]     ` <CAL1RGDVU9otyMtoit1PJR5JkVy4XvU=4xjL1rN1QB7pq0rVcxA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2013-07-08 16:26       ` Jeff Squyres (jsquyres)
       [not found]         ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F6F31A6-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jeff Squyres (jsquyres) @ 2013-07-08 16:26 UTC (permalink / raw)
  To: Roland Dreier
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Jason Gunthorpe, Doug Ledford

On Jul 5, 2013, at 3:11 PM, Roland Dreier <roland-BHEL68pLQRGGvPXPguhicg@public.gmane.org> wrote:

> So what happens if I have an old application binary, and I run against
> a new libibverbs without recompiling?
> 
> Also it seems that I'm forced to change my source code to be able to
> compile against new libibverbs?


I previously sent an ABI-preserving version of this patch, but it was hated by Doug Ledford and (eventually) Jason Gunthorpe.  

After long discussion (see thread starting here: http://www.spinics.net/lists/linux-rdma/msg15951.html), they decided that they wanted a clean break that forces both source code and ABI changes, which resulted in this patch.

I personally don't care which way this goes; I just want the ability to have non-enum MTU values.

-- 
Jeff Squyres
jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]         ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F6F31A6-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
@ 2013-07-08 17:19           ` Roland Dreier
       [not found]             ` <CAL1RGDU+4CSJyF0TAfAv-YYcjA4FG+XNeg5pGCHPeW5iqhvwpA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Roland Dreier @ 2013-07-08 17:19 UTC (permalink / raw)
  To: Jeff Squyres (jsquyres)
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Jason Gunthorpe, Doug Ledford

[resending to reply-all, sorry Jeff]

On Mon, Jul 8, 2013 at 9:26 AM, Jeff Squyres (jsquyres)
<jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
>> So what happens if I have an old application binary, and I run against
>> a new libibverbs without recompiling?

>> Also it seems that I'm forced to change my source code to be able to
>> compile against new libibverbs?

> I previously sent an ABI-preserving version of this patch, but it was hated by Doug Ledford and (eventually) Jason Gunthorpe.

> After long discussion (see thread starting here: http://www.spinics.net/lists/linux-rdma/msg15951.html), they decided that they wanted a clean break that forces both source code and ABI changes, which resulted in this patch.

> I personally don't care which way this goes; I just want the ability to have non-enum MTU values.

So I guess I need to go back and read all of that thread carefully,
but I don't think that silently breaking old binaries and also
breaking sources is the right way to go.  What about preserving the
behavior of the existing API / ABI and then adding a new function to
return the size of the maximum datagram for a device?

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]             ` <CAL1RGDU+4CSJyF0TAfAv-YYcjA4FG+XNeg5pGCHPeW5iqhvwpA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2013-07-08 17:26               ` Jason Gunthorpe
       [not found]                 ` <20130708172621.GA3852-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2013-07-08 17:26 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Jeff Squyres (jsquyres), linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

On Mon, Jul 08, 2013 at 10:19:33AM -0700, Roland Dreier wrote:
> [resending to reply-all, sorry Jeff]
> 
> On Mon, Jul 8, 2013 at 9:26 AM, Jeff Squyres (jsquyres)
> <jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
> >> So what happens if I have an old application binary, and I run against
> >> a new libibverbs without recompiling?
> 
> >> Also it seems that I'm forced to change my source code to be able to
> >> compile against new libibverbs?
> 
> > I previously sent an ABI-preserving version of this patch, but it was hated by Doug Ledford and (eventually) Jason Gunthorpe.
> 
> > After long discussion (see thread starting here: http://www.spinics.net/lists/linux-rdma/msg15951.html), they decided that they wanted a clean break that forces both source code and ABI changes, which resulted in this patch.
> 
> > I personally don't care which way this goes; I just want the ability to have non-enum MTU values.
> 
> So I guess I need to go back and read all of that thread carefully,
> but I don't think that silently breaking old binaries and also
> breaking sources is the right way to go.  What about preserving the
> behavior of the existing API / ABI and then adding a new function to
> return the size of the maximum datagram for a device?

It is not just a device, but the QP attrs and so on, so there would be
quite a few new core functions needed to extend the MTU, and that
flows back into the kernel interface too...

Jeff's patch doesn't break old binaries, old binaries, running with
normal IB MTUs work fine. The structure layouts all stay the same,
etc.

Old sources will need an update to support non-IB MTUs no matter what,
forcing an update via the compiler seems saner then letting them
remain silently out of date..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                 ` <20130708172621.GA3852-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2013-07-10 12:14                   ` Jeff Squyres (jsquyres)
       [not found]                     ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F70BF83-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jeff Squyres (jsquyres) @ 2013-07-10 12:14 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 2704 bytes --]

On Jul 8, 2013, at 1:26 PM, Jason Gunthorpe <jgunthorpe@obsidianresearch.com> wrote:

> Jeff's patch doesn't break old binaries, old binaries, running with
> normal IB MTUs work fine. The structure layouts all stay the same,
> etc.


FWIW, I did a simple test to confirm this.  I installed a stock git HEAD libibverbs into $HOME/libibverbs-HEAD and a libibverbs with the MTU patch in $HOME/libibverbs-mtu-patch.  The mlx4 driver was installed into both trees (I used some fairly old Mellanox HCAs+Dell servers for this test).

This is the base case:

-----
[5:06] dell012:~ ❯❯❯ cd libibverbs-HEAD
[5:07] dell012:~/libibverbs-HEAD ❯❯❯ ldd bin/ibv_rc_pingpong
	linux-vdso.so.1 =>  (0x00002aaaaaacb000)
	libibverbs.so.1 => /home/jsquyres/libibverbs-HEAD/lib/libibverbs.so.1 (0x00002aaaaaccd000)
	libpthread.so.0 => /lib64/libpthread.so.0 (0x00002aaaaaeec000)
	libdl.so.2 => /lib64/libdl.so.2 (0x00002aaaab109000)
	libc.so.6 => /lib64/libc.so.6 (0x00002aaaab30e000)
	/lib64/ld-linux-x86-64.so.2 (0x00002aaaaaaab000)
[5:07] dell012:~/libibverbs-HEAD ❯❯❯ ./bin/ibv_rc_pingpong dell011
  local address:  LID 0x0004, QPN 0x04004a, PSN 0xc08742, GID ::
  remote address: LID 0x0019, QPN 0x20004a, PSN 0x44c48e, GID ::
8192000 bytes in 0.02 seconds = 4170.28 Mbit/sec
1000 iters in 0.02 seconds = 15.72 usec/iter
-----

Works fine.  Now let's use the same libibverbs-HEAD rc pingpong binary, but with the MTU-patched libibverbs.so:

-----
[5:07] dell012:~/libibverbs-HEAD ❯❯❯ mv lib/libibverbs.so.1 lib/libibverbs.so.1-bogus
[5:07] dell012:~/libibverbs-HEAD ❯❯❯ export LD_LIBRARY_PATH=$HOME/libibverbs-mtu-patch/lib
[5:08] dell012:~/libibverbs-HEAD ❯❯❯ ldd bin/ibv_rc_pingpong
	linux-vdso.so.1 =>  (0x00002aaaaaacb000)
	libibverbs.so.1 => /home/jsquyres/libibverbs-mtu-patch/lib/libibverbs.so.1 (0x00002aaaaaccd000)
	libpthread.so.0 => /lib64/libpthread.so.0 (0x00002aaaaaeed000)
	libdl.so.2 => /lib64/libdl.so.2 (0x00002aaaab10a000)
	libc.so.6 => /lib64/libc.so.6 (0x00002aaaab30e000)
	/lib64/ld-linux-x86-64.so.2 (0x00002aaaaaaab000)
[5:08] dell012:~/libibverbs-HEAD ❯❯❯ ./bin/ibv_rc_pingpong dell011
  local address:  LID 0x0004, QPN 0x08004a, PSN 0x65391c, GID ::
  remote address: LID 0x0019, QPN 0x24004a, PSN 0x7d137e, GID ::
8192000 bytes in 0.02 seconds = 4163.39 Mbit/sec
1000 iters in 0.02 seconds = 15.74 usec/iter
-----

Still works fine.

-- 
Jeff Squyres
jsquyres@cisco.com
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/

N‹§²æìr¸›yúèšØb²X¬¶Ç§vØ^–)Þº{.nÇ+‰·¥Š{±­ÙšŠ{ayº\x1dʇڙë,j\a­¢f£¢·hš‹»öì\x17/oSc¾™Ú³9˜uÀ¦æå‰È&jw¨®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿïêäz¹Þ–Šàþf£¢·hšˆ§~ˆmš

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                     ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F70BF83-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
@ 2013-07-15 14:20                       ` Jeff Squyres (jsquyres)
       [not found]                         ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F71CB0F-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
  2013-07-16  8:04                       ` Hefty, Sean
  1 sibling, 1 reply; 47+ messages in thread
From: Jeff Squyres (jsquyres) @ 2013-07-15 14:20 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

Bump.

On Jul 10, 2013, at 8:14 AM, Jeff Squyres (jsquyres) <jsquyres@cisco.com> wrote:

> On Jul 8, 2013, at 1:26 PM, Jason Gunthorpe <jgunthorpe@obsidianresearch.com> wrote:
> 
>> Jeff's patch doesn't break old binaries, old binaries, running with
>> normal IB MTUs work fine. The structure layouts all stay the same,
>> etc.
> 
> 
> FWIW, I did a simple test to confirm this.  I installed a stock git HEAD libibverbs into $HOME/libibverbs-HEAD and a libibverbs with the MTU patch in $HOME/libibverbs-mtu-patch.  The mlx4 driver was installed into both trees (I used some fairly old Mellanox HCAs+Dell servers for this test).
> 
> This is the base case:
> 
> -----
> [5:06] dell012:~ ❯❯❯ cd libibverbs-HEAD
> [5:07] dell012:~/libibverbs-HEAD ❯❯❯ ldd bin/ibv_rc_pingpong
> 	linux-vdso.so.1 =>  (0x00002aaaaaacb000)
> 	libibverbs.so.1 => /home/jsquyres/libibverbs-HEAD/lib/libibverbs.so.1 (0x00002aaaaaccd000)
> 	libpthread.so.0 => /lib64/libpthread.so.0 (0x00002aaaaaeec000)
> 	libdl.so.2 => /lib64/libdl.so.2 (0x00002aaaab109000)
> 	libc.so.6 => /lib64/libc.so.6 (0x00002aaaab30e000)
> 	/lib64/ld-linux-x86-64.so.2 (0x00002aaaaaaab000)
> [5:07] dell012:~/libibverbs-HEAD ❯❯❯ ./bin/ibv_rc_pingpong dell011
>  local address:  LID 0x0004, QPN 0x04004a, PSN 0xc08742, GID ::
>  remote address: LID 0x0019, QPN 0x20004a, PSN 0x44c48e, GID ::
> 8192000 bytes in 0.02 seconds = 4170.28 Mbit/sec
> 1000 iters in 0.02 seconds = 15.72 usec/iter
> -----
> 
> Works fine.  Now let's use the same libibverbs-HEAD rc pingpong binary, but with the MTU-patched libibverbs.so:
> 
> -----
> [5:07] dell012:~/libibverbs-HEAD ❯❯❯ mv lib/libibverbs.so.1 lib/libibverbs.so.1-bogus
> [5:07] dell012:~/libibverbs-HEAD ❯❯❯ export LD_LIBRARY_PATH=$HOME/libibverbs-mtu-patch/lib
> [5:08] dell012:~/libibverbs-HEAD ❯❯❯ ldd bin/ibv_rc_pingpong
> 	linux-vdso.so.1 =>  (0x00002aaaaaacb000)
> 	libibverbs.so.1 => /home/jsquyres/libibverbs-mtu-patch/lib/libibverbs.so.1 (0x00002aaaaaccd000)
> 	libpthread.so.0 => /lib64/libpthread.so.0 (0x00002aaaaaeed000)
> 	libdl.so.2 => /lib64/libdl.so.2 (0x00002aaaab10a000)
> 	libc.so.6 => /lib64/libc.so.6 (0x00002aaaab30e000)
> 	/lib64/ld-linux-x86-64.so.2 (0x00002aaaaaaab000)
> [5:08] dell012:~/libibverbs-HEAD ❯❯❯ ./bin/ibv_rc_pingpong dell011
>  local address:  LID 0x0004, QPN 0x08004a, PSN 0x65391c, GID ::
>  remote address: LID 0x0019, QPN 0x24004a, PSN 0x7d137e, GID ::
> 8192000 bytes in 0.02 seconds = 4163.39 Mbit/sec
> 1000 iters in 0.02 seconds = 15.74 usec/iter
> -----
> 
> Still works fine.


-- 
Jeff Squyres
jsquyres@cisco.com
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/


^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                     ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F70BF83-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
  2013-07-15 14:20                       ` Jeff Squyres (jsquyres)
@ 2013-07-16  8:04                       ` Hefty, Sean
       [not found]                         ` <1828884A29C6694DAF28B7E6B8A82373805AAB74-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  1 sibling, 1 reply; 47+ messages in thread
From: Hefty, Sean @ 2013-07-16  8:04 UTC (permalink / raw)
  To: Jeff Squyres (jsquyres), Jason Gunthorpe
  Cc: Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

> > Jeff's patch doesn't break old binaries, old binaries, running with
> > normal IB MTUs work fine. The structure layouts all stay the same,
> > etc.
> 
> 
> FWIW, I did a simple test to confirm this.  I installed a stock git HEAD
> libibverbs into $HOME/libibverbs-HEAD and a libibverbs with the MTU patch in
> $HOME/libibverbs-mtu-patch.  The mlx4 driver was installed into both trees (I
> used some fairly old Mellanox HCAs+Dell servers for this test).

This creates a bit of pain for anyone (like, hypothetically, say, for example, me) who wants to release source that supports both the older and newer version of libibverbs.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                         ` <1828884A29C6694DAF28B7E6B8A82373805AAB74-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2013-07-16 14:47                           ` Jason Gunthorpe
       [not found]                             ` <20130716144747.GA7304-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2013-07-16 14:47 UTC (permalink / raw)
  To: Hefty, Sean
  Cc: Jeff Squyres (jsquyres),
	Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

On Tue, Jul 16, 2013 at 08:04:08AM +0000, Hefty, Sean wrote:
> > > Jeff's patch doesn't break old binaries, old binaries, running with
> > > normal IB MTUs work fine. The structure layouts all stay the same,
> > > etc.
> > 
> > 
> > FWIW, I did a simple test to confirm this.  I installed a stock git HEAD
> > libibverbs into $HOME/libibverbs-HEAD and a libibverbs with the MTU patch in
> > $HOME/libibverbs-mtu-patch.  The mlx4 driver was installed into both trees (I
> > used some fairly old Mellanox HCAs+Dell servers for this test).
> 
> This creates a bit of pain for anyone (like, hypothetically, say,
> for example, me) who wants to release source that supports both the
> older and newer version of libibverbs.

But it is so minor. An autoconf test for the ibv_mtu_to_num function
in the headers. If it is not defined then define your own in your
private headers. All the other source remains the same, the only
change is you must call the conversion functions.

A source change is completely unvaoidable. Supporting the new MTU
values requires updated source.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                             ` <20130716144747.GA7304-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2013-07-16 17:11                               ` Jeff Squyres (jsquyres)
       [not found]                                 ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F7211E6-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jeff Squyres (jsquyres) @ 2013-07-16 17:11 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Hefty, Sean, Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	Doug Ledford

On Jul 16, 2013, at 10:47 AM, Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:

> A source change is completely unvaoidable. Supporting the new MTU
> values requires updated source.


I don't really care one way or the other; I'll submit whatever patch people want.  :-)

But FWIW, I tend to believe the Doug/Jason position:

- MTU really needs to be a plain integer (not an enum)
- forcing application source change/adaptation is the safest way to move forward
- doing it this way preserves ABI, so existing binaries are safe

-- 
Jeff Squyres
jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                 ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F7211E6-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
@ 2013-07-17  0:16                                   ` Roland Dreier
       [not found]                                     ` <CAL1RGDWKmecLXsqphwdP9XG=4Y=65Z_ACSdRBhEqVQb6Q4LrUg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2013-07-17  4:06                                   ` Hefty, Sean
  1 sibling, 1 reply; 47+ messages in thread
From: Roland Dreier @ 2013-07-17  0:16 UTC (permalink / raw)
  To: Jeff Squyres (jsquyres)
  Cc: Jason Gunthorpe, Hefty, Sean, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	Doug Ledford

On Tue, Jul 16, 2013 at 10:11 AM, Jeff Squyres (jsquyres)
<jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
> - doing it this way preserves ABI, so existing binaries are safe

I still don't get this.  Wouldn't an existing binary be pretty
surprised to get a value wildly out of range of the enum?

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                 ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F7211E6-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
  2013-07-17  0:16                                   ` Roland Dreier
@ 2013-07-17  4:06                                   ` Hefty, Sean
       [not found]                                     ` <1828884A29C6694DAF28B7E6B8A82373805AECC0-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  1 sibling, 1 reply; 47+ messages in thread
From: Hefty, Sean @ 2013-07-17  4:06 UTC (permalink / raw)
  To: Jeff Squyres (jsquyres), Jason Gunthorpe
  Cc: Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

> > A source change is completely unvaoidable. Supporting the new MTU
> > values requires updated source.
> 
> 
> I don't really care one way or the other; I'll submit whatever patch people
> want.  :-)
> 
> But FWIW, I tend to believe the Doug/Jason position:
> 
> - MTU really needs to be a plain integer (not an enum)
> - forcing application source change/adaptation is the safest way to move
> forward
> - doing it this way preserves ABI, so existing binaries are safe

I don't remember.  Is it known how the mtu is communicated with the kernel?  Looking at kern-abi.h, the mtu fields are:

struct ibv_query_port_resp {
	__u8  max_mtu;
	__u8  active_mtu;

struct ibv_kern_qp_attr {
	__u32	path_mtu;

struct ibv_query_qp_resp {
	__u8  path_mtu;

struct ibv_modify_qp {
	__u8  path_mtu;

In most cases, we only have 8 bits available to/from the kernel.  (There are at least 16 bits of reserved space in these structures.)

- Sean
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                     ` <CAL1RGDWKmecLXsqphwdP9XG=4Y=65Z_ACSdRBhEqVQb6Q4LrUg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2013-07-17 17:57                                       ` Doug Ledford
       [not found]                                         ` <51E6DB11.9050906-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Doug Ledford @ 2013-07-17 17:57 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Jeff Squyres (jsquyres),
	Jason Gunthorpe, Hefty, Sean, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 07/16/2013 08:16 PM, Roland Dreier wrote:
> On Tue, Jul 16, 2013 at 10:11 AM, Jeff Squyres (jsquyres)
> <jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
>> - doing it this way preserves ABI, so existing binaries are safe
> 
> I still don't get this.  Wouldn't an existing binary be pretty
> surprised to get a value wildly out of range of the enum?

Yes, but there's no way around that without simply lying about the MTU.
 So, the argument was made in the thread that historically, applications
have had to be modified when moved to a new link layer (aka, iWARP meant
IB apps had to be slightly modified for connection reasons, RoCE again
required some slight app modifications, etc) so this was seen as a case
of "the app will work on fabrics it already knows about, and will only
get confused if moved to this new fabric, and in that case, the app
needs to be modified anyway, so that's acceptable breakage for keeping
the apps working the rest of the time".  That was the argument anyway.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                     ` <1828884A29C6694DAF28B7E6B8A82373805AECC0-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2013-07-17 18:12                                       ` Jeff Squyres (jsquyres)
       [not found]                                         ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F725B8C-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jeff Squyres (jsquyres) @ 2013-07-17 18:12 UTC (permalink / raw)
  To: Hefty, Sean
  Cc: Jason Gunthorpe, Roland Dreier,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

On Jul 17, 2013, at 12:06 AM, "Hefty, Sean" <sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> wrote:

> I don't remember.  Is it known how the mtu is communicated with the kernel?  

I hadn't looked at the kernel side yet; I was waiting for the userspace side to sort itself out first.

> Looking at kern-abi.h, the mtu fields are:
> 
> struct ibv_query_port_resp {
> 	__u8  max_mtu;
> 	__u8  active_mtu;
> 
> struct ibv_kern_qp_attr {
> 	__u32	path_mtu;
> 
> struct ibv_query_qp_resp {
> 	__u8  path_mtu;
> 
> struct ibv_modify_qp {
> 	__u8  path_mtu;
> 
> In most cases, we only have 8 bits available to/from the kernel.  (There are at least 16 bits of reserved space in these structures.)

Hmm.  16 bits is probably enough for the MTU values, but still, changing kern-abi.h will be problematic from an ABI perspective.  Do people care about the kernel ABI, or is that mainly a userspace issue?

-- 
Jeff Squyres
jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                         ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F725B8C-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
@ 2013-07-17 21:41                                           ` Hefty, Sean
       [not found]                                             ` <1828884A29C6694DAF28B7E6B8A82373805B941D-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Hefty, Sean @ 2013-07-17 21:41 UTC (permalink / raw)
  To: Jeff Squyres (jsquyres), Steve Wise
  Cc: Jason Gunthorpe, Roland Dreier,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

> I hadn't looked at the kernel side yet; I was waiting for the userspace side to
> sort itself out first.

I think it makes sense to start with how user space can get the data.  Without eating up reserved fields, we're starting with 8 bit values.

> Hmm.  16 bits is probably enough for the MTU values, but still, changing kern-
> abi.h will be problematic from an ABI perspective.  Do people care about the
> kernel ABI, or is that mainly a userspace issue?

Well, we definitely care about the kernel to user ABI.

I can't imagine that we're dealing with more than a handful of actual MTU values.  Maybe the simplest thing is to extend the mtu enum to include what new values are needed, plus add a function to convert it.  (Can we call mulligan?)

I don't know how iwarp handles this.  Does it just report the wrong mtu, since it doesn't necessarily matter?  Steve - any idea here?

- Sean
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                             ` <1828884A29C6694DAF28B7E6B8A82373805B941D-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2013-07-17 21:44                                               ` Steve Wise
       [not found]                                                 ` <51E71023.2060006-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Steve Wise @ 2013-07-17 21:44 UTC (permalink / raw)
  To: Hefty, Sean
  Cc: Jeff Squyres (jsquyres),
	Jason Gunthorpe, Roland Dreier,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

On 7/17/2013 4:41 PM, Hefty, Sean wrote:
>> I hadn't looked at the kernel side yet; I was waiting for the userspace side to
>> sort itself out first.
> I think it makes sense to start with how user space can get the data.  Without eating up reserved fields, we're starting with 8 bit values.
>
>> Hmm.  16 bits is probably enough for the MTU values, but still, changing kern-
>> abi.h will be problematic from an ABI perspective.  Do people care about the
>> kernel ABI, or is that mainly a userspace issue?
> Well, we definitely care about the kernel to user ABI.
>
> I can't imagine that we're dealing with more than a handful of actual MTU values.  Maybe the simplest thing is to extend the mtu enum to include what new values are needed, plus add a function to convert it.  (Can we call mulligan?)
>
> I don't know how iwarp handles this.  Does it just report the wrong mtu, since it doesn't necessarily matter?  Steve - any idea here?

The iwarp drivers just report the nearest mtu enum.  Apps don't need it 
for iwarp like they do for ib.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                                 ` <51E71023.2060006-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
@ 2013-07-18  2:32                                                   ` Jeff Squyres (jsquyres)
       [not found]                                                     ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F727D11-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jeff Squyres (jsquyres) @ 2013-07-18  2:32 UTC (permalink / raw)
  To: Steve Wise
  Cc: Hefty, Sean, Jason Gunthorpe, Roland Dreier,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

On Jul 17, 2013, at 5:44 PM, Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org> wrote:

> The iwarp drivers just report the nearest mtu enum.  Apps don't need it for iwarp like they do for ib.


For RC, it doesn't matter much.  So the fact that RoCE and iWARP lie about their MTU isn't a huge deal.  It's wrong, but it doesn't matter much.

We need it for UD for our upcoming device, however, because the MTU is the only way to get the max message size.

-- 
Jeff Squyres
jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                                     ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F727D11-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
@ 2013-07-18 16:50                                                       ` Jason Gunthorpe
       [not found]                                                         ` <20130718165003.GB9237-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2013-07-18 16:50 UTC (permalink / raw)
  To: Jeff Squyres (jsquyres)
  Cc: Steve Wise, Hefty, Sean, Roland Dreier,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

On Thu, Jul 18, 2013 at 02:32:05AM +0000, Jeff Squyres (jsquyres) wrote:
> On Jul 17, 2013, at 5:44 PM, Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org> wrote:
> 
> > The iwarp drivers just report the nearest mtu enum.  Apps don't need it for iwarp like they do for ib.

> For RC, it doesn't matter much.  So the fact that RoCE and iWARP lie
> about their MTU isn't a huge deal.  It's wrong, but it doesn't
> matter much.
> 
> We need it for UD for our upcoming device, however, because the MTU
> is the only way to get the max message size.

.. and UD is the least abstracted transport, so existing apps won't
support Jeff's new NIC anyhow, MTU is the least of their problems.

Existing apps with existing transports see the same old values.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                                         ` <20130718165003.GB9237-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2013-07-23 13:26                                                           ` Jeff Squyres (jsquyres)
       [not found]                                                             ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F749B82-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jeff Squyres (jsquyres) @ 2013-07-23 13:26 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Steve Wise, Hefty, Sean, Roland Dreier,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

On Jul 18, 2013, at 12:50 PM, Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:

>> We need it for UD for our upcoming device, however, because the MTU
>> is the only way to get the max message size.
> 
> .. and UD is the least abstracted transport, so existing apps won't
> support Jeff's new NIC anyhow, MTU is the least of their problems.
> 
> Existing apps with existing transports see the same old values.


...so how do we move forward?

-- 
Jeff Squyres
jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                                             ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F749B82-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
@ 2013-07-30 12:59                                                               ` Jeff Squyres (jsquyres)
  0 siblings, 0 replies; 47+ messages in thread
From: Jeff Squyres (jsquyres) @ 2013-07-30 12:59 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Steve Wise, Hefty, Sean, Roland Dreier,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

On Jul 23, 2013, at 9:26 AM, Jeff Squyres (jsquyres) <jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:

>> .. and UD is the least abstracted transport, so existing apps won't
>> support Jeff's new NIC anyhow, MTU is the least of their problems.
>> 
>> Existing apps with existing transports see the same old values.


Bump.

-- 
Jeff Squyres
jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                         ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F71CB0F-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
@ 2013-07-30 16:44                           ` Christoph Lameter
       [not found]                             ` <0000014030779fed-e7bdfc9a-6714-4473-b093-4b247ab940b6-000000-p/GC64/jrecnJqMo6gzdpkEOCMrvLtNR@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Christoph Lameter @ 2013-07-30 16:44 UTC (permalink / raw)
  To: Jeff Squyres (jsquyres)
  Cc: Jason Gunthorpe, Roland Dreier,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

On Mon, 15 Jul 2013, Jeff Squyres (jsquyres) wrote:

> Bump.

What in the world does that mean? I am an oldtimer I guess. Seems that
this is something that can be done in the newfangled forum? How does this
affect mailing lists?

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                             ` <0000014030779fed-e7bdfc9a-6714-4473-b093-4b247ab940b6-000000-p/GC64/jrecnJqMo6gzdpkEOCMrvLtNR@public.gmane.org>
@ 2013-07-30 17:21                               ` Jeff Squyres (jsquyres)
       [not found]                                 ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F7883AD-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jeff Squyres (jsquyres) @ 2013-07-30 17:21 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Jason Gunthorpe, Roland Dreier,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

On Jul 30, 2013, at 12:44 PM, Christoph Lameter <cl-gkYfJU5Cukgdnm+yROfE0A@public.gmane.org> wrote:

> What in the world does that mean? I am an oldtimer I guess. Seems that
> this is something that can be done in the newfangled forum? How does this
> affect mailing lists?


I'm not sure what you're asking me; please see the prior posts on this thread that describes the MTU issue and why we still need a solution.

-- 
Jeff Squyres
jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                 ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F7883AD-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
@ 2013-07-30 18:31                                   ` Christoph Lameter
       [not found]                                     ` <0000014030d9bc3d-2916693b-9669-4017-a724-75b51c6dc3dc-000000-p/GC64/jrecnJqMo6gzdpkEOCMrvLtNR@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Christoph Lameter @ 2013-07-30 18:31 UTC (permalink / raw)
  To: Jeff Squyres (jsquyres)
  Cc: Jason Gunthorpe, Roland Dreier,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

On Tue, 30 Jul 2013, Jeff Squyres (jsquyres) wrote:

> On Jul 30, 2013, at 12:44 PM, Christoph Lameter <cl-gkYfJU5Cukgdnm+yROfE0A@public.gmane.org> wrote:
>
> > What in the world does that mean? I am an oldtimer I guess. Seems that
> > this is something that can be done in the newfangled forum? How does this
> > affect mailing lists?
>
>
> I'm not sure what you're asking me; please see the prior posts on this
> thread that describes the MTU issue and why we still need a solution.

What does "bump" mean? You keep sending replies that just says "bump".
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                     ` <0000014030d9bc3d-2916693b-9669-4017-a724-75b51c6dc3dc-000000-p/GC64/jrecnJqMo6gzdpkEOCMrvLtNR@public.gmane.org>
@ 2013-07-30 18:45                                       ` Atchley, Scott
       [not found]                                         ` <17370FDF-C64B-4D74-A848-79C1438E6283-1Heg1YXhbW8@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Atchley, Scott @ 2013-07-30 18:45 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Jeff Squyres (jsquyres),
	Jason Gunthorpe, Roland Dreier,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford

On Jul 30, 2013, at 2:31 PM, Christoph Lameter <cl-gkYfJU5Cukgdnm+yROfE0A@public.gmane.org> wrote:

> On Tue, 30 Jul 2013, Jeff Squyres (jsquyres) wrote:
> 
>> On Jul 30, 2013, at 12:44 PM, Christoph Lameter <cl-gkYfJU5Cukgdnm+yROfE0A@public.gmane.org> wrote:
>> 
>>> What in the world does that mean? I am an oldtimer I guess. Seems that
>>> this is something that can be done in the newfangled forum? How does this
>>> affect mailing lists?
>> 
>> 
>> I'm not sure what you're asking me; please see the prior posts on this
>> thread that describes the MTU issue and why we still need a solution.
> 
> What does "bump" mean? You keep sending replies that just says "bump".

http://en.wikipedia.org/wiki/Internet_forum#Thread

"When a member posts in a thread it will jump to the top since it is the latest updated thread. Similarly, other threads will jump in front of it when they receive posts. When a member posts in a thread for no reason but to have it go to the top, it is referred to as a bump or bumping."

He is trying to bring it back to everyone's attention.

Scott

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                         ` <17370FDF-C64B-4D74-A848-79C1438E6283-1Heg1YXhbW8@public.gmane.org>
@ 2013-07-30 18:47                                           ` Doug Ledford
  0 siblings, 0 replies; 47+ messages in thread
From: Doug Ledford @ 2013-07-30 18:47 UTC (permalink / raw)
  To: Atchley, Scott
  Cc: Christoph Lameter, Jeff Squyres (jsquyres),
	Jason Gunthorpe, Roland Dreier,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 07/30/2013 02:45 PM, Atchley, Scott wrote:
> http://en.wikipedia.org/wiki/Internet_forum#Thread
> 
> "When a member posts in a thread it will jump to the top since it is
> the latest updated thread. Similarly, other threads will jump in
> front of it when they receive posts. When a member posts in a thread
> for no reason but to have it go to the top, it is referred to as a
> bump or bumping."
> 
> He is trying to bring it back to everyone's attention.

Exactly.  On a mailing list like this, it's shorthand for "You seem to
have dropped this on the floor, let me resend it to you".

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                         ` <51E6DB11.9050906-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2013-08-05 18:48                                           ` Roland Dreier
       [not found]                                             ` <CAL1RGDVuuQU-NZ2o+T0_kuxPMRadGPeqzO1J9iuW=DRuFMMYfQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Roland Dreier @ 2013-08-05 18:48 UTC (permalink / raw)
  To: Doug Ledford
  Cc: Jeff Squyres (jsquyres),
	Jason Gunthorpe, Hefty, Sean, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Wed, Jul 17, 2013 at 10:57 AM, Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:
>>> - doing it this way preserves ABI, so existing binaries are safe

>> I still don't get this.  Wouldn't an existing binary be pretty
>> surprised to get a value wildly out of range of the enum?

> Yes, but there's no way around that without simply lying about the MTU.
>  So, the argument was made in the thread that historically, applications
> have had to be modified when moved to a new link layer (aka, iWARP meant
> IB apps had to be slightly modified for connection reasons, RoCE again
> required some slight app modifications, etc) so this was seen as a case
> of "the app will work on fabrics it already knows about, and will only
> get confused if moved to this new fabric, and in that case, the app
> needs to be modified anyway, so that's acceptable breakage for keeping
> the apps working the rest of the time".  That was the argument anyway.

So I've been thinking about this for a while and this doesn't seem
like the right tradeoff to me.  If we break source compatibility, then
everyone needs to add some annoying #ifdef-ery (and configure tests
etc), even if they don't care about this new fabric.  And by *not*
breaking binary compatibility, we leave the risk of old binaries
misbehaving.

Wouldn't the following be a better path forward:

 - Add a new API (ibv_get_max_datagram_size() or some such) that
returns the real value as an int, based on the true MTU
 - Have old query verbs continue to return only old MTU values, but
deprecate that field with the idea of removing it in a few years

We can implement this however is most convenient.  If we want to
preserve the kernel-user ABI (though it's not clear how to do that)
then we can have code in libibverbs that implements all the necessary
compatibility that makes sure the MTU enum field is one of the old
enums, etc.  Adding a new function to libibverbs is easy and safe, and
the only case where things go wrong (if we add compatibility code in
libibverbs) is a new kernel with a new device, but old libibverbs.

But as Sean pointed out, it's not clear how we expect to return
integer MTUs via the existing kernel-user ABI anyway, and we should
really at least figure that out before we risk breaking userspace for
no good reason.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                             ` <CAL1RGDVuuQU-NZ2o+T0_kuxPMRadGPeqzO1J9iuW=DRuFMMYfQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2013-08-05 19:02                                               ` Jason Gunthorpe
       [not found]                                                 ` <20130805190238.GA14356-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2013-08-05 19:02 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Doug Ledford, Jeff Squyres (jsquyres),
	Hefty, Sean, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Mon, Aug 05, 2013 at 11:48:20AM -0700, Roland Dreier wrote:
> On Wed, Jul 17, 2013 at 10:57 AM, Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:
> >>> - doing it this way preserves ABI, so existing binaries are safe
> 
> >> I still don't get this.  Wouldn't an existing binary be pretty
> >> surprised to get a value wildly out of range of the enum?
> 
> > Yes, but there's no way around that without simply lying about the MTU.
> >  So, the argument was made in the thread that historically, applications
> > have had to be modified when moved to a new link layer (aka, iWARP meant
> > IB apps had to be slightly modified for connection reasons, RoCE again
> > required some slight app modifications, etc) so this was seen as a case
> > of "the app will work on fabrics it already knows about, and will only
> > get confused if moved to this new fabric, and in that case, the app
> > needs to be modified anyway, so that's acceptable breakage for keeping
> > the apps working the rest of the time".  That was the argument anyway.
> 
> So I've been thinking about this for a while and this doesn't seem
> like the right tradeoff to me.  If we break source compatibility, then
> everyone needs to add some annoying #ifdef-ery (and configure tests
> etc), even if they don't care about this new fabric.  And by *not*
> breaking binary compatibility, we leave the risk of old binaries
> misbehaving.

It isn't that hard. One ifdef and configure test to provide the two
conversion functions for old verbs completely takes care of it.

Most apps never touch that field anyhow.

> Wouldn't the following be a better path forward:
> 
>  - Add a new API (ibv_get_max_datagram_size() or some such) that
> returns the real value as an int, based on the true MTU
>  - Have old query verbs continue to return only old MTU values, but
> deprecate that field with the idea of removing it in a few years

It isn't that easy, one API certainly doesn't cover it. The MTU is in
two structs:

struct ibv_port_attr
struct ibv_qp_attr

Which touch ibv_query_port, ibv_modify_qp and ibv_query_qp. All of
which need to be changed, and you can't change the _qp functions just
by adding a new call.

> But as Sean pointed out, it's not clear how we expect to return
> integer MTUs via the existing kernel-user ABI anyway, and we should
> really at least figure that out before we risk breaking userspace for
> no good reason.

It would be good to understand this, but maybe it is just a new field
in a bigger structure via the extendable call mechanism like Or has
been working on..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                                 ` <20130805190238.GA14356-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2013-08-05 20:06                                                   ` Roland Dreier
       [not found]                                                     ` <CAL1RGDWPHWzAWx7XXxGGCVQQ1xHU6QDVybrttnvoqUPrzqzvWA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Roland Dreier @ 2013-08-05 20:06 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Doug Ledford, Jeff Squyres (jsquyres),
	Hefty, Sean, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Mon, Aug 5, 2013 at 12:02 PM, Jason Gunthorpe
<jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:
>> Wouldn't the following be a better path forward:
>>
>>  - Add a new API (ibv_get_max_datagram_size() or some such) that
>> returns the real value as an int, based on the true MTU
>>  - Have old query verbs continue to return only old MTU values, but
>> deprecate that field with the idea of removing it in a few years

> It isn't that easy, one API certainly doesn't cover it. The MTU is in
> two structs:

> struct ibv_port_attr
> struct ibv_qp_attr

> Which touch ibv_query_port, ibv_modify_qp and ibv_query_qp. All of
> which need to be changed, and you can't change the _qp functions just
> by adding a new call.

Well, for full generality I agree we would have to handle the QP
stuff.  But for now at least the only QPs that need (or allow) MTU to
be set are connected (RC, UC and XRC) QPs.  The current motivation for
allowing extended MTU is for datagrams.  So strictly speaking we only
need to deal with the query port verb.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                                     ` <CAL1RGDWPHWzAWx7XXxGGCVQQ1xHU6QDVybrttnvoqUPrzqzvWA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2013-08-05 22:22                                                       ` Jason Gunthorpe
       [not found]                                                         ` <20130805222250.GA4486-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2013-08-05 22:22 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Doug Ledford, Jeff Squyres (jsquyres),
	Hefty, Sean, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Mon, Aug 05, 2013 at 01:06:36PM -0700, Roland Dreier wrote:
> On Mon, Aug 5, 2013 at 12:02 PM, Jason Gunthorpe
> <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:
> >> Wouldn't the following be a better path forward:
> >>
> >>  - Add a new API (ibv_get_max_datagram_size() or some such) that
> >> returns the real value as an int, based on the true MTU
> >>  - Have old query verbs continue to return only old MTU values, but
> >> deprecate that field with the idea of removing it in a few years
> 
> > It isn't that easy, one API certainly doesn't cover it. The MTU is in
> > two structs:
> 
> > struct ibv_port_attr
> > struct ibv_qp_attr
> 
> > Which touch ibv_query_port, ibv_modify_qp and ibv_query_qp. All of
> > which need to be changed, and you can't change the _qp functions just
> > by adding a new call.
> 
> Well, for full generality I agree we would have to handle the QP
> stuff.  But for now at least the only QPs that need (or allow) MTU to
> be set are connected (RC, UC and XRC) QPs.  The current motivation for
> allowing extended MTU is for datagrams.  So strictly speaking we only
> need to deal with the query port verb.

Okay, but if you want to narrow things to just be Jeff's application
like that, then do we even need this to be part of verbs?

IP path MTU should be discovered by a netlink routing query to lookup
the target IP(s), the device maximum should not really be used by
apps..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                                         ` <20130805222250.GA4486-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2013-08-05 22:35                                                           ` Roland Dreier
       [not found]                                                             ` <CAL1RGDW5V_N62LSZa1YRG1JA1fOf+nbQwBG1xQ7yYiuF6z0fMw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Roland Dreier @ 2013-08-05 22:35 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Doug Ledford, Jeff Squyres (jsquyres),
	Hefty, Sean, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Mon, Aug 5, 2013 at 3:22 PM, Jason Gunthorpe
<jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:
> Okay, but if you want to narrow things to just be Jeff's application
> like that, then do we even need this to be part of verbs?
>
> IP path MTU should be discovered by a netlink routing query to lookup
> the target IP(s), the device maximum should not really be used by
> apps..

Jeff's case is not an IP transport.  Also seems a bit unfriendly to
make apps that just want to use verbs to have to figure out which
netdev to ask about.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU
       [not found]                                                             ` <CAL1RGDW5V_N62LSZa1YRG1JA1fOf+nbQwBG1xQ7yYiuF6z0fMw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2013-08-05 22:43                                                               ` Jason Gunthorpe
  0 siblings, 0 replies; 47+ messages in thread
From: Jason Gunthorpe @ 2013-08-05 22:43 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Doug Ledford, Jeff Squyres (jsquyres),
	Hefty, Sean, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Mon, Aug 05, 2013 at 03:35:22PM -0700, Roland Dreier wrote:
> On Mon, Aug 5, 2013 at 3:22 PM, Jason Gunthorpe
> <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:
> > Okay, but if you want to narrow things to just be Jeff's application
> > like that, then do we even need this to be part of verbs?
> >
> > IP path MTU should be discovered by a netlink routing query to lookup
> > the target IP(s), the device maximum should not really be used by
> > apps..
> 
> Jeff's case is not an IP transport.  

Oh, I'm confused then..

> Also seems a bit unfriendly to make apps that just want to use verbs
> to have to figure out which netdev to ask about.

Right, but we solved that by hiding that in the RDMA CM for IB cases,
presumably other cases using IP would need a similar shim.

But.. wait, if it isn't IP, then aren't there going to be other
changes required for a new address family? Will the device struct and
QP struct survive that unchanged? eg, can we roll the MTU change with
other required changes?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]                                         ` <20130621212016.GA10637-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2013-06-21 21:56                                           ` Hefty, Sean
@ 2013-06-22 13:13                                           ` Jeff Squyres (jsquyres)
  1 sibling, 0 replies; 47+ messages in thread
From: Jeff Squyres (jsquyres) @ 2013-06-22 13:13 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Doug Ledford, <linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>

On Jun 21, 2013, at 5:20 PM, Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org> wrote:

> Jeff: If you are still reading -

I am still reading, just didn't have much to contribute until now.  :-)

> one concrete suggestion, I think, is
> to ensure compile-time failure when the new-format MTU variable is
> touched.  This is trivially done by wrapping it in a struct:
> 
> struct ibv_mtu_t {int __mtu;};

Sure, I can work up a patch that does this.

Do others agree?  Roland?

-- 
Jeff Squyres
jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org
For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]                                         ` <20130621212016.GA10637-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2013-06-21 21:56                                           ` Hefty, Sean
  2013-06-22 13:13                                           ` Jeff Squyres (jsquyres)
  1 sibling, 0 replies; 47+ messages in thread
From: Hefty, Sean @ 2013-06-21 21:56 UTC (permalink / raw)
  To: Jason Gunthorpe, Doug Ledford
  Cc: Jeff Squyres, linux-rdma-u79uwXL29TY76Z2rM5mHXA

> > <raises hand>  I'm ready for a new project.  I already had one in mind,
> > but it required some changes to libibverbs that are pretty deep changes.
> >  Instead of doing that deep work in libibverbs, I can start here and
> > move on to my next project when this is up and running.
> 
> That would be amazing, I will try to help you as best my limited time
> permits.

I've given this some thought recently as well.  The concepts that I envision would be fairly significant relative to the current API, but build off the libibverbs framework and encapsulate all of the kernel ABI as well.  The obvious goal is to avoid introducing any performance impact.  Working down to something concrete would definitely take some time.

- Sean
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]                                     ` <51C4BE25.9000501-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2013-06-21 21:20                                       ` Jason Gunthorpe
       [not found]                                         ` <20130621212016.GA10637-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2013-06-21 21:20 UTC (permalink / raw)
  To: Doug Ledford; +Cc: Jeff Squyres, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Fri, Jun 21, 2013 at 04:57:09PM -0400, Doug Ledford wrote:

> > I disagree, *verbs* needs to expose the HW capabilities, efficiently.
> 
> What we need is an efficient RDMA API (of which verbs is a candidate for
> the underlying implementation).  It need not expose HW capabilities in
> its base form.

Well, there is uDAPL, but lots of people don't like it...

> > K.. So, nobody seems to be paid to work on these verbs libraries, what
> > you are talking about is a tonne of typing - who is going to do
> > this?
> 
> <raises hand>  I'm ready for a new project.  I already had one in mind,
> but it required some changes to libibverbs that are pretty deep changes.
>  Instead of doing that deep work in libibverbs, I can start here and
> move on to my next project when this is up and running.

That would be amazing, I will try to help you as best my limited time
permits.

> Realistically I'm talking about starting off by cannibalizing
> libibverbs, librdmacm, and libibcm into a single library.  That's a lot
> of cut and paste work, with ensuing fixups.  After the initial bring
> together, then we can start re-architecting and moving forward.

The core umad and the issm buisness should be included as well, eg
single core library API that wraps the entire API the kernel exports
from drivers/infiniband.

I've done some elements of this in my python-rdma project.

> If they wish to keep getting their infrastructure for free, then they
> kinda get what they get.  I mean, you don't want to just change things
> and piss them off for no good reason, but when good reason exists, they
> can suck it up and deal with it.  No one in the open source community is
> obligated to preserve an outdated API forever just to make them happy.
> The other alternative is that they can pick up long term maintenance of
> libibverbs, librdmacm, and libibcm themselves (in fairness I can't
> say

A fact of things right now is that the ISVs are contributing a
majority of the manpower for changes to the API.

> As I mentioned, this patch is just another in a line of kludges.  That
> doesn't make it wrong, it makes it ugly.  I'm for breaking the cycle.
> But even if we do go ahead and break the cycle, that will take more time
> than I imagine Jeff is willing to wait.  So in the meantime, I guess we
> just gotta do what we just gotta do...

K..

Jeff: If you are still reading - one concrete suggestion, I think, is
to ensure compile-time failure when the new-format MTU variable is
touched.

This is trivially done by wrapping it in a struct:

struct ibv_mtu_t {int __mtu;};

At least this will cause compile failure at all sites that need
revision.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]                                 ` <20130621182617.GA17700-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2013-06-21 20:57                                   ` Doug Ledford
       [not found]                                     ` <51C4BE25.9000501-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Doug Ledford @ 2013-06-21 20:57 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Jeff Squyres, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 06/21/2013 02:26 PM, Jason Gunthorpe wrote:
> On Fri, Jun 21, 2013 at 01:36:01PM -0400, Doug Ledford wrote:
> 
>>>>> The new transports have new requirements, and the apps have new
>>>>> required behaviors - the API simply can't hide all this in every
>>>>> case. The changes before had nothing to do with MTU, FWIW.
>>>>
>>>> It demonstrates what I would call a leakage between layer 2 and higher
>>>> layer APIs though.
>>>
>>> What do you mean? Verbs is intended to expose transport specific
>>> behaviors at a very bare-metal level. The fact there are wide
>>> variations between the transports doesn't reflect a fault in verbs.
>>
>> When verbs only had to worry about IB, this made sense.  When we added
>> iWARP, IBoE, and now usNIC, it no longer does.  Verbs *needs* to be
>> transport agnostic.  Then a person should do non-agnostic things using
>> extension libraries or raw packet mode, similar to how transport
>> non-agnostic things are done for the sockets API.
> 
> I disagree, *verbs* needs to expose the HW capabilities, efficiently.

What we need is an efficient RDMA API (of which verbs is a candidate for
the underlying implementation).  It need not expose HW capabilities in
its base form.

> .. and what is this about 'extension libraries'!? rdmacm *IS* an
> extension library and here we are talking about the problems that
> causes.

It should not be.  There is no such thing as RDMA communications without
connection establishment.  It isn't really an extension, it's part of
the core.

> You want more extension libraries?

Architecturally different than the libibverbs/librdmacm mess, but yes
(think extensions that the core library offers to the user space app,
not that the user space app links against and must be kept in sync with
the core library).

> .. and raw packet mode is entirely a non-starter for controller HW
> assisted offload features.
> 
> Like I said before, these things are getting pushed into verbs because
> they don't fit in the sockets model. You really can't use sockets as a
> template for solving problems that sockets have already failed to solve
> :P

I'm not using sockets as a model.  I couldn't care less about them.  I
want a generic RDMA model and sockets does not work well for that (not
at all really, unless you just want to neuter RDMA capabilities).

>> I wouldn't care about verbs being transport agnostic if we already
>> had a reasonable transport agnostic API for RDMA usage that allowed
>> all of the base verbs to be used.  I don't see that from the
>> examples you list above.
> 
> There is some stuff in the latest rdma-cm library that is getting
> close to this, but you are broadly right. It is a hard problem,
> especially when new transports don't support many of the verbs.

However, given a minimal set of functioning verbs, some of the other
verbs can be emulated.

>> Except in the context of, as Sean picked up on, my suggestion that
>> reworking the API split a bit and bringing these highly related items
>> under one umbrella.  
> 
> I agree that combining the core libraries would simplify the
> maintenance, but it you are still talking about adding symbol versions
> to a big wack of APIs, and that still leaves non-core helper libraries
> out in the cold.

Depends on how you layer things as I mentioned above.

> Again, the problems are not caused by the split up libraries.

No, it partially is exactly caused by this.

> The
> problems are inherent to how the verbs API works.

Nothing is inherent to the API, only it's current implementation.

> Joining the
> libraries reduces the 'impact surface' but doesn't fundamentally solve
> the problem. Perhaps that is pragmatically '99%' good enough, as you
> say..
> 
> Admittedly, this seems to be how other projects use symbol versions,
> and I don't hear alot of screaming from other camps, but on the other
> hand, verbs has a larger than normal binary-only ISV user base..
> 
> .. and symbol versions do work well if an entirely contained ISV build
> tries to link to a set of self-consistent system libraries.
> 
>> You wouldn't expect to link against glibc for read/write/socket, and
>> against a different library for listen/accept, yet that's what we do
>> in rdma land.  We have an artificial split that doesn't make sense
>> and it causes these problems.
> 
> Have you used SCTP? Some calls are part of glibc, some are part of
> libsctp.. For instance listen is in glibc, and sctp_send is in
> libsctp. It isn't entirely unprecedented.

Not-unprecedented still doesn't mean "good idea" ;-)

>>>> So I hear you that people object to breaking the API for a new library
>>>> version.  My objection (which I'm sure I'll be overruled on) is that
>>>> people are taking the easy way out instead of fixing things up the right
>>>> way.
>>>
>>> So what is the 'right way' here? I'm not hearing any problem free
>>> solution.
>>
>> Who ever said that the right solution is either A) problem free or B)
>> always easy?  If you're looking for a for-free solution, don't bother
>> asking me.  As I outlined above, it seems to me that you can catch 99%
>> of the target audience with a limited set of targeted provider stack
>> updates.  This is totally doable, although not necessarily easy.  The
>> remaining code will have to be done by the end users.  If people think
>> that's too much work to fix the mess things are in currently, my
>> response would be "Suck it up, cupcake!  It needs done."  You could
>> even
> 
> K.. So, nobody seems to be paid to work on these verbs libraries, what
> you are talking about is a tonne of typing - who is going to do
> this?

<raises hand>  I'm ready for a new project.  I already had one in mind,
but it required some changes to libibverbs that are pretty deep changes.
 Instead of doing that deep work in libibverbs, I can start here and
move on to my next project when this is up and running.

Realistically I'm talking about starting off by cannibalizing
libibverbs, librdmacm, and libibcm into a single library.  That's a lot
of cut and paste work, with ensuing fixups.  After the initial bring
together, then we can start re-architecting and moving forward.

Since that cut and paste work will take a while, that also gives some
time for us to discuss what things might look like before starting that
"tonne of typing" as you called it ;-)

> I already tried pretty hard to convince some ISVs that it was OK to
> rev the SONAME and the extensions could be handled with new symbols,
> and I didn't get too far. :(

If they wish to keep getting their infrastructure for free, then they
kinda get what they get.  I mean, you don't want to just change things
and piss them off for no good reason, but when good reason exists, they
can suck it up and deal with it.  No one in the open source community is
obligated to preserve an outdated API forever just to make them happy.
The other alternative is that they can pick up long term maintenance of
libibverbs, librdmacm, and libibcm themselves (in fairness I can't say
that...Roland may think it is a horrible idea to put connection
management into the base verbs library and just want to keep on going
the way things are going, ditto to Sean, I obviously can't speak for
other people...but if we write a new library and the other maintainers
of the older libraries think the new one is the right path forward and
decide to quite updating the old ones, then you end up in this
situation).  They wouldn't be left with no choice, just with no choice
that includes someone providing them with ongoing feature enhancements
to an increasingly inappropriate and unmaintainable mess of an API for free.

> In the mean time, what about Jeff's problem?

We did get way off the point ;-)

As I mentioned, this patch is just another in a line of kludges.  That
doesn't make it wrong, it makes it ugly.  I'm for breaking the cycle.
But even if we do go ahead and break the cycle, that will take more time
than I imagine Jeff is willing to wait.  So in the meantime, I guess we
just gotta do what we just gotta do...

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]                             ` <51C48F01.5030103-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2013-06-21 18:26                               ` Jason Gunthorpe
       [not found]                                 ` <20130621182617.GA17700-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2013-06-21 18:26 UTC (permalink / raw)
  To: Doug Ledford; +Cc: Jeff Squyres, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Fri, Jun 21, 2013 at 01:36:01PM -0400, Doug Ledford wrote:

> >>> The new transports have new requirements, and the apps have new
> >>> required behaviors - the API simply can't hide all this in every
> >>> case. The changes before had nothing to do with MTU, FWIW.
> >>
> >> It demonstrates what I would call a leakage between layer 2 and higher
> >> layer APIs though.
> > 
> > What do you mean? Verbs is intended to expose transport specific
> > behaviors at a very bare-metal level. The fact there are wide
> > variations between the transports doesn't reflect a fault in verbs.
> 
> When verbs only had to worry about IB, this made sense.  When we added
> iWARP, IBoE, and now usNIC, it no longer does.  Verbs *needs* to be
> transport agnostic.  Then a person should do non-agnostic things using
> extension libraries or raw packet mode, similar to how transport
> non-agnostic things are done for the sockets API.

I disagree, *verbs* needs to expose the HW capabilities, efficiently.

.. and what is this about 'extension libraries'!? rdmacm *IS* an
extension library and here we are talking about the problems that
causes. You want more extension libraries?

.. and raw packet mode is entirely a non-starter for controller HW
assisted offload features.

Like I said before, these things are getting pushed into verbs because
they don't fit in the sockets model. You really can't use sockets as a
template for solving problems that sockets have already failed to solve
:P

> I wouldn't care about verbs being transport agnostic if we already
> had a reasonable transport agnostic API for RDMA usage that allowed
> all of the base verbs to be used.  I don't see that from the
> examples you list above.

There is some stuff in the latest rdma-cm library that is getting
close to this, but you are broadly right. It is a hard problem,
especially when new transports don't support many of the verbs.

> > The packaging tool still doesn't solve the problem I outlined. A
> > correctly packaged app, built for verbs v1.0 will still be installable
> > on a system with verbs v1.1, and the same inter-library problems I
> > described with symbol versioning in verbs will still show up.
> 
> Not if you also version the other libraries (and I admit you are getting
> into a lot of work here, but what I'm referring to is when you rev
> ibv_get_device_list for the new ibv_device_attr struct, you also rev
> rdma_get_device_list in the same way, keeping a back compat entry in
> rdmacm as well).

Yes, that is right, plus actually making compat calls across the two
library boundaries and other complexity. It is certainly a big job.

> >> This isn't a problem if library A doesn't call into library B and try to
> >> use the same struct as the app itself when the app calls into library B.
> > 
> > K, but they do, there are good reasons why they do, and saying "don't
> > do that" is really not helpful.
> 
> Except in the context of, as Sean picked up on, my suggestion that
> reworking the API split a bit and bringing these highly related items
> under one umbrella.  

I agree that combining the core libraries would simplify the
maintenance, but it you are still talking about adding symbol versions
to a big wack of APIs, and that still leaves non-core helper libraries
out in the cold.

Again, the problems are not caused by the split up libraries. The
problems are inherent to how the verbs API works. Joining the
libraries reduces the 'impact surface' but doesn't fundamentally solve
the problem. Perhaps that is pragmatically '99%' good enough, as you
say..

Admittedly, this seems to be how other projects use symbol versions,
and I don't hear alot of screaming from other camps, but on the other
hand, verbs has a larger than normal binary-only ISV user base..

.. and symbol versions do work well if an entirely contained ISV build
tries to link to a set of self-consistent system libraries.

> You wouldn't expect to link against glibc for read/write/socket, and
> against a different library for listen/accept, yet that's what we do
> in rdma land.  We have an artificial split that doesn't make sense
> and it causes these problems.

Have you used SCTP? Some calls are part of glibc, some are part of
libsctp.. For instance listen is in glibc, and sctp_send is in
libsctp. It isn't entirely unprecedented.

> >> So I hear you that people object to breaking the API for a new library
> >> version.  My objection (which I'm sure I'll be overruled on) is that
> >> people are taking the easy way out instead of fixing things up the right
> >> way.
> > 
> > So what is the 'right way' here? I'm not hearing any problem free
> > solution.
> 
> Who ever said that the right solution is either A) problem free or B)
> always easy?  If you're looking for a for-free solution, don't bother
> asking me.  As I outlined above, it seems to me that you can catch 99%
> of the target audience with a limited set of targeted provider stack
> updates.  This is totally doable, although not necessarily easy.  The
> remaining code will have to be done by the end users.  If people think
> that's too much work to fix the mess things are in currently, my
> response would be "Suck it up, cupcake!  It needs done."  You could
> even

K.. So, nobody seems to be paid to work on these verbs libraries, what
you are talking about is a tonne of typing - who is going to do
this?

I already tried pretty hard to convince some ISVs that it was OK to
rev the SONAME and the extensions could be handled with new symbols,
and I didn't get too far. :(

In the mean time, what about Jeff's problem?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]                         ` <20130621063648.GA27963-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2013-06-21 17:36                           ` Doug Ledford
       [not found]                             ` <51C48F01.5030103-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Doug Ledford @ 2013-06-21 17:36 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Jeff Squyres, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 06/21/2013 02:36 AM, Jason Gunthorpe wrote:
> On Thu, Jun 20, 2013 at 08:31:07PM -0400, Doug Ledford wrote:
> 
>>> The new transports have new requirements, and the apps have new
>>> required behaviors - the API simply can't hide all this in every
>>> case. The changes before had nothing to do with MTU, FWIW.
>>
>> It demonstrates what I would call a leakage between layer 2 and higher
>> layer APIs though.
> 
> What do you mean? Verbs is intended to expose transport specific
> behaviors at a very bare-metal level. The fact there are wide
> variations between the transports doesn't reflect a fault in verbs.

When verbs only had to worry about IB, this made sense.  When we added
iWARP, IBoE, and now usNIC, it no longer does.  Verbs *needs* to be
transport agnostic.  Then a person should do non-agnostic things using
extension libraries or raw packet mode, similar to how transport
non-agnostic things are done for the sockets API.

> If you want a transport-agnostic API then there are projects that do
> that: sockets, MPI, portals, CCI, etc, etc.
> 
> But look at some of the new extensions people are proposing, they tend
> to be very transport specific, narrowly focused on certain performance
> niches and not really abstractable through general APIs.
> 
> Frankly, that is why they are getting shoved into verbs, not run over
> sockets :)

I wouldn't care about verbs being transport agnostic if we already had a
reasonable transport agnostic API for RDMA usage that allowed all of the
base verbs to be used.  I don't see that from the examples you list above.

>>> The issue is not sorting out the install of the core libraries via
>>> package management tricks, but what happens when an app/middleware
>>> outside the package management dynamically links to this mess.
>>
>> If a user chooses not to use packaging, that's their prerogative.
>> However, they can also collect the pieces when things break.  If a ISV
>> chooses to do the same, then that ISV is just being flat lazy and
>> sloppy.  The package management stacks are there for a reason and serve
>> a valuable purpose.  Ignoring them is akin to just thumbing your nose at
>> the libibverbs version as well.
> 
> The packaging tool still doesn't solve the problem I outlined. A
> correctly packaged app, built for verbs v1.0 will still be installable
> on a system with verbs v1.1, and the same inter-library problems I
> described with symbol versioning in verbs will still show up.

Not if you also version the other libraries (and I admit you are getting
into a lot of work here, but what I'm referring to is when you rev
ibv_get_device_list for the new ibv_device_attr struct, you also rev
rdma_get_device_list in the same way, keeping a back compat entry in
rdmacm as well).

> You can avoid some nasty cases in the core libraries themselves with
> packaging, but it doesn't solve the general problem.
> 
> .. and ISVs don't seem to like packaging for some insane reason.
> 
>>> It explodes. The fundamental problem with the v1.0/v1.1 switch is the
>>> v1.0 functions are returning pointers that cannot be passed into a
>>> v1.1 function, eg iv_close_device@1.1(ibv_open_device@1.0(..))
>>> crashes.
>>
>> This isn't a problem if library A doesn't call into library B and try to
>> use the same struct as the app itself when the app calls into library B.
> 
> K, but they do, there are good reasons why they do, and saying "don't
> do that" is really not helpful.

Except in the context of, as Sean picked up on, my suggestion that
reworking the API split a bit and bringing these highly related items
under one umbrella.  You wouldn't expect to link against glibc for
read/write/socket, and against a different library for listen/accept,
yet that's what we do in rdma land.  We have an artificial split that
doesn't make sense and it causes these problems.

>> I would argue that this is because the libraries are so disjoint (that
>> librdmacm needs the deep internal knowledge it needs of libibverbs
> 
> No, it isn't deep internal knowledge. It uses the exposed, defined,
> public verbs ABI, adds some helper functions and then re-exports verbs
> objects to it's own users with minimal overhead. The re-exporting is
> what burns symbol versions so badly.
> 
> And rdma cm isn't the only one, but it is the easiest to talk
> about.
> 
> IMHO, the #1 fundamental issue is that all the low speed APIs use
> exposed structures, often caller-stack-allocated and that very badly
> limits our ability to elegantly adjust the API without breaking the ABI.
> 
>> back end to work IP v. GUID connection setup).  So, I think there is
>> significant room to improve the layout of the overall RDMA APIs and
>> doing that would address this particular issue and is probably the right
>> way to go.
> 
> I won't argue with you there...
> 
>> However, aside from that, my current objection to all of this is that
>> this solution, while meeting the needs of the "we don't want to have to
>> change anything unless the app wants to run on this new fabric" results
>> in what I would call a gross hack (some enum, some int, same variable).
>>  I'm not so much complaining about Jeff's solution, more the requirement
>> that we come up with such an ugly construct.  We are headed down a
>> course of putting in gross hacks in order to preserve an outdated
>> design, one which has much more elegant solutions today than what we are
>> currently using.  At *some* point, this becomes a miserable,
>> unmaintainable mess.
> 
> At some point? We are already there! Have you looked at the extension
> mechanism? It is horrible, and not what any sane person
> would want to do. It exists soley to satisfy the ISVs that don't want
> to see verbs rev'd.
> 
> And they have a point. There are lot of things built on verbs, a rev
> to the soname would create a terrible mess. An incompatible rev to the
> API would be even worse.

I disagree.  For end users, that *vast* majority would never have to see
the API change.  The MPI stacks and a few ISV stacks would need updated,
they would change their internal implementations but not necessarily
anything externally visible (for instance, OpenMPI would not need to
change the MPI interface to update to a newer verbs provider, and MPI
programs linked against OpenMPI would not need to be changed at all),
and I'm guessing that 99% of all applications by CPU hours consumed
would end up magically updated to the latest version.  Or am I wrong
that by far and away the two largest uses of RDMA in general are A) MPI
stacks and B) messaging stacks in financial sectors...both of which I
know for a fact hide the actual RDMA API from the end user's code?

>> So I hear you that people object to breaking the API for a new library
>> version.  My objection (which I'm sure I'll be overruled on) is that
>> people are taking the easy way out instead of fixing things up the right
>> way.
> 
> So what is the 'right way' here? I'm not hearing any problem free
> solution.

Who ever said that the right solution is either A) problem free or B)
always easy?  If you're looking for a for-free solution, don't bother
asking me.  As I outlined above, it seems to me that you can catch 99%
of the target audience with a limited set of targeted provider stack
updates.  This is totally doable, although not necessarily easy.  The
remaining code will have to be done by the end users.  If people think
that's too much work to fix the mess things are in currently, my
response would be "Suck it up, cupcake!  It needs done."  You could even
leave the current libibverbs/librdmacm/libibcm combo in place and allow
non-updated programs to continue working, while providing a new librdma
that handles all three jobs, but in a transport agnostic way.  Then you
could add transport specific extensions to librdma to get those extra
features you want.  That way, if people who haven't updated want new
features, then they can take the time to do the forward port to the new
library, and if not they can continue to use the deprecated but still
present older libraries.  So I guess I'm failing to really see the issue
that people keep making this out to be...

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]                     ` <51C39ECB.3040006-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  2013-06-21  1:03                       ` Hefty, Sean
@ 2013-06-21  6:36                       ` Jason Gunthorpe
       [not found]                         ` <20130621063648.GA27963-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  1 sibling, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2013-06-21  6:36 UTC (permalink / raw)
  To: Doug Ledford; +Cc: Jeff Squyres, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Thu, Jun 20, 2013 at 08:31:07PM -0400, Doug Ledford wrote:

> > The new transports have new requirements, and the apps have new
> > required behaviors - the API simply can't hide all this in every
> > case. The changes before had nothing to do with MTU, FWIW.
> 
> It demonstrates what I would call a leakage between layer 2 and higher
> layer APIs though.

What do you mean? Verbs is intended to expose transport specific
behaviors at a very bare-metal level. The fact there are wide
variations between the transports doesn't reflect a fault in verbs.

If you want a transport-agnostic API then there are projects that do
that: sockets, MPI, portals, CCI, etc, etc.

But look at some of the new extensions people are proposing, they tend
to be very transport specific, narrowly focused on certain performance
niches and not really abstractable through general APIs.

Frankly, that is why they are getting shoved into verbs, not run over
sockets :)

> > The issue is not sorting out the install of the core libraries via
> > package management tricks, but what happens when an app/middleware
> > outside the package management dynamically links to this mess.
> 
> If a user chooses not to use packaging, that's their prerogative.
> However, they can also collect the pieces when things break.  If a ISV
> chooses to do the same, then that ISV is just being flat lazy and
> sloppy.  The package management stacks are there for a reason and serve
> a valuable purpose.  Ignoring them is akin to just thumbing your nose at
> the libibverbs version as well.

The packaging tool still doesn't solve the problem I outlined. A
correctly packaged app, built for verbs v1.0 will still be installable
on a system with verbs v1.1, and the same inter-library problems I
described with symbol versioning in verbs will still show up.

You can avoid some nasty cases in the core libraries themselves with
packaging, but it doesn't solve the general problem.

.. and ISVs don't seem to like packaging for some insane reason.

> > It explodes. The fundamental problem with the v1.0/v1.1 switch is the
> > v1.0 functions are returning pointers that cannot be passed into a
> > v1.1 function, eg iv_close_device@1.1(ibv_open_device@1.0(..))
> > crashes.
> 
> This isn't a problem if library A doesn't call into library B and try to
> use the same struct as the app itself when the app calls into library B.

K, but they do, there are good reasons why they do, and saying "don't
do that" is really not helpful.

> I would argue that this is because the libraries are so disjoint (that
> librdmacm needs the deep internal knowledge it needs of libibverbs

No, it isn't deep internal knowledge. It uses the exposed, defined,
public verbs ABI, adds some helper functions and then re-exports verbs
objects to it's own users with minimal overhead. The re-exporting is
what burns symbol versions so badly.

And rdma cm isn't the only one, but it is the easiest to talk
about.

IMHO, the #1 fundamental issue is that all the low speed APIs use
exposed structures, often caller-stack-allocated and that very badly
limits our ability to elegantly adjust the API without breaking the ABI.

> back end to work IP v. GUID connection setup).  So, I think there is
> significant room to improve the layout of the overall RDMA APIs and
> doing that would address this particular issue and is probably the right
> way to go.

I won't argue with you there...

> However, aside from that, my current objection to all of this is that
> this solution, while meeting the needs of the "we don't want to have to
> change anything unless the app wants to run on this new fabric" results
> in what I would call a gross hack (some enum, some int, same variable).
>  I'm not so much complaining about Jeff's solution, more the requirement
> that we come up with such an ugly construct.  We are headed down a
> course of putting in gross hacks in order to preserve an outdated
> design, one which has much more elegant solutions today than what we are
> currently using.  At *some* point, this becomes a miserable,
> unmaintainable mess.

At some point? We are already there! Have you looked at the extension
mechanism? It is horrible, and not what any sane person
would want to do. It exists soley to satisfy the ISVs that don't want
to see verbs rev'd.

And they have a point. There are lot of things built on verbs, a rev
to the soname would create a terrible mess. An incompatible rev to the
API would be even worse.

> So I hear you that people object to breaking the API for a new library
> version.  My objection (which I'm sure I'll be overruled on) is that
> people are taking the easy way out instead of fixing things up the right
> way.

So what is the 'right way' here? I'm not hearing any problem free
solution.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]                     ` <51C39ECB.3040006-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2013-06-21  1:03                       ` Hefty, Sean
  2013-06-21  6:36                       ` Jason Gunthorpe
  1 sibling, 0 replies; 47+ messages in thread
From: Hefty, Sean @ 2013-06-21  1:03 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Jeff Squyres (jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org),
	linux-rdma (linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org)

> I would argue that this is because the libraries are so disjoint (that
> librdmacm needs the deep internal knowledge it needs of libibverbs
> indicates that maybe these two shouldn't be separate from each other for
> example, or that maybe libibverbs should provide a unified connection
> API to the user and internally use both librdmacm and libibcm on the
> back end to work IP v. GUID connection setup).  So, I think there is
> significant room to improve the layout of the overall RDMA APIs and
> doing that would address this particular issue and is probably the right
> way to go.
... 
> That would address structures, but I think the API itself could use some
> love and care, and that wouldn't be addressed by just the verbs
> extension mechanism (and in fact if you rethink some of the exposed API,
> it might drastically change how you might want to handle
> extensions...who knows).

I agree with Doug.  A merged library that can evolve the RDMA APIs with fewer compatibility constraints could be beneficial.  I just think such an approach would require some thought and a lot of discussion.

- Sean
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]                 ` <20130620211454.GA2434-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2013-06-21  0:31                   ` Doug Ledford
       [not found]                     ` <51C39ECB.3040006-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Doug Ledford @ 2013-06-21  0:31 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Jeff Squyres, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 06/20/2013 05:14 PM, Jason Gunthorpe wrote:
> On Thu, Jun 20, 2013 at 04:31:14PM -0400, Doug Ledford wrote:
>>> happened for iwarp, rocee, etc.
>>
>> If it happened once, then I would agree with you above.  That it *keeps*
>> happening is the issue.  To me, that's a clear indication that instead
>> of fixing the shortcomings of the current API properly, band-aids just
>> keep getting applied.
> 
> The new transports have new requirements, and the apps have new
> required behaviors - the API simply can't hide all this in every
> case. The changes before had nothing to do with MTU, FWIW.

It demonstrates what I would call a leakage between layer 2 and higher
layer APIs though.

> Nope, people want new apps (using extensions/etc) to run on old verbs
> versions. I don't really like that, mind you, but it has been strongly
> asked for.

At some point people just need to suck it up and deal with a new
version.  Once you reach a certain level of maturity you can maintain
long term back compatibility and forward compatibility.  But that
requires that the API be sufficiently well thought out that each change
is more evolutionary than revolutionary.  The entire IB stack still
likes to do major, revolutionary changes.  It has not reached the level
of maturity where it can truly maintain a long term forward/back
compatibility IMO.  And the layer level leakage I mention earlier just
makes this more problematic.

> Both the app and librdmacm have a DT_NEEDED on libibverbs, and both
> call into libibverbs.
> 
> The issue is not sorting out the install of the core libraries via
> package management tricks, but what happens when an app/middleware
> outside the package management dynamically links to this mess.

If a user chooses not to use packaging, that's their prerogative.
However, they can also collect the pieces when things break.  If a ISV
chooses to do the same, then that ISV is just being flat lazy and
sloppy.  The package management stacks are there for a reason and serve
a valuable purpose.  Ignoring them is akin to just thumbing your nose at
the libibverbs version as well.

> We've already seen this fail in the field with apps that link to the
> v1.0 verbs ABI that call into other libraries that were linked to the
> v1.1 API.

So this exposes an issue, I agree.

> It explodes. The fundamental problem with the v1.0/v1.1 switch is the
> v1.0 functions are returning pointers that cannot be passed into a
> v1.1 function, eg iv_close_device@1.1(ibv_open_device@1.0(..))
> crashes. 

This isn't a problem if library A doesn't call into library B and try to
use the same struct as the app itself when the app calls into library B.

> Your idea to change the MTU causes the same problem with structure
> versioning. If I use a rdmacm/etc API to get a MTU containing
> structure then I still get the new meaning because rdmacm is linked to
> the v1.2 verbs symbols, but my app is linked to the v1.1 symbols and
> can't support it.
> 
> .. and of course rdmacm is just an example, there are other middleware
> libraries (uDAPL, MPI, etc) that may be affected.
> 
> Symbol versioning *doesn't* solve the problem, it just creates a new
> class of subtle failure modes. It appears to work in simple cases so
> people think it is a silver bullet, but it is not. It is very complex,
> the failures cases are screwy and subtle, and verbs tends to hit them
> head on because of how exposed all the internal structures are.

I would argue that this is because the libraries are so disjoint (that
librdmacm needs the deep internal knowledge it needs of libibverbs
indicates that maybe these two shouldn't be separate from each other for
example, or that maybe libibverbs should provide a unified connection
API to the user and internally use both librdmacm and libibcm on the
back end to work IP v. GUID connection setup).  So, I think there is
significant room to improve the layout of the overall RDMA APIs and
doing that would address this particular issue and is probably the right
way to go.

However, aside from that, my current objection to all of this is that
this solution, while meeting the needs of the "we don't want to have to
change anything unless the app wants to run on this new fabric" results
in what I would call a gross hack (some enum, some int, same variable).
 I'm not so much complaining about Jeff's solution, more the requirement
that we come up with such an ugly construct.  We are headed down a
course of putting in gross hacks in order to preserve an outdated
design, one which has much more elegant solutions today than what we are
currently using.  At *some* point, this becomes a miserable,
unmaintainable mess.

So I hear you that people object to breaking the API for a new library
version.  My objection (which I'm sure I'll be overruled on) is that
people are taking the easy way out instead of fixing things up the right
way.

>> So, this isn't broken, it's just that no one is taking the time to
>> properly identify incompatible versions and force compatible versions to
>> be installed before things are allowed to link up.
> 
> You can't enforce things on binary-only proprietary apps being
> installed from outside package management.

Correcting the API resolves this, and you can possibly play games in
header files to catch issues at compile time.  But if people ignore
package management, then they get to keep their own pieces as far as I'm
concerned.

> The verbs extension mechanism can safely deal with this kind of
> change, it effectively adds structure versioning to the ABI, but it is
> not mainlined yet and is also pretty complex.

That would address structures, but I think the API itself could use some
love and care, and that wouldn't be addressed by just the verbs
extension mechanism (and in fact if you rethink some of the exposed API,
it might drastically change how you might want to handle
extensions...who knows).

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]             ` <51C36692.7000507-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2013-06-20 21:14               ` Jason Gunthorpe
       [not found]                 ` <20130620211454.GA2434-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2013-06-20 21:14 UTC (permalink / raw)
  To: Doug Ledford; +Cc: Jeff Squyres, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Thu, Jun 20, 2013 at 04:31:14PM -0400, Doug Ledford wrote:
> > happened for iwarp, rocee, etc.
> 
> If it happened once, then I would agree with you above.  That it *keeps*
> happening is the issue.  To me, that's a clear indication that instead
> of fixing the shortcomings of the current API properly, band-aids just
> keep getting applied.

The new transports have new requirements, and the apps have new
required behaviors - the API simply can't hide all this in every
case. The changes before had nothing to do with MTU, FWIW.

Jeff: Does your new transport support 100% of ibverbs and MTU is the
only change an app would need?

> > .. and this is sketchy anyhow, the above maths are not defined to work
> > anywhere, it just happens to work with the constants that have been
> > defined so far. This would break equally if we added any new constant
> > to the enum. So no, these maths are not important.
> 
> No, but I also skipped a number of patches where code did switch
> statements to convert from enum to byte value, or enum to string
> representation.  All of those would break too.

Yes, but often either doesn't matter (they are just print strings) or
there are default fall throughs.

UD apps are ones that are going to have a problem, but we already have
very poor transport agnostic support for UD, so it is unlikely an
existing UD app will run on a new transport.
 
> > There is a huge resistance to reving the symbol versions in
> > ibverbs. See the whole extension mess.
> 
> I thought the resistance was to revving the libibverbs soname, not just
> the internal symbol versions.

Nope, people want new apps (using extensions/etc) to run on old verbs
versions. I don't really like that, mind you, but it has been strongly
asked for.

> At the time the app is compiled, it will be compiled against a librdmacm
> that needs a specific version of the libibverbs symbols because
> librdmacm has already been compiled.  That means that if you want
> things to "just work" for the end user, when you rev the internal libibverbs
> symbols, then you make a corresponding change in librdmacm and when
> you

Both the app and librdmacm have a DT_NEEDED on libibverbs, and both
call into libibverbs.

The issue is not sorting out the install of the core libraries via
package management tricks, but what happens when an app/middleware
outside the package management dynamically links to this mess.

We've already seen this fail in the field with apps that link to the
v1.0 verbs ABI that call into other libraries that were linked to the
v1.1 API.

It explodes. The fundamental problem with the v1.0/v1.1 switch is the
v1.0 functions are returning pointers that cannot be passed into a
v1.1 function, eg iv_close_device@1.1(ibv_open_device@1.0(..))
crashes. 

Your idea to change the MTU causes the same problem with structure
versioning. If I use a rdmacm/etc API to get a MTU containing
structure then I still get the new meaning because rdmacm is linked to
the v1.2 verbs symbols, but my app is linked to the v1.1 symbols and
can't support it.

.. and of course rdmacm is just an example, there are other middleware
libraries (uDAPL, MPI, etc) that may be affected.

Symbol versioning *doesn't* solve the problem, it just creates a new
class of subtle failure modes. It appears to work in simple cases so
people think it is a silver bullet, but it is not. It is very complex,
the failures cases are screwy and subtle, and verbs tends to hit them
head on because of how exposed all the internal structures are.

> So, this isn't broken, it's just that no one is taking the time to
> properly identify incompatible versions and force compatible versions to
> be installed before things are allowed to link up.

You can't enforce things on binary-only proprietary apps being
installed from outside package management.

The verbs extension mechanism can safely deal with this kind of
change, it effectively adds structure versioning to the ABI, but it is
not mainlined yet and is also pretty complex.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]         ` <20130620165305.GA19800-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2013-06-20 20:31           ` Doug Ledford
       [not found]             ` <51C36692.7000507-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Doug Ledford @ 2013-06-20 20:31 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Jeff Squyres, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 06/20/2013 12:53 PM, Jason Gunthorpe wrote:
> On Thu, Jun 20, 2013 at 12:34:04PM -0400, Doug Ledford wrote:
>> On 06/20/2013 10:21 AM, Jeff Squyres wrote:
>>> Keep IBV_MTU_* enums values as they are, but pass MTU values around as
>>> int's.  This is an ABI-compatible change; legacy applications will use
>>> the enum values,
>>
>> I'm not really concerned with what the legacy apps use so much as what
>> they are presented with.  In other words, I'm sure they won't request
>> anything other than an MTU represented by one of the enum values.  The
>> problem though, is what if they are run on a link with a non-IB MTU and
>> they are presented with it?  Let's look at one of the ports you did in
>> this patch as an example:
> 
> Remember, apps will only see a wonky value if they are being used on
> one of Jeff's new not-IB, not-ROCE, not-iWARP transports.

So?  That's just today.  The only reason RoCE/IBoE maps to IB MTUs is
that they didn't bother to make this ABI break for it, but it could
benefit from having a more flexible MTU that followed the underlying
Ethernet MTU.  So who's to say that isn't next?

> Who knows if
> they will even work on this new transport unmodified anyhow??

Either we should be trying to keep back compatibility or we shouldn't.
If we are, then it should work.  If we aren't, then there is no sense
doing the magic hocus-pocus tricks with the MTU where in some cases it
is the old enum value and other cases the real MTU value.

> An app update to suport future transports is not unreasonable,

I disagree.

> it
> happened for iwarp, rocee, etc.

If it happened once, then I would agree with you above.  That it *keeps*
happening is the issue.  To me, that's a clear indication that instead
of fixing the shortcomings of the current API properly, band-aids just
keep getting applied.

>>> diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
>>> index 21c551d..5a0656f 100644
>>> +++ b/examples/ud_pingpong.c
>>> @@ -332,7 +332,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
>>>  			fprintf(stderr, "Unable to query port info for port %d\n", port);
>>>  			goto clean_device;
>>>  		}
>>> -		mtu = 1 << (port_info.active_mtu + 7);
> 
> .. and this is sketchy anyhow, the above maths are not defined to work
> anywhere, it just happens to work with the constants that have been
> defined so far. This would break equally if we added any new constant
> to the enum. So no, these maths are not important.

No, but I also skipped a number of patches where code did switch
statements to convert from enum to byte value, or enum to string
representation.  All of those would break too.

>> One possible solution to this problem is to use ld.linux's symbolic
>> symbol versions to solve this problem for us.  Fortunately, Roland has
>> been excellent in the past about keeping all of libibverbs symbols
>> versioned.  That can save us here.
> 
> There is a huge resistance to reving the symbol versions in
> ibverbs. See the whole extension mess.

I thought the resistance was to revving the libibverbs soname, not just
the internal symbol versions.

> Further, the symbol versions don't work well in verbs, the internal
> structures are too exposed. The existing support is already broken and
> only works in very limited cases.
> 
> What you propose breaks in fairly common use cases, eg if
> librdmacm/etc and the app link to different ibverbs versions then
> things go wrong.

At the time the app is compiled, it will be compiled against a librdmacm
that needs a specific version of the libibverbs symbols because
librdmacm has already been compiled.  That means that if you want things
to "just work" for the end user, when you rev the internal libibverbs
symbols, then you make a corresponding change in librdmacm and when you
install libibverbs-devel, you make it have a Conflict: with librdmacm <
new-version.  Likewise, you make librdmacm have a BuildRequires:
libibverbs-devel >= new-version, and make librdmacm-devel have a
Requires: libibverbs-devel >= new-version.  In this way, librdmacm-devel
will automatically require that the installed libibverbs-devel be of the
right version or it won't install itself.  Likewise, updating
libibverbs-devel without also updating librdmacm-devel will cause the
entire transaction to get kicked out or, depending on options, cause
librdmacm to be removed from the system to be updated later.

Now, these are package install time checks that can be implemented in
either rpm or, I assume, apt.  If you want compile time checks, that
could be done too with header file magic.

So, this isn't broken, it's just that no one is taking the time to
properly identify incompatible versions and force compatible versions to
be installed before things are allowed to link up.

> rdmacm and the app pass pointers to verbs structures
> across their boundary but they are unware they are versioned
> differently, and will pass them back to the wrong verbs entry
> point. This has already been seen to fail with the existing symbol
> version support.
> 
> Basically: the verbs ABI was not designed to work with symbol
> versions.

The verbs ABI is perfectly fine working with versioned symbols.  The
package management of interrelated libraries has not been managed
sufficiently to provide even a modicum of assurance that things will
work properly.  This isn't for lack of tools, it's most likely simply
for lack of knowledge of how to, and desire to, deal with sorting the
issues out.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]     ` <51C32EFC.8060202-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  2013-06-20 16:42       ` Doug Ledford
@ 2013-06-20 16:53       ` Jason Gunthorpe
       [not found]         ` <20130620165305.GA19800-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  1 sibling, 1 reply; 47+ messages in thread
From: Jason Gunthorpe @ 2013-06-20 16:53 UTC (permalink / raw)
  To: Doug Ledford; +Cc: Jeff Squyres, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Thu, Jun 20, 2013 at 12:34:04PM -0400, Doug Ledford wrote:
> On 06/20/2013 10:21 AM, Jeff Squyres wrote:
> > Keep IBV_MTU_* enums values as they are, but pass MTU values around as
> > int's.  This is an ABI-compatible change; legacy applications will use
> > the enum values,
> 
> I'm not really concerned with what the legacy apps use so much as what
> they are presented with.  In other words, I'm sure they won't request
> anything other than an MTU represented by one of the enum values.  The
> problem though, is what if they are run on a link with a non-IB MTU and
> they are presented with it?  Let's look at one of the ports you did in
> this patch as an example:

Remember, apps will only see a wonky value if they are being used on
one of Jeff's new not-IB, not-ROCE, not-iWARP transports. Who knows if
they will even work on this new transport unmodified anyhow??

An app update to suport future transports is not unreasonable, it
happened for iwarp, rocee, etc.

> > diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
> > index 21c551d..5a0656f 100644
> > +++ b/examples/ud_pingpong.c
> > @@ -332,7 +332,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
> >  			fprintf(stderr, "Unable to query port info for port %d\n", port);
> >  			goto clean_device;
> >  		}
> > -		mtu = 1 << (port_info.active_mtu + 7);

.. and this is sketchy anyhow, the above maths are not defined to work
anywhere, it just happens to work with the constants that have been
defined so far. This would break equally if we added any new constant
to the enum. So no, these maths are not important.

> One possible solution to this problem is to use ld.linux's symbolic
> symbol versions to solve this problem for us.  Fortunately, Roland has
> been excellent in the past about keeping all of libibverbs symbols
> versioned.  That can save us here.

There is a huge resistance to reving the symbol versions in
ibverbs. See the whole extension mess.

Further, the symbol versions don't work well in verbs, the internal
structures are too exposed. The existing support is already broken and
only works in very limited cases.

What you propose breaks in fairly common use cases, eg if
librdmacm/etc and the app link to different ibverbs versions then
things go wrong. rdmacm and the app pass pointers to verbs structures
across their boundary but they are unware they are versioned
differently, and will pass them back to the wrong verbs entry
point. This has already been seen to fail with the existing symbol
version support.

Basically: the verbs ABI was not designed to work with symbol
versions.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found]     ` <51C32EFC.8060202-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2013-06-20 16:42       ` Doug Ledford
  2013-06-20 16:53       ` Jason Gunthorpe
  1 sibling, 0 replies; 47+ messages in thread
From: Doug Ledford @ 2013-06-20 16:42 UTC (permalink / raw)
  To: Jeff Squyres; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 06/20/2013 12:34 PM, Doug Ledford wrote:
> On 06/20/2013 10:21 AM, Jeff Squyres wrote:
>> Keep IBV_MTU_* enums values as they are, but pass MTU values around as
>> int's.  This is an ABI-compatible change; legacy applications will use
>> the enum values,
> 
> I'm not really concerned with what the legacy apps use so much as what
> they are presented with.  In other words, I'm sure they won't request
> anything other than an MTU represented by one of the enum values.  The
> problem though, is what if they are run on a link with a non-IB MTU and
> they are presented with it?  Let's look at one of the ports you did in
> this patch as an example:
> 
>> diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
>> index 21c551d..5a0656f 100644
>> --- a/examples/ud_pingpong.c
>> +++ b/examples/ud_pingpong.c
>> @@ -332,7 +332,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
>>  			fprintf(stderr, "Unable to query port info for port %d\n", port);
>>  			goto clean_device;
>>  		}
>> -		mtu = 1 << (port_info.active_mtu + 7);
>> +		mtu = ibv_mtu_to_num(port_info.active_mtu);
>>  		if (size > mtu) {
>>  			fprintf(stderr, "Requested size larger than port MTU (%d)\n", mtu);
>>  			goto clean_device;
> 
> That used to be a valid mathematical construct.  Now it isn't.  It will
> work for all of the IBV_MTU_* values, but if you run this program,
> unmodified, on an MTU 9000 link, you get 1 << 9007 ;-)
> 
> Saying that this is backwards compatible is therefore incorrect.  If it
> were entirely backwards compatible, then apps would not need to be
> recompiled in order to avoid this error.
> 
> One possible solution to this problem is to use ld.linux's symbolic
> symbol versions to solve this problem for us.  Fortunately, Roland has
> been excellent in the past about keeping all of libibverbs symbols
> versioned.  That can save us here.
> 
> We would need to redefine the active_mtu and max_mtu in ibv_device_attr
> and path_mtu in ibv_qp_attr to all be ints.  No need to maintain back
> compatibility.

I should point out here that I would also change their name slightly in
order to force current apps to fail to compile.  This *is* an API
change, and apps need to have to make the very minor touchups necessary
in order to work again.  You don't want someone to recompile their app
without making this change and then be surprised.  The other option is
to add the int based MTUs as new elements and leave these, and also
leave these elements as an enum, but in that case I would warn people
that use these items in some way (a compiler warning about touching an
item that's deprecated might be good if that's even possible to do).

> Then we define ibv_device_attr_1.1 and ibv_qp_attr_1.1 structs that use
> the old enum.
> 
> Then we define version IBVERBS_1.2 version of ibv_get_device_list plus a
> version 1.2 of any other symbols that pass around ibv_device_attr struct
> and ibv_qp_attr struct.  The new default will be to use the new structs
> that have MTUs defined as ints, but the old 1.1 version of things will
> use the compat structs to pass things around.  When we query the kernel
> about a device/qp, if we are linked against an old app we will be using
> the compat struct and we can do the conversion from int MTU to ibv_enum
> based MTU and vice versa in the IBVERBS_1.1 wrapper functions that need
> to be called to convert ints to enums and vice versa.
> 
> It's a lot more work, but it's the right way to do this.
> 
> So, sorry Jeff, but I'm going to Nack this patch as it stands on design
> and back compatibility.  This really needs an API update, and it can be
> done in a back compatible way by using the shared library symbol version
> mapping and compat wrapper functions.
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
       [not found] ` <1371738080-18537-1-git-send-email-jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
@ 2013-06-20 16:34   ` Doug Ledford
       [not found]     ` <51C32EFC.8060202-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Doug Ledford @ 2013-06-20 16:34 UTC (permalink / raw)
  To: Jeff Squyres; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 06/20/2013 10:21 AM, Jeff Squyres wrote:
> Keep IBV_MTU_* enums values as they are, but pass MTU values around as
> int's.  This is an ABI-compatible change; legacy applications will use
> the enum values,

I'm not really concerned with what the legacy apps use so much as what
they are presented with.  In other words, I'm sure they won't request
anything other than an MTU represented by one of the enum values.  The
problem though, is what if they are run on a link with a non-IB MTU and
they are presented with it?  Let's look at one of the ports you did in
this patch as an example:

> diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
> index 21c551d..5a0656f 100644
> --- a/examples/ud_pingpong.c
> +++ b/examples/ud_pingpong.c
> @@ -332,7 +332,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
>  			fprintf(stderr, "Unable to query port info for port %d\n", port);
>  			goto clean_device;
>  		}
> -		mtu = 1 << (port_info.active_mtu + 7);
> +		mtu = ibv_mtu_to_num(port_info.active_mtu);
>  		if (size > mtu) {
>  			fprintf(stderr, "Requested size larger than port MTU (%d)\n", mtu);
>  			goto clean_device;

That used to be a valid mathematical construct.  Now it isn't.  It will
work for all of the IBV_MTU_* values, but if you run this program,
unmodified, on an MTU 9000 link, you get 1 << 9007 ;-)

Saying that this is backwards compatible is therefore incorrect.  If it
were entirely backwards compatible, then apps would not need to be
recompiled in order to avoid this error.

One possible solution to this problem is to use ld.linux's symbolic
symbol versions to solve this problem for us.  Fortunately, Roland has
been excellent in the past about keeping all of libibverbs symbols
versioned.  That can save us here.

We would need to redefine the active_mtu and max_mtu in ibv_device_attr
and path_mtu in ibv_qp_attr to all be ints.  No need to maintain back
compatibility.

Then we define ibv_device_attr_1.1 and ibv_qp_attr_1.1 structs that use
the old enum.

Then we define version IBVERBS_1.2 version of ibv_get_device_list plus a
version 1.2 of any other symbols that pass around ibv_device_attr struct
and ibv_qp_attr struct.  The new default will be to use the new structs
that have MTUs defined as ints, but the old 1.1 version of things will
use the compat structs to pass things around.  When we query the kernel
about a device/qp, if we are linked against an old app we will be using
the compat struct and we can do the conversion from int MTU to ibv_enum
based MTU and vice versa in the IBVERBS_1.1 wrapper functions that need
to be called to convert ints to enums and vice versa.

It's a lot more work, but it's the right way to do this.

So, sorry Jeff, but I'm going to Nack this patch as it stands on design
and back compatibility.  This really needs an API update, and it can be
done in a back compatible way by using the shared library symbol version
mapping and compat wrapper functions.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 47+ messages in thread

* [PATCH V2] libibverbs: Allow arbitrary int values for MTU.
@ 2013-06-20 14:21 Jeff Squyres
       [not found] ` <1371738080-18537-1-git-send-email-jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 47+ messages in thread
From: Jeff Squyres @ 2013-06-20 14:21 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Jeff Squyres

Keep IBV_MTU_* enums values as they are, but pass MTU values around as
int's.  This is an ABI-compatible change; legacy applications will use
the enum values, but newer applications can use an int for values that
do not currently exist in the enum set (e.g., 1500, 9000).

(if people like the idea of this patch, I will send the corresponding
kernel patch)

Signed-off-by: Jeff Squyres <jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
---
 Makefile.am                |  3 ++-
 examples/devinfo.c         | 20 +++-----------
 examples/pingpong.c        | 12 ---------
 examples/pingpong.h        |  1 -
 examples/rc_pingpong.c     |  8 +++---
 examples/srq_pingpong.c    |  8 +++---
 examples/uc_pingpong.c     |  8 +++---
 examples/ud_pingpong.c     |  2 +-
 include/infiniband/verbs.h | 55 ++++++++++++++++++++++++++++++++++---
 man/ibv_modify_qp.3        |  2 +-
 man/ibv_mtu_to_num.3       | 67 ++++++++++++++++++++++++++++++++++++++++++++++
 man/ibv_query_port.3       |  4 +--
 man/ibv_query_qp.3         |  2 +-
 13 files changed, 142 insertions(+), 50 deletions(-)
 create mode 100644 man/ibv_mtu_to_num.3

diff --git a/Makefile.am b/Makefile.am
index 40e83be..1159e55 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -54,7 +54,8 @@ man_MANS = man/ibv_asyncwatch.1 man/ibv_devices.1 man/ibv_devinfo.1	\
     man/ibv_post_srq_recv.3 man/ibv_query_device.3 man/ibv_query_gid.3	\
     man/ibv_query_pkey.3 man/ibv_query_port.3 man/ibv_query_qp.3	\
     man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3		\
-    man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3
+    man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3  \
+    man/ibv_mtu_to_num.3
 
 DEBIAN = debian/changelog debian/compat debian/control debian/copyright \
     debian/ibverbs-utils.install debian/libibverbs1.install \
diff --git a/examples/devinfo.c b/examples/devinfo.c
index ff078e4..9f51dcb 100644
--- a/examples/devinfo.c
+++ b/examples/devinfo.c
@@ -111,18 +111,6 @@ static const char *atomic_cap_str(enum ibv_atomic_cap atom_cap)
 	}
 }
 
-static const char *mtu_str(enum ibv_mtu max_mtu)
-{
-	switch (max_mtu) {
-	case IBV_MTU_256:  return "256";
-	case IBV_MTU_512:  return "512";
-	case IBV_MTU_1024: return "1024";
-	case IBV_MTU_2048: return "2048";
-	case IBV_MTU_4096: return "4096";
-	default:           return "invalid MTU";
-	}
-}
-
 static const char *width_str(uint8_t width)
 {
 	switch (width) {
@@ -301,10 +289,10 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 		printf("\t\tport:\t%d\n", port);
 		printf("\t\t\tstate:\t\t\t%s (%d)\n",
 		       port_state_str(port_attr.state), port_attr.state);
-		printf("\t\t\tmax_mtu:\t\t%s (%d)\n",
-		       mtu_str(port_attr.max_mtu), port_attr.max_mtu);
-		printf("\t\t\tactive_mtu:\t\t%s (%d)\n",
-		       mtu_str(port_attr.active_mtu), port_attr.active_mtu);
+		printf("\t\t\tmax_mtu:\t\t%d (%d)\n",
+		       ibv_mtu_to_num(port_attr.max_mtu), port_attr.max_mtu);
+		printf("\t\t\tactive_mtu:\t\t%d (%d)\n",
+		       ibv_mtu_to_num(port_attr.active_mtu), port_attr.active_mtu);
 		printf("\t\t\tsm_lid:\t\t\t%d\n", port_attr.sm_lid);
 		printf("\t\t\tport_lid:\t\t%d\n", port_attr.lid);
 		printf("\t\t\tport_lmc:\t\t0x%02x\n", port_attr.lmc);
diff --git a/examples/pingpong.c b/examples/pingpong.c
index 90732ef..d1c22c9 100644
--- a/examples/pingpong.c
+++ b/examples/pingpong.c
@@ -36,18 +36,6 @@
 #include <stdio.h>
 #include <string.h>
 
-enum ibv_mtu pp_mtu_to_enum(int mtu)
-{
-	switch (mtu) {
-	case 256:  return IBV_MTU_256;
-	case 512:  return IBV_MTU_512;
-	case 1024: return IBV_MTU_1024;
-	case 2048: return IBV_MTU_2048;
-	case 4096: return IBV_MTU_4096;
-	default:   return -1;
-	}
-}
-
 uint16_t pp_get_local_lid(struct ibv_context *context, int port)
 {
 	struct ibv_port_attr attr;
diff --git a/examples/pingpong.h b/examples/pingpong.h
index 9cdc03e..91d217b 100644
--- a/examples/pingpong.h
+++ b/examples/pingpong.h
@@ -35,7 +35,6 @@
 
 #include <infiniband/verbs.h>
 
-enum ibv_mtu pp_mtu_to_enum(int mtu);
 uint16_t pp_get_local_lid(struct ibv_context *context, int port);
 int pp_get_port_info(struct ibv_context *context, int port,
 		     struct ibv_port_attr *attr);
diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c
index 15494a1..2d6d30e 100644
--- a/examples/rc_pingpong.c
+++ b/examples/rc_pingpong.c
@@ -78,7 +78,7 @@ struct pingpong_dest {
 };
 
 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
-			  enum ibv_mtu mtu, int sl,
+			  ibv_mtu_t mtu, int sl,
 			  struct pingpong_dest *dest, int sgid_idx)
 {
 	struct ibv_qp_attr attr = {
@@ -209,7 +209,7 @@ out:
 }
 
 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
-						 int ib_port, enum ibv_mtu mtu,
+						 int ib_port, ibv_mtu_t mtu,
 						 int port, int sl,
 						 const struct pingpong_dest *my_dest,
 						 int sgid_idx)
@@ -547,7 +547,7 @@ int main(int argc, char *argv[])
 	int                      port = 18515;
 	int                      ib_port = 1;
 	int                      size = 4096;
-	enum ibv_mtu		 mtu = IBV_MTU_1024;
+	ibv_mtu_t		 mtu = num_to_ibv_mtu(1024);
 	int                      rx_depth = 500;
 	int                      iters = 1000;
 	int                      use_event = 0;
@@ -608,7 +608,7 @@ int main(int argc, char *argv[])
 			break;
 
 		case 'm':
-			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
+			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
 			if (mtu < 0) {
 				usage(argv[0]);
 				return 1;
diff --git a/examples/srq_pingpong.c b/examples/srq_pingpong.c
index 6e00f8c..44915a7 100644
--- a/examples/srq_pingpong.c
+++ b/examples/srq_pingpong.c
@@ -81,7 +81,7 @@ struct pingpong_dest {
 	union ibv_gid gid;
 };
 
-static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
+static int pp_connect_ctx(struct pingpong_context *ctx, int port, ibv_mtu_t mtu,
 			  int sl, const struct pingpong_dest *my_dest,
 			  const struct pingpong_dest *dest, int sgid_idx)
 {
@@ -229,7 +229,7 @@ out:
 }
 
 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
-						 int ib_port, enum ibv_mtu mtu,
+						 int ib_port, ibv_mtu_t mtu,
 						 int port, int sl,
 						 const struct pingpong_dest *my_dest,
 						 int sgid_idx)
@@ -620,7 +620,7 @@ int main(int argc, char *argv[])
 	int                      port = 18515;
 	int                      ib_port = 1;
 	int                      size = 4096;
-	enum ibv_mtu		 mtu = IBV_MTU_1024;
+	ibv_mtu_t		 mtu = num_to_ibv_mtu(1024);
 	int                      num_qp = 16;
 	int                      rx_depth = 500;
 	int                      iters = 1000;
@@ -685,7 +685,7 @@ int main(int argc, char *argv[])
 			break;
 
 		case 'm':
-			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
+			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
 			if (mtu < 0) {
 				usage(argv[0]);
 				return 1;
diff --git a/examples/uc_pingpong.c b/examples/uc_pingpong.c
index 52c6c28..bafc2a6 100644
--- a/examples/uc_pingpong.c
+++ b/examples/uc_pingpong.c
@@ -78,7 +78,7 @@ struct pingpong_dest {
 };
 
 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
-			  enum ibv_mtu mtu, int sl,
+			  ibv_mtu_t mtu, int sl,
 			  struct pingpong_dest *dest, int sgid_idx)
 {
 	struct ibv_qp_attr attr = {
@@ -197,7 +197,7 @@ out:
 }
 
 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
-						 int ib_port, enum ibv_mtu mtu,
+						 int ib_port, ibv_mtu_t mtu,
 						 int port, int sl,
 						 const struct pingpong_dest *my_dest,
 						 int sgid_idx)
@@ -535,7 +535,7 @@ int main(int argc, char *argv[])
 	int                      port = 18515;
 	int                      ib_port = 1;
 	int                      size = 4096;
-	enum ibv_mtu		 mtu = IBV_MTU_1024;
+	ibv_mtu_t		 mtu = num_to_ibv_mtu(1024);
 	int                      rx_depth = 500;
 	int                      iters = 1000;
 	int                      use_event = 0;
@@ -596,7 +596,7 @@ int main(int argc, char *argv[])
 			break;
 
 		case 'm':
-			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
+			mtu = num_to_ibv_mtu(strtol(optarg, NULL, 0));
 			if (mtu < 0) {
 				usage(argv[0]);
 				return 1;
diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
index 21c551d..5a0656f 100644
--- a/examples/ud_pingpong.c
+++ b/examples/ud_pingpong.c
@@ -332,7 +332,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 			fprintf(stderr, "Unable to query port info for port %d\n", port);
 			goto clean_device;
 		}
-		mtu = 1 << (port_info.active_mtu + 7);
+		mtu = ibv_mtu_to_num(port_info.active_mtu);
 		if (size > mtu) {
 			fprintf(stderr, "Requested size larger than port MTU (%d)\n", mtu);
 			goto clean_device;
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index 4b1ab57..2108629 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -144,6 +144,10 @@ struct ibv_device_attr {
 	uint8_t			phys_port_cnt;
 };
 
+/*
+ * Symbolic enum names for MTU values are preserved for backwards
+ * compatibility.
+ */
 enum ibv_mtu {
 	IBV_MTU_256  = 1,
 	IBV_MTU_512  = 2,
@@ -152,6 +156,14 @@ enum ibv_mtu {
 	IBV_MTU_4096 = 5
 };
 
+/*
+ * ibv_mtu_t is an encoded integer type that represents an MTU value.
+ * If the value is <= IBV_MTU_4096, it is treated as one of the
+ * IBV_MTU_* enum values.  Otherwise, it is treated as its integer
+ * value.
+ */
+typedef int ibv_mtu_t;
+
 enum ibv_port_state {
 	IBV_PORT_NOP		= 0,
 	IBV_PORT_DOWN		= 1,
@@ -169,8 +181,8 @@ enum {
 
 struct ibv_port_attr {
 	enum ibv_port_state	state;
-	enum ibv_mtu		max_mtu;
-	enum ibv_mtu		active_mtu;
+	ibv_mtu_t		max_mtu;
+	ibv_mtu_t		active_mtu;
 	int			gid_tbl_len;
 	uint32_t		port_cap_flags;
 	uint32_t		max_msg_sz;
@@ -485,7 +497,7 @@ enum ibv_mig_state {
 struct ibv_qp_attr {
 	enum ibv_qp_state	qp_state;
 	enum ibv_qp_state	cur_qp_state;
-	enum ibv_mtu		path_mtu;
+	ibv_mtu_t		path_mtu;
 	enum ibv_mig_state	path_mig_state;
 	uint32_t		qkey;
 	uint32_t		rq_psn;
@@ -1138,6 +1150,43 @@ const char *ibv_port_state_str(enum ibv_port_state port_state);
  */
 const char *ibv_event_type_str(enum ibv_event_type event);
 
+/**
+ * num_to_ibv_mtu - Convert an integer to its corresponding encoded
+ * ibv_mtu_t value.  If an integer value corresponding to an IBV_MTU_*
+ * enum value is passed, return the enum value (e.g., 1024 ->
+ * IBV_MTU_1024).  Otherwise, just return the value (e.g., 1500 ->
+ * 1500).
+ */
+static inline ibv_mtu_t num_to_ibv_mtu(int num)
+{
+	switch (num) {
+	case 256:  return IBV_MTU_256;
+	case 512:  return IBV_MTU_512;
+	case 1024: return IBV_MTU_1024;
+	case 2048: return IBV_MTU_2048;
+	case 4096: return IBV_MTU_4096;
+	default:   return num;
+	}
+}
+
+/**
+ * ibv_mtu_to_num - Convert an encoded ibv_mtu_t value to its
+ * corresponding integer value.  If an enum ibv_mtu value is passed,
+ * return its integer value (e.g., IBV_MTU_1024 -> 1024).  Otherwise,
+ * just return the value (e.g., 1500 -> 1500).
+ */
+static inline int ibv_mtu_to_num(ibv_mtu_t mtu)
+{
+	switch (mtu) {
+	case IBV_MTU_256:  return 256;
+	case IBV_MTU_512:  return 512;
+	case IBV_MTU_1024: return 1024;
+	case IBV_MTU_2048: return 2048;
+	case IBV_MTU_4096: return 4096;
+	default:           return mtu;
+	}
+}
+
 END_C_DECLS
 
 #  undef __attribute_const
diff --git a/man/ibv_modify_qp.3 b/man/ibv_modify_qp.3
index cb3faaa..26dfcf3 100644
--- a/man/ibv_modify_qp.3
+++ b/man/ibv_modify_qp.3
@@ -25,7 +25,7 @@ struct ibv_qp_attr {
 .in +8
 enum ibv_qp_state       qp_state;               /* Move the QP to this state */
 enum ibv_qp_state       cur_qp_state;           /* Assume this is the current QP state */
-enum ibv_mtu            path_mtu;               /* Path MTU (valid only for RC/UC QPs) */
+ibv_mtu_t               path_mtu;               /* Path MTU (valid only for RC/UC QPs) */
 enum ibv_mig_state      path_mig_state;         /* Path migration state (valid if HCA supports APM) */
 uint32_t                qkey;                   /* Q_Key for the QP (valid only for UD QPs) */
 uint32_t                rq_psn;                 /* PSN for receive queue (valid only for RC/UC QPs) */
diff --git a/man/ibv_mtu_to_num.3 b/man/ibv_mtu_to_num.3
new file mode 100644
index 0000000..5603efa
--- /dev/null
+++ b/man/ibv_mtu_to_num.3
@@ -0,0 +1,67 @@
+.\" -*- nroff -*-
+.\"
+.TH IBV_MTU_TO_NUM 3 2013-06-20 libibverbs "Libibverbs Programmer's Manual"
+.SH "NAME"
+.nf
+ibv_mtu_to_num \- convert encoded MTU value to integer
+.sp
+num_to_ibv_mtu \- convert integer to encoded MTU value
+.SH "SYNOPSIS"
+.nf
+.B #include <infiniband/verbs.h>
+.sp
+.BI "int ibv_mtu_to_num(ibv_mtu_t " "mtu" ");
+.sp
+.BI "ibv_mtu_t num_to_ibv_mtu(int " "num" ");
+.fi
+.SH "DESCRIPTION"
+.PP
+The
+.I ibv_mtu_t
+type is an encoded integer used to represent MTU values in order to
+preserve backwards compatibility.  When the value of an
+.I ibv_mtu_t
+variable is <=
+.BR IBV_MTU_4096\fR,
+it is treated as the corresponding
+.B IBV_MTU_*
+enum value.  Otherwise, it is treated as its integer value.
+.PP
+MTU values less than the value of the enum
+.B IBV_MTU_4096
+(i.e., 5) cannot be represented.
+.PP
+.B ibv_mtu_to_num()
+converts the encoded MTU value
+.I mtu
+to a plain integer value.  For example, if
+.I mtu
+is
+.BR IBV_MTU_1024\fR,
+the value 1024 will be returned.  Likewise, if
+.I mtu
+is 1500, then 1500 will be returned.
+.PP
+.B num_to_ibv_mtu()
+converts the integer
+.I num
+to its corresponding encoded
+.I ibv_mtu_t
+value.  For example, if
+.I num
+is 1024, then
+.B IBV_MTU_1024
+will be returned.  Likewise, if
+.I num
+is 1500, then 1500 will be returned.
+.SH "RETURN VALUE"
+.B ibv_mtu_to_num()
+returns an integer MTU value.
+.PP
+.B num_to_ibv_mtu()
+returns an encoded MTU value.
+.SH "SEE ALSO"
+.BR ibv_query_port (3)
+.SH "AUTHORS"
+.TP
+Jeff Squyres <jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
diff --git a/man/ibv_query_port.3 b/man/ibv_query_port.3
index 9bedd90..3700a5e 100644
--- a/man/ibv_query_port.3
+++ b/man/ibv_query_port.3
@@ -26,8 +26,8 @@ is an ibv_port_attr struct, as defined in <infiniband/verbs.h>.
 struct ibv_port_attr {
 .in +8
 enum ibv_port_state     state;          /* Logical port state */
-enum ibv_mtu            max_mtu;        /* Max MTU supported by port */
-enum ibv_mtu            active_mtu;     /* Actual MTU */
+ibv_mtu_t               max_mtu;        /* Max MTU supported by port */
+ibv_mtu_t               active_mtu;     /* Actual MTU */
 int                     gid_tbl_len;    /* Length of source GID table */
 uint32_t                port_cap_flags; /* Port capabilities */
 uint32_t                max_msg_sz;     /* Maximum message size */
diff --git a/man/ibv_query_qp.3 b/man/ibv_query_qp.3
index 3893ec8..ad7d9d5 100644
--- a/man/ibv_query_qp.3
+++ b/man/ibv_query_qp.3
@@ -30,7 +30,7 @@ struct ibv_qp_attr {
 .in +8
 enum ibv_qp_state       qp_state;            /* Current QP state */
 enum ibv_qp_state       cur_qp_state;        /* Current QP state - irrelevant for ibv_query_qp */
-enum ibv_mtu            path_mtu;            /* Path MTU (valid only for RC/UC QPs) */
+ibv_mtu_t               path_mtu;            /* Path MTU (valid only for RC/UC QPs) */
 enum ibv_mig_state      path_mig_state;      /* Path migration state (valid if HCA supports APM) */
 uint32_t                qkey;                /* Q_Key of the QP (valid only for UD QPs) */
 uint32_t                rq_psn;              /* PSN for receive queue (valid only for RC/UC QPs) */
-- 
1.8.2.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 47+ messages in thread

end of thread, other threads:[~2013-08-05 22:43 UTC | newest]

Thread overview: 47+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-02 12:31 [PATCH V2] libibverbs: Allow arbitrary int values for MTU Jeff Squyres
     [not found] ` <1372768306-6786-1-git-send-email-jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
2013-07-04  0:36   ` Jeff Squyres (jsquyres)
2013-07-05 19:11   ` Roland Dreier
     [not found]     ` <CAL1RGDVU9otyMtoit1PJR5JkVy4XvU=4xjL1rN1QB7pq0rVcxA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2013-07-08 16:26       ` Jeff Squyres (jsquyres)
     [not found]         ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F6F31A6-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
2013-07-08 17:19           ` Roland Dreier
     [not found]             ` <CAL1RGDU+4CSJyF0TAfAv-YYcjA4FG+XNeg5pGCHPeW5iqhvwpA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2013-07-08 17:26               ` Jason Gunthorpe
     [not found]                 ` <20130708172621.GA3852-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2013-07-10 12:14                   ` Jeff Squyres (jsquyres)
     [not found]                     ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F70BF83-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
2013-07-15 14:20                       ` Jeff Squyres (jsquyres)
     [not found]                         ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F71CB0F-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
2013-07-30 16:44                           ` Christoph Lameter
     [not found]                             ` <0000014030779fed-e7bdfc9a-6714-4473-b093-4b247ab940b6-000000-p/GC64/jrecnJqMo6gzdpkEOCMrvLtNR@public.gmane.org>
2013-07-30 17:21                               ` Jeff Squyres (jsquyres)
     [not found]                                 ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F7883AD-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
2013-07-30 18:31                                   ` Christoph Lameter
     [not found]                                     ` <0000014030d9bc3d-2916693b-9669-4017-a724-75b51c6dc3dc-000000-p/GC64/jrecnJqMo6gzdpkEOCMrvLtNR@public.gmane.org>
2013-07-30 18:45                                       ` Atchley, Scott
     [not found]                                         ` <17370FDF-C64B-4D74-A848-79C1438E6283-1Heg1YXhbW8@public.gmane.org>
2013-07-30 18:47                                           ` Doug Ledford
2013-07-16  8:04                       ` Hefty, Sean
     [not found]                         ` <1828884A29C6694DAF28B7E6B8A82373805AAB74-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2013-07-16 14:47                           ` Jason Gunthorpe
     [not found]                             ` <20130716144747.GA7304-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2013-07-16 17:11                               ` Jeff Squyres (jsquyres)
     [not found]                                 ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F7211E6-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
2013-07-17  0:16                                   ` Roland Dreier
     [not found]                                     ` <CAL1RGDWKmecLXsqphwdP9XG=4Y=65Z_ACSdRBhEqVQb6Q4LrUg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2013-07-17 17:57                                       ` Doug Ledford
     [not found]                                         ` <51E6DB11.9050906-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2013-08-05 18:48                                           ` Roland Dreier
     [not found]                                             ` <CAL1RGDVuuQU-NZ2o+T0_kuxPMRadGPeqzO1J9iuW=DRuFMMYfQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2013-08-05 19:02                                               ` Jason Gunthorpe
     [not found]                                                 ` <20130805190238.GA14356-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2013-08-05 20:06                                                   ` Roland Dreier
     [not found]                                                     ` <CAL1RGDWPHWzAWx7XXxGGCVQQ1xHU6QDVybrttnvoqUPrzqzvWA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2013-08-05 22:22                                                       ` Jason Gunthorpe
     [not found]                                                         ` <20130805222250.GA4486-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2013-08-05 22:35                                                           ` Roland Dreier
     [not found]                                                             ` <CAL1RGDW5V_N62LSZa1YRG1JA1fOf+nbQwBG1xQ7yYiuF6z0fMw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2013-08-05 22:43                                                               ` Jason Gunthorpe
2013-07-17  4:06                                   ` Hefty, Sean
     [not found]                                     ` <1828884A29C6694DAF28B7E6B8A82373805AECC0-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2013-07-17 18:12                                       ` Jeff Squyres (jsquyres)
     [not found]                                         ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F725B8C-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
2013-07-17 21:41                                           ` Hefty, Sean
     [not found]                                             ` <1828884A29C6694DAF28B7E6B8A82373805B941D-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2013-07-17 21:44                                               ` Steve Wise
     [not found]                                                 ` <51E71023.2060006-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
2013-07-18  2:32                                                   ` Jeff Squyres (jsquyres)
     [not found]                                                     ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F727D11-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
2013-07-18 16:50                                                       ` Jason Gunthorpe
     [not found]                                                         ` <20130718165003.GB9237-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2013-07-23 13:26                                                           ` Jeff Squyres (jsquyres)
     [not found]                                                             ` <EF66BBEB19BADC41AC8CCF5F684F07FC4F749B82-nsZYYkk5h5QQ2GdVW7+PtKBKnGwkPULj@public.gmane.org>
2013-07-30 12:59                                                               ` Jeff Squyres (jsquyres)
  -- strict thread matches above, loose matches on Subject: below --
2013-06-20 14:21 Jeff Squyres
     [not found] ` <1371738080-18537-1-git-send-email-jsquyres-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
2013-06-20 16:34   ` Doug Ledford
     [not found]     ` <51C32EFC.8060202-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2013-06-20 16:42       ` Doug Ledford
2013-06-20 16:53       ` Jason Gunthorpe
     [not found]         ` <20130620165305.GA19800-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2013-06-20 20:31           ` Doug Ledford
     [not found]             ` <51C36692.7000507-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2013-06-20 21:14               ` Jason Gunthorpe
     [not found]                 ` <20130620211454.GA2434-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2013-06-21  0:31                   ` Doug Ledford
     [not found]                     ` <51C39ECB.3040006-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2013-06-21  1:03                       ` Hefty, Sean
2013-06-21  6:36                       ` Jason Gunthorpe
     [not found]                         ` <20130621063648.GA27963-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2013-06-21 17:36                           ` Doug Ledford
     [not found]                             ` <51C48F01.5030103-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2013-06-21 18:26                               ` Jason Gunthorpe
     [not found]                                 ` <20130621182617.GA17700-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2013-06-21 20:57                                   ` Doug Ledford
     [not found]                                     ` <51C4BE25.9000501-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2013-06-21 21:20                                       ` Jason Gunthorpe
     [not found]                                         ` <20130621212016.GA10637-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2013-06-21 21:56                                           ` Hefty, Sean
2013-06-22 13:13                                           ` Jeff Squyres (jsquyres)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.