All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next v3 1/4] net: Rename sock_recv_ts_and_drops() to sock_cmsg_recv()
@ 2015-02-23 17:52 Eyal Birger
  2015-02-23 17:52 ` [PATCH net-next v3 2/4] ipv4,v6: avoid setting skb->priority when skb->reserved_tailroom is in use in igmpv3/mld Eyal Birger
                   ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Eyal Birger @ 2015-02-23 17:52 UTC (permalink / raw)
  To: davem; +Cc: edumazet, shmulik.ladkani, netdev, Eyal Birger

sock_recv_ts_and_drops() - as its name suggests - allows receiving
timestamp information and drop statistics.
Generalize the function name in preparation for adding additional
ancillary information.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
 include/net/sock.h           | 10 +++++-----
 net/atm/common.c             |  2 +-
 net/bluetooth/af_bluetooth.c |  4 ++--
 net/can/bcm.c                |  2 +-
 net/can/raw.c                |  2 +-
 net/ieee802154/socket.c      |  4 ++--
 net/ipv4/raw.c               |  2 +-
 net/ipv4/udp.c               |  2 +-
 net/ipv6/raw.c               |  2 +-
 net/ipv6/udp.c               |  2 +-
 net/key/af_key.c             |  2 +-
 net/packet/af_packet.c       |  2 +-
 net/rxrpc/ar-recvmsg.c       |  2 +-
 net/sctp/socket.c            |  2 +-
 net/socket.c                 |  6 +++---
 15 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index ab186b1..83c4529 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2108,11 +2108,11 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 		__sock_recv_wifi_status(msg, sk, skb);
 }
 
-void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
-			      struct sk_buff *skb);
+void __sock_cmsg_recv(struct msghdr *msg, struct sock *sk,
+		      struct sk_buff *skb);
 
-static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
-					  struct sk_buff *skb)
+static inline void sock_cmsg_recv(struct msghdr *msg, struct sock *sk,
+				  struct sk_buff *skb)
 {
 #define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL)			| \
 			   (1UL << SOCK_RCVTSTAMP))
@@ -2120,7 +2120,7 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
 			   SOF_TIMESTAMPING_RAW_HARDWARE)
 
 	if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY)
-		__sock_recv_ts_and_drops(msg, sk, skb);
+		__sock_cmsg_recv(msg, sk, skb);
 	else
 		sk->sk_stamp = skb->tstamp;
 }
diff --git a/net/atm/common.c b/net/atm/common.c
index b84057e..611b1c2 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -557,7 +557,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	error = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (error)
 		return error;
-	sock_recv_ts_and_drops(msg, sk, skb);
+	sock_cmsg_recv(msg, sk, skb);
 
 	if (!(flags & MSG_PEEK)) {
 		pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc),
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index ce22e0c..4bdcbc9 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -241,7 +241,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (err == 0) {
-		sock_recv_ts_and_drops(msg, sk, skb);
+		sock_cmsg_recv(msg, sk, skb);
 
 		if (bt_sk(sk)->skb_msg_name)
 			bt_sk(sk)->skb_msg_name(skb, msg->msg_name,
@@ -339,7 +339,7 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		copied += chunk;
 		size   -= chunk;
 
-		sock_recv_ts_and_drops(msg, sk, skb);
+		sock_cmsg_recv(msg, sk, skb);
 
 		if (!(flags & MSG_PEEK)) {
 			int skb_len = skb_headlen(skb);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ee9ffd9..f741f29 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1559,7 +1559,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return err;
 	}
 
-	sock_recv_ts_and_drops(msg, sk, skb);
+	sock_cmsg_recv(msg, sk, skb);
 
 	if (msg->msg_name) {
 		__sockaddr_check_size(sizeof(struct sockaddr_can));
diff --git a/net/can/raw.c b/net/can/raw.c
index 00c13ef..549b871 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -754,7 +754,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return err;
 	}
 
-	sock_recv_ts_and_drops(msg, sk, skb);
+	sock_cmsg_recv(msg, sk, skb);
 
 	if (msg->msg_name) {
 		__sockaddr_check_size(sizeof(struct sockaddr_can));
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 2878d8c..41ac5fa 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -348,7 +348,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (err)
 		goto done;
 
-	sock_recv_ts_and_drops(msg, sk, skb);
+	sock_cmsg_recv(msg, sk, skb);
 
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
@@ -739,7 +739,7 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (err)
 		goto done;
 
-	sock_recv_ts_and_drops(msg, sk, skb);
+	sock_cmsg_recv(msg, sk, skb);
 
 	if (saddr) {
 		saddr->family = AF_IEEE802154;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index f027a70..a28a83b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -740,7 +740,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (err)
 		goto done;
 
-	sock_recv_ts_and_drops(msg, sk, skb);
+	sock_cmsg_recv(msg, sk, skb);
 
 	/* Copy the address. */
 	if (sin) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 97ef1f8b..07b1394 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1318,7 +1318,7 @@ try_again:
 		UDP_INC_STATS_USER(sock_net(sk),
 				UDP_MIB_INDATAGRAMS, is_udplite);
 
-	sock_recv_ts_and_drops(msg, sk, skb);
+	sock_cmsg_recv(msg, sk, skb);
 
 	/* Copy the address. */
 	if (sin) {
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0d84b2c..e810faa 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -510,7 +510,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 		*addr_len = sizeof(*sin6);
 	}
 
-	sock_recv_ts_and_drops(msg, sk, skb);
+	sock_cmsg_recv(msg, sk, skb);
 
 	if (np->rxopt.all)
 		ip6_datagram_recv_ctl(sk, msg, skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d048d46..be0cfae 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -469,7 +469,7 @@ try_again:
 					UDP_MIB_INDATAGRAMS, is_udplite);
 	}
 
-	sock_recv_ts_and_drops(msg, sk, skb);
+	sock_cmsg_recv(msg, sk, skb);
 
 	/* Copy the address. */
 	if (msg->msg_name) {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f8ac939..e03ba49 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3658,7 +3658,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
 	if (err)
 		goto out_free;
 
-	sock_recv_ts_and_drops(msg, sk, skb);
+	sock_cmsg_recv(msg, sk, skb);
 
 	err = (flags & MSG_TRUNC) ? skb->len : copied;
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9c28cec..6033520 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2982,7 +2982,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (err)
 		goto out_free;
 
-	sock_recv_ts_and_drops(msg, sk, skb);
+	sock_cmsg_recv(msg, sk, skb);
 
 	if (msg->msg_name) {
 		/* If the address length field is there to be filled
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 4575485..323b1c7 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -150,7 +150,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
 				       &call->conn->trans->peer->srx, len);
 				msg->msg_namelen = len;
 			}
-			sock_recv_ts_and_drops(msg, &rx->sk, skb);
+			sock_cmsg_recv(msg, &rx->sk, skb);
 		}
 
 		/* receive the message */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index aafe94b..27835aa 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2108,7 +2108,7 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (err)
 		goto out_free;
 
-	sock_recv_ts_and_drops(msg, sk, skb);
+	sock_cmsg_recv(msg, sk, skb);
 	if (sctp_ulpevent_is_notification(event)) {
 		msg->msg_flags |= MSG_NOTIFICATION;
 		sp->pf->event_msgname(event, msg->msg_name, addr_len);
diff --git a/net/socket.c b/net/socket.c
index bbedbfc..17be338 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -736,13 +736,13 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
 			sizeof(__u32), &skb->dropcount);
 }
 
-void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
-	struct sk_buff *skb)
+void __sock_cmsg_recv(struct msghdr *msg, struct sock *sk,
+		      struct sk_buff *skb)
 {
 	sock_recv_timestamp(msg, sk, skb);
 	sock_recv_drops(msg, sk, skb);
 }
-EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
+EXPORT_SYMBOL_GPL(__sock_cmsg_recv);
 
 static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
 				       struct msghdr *msg, size_t size, int flags)
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH net-next v3 2/4] ipv4,v6: avoid setting skb->priority when skb->reserved_tailroom is in use in igmpv3/mld
  2015-02-23 17:52 [PATCH net-next v3 1/4] net: Rename sock_recv_ts_and_drops() to sock_cmsg_recv() Eyal Birger
@ 2015-02-23 17:52 ` Eyal Birger
  2015-02-23 17:52 ` [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark Eyal Birger
  2015-02-23 17:52 ` [PATCH net-next v3 4/4] net: Introducing socket mark receive socket option Eyal Birger
  2 siblings, 0 replies; 21+ messages in thread
From: Eyal Birger @ 2015-02-23 17:52 UTC (permalink / raw)
  To: davem; +Cc: edumazet, shmulik.ladkani, netdev, Eyal Birger

IGMPv3/MLD packets are sent with TC_PRIO_CONTROL.
skb->priority is set immediately after skb allocation.

skb->reserved_tailroom is also set after allocation and is
used by the IGMPv3/MLD code prior to transmittion.

In order to be able to alias skb->priority and
skb->reserved_tailroom in struct sk_buff, the two fields
must not be used at the same time.

The skb->priority value is not required before the packet
is sent down the IP stack. Therefore, move setting
skb->priority prior to transmittion.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
 net/ipv4/igmp.c  | 3 ++-
 net/ipv6/mcast.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4b1172d..f28dc9c 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -339,7 +339,6 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
 		if (size < 256)
 			return NULL;
 	}
-	skb->priority = TC_PRIO_CONTROL;
 
 	rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0,
 				   0, 0,
@@ -391,6 +390,8 @@ static int igmpv3_sendpack(struct sk_buff *skb)
 	struct igmphdr *pig = igmp_hdr(skb);
 	const int igmplen = skb_tail_pointer(skb) - skb_transport_header(skb);
 
+	skb->priority = TC_PRIO_CONTROL;
+
 	pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
 
 	return ip_local_out(skb);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ce107c..67e6c0a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1575,7 +1575,6 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
 	if (!skb)
 		return NULL;
 
-	skb->priority = TC_PRIO_CONTROL;
 	skb->reserved_tailroom = skb_end_offset(skb) -
 				 min(mtu, skb_end_offset(skb));
 	skb_reserve(skb, hlen);
@@ -1616,6 +1615,8 @@ static void mld_sendpack(struct sk_buff *skb)
 	struct flowi6 fl6;
 	struct dst_entry *dst;
 
+	skb->priority = TC_PRIO_CONTROL;
+
 	rcu_read_lock();
 	idev = __in6_dev_get(skb->dev);
 	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-23 17:52 [PATCH net-next v3 1/4] net: Rename sock_recv_ts_and_drops() to sock_cmsg_recv() Eyal Birger
  2015-02-23 17:52 ` [PATCH net-next v3 2/4] ipv4,v6: avoid setting skb->priority when skb->reserved_tailroom is in use in igmpv3/mld Eyal Birger
@ 2015-02-23 17:52 ` Eyal Birger
  2015-02-23 18:56   ` Shmulik Ladkani
  2015-02-23 17:52 ` [PATCH net-next v3 4/4] net: Introducing socket mark receive socket option Eyal Birger
  2 siblings, 1 reply; 21+ messages in thread
From: Eyal Birger @ 2015-02-23 17:52 UTC (permalink / raw)
  To: davem; +Cc: edumazet, shmulik.ladkani, netdev, Eyal Birger

Commit 977750076d98 ("af_packet: add interframe drop cmsg (v6)")
unionized skb->mark and skb->dropcount in order to allow recording
of the socket drop count while maintaining struct sk_buff size.

In order to allow for the skb->mark to be fetched by user-space code
it needs to be detached from this union; skb->priority is used
instead.

The purpose of overloading skb->priority is solely for retaining
struct sk_buff size; skb->priority is not used after the skb is
queued to the socket.

Use of aliased fields, skb->dropcount and skb->reserved_tailroom,
is either done after the skb is cloned (e.g. in packet_rcv()) or
before skb->priority is used (e.g. in IGMPv3/MLD/TCP).

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
 include/linux/skbuff.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d898b32..23e77cb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -625,7 +625,7 @@ struct sk_buff {
 			__u16	csum_offset;
 		};
 	};
-	__u32			priority;
+	__u32			mark;
 	int			skb_iif;
 	__u32			hash;
 	__be16			vlan_proto;
@@ -640,7 +640,7 @@ struct sk_buff {
 	__u32			secmark;
 #endif
 	union {
-		__u32		mark;
+		__u32		priority;
 		__u32		dropcount;
 		__u32		reserved_tailroom;
 	};
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH net-next v3 4/4] net: Introducing socket mark receive socket option
  2015-02-23 17:52 [PATCH net-next v3 1/4] net: Rename sock_recv_ts_and_drops() to sock_cmsg_recv() Eyal Birger
  2015-02-23 17:52 ` [PATCH net-next v3 2/4] ipv4,v6: avoid setting skb->priority when skb->reserved_tailroom is in use in igmpv3/mld Eyal Birger
  2015-02-23 17:52 ` [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark Eyal Birger
@ 2015-02-23 17:52 ` Eyal Birger
  2 siblings, 0 replies; 21+ messages in thread
From: Eyal Birger @ 2015-02-23 17:52 UTC (permalink / raw)
  To: davem; +Cc: edumazet, shmulik.ladkani, netdev, Eyal Birger

A userspace program may wish to receive the mark of packets it
receives.

Packets may be marked by Netfilter, by other userspace applications
using the SO_MARK socket option, or by other kernel means.

Receiving the mark in userspace is useful for example for
distinguishing between different TPROXY diversion rules to the same
userspace proxy socket.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
 arch/alpha/include/uapi/asm/socket.h   | 2 ++
 arch/avr32/include/uapi/asm/socket.h   | 2 ++
 arch/cris/include/uapi/asm/socket.h    | 2 ++
 arch/frv/include/uapi/asm/socket.h     | 2 ++
 arch/ia64/include/uapi/asm/socket.h    | 2 ++
 arch/m32r/include/uapi/asm/socket.h    | 2 ++
 arch/mips/include/uapi/asm/socket.h    | 2 ++
 arch/mn10300/include/uapi/asm/socket.h | 2 ++
 arch/parisc/include/uapi/asm/socket.h  | 2 ++
 arch/powerpc/include/uapi/asm/socket.h | 2 ++
 arch/s390/include/uapi/asm/socket.h    | 2 ++
 arch/sparc/include/uapi/asm/socket.h   | 2 ++
 arch/xtensa/include/uapi/asm/socket.h  | 2 ++
 include/net/sock.h                     | 8 +++++---
 include/uapi/asm-generic/socket.h      | 2 ++
 net/core/sock.c                        | 8 ++++++++
 net/socket.c                           | 9 +++++++++
 17 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 9a20821..7c49f4b 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -92,4 +92,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		51
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 2b65ed6..6c81e0e 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -85,4 +85,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		51
+
 #endif /* _UAPI__ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
index e2503d9f..55ca3be 100644
--- a/arch/cris/include/uapi/asm/socket.h
+++ b/arch/cris/include/uapi/asm/socket.h
@@ -87,6 +87,8 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		51
+
 #endif /* _ASM_SOCKET_H */
 
 
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index 4823ad1..b69f0d5 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -85,5 +85,7 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		51
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 59be3d8..ee6abaf 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -94,4 +94,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		51
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 7bc4cb2..bf0f932 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -85,4 +85,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		51
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index dec3c85..47a61c4 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -103,4 +103,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		51
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index cab7d6d..216305b 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -85,4 +85,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		51
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index a5cd40c..047980a3 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -84,4 +84,6 @@
 #define SO_ATTACH_BPF		0x402B
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		0x402C
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index c046666..188abb6 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -92,4 +92,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		51
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 296942d..70a5791 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -91,4 +91,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		51
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index e6a16c4..f7ee249 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -81,6 +81,8 @@
 #define SO_ATTACH_BPF		0x0034
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		0x0035
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 4120af0..5b588fa 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -96,4 +96,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		51
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 83c4529..e71495f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -719,6 +719,7 @@ enum sock_flags {
 		     */
 	SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
 	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
+	SOCK_RCVMARK,
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
@@ -2114,12 +2115,13 @@ void __sock_cmsg_recv(struct msghdr *msg, struct sock *sk,
 static inline void sock_cmsg_recv(struct msghdr *msg, struct sock *sk,
 				  struct sk_buff *skb)
 {
-#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL)			| \
-			   (1UL << SOCK_RCVTSTAMP))
+#define FLAGS_CMSG_ANY	  ((1UL << SOCK_RXQ_OVFL)			| \
+			   (1UL << SOCK_RCVTSTAMP)			| \
+			   (1UL << SOCK_RCVMARK))
 #define TSFLAGS_ANY	  (SOF_TIMESTAMPING_SOFTWARE			| \
 			   SOF_TIMESTAMPING_RAW_HARDWARE)
 
-	if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY)
+	if (sk->sk_flags & FLAGS_CMSG_ANY || sk->sk_tsflags & TSFLAGS_ANY)
 		__sock_cmsg_recv(msg, sk, skb);
 	else
 		sk->sk_stamp = skb->tstamp;
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 5c15c2a..17a1e2e 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -87,4 +87,6 @@
 #define SO_ATTACH_BPF		50
 #define SO_DETACH_BPF		SO_DETACH_FILTER
 
+#define SO_RCVMARK		51
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 93c8b20..36fe687 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -928,6 +928,10 @@ set_rcvbuf:
 			sk->sk_mark = val;
 		break;
 
+	case SO_RCVMARK:
+		sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
+		break;
+
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
 	case SO_RXQ_OVFL:
@@ -1179,6 +1183,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_mark;
 		break;
 
+	case SO_RCVMARK:
+		v.val = sock_flag(sk, SOCK_RCVMARK);
+		break;
+
 	case SO_RXQ_OVFL:
 		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
 		break;
diff --git a/net/socket.c b/net/socket.c
index 17be338..d4a860b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -736,11 +736,20 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
 			sizeof(__u32), &skb->dropcount);
 }
 
+static inline void sock_recv_mark(struct msghdr *msg, struct sock *sk,
+				  struct sk_buff *skb)
+{
+	if (sock_flag(sk, SOCK_RCVMARK) && skb)
+		put_cmsg(msg, SOL_SOCKET, SO_RCVMARK,
+			 sizeof(__u32), &skb->mark);
+}
+
 void __sock_cmsg_recv(struct msghdr *msg, struct sock *sk,
 		      struct sk_buff *skb)
 {
 	sock_recv_timestamp(msg, sk, skb);
 	sock_recv_drops(msg, sk, skb);
+	sock_recv_mark(msg, sk, skb);
 }
 EXPORT_SYMBOL_GPL(__sock_cmsg_recv);
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-23 17:52 ` [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark Eyal Birger
@ 2015-02-23 18:56   ` Shmulik Ladkani
  2015-02-23 19:10     ` Eric Dumazet
  2015-02-23 21:48     ` David Miller
  0 siblings, 2 replies; 21+ messages in thread
From: Shmulik Ladkani @ 2015-02-23 18:56 UTC (permalink / raw)
  To: Eyal Birger, davem; +Cc: edumazet, netdev

Hi,

On Mon, 23 Feb 2015 19:52:03 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
> Commit 977750076d98 ("af_packet: add interframe drop cmsg (v6)")
> unionized skb->mark and skb->dropcount in order to allow recording
> of the socket drop count while maintaining struct sk_buff size.
> 
> In order to allow for the skb->mark to be fetched by user-space code
> it needs to be detached from this union; skb->priority is used
> instead.
> 
> The purpose of overloading skb->priority is solely for retaining
> struct sk_buff size; skb->priority is not used after the skb is
> queued to the socket.
> 
> Use of aliased fields, skb->dropcount and skb->reserved_tailroom,
> is either done after the skb is cloned (e.g. in packet_rcv()) or
> before skb->priority is used (e.g. in IGMPv3/MLD/TCP).

Preserving a small skb size is an important cause.

In this case however, it seems that aliasing 'priority' sacrifices
maintainability: 'priority' does not seem "naturally orthogonal" to
'drop_count' or 'reserved_tailroom'.

One might arm 'priority' in various code flows (future and existing),
which may accidentally clash with the aliases. It is unexpected, hard to
remember (especially since 'priority' wasn't formerly aliased), and
sometimes it's simply not trivial to assure there's no clash.

May I suggest to unalias 'mark' out of the union?

Regards,
Shmulik

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-23 18:56   ` Shmulik Ladkani
@ 2015-02-23 19:10     ` Eric Dumazet
  2015-02-23 19:25       ` Eyal Birger
  2015-02-23 21:48     ` David Miller
  1 sibling, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2015-02-23 19:10 UTC (permalink / raw)
  To: Shmulik Ladkani; +Cc: Eyal Birger, davem, edumazet, netdev

On Mon, 2015-02-23 at 20:56 +0200, Shmulik Ladkani wrote:
> Hi,
> 
> On Mon, 23 Feb 2015 19:52:03 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
> > Commit 977750076d98 ("af_packet: add interframe drop cmsg (v6)")
> > unionized skb->mark and skb->dropcount in order to allow recording
> > of the socket drop count while maintaining struct sk_buff size.
> > 
> > In order to allow for the skb->mark to be fetched by user-space code
> > it needs to be detached from this union; skb->priority is used
> > instead.
> > 
> > The purpose of overloading skb->priority is solely for retaining
> > struct sk_buff size; skb->priority is not used after the skb is
> > queued to the socket.
> > 
> > Use of aliased fields, skb->dropcount and skb->reserved_tailroom,
> > is either done after the skb is cloned (e.g. in packet_rcv()) or
> > before skb->priority is used (e.g. in IGMPv3/MLD/TCP).
> 
> Preserving a small skb size is an important cause.
> 
> In this case however, it seems that aliasing 'priority' sacrifices
> maintainability: 'priority' does not seem "naturally orthogonal" to
> 'drop_count' or 'reserved_tailroom'.
> 
> One might arm 'priority' in various code flows (future and existing),
> which may accidentally clash with the aliases. It is unexpected, hard to
> remember (especially since 'priority' wasn't formerly aliased), and
> sometimes it's simply not trivial to assure there's no clash.
> 
> May I suggest to unalias 'mark' out of the union?

My suggestion would be to move dropcount in skb->cb[] instead.

mark & reserved_tailroom can stay in the union.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-23 19:10     ` Eric Dumazet
@ 2015-02-23 19:25       ` Eyal Birger
  2015-02-23 19:51         ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Eyal Birger @ 2015-02-23 19:25 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Shmulik Ladkani, David Miller, Eric Dumazet, netdev

On Mon, Feb 23, 2015 at 9:10 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Mon, 2015-02-23 at 20:56 +0200, Shmulik Ladkani wrote:
>> Hi,
>>
>> On Mon, 23 Feb 2015 19:52:03 +0200 Eyal Birger <eyal.birger@gmail.com> wrote:
>> > Commit 977750076d98 ("af_packet: add interframe drop cmsg (v6)")
>> > unionized skb->mark and skb->dropcount in order to allow recording
>> > of the socket drop count while maintaining struct sk_buff size.
>> >
>> > In order to allow for the skb->mark to be fetched by user-space code
>> > it needs to be detached from this union; skb->priority is used
>> > instead.
>> >
>> > The purpose of overloading skb->priority is solely for retaining
>> > struct sk_buff size; skb->priority is not used after the skb is
>> > queued to the socket.
>> >
>> > Use of aliased fields, skb->dropcount and skb->reserved_tailroom,
>> > is either done after the skb is cloned (e.g. in packet_rcv()) or
>> > before skb->priority is used (e.g. in IGMPv3/MLD/TCP).
>>
>> Preserving a small skb size is an important cause.
>>
>> In this case however, it seems that aliasing 'priority' sacrifices
>> maintainability: 'priority' does not seem "naturally orthogonal" to
>> 'drop_count' or 'reserved_tailroom'.
>>
>> One might arm 'priority' in various code flows (future and existing),
>> which may accidentally clash with the aliases. It is unexpected, hard to
>> remember (especially since 'priority' wasn't formerly aliased), and
>> sometimes it's simply not trivial to assure there's no clash.
>>
>> May I suggest to unalias 'mark' out of the union?
>
> My suggestion would be to move dropcount in skb->cb[] instead.
>
> mark & reserved_tailroom can stay in the union.
>
>

The original commit introducing dropcount for af_packet
(see http://marc.info/?l=linux-netdev&m=125450261121971) also claimed
the proper location would in fact be skb->cb[]. However, it was also
claimed that
skb->cb[] is all used up.

Things are even more complicated now as dropcount became a socket
level feature and skb->cb[] is handled differently in each protocol family.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-23 19:25       ` Eyal Birger
@ 2015-02-23 19:51         ` Eric Dumazet
  0 siblings, 0 replies; 21+ messages in thread
From: Eric Dumazet @ 2015-02-23 19:51 UTC (permalink / raw)
  To: Eyal Birger; +Cc: Shmulik Ladkani, David Miller, Eric Dumazet, netdev

On Mon, 2015-02-23 at 21:25 +0200, Eyal Birger wrote:

> The original commit introducing dropcount for af_packet
> (see http://marc.info/?l=linux-netdev&m=125450261121971) also claimed
> the proper location would in fact be skb->cb[]. However, it was also
> claimed that
> skb->cb[] is all used up.
> 
> Things are even more complicated now as dropcount became a socket
> level feature and skb->cb[] is handled differently in each protocol family.

I did a check, and all current sock_recv_ts_and_drops() users have room
in skb->cb[] to store dropcount, say at the beginning of the array.

I have no big opinion here, but intuitively dropcount has a very short
lifetime and skb->cb[] seems appropriate.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-23 18:56   ` Shmulik Ladkani
  2015-02-23 19:10     ` Eric Dumazet
@ 2015-02-23 21:48     ` David Miller
  2015-02-24  6:58       ` Eyal Birger
  1 sibling, 1 reply; 21+ messages in thread
From: David Miller @ 2015-02-23 21:48 UTC (permalink / raw)
  To: shmulik.ladkani; +Cc: eyal.birger, edumazet, netdev

From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Date: Mon, 23 Feb 2015 20:56:33 +0200

> May I suggest to unalias 'mark' out of the union?

Only in exchange for finding a way to remove completely another object
of the same size from sk_buff.

I refuse to let it grow any more, it's already way too big.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-23 21:48     ` David Miller
@ 2015-02-24  6:58       ` Eyal Birger
  2015-02-24 10:10         ` Eyal Birger
  0 siblings, 1 reply; 21+ messages in thread
From: Eyal Birger @ 2015-02-24  6:58 UTC (permalink / raw)
  To: David Miller; +Cc: Shmulik Ladkani, Eric Dumazet, netdev

On Mon, Feb 23, 2015 at 11:48 PM, David Miller <davem@davemloft.net> wrote:
> From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
> Date: Mon, 23 Feb 2015 20:56:33 +0200
>
>> May I suggest to unalias 'mark' out of the union?
>
> Only in exchange for finding a way to remove completely another object
> of the same size from sk_buff.
>
> I refuse to let it grow any more, it's already way too big.

I agree that aliasing skb->priority with skb->dropcount and
skb->reserved_tailroom is
less than optimal.

I can suggest an alternative patch following Eric Dumazet's suggestion in which
skb->dropcount is removed and skb->cb[] is used instead for keeping dropcount.

That would require protocol family implementations to access skb->cb[]
via a common
accessor or use a common offset #define from the start of skb->cb[].

Eyal.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-24  6:58       ` Eyal Birger
@ 2015-02-24 10:10         ` Eyal Birger
  2015-02-24 13:18           ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Eyal Birger @ 2015-02-24 10:10 UTC (permalink / raw)
  To: David Miller; +Cc: Shmulik Ladkani, Eric Dumazet, netdev

On Tue, Feb 24, 2015 at 8:58 AM, Eyal Birger <eyal.birger@gmail.com> wrote:
> On Mon, Feb 23, 2015 at 11:48 PM, David Miller <davem@davemloft.net> wrote:
>> From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
>> Date: Mon, 23 Feb 2015 20:56:33 +0200
>>
>>> May I suggest to unalias 'mark' out of the union?
>>
>> Only in exchange for finding a way to remove completely another object
>> of the same size from sk_buff.
>>
>> I refuse to let it grow any more, it's already way too big.
>
> I agree that aliasing skb->priority with skb->dropcount and
> skb->reserved_tailroom is
> less than optimal.
>
> I can suggest an alternative patch following Eric Dumazet's suggestion in which
> skb->dropcount is removed and skb->cb[] is used instead for keeping dropcount.
>
> That would require protocol family implementations to access skb->cb[]
> via a common
> accessor or use a common offset #define from the start of skb->cb[].
>

Well, gave it a shot... it looks like several protocol families
(packet, rxrpc, bluetooth) do not
have room in skb->cb[] for the dropcount - at least on my 64 bit machine.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-24 10:10         ` Eyal Birger
@ 2015-02-24 13:18           ` Eric Dumazet
  2015-02-24 14:07             ` Eyal Birger
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2015-02-24 13:18 UTC (permalink / raw)
  To: Eyal Birger; +Cc: David Miller, Shmulik Ladkani, Eric Dumazet, netdev

On Tue, 2015-02-24 at 12:10 +0200, Eyal Birger wrote:

> 
> Well, gave it a shot... it looks like several protocol families
> (packet, rxrpc, bluetooth) do not
> have room in skb->cb[] for the dropcount - at least on my 64 bit machine.

No idea how you took a look ?

sizeof(struct packet_skb_cb) == 24  : We have plenty of room ?


bluetooth : Whole struct rxrpc_skb_priv is not used when packet is
stored in receive queue.

We only need bt_cb(skb)->psm & bt_cb(skb)->bdaddr according to
l2cap_skb_msg_name()

An union will be possible.

rxpc is buggy right now anyway, as it reads skb->mark _and_ uses
sock_recv_ts_and_drops(), so skb->mark value is pretty much void.

Note that resend_at field could probably converted into u32. tcp stack
uses same (u32)jiffies trick (tcp_time_stamp)


I never said it was going to be easy ;)

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-24 13:18           ` Eric Dumazet
@ 2015-02-24 14:07             ` Eyal Birger
  2015-02-24 14:25               ` Eric Dumazet
                                 ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Eyal Birger @ 2015-02-24 14:07 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Shmulik Ladkani, Eric Dumazet, netdev

Hi,

On Tue, Feb 24, 2015 at 3:18 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2015-02-24 at 12:10 +0200, Eyal Birger wrote:
>
>>
>> Well, gave it a shot... it looks like several protocol families
>> (packet, rxrpc, bluetooth) do not
>> have room in skb->cb[] for the dropcount - at least on my 64 bit machine.
>
> No idea how you took a look ?
>
> sizeof(struct packet_skb_cb) == 24  : We have plenty of room ?
>

Well, I took a look, then I tried :)

It breaks in packet_rcv() in a BUILD_BUG_ON() assertion. The asserted size is:

sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8

This was introduced in ffbc61117d32dc4e768 ("[PACKET]: Fix skb->cb
clobbering between aux and sockaddr")
It requires the ability to store the maximal possible address length (32).

>
> bluetooth : Whole struct rxrpc_skb_priv is not used when packet is
> stored in receive queue.
>
> We only need bt_cb(skb)->psm & bt_cb(skb)->bdaddr according to
> l2cap_skb_msg_name()
>
> An union will be possible.
>

I can look into that.

> rxpc is buggy right now anyway, as it reads skb->mark _and_ uses
> sock_recv_ts_and_drops(), so skb->mark value is pretty much void.
>

Yes, but it does not seem to use sock_queue_rcv_skb() which sets skb->dropcount.
BTW, aliasing priority instead of mark would allow someone to refactor rxrpc to
use sock_queue_rcv_skb() if desired.

> Note that resend_at field could probably converted into u32. tcp stack
> uses same (u32)jiffies trick (tcp_time_stamp)
>
>
> I never said it was going to be easy ;)
>

I don't mind :) though I wonder whether the added complexity in
partitioning skb->cb[]
between the protocol families and the socket layer is worth it.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-24 14:07             ` Eyal Birger
@ 2015-02-24 14:25               ` Eric Dumazet
  2015-02-24 14:41                 ` Eyal Birger
  2015-02-24 14:41               ` Eric Dumazet
  2015-02-25 14:10               ` Eyal Birger
  2 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2015-02-24 14:25 UTC (permalink / raw)
  To: Eyal Birger; +Cc: David Miller, Shmulik Ladkani, Eric Dumazet, netdev

On Tue, 2015-02-24 at 16:07 +0200, Eyal Birger wrote:

> > rxpc is buggy right now anyway, as it reads skb->mark _and_ uses
> > sock_recv_ts_and_drops(), so skb->mark value is pretty much void.
> >
> 
> Yes, but it does not seem to use sock_queue_rcv_skb() which sets skb->dropcount.

Well, SO_RXQ_OVFL is broken for these sockets, as we'll report a
dropcount if skb->mark was set.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-24 14:07             ` Eyal Birger
  2015-02-24 14:25               ` Eric Dumazet
@ 2015-02-24 14:41               ` Eric Dumazet
  2015-02-24 15:24                 ` Eyal Birger
  2015-02-25 14:10               ` Eyal Birger
  2 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2015-02-24 14:41 UTC (permalink / raw)
  To: Eyal Birger; +Cc: David Miller, Shmulik Ladkani, Eric Dumazet, netdev

On Tue, 2015-02-24 at 16:07 +0200, Eyal Birger wrote:

> Well, I took a look, then I tried :)
> 
> It breaks in packet_rcv() in a BUILD_BUG_ON() assertion. The asserted size is:
> 
> sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8
> 
> This was introduced in ffbc61117d32dc4e768 ("[PACKET]: Fix skb->cb
> clobbering between aux and sockaddr")
> It requires the ability to store the maximal possible address length (32).

OK this might require to move origlen elsewhere in skb.

skb->dev for example is set to NULL, same for skb dst.

We have to be a bit creative/hacky to keep skb size small.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-24 14:25               ` Eric Dumazet
@ 2015-02-24 14:41                 ` Eyal Birger
  0 siblings, 0 replies; 21+ messages in thread
From: Eyal Birger @ 2015-02-24 14:41 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Shmulik Ladkani, Eric Dumazet, netdev



> On 24 Feb 2015, at 16:25, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
> On Tue, 2015-02-24 at 16:07 +0200, Eyal Birger wrote:
> 
>>> rxpc is buggy right now anyway, as it reads skb->mark _and_ uses
>>> sock_recv_ts_and_drops(), so skb->mark value is pretty much void.
>> 
>> Yes, but it does not seem to use sock_queue_rcv_skb() which sets skb->dropcount.
> 
> Well, SO_RXQ_OVFL is broken for these sockets, as we'll report a
> dropcount if skb->mark was set.
> 
> 
Right. That feature seems broken.

Fixing it would mean either 
avoiding the use of skb->mark in 
rxrpc or aliasing dropcount with 
something else.

I am also not sure what should be the
semantics of SO_RCVMARK on these
sockets.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-24 14:41               ` Eric Dumazet
@ 2015-02-24 15:24                 ` Eyal Birger
  2015-02-25 20:49                   ` Willem de Bruijn
  0 siblings, 1 reply; 21+ messages in thread
From: Eyal Birger @ 2015-02-24 15:24 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Shmulik Ladkani, Eric Dumazet, netdev

On Tue, Feb 24, 2015 at 4:41 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2015-02-24 at 16:07 +0200, Eyal Birger wrote:
>
>> Well, I took a look, then I tried :)
>>
>> It breaks in packet_rcv() in a BUILD_BUG_ON() assertion. The asserted size is:
>>
>> sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8
>>
>> This was introduced in ffbc61117d32dc4e768 ("[PACKET]: Fix skb->cb
>> clobbering between aux and sockaddr")
>> It requires the ability to store the maximal possible address length (32).
>
> OK this might require to move origlen elsewhere in skb.
>
> skb->dev for example is set to NULL, same for skb dst.
>
> We have to be a bit creative/hacky to keep skb size small.
>

I agree. I don't think these feature deserve an skb size increase, so
some hackery is required.

Though, as skb->cb[] is somewhat 'owned' by the protocol families on
socket enqueue
I tend to find aliasing skb->priority with skb->dropcount "a lesser
evil" as compared with
partitioning the skb->cb[] space.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-24 14:07             ` Eyal Birger
  2015-02-24 14:25               ` Eric Dumazet
  2015-02-24 14:41               ` Eric Dumazet
@ 2015-02-25 14:10               ` Eyal Birger
  2015-02-25 16:50                 ` Eric Dumazet
  2 siblings, 1 reply; 21+ messages in thread
From: Eyal Birger @ 2015-02-25 14:10 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Shmulik Ladkani, Eric Dumazet, netdev

>>
>> bluetooth : Whole struct rxrpc_skb_priv is not used when packet is
>> stored in receive queue.
>>
>> We only need bt_cb(skb)->psm & bt_cb(skb)->bdaddr according to
>> l2cap_skb_msg_name()
>>
>> An union will be possible.
>>
>
> I can look into that.

Another approach is to get rid struct_req_ctrl defintion -
It is never used as a separate object outside of struct bt_skb_cb and inlining
it saves 8 bytes on a 64 bit machine, i.e.:

 typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status,
u16 opcode);

-struct hci_req_ctrl {
-       bool                    start;
-       u8                      event;
-       hci_req_complete_t      complete;
-};
-
 struct bt_skb_cb {
        __u8 pkt_type;
        __u8 incoming;
        __u16 opcode;
        __u16 expect;
        __u8 force_active;
+       bool req_start;
+       u8 req_event;
+       hci_req_complete_t req_complete;
        struct l2cap_chan *chan;
        struct l2cap_ctrl control;
-       struct hci_req_ctrl req;
        bdaddr_t bdaddr;
        __le16 psm;
 };

Would you prefer this approach?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-25 14:10               ` Eyal Birger
@ 2015-02-25 16:50                 ` Eric Dumazet
  0 siblings, 0 replies; 21+ messages in thread
From: Eric Dumazet @ 2015-02-25 16:50 UTC (permalink / raw)
  To: Eyal Birger; +Cc: David Miller, Shmulik Ladkani, Eric Dumazet, netdev

On Wed, 2015-02-25 at 16:10 +0200, Eyal Birger wrote:

> Would you prefer this approach?

This looks fine to me.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-24 15:24                 ` Eyal Birger
@ 2015-02-25 20:49                   ` Willem de Bruijn
  2015-02-25 21:15                     ` Eyal Birger
  0 siblings, 1 reply; 21+ messages in thread
From: Willem de Bruijn @ 2015-02-25 20:49 UTC (permalink / raw)
  To: Eyal Birger
  Cc: Eric Dumazet, David Miller, Shmulik Ladkani, Eric Dumazet,
	Network Development

>> We have to be a bit creative/hacky to keep skb size small.
>>
>
> I agree. I don't think these feature deserve an skb size increase, so
> some hackery is required.
>
> Though, as skb->cb[] is somewhat 'owned' by the protocol families on
> socket enqueue
> I tend to find aliasing skb->priority with skb->dropcount "a lesser
> evil" as compared with
> partitioning the skb->cb[] space.

Please consider another field than priority. There are use
cases that want to receive priority, similar to reading mark.

Indeed, I have a draft packet auxdata patch for one user who wants
to see the iptables changes (and infer tc effects) on egress packets.
Fwiw, that patch also exposes mark, and runs into exactly this issue
that you're fixing, so thanks :)

Fields that are consistently NULL on enqueue, such as the dev field
that Eric proposed, do not have this drawback.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark
  2015-02-25 20:49                   ` Willem de Bruijn
@ 2015-02-25 21:15                     ` Eyal Birger
  0 siblings, 0 replies; 21+ messages in thread
From: Eyal Birger @ 2015-02-25 21:15 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Eric Dumazet, David Miller, Shmulik Ladkani, Eric Dumazet,
	Network Development

On Wed, Feb 25, 2015 at 10:49 PM, Willem de Bruijn <willemb@google.com> wrote:
>>> We have to be a bit creative/hacky to keep skb size small.
>>>
>>
>> I agree. I don't think these feature deserve an skb size increase, so
>> some hackery is required.
>>
>> Though, as skb->cb[] is somewhat 'owned' by the protocol families on
>> socket enqueue
>> I tend to find aliasing skb->priority with skb->dropcount "a lesser
>> evil" as compared with
>> partitioning the skb->cb[] space.
>
> Please consider another field than priority. There are use
> cases that want to receive priority, similar to reading mark.
>
> Indeed, I have a draft packet auxdata patch for one user who wants
> to see the iptables changes (and infer tc effects) on egress packets.
> Fwiw, that patch also exposes mark, and runs into exactly this issue
> that you're fixing, so thanks :)
>
> Fields that are consistently NULL on enqueue, such as the dev field
> that Eric proposed, do not have this drawback.

Thanks! I was wondering whether someone would want to fetch skb->priority.

I had already conceded that aliasing skb->priority won't hold.

I plan to submit a patch series for moving skb->dropcount to skb->cb[].
Following that I will re-submit the patch for fetching skb->mark in auxdata.

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2015-02-25 21:15 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-23 17:52 [PATCH net-next v3 1/4] net: Rename sock_recv_ts_and_drops() to sock_cmsg_recv() Eyal Birger
2015-02-23 17:52 ` [PATCH net-next v3 2/4] ipv4,v6: avoid setting skb->priority when skb->reserved_tailroom is in use in igmpv3/mld Eyal Birger
2015-02-23 17:52 ` [PATCH net-next v3 3/4] net: use skb->priority for overloading skb->dropcount and skb->reserved_tailroom instead of skb->mark Eyal Birger
2015-02-23 18:56   ` Shmulik Ladkani
2015-02-23 19:10     ` Eric Dumazet
2015-02-23 19:25       ` Eyal Birger
2015-02-23 19:51         ` Eric Dumazet
2015-02-23 21:48     ` David Miller
2015-02-24  6:58       ` Eyal Birger
2015-02-24 10:10         ` Eyal Birger
2015-02-24 13:18           ` Eric Dumazet
2015-02-24 14:07             ` Eyal Birger
2015-02-24 14:25               ` Eric Dumazet
2015-02-24 14:41                 ` Eyal Birger
2015-02-24 14:41               ` Eric Dumazet
2015-02-24 15:24                 ` Eyal Birger
2015-02-25 20:49                   ` Willem de Bruijn
2015-02-25 21:15                     ` Eyal Birger
2015-02-25 14:10               ` Eyal Birger
2015-02-25 16:50                 ` Eric Dumazet
2015-02-23 17:52 ` [PATCH net-next v3 4/4] net: Introducing socket mark receive socket option Eyal Birger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.