All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/3] TCP connection repair (v3)
@ 2012-03-28 15:36 Pavel Emelyanov
  2012-03-28 15:36 ` [PATCH 1/3] tcp: Move code around Pavel Emelyanov
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Pavel Emelyanov @ 2012-03-28 15:36 UTC (permalink / raw)
  To: Linux Netdev List, David Miller

Hi!

Attempt #3 with transparent TCP connection hijacking
(previous one is here http://lists.openwall.net/netdev/2012/03/06/65).


Changes since v2:

* The CAP_NET_ADMIN is required to turn repair on, not CAP_SYS_ADMIN

* Changed read queue seq sockoption to work on the rcv_nxt, not the
  copied_seq to address the issue with syn flag in the fake header
  (see below).

* Resolved issues with syn and fin flags in fake headers.

  Fin can and should be dropped. The repair mode is currently allowed
  only for closed and established sockets and thus we cannot meet an 
  skb with this flag in the original socket (queuing fin to receive
  queue switches the established state to the close-wait one).

  Syn can also be dropped. This flag in the recv queue's skb means the
  respective skb's seq is off-by-one relative to the actual amount of 
  data on it. Thus, removing the flag from fake skb and fixing the seq 
  respectively solves the issue.

  However, in order to do so it's not enough to know the copied_seq and
  recv queue length only (rcv_nxt should be copied_seq plus data length
  plus "syn-is-there"). Thus, the rcv queue seq get/set sockoption is
  changed to work on the rcv_nxt itself. IOW I emulate the situation
  when the packet with data and syn is split into two -- a packet with
  syn and a packet with data and the former one is already "eaten".


Thanks,
Pavel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/3] tcp: Move code around
  2012-03-28 15:36 [PATCH net-next 0/3] TCP connection repair (v3) Pavel Emelyanov
@ 2012-03-28 15:36 ` Pavel Emelyanov
  2012-03-28 15:37 ` [PATCH 2/3] tcp: Initial repair mode Pavel Emelyanov
  2012-03-28 15:38 ` [PATCH 3/3] tcp: Repair socket queues Pavel Emelyanov
  2 siblings, 0 replies; 15+ messages in thread
From: Pavel Emelyanov @ 2012-03-28 15:36 UTC (permalink / raw)
  To: Linux Netdev List, David Miller

This is just a preparation patch, which makes the code needed for
TCP repair ready for use.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
---
 include/net/tcp.h     |    3 ++
 net/ipv4/tcp.c        |    2 +-
 net/ipv4/tcp_input.c  |   81 +++++++++++++++++++++++++++++--------------------
 net/ipv4/tcp_output.c |    4 +-
 4 files changed, 54 insertions(+), 36 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f75a04d..c7a00eb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -435,6 +435,9 @@ extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 					struct request_values *rvp);
 extern int tcp_disconnect(struct sock *sk, int flags);
 
+void tcp_connect_init(struct sock *sk);
+void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
+void tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen);
 
 /* From syncookies.c */
 extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cfd7edd..9e7f9ba 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -918,7 +918,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int iovlen, flags, err, copied;
-	int mss_now, size_goal;
+	int mss_now = 0, size_goal;
 	bool sg;
 	long timeo;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e886e2f..3767a1a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5322,6 +5322,14 @@ discard:
 	return 0;
 }
 
+void tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen)
+{
+	__skb_pull(skb, hdrlen);
+	__skb_queue_tail(&sk->sk_receive_queue, skb);
+	skb_set_owner_r(skb, sk);
+	tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+}
+
 /*
  *	TCP receive function for the ESTABLISHED state.
  *
@@ -5487,10 +5495,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
 
 				/* Bulk data transfer: receiver */
-				__skb_pull(skb, tcp_header_len);
-				__skb_queue_tail(&sk->sk_receive_queue, skb);
-				skb_set_owner_r(skb, sk);
-				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+				tcp_queue_rcv(sk, skb, tcp_header_len);
 			}
 
 			tcp_event_data_recv(sk, skb);
@@ -5556,6 +5561,44 @@ discard:
 }
 EXPORT_SYMBOL(tcp_rcv_established);
 
+void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	tcp_set_state(sk, TCP_ESTABLISHED);
+
+	if (skb != NULL)
+		security_inet_conn_established(sk, skb);
+
+	/* Make sure socket is routed, for correct metrics.  */
+	icsk->icsk_af_ops->rebuild_header(sk);
+
+	tcp_init_metrics(sk);
+
+	tcp_init_congestion_control(sk);
+
+	/* Prevent spurious tcp_cwnd_restart() on first data
+	 * packet.
+	 */
+	tp->lsndtime = tcp_time_stamp;
+
+	tcp_init_buffer_space(sk);
+
+	if (sock_flag(sk, SOCK_KEEPOPEN))
+		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
+
+	if (!tp->rx_opt.snd_wscale)
+		__tcp_fast_path_on(tp, tp->snd_wnd);
+	else
+		tp->pred_flags = 0;
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_state_change(sk);
+		sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
+	}
+}
+
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 const struct tcphdr *th, unsigned int len)
 {
@@ -5688,36 +5731,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		}
 
 		smp_mb();
-		tcp_set_state(sk, TCP_ESTABLISHED);
-
-		security_inet_conn_established(sk, skb);
-
-		/* Make sure socket is routed, for correct metrics.  */
-		icsk->icsk_af_ops->rebuild_header(sk);
-
-		tcp_init_metrics(sk);
 
-		tcp_init_congestion_control(sk);
-
-		/* Prevent spurious tcp_cwnd_restart() on first data
-		 * packet.
-		 */
-		tp->lsndtime = tcp_time_stamp;
-
-		tcp_init_buffer_space(sk);
-
-		if (sock_flag(sk, SOCK_KEEPOPEN))
-			inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
-
-		if (!tp->rx_opt.snd_wscale)
-			__tcp_fast_path_on(tp, tp->snd_wnd);
-		else
-			tp->pred_flags = 0;
-
-		if (!sock_flag(sk, SOCK_DEAD)) {
-			sk->sk_state_change(sk);
-			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
-		}
+		tcp_finish_connect(sk, skb);
 
 		if (sk->sk_write_pending ||
 		    icsk->icsk_accept_queue.rskq_defer_accept ||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 364784a..1db25af 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2561,7 +2561,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 EXPORT_SYMBOL(tcp_make_synack);
 
 /* Do all connect socket setups that can be done AF independent. */
-static void tcp_connect_init(struct sock *sk)
+void tcp_connect_init(struct sock *sk)
 {
 	const struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -2616,6 +2616,7 @@ static void tcp_connect_init(struct sock *sk)
 	tp->snd_una = tp->write_seq;
 	tp->snd_sml = tp->write_seq;
 	tp->snd_up = tp->write_seq;
+	tp->snd_nxt = tp->write_seq;
 	tp->rcv_nxt = 0;
 	tp->rcv_wup = 0;
 	tp->copied_seq = 0;
@@ -2641,7 +2642,6 @@ int tcp_connect(struct sock *sk)
 	/* Reserve space for headers. */
 	skb_reserve(buff, MAX_TCP_HEADER);
 
-	tp->snd_nxt = tp->write_seq;
 	tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
 	TCP_ECN_send_syn(sk, buff);
 
-- 
1.5.5.6

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/3] tcp: Initial repair mode
  2012-03-28 15:36 [PATCH net-next 0/3] TCP connection repair (v3) Pavel Emelyanov
  2012-03-28 15:36 ` [PATCH 1/3] tcp: Move code around Pavel Emelyanov
@ 2012-03-28 15:37 ` Pavel Emelyanov
  2012-03-28 17:20   ` Glauber Costa
  2012-03-28 20:39   ` Ben Hutchings
  2012-03-28 15:38 ` [PATCH 3/3] tcp: Repair socket queues Pavel Emelyanov
  2 siblings, 2 replies; 15+ messages in thread
From: Pavel Emelyanov @ 2012-03-28 15:37 UTC (permalink / raw)
  To: Linux Netdev List, David Miller

This includes (according to the previous description):

* TCP_REPAIR sockoption

This one just puts the socket in/out of the repair mode.
Allowed for CAP_NET_ADMIN and for closed/established sockets only.
When repair mode is turned off and the socket happens to be in
the established state the window probe is sent to the peer to
'unlock' the connection.

* TCP_REPAIR_QUEUE sockoption

This one sets the queue which we're about to repair. The
'no-queue' is set by default.

* TCP_QUEUE_SEQ sockoption

Sets the write_seq/rcv_nxt of a selected repaired queue.
Allowed for TCP_CLOSE-d sockets only. When the socket changes
its state the other seq-s are changed by the kernel according
to the protocol rules (most of the existing code is actually
reused).

* Ability to forcibly bind a socket to a port

The sk->sk_reuse is set to 2, denoting that the socket in question
should be bound as if all the others in the system are configured
with the SO_REUSEADDR option.

* Immediate connect modification

The connect syscall initializes the connection, then directly jumps
to the code which finalizes it.

* Silent close modification

The close just aborts the connection (similar to SO_LINGER with 0
time) but without sending any FIN/RST-s to peer.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
---
 include/linux/tcp.h             |   14 +++++++-
 include/net/tcp.h               |    2 +
 net/ipv4/inet_connection_sock.c |    3 ++
 net/ipv4/tcp.c                  |   68 ++++++++++++++++++++++++++++++++++++++-
 net/ipv4/tcp_ipv4.c             |   19 +++++++++--
 net/ipv4/tcp_output.c           |   16 +++++++--
 6 files changed, 114 insertions(+), 8 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index b6c62d2..4e90e6a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -106,6 +106,16 @@ enum {
 #define TCP_THIN_LINEAR_TIMEOUTS 16      /* Use linear timeouts for thin streams*/
 #define TCP_THIN_DUPACK         17      /* Fast retrans. after 1 dupack */
 #define TCP_USER_TIMEOUT	18	/* How long for loss retry before timeout */
+#define TCP_REPAIR		19	/* TCP sock is under repair right now */
+#define TCP_REPAIR_QUEUE	20
+#define TCP_QUEUE_SEQ		21
+
+enum {
+	TCP_NO_QUEUE,
+	TCP_RECV_QUEUE,
+	TCP_SEND_QUEUE,
+	TCP_QUEUES_NR,
+};
 
 /* for TCP_INFO socket option */
 #define TCPI_OPT_TIMESTAMPS	1
@@ -353,7 +363,9 @@ struct tcp_sock {
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
 		thin_dupack : 1,/* Fast retransmit on first dupack      */
-		unused      : 2;
+		repair      : 1,
+		unused      : 1;
+	u8	repair_queue;
 
 /* RTT measurement */
 	u32	srtt;		/* smoothed round trip time << 3	*/
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c7a00eb..f577052 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -612,6 +612,8 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp)
  */
 extern u32 __tcp_select_window(struct sock *sk);
 
+void tcp_send_window_probe(struct sock *sk);
+
 /* TCP timestamps are only 32-bits, this causes a slight
  * complication on 64-bit systems since we store a snapshot
  * of jiffies in the buffer control blocks below.  We decided
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 19d66ce..92788af 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -172,6 +172,9 @@ have_snum:
 	goto tb_not_found;
 tb_found:
 	if (!hlist_empty(&tb->owners)) {
+		if (sk->sk_reuse == 2)
+			goto success;
+
 		if (tb->fastreuse > 0 &&
 		    sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
 		    smallest_size == -1) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9e7f9ba..65ae921 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1935,7 +1935,9 @@ void tcp_close(struct sock *sk, long timeout)
 	 * advertise a zero window, then kill -9 the FTP client, wheee...
 	 * Note: timeout is always zero in such a case.
 	 */
-	if (data_was_unread) {
+	if (tcp_sk(sk)->repair) {
+		sk->sk_prot->disconnect(sk, 0);
+	} else if (data_was_unread) {
 		/* Unread data was tossed, zap the connection. */
 		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
 		tcp_set_state(sk, TCP_CLOSE);
@@ -2074,6 +2076,8 @@ int tcp_disconnect(struct sock *sk, int flags)
 	/* ABORT function of RFC793 */
 	if (old_state == TCP_LISTEN) {
 		inet_csk_listen_stop(sk);
+	} else if (unlikely(tp->repair)) {
+		sk->sk_err = ECONNABORTED;
 	} else if (tcp_need_reset(old_state) ||
 		   (tp->snd_nxt != tp->write_seq &&
 		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -2125,6 +2129,12 @@ int tcp_disconnect(struct sock *sk, int flags)
 }
 EXPORT_SYMBOL(tcp_disconnect);
 
+static inline int tcp_can_repair_sock(struct sock *sk)
+{
+	return capable(CAP_NET_ADMIN) &&
+		((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
+}
+
 /*
  *	Socket option code for TCP.
  */
@@ -2297,6 +2307,42 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			tp->thin_dupack = val;
 		break;
 
+	case TCP_REPAIR:
+		if (!tcp_can_repair_sock(sk))
+			err = -EPERM;
+		else if (val == 1) {
+			tp->repair = 1;
+			sk->sk_reuse = 2;
+			tp->repair_queue = TCP_NO_QUEUE;
+		} else if (val == 0) {
+			tp->repair = 0;
+			sk->sk_reuse = 0;
+			tcp_send_window_probe(sk);
+		} else
+			err = -EINVAL;
+
+		break;
+
+	case TCP_REPAIR_QUEUE:
+		if (!tp->repair)
+			err = -EPERM;
+		else if (val <= TCP_QUEUES_NR)
+			tp->repair_queue = val;
+		else
+			err = -EINVAL;
+		break;
+
+	case TCP_QUEUE_SEQ:
+		if (sk->sk_state != TCP_CLOSE)
+			err = -EPERM;
+		else if (tp->repair_queue == TCP_SEND_QUEUE)
+			tp->write_seq = val;
+		else if (tp->repair_queue == TCP_RECV_QUEUE)
+			tp->rcv_nxt = val;
+		else
+			err = -EINVAL;
+		break;
+
 	case TCP_CORK:
 		/* When set indicates to always queue non-full frames.
 		 * Later the user clears this option and we transmit
@@ -2632,6 +2678,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		val = tp->thin_dupack;
 		break;
 
+	case TCP_REPAIR:
+		val = tp->repair;
+		break;
+
+	case TCP_REPAIR_QUEUE:
+		if (tp->repair)
+			val = tp->repair_queue;
+		else
+			return -EINVAL;
+		break;
+
+	case TCP_QUEUE_SEQ:
+		if (tp->repair_queue == TCP_SEND_QUEUE)
+			val = tp->write_seq;
+		else if (tp->repair_queue == TCP_RECV_QUEUE)
+			val = tp->rcv_nxt;
+		else
+			return -EINVAL;
+		break;
+
 	case TCP_USER_TIMEOUT:
 		val = jiffies_to_msecs(icsk->icsk_user_timeout);
 		break;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3a25cf7..69190a9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -138,6 +138,14 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 
+static int tcp_repair_connect(struct sock *sk)
+{
+	tcp_connect_init(sk);
+	tcp_finish_connect(sk, NULL);
+
+	return 0;
+}
+
 /* This will initiate an outgoing connection. */
 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -196,7 +204,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		/* Reset inherited state */
 		tp->rx_opt.ts_recent	   = 0;
 		tp->rx_opt.ts_recent_stamp = 0;
-		tp->write_seq		   = 0;
+		if (!tp->repair)
+			tp->write_seq	   = 0;
 	}
 
 	if (tcp_death_row.sysctl_tw_recycle &&
@@ -247,7 +256,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk->sk_gso_type = SKB_GSO_TCPV4;
 	sk_setup_caps(sk, &rt->dst);
 
-	if (!tp->write_seq)
+	if (!tp->write_seq && !tp->repair)
 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 							   inet->inet_daddr,
 							   inet->inet_sport,
@@ -255,7 +264,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	inet->inet_id = tp->write_seq ^ jiffies;
 
-	err = tcp_connect(sk);
+	if (likely(!tp->repair))
+		err = tcp_connect(sk);
+	else
+		err = tcp_repair_connect(sk);
+
 	rt = NULL;
 	if (err)
 		goto failure;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1db25af..4e2ce39 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2617,9 +2617,11 @@ void tcp_connect_init(struct sock *sk)
 	tp->snd_sml = tp->write_seq;
 	tp->snd_up = tp->write_seq;
 	tp->snd_nxt = tp->write_seq;
-	tp->rcv_nxt = 0;
-	tp->rcv_wup = 0;
-	tp->copied_seq = 0;
+
+	if (!tp->repair)
+		tp->rcv_nxt = 0;
+	tp->rcv_wup = tp->rcv_nxt;
+	tp->copied_seq = tp->rcv_nxt;
 
 	inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
 	inet_csk(sk)->icsk_retransmits = 0;
@@ -2790,6 +2792,14 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
+void tcp_send_window_probe(struct sock *sk)
+{
+	if (sk->sk_state == TCP_ESTABLISHED) {
+		tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
+		tcp_xmit_probe_skb(sk, 0);
+	}
+}
+
 /* Initiate keepalive or window probe from timer. */
 int tcp_write_wakeup(struct sock *sk)
 {
-- 
1.5.5.6

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/3] tcp: Repair socket queues
  2012-03-28 15:36 [PATCH net-next 0/3] TCP connection repair (v3) Pavel Emelyanov
  2012-03-28 15:36 ` [PATCH 1/3] tcp: Move code around Pavel Emelyanov
  2012-03-28 15:37 ` [PATCH 2/3] tcp: Initial repair mode Pavel Emelyanov
@ 2012-03-28 15:38 ` Pavel Emelyanov
  2012-03-29 10:30   ` Li Yu
  2 siblings, 1 reply; 15+ messages in thread
From: Pavel Emelyanov @ 2012-03-28 15:38 UTC (permalink / raw)
  To: Linux Netdev List, David Miller

Reading queues under repair mode is done with recvmsg call.
The queue-under-repair set by TCP_REPAIR_QUEUE option is used
to determine which queue should be read. Thus both send and
receive queue can be read with this.

Caller must pass the MSG_PEEK flag.

Writing to queues is done with sendmsg call and yet again --
the repair-queue option can be used to push data into the
receive queue.

When putting an skb into receive queue a zero tcp header is
appended to its head to address the tcp_hdr(skb)->syn and
the ->fin checks by the (after repair) tcp_recvmsg. These
flags are both set to zero, and here is why.

The fin cannot be met in the queue while reading the source
socket, since the repair only works for closed/established
sockets and queueing fin packet always changes its state.

The syn in the queue denotes that the respective skb's seq
is "off-by-one" as compared to the actual payload length. Thus,
at the rcv queue refill we can just drop this flag and set the
skb's sequences to precise values. IOW -- emulate the situation
when the packet with data and syn is split into two -- a
packet with syn and a packet with data and the former one is
already "eaten".

When the repair mode is turned off, the write queue seqs are
updated so that the whole queue is considered to be 'already sent,
waiting for ACKs' (snd_una <= snd_nxt = write_seq). From the
protocol POV the send queue looks like it was sent, but the data
between the snd_una and snd_nxt is lost in the network.

This helps to avoid another sockoption for setting the snd_nxt
sequence. Leaving the whole queue in a 'not yet sent' state (as
it will be after sendmsg-s) will not allow to receive any acks
from the peer since the ack_seq will be after the snd_nxt. Thus
even the ack for the window probe will be dropped and the
connection will be 'locked' with the zero peer window.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
---
 net/ipv4/tcp.c        |   89 +++++++++++++++++++++++++++++++++++++++++++++++--
 net/ipv4/tcp_output.c |    1 +
 2 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 65ae921..2ab3a31 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -911,6 +911,39 @@ static inline int select_size(const struct sock *sk, bool sg)
 	return tmp;
 }
 
+static int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
+{
+	struct sk_buff *skb;
+	struct tcp_skb_cb *cb;
+	struct tcphdr *th;
+
+	skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
+	if (!skb)
+		goto err;
+
+	th = (struct tcphdr *)skb_put(skb, sizeof(*th));
+	skb_reset_transport_header(skb);
+	memset(th, 0, sizeof(*th));
+
+	if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
+		goto err_free;
+
+	cb = TCP_SKB_CB(skb);
+
+	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
+	TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
+
+	tcp_queue_rcv(sk, skb, sizeof(*th));
+
+	return size;
+
+err_free:
+	kfree_skb(skb);
+err:
+	return -ENOMEM;
+}
+
 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		size_t size)
 {
@@ -932,6 +965,19 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
 			goto out_err;
 
+	if (unlikely(tp->repair)) {
+		if (tp->repair_queue == TCP_RECV_QUEUE) {
+			copied = tcp_send_rcvq(sk, msg, size);
+			goto out;
+		}
+
+		err = -EINVAL;
+		if (tp->repair_queue == TCP_NO_QUEUE)
+			goto out_err;
+
+		/* 'common' sending to sendq */
+	}
+
 	/* This should be in poll */
 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
@@ -1089,7 +1135,7 @@ new_segment:
 			if ((seglen -= copy) == 0 && iovlen == 0)
 				goto out;
 
-			if (skb->len < max || (flags & MSG_OOB))
+			if (skb->len < max || (flags & MSG_OOB) || tp->repair)
 				continue;
 
 			if (forced_push(tp)) {
@@ -1102,7 +1148,7 @@ new_segment:
 wait_for_sndbuf:
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
-			if (copied)
+			if (copied && !tp->repair)
 				tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
 			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
@@ -1113,7 +1159,7 @@ wait_for_memory:
 	}
 
 out:
-	if (copied)
+	if (copied && !tp->repair)
 		tcp_push(sk, flags, mss_now, tp->nonagle);
 	release_sock(sk);
 	return copied;
@@ -1187,6 +1233,24 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
 	return -EAGAIN;
 }
 
+static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct sk_buff *skb;
+	int copied = 0, err = 0;
+
+	/* XXX -- need to support SO_PEEK_OFF */
+
+	skb_queue_walk(&sk->sk_write_queue, skb) {
+		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len);
+		if (err)
+			break;
+
+		copied += skb->len;
+	}
+
+	return err ?: copied;
+}
+
 /* Clean up the receive buffer for full frames taken by the user,
  * then send an ACK if necessary.  COPIED is the number of bytes
  * tcp_recvmsg has given to the user so far, it speeds up the
@@ -1432,6 +1496,21 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (flags & MSG_OOB)
 		goto recv_urg;
 
+	if (unlikely(tp->repair)) {
+		err = -EPERM;
+		if (!(flags & MSG_PEEK))
+			goto out;
+
+		if (tp->repair_queue == TCP_SEND_QUEUE)
+			goto recv_sndq;
+
+		err = -EINVAL;
+		if (tp->repair_queue == TCP_NO_QUEUE)
+			goto out;
+
+		/* 'common' recv queue MSG_PEEK-ing */
+	}
+
 	seq = &tp->copied_seq;
 	if (flags & MSG_PEEK) {
 		peek_seq = tp->copied_seq;
@@ -1783,6 +1862,10 @@ out:
 recv_urg:
 	err = tcp_recv_urg(sk, msg, len, flags);
 	goto out;
+
+recv_sndq:
+	err = tcp_peek_sndq(sk, msg, len);
+	goto out;
 }
 EXPORT_SYMBOL(tcp_recvmsg);
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4e2ce39..b29d612 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2796,6 +2796,7 @@ void tcp_send_window_probe(struct sock *sk)
 {
 	if (sk->sk_state == TCP_ESTABLISHED) {
 		tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
+		tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq;
 		tcp_xmit_probe_skb(sk, 0);
 	}
 }
-- 
1.5.5.6

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] tcp: Initial repair mode
  2012-03-28 15:37 ` [PATCH 2/3] tcp: Initial repair mode Pavel Emelyanov
@ 2012-03-28 17:20   ` Glauber Costa
  2012-03-29  9:52     ` Pavel Emelyanov
  2012-03-28 20:39   ` Ben Hutchings
  1 sibling, 1 reply; 15+ messages in thread
From: Glauber Costa @ 2012-03-28 17:20 UTC (permalink / raw)
  To: Pavel Emelyanov; +Cc: Linux Netdev List, David Miller

> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 9e7f9ba..65ae921 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -1935,7 +1935,9 @@ void tcp_close(struct sock *sk, long timeout)
>   	 * advertise a zero window, then kill -9 the FTP client, wheee...
>   	 * Note: timeout is always zero in such a case.
>   	 */
> -	if (data_was_unread) {
> +	if (tcp_sk(sk)->repair) {
> +		sk->sk_prot->disconnect(sk, 0);
> +	} else if (data_was_unread) {
>   		/* Unread data was tossed, zap the connection. */
>   		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
>   		tcp_set_state(sk, TCP_CLOSE);
> @@ -2074,6 +2076,8 @@ int tcp_disconnect(struct sock *sk, int flags)
>   	/* ABORT function of RFC793 */
>   	if (old_state == TCP_LISTEN) {
>   		inet_csk_listen_stop(sk);
> +	} else if (unlikely(tp->repair)) {
> +		sk->sk_err = ECONNABORTED;
>   	} else if (tcp_need_reset(old_state) ||
>   		   (tp->snd_nxt != tp->write_seq&&
>   		(1<<  old_state)&  (TCPF_CLOSING | TCPF_LAST_ACK))) {

The patch looks good in general.
Single nitpick is that maybe you should be consistent in your use of
unlikely. All of them seem equally unlikely, so I'd say you should wrap
both.

>
> +	case TCP_REPAIR:
> +		if (!tcp_can_repair_sock(sk))
> +			err = -EPERM;
> +		else if (val == 1) {
> +			tp->repair = 1;
> +			sk->sk_reuse = 2;
> +			tp->repair_queue = TCP_NO_QUEUE;
> +		} else if (val == 0) {
> +			tp->repair = 0;
> +			sk->sk_reuse = 0;
> +			tcp_send_window_probe(sk);
> +		} else
> +			err = -EINVAL;
> +
> +		break;
> +
> +	case TCP_REPAIR_QUEUE:

Don't we need to test tcp_can_repair_sock() in all of them?
I understand that TCP_REPAIR always comes before the other ones,
so that means the socket is already in repair mode. But what
should be the behavior in case the process drops privileges?
Should it still be able to continue with the repair?

My first impression is that we need CAP_NET_ADMIN all along, so we 
should make sure it's there.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] tcp: Initial repair mode
  2012-03-28 15:37 ` [PATCH 2/3] tcp: Initial repair mode Pavel Emelyanov
  2012-03-28 17:20   ` Glauber Costa
@ 2012-03-28 20:39   ` Ben Hutchings
  2012-03-29  9:53     ` Pavel Emelyanov
  1 sibling, 1 reply; 15+ messages in thread
From: Ben Hutchings @ 2012-03-28 20:39 UTC (permalink / raw)
  To: Pavel Emelyanov; +Cc: Linux Netdev List, David Miller

On Wed, 2012-03-28 at 19:37 +0400, Pavel Emelyanov wrote:
[...]
> * Ability to forcibly bind a socket to a port
> 
> The sk->sk_reuse is set to 2 denoting, that the socket is question
> should be bound as if all the others in the system are configured
> with the SO_REUSEADDR option.

Shouldn't this constant be named?

[...]
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
[...]
> +	case TCP_REPAIR_QUEUE:
> +		if (!tp->repair)
> +			err = -EPERM;
> +		else if (val <= TCP_QUEUES_NR)

Off-by-one.

> +			tp->repair_queue = val;
> +		else
> +			err = -EINVAL;
> +		break;
[...]

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] tcp: Initial repair mode
  2012-03-28 17:20   ` Glauber Costa
@ 2012-03-29  9:52     ` Pavel Emelyanov
  0 siblings, 0 replies; 15+ messages in thread
From: Pavel Emelyanov @ 2012-03-29  9:52 UTC (permalink / raw)
  To: Glauber Costa; +Cc: Linux Netdev List, David Miller

On 03/28/2012 09:20 PM, Glauber Costa wrote:
>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
>> index 9e7f9ba..65ae921 100644
>> --- a/net/ipv4/tcp.c
>> +++ b/net/ipv4/tcp.c
>> @@ -1935,7 +1935,9 @@ void tcp_close(struct sock *sk, long timeout)
>>   	 * advertise a zero window, then kill -9 the FTP client, wheee...
>>   	 * Note: timeout is always zero in such a case.
>>   	 */
>> -	if (data_was_unread) {
>> +	if (tcp_sk(sk)->repair) {
>> +		sk->sk_prot->disconnect(sk, 0);
>> +	} else if (data_was_unread) {
>>   		/* Unread data was tossed, zap the connection. */
>>   		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
>>   		tcp_set_state(sk, TCP_CLOSE);
>> @@ -2074,6 +2076,8 @@ int tcp_disconnect(struct sock *sk, int flags)
>>   	/* ABORT function of RFC793 */
>>   	if (old_state == TCP_LISTEN) {
>>   		inet_csk_listen_stop(sk);
>> +	} else if (unlikely(tp->repair)) {
>> +		sk->sk_err = ECONNABORTED;
>>   	} else if (tcp_need_reset(old_state) ||
>>   		   (tp->snd_nxt != tp->write_seq&&
>>   		(1<<  old_state)&  (TCPF_CLOSING | TCPF_LAST_ACK))) {
> 
> The patch looks good in general.
> Single nitpick is that maybe you should be consistent in your use of 
> unlikely. All of them seems equally unlikely, so I'd say you should wrap 
> both.

OK, will fix this.

>>
>> +	case TCP_REPAIR:
>> +		if (!tcp_can_repair_sock(sk))
>> +			err = -EPERM;
>> +		else if (val == 1) {
>> +			tp->repair = 1;
>> +			sk->sk_reuse = 2;
>> +			tp->repair_queue = TCP_NO_QUEUE;
>> +		} else if (val == 0) {
>> +			tp->repair = 0;
>> +			sk->sk_reuse = 0;
>> +			tcp_send_window_probe(sk);
>> +		} else
>> +			err = -EINVAL;
>> +
>> +		break;
>> +
>> +	case TCP_REPAIR_QUEUE:
> 
> Don't we need to test tcp_can_repair_sock() in all of them?
> I understand that TCP_REPAIR always comes before the other ones,
> so that means the socket is already in repair mode. But what
> should be the behavior in case the process drops privileges?
> Should it still be able to continue with the repair?

I believe it should. Because this model gives us the ability to do
both -- let others repair socket in non-root mode and keep one at
hands, giving it to anybody else only when the repair is complete.

> My first impression is that we need CAP_NET_ADMIN all along, so we 
> should make sure it's there.
> 
> .
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] tcp: Initial repair mode
  2012-03-28 20:39   ` Ben Hutchings
@ 2012-03-29  9:53     ` Pavel Emelyanov
  0 siblings, 0 replies; 15+ messages in thread
From: Pavel Emelyanov @ 2012-03-29  9:53 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: Linux Netdev List, David Miller

On 03/29/2012 12:39 AM, Ben Hutchings wrote:
> On Wed, 2012-03-28 at 19:37 +0400, Pavel Emelyanov wrote:
> [...]
>> * Ability to forcibly bind a socket to a port
>>
>> The sk->sk_reuse is set to 2 denoting, that the socket is question
>> should be bound as if all the others in the system are configured
>> with the SO_REUSEADDR option.
> 
> Shouldn't this constant be named?

Agree, I will fix this up.

> [...]
>> --- a/net/ipv4/tcp.c
>> +++ b/net/ipv4/tcp.c
> [...]
>> +	case TCP_REPAIR_QUEUE:
>> +		if (!tp->repair)
>> +			err = -EPERM;
>> +		else if (val <= TCP_QUEUES_NR)
> 
> Off-by-one.

Oops :( Thanks for noticing!

>> +			tp->repair_queue = val;
>> +		else
>> +			err = -EINVAL;
>> +		break;
> [...]
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] tcp: Repair socket queues
  2012-03-28 15:38 ` [PATCH 3/3] tcp: Repair socket queues Pavel Emelyanov
@ 2012-03-29 10:30   ` Li Yu
  2012-03-29 10:36     ` Pavel Emelyanov
  0 siblings, 1 reply; 15+ messages in thread
From: Li Yu @ 2012-03-29 10:30 UTC (permalink / raw)
  To: Pavel Emelyanov; +Cc: Linux Netdev List, David Miller

于 2012年03月28日 23:38, Pavel Emelyanov 写道:
> Reading queues under repair mode is done with recvmsg call.
> The queue-under-repair set by TCP_REPAIR_QUEUE option is used
> to determine which queue should be read. Thus both send and
> receive queue can be read with this.
>
> Caller must pass the MSG_PEEK flag.
>
> Writing to queues is done with sendmsg call and yet again --
> the repair-queue option can be used to push data into the
> receive queue.
>
> When putting an skb into receive queue a zero tcp header is
> appented to its head to address the tcp_hdr(skb)->syn and
> the ->fin checks by the (after repair) tcp_recvmsg. These
> flags flags are both set to zero and that's why.
>
> The fin cannot be met in the queue while reading the source
> socket, since the repair only works for closed/established
> sockets and queueing fin packet always changes its state.
>
> The syn in the queue denotes that the respective skb's seq
> is "off-by-one" as compared to the actual payload lenght. Thus,
> at the rcv queue refill we can just drop this flag and set the
> skb's sequences to precice values. IOW -- emulate the situation
> when the packet with data and syn is splitted into two -- a
> packet with syn and a packet with data and the former one is
> already "eaten".
>
> When the repair mode is turned off, the write queue seqs are
> updated so that the whole queue is considered to be 'already sent,
> waiting for ACKs' (write_seq = snd_nxt<= snd_una). From the
> protocol POV the send queue looks like it was sent, but the data
> between the write_seq and snd_nxt is lost in the network.
>
> This helps to avoid another sockoption for setting the snd_nxt
> sequence. Leaving the whole queue in a 'not yet sent' state (as
> it will be after sendmsg-s) will not allow to receive any acks
> from the peer since the ack_seq will be after the snd_nxt. Thus
> even the ack for the window probe will be dropped and the
> connection will be 'locked' with the zero peer window.
>

Do we need to restore the various TCP option switch bits, e.g. window
scale factor, sack_ok and so on?

Hmm, I think the recorded mss_cache may need to be restored too.

Thanks.

Yu

> Signed-off-by: Pavel Emelyanov<xemul@parallels.com>
> ---
>   net/ipv4/tcp.c        |   89 +++++++++++++++++++++++++++++++++++++++++++++++--
>   net/ipv4/tcp_output.c |    1 +
>   2 files changed, 87 insertions(+), 3 deletions(-)
>
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 65ae921..2ab3a31 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -911,6 +911,39 @@ static inline int select_size(const struct sock *sk, bool sg)
>   	return tmp;
>   }
>
> +static int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
> +{
> +	struct sk_buff *skb;
> +	struct tcp_skb_cb *cb;
> +	struct tcphdr *th;
> +
> +	skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
> +	if (!skb)
> +		goto err;
> +
> +	th = (struct tcphdr *)skb_put(skb, sizeof(*th));
> +	skb_reset_transport_header(skb);
> +	memset(th, 0, sizeof(*th));
> +
> +	if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
> +		goto err_free;
> +
> +	cb = TCP_SKB_CB(skb);
> +
> +	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
> +	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
> +	TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
> +
> +	tcp_queue_rcv(sk, skb, sizeof(*th));
> +
> +	return size;
> +
> +err_free:
> +	kfree_skb(skb);
> +err:
> +	return -ENOMEM;
> +}
> +
>   int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>   		size_t size)
>   {
> @@ -932,6 +965,19 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>   		if ((err = sk_stream_wait_connect(sk,&timeo)) != 0)
>   			goto out_err;
>
> +	if (unlikely(tp->repair)) {
> +		if (tp->repair_queue == TCP_RECV_QUEUE) {
> +			copied = tcp_send_rcvq(sk, msg, size);
> +			goto out;
> +		}
> +
> +		err = -EINVAL;
> +		if (tp->repair_queue == TCP_NO_QUEUE)
> +			goto out_err;
> +
> +		/* 'common' sending to sendq */
> +	}
> +
>   	/* This should be in poll */
>   	clear_bit(SOCK_ASYNC_NOSPACE,&sk->sk_socket->flags);
>
> @@ -1089,7 +1135,7 @@ new_segment:
>   			if ((seglen -= copy) == 0&&  iovlen == 0)
>   				goto out;
>
> -			if (skb->len<  max || (flags&  MSG_OOB))
> +			if (skb->len<  max || (flags&  MSG_OOB) || tp->repair)
>   				continue;
>
>   			if (forced_push(tp)) {
> @@ -1102,7 +1148,7 @@ new_segment:
>   wait_for_sndbuf:
>   			set_bit(SOCK_NOSPACE,&sk->sk_socket->flags);
>   wait_for_memory:
> -			if (copied)
> +			if (copied&&  !tp->repair)
>   				tcp_push(sk, flags&  ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
>
>   			if ((err = sk_stream_wait_memory(sk,&timeo)) != 0)
> @@ -1113,7 +1159,7 @@ wait_for_memory:
>   	}
>
>   out:
> -	if (copied)
> +	if (copied&&  !tp->repair)
>   		tcp_push(sk, flags, mss_now, tp->nonagle);
>   	release_sock(sk);
>   	return copied;
> @@ -1187,6 +1233,24 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
>   	return -EAGAIN;
>   }
>
> +static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
> +{
> +	struct sk_buff *skb;
> +	int copied = 0, err = 0;
> +
> +	/* XXX -- need to support SO_PEEK_OFF */
> +
> +	skb_queue_walk(&sk->sk_write_queue, skb) {
> +		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len);
> +		if (err)
> +			break;
> +
> +		copied += skb->len;
> +	}
> +
> +	return err ?: copied;
> +}
> +
>   /* Clean up the receive buffer for full frames taken by the user,
>    * then send an ACK if necessary.  COPIED is the number of bytes
>    * tcp_recvmsg has given to the user so far, it speeds up the
> @@ -1432,6 +1496,21 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>   	if (flags&  MSG_OOB)
>   		goto recv_urg;
>
> +	if (unlikely(tp->repair)) {
> +		err = -EPERM;
> +		if (!(flags&  MSG_PEEK))
> +			goto out;
> +
> +		if (tp->repair_queue == TCP_SEND_QUEUE)
> +			goto recv_sndq;
> +
> +		err = -EINVAL;
> +		if (tp->repair_queue == TCP_NO_QUEUE)
> +			goto out;
> +
> +		/* 'common' recv queue MSG_PEEK-ing */
> +	}
> +
>   	seq =&tp->copied_seq;
>   	if (flags&  MSG_PEEK) {
>   		peek_seq = tp->copied_seq;
> @@ -1783,6 +1862,10 @@ out:
>   recv_urg:
>   	err = tcp_recv_urg(sk, msg, len, flags);
>   	goto out;
> +
> +recv_sndq:
> +	err = tcp_peek_sndq(sk, msg, len);
> +	goto out;
>   }
>   EXPORT_SYMBOL(tcp_recvmsg);
>
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 4e2ce39..b29d612 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -2796,6 +2796,7 @@ void tcp_send_window_probe(struct sock *sk)
>   {
>   	if (sk->sk_state == TCP_ESTABLISHED) {
>   		tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
> +		tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq;
>   		tcp_xmit_probe_skb(sk, 0);
>   	}
>   }

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] tcp: Repair socket queues
  2012-03-29 10:30   ` Li Yu
@ 2012-03-29 10:36     ` Pavel Emelyanov
  2012-03-29 10:41       ` Li Yu
  2012-03-29 10:41       ` Li Yu
  0 siblings, 2 replies; 15+ messages in thread
From: Pavel Emelyanov @ 2012-03-29 10:36 UTC (permalink / raw)
  To: Li Yu; +Cc: Linux Netdev List, David Miller

On 03/29/2012 02:30 PM, Li Yu wrote:
> 于 2012年03月28日 23:38, Pavel Emelyanov 写道:
>> Reading queues under repair mode is done with recvmsg call.
>> The queue-under-repair set by TCP_REPAIR_QUEUE option is used
>> to determine which queue should be read. Thus both send and
>> receive queue can be read with this.
>>
>> Caller must pass the MSG_PEEK flag.
>>
>> Writing to queues is done with sendmsg call and yet again --
>> the repair-queue option can be used to push data into the
>> receive queue.
>>
>> When putting an skb into receive queue a zero tcp header is
>> appented to its head to address the tcp_hdr(skb)->syn and
>> the ->fin checks by the (after repair) tcp_recvmsg. These
>> flags flags are both set to zero and that's why.
>>
>> The fin cannot be met in the queue while reading the source
>> socket, since the repair only works for closed/established
>> sockets and queueing fin packet always changes its state.
>>
>> The syn in the queue denotes that the respective skb's seq
>> is "off-by-one" as compared to the actual payload lenght. Thus,
>> at the rcv queue refill we can just drop this flag and set the
>> skb's sequences to precice values. IOW -- emulate the situation
>> when the packet with data and syn is splitted into two -- a
>> packet with syn and a packet with data and the former one is
>> already "eaten".
>>
>> When the repair mode is turned off, the write queue seqs are
>> updated so that the whole queue is considered to be 'already sent,
>> waiting for ACKs' (write_seq = snd_nxt<= snd_una). From the
>> protocol POV the send queue looks like it was sent, but the data
>> between the write_seq and snd_nxt is lost in the network.
>>
>> This helps to avoid another sockoption for setting the snd_nxt
>> sequence. Leaving the whole queue in a 'not yet sent' state (as
>> it will be after sendmsg-s) will not allow to receive any acks
>> from the peer since the ack_seq will be after the snd_nxt. Thus
>> even the ack for the window probe will be dropped and the
>> connection will be 'locked' with the zero peer window.
>>
> 
> Do we need to restore various TCP options switch bits. e.g. window
> scale factor, sack_ok and so on.

SACK-s -- yes, this is in TODO list. Various window stuff -- not necessary.
TCP will eventually negotiate proper values again.

> En, I think the recorded mss_cache may be need to restored too.

Same with mss. As far as I understand this one will be re-detected after
a connection restore.

> Thanks.
> 
> Yu

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] tcp: Repair socket queues
  2012-03-29 10:36     ` Pavel Emelyanov
@ 2012-03-29 10:41       ` Li Yu
  2012-03-29 10:41       ` Li Yu
  1 sibling, 0 replies; 15+ messages in thread
From: Li Yu @ 2012-03-29 10:41 UTC (permalink / raw)
  To: Pavel Emelyanov; +Cc: Linux Netdev List, David Miller

于 2012年03月29日 18:36, Pavel Emelyanov 写道:
> On 03/29/2012 02:30 PM, Li Yu wrote:
>> 于 2012年03月28日 23:38, Pavel Emelyanov 写道:
>>> Reading queues under repair mode is done with recvmsg call.
>>> The queue-under-repair set by TCP_REPAIR_QUEUE option is used
>>> to determine which queue should be read. Thus both send and
>>> receive queue can be read with this.
>>>
>>> Caller must pass the MSG_PEEK flag.
>>>
>>> Writing to queues is done with sendmsg call and yet again --
>>> the repair-queue option can be used to push data into the
>>> receive queue.
>>>
>>> When putting an skb into receive queue a zero tcp header is
>>> appented to its head to address the tcp_hdr(skb)->syn and
>>> the ->fin checks by the (after repair) tcp_recvmsg. These
>>> flags flags are both set to zero and that's why.
>>>
>>> The fin cannot be met in the queue while reading the source
>>> socket, since the repair only works for closed/established
>>> sockets and queueing fin packet always changes its state.
>>>
>>> The syn in the queue denotes that the respective skb's seq
>>> is "off-by-one" as compared to the actual payload lenght. Thus,
>>> at the rcv queue refill we can just drop this flag and set the
>>> skb's sequences to precice values. IOW -- emulate the situation
>>> when the packet with data and syn is splitted into two -- a
>>> packet with syn and a packet with data and the former one is
>>> already "eaten".
>>>
>>> When the repair mode is turned off, the write queue seqs are
>>> updated so that the whole queue is considered to be 'already sent,
>>> waiting for ACKs' (write_seq = snd_nxt<= snd_una). From the
>>> protocol POV the send queue looks like it was sent, but the data
>>> between the write_seq and snd_nxt is lost in the network.
>>>
>>> This helps to avoid another sockoption for setting the snd_nxt
>>> sequence. Leaving the whole queue in a 'not yet sent' state (as
>>> it will be after sendmsg-s) will not allow to receive any acks
>>> from the peer since the ack_seq will be after the snd_nxt. Thus
>>> even the ack for the window probe will be dropped and the
>>> connection will be 'locked' with the zero peer window.
>>>
>>
>> Do we need to restore various TCP options switch bits. e.g. window
>> scale factor, sack_ok and so on.
>
> SACK-s -- yes, this is in TODO list. Various window stuff -- not necessary.
> TCP will eventually negotiate proper values again.
>
>> En, I think the recorded mss_cache may be need to restored too.
>
> Same with mss. As far as I understand this one will be re-detected after
> a connection restore.
>

After the connection is repaired, it directly enters the ESTABLISHED
state, so this TCP connection has no chance to negotiate such optional
features; such negotiation can only occur during the 3WHS.

Thanks.

Yu

>> Thanks.
>>
>> Yu
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] tcp: Repair socket queues
  2012-03-29 10:36     ` Pavel Emelyanov
  2012-03-29 10:41       ` Li Yu
@ 2012-03-29 10:41       ` Li Yu
  1 sibling, 0 replies; 15+ messages in thread
From: Li Yu @ 2012-03-29 10:41 UTC (permalink / raw)
  To: Pavel Emelyanov; +Cc: Linux Netdev List, David Miller

于 2012年03月29日 18:36, Pavel Emelyanov 写道:
> On 03/29/2012 02:30 PM, Li Yu wrote:
>> 于 2012年03月28日 23:38, Pavel Emelyanov 写道:
>>> Reading queues under repair mode is done with recvmsg call.
>>> The queue-under-repair set by TCP_REPAIR_QUEUE option is used
>>> to determine which queue should be read. Thus both send and
>>> receive queue can be read with this.
>>>
>>> Caller must pass the MSG_PEEK flag.
>>>
>>> Writing to queues is done with sendmsg call and yet again --
>>> the repair-queue option can be used to push data into the
>>> receive queue.
>>>
>>> When putting an skb into receive queue a zero tcp header is
>>> appented to its head to address the tcp_hdr(skb)->syn and
>>> the ->fin checks by the (after repair) tcp_recvmsg. These
>>> flags flags are both set to zero and that's why.
>>>
>>> The fin cannot be met in the queue while reading the source
>>> socket, since the repair only works for closed/established
>>> sockets and queueing fin packet always changes its state.
>>>
>>> The syn in the queue denotes that the respective skb's seq
>>> is "off-by-one" as compared to the actual payload lenght. Thus,
>>> at the rcv queue refill we can just drop this flag and set the
>>> skb's sequences to precice values. IOW -- emulate the situation
>>> when the packet with data and syn is splitted into two -- a
>>> packet with syn and a packet with data and the former one is
>>> already "eaten".
>>>
>>> When the repair mode is turned off, the write queue seqs are
>>> updated so that the whole queue is considered to be 'already sent,
>>> waiting for ACKs' (write_seq = snd_nxt<= snd_una). From the
>>> protocol POV the send queue looks like it was sent, but the data
>>> between the write_seq and snd_nxt is lost in the network.
>>>
>>> This helps to avoid another sockoption for setting the snd_nxt
>>> sequence. Leaving the whole queue in a 'not yet sent' state (as
>>> it will be after sendmsg-s) will not allow to receive any acks
>>> from the peer since the ack_seq will be after the snd_nxt. Thus
>>> even the ack for the window probe will be dropped and the
>>> connection will be 'locked' with the zero peer window.
>>>
>>
>> Do we need to restore various TCP options switch bits. e.g. window
>> scale factor, sack_ok and so on.
>
> SACK-s -- yes, this is in TODO list. Various window stuff -- not necessary.
> TCP will eventually negotiate proper values again.
>
>> En, I think the recorded mss_cache may be need to restored too.
>
> Same with mss. As far as I understand this one will be re-detected after
> a connection restore.
>

After the connection is repaired, it directly enters the ESTABLISHED
state, so this TCP connection has no chance to negotiate such optional
features; such negotiation can only occur during the 3WHS.

Thanks.

Yu

>> Thanks.
>>
>> Yu
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] tcp: Initial repair mode
  2012-03-06 13:11   ` Glauber Costa
@ 2012-03-06 20:16     ` David Miller
  0 siblings, 0 replies; 15+ messages in thread
From: David Miller @ 2012-03-06 20:16 UTC (permalink / raw)
  To: glommer; +Cc: xemul, netdev, tj, eric.dumazet

From: Glauber Costa <glommer@parallels.com>
Date: Tue, 6 Mar 2012 17:11:47 +0400

> On 03/06/2012 01:55 PM, Pavel Emelyanov wrote:
>> This includes (according the the previous description):
>>
>> * TCP_REPAIR sockoption
>>
>> This one just puts the socket in/out of the repair mode.
>> Allowed for CAP_SYS_ADMIN and for closed/establised sockets only.
>> When repair mode is turned off and the socket happens to be in
>> the established state the window probe is sent to the peer to
>> 'unlock' the connection.
> 
> Why not CAP_NET_ADMIN, or something similar?

Agreed, we made this kind of mistake with the transparent proxy stuff,
let's not do that again.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] tcp: Initial repair mode
  2012-03-06  9:55 ` [PATCH 2/3] tcp: Initial repair mode Pavel Emelyanov
@ 2012-03-06 13:11   ` Glauber Costa
  2012-03-06 20:16     ` David Miller
  0 siblings, 1 reply; 15+ messages in thread
From: Glauber Costa @ 2012-03-06 13:11 UTC (permalink / raw)
  To: Pavel Emelyanov; +Cc: Linux Netdev List, David Miller, Tejun Heo, Eric Dumazet

On 03/06/2012 01:55 PM, Pavel Emelyanov wrote:
> This includes (according the the previous description):
>
> * TCP_REPAIR sockoption
>
> This one just puts the socket in/out of the repair mode.
> Allowed for CAP_SYS_ADMIN and for closed/establised sockets only.
> When repair mode is turned off and the socket happens to be in
> the established state the window probe is sent to the peer to
> 'unlock' the connection.

Why not CAP_NET_ADMIN, or something similar?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 2/3] tcp: Initial repair mode
  2012-03-06  9:54 [RFC][PATCH 0/3] TCP connection repair (v2) Pavel Emelyanov
@ 2012-03-06  9:55 ` Pavel Emelyanov
  2012-03-06 13:11   ` Glauber Costa
  0 siblings, 1 reply; 15+ messages in thread
From: Pavel Emelyanov @ 2012-03-06  9:55 UTC (permalink / raw)
  To: Linux Netdev List, David Miller, Tejun Heo, Eric Dumazet

This includes (according to the previous description):

* TCP_REPAIR sockoption

This one just puts the socket in/out of the repair mode.
Allowed for CAP_SYS_ADMIN and for closed/established sockets only.
When repair mode is turned off and the socket happens to be in
the established state the window probe is sent to the peer to
'unlock' the connection.

* TCP_REPAIR_QUEUE sockoption

This one sets the queue which we're about to repair. The
'no-queue' is set by default.

* TCP_QUEUE_SEQ sockoption

Sets the write_seq/copied_seq of a selected repaired queue.
Allowed for TCP_CLOSE-d sockets only. When the socket changes
its state the other seq-s are changed by the kernel according
to the protocol rules (most of the existing code is actually
reused).

* Ability to forcibly bind a socket to a port

The sk->sk_reuse is set to 2, denoting that the socket in question
should be bound as if all the others in the system are configured
with the SO_REUSEADDR option.

* Immediate connect modification

The connect syscall initializes the connection, then directly jumps
to the code which finalizes it.

* Silent close modification

The close just aborts the connection (similar to SO_LINGER with 0
time) but without sending any FIN/RST-s to peer.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>

---
 include/linux/tcp.h             |   14 ++++++++-
 include/net/tcp.h               |    2 +
 net/ipv4/inet_connection_sock.c |    3 ++
 net/ipv4/tcp.c                  |   63 ++++++++++++++++++++++++++++++++++++++-
 net/ipv4/tcp_ipv4.c             |   19 ++++++++++--
 net/ipv4/tcp_output.c           |   16 ++++++++--
 6 files changed, 109 insertions(+), 8 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index b6c62d2..4e90e6a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -106,6 +106,16 @@ enum {
 #define TCP_THIN_LINEAR_TIMEOUTS 16      /* Use linear timeouts for thin streams*/
 #define TCP_THIN_DUPACK         17      /* Fast retrans. after 1 dupack */
 #define TCP_USER_TIMEOUT	18	/* How long for loss retry before timeout */
+#define TCP_REPAIR		19	/* TCP sock is under repair right now */
+#define TCP_REPAIR_QUEUE	20
+#define TCP_QUEUE_SEQ		21
+
+enum {
+	TCP_NO_QUEUE,
+	TCP_RECV_QUEUE,
+	TCP_SEND_QUEUE,
+	TCP_QUEUES_NR,
+};
 
 /* for TCP_INFO socket option */
 #define TCPI_OPT_TIMESTAMPS	1
@@ -353,7 +363,9 @@ struct tcp_sock {
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
 		thin_dupack : 1,/* Fast retransmit on first dupack      */
-		unused      : 2;
+		repair      : 1,
+		unused      : 1;
+	u8	repair_queue;
 
 /* RTT measurement */
 	u32	srtt;		/* smoothed round trip time << 3	*/
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a08e886..9f4aa4c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -611,6 +611,8 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp)
  */
 extern u32 __tcp_select_window(struct sock *sk);
 
+void tcp_send_window_probe(struct sock *sk);
+
 /* TCP timestamps are only 32-bits, this causes a slight
  * complication on 64-bit systems since we store a snapshot
  * of jiffies in the buffer control blocks below.  We decided
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 19d66ce..92788af 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -172,6 +172,9 @@ have_snum:
 	goto tb_not_found;
 tb_found:
 	if (!hlist_empty(&tb->owners)) {
+		if (sk->sk_reuse == 2)
+			goto success;
+
 		if (tb->fastreuse > 0 &&
 		    sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
 		    smallest_size == -1) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0e0b974..8d9b2bc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1932,7 +1932,9 @@ void tcp_close(struct sock *sk, long timeout)
 	 * advertise a zero window, then kill -9 the FTP client, wheee...
 	 * Note: timeout is always zero in such a case.
 	 */
-	if (data_was_unread) {
+	if (tcp_sk(sk)->repair) {
+		sk->sk_prot->disconnect(sk, 0);
+	} else if (data_was_unread) {
 		/* Unread data was tossed, zap the connection. */
 		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
 		tcp_set_state(sk, TCP_CLOSE);
@@ -2071,6 +2073,8 @@ int tcp_disconnect(struct sock *sk, int flags)
 	/* ABORT function of RFC793 */
 	if (old_state == TCP_LISTEN) {
 		inet_csk_listen_stop(sk);
+	} else if (unlikely(tp->repair)) {
+		sk->sk_err = ECONNABORTED;
 	} else if (tcp_need_reset(old_state) ||
 		   (tp->snd_nxt != tp->write_seq &&
 		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -2294,6 +2298,43 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			tp->thin_dupack = val;
 		break;
 
+	case TCP_REPAIR:
+		if (!capable(CAP_SYS_ADMIN) || !(sk->sk_state == TCP_CLOSE ||
+					sk->sk_state == TCP_ESTABLISHED))
+			err = -EPERM;
+		else if (val == 1) {
+			tp->repair = 1;
+			sk->sk_reuse = 2;
+			tp->repair_queue = TCP_NO_QUEUE;
+		} else if (val == 0) {
+			tp->repair = 0;
+			sk->sk_reuse = 0;
+			tcp_send_window_probe(sk);
+		} else
+			err = -EINVAL;
+
+		break;
+
+	case TCP_REPAIR_QUEUE:
+		if (!tp->repair)
+			err = -EPERM;
+		else if (val <= TCP_QUEUES_NR)
+			tp->repair_queue = val;
+		else
+			err = -EINVAL;
+		break;
+
+	case TCP_QUEUE_SEQ:
+		if (sk->sk_state != TCP_CLOSE)
+			err = -EPERM;
+		else if (tp->repair_queue == TCP_SEND_QUEUE)
+			tp->write_seq = val;
+		else if (tp->repair_queue == TCP_RECV_QUEUE)
+			tp->copied_seq = val;
+		else
+			err = -EINVAL;
+		break;
+
 	case TCP_CORK:
 		/* When set indicates to always queue non-full frames.
 		 * Later the user clears this option and we transmit
@@ -2629,6 +2670,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		val = tp->thin_dupack;
 		break;
 
+	case TCP_REPAIR:
+		val = tp->repair;
+		break;
+
+	case TCP_REPAIR_QUEUE:
+		if (tp->repair)
+			val = tp->repair_queue;
+		else
+			return -EINVAL;
+		break;
+
+	case TCP_QUEUE_SEQ:
+		if (tp->repair_queue == TCP_SEND_QUEUE)
+			val = tp->write_seq;
+		else if (tp->repair_queue == TCP_RECV_QUEUE)
+			val = tp->copied_seq;
+		else
+			return -EINVAL;
+		break;
+
 	case TCP_USER_TIMEOUT:
 		val = jiffies_to_msecs(icsk->icsk_user_timeout);
 		break;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 94abee8..6118486 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -137,6 +137,14 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 
+static int tcp_repair_connect(struct sock *sk)
+{
+	tcp_connect_init(sk);
+	tcp_finish_connect(sk, NULL);
+
+	return 0;
+}
+
 /* This will initiate an outgoing connection. */
 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -195,7 +203,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		/* Reset inherited state */
 		tp->rx_opt.ts_recent	   = 0;
 		tp->rx_opt.ts_recent_stamp = 0;
-		tp->write_seq		   = 0;
+		if (!tp->repair)
+			tp->write_seq	   = 0;
 	}
 
 	if (tcp_death_row.sysctl_tw_recycle &&
@@ -246,7 +255,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk->sk_gso_type = SKB_GSO_TCPV4;
 	sk_setup_caps(sk, &rt->dst);
 
-	if (!tp->write_seq)
+	if (!tp->write_seq && !tp->repair)
 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
 							   inet->inet_daddr,
 							   inet->inet_sport,
@@ -254,7 +263,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	inet->inet_id = tp->write_seq ^ jiffies;
 
-	err = tcp_connect(sk);
+	if (likely(!tp->repair))
+		err = tcp_connect(sk);
+	else
+		err = tcp_repair_connect(sk);
+
 	rt = NULL;
 	if (err)
 		goto failure;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1db25af..f0525d1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2617,9 +2617,11 @@ void tcp_connect_init(struct sock *sk)
 	tp->snd_sml = tp->write_seq;
 	tp->snd_up = tp->write_seq;
 	tp->snd_nxt = tp->write_seq;
-	tp->rcv_nxt = 0;
-	tp->rcv_wup = 0;
-	tp->copied_seq = 0;
+
+	if (!tp->repair)
+		tp->copied_seq = 0;
+	tp->rcv_wup = tp->copied_seq;
+	tp->rcv_nxt = tp->copied_seq;
 
 	inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
 	inet_csk(sk)->icsk_retransmits = 0;
@@ -2790,6 +2792,14 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
+void tcp_send_window_probe(struct sock *sk)
+{
+	if (sk->sk_state == TCP_ESTABLISHED) {
+		tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
+		tcp_xmit_probe_skb(sk, 0);
+	}
+}
+
 /* Initiate keepalive or window probe from timer. */
 int tcp_write_wakeup(struct sock *sk)
 {
-- 
1.5.5.6

^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2012-03-29 10:41 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-03-28 15:36 [PATCH net-next 0/3] TCP connection repair (v3) Pavel Emelyanov
2012-03-28 15:36 ` [PATCH 1/3] tcp: Move code around Pavel Emelyanov
2012-03-28 15:37 ` [PATCH 2/3] tcp: Initial repair mode Pavel Emelyanov
2012-03-28 17:20   ` Glauber Costa
2012-03-29  9:52     ` Pavel Emelyanov
2012-03-28 20:39   ` Ben Hutchings
2012-03-29  9:53     ` Pavel Emelyanov
2012-03-28 15:38 ` [PATCH 3/3] tcp: Repair socket queues Pavel Emelyanov
2012-03-29 10:30   ` Li Yu
2012-03-29 10:36     ` Pavel Emelyanov
2012-03-29 10:41       ` Li Yu
2012-03-29 10:41       ` Li Yu
  -- strict thread matches above, loose matches on Subject: below --
2012-03-06  9:54 [RFC][PATCH 0/3] TCP connection repair (v2) Pavel Emelyanov
2012-03-06  9:55 ` [PATCH 2/3] tcp: Initial repair mode Pavel Emelyanov
2012-03-06 13:11   ` Glauber Costa
2012-03-06 20:16     ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.