From: Rao Shoaib

Signed-off-by: Rao Shoaib
---
 include/net/inet_common.h |   2 +
 include/net/tcp.h         | 106 ++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/af_inet.c        |   3 +-
 net/ipv4/tcp.c            |  59 ++++++++++++++------------
 net/ipv4/tcp_input.c      |  89 +++++++++++++++++++++++---------------
 net/ipv4/tcp_ipv4.c       |  42 ++++++++++++------
 net/ipv4/tcp_output.c     |  55 +++++++++++++-----------
 net/ipv4/tcp_timer.c      |   9 ++--
 net/ipv6/af_inet6.c       |   4 +-
 net/ipv6/tcp_ipv6.c       |  63 ++++++++++++++-------------
 10 files changed, 293 insertions(+), 139 deletions(-)

diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 500f813..7b919c7 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -14,6 +14,8 @@ struct sock;
 struct sockaddr;
 struct socket;
 
+int inet_create(struct net *net, struct socket *sock, int protocol, int kern);
+int inet6_create(struct net *net, struct socket *sock, int protocol, int kern);
 int inet_release(struct socket *sock);
 int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			 int addr_len, int flags);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f5d748a..3344b1d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -314,6 +314,12 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
 	return false;
 }
 
+static inline void tcp_drop(struct sock *sk, struct sk_buff *skb)
+{
+	sk_drops_add(sk, skb);
+	__kfree_skb(skb);
+}
+
 bool tcp_check_oom(struct sock *sk, int shift);
 
 extern struct proto tcp_prot;
@@ -2273,6 +2279,106 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
 	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
 }
 
+/* MPTCP */
+unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
+				int large_allowed);
+ssize_t tcp_sendpages_xmit(struct sock *sk, struct page *page, int offset,
+			   size_t size, int flags);
+ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
+			 size_t size, int flags);
+int tcp_close_state(struct sock *sk);
+void tcp_enter_quickack_mode(struct sock *sk);
+void tcp_sndbuf_expand_impl(struct sock *sk, u32 nr_segs);
+bool tcp_check_rtt(struct sock *sk);
+u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb);
+void tcp_ack_probe(struct sock *sk);
+bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
+			   const u32 ack_seq, const u32 nwin);
+bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
+			     struct tcp_fastopen_cookie *cookie);
+bool tcp_rcv_fastopen_synack_impl(struct sock *sk, struct sk_buff *synack,
+				  struct tcp_fastopen_cookie *cookie,
+				  bool rexmit);
+void tcp_enter_quickack_mode(struct sock *sk);
+void tcp_check_space(struct sock *sk);
+int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
+			       bool *fragstolen);
+void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb);
+void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags);
+bool tcp_urg_mode(const struct tcp_sock *tp);
+int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+		     gfp_t gfp_mask);
+void tcp_queue_skb(struct sock *sk, struct sk_buff *skb);
+void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now);
+void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr);
+int pskb_trim_head(struct sk_buff *skb, int len);
+void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
+			 const struct sk_buff *skb);
+unsigned int tcp_mss_split_point(const struct sock *sk,
+				 const struct sk_buff *skb,
+				 unsigned int mss_now,
+				 unsigned int max_segs,
+				 int nonagle);
+unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
+			   const struct sk_buff *skb);
+int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now);
+bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
+		    unsigned int cur_mss, int nonagle);
+bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
+		      unsigned int cur_mss);
+int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib);
+void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb);
+void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+			   struct request_sock *req);
+void tcp_v4_reqsk_destructor(struct request_sock *req);
+struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb);
+void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
+		    const struct tcphdr *th);
+void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
+		    const struct tcphdr *th);
+void tcp_write_err(struct sock *sk);
+bool retransmits_timed_out(struct sock *sk, unsigned int boundary,
+			   unsigned int timeout);
+int tcp_write_timeout(struct sock *sk);
+struct request_sock *tcp_cookie_req_alloc(struct sock *sk,
+					  struct sk_buff *skb,
+					  struct tcp_options_received *tcp_opts,
+					  __u32 cookie, int mss);
+void inet_twsk_free(struct inet_timewait_sock *tw);
+#if IS_ENABLED(CONFIG_IPV6)
+void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
+void tcp_v6_mtu_reduced(struct sock *sk);
+void tcp_v6_reqsk_destructor(struct request_sock *req);
+void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
+void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+			   struct request_sock *req);
+struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb);
+int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb);
+struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
+				  struct request_sock *req,
+				  struct dst_entry *dst,
+				  struct request_sock *req_unhash,
+				  bool *own_req);
+int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
+#endif
+
+static inline void tcp_data_snd_check(struct sock *sk)
+{
+	tcp_push_pending_frames(sk);
+	tcp_check_space(sk);
+}
+
+/* These states need RST on ABORT according to RFC793 */
+
+static inline bool tcp_need_reset(int state)
+{
+	return (1 << state) &
+	       (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
+		TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
+}
+
+/* END MPTCP */
+
 #if IS_ENABLED(CONFIG_SMC)
 extern struct static_key_false tcp_have_smc;
 #endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e8c7fad..5d8ea09 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -243,8 +243,7 @@ EXPORT_SYMBOL(inet_listen);
 *	Create an inet socket.
*/ -static int inet_create(struct net *net, struct socket *sock, int protocol, - int kern) +int inet_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; struct inet_protosw *answer; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ea89a41..20a69eb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -429,6 +429,7 @@ static const struct tcp_operational_ops __tcp_default_op_ops = { .sndbuf_expand = tcp_sndbuf_expand, .shift_skb_data = tcp_shift_skb_data, .grow_window = tcp_grow_window, + .check_rtt = tcp_check_rtt, .try_coalesce = tcp_try_coalesce, .try_rmem_schedule = tcp_try_rmem_schedule, .collapse_one = tcp_collapse_one, @@ -963,8 +964,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, return NULL; } -static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, - int large_allowed) +unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, int large_allowed) { struct tcp_sock *tp = tcp_sk(sk); u32 new_size_goal, size_goal; @@ -998,8 +998,8 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } -ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, - size_t size, int flags) +ssize_t tcp_sendpages_xmit(struct sock *sk, struct page *page, int offset, + size_t size, int flags) { struct tcp_sock *tp = tcp_sk(sk); int mss_now, size_goal; @@ -1007,25 +1007,12 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, ssize_t copied; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - /* Wait for a connection to finish. One exception is TCP Fast Open - * (passive side) where data is allowed to be sent before a connection - * is fully established. - */ - if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && - !tcp_passive_fastopen(sk)) { - err = sk_stream_wait_connect(sk, &timeo); - if (err != 0) - goto out_err; - } - sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); mss_now = tp->op_ops->send_mss(sk, &size_goal, flags); copied = 0; err = -EPIPE; - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) - goto out_err; while (size > 0) { struct sk_buff *skb = tcp_write_queue_tail(sk); @@ -1120,6 +1107,33 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, do_error: if (copied) goto out; + return err; +} +EXPORT_SYMBOL_GPL(tcp_sendpages_xmit); + +ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, + size_t size, int flags) +{ + int err; + long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + + /* Wait for a connection to finish. One exception is TCP Fast Open + * (passive side) where data is allowed to be sent before a connection + * is fully established. + */ + if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && + !tcp_passive_fastopen(sk)) { + err = sk_stream_wait_connect(sk, &timeo); + if (err != 0) + goto out_err; + } + + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + goto out_err; + + err = tcp_sendpages_xmit(sk, page, offset, size, flags); + if (err >= 0) + return(err); out_err: /* make sure we wake any epoll edge trigger waiter */ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && @@ -2193,7 +2207,7 @@ static const unsigned char new_state[16] = { [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! 
*/ }; -static int tcp_close_state(struct sock *sk) +int tcp_close_state(struct sock *sk) { int next = (int)new_state[sk->sk_state]; int ns = next & TCP_STATE_MASK; @@ -2419,15 +2433,6 @@ void tcp_close(struct sock *sk, long timeout) } EXPORT_SYMBOL(tcp_close); -/* These states need RST on ABORT according to RFC793 */ - -static inline bool tcp_need_reset(int state) -{ - return (1 << state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 | - TCPF_FIN_WAIT2 | TCPF_SYN_RECV); -} - static void tcp_rtx_queue_purge(struct sock *sk) { struct rb_node *p = rb_first(&sk->tcp_rtx_queue); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8cc48bb..398505e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -195,7 +195,7 @@ static void tcp_incr_quickack(struct sock *sk) icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); } -static void tcp_enter_quickack_mode(struct sock *sk) +void tcp_enter_quickack_mode(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); tcp_incr_quickack(sk); @@ -293,12 +293,11 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr * 1. Tuning sk->sk_sndbuf, when connection enters established state. */ -void tcp_sndbuf_expand(struct sock *sk) +void tcp_sndbuf_expand_impl(struct sock *sk, u32 nr_segs) { const struct tcp_sock *tp = tcp_sk(sk); const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; int sndmem, per_mss; - u32 nr_segs; /* Worst case is non GSO/TSO : each frame consumes one skb * and skb->head is kmalloced using power of two area of memory @@ -310,8 +309,10 @@ void tcp_sndbuf_expand(struct sock *sk) per_mss = roundup_pow_of_two(per_mss) + SKB_DATA_ALIGN(sizeof(struct sk_buff)); - nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd); - nr_segs = max_t(u32, nr_segs, tp->reordering + 1); + if (nr_segs <= 0) { + nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd); + nr_segs = max_t(u32, nr_segs, tp->reordering + 1); + } /* Fast Recovery (RFC 5681 3.2) : * Cubic needs 1.7 factor, rounded to 2 to include @@ -324,6 +325,11 @@ void tcp_sndbuf_expand(struct sock *sk) sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]); } +void tcp_sndbuf_expand(struct sock *sk) +{ + tcp_sndbuf_expand_impl(sk, 0); +} + /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) * * All tcp_full_space() is split to two parts: "network" buffer, allocated @@ -572,6 +578,17 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, } } +bool tcp_check_rtt(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + int time; + + time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); + + if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) + return (true); + return false; +} /* * This function should be called every time data is copied to user space. * It calculates the appropriate TCP receive buffer space. @@ -580,11 +597,9 @@ void tcp_rcv_space_adjust(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 copied; - int time; tcp_mstamp_refresh(tp); - time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); - if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) + if (tp->op_ops->check_rtt(sk)) return; /* Number of bytes copied to user in last RTT */ @@ -2966,7 +2981,7 @@ static void tcp_set_xmit_timer(struct sock *sk) } /* If we get here, the whole TSO packet has not been acked. 
*/ -static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) +u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); u32 packets_acked; @@ -3201,7 +3216,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, return flag; } -static void tcp_ack_probe(struct sock *sk) +void tcp_ack_probe(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *head = tcp_send_head(sk); @@ -3273,7 +3288,7 @@ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, /* Check that window update is acceptable. * The function assumes that snd_una<=ack<=snd_next. */ -static inline bool tcp_may_update_window(const struct tcp_sock *tp, +inline bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, const u32 ack_seq, const u32 nwin) { @@ -4290,12 +4305,6 @@ bool tcp_try_coalesce(struct sock *sk, return true; } -static void tcp_drop(struct sock *sk, struct sk_buff *skb) -{ - sk_drops_add(sk, skb); - __kfree_skb(skb); -} - /* This one checks to see if we can put data from the * out_of_order queue into the receive_queue. */ @@ -4505,8 +4514,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) } } -static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen, - bool *fragstolen) +int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen, + bool *fragstolen) { int eaten; struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue); @@ -4580,7 +4589,11 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) int eaten; if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { - __kfree_skb(skb); + /* options that a layer above might be interested in */ + if (unlikely(tp->op_ops->ack_only)) + tp->op_ops->ack_only(sk, skb); + else + __kfree_skb(skb); return; } skb_dst_drop(skb); @@ -4995,7 +5008,7 @@ static void tcp_new_space(struct sock *sk) sk->sk_write_space(sk); } -static void tcp_check_space(struct sock *sk) +void tcp_check_space(struct sock *sk) { if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); @@ -5010,12 +5023,6 @@ static void tcp_check_space(struct sock *sk) } } -static inline void tcp_data_snd_check(struct sock *sk) -{ - tcp_push_pending_frames(sk); - tcp_check_space(sk); -} - /* * Check if sending an ack is needed. */ @@ -5504,8 +5511,9 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tp->pred_flags = 0; } -bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, - struct tcp_fastopen_cookie *cookie) +bool tcp_rcv_fastopen_synack_impl(struct sock *sk, struct sk_buff *synack, + struct tcp_fastopen_cookie *cookie, + bool rexmit) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *data = tp->syn_data ? 
tcp_rtx_queue_head(sk) : NULL; @@ -5542,7 +5550,7 @@ bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp); - if (data) { /* Retransmit unacked data in SYN */ + if (data && rexmit) { /* Retransmit unacked data in SYN */ skb_rbtree_walk_from(data) { if (__tcp_retransmit_skb(sk, data, 1)) break; @@ -5562,6 +5570,12 @@ bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, return false; } +bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, + struct tcp_fastopen_cookie *cookie) +{ + return tcp_rcv_fastopen_synack_impl(sk, synack, cookie, true); +} + static void smc_check_reset_syn(struct tcp_sock *tp) { #if IS_ENABLED(CONFIG_SMC) @@ -5581,6 +5595,9 @@ int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; bool fastopen_fail; + tp->rx_opt.saw_tstamp = 0; + tcp_mstamp_refresh(tp); + tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5682,7 +5699,7 @@ int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_finish_connect(sk, skb); fastopen_fail = (tp->syn_fastopen || tp->syn_data) && - tcp_rcv_fastopen_synack(sk, skb, &foc); + tp->op_ops->fastopen_synack(sk, skb, &foc); if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); @@ -5842,9 +5859,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) goto discard; case TCP_SYN_SENT: - tp->rx_opt.saw_tstamp = 0; - tcp_mstamp_refresh(tp); - queued = tcp_rcv_synsent_state_process(sk, skb, th); + queued = tp->state_ops->synsent(sk, skb, th); if (queued >= 0) return queued; @@ -6052,8 +6067,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) } if (!queued) { + /* options that a layer above might be interested in */ + if (unlikely(tp->op_ops && tp->op_ops->ack_only)) + tp->op_ops->ack_only(sk, skb); + else discard: - tcp_drop(sk, skb); + tcp_drop(sk, skb); } return 0; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 482ca15..95d4c1f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -595,7 +595,7 @@ EXPORT_SYMBOL(tcp_v4_send_check); * Exception: precedence violation. We do not implement it in any case. */ -static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) +void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct { @@ -829,8 +829,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) inet_twsk_put(tw); } -static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, - struct request_sock *req) +void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, + struct request_sock *req) { /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. @@ -892,7 +892,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, /* * IPv4 request_sock destructor. 
*/ -static void tcp_v4_reqsk_destructor(struct request_sock *req) +void tcp_v4_reqsk_destructor(struct request_sock *req) { kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); } @@ -1431,7 +1431,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, } EXPORT_SYMBOL(tcp_v4_syn_recv_sock); -static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) +struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) { #ifdef CONFIG_SYN_COOKIES const struct tcphdr *th = tcp_hdr(skb); @@ -1598,8 +1598,8 @@ static void tcp_v4_restore_cb(struct sk_buff *skb) sizeof(struct inet_skb_parm)); } -static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, - const struct tcphdr *th) +void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, + const struct tcphdr *th) { /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() * barrier() makes sure compiler wont play fool^Waliasing games. @@ -1620,6 +1620,9 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, skb->tstamp || skb_hwtstamps(skb)->hwtstamp; } +process_unclaimed tcp_process_unclaimed; +EXPORT_SYMBOL(tcp_process_unclaimed); + /* * From tcp_input.c */ @@ -1750,13 +1753,16 @@ int tcp_v4_rcv(struct sk_buff *skb) sk_incoming_cpu_update(sk); - bh_lock_sock_nested(sk); - tcp_segs_in(tcp_sk(sk), skb); - ret = 0; - if (!sock_owned_by_user(sk)) { - ret = tcp_v4_do_rcv(sk, skb); - } else if (tcp_add_backlog(sk, skb)) { - goto discard_and_relse; + if (likely(!tcp_sk(sk)->op_ops->rx)) { + bh_lock_sock_nested(sk); + tcp_segs_in(tcp_sk(sk), skb); + ret = 0; + if (!sock_owned_by_user(sk)) + ret = tcp_v4_do_rcv(sk, skb); + else if (tcp_add_backlog(sk, skb)) + goto discard_and_relse; + } else { + return(tcp_sk(sk)->op_ops->rx(sk, skb, refcounted)); } bh_unlock_sock(sk); @@ -1778,6 +1784,10 @@ int tcp_v4_rcv(struct sk_buff *skb) bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { + if (unlikely(tcp_process_unclaimed)) { + if (tcp_process_unclaimed(sk, skb)) + return (0); + } tcp_v4_send_reset(NULL, skb); } @@ -1820,6 +1830,10 @@ int tcp_v4_rcv(struct sk_buff *skb) refcounted = false; goto process; } + if (unlikely(tcp_process_unclaimed)) { + if (tcp_process_unclaimed(sk, skb)) + return 0; + } } /* to ACK */ /* fall through */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2fa5c05..72b494a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -46,7 +46,7 @@ #include /* Account for new data that has been sent to the network. */ -static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) +void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -375,7 +375,7 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, /* Constructs common control bits of non-data skb. If SYN/FIN is present, * auto increment end seqno. 
*/ -static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) +void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { skb->ip_summed = CHECKSUM_PARTIAL; @@ -390,7 +390,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) TCP_SKB_CB(skb)->end_seq = seq; } -static inline bool tcp_urg_mode(const struct tcp_sock *tp) +inline bool tcp_urg_mode(const struct tcp_sock *tp) { return tp->snd_una != tp->snd_up; } @@ -1031,8 +1031,8 @@ static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb) * We are working here with either a clone of the original * SKB, or a fresh unique copy made by the retransmit engine. */ -static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, - gfp_t gfp_mask) +int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, + gfp_t gfp_mask) { const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet; @@ -1193,7 +1193,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, * otherwise socket can stall. */ -static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) +void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -1206,7 +1206,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) } /* Initialize TSO segments for a packet. */ -static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) +void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) { if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal @@ -1223,7 +1223,7 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) /* Pcount in the middle of the write queue got changed, we need to do various * tweaks to fix counters */ -static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr) +void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr) { struct tcp_sock *tp = tcp_sk(sk); @@ -1426,6 +1426,11 @@ static int __pskb_trim_head(struct sk_buff *skb, int len) return len; } +int pskb_trim_head(struct sk_buff *skb, int len) +{ + return __pskb_trim_head(skb, len); +} + /* Remove acked data from a packet in the transmit queue. 
*/ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { @@ -1434,7 +1439,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; - delta_truesize = __pskb_trim_head(skb, len); + delta_truesize = pskb_trim_head(skb, len); TCP_SKB_CB(skb)->seq += len; skb->ip_summed = CHECKSUM_PARTIAL; @@ -1693,8 +1698,8 @@ static bool tcp_minshall_check(const struct tcp_sock *tp) * But we can avoid doing the divide again given we already have * skb_pcount = skb->len / mss_now */ -static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now, - const struct sk_buff *skb) +void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now, + const struct sk_buff *skb) { if (skb->len < tcp_skb_pcount(skb) * mss_now) tp->snd_sml = TCP_SKB_CB(skb)->end_seq; @@ -1751,11 +1756,11 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) } /* Returns the portion of skb which can be sent right away */ -static unsigned int tcp_mss_split_point(const struct sock *sk, - const struct sk_buff *skb, - unsigned int mss_now, - unsigned int max_segs, - int nonagle) +unsigned int tcp_mss_split_point(const struct sock *sk, + const struct sk_buff *skb, + unsigned int mss_now, + unsigned int max_segs, + int nonagle) { const struct tcp_sock *tp = tcp_sk(sk); u32 partial, needed, window, max_len; @@ -1785,7 +1790,7 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, /* Can at least one segment of SKB be sent right now, according to the * congestion window rules? If so, return how many segments are allowed. */ -static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, +inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, const struct sk_buff *skb) { u32 in_flight, cwnd, halfcwnd; @@ -1811,7 +1816,7 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, * This must be invoked the first time we consider transmitting * SKB onto the wire. */ -static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now) +int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now) { int tso_segs = tcp_skb_pcount(skb); @@ -1826,8 +1831,8 @@ static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now) /* Return true if the Nagle test allows this packet to be * sent now. */ -static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, - unsigned int cur_mss, int nonagle) +inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, + unsigned int cur_mss, int nonagle) { /* Nagle rule does not apply to frames, which sit in the middle of the * write_queue (they have no chances to get new data). @@ -1849,9 +1854,9 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf } /* Does at least the first segment of SKB fit into the send window? */ -static bool tcp_snd_wnd_test(const struct tcp_sock *tp, - const struct sk_buff *skb, - unsigned int cur_mss) +bool tcp_snd_wnd_test(const struct tcp_sock *tp, + const struct sk_buff *skb, + unsigned int cur_mss) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -2148,7 +2153,7 @@ int tcp_mtu_probe(struct sock *sk) skb->csum = csum_partial(skb->data, skb->len, 0); } else { - __pskb_trim_head(skb, copy); + pskb_trim_head(skb, copy); tcp_set_skb_tso_segs(skb, mss_now); } TCP_SKB_CB(skb)->seq += copy; @@ -3639,7 +3644,7 @@ EXPORT_SYMBOL_GPL(tcp_send_ack); * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is * out-of-date with SND.UNA-1 to probe window. 
*/ -static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) +int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index beaba7a..dbf284d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -29,7 +29,7 @@ * Returns: Nothing (void) */ -static void tcp_write_err(struct sock *sk) +void tcp_write_err(struct sock *sk) { sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; sk->sk_error_report(sk); @@ -155,9 +155,8 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) * after "boundary" unsuccessful, exponentially backed-off * retransmissions with an initial RTO of TCP_RTO_MIN. */ -static bool retransmits_timed_out(struct sock *sk, - unsigned int boundary, - unsigned int timeout) +bool retransmits_timed_out(struct sock *sk, unsigned int boundary, + unsigned int timeout) { const unsigned int rto_base = TCP_RTO_MIN; unsigned int linear_backoff_thresh, start_ts; @@ -187,7 +186,7 @@ static bool retransmits_timed_out(struct sock *sk, } /* A write timeout has occurred. Process the after effects. */ -static int tcp_write_timeout(struct sock *sk) +int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c1e292d..9a242a5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -107,8 +107,8 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } -static int inet6_create(struct net *net, struct socket *sock, int protocol, - int kern) +int inet6_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct inet_sock *inet; struct ipv6_pinfo *np; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 293bdc8..c226cf6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -71,12 +71,6 @@ #include -static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); -static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, - struct request_sock *req); - -static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); - #ifdef CONFIG_TCP_MD5SIG static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; @@ -88,7 +82,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, } #endif -static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -315,7 +309,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, return err; } -static void tcp_v6_mtu_reduced(struct sock *sk) +void tcp_v6_mtu_reduced(struct sock *sk) { struct dst_entry *dst; @@ -495,7 +489,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, } -static void tcp_v6_reqsk_destructor(struct request_sock *req) +void tcp_v6_reqsk_destructor(struct request_sock *req) { kfree(inet_rsk(req)->ipv6_opt); kfree_skb(inet_rsk(req)->pktopts); @@ -877,7 +871,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 kfree_skb(buff); } -static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) +void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); u32 seq = 0, ack_seq = 0; @@ -975,8 +969,8 @@ 
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) inet_twsk_put(tw); } -static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, - struct request_sock *req) +void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, + struct request_sock *req) { /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. @@ -997,7 +991,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, } -static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) +struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) { #ifdef CONFIG_SYN_COOKIES const struct tcphdr *th = tcp_hdr(skb); @@ -1008,7 +1002,7 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) return sk; } -static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) +int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); @@ -1034,11 +1028,11 @@ static void tcp_v6_restore_cb(struct sk_buff *skb) sizeof(struct inet6_skb_parm)); } -static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst, - struct request_sock *req_unhash, - bool *own_req) +struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req) { struct inet_request_sock *ireq; struct ipv6_pinfo *newnp; @@ -1250,7 +1244,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * * This is because we cannot sleep with the original spinlock * held. */ -static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) +int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp; @@ -1378,8 +1372,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } -static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, - const struct tcphdr *th) +void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, + const struct tcphdr *th) { /* This is tricky: we move IP6CB at its correct location into * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because @@ -1522,13 +1516,16 @@ static int tcp_v6_rcv(struct sk_buff *skb) sk_incoming_cpu_update(sk); - bh_lock_sock_nested(sk); - tcp_segs_in(tcp_sk(sk), skb); - ret = 0; - if (!sock_owned_by_user(sk)) { - ret = tcp_v6_do_rcv(sk, skb); - } else if (tcp_add_backlog(sk, skb)) { - goto discard_and_relse; + if (likely(!tcp_sk(sk)->op_ops->rx)) { + bh_lock_sock_nested(sk); + tcp_segs_in(tcp_sk(sk), skb); + ret = 0; + if (!sock_owned_by_user(sk)) + ret = tcp_v6_do_rcv(sk, skb); + else if (tcp_add_backlog(sk, skb)) + goto discard_and_relse; + } else { + return(tcp_sk(sk)->op_ops->rx(sk, skb, refcounted)); } bh_unlock_sock(sk); @@ -1549,6 +1546,10 @@ static int tcp_v6_rcv(struct sk_buff *skb) bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { + if (unlikely(tcp_process_unclaimed)) { + if (tcp_process_unclaimed(sk, skb)) + return(0); + } tcp_v6_send_reset(NULL, skb); } @@ -1594,6 +1595,10 @@ static int tcp_v6_rcv(struct sk_buff *skb) refcounted = false; goto process; } + if (unlikely(tcp_process_unclaimed)) { + if (tcp_process_unclaimed(sk, skb)) + return 0; + } } /* to ACK */ /* fall through */ -- 2.7.4
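
Not part of the patch, for reviewers only: a minimal sketch of how a module
might attach to the tcp_process_unclaimed hook that this patch consults in
tcp_v4_rcv()/tcp_v6_rcv() before replying to segments no socket claimed. The
process_unclaimed typedef and the extern declaration of tcp_process_unclaimed
are not visible in this diff, so the signature used below (int return, sk/skb
arguments, nonzero meaning the segment was consumed) is an assumption based on
the call sites.

/* Hypothetical example; hook typedef and extern declaration assumed. */
#include <linux/module.h>
#include <net/sock.h>
#include <net/tcp.h>

static int sample_process_unclaimed(struct sock *sk, struct sk_buff *skb)
{
	/* Returning 0 declines the segment, so the existing
	 * tcp_v4_send_reset()/tcp_v6_send_reset() and time-wait
	 * handling run exactly as they did before this patch.
	 */
	return 0;
}

static int __init sample_init(void)
{
	/* The receive paths only call the hook when the pointer is
	 * set; the test sits under unlikely() in both rcv functions.
	 */
	tcp_process_unclaimed = sample_process_unclaimed;
	return 0;
}

static void __exit sample_exit(void)
{
	tcp_process_unclaimed = NULL;
}

module_init(sample_init);
module_exit(sample_exit);
MODULE_LICENSE("GPL");

An MPTCP-style layer would inspect the skb in the handler and return nonzero
once it has consumed or freed it, which makes tcp_v4_rcv()/tcp_v6_rcv()
return 0 instead of sending a reset.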