* Re: [MPTCP] [PATCH v2 11/25] tcp_extra_options: Make extra-option list per-socket
@ 2017-11-29 1:44 Mat Martineau
0 siblings, 0 replies; 3+ messages in thread
From: Mat Martineau @ 2017-11-29 1:44 UTC (permalink / raw)
To: mptcp
[-- Attachment #1: Type: text/plain, Size: 21726 bytes --]
On Mon, 27 Nov 2017, Christoph Paasch wrote:
> Each TCP-socket, TCP request-socket and TCP time-wait socket has its own
> list. That way we can send the proper options in each state.
>
> The list is copied across the different socket-types when needed.
>
> Signed-off-by: Christoph Paasch <cpaasch(a)apple.com>
> ---
> include/linux/tcp.h | 5 ++
> include/net/tcp.h | 49 +++++++++--
> net/ipv4/tcp.c | 214 ++++++++++++++++++++++++++++++-----------------
> net/ipv4/tcp_input.c | 5 ++
> net/ipv4/tcp_ipv4.c | 14 +++-
> net/ipv4/tcp_minisocks.c | 10 +++
> net/ipv6/tcp_ipv6.c | 9 ++
> 7 files changed, 219 insertions(+), 87 deletions(-)
>
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index a2a1676dfc52..4f653e3bd33f 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -156,6 +156,7 @@ struct tcp_request_sock {
> * FastOpen it's the seq#
> * after data-in-SYN.
> */
> + struct list_head tcp_option_list;
> };
>
> static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
> @@ -391,6 +392,8 @@ struct tcp_sock {
> */
> struct request_sock *fastopen_rsk;
> u32 *saved_syn;
> +
> + struct list_head tcp_option_list;
> };
>
> enum tsq_enum {
> @@ -435,6 +438,8 @@ struct tcp_timewait_sock {
> u32 tw_last_oow_ack_time;
>
> long tw_ts_recent_stamp;
> +
> + struct list_head tcp_option_list;
> #ifdef CONFIG_TCP_MD5SIG
> struct tcp_md5sig_key *tw_md5_key;
> #endif
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 0981a1b429ca..1f34b1708990 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -2066,31 +2066,56 @@ extern struct static_key_false tcp_have_smc;
> #endif
>
> extern struct static_key_false tcp_extra_options_enabled;
> +struct tcp_extra_option_store;
>
> struct tcp_extra_option_ops {
> - struct list_head list;
> unsigned char option_kind;
> unsigned char priority;
> void (*parse)(int opsize, const unsigned char *opptr,
> const struct sk_buff *skb,
> struct tcp_options_received *opt_rx,
> - struct sock *sk);
> + struct sock *sk,
> + struct tcp_extra_option_store *store);
> /* Return the number of bytes consumed */
> unsigned int (*prepare)(struct sk_buff *skb, u8 flags,
> unsigned int remaining,
> struct tcp_out_options *opts,
> - const struct sock *sk);
> + const struct sock *sk,
> + struct tcp_extra_option_store *store);
> void (*write)(__be32 *ptr, struct sk_buff *skb,
> - struct tcp_out_options *opts, struct sock *sk);
> + struct tcp_out_options *opts, struct sock *sk,
> + struct tcp_extra_option_store *store);
> void (*response_write)(__be32 *ptr, struct sk_buff *orig,
> struct tcphdr *th,
> struct tcp_out_options *opts,
> - const struct sock *sk);
> + const struct sock *sk,
> + struct tcp_extra_option_store *store);
> int (*add_header_len)(const struct sock *listener,
> - const struct sock *sk);
> + const struct sock *sk,
> + struct tcp_extra_option_store *store);
> + struct tcp_extra_option_store *(*copy)(struct sock *listener,
> + struct request_sock *req,
> + struct tcp_options_received *opt,
> + struct tcp_extra_option_store *from);
> + struct tcp_extra_option_store *(*move)(struct tcp_extra_option_store *from);
> + void (*destroy)(struct tcp_extra_option_store *store);
> struct module *owner;
> };
>
> +/* Every extra-option in a TCP-socket gets stored in a _store structure which
> + * has to have tcp_extra_option_store as the first element.
> + *
> + * That way, an extra option registers by passing this to tcp_register_extra_option,
> + * and the surrounding structure can contain per-option state.
> + */
> +struct tcp_extra_option_store {
> + struct list_head list;
> + const struct tcp_extra_option_ops *ops;
> +};
> +
> +struct tcp_extra_option_store *tcp_extra_options_find_kind(unsigned char kind,
> + const struct sock *sk);
> +
> void tcp_extra_options_parse(int opcode, int opsize, const unsigned char *opptr,
> const struct sk_buff *skb,
> struct tcp_options_received *opt_rx,
> @@ -2113,7 +2138,15 @@ void tcp_extra_options_response_write(__be32 *ptr, struct sk_buff *orig,
> int tcp_extra_options_add_header(const struct sock *listener,
> const struct sock *sk);
>
> -int tcp_register_extra_option(struct tcp_extra_option_ops *ops);
> -void tcp_unregister_extra_option(struct tcp_extra_option_ops *ops);
> +int tcp_register_extra_option(struct tcp_extra_option_store *store,
> + struct sock *sk);
> +
> +void tcp_extra_options_copy(struct sock *listener,
> + struct request_sock *req,
> + struct tcp_options_received *opt);
> +
> +void tcp_extra_options_move(struct list_head *from, struct list_head *to);
> +
> +void tcp_extra_options_destroy(struct sock *sk);
>
> #endif /* _TCP_H */
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 487f28222d3c..01e646873613 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -308,8 +308,6 @@ EXPORT_SYMBOL(tcp_sockets_allocated);
> /*
> * Optional TCP option handlers
> */
> -static DEFINE_SPINLOCK(tcp_option_list_lock);
> -static LIST_HEAD(tcp_option_list);
> DEFINE_STATIC_KEY_FALSE(tcp_extra_options_enabled);
>
> /*
> @@ -423,6 +421,7 @@ void tcp_init_sock(struct sock *sk)
> tcp_init_xmit_timers(sk);
> INIT_LIST_HEAD(&tp->tsq_node);
> INIT_LIST_HEAD(&tp->tsorted_sent_queue);
> + INIT_LIST_HEAD(&tp->tcp_option_list);
>
> icsk->icsk_rto = TCP_TIMEOUT_INIT;
> tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
> @@ -3479,19 +3478,32 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
>
> #endif
>
> -/* Linear search, few entries are expected. The RCU read lock must
> - * be held before calling.
> - */
> -static struct tcp_extra_option_ops *tcp_extra_options_find_kind(unsigned char kind)
> +static struct list_head *tcp_extra_options_get_list(const struct sock *sk)
> {
> - struct tcp_extra_option_ops *entry;
> + if (sk_fullsock(sk))
> + return &tcp_sk(sk)->tcp_option_list;
> + else if (sk->sk_state == TCP_NEW_SYN_RECV)
> + return &tcp_rsk(inet_reqsk(sk))->tcp_option_list;
> + else if (sk->sk_state == TCP_TIME_WAIT)
> + return &tcp_twsk(sk)->tcp_option_list;
> +
> + return NULL;
> +}
> +
> +struct tcp_extra_option_store *tcp_extra_options_find_kind(unsigned char kind,
> + const struct sock *sk)
> +{
> + struct tcp_extra_option_store *entry;
> + struct list_head *lhead;
> +
> + lhead = tcp_extra_options_get_list(sk);
>
> - list_for_each_entry_rcu(entry, &tcp_option_list, list) {
> - if (entry->option_kind == kind)
> - return entry;
> + list_for_each_entry(entry, lhead, list) {
> + if (entry->ops->option_kind == kind)
> + break;
> }
>
> - return NULL;
> + return entry;
> }
>
> void tcp_extra_options_parse(int opcode, int opsize, const unsigned char *opptr,
> @@ -3499,75 +3511,79 @@ void tcp_extra_options_parse(int opcode, int opsize, const unsigned char *opptr,
> struct tcp_options_received *opt_rx,
> struct sock *sk)
> {
> - struct tcp_extra_option_ops *entry;
> + struct tcp_extra_option_store *entry;
> +
> + entry = tcp_extra_options_find_kind(opcode, sk);
>
> - rcu_read_lock();
> - entry = tcp_extra_options_find_kind(opcode);
> - if (entry && entry->parse)
> - entry->parse(opsize, opptr, skb, opt_rx, sk);
> - rcu_read_unlock();
> + if (entry && entry->ops->parse)
> + entry->ops->parse(opsize, opptr, skb, opt_rx, sk, entry);
> }
> EXPORT_SYMBOL_GPL(tcp_extra_options_parse);
>
> -/* The RCU read lock must be held before calling, and should span both
> - * the call to this function and tcp_extra_options_write to ensure that
> - * tcp_option_list does not change between the two calls. To preserve
> - * expected option alignment, always returns a multiple of 4 bytes.
> - */
> unsigned int tcp_extra_options_prepare(struct sk_buff *skb, u8 flags,
> unsigned int remaining,
> struct tcp_out_options *opts,
> const struct sock *sk)
> {
> - struct tcp_extra_option_ops *entry;
> + struct tcp_extra_option_store *entry;
> + struct list_head *lhead;
> unsigned int used = 0;
>
> - list_for_each_entry_rcu(entry, &tcp_option_list, list) {
> - if (unlikely(!entry->prepare))
> + if (!sk)
> + return 0;
> +
> + lhead = tcp_extra_options_get_list(sk);
> +
> + list_for_each_entry(entry, lhead, list) {
> + if (unlikely(!entry->ops->prepare))
> continue;
>
> - used += entry->prepare(skb, flags, remaining - used, opts, sk);
> + used += entry->ops->prepare(skb, flags, remaining - used, opts,
> + sk, entry);
> }
>
> return roundup(used, 4);
> }
> EXPORT_SYMBOL_GPL(tcp_extra_options_prepare);
>
> -/* The RCU read lock must be held before calling, and should span both
> - * the call to tcp_extra_options_write and this function to ensure that
> - * tcp_option_list does not change between the two calls.
> - */
> void tcp_extra_options_write(__be32 *ptr, struct sk_buff *skb,
> struct tcp_out_options *opts,
> struct sock *sk)
> {
> - struct tcp_extra_option_ops *entry;
> + struct tcp_extra_option_store *entry;
> + struct list_head *lhead;
> +
> + if (!sk)
> + return;
>
> - list_for_each_entry_rcu(entry, &tcp_option_list, list) {
> - if (unlikely(!entry->write))
> + lhead = tcp_extra_options_get_list(sk);
> +
> + list_for_each_entry_rcu(entry, lhead, list) {
An _rcu iterator snuck in here.
> + if (unlikely(!entry->ops->write))
> continue;
>
> - entry->write(ptr, skb, opts, sk);
> + entry->ops->write(ptr, skb, opts, sk, entry);
> }
> }
> EXPORT_SYMBOL_GPL(tcp_extra_options_write);
>
> -/* The RCU read lock must be held before calling, and should span both
> - * the call to tcp_extra_options_write and this function to ensure that
> - * tcp_option_list does not change between the two calls.
> - */
> void tcp_extra_options_response_write(__be32 *ptr, struct sk_buff *orig,
> struct tcphdr *th,
> struct tcp_out_options *opts,
> const struct sock *sk)
> {
> - struct tcp_extra_option_ops *entry;
> + struct tcp_extra_option_store *entry;
>
> - list_for_each_entry_rcu(entry, &tcp_option_list, list) {
> - if (unlikely(!entry->write))
> + if (!sk)
> + return;
> +
> + BUG_ON(!sk_fullsock(sk));
> +
> + list_for_each_entry_rcu(entry, &tcp_sk(sk)->tcp_option_list, list) {
Another _rcu
Mat
> + if (unlikely(!entry->ops->response_write))
> continue;
>
> - entry->response_write(ptr, orig, th, opts, sk);
> + entry->ops->response_write(ptr, orig, th, opts, sk, entry);
> }
> }
> EXPORT_SYMBOL_GPL(tcp_extra_options_response_write);
> @@ -3575,52 +3591,49 @@ EXPORT_SYMBOL_GPL(tcp_extra_options_response_write);
> int tcp_extra_options_add_header(const struct sock *listener,
> const struct sock *sk)
> {
> - struct tcp_extra_option_ops *entry;
> + struct tcp_extra_option_store *entry;
> int tcp_header_len = 0;
>
> - rcu_read_lock();
> - list_for_each_entry_rcu(entry, &tcp_option_list, list) {
> - if (unlikely(!entry->add_header_len))
> + list_for_each_entry(entry, &tcp_sk(sk)->tcp_option_list, list) {
> + if (unlikely(!entry->ops->add_header_len))
> continue;
>
> - tcp_header_len += entry->add_header_len(listener, sk);
> + tcp_header_len += entry->ops->add_header_len(listener, sk, entry);
> }
> - rcu_read_unlock();
>
> return tcp_header_len;
> }
>
> -int tcp_register_extra_option(struct tcp_extra_option_ops *ops)
> +int tcp_register_extra_option(struct tcp_extra_option_store *store,
> + struct sock *sk)
> {
> - struct tcp_extra_option_ops *entry;
> - struct list_head *add_before = &tcp_option_list;
> + struct list_head *lhead, *add_before;
> + struct tcp_extra_option_store *entry;
> int ret = 0;
>
> - if (!ops->option_kind)
> + lhead = tcp_extra_options_get_list(sk);
> + add_before = lhead;
> +
> + if (!store->ops->option_kind)
> return -EINVAL;
>
> - if (!try_module_get(ops->owner))
> + if (!try_module_get(store->ops->owner))
> return -ENOENT;
>
> - spin_lock(&tcp_option_list_lock);
> -
> - list_for_each_entry_rcu(entry, &tcp_option_list, list) {
> - if (entry->option_kind == ops->option_kind) {
> + list_for_each_entry(entry, lhead, list) {
> + if (entry->ops->option_kind == store->ops->option_kind) {
> pr_notice("Option kind %u already registered\n",
> - ops->option_kind);
> - spin_unlock(&tcp_option_list_lock);
> - module_put(ops->owner);
> + store->ops->option_kind);
> + module_put(store->ops->owner);
> return -EEXIST;
> }
>
> - if (entry->priority <= ops->priority)
> + if (entry->ops->priority <= store->ops->priority)
> add_before = &entry->list;
> }
>
> - list_add_tail_rcu(&ops->list, add_before);
> - pr_debug("Option kind %u registered\n", ops->option_kind);
> -
> - spin_unlock(&tcp_option_list_lock);
> + list_add_tail(&store->list, add_before);
> + pr_debug("Option kind %u registered\n", store->ops->option_kind);
>
> static_branch_inc(&tcp_extra_options_enabled);
>
> @@ -3628,19 +3641,72 @@ int tcp_register_extra_option(struct tcp_extra_option_ops *ops)
> }
> EXPORT_SYMBOL_GPL(tcp_register_extra_option);
>
> -void tcp_unregister_extra_option(struct tcp_extra_option_ops *ops)
> +void tcp_extra_options_copy(struct sock *listener,
> + struct request_sock *req,
> + struct tcp_options_received *opt)
> +{
> + struct tcp_extra_option_store *entry;
> + struct list_head *from, *to;
> +
> + from = tcp_extra_options_get_list(listener);
> + to = tcp_extra_options_get_list(req_to_sk(req));
> +
> + list_for_each_entry(entry, from, list) {
> + struct tcp_extra_option_store *new;
> +
> + if (!try_module_get(entry->ops->owner)) {
> + pr_err("%s Module get failed while copying\n", __func__);
> + continue;
> + }
> +
> + new = entry->ops->copy(listener, req, opt, entry);
> + if (!new) {
> + module_put(entry->ops->owner);
> + continue;
> + }
> +
> + list_add_tail(&new->list, to);
> +
> + static_branch_inc(&tcp_extra_options_enabled);
> + }
> +}
> +
> +void tcp_extra_options_move(struct list_head *from, struct list_head *to)
> {
> - spin_lock(&tcp_option_list_lock);
> - list_del_rcu(&ops->list);
> - spin_unlock(&tcp_option_list_lock);
> + struct tcp_extra_option_store *entry, *tmp;
>
> - synchronize_net();
> + list_for_each_entry_safe(entry, tmp, from, list) {
> + list_del(&entry->list);
>
> - static_branch_dec(&tcp_extra_options_enabled);
> + if (entry->ops->move) {
> + if (entry->ops->move(entry))
> + continue;
> + }
>
> - module_put(ops->owner);
> + list_add_tail(&entry->list, to);
> + }
> }
> -EXPORT_SYMBOL_GPL(tcp_unregister_extra_option);
> +EXPORT_SYMBOL_GPL(tcp_extra_options_move);
> +
> +void tcp_extra_options_destroy(struct sock *sk)
> +{
> + struct tcp_extra_option_store *entry, *tmp;
> + struct list_head *lhead;
> +
> + lhead = tcp_extra_options_get_list(sk);
> +
> + list_for_each_entry_safe(entry, tmp, lhead, list) {
> + struct module *owner = entry->ops->owner;
> + list_del(&entry->list);
> +
> + entry->ops->destroy(entry);
> +
> + static_branch_dec(&tcp_extra_options_enabled);
> +
> + module_put(owner);
> + }
> +}
> +EXPORT_SYMBOL_GPL(tcp_extra_options_destroy);
>
> void tcp_done(struct sock *sk)
> {
> @@ -3788,8 +3854,6 @@ void __init tcp_init(void)
> INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
> }
>
> - INIT_LIST_HEAD(&tcp_option_list);
> -
> cnt = tcp_hashinfo.ehash_mask + 1;
> sysctl_tcp_max_orphans = cnt / 2;
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 028da0a4eff5..4e84f299c96f 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -6133,6 +6133,7 @@ static void tcp_openreq_init(struct request_sock *req,
> #if IS_ENABLED(CONFIG_SMC)
> ireq->smc_ok = rx_opt->smc_ok;
> #endif
> + INIT_LIST_HEAD(&tcp_rsk(req)->tcp_option_list);
> }
>
> struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
> @@ -6309,6 +6310,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
> tcp_reqsk_record_syn(sk, req, skb);
> fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
> }
> +
> + if (static_branch_unlikely(&tcp_extra_options_enabled))
> + tcp_extra_options_copy(sk, req, &tmp_opt);
> +
> if (fastopen_sk) {
> af_ops->send_synack(fastopen_sk, dst, &fl, req,
> &foc, TCP_SYNACK_FASTOPEN);
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index f717069ad778..4a47bfd12662 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -692,12 +692,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
>
> memset(&opts, 0, sizeof(opts));
>
> - rcu_read_lock();
> used = tcp_extra_options_prepare(NULL, TCPHDR_RST, remaining,
> &opts, sk);
>
> tcp_extra_options_response_write(&rep.opt[0], skb, &rep.th, &opts, sk);
> - rcu_read_unlock();
>
> arg.iov[0].iov_len += used;
> offset += used / 4;
> @@ -825,12 +823,10 @@ static void tcp_v4_send_ack(const struct sock *sk,
> #endif
>
> memset(&opts, 0, sizeof(opts));
> - rcu_read_lock();
> used = tcp_extra_options_prepare(NULL, TCPHDR_ACK, remaining,
> &opts, sk);
>
> tcp_extra_options_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
> - rcu_read_unlock();
>
> arg.iov[0].iov_len += used;
> offset += used / 4;
> @@ -952,6 +948,9 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
> */
> static void tcp_v4_reqsk_destructor(struct request_sock *req)
> {
> + if (static_branch_unlikely(&tcp_extra_options_enabled))
> + tcp_extra_options_destroy(req_to_sk(req));
> +
> kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
> }
>
> @@ -1446,6 +1445,11 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
>
> tcp_initialize_rcv_mss(newsk);
>
> + if (static_branch_unlikely(&tcp_extra_options_enabled)) {
> + tcp_extra_options_move(&tcp_rsk(req)->tcp_option_list,
> + &newtp->tcp_option_list);
> + INIT_LIST_HEAD(&tcp_rsk(req)->tcp_option_list);
> + }
> #ifdef CONFIG_TCP_MD5SIG
> /* Copy over the MD5 key from the original socket */
> key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
> @@ -1945,6 +1949,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
> /* Cleans up our, hopefully empty, out_of_order_queue. */
> skb_rbtree_purge(&tp->out_of_order_queue);
>
> + if (static_branch_unlikely(&tcp_extra_options_enabled))
> + tcp_extra_options_destroy(sk);
> #ifdef CONFIG_TCP_MD5SIG
> /* Clean up the MD5 key list, if any */
> if (tp->md5sig_info) {
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index 7f9403d3272c..fb50d3d0d158 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -271,6 +271,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
> tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
> tcptw->tw_ts_offset = tp->tsoffset;
> tcptw->tw_last_oow_ack_time = 0;
> + INIT_LIST_HEAD(&tcptw->tcp_option_list);
>
> #if IS_ENABLED(CONFIG_IPV6)
> if (tw->tw_family == PF_INET6) {
> @@ -284,6 +285,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
> }
> #endif
>
> + if (static_branch_unlikely(&tcp_extra_options_enabled)) {
> + tcp_extra_options_move(&tcp_sk(sk)->tcp_option_list,
> + &tcptw->tcp_option_list);
> + INIT_LIST_HEAD(&tcp_sk(sk)->tcp_option_list);
> + }
> #ifdef CONFIG_TCP_MD5SIG
> /*
> * The timewait bucket does not have the key DB from the
> @@ -334,6 +340,9 @@ void tcp_twsk_destructor(struct sock *sk)
> if (twsk->tw_md5_key)
> kfree_rcu(twsk->tw_md5_key, rcu);
> #endif
> +
> + if (static_branch_unlikely(&tcp_extra_options_enabled))
> + tcp_extra_options_destroy(sk);
> }
> EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
>
> @@ -463,6 +472,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
>
> INIT_LIST_HEAD(&newtp->tsq_node);
> INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
> + INIT_LIST_HEAD(&newtp->tcp_option_list);
>
> tcp_init_wl(newtp, treq->rcv_isn);
>
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index ef037990ea5e..0a29ef82db35 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -500,6 +500,9 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
>
> static void tcp_v6_reqsk_destructor(struct request_sock *req)
> {
> + if (static_branch_unlikely(&tcp_extra_options_enabled))
> + tcp_extra_options_destroy(req_to_sk(req));
> +
> kfree(inet_rsk(req)->ipv6_opt);
> kfree_skb(inet_rsk(req)->pktopts);
> }
> @@ -1215,6 +1218,12 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
> newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
> newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
>
> + if (static_branch_unlikely(&tcp_extra_options_enabled)) {
> + tcp_extra_options_move(&tcp_rsk(req)->tcp_option_list,
> + &newtp->tcp_option_list);
> + INIT_LIST_HEAD(&tcp_rsk(req)->tcp_option_list);
> + }
> +
> #ifdef CONFIG_TCP_MD5SIG
> /* Copy over the MD5 key from the original socket */
> key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
> --
> 2.15.0
>
>
--
Mat Martineau
Intel OTC
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [MPTCP] [PATCH v2 11/25] tcp_extra_options: Make extra-option list per-socket
@ 2017-11-29 17:59 Christoph Paasch
0 siblings, 0 replies; 3+ messages in thread
From: Christoph Paasch @ 2017-11-29 17:59 UTC (permalink / raw)
To: mptcp
[-- Attachment #1: Type: text/plain, Size: 11409 bytes --]
On 28/11/17 - 17:44:41, Mat Martineau wrote:
> On Mon, 27 Nov 2017, Christoph Paasch wrote:
>
> > Each TCP-socket, TCP request-socket and TCP time-wait socket has its own
> > list. That way we can send the proper options in each state.
> >
> > The list is copied across the different socket-types when needed.
> >
> > Signed-off-by: Christoph Paasch <cpaasch(a)apple.com>
> > ---
> > include/linux/tcp.h | 5 ++
> > include/net/tcp.h | 49 +++++++++--
> > net/ipv4/tcp.c | 214 ++++++++++++++++++++++++++++++-----------------
> > net/ipv4/tcp_input.c | 5 ++
> > net/ipv4/tcp_ipv4.c | 14 +++-
> > net/ipv4/tcp_minisocks.c | 10 +++
> > net/ipv6/tcp_ipv6.c | 9 ++
> > 7 files changed, 219 insertions(+), 87 deletions(-)
> >
> > diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> > index a2a1676dfc52..4f653e3bd33f 100644
> > --- a/include/linux/tcp.h
> > +++ b/include/linux/tcp.h
> > @@ -156,6 +156,7 @@ struct tcp_request_sock {
> > * FastOpen it's the seq#
> > * after data-in-SYN.
> > */
> > + struct list_head tcp_option_list;
> > };
> >
> > static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
> > @@ -391,6 +392,8 @@ struct tcp_sock {
> > */
> > struct request_sock *fastopen_rsk;
> > u32 *saved_syn;
> > +
> > + struct list_head tcp_option_list;
> > };
> >
> > enum tsq_enum {
> > @@ -435,6 +438,8 @@ struct tcp_timewait_sock {
> > u32 tw_last_oow_ack_time;
> >
> > long tw_ts_recent_stamp;
> > +
> > + struct list_head tcp_option_list;
> > #ifdef CONFIG_TCP_MD5SIG
> > struct tcp_md5sig_key *tw_md5_key;
> > #endif
> > diff --git a/include/net/tcp.h b/include/net/tcp.h
> > index 0981a1b429ca..1f34b1708990 100644
> > --- a/include/net/tcp.h
> > +++ b/include/net/tcp.h
> > @@ -2066,31 +2066,56 @@ extern struct static_key_false tcp_have_smc;
> > #endif
> >
> > extern struct static_key_false tcp_extra_options_enabled;
> > +struct tcp_extra_option_store;
> >
> > struct tcp_extra_option_ops {
> > - struct list_head list;
> > unsigned char option_kind;
> > unsigned char priority;
> > void (*parse)(int opsize, const unsigned char *opptr,
> > const struct sk_buff *skb,
> > struct tcp_options_received *opt_rx,
> > - struct sock *sk);
> > + struct sock *sk,
> > + struct tcp_extra_option_store *store);
> > /* Return the number of bytes consumed */
> > unsigned int (*prepare)(struct sk_buff *skb, u8 flags,
> > unsigned int remaining,
> > struct tcp_out_options *opts,
> > - const struct sock *sk);
> > + const struct sock *sk,
> > + struct tcp_extra_option_store *store);
> > void (*write)(__be32 *ptr, struct sk_buff *skb,
> > - struct tcp_out_options *opts, struct sock *sk);
> > + struct tcp_out_options *opts, struct sock *sk,
> > + struct tcp_extra_option_store *store);
> > void (*response_write)(__be32 *ptr, struct sk_buff *orig,
> > struct tcphdr *th,
> > struct tcp_out_options *opts,
> > - const struct sock *sk);
> > + const struct sock *sk,
> > + struct tcp_extra_option_store *store);
> > int (*add_header_len)(const struct sock *listener,
> > - const struct sock *sk);
> > + const struct sock *sk,
> > + struct tcp_extra_option_store *store);
> > + struct tcp_extra_option_store *(*copy)(struct sock *listener,
> > + struct request_sock *req,
> > + struct tcp_options_received *opt,
> > + struct tcp_extra_option_store *from);
> > + struct tcp_extra_option_store *(*move)(struct tcp_extra_option_store *from);
> > + void (*destroy)(struct tcp_extra_option_store *store);
> > struct module *owner;
> > };
> >
> > +/* Every extra-option in a TCP-socket gets stored in a _store structure which
> > + * has to have tcp_extra_option_store as the first element.
> > + *
> > + * That way, an extra option registers by passing this to tcp_register_extra_option,
> > + * and the surrounding structure can contain per-option state.
> > + */
> > +struct tcp_extra_option_store {
> > + struct list_head list;
> > + const struct tcp_extra_option_ops *ops;
> > +};
> > +
> > +struct tcp_extra_option_store *tcp_extra_options_find_kind(unsigned char kind,
> > + const struct sock *sk);
> > +
> > void tcp_extra_options_parse(int opcode, int opsize, const unsigned char *opptr,
> > const struct sk_buff *skb,
> > struct tcp_options_received *opt_rx,
> > @@ -2113,7 +2138,15 @@ void tcp_extra_options_response_write(__be32 *ptr, struct sk_buff *orig,
> > int tcp_extra_options_add_header(const struct sock *listener,
> > const struct sock *sk);
> >
> > -int tcp_register_extra_option(struct tcp_extra_option_ops *ops);
> > -void tcp_unregister_extra_option(struct tcp_extra_option_ops *ops);
> > +int tcp_register_extra_option(struct tcp_extra_option_store *store,
> > + struct sock *sk);
> > +
> > +void tcp_extra_options_copy(struct sock *listener,
> > + struct request_sock *req,
> > + struct tcp_options_received *opt);
> > +
> > +void tcp_extra_options_move(struct list_head *from, struct list_head *to);
> > +
> > +void tcp_extra_options_destroy(struct sock *sk);
> >
> > #endif /* _TCP_H */
> > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> > index 487f28222d3c..01e646873613 100644
> > --- a/net/ipv4/tcp.c
> > +++ b/net/ipv4/tcp.c
> > @@ -308,8 +308,6 @@ EXPORT_SYMBOL(tcp_sockets_allocated);
> > /*
> > * Optional TCP option handlers
> > */
> > -static DEFINE_SPINLOCK(tcp_option_list_lock);
> > -static LIST_HEAD(tcp_option_list);
> > DEFINE_STATIC_KEY_FALSE(tcp_extra_options_enabled);
> >
> > /*
> > @@ -423,6 +421,7 @@ void tcp_init_sock(struct sock *sk)
> > tcp_init_xmit_timers(sk);
> > INIT_LIST_HEAD(&tp->tsq_node);
> > INIT_LIST_HEAD(&tp->tsorted_sent_queue);
> > + INIT_LIST_HEAD(&tp->tcp_option_list);
> >
> > icsk->icsk_rto = TCP_TIMEOUT_INIT;
> > tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
> > @@ -3479,19 +3478,32 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
> >
> > #endif
> >
> > -/* Linear search, few entries are expected. The RCU read lock must
> > - * be held before calling.
> > - */
> > -static struct tcp_extra_option_ops *tcp_extra_options_find_kind(unsigned char kind)
> > +static struct list_head *tcp_extra_options_get_list(const struct sock *sk)
> > {
> > - struct tcp_extra_option_ops *entry;
> > + if (sk_fullsock(sk))
> > + return &tcp_sk(sk)->tcp_option_list;
> > + else if (sk->sk_state == TCP_NEW_SYN_RECV)
> > + return &tcp_rsk(inet_reqsk(sk))->tcp_option_list;
> > + else if (sk->sk_state == TCP_TIME_WAIT)
> > + return &tcp_twsk(sk)->tcp_option_list;
> > +
> > + return NULL;
> > +}
> > +
> > +struct tcp_extra_option_store *tcp_extra_options_find_kind(unsigned char kind,
> > + const struct sock *sk)
> > +{
> > + struct tcp_extra_option_store *entry;
> > + struct list_head *lhead;
> > +
> > + lhead = tcp_extra_options_get_list(sk);
> >
> > - list_for_each_entry_rcu(entry, &tcp_option_list, list) {
> > - if (entry->option_kind == kind)
> > - return entry;
> > + list_for_each_entry(entry, lhead, list) {
> > + if (entry->ops->option_kind == kind)
> > + break;
> > }
> >
> > - return NULL;
> > + return entry;
> > }
> >
> > void tcp_extra_options_parse(int opcode, int opsize, const unsigned char *opptr,
> > @@ -3499,75 +3511,79 @@ void tcp_extra_options_parse(int opcode, int opsize, const unsigned char *opptr,
> > struct tcp_options_received *opt_rx,
> > struct sock *sk)
> > {
> > - struct tcp_extra_option_ops *entry;
> > + struct tcp_extra_option_store *entry;
> > +
> > + entry = tcp_extra_options_find_kind(opcode, sk);
> >
> > - rcu_read_lock();
> > - entry = tcp_extra_options_find_kind(opcode);
> > - if (entry && entry->parse)
> > - entry->parse(opsize, opptr, skb, opt_rx, sk);
> > - rcu_read_unlock();
> > + if (entry && entry->ops->parse)
> > + entry->ops->parse(opsize, opptr, skb, opt_rx, sk, entry);
> > }
> > EXPORT_SYMBOL_GPL(tcp_extra_options_parse);
> >
> > -/* The RCU read lock must be held before calling, and should span both
> > - * the call to this function and tcp_extra_options_write to ensure that
> > - * tcp_option_list does not change between the two calls. To preserve
> > - * expected option alignment, always returns a multiple of 4 bytes.
> > - */
> > unsigned int tcp_extra_options_prepare(struct sk_buff *skb, u8 flags,
> > unsigned int remaining,
> > struct tcp_out_options *opts,
> > const struct sock *sk)
> > {
> > - struct tcp_extra_option_ops *entry;
> > + struct tcp_extra_option_store *entry;
> > + struct list_head *lhead;
> > unsigned int used = 0;
> >
> > - list_for_each_entry_rcu(entry, &tcp_option_list, list) {
> > - if (unlikely(!entry->prepare))
> > + if (!sk)
> > + return 0;
> > +
> > + lhead = tcp_extra_options_get_list(sk);
> > +
> > + list_for_each_entry(entry, lhead, list) {
> > + if (unlikely(!entry->ops->prepare))
> > continue;
> >
> > - used += entry->prepare(skb, flags, remaining - used, opts, sk);
> > + used += entry->ops->prepare(skb, flags, remaining - used, opts,
> > + sk, entry);
> > }
> >
> > return roundup(used, 4);
> > }
> > EXPORT_SYMBOL_GPL(tcp_extra_options_prepare);
> >
> > -/* The RCU read lock must be held before calling, and should span both
> > - * the call to tcp_extra_options_write and this function to ensure that
> > - * tcp_option_list does not change between the two calls.
> > - */
> > void tcp_extra_options_write(__be32 *ptr, struct sk_buff *skb,
> > struct tcp_out_options *opts,
> > struct sock *sk)
> > {
> > - struct tcp_extra_option_ops *entry;
> > + struct tcp_extra_option_store *entry;
> > + struct list_head *lhead;
> > +
> > + if (!sk)
> > + return;
> >
> > - list_for_each_entry_rcu(entry, &tcp_option_list, list) {
> > - if (unlikely(!entry->write))
> > + lhead = tcp_extra_options_get_list(sk);
> > +
> > + list_for_each_entry_rcu(entry, lhead, list) {
>
> An _rcu iterator snuck in here.
>
> > + if (unlikely(!entry->ops->write))
> > continue;
> >
> > - entry->write(ptr, skb, opts, sk);
> > + entry->ops->write(ptr, skb, opts, sk, entry);
> > }
> > }
> > EXPORT_SYMBOL_GPL(tcp_extra_options_write);
> >
> > -/* The RCU read lock must be held before calling, and should span both
> > - * the call to tcp_extra_options_write and this function to ensure that
> > - * tcp_option_list does not change between the two calls.
> > - */
> > void tcp_extra_options_response_write(__be32 *ptr, struct sk_buff *orig,
> > struct tcphdr *th,
> > struct tcp_out_options *opts,
> > const struct sock *sk)
> > {
> > - struct tcp_extra_option_ops *entry;
> > + struct tcp_extra_option_store *entry;
> >
> > - list_for_each_entry_rcu(entry, &tcp_option_list, list) {
> > - if (unlikely(!entry->write))
> > + if (!sk)
> > + return;
> > +
> > + BUG_ON(!sk_fullsock(sk));
> > +
> > + list_for_each_entry_rcu(entry, &tcp_sk(sk)->tcp_option_list, list) {
>
> Another _rcu
Thanks, fixed both.
Christoph
^ permalink raw reply [flat|nested] 3+ messages in thread
* [MPTCP] [PATCH v2 11/25] tcp_extra_options: Make extra-option list per-socket
@ 2017-11-28 5:57 Christoph Paasch
0 siblings, 0 replies; 3+ messages in thread
From: Christoph Paasch @ 2017-11-28 5:57 UTC (permalink / raw)
To: mptcp
[-- Attachment #1: Type: text/plain, Size: 20651 bytes --]
Each TCP-socket, TCP request-socket and TCP time-wait socket has its own
list. That way we can send the proper options in each state.
The list is copied across the different socket-types when needed.
Signed-off-by: Christoph Paasch <cpaasch(a)apple.com>
---
include/linux/tcp.h | 5 ++
include/net/tcp.h | 49 +++++++++--
net/ipv4/tcp.c | 214 ++++++++++++++++++++++++++++++-----------------
net/ipv4/tcp_input.c | 5 ++
net/ipv4/tcp_ipv4.c | 14 +++-
net/ipv4/tcp_minisocks.c | 10 +++
net/ipv6/tcp_ipv6.c | 9 ++
7 files changed, 219 insertions(+), 87 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index a2a1676dfc52..4f653e3bd33f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -156,6 +156,7 @@ struct tcp_request_sock {
* FastOpen it's the seq#
* after data-in-SYN.
*/
+ struct list_head tcp_option_list;
};
static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -391,6 +392,8 @@ struct tcp_sock {
*/
struct request_sock *fastopen_rsk;
u32 *saved_syn;
+
+ struct list_head tcp_option_list;
};
enum tsq_enum {
@@ -435,6 +438,8 @@ struct tcp_timewait_sock {
u32 tw_last_oow_ack_time;
long tw_ts_recent_stamp;
+
+ struct list_head tcp_option_list;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *tw_md5_key;
#endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0981a1b429ca..1f34b1708990 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2066,31 +2066,56 @@ extern struct static_key_false tcp_have_smc;
#endif
extern struct static_key_false tcp_extra_options_enabled;
+struct tcp_extra_option_store;
struct tcp_extra_option_ops {
- struct list_head list;
unsigned char option_kind;
unsigned char priority;
void (*parse)(int opsize, const unsigned char *opptr,
const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
- struct sock *sk);
+ struct sock *sk,
+ struct tcp_extra_option_store *store);
/* Return the number of bytes consumed */
unsigned int (*prepare)(struct sk_buff *skb, u8 flags,
unsigned int remaining,
struct tcp_out_options *opts,
- const struct sock *sk);
+ const struct sock *sk,
+ struct tcp_extra_option_store *store);
void (*write)(__be32 *ptr, struct sk_buff *skb,
- struct tcp_out_options *opts, struct sock *sk);
+ struct tcp_out_options *opts, struct sock *sk,
+ struct tcp_extra_option_store *store);
void (*response_write)(__be32 *ptr, struct sk_buff *orig,
struct tcphdr *th,
struct tcp_out_options *opts,
- const struct sock *sk);
+ const struct sock *sk,
+ struct tcp_extra_option_store *store);
int (*add_header_len)(const struct sock *listener,
- const struct sock *sk);
+ const struct sock *sk,
+ struct tcp_extra_option_store *store);
+ struct tcp_extra_option_store *(*copy)(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt,
+ struct tcp_extra_option_store *from);
+ struct tcp_extra_option_store *(*move)(struct tcp_extra_option_store *from);
+ void (*destroy)(struct tcp_extra_option_store *store);
struct module *owner;
};
+/* Every extra-option in a TCP-socket gets stored in a _store structure which
+ * has to have tcp_extra_option_store as the first element.
+ *
+ * That way, an extra-option implementation passes this to tcp_register_extra_option,
+ * and the surrounding structure can contain per-option state.
+ */
+struct tcp_extra_option_store {
+ struct list_head list;
+ const struct tcp_extra_option_ops *ops;
+};
+
+struct tcp_extra_option_store *tcp_extra_options_find_kind(unsigned char kind,
+ const struct sock *sk);
+
void tcp_extra_options_parse(int opcode, int opsize, const unsigned char *opptr,
const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
@@ -2113,7 +2138,15 @@ void tcp_extra_options_response_write(__be32 *ptr, struct sk_buff *orig,
int tcp_extra_options_add_header(const struct sock *listener,
const struct sock *sk);
-int tcp_register_extra_option(struct tcp_extra_option_ops *ops);
-void tcp_unregister_extra_option(struct tcp_extra_option_ops *ops);
+int tcp_register_extra_option(struct tcp_extra_option_store *store,
+ struct sock *sk);
+
+void tcp_extra_options_copy(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt);
+
+void tcp_extra_options_move(struct list_head *from, struct list_head *to);
+
+void tcp_extra_options_destroy(struct sock *sk);
#endif /* _TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 487f28222d3c..01e646873613 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -308,8 +308,6 @@ EXPORT_SYMBOL(tcp_sockets_allocated);
/*
* Optional TCP option handlers
*/
-static DEFINE_SPINLOCK(tcp_option_list_lock);
-static LIST_HEAD(tcp_option_list);
DEFINE_STATIC_KEY_FALSE(tcp_extra_options_enabled);
/*
@@ -423,6 +421,7 @@ void tcp_init_sock(struct sock *sk)
tcp_init_xmit_timers(sk);
INIT_LIST_HEAD(&tp->tsq_node);
INIT_LIST_HEAD(&tp->tsorted_sent_queue);
+ INIT_LIST_HEAD(&tp->tcp_option_list);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -3479,19 +3478,32 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
#endif
-/* Linear search, few entries are expected. The RCU read lock must
- * be held before calling.
- */
-static struct tcp_extra_option_ops *tcp_extra_options_find_kind(unsigned char kind)
+static struct list_head *tcp_extra_options_get_list(const struct sock *sk)
{
- struct tcp_extra_option_ops *entry;
+ if (sk_fullsock(sk))
+ return &tcp_sk(sk)->tcp_option_list;
+ else if (sk->sk_state == TCP_NEW_SYN_RECV)
+ return &tcp_rsk(inet_reqsk(sk))->tcp_option_list;
+ else if (sk->sk_state == TCP_TIME_WAIT)
+ return &tcp_twsk(sk)->tcp_option_list;
+
+ return NULL;
+}
+
+struct tcp_extra_option_store *tcp_extra_options_find_kind(unsigned char kind,
+ const struct sock *sk)
+{
+ struct tcp_extra_option_store *entry;
+ struct list_head *lhead;
+
+ lhead = tcp_extra_options_get_list(sk);
- list_for_each_entry_rcu(entry, &tcp_option_list, list) {
- if (entry->option_kind == kind)
- return entry;
+ list_for_each_entry(entry, lhead, list) {
+ if (entry->ops->option_kind == kind)
+ break;
}
- return NULL;
+ return entry;
}
void tcp_extra_options_parse(int opcode, int opsize, const unsigned char *opptr,
@@ -3499,75 +3511,79 @@ void tcp_extra_options_parse(int opcode, int opsize, const unsigned char *opptr,
struct tcp_options_received *opt_rx,
struct sock *sk)
{
- struct tcp_extra_option_ops *entry;
+ struct tcp_extra_option_store *entry;
+
+ entry = tcp_extra_options_find_kind(opcode, sk);
- rcu_read_lock();
- entry = tcp_extra_options_find_kind(opcode);
- if (entry && entry->parse)
- entry->parse(opsize, opptr, skb, opt_rx, sk);
- rcu_read_unlock();
+ if (entry && entry->ops->parse)
+ entry->ops->parse(opsize, opptr, skb, opt_rx, sk, entry);
}
EXPORT_SYMBOL_GPL(tcp_extra_options_parse);
-/* The RCU read lock must be held before calling, and should span both
- * the call to this function and tcp_extra_options_write to ensure that
- * tcp_option_list does not change between the two calls. To preserve
- * expected option alignment, always returns a multiple of 4 bytes.
- */
unsigned int tcp_extra_options_prepare(struct sk_buff *skb, u8 flags,
unsigned int remaining,
struct tcp_out_options *opts,
const struct sock *sk)
{
- struct tcp_extra_option_ops *entry;
+ struct tcp_extra_option_store *entry;
+ struct list_head *lhead;
unsigned int used = 0;
- list_for_each_entry_rcu(entry, &tcp_option_list, list) {
- if (unlikely(!entry->prepare))
+ if (!sk)
+ return 0;
+
+ lhead = tcp_extra_options_get_list(sk);
+
+ list_for_each_entry(entry, lhead, list) {
+ if (unlikely(!entry->ops->prepare))
continue;
- used += entry->prepare(skb, flags, remaining - used, opts, sk);
+ used += entry->ops->prepare(skb, flags, remaining - used, opts,
+ sk, entry);
}
return roundup(used, 4);
}
EXPORT_SYMBOL_GPL(tcp_extra_options_prepare);
-/* The RCU read lock must be held before calling, and should span both
- * the call to tcp_extra_options_write and this function to ensure that
- * tcp_option_list does not change between the two calls.
- */
void tcp_extra_options_write(__be32 *ptr, struct sk_buff *skb,
struct tcp_out_options *opts,
struct sock *sk)
{
- struct tcp_extra_option_ops *entry;
+ struct tcp_extra_option_store *entry;
+ struct list_head *lhead;
+
+ if (!sk)
+ return;
- list_for_each_entry_rcu(entry, &tcp_option_list, list) {
- if (unlikely(!entry->write))
+ lhead = tcp_extra_options_get_list(sk);
+
+ list_for_each_entry_rcu(entry, lhead, list) {
+ if (unlikely(!entry->ops->write))
continue;
- entry->write(ptr, skb, opts, sk);
+ entry->ops->write(ptr, skb, opts, sk, entry);
}
}
EXPORT_SYMBOL_GPL(tcp_extra_options_write);
-/* The RCU read lock must be held before calling, and should span both
- * the call to tcp_extra_options_write and this function to ensure that
- * tcp_option_list does not change between the two calls.
- */
void tcp_extra_options_response_write(__be32 *ptr, struct sk_buff *orig,
struct tcphdr *th,
struct tcp_out_options *opts,
const struct sock *sk)
{
- struct tcp_extra_option_ops *entry;
+ struct tcp_extra_option_store *entry;
- list_for_each_entry_rcu(entry, &tcp_option_list, list) {
- if (unlikely(!entry->write))
+ if (!sk)
+ return;
+
+ BUG_ON(!sk_fullsock(sk));
+
+ list_for_each_entry_rcu(entry, &tcp_sk(sk)->tcp_option_list, list) {
+ if (unlikely(!entry->ops->response_write))
continue;
- entry->response_write(ptr, orig, th, opts, sk);
+ entry->ops->response_write(ptr, orig, th, opts, sk, entry);
}
}
EXPORT_SYMBOL_GPL(tcp_extra_options_response_write);
@@ -3575,52 +3591,49 @@ EXPORT_SYMBOL_GPL(tcp_extra_options_response_write);
int tcp_extra_options_add_header(const struct sock *listener,
const struct sock *sk)
{
- struct tcp_extra_option_ops *entry;
+ struct tcp_extra_option_store *entry;
int tcp_header_len = 0;
- rcu_read_lock();
- list_for_each_entry_rcu(entry, &tcp_option_list, list) {
- if (unlikely(!entry->add_header_len))
+ list_for_each_entry(entry, &tcp_sk(sk)->tcp_option_list, list) {
+ if (unlikely(!entry->ops->add_header_len))
continue;
- tcp_header_len += entry->add_header_len(listener, sk);
+ tcp_header_len += entry->ops->add_header_len(listener, sk, entry);
}
- rcu_read_unlock();
return tcp_header_len;
}
-int tcp_register_extra_option(struct tcp_extra_option_ops *ops)
+int tcp_register_extra_option(struct tcp_extra_option_store *store,
+ struct sock *sk)
{
- struct tcp_extra_option_ops *entry;
- struct list_head *add_before = &tcp_option_list;
+ struct list_head *lhead, *add_before;
+ struct tcp_extra_option_store *entry;
int ret = 0;
- if (!ops->option_kind)
+ lhead = tcp_extra_options_get_list(sk);
+ add_before = lhead;
+
+ if (!store->ops->option_kind)
return -EINVAL;
- if (!try_module_get(ops->owner))
+ if (!try_module_get(store->ops->owner))
return -ENOENT;
- spin_lock(&tcp_option_list_lock);
-
- list_for_each_entry_rcu(entry, &tcp_option_list, list) {
- if (entry->option_kind == ops->option_kind) {
+ list_for_each_entry(entry, lhead, list) {
+ if (entry->ops->option_kind == store->ops->option_kind) {
pr_notice("Option kind %u already registered\n",
- ops->option_kind);
- spin_unlock(&tcp_option_list_lock);
- module_put(ops->owner);
+ store->ops->option_kind);
+ module_put(store->ops->owner);
return -EEXIST;
}
- if (entry->priority <= ops->priority)
+ if (entry->ops->priority <= store->ops->priority)
add_before = &entry->list;
}
- list_add_tail_rcu(&ops->list, add_before);
- pr_debug("Option kind %u registered\n", ops->option_kind);
-
- spin_unlock(&tcp_option_list_lock);
+ list_add_tail(&store->list, add_before);
+ pr_debug("Option kind %u registered\n", store->ops->option_kind);
static_branch_inc(&tcp_extra_options_enabled);
@@ -3628,19 +3641,72 @@ int tcp_register_extra_option(struct tcp_extra_option_ops *ops)
}
EXPORT_SYMBOL_GPL(tcp_register_extra_option);
-void tcp_unregister_extra_option(struct tcp_extra_option_ops *ops)
+void tcp_extra_options_copy(struct sock *listener,
+ struct request_sock *req,
+ struct tcp_options_received *opt)
+{
+ struct tcp_extra_option_store *entry;
+ struct list_head *from, *to;
+
+ from = tcp_extra_options_get_list(listener);
+ to = tcp_extra_options_get_list(req_to_sk(req));
+
+ list_for_each_entry(entry, from, list) {
+ struct tcp_extra_option_store *new;
+
+ if (!try_module_get(entry->ops->owner)) {
+ pr_err("%s Module get failed while copying\n", __func__);
+ continue;
+ }
+
+ new = entry->ops->copy(listener, req, opt, entry);
+ if (!new) {
+ module_put(entry->ops->owner);
+ continue;
+ }
+
+ list_add_tail(&new->list, to);
+
+ static_branch_inc(&tcp_extra_options_enabled);
+ }
+}
+
+void tcp_extra_options_move(struct list_head *from, struct list_head *to)
{
- spin_lock(&tcp_option_list_lock);
- list_del_rcu(&ops->list);
- spin_unlock(&tcp_option_list_lock);
+ struct tcp_extra_option_store *entry, *tmp;
- synchronize_net();
+ list_for_each_entry_safe(entry, tmp, from, list) {
+ list_del(&entry->list);
- static_branch_dec(&tcp_extra_options_enabled);
+ if (entry->ops->move) {
+ if (entry->ops->move(entry))
+ continue;
+ }
- module_put(ops->owner);
+ list_add_tail(&entry->list, to);
+ }
}
-EXPORT_SYMBOL_GPL(tcp_unregister_extra_option);
+EXPORT_SYMBOL_GPL(tcp_extra_options_move);
+
+void tcp_extra_options_destroy(struct sock *sk)
+{
+ struct tcp_extra_option_store *entry, *tmp;
+ struct list_head *lhead;
+
+ lhead = tcp_extra_options_get_list(sk);
+
+ list_for_each_entry_safe(entry, tmp, lhead, list) {
+ struct module *owner = entry->ops->owner;
+ list_del(&entry->list);
+
+ entry->ops->destroy(entry);
+
+ static_branch_dec(&tcp_extra_options_enabled);
+
+ module_put(owner);
+ }
+}
+EXPORT_SYMBOL_GPL(tcp_extra_options_destroy);
void tcp_done(struct sock *sk)
{
@@ -3788,8 +3854,6 @@ void __init tcp_init(void)
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
}
- INIT_LIST_HEAD(&tcp_option_list);
-
cnt = tcp_hashinfo.ehash_mask + 1;
sysctl_tcp_max_orphans = cnt / 2;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 028da0a4eff5..4e84f299c96f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6133,6 +6133,7 @@ static void tcp_openreq_init(struct request_sock *req,
#if IS_ENABLED(CONFIG_SMC)
ireq->smc_ok = rx_opt->smc_ok;
#endif
+ INIT_LIST_HEAD(&tcp_rsk(req)->tcp_option_list);
}
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
@@ -6309,6 +6310,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_reqsk_record_syn(sk, req, skb);
fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
}
+
+ if (static_branch_unlikely(&tcp_extra_options_enabled))
+ tcp_extra_options_copy(sk, req, &tmp_opt);
+
if (fastopen_sk) {
af_ops->send_synack(fastopen_sk, dst, &fl, req,
&foc, TCP_SYNACK_FASTOPEN);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f717069ad778..4a47bfd12662 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -692,12 +692,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
memset(&opts, 0, sizeof(opts));
- rcu_read_lock();
used = tcp_extra_options_prepare(NULL, TCPHDR_RST, remaining,
&opts, sk);
tcp_extra_options_response_write(&rep.opt[0], skb, &rep.th, &opts, sk);
- rcu_read_unlock();
arg.iov[0].iov_len += used;
offset += used / 4;
@@ -825,12 +823,10 @@ static void tcp_v4_send_ack(const struct sock *sk,
#endif
memset(&opts, 0, sizeof(opts));
- rcu_read_lock();
used = tcp_extra_options_prepare(NULL, TCPHDR_ACK, remaining,
&opts, sk);
tcp_extra_options_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
- rcu_read_unlock();
arg.iov[0].iov_len += used;
offset += used / 4;
@@ -952,6 +948,9 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
*/
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
+ if (static_branch_unlikely(&tcp_extra_options_enabled))
+ tcp_extra_options_destroy(req_to_sk(req));
+
kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
@@ -1446,6 +1445,11 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
tcp_initialize_rcv_mss(newsk);
+ if (static_branch_unlikely(&tcp_extra_options_enabled)) {
+ tcp_extra_options_move(&tcp_rsk(req)->tcp_option_list,
+ &newtp->tcp_option_list);
+ INIT_LIST_HEAD(&tcp_rsk(req)->tcp_option_list);
+ }
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
@@ -1945,6 +1949,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
/* Cleans up our, hopefully empty, out_of_order_queue. */
skb_rbtree_purge(&tp->out_of_order_queue);
+ if (static_branch_unlikely(&tcp_extra_options_enabled))
+ tcp_extra_options_destroy(sk);
#ifdef CONFIG_TCP_MD5SIG
/* Clean up the MD5 key list, if any */
if (tp->md5sig_info) {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7f9403d3272c..fb50d3d0d158 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -271,6 +271,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
tcptw->tw_ts_offset = tp->tsoffset;
tcptw->tw_last_oow_ack_time = 0;
+ INIT_LIST_HEAD(&tcptw->tcp_option_list);
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
@@ -284,6 +285,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
}
#endif
+ if (static_branch_unlikely(&tcp_extra_options_enabled)) {
+ tcp_extra_options_move(&tcp_sk(sk)->tcp_option_list,
+ &tcptw->tcp_option_list);
+ INIT_LIST_HEAD(&tcp_sk(sk)->tcp_option_list);
+ }
#ifdef CONFIG_TCP_MD5SIG
/*
* The timewait bucket does not have the key DB from the
@@ -334,6 +340,9 @@ void tcp_twsk_destructor(struct sock *sk)
if (twsk->tw_md5_key)
kfree_rcu(twsk->tw_md5_key, rcu);
#endif
+
+ if (static_branch_unlikely(&tcp_extra_options_enabled))
+ tcp_extra_options_destroy(sk);
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
@@ -463,6 +472,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
INIT_LIST_HEAD(&newtp->tsq_node);
INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
+ INIT_LIST_HEAD(&newtp->tcp_option_list);
tcp_init_wl(newtp, treq->rcv_isn);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ef037990ea5e..0a29ef82db35 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -500,6 +500,9 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
+ if (static_branch_unlikely(&tcp_extra_options_enabled))
+ tcp_extra_options_destroy(req_to_sk(req));
+
kfree(inet_rsk(req)->ipv6_opt);
kfree_skb(inet_rsk(req)->pktopts);
}
@@ -1215,6 +1218,12 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
+ if (static_branch_unlikely(&tcp_extra_options_enabled)) {
+ tcp_extra_options_move(&tcp_rsk(req)->tcp_option_list,
+ &newtp->tcp_option_list);
+ INIT_LIST_HEAD(&tcp_rsk(req)->tcp_option_list);
+ }
+
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
--
2.15.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2017-11-29 17:59 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-29 1:44 [MPTCP] [PATCH v2 11/25] tcp_extra_options: Make extra-option list per-socket Mat Martineau
-- strict thread matches above, loose matches on Subject: below --
2017-11-29 17:59 Christoph Paasch
2017-11-28 5:57 Christoph Paasch
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.