From mboxrd@z Thu Jan 1 00:00:00 1970 From: Florian Westphal Subject: [RFC PATCH 2/3] sk_buff: add skb extension infrastructure Date: Mon, 26 Nov 2018 12:38:56 +0100 Message-ID: <20181126113857.29270-3-fw@strlen.de> References: <20181126113857.29270-1-fw@strlen.de> Cc: Florian Westphal To: Return-path: Received: from Chamillionaire.breakpoint.cc ([146.0.238.67]:46432 "EHLO Chamillionaire.breakpoint.cc" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726247AbeKZWfp (ORCPT ); Mon, 26 Nov 2018 17:35:45 -0500 In-Reply-To: <20181126113857.29270-1-fw@strlen.de> Sender: netdev-owner@vger.kernel.org List-ID: Adds an extension infrastructure for sk_buff: 1. extension memory is released when the sk_buff is freed. 2. data is shared after cloning an skb. This is also how xfrm and bridge netfilter skb-associated data (skb->sp and skb->nf_bridge) are handled. Two new members are added to sk_buff: 1. 'active_extensions' byte (filling a hole), telling which extensions have been allocated for the skb. 2. extension pointer, located at the end of the sk_buff. If active_extensions is 0, the pointer's content is undefined. The 'nf_bridge' pointer is removed in a followup patch, i.e. the sk_buff size remains the same. This adds extra code to skb clone and free paths (to deal with refcount/free of extension area) but replaces the existing code that deals with skb->nf_bridge. This patch only adds the basic infrastructure; the nf_bridge conversion is done in the next patch. Conversion of skb->sp (ipsec/xfrm secpath) to an skb extension is planned as a followup. 
Signed-off-by: Florian Westphal --- include/linux/skbuff.h | 124 +++++++++++++++++++++++++++++++++++++- net/Kconfig | 3 + net/core/skbuff.c | 131 +++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_output.c | 1 + net/ipv6/ip6_output.c | 1 + 5 files changed, 259 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 73902acf2b71..832904d71a85 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -245,6 +245,7 @@ struct iov_iter; struct napi_struct; struct bpf_prog; union bpf_attr; +struct skb_ext; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack { @@ -633,6 +634,7 @@ typedef unsigned char *sk_buff_data_t; * @queue_mapping: Queue mapping for multiqueue devices * @xmit_more: More SKBs are pending for this queue * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves + * @active_extensions: active extensions (skb_ext_id types) * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -662,6 +664,7 @@ typedef unsigned char *sk_buff_data_t; * @data: Data head pointer * @truesize: Buffer size * @users: User count - see {datagram,tcp}.c + * @extensions: allocated extensions, valid if active_extensions is nonzero */ struct sk_buff { @@ -744,7 +747,9 @@ struct sk_buff { head_frag:1, xmit_more:1, pfmemalloc:1; - +#ifdef CONFIG_SKB_EXTENSIONS + __u8 active_extensions; +#endif /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() */ @@ -866,6 +871,11 @@ struct sk_buff { *data; unsigned int truesize; refcount_t users; + +#ifdef CONFIG_SKB_EXTENSIONS + /* only useable after checking ->active_extensions != 0 */ + struct skb_ext *extensions; +#endif }; #ifdef __KERNEL__ @@ -3889,6 +3899,118 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct) atomic_inc(&nfct->use); } #endif + +#ifdef 
CONFIG_SKB_EXTENSIONS +enum skb_ext_id { +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + SKB_EXT_BRIDGE_NF, +#endif + SKB_EXT_NUM, /* must be last */ +}; + +/* each extension aligned to this value */ +#define SKB_EXT_ALIGN 8 +/* offsets/len: left-shift needed to translate offset to bytes */ +#define SKB_EXT_ALIGN_SHIFT 3 + +/** + * struct skb_ext - sk_buff extensions + * @refcnt: 1 on allocation, deallocated on 0 + * @offset: offset to add to the start of this struct to obtain extension address + * @len: size currently allocated, stored in SKB_EXT_ALIGN_SHIFT units + * @data: start of extension data, variable sized + * + * Note: offsets and len are stored in chunks of 8 bytes, which allows + * using 'u8' types while supporting up to 2kb worth of extension data. + */ +struct skb_ext { + refcount_t refcnt; + u8 offset[SKB_EXT_NUM]; /* chunks of 8 bytes */ + u8 len; /* same, i.e. size == len << 3 */ + char data[0] __aligned(SKB_EXT_ALIGN); +}; + +void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id); +void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id); +void __skb_ext_free(struct skb_ext *ext); + +static inline void __skb_ext_put(struct skb_ext *ext) +{ + if (ext && refcount_dec_and_test(&ext->refcnt)) + __skb_ext_free(ext); +} + +static inline void skb_ext_put(struct sk_buff *skb) +{ + if (skb->active_extensions) + __skb_ext_put(skb->extensions); +} + +static inline void skb_ext_get(struct sk_buff *skb) +{ + if (skb->active_extensions) { + struct skb_ext *ext = skb->extensions; + + if (ext) + refcount_inc(&ext->refcnt); + } +} + +static inline void __skb_ext_copy(struct sk_buff *dst, + const struct sk_buff *src) +{ + dst->active_extensions = src->active_extensions; + + if (src->active_extensions) { + struct skb_ext *ext = src->extensions; + + if (ext) + refcount_inc(&ext->refcnt); + dst->extensions = ext; + } +} + +static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *src) +{ + skb_ext_put(dst); + __skb_ext_copy(dst, src); +} + +static inline bool 
__skb_ext_exist(const struct skb_ext *ext, enum skb_ext_id i) +{ + return !!ext->offset[i]; +} + +static inline bool skb_ext_exist(const struct sk_buff *skb, enum skb_ext_id id) +{ + return skb->active_extensions & (1 << id); +} + +static inline void skb_ext_del(struct sk_buff *skb, enum skb_ext_id id) +{ + if (skb_ext_exist(skb, id)) + __skb_ext_del(skb, id); +} + +static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id) +{ + if (skb_ext_exist(skb, id)) { + struct skb_ext *ext = skb->extensions; + + if (ext && __skb_ext_exist(ext, id)) + return (void *)ext + (ext->offset[id] << 3); + } + + return NULL; +} +#else +static inline void skb_ext_put(struct sk_buff *skb) {} +static inline void skb_ext_get(struct sk_buff *skb) {} +static inline void skb_ext_del(struct sk_buff *skb, int unused) {} +static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {} +static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {} +#endif /* CONFIG_SKB_EXTENSIONS */ + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) { diff --git a/net/Kconfig b/net/Kconfig index f235edb593ba..93b291292860 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -51,6 +51,9 @@ config NET_INGRESS config NET_EGRESS bool +config SKB_EXTENSIONS + bool + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 02cd7ae3d0fb..e29016030633 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -617,6 +617,7 @@ void skb_release_head_state(struct sk_buff *skb) #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); #endif + skb_ext_put(skb); } /* Free everything but the sk_buff shell. 
*/ @@ -796,6 +797,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->dev = old->dev; memcpy(new->cb, old->cb, sizeof(old->cb)); skb_dst_copy(new, old); + __skb_ext_copy(new, old); #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif @@ -5531,3 +5533,132 @@ void skb_condense(struct sk_buff *skb) */ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); } + +#ifdef CONFIG_SKB_EXTENSIONS +static const u8 skb_ext_type_len[] = { +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + [SKB_EXT_BRIDGE_NF] = sizeof(struct nf_bridge_info), +#endif +}; + +static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id) +{ + return (void *)ext + (ext->offset[id] << SKB_EXT_ALIGN_SHIFT); +} + +static struct skb_ext *skb_ext_cow(unsigned int len, + struct skb_ext *old) +{ + struct skb_ext *new = kmalloc(len, GFP_ATOMIC); + + if (!new) + return NULL; + + if (!old) { + memset(new->offset, 0, sizeof(new->offset)); + refcount_set(&new->refcnt, 1); + return new; + } + + memcpy(new, old, old->len << SKB_EXT_ALIGN_SHIFT); + refcount_set(&new->refcnt, 1); + __skb_ext_put(old); + return new; +} + +static __always_inline unsigned int skb_ext_total_length(void) +{ + return 0 + +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + skb_ext_type_len[SKB_EXT_BRIDGE_NF] + +#endif + 0; +} + +/** + * skb_ext_add - allocate space for given extension, COW if needed + * @skb: buffer + * @id: extension to allocate space for + * + * Allocates enough space for the given extension. + * If the extension is already present, a pointer to that extension + * is returned. + * + * If the skb was cloned, COW applies and the returned memory can be + * modified without changing the extension space of cloned buffers. + * + * Returns pointer to the extension or NULL on allocation failure. 
+ */ +void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id) +{ + unsigned int newlen, newoff, oldlen; + struct skb_ext *new, *old = NULL; + bool cow_needed = true; + + BUILD_BUG_ON(SKB_EXT_NUM >= 8); + BUILD_BUG_ON(skb_ext_total_length() > (255 << 3)); + + if (skb->active_extensions) { + old = skb->extensions; + + cow_needed = refcount_read(&old->refcnt) > 1; + + if (__skb_ext_exist(old, id)) { + if (!cow_needed) { + new = old; + goto set_active; + } + + /* extension was allocated previously and it + * might be used by a cloned skb. COW needed. + */ + new = skb_ext_cow(old->len << SKB_EXT_ALIGN_SHIFT, old); + if (!new) + return NULL; + + skb->extensions = new; + goto set_active; + } + oldlen = old->len << SKB_EXT_ALIGN_SHIFT; + } else { + oldlen = sizeof(*new); + } + + newoff = ALIGN(oldlen, SKB_EXT_ALIGN); + newlen = newoff + skb_ext_type_len[id]; + + if (cow_needed) + new = skb_ext_cow(newlen, old); + else + new = krealloc(old, newlen, GFP_ATOMIC); + if (!new) + return NULL; + + new->offset[id] = newoff >> SKB_EXT_ALIGN_SHIFT; + new->len = newlen >> SKB_EXT_ALIGN_SHIFT; + skb->extensions = new; +set_active: + skb->active_extensions |= 1 << id; + return skb_ext_get_ptr(new, id); +} +EXPORT_SYMBOL(skb_ext_add); + +void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id) +{ + struct skb_ext *ext; + + skb->active_extensions &= ~(1 << id); + if (skb->active_extensions == 0) { + ext = skb->extensions; + skb->extensions = NULL; + __skb_ext_put(ext); + } +} +EXPORT_SYMBOL(__skb_ext_del); + +void __skb_ext_free(struct skb_ext *ext) +{ + kfree(ext); +} +EXPORT_SYMBOL(__skb_ext_free); +#endif /* CONFIG_SKB_EXTENSIONS */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c09219e7f230..a12e12f983d5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -533,6 +533,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); + skb_ext_copy(to, from); #if 
IS_ENABLED(CONFIG_IP_VS) to->ipvs_property = from->ipvs_property; #endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 89e0d5118afe..7eeb0f24be87 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -574,6 +574,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); + skb_ext_copy(to, from); skb_copy_secmark(to, from); } -- 2.18.1