* [RFC PATCH 1/5] netlink: mmap: introduce mmaped skb helper functions
2015-07-22 1:09 [RFC PATCH 0/5] netlink: mmap kernel panic and some issues Ken-ichirou MATSUZAWA
@ 2015-07-22 1:10 ` Ken-ichirou MATSUZAWA
2015-07-22 1:11 ` [RFC PATCH 2/5] netlink: mmap: apply " Ken-ichirou MATSUZAWA
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Ken-ichirou MATSUZAWA @ 2015-07-22 1:10 UTC (permalink / raw)
To: The netfilter developer mailinglist
It seems that we need helper functions for mmaped skbs, i.e. skbs
allocated by netlink_alloc_skb(), since they do not have skb_shared_info.
Signed-off-by: Ken-ichirou MATSUZAWA <chamas@h4.dion.ne.jp>
---
include/linux/netlink.h | 22 ++++---------
net/netlink/af_netlink.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 87 insertions(+), 16 deletions(-)
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 6835c12..049962e 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -68,6 +68,12 @@ extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
extern int netlink_has_listeners(struct sock *sk, unsigned int group);
extern struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
u32 dst_portid, gfp_t gfp_mask);
+extern struct sk_buff *netlink_skb_copy(const struct sk_buff *skb, gfp_t gfp_mask);
+extern struct sk_buff *netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask);
+extern int netlink_skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen);
+extern void netlink_free_skb(struct sk_buff *skb);
+void netlink_consume_skb(struct sk_buff *skb);
+
extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
__u32 group, gfp_t allocation);
@@ -86,22 +92,6 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
int netlink_sendskb(struct sock *sk, struct sk_buff *skb);
-static inline struct sk_buff *
-netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
-{
- struct sk_buff *nskb;
-
- nskb = skb_clone(skb, gfp_mask);
- if (!nskb)
- return NULL;
-
- /* This is a large skb, set destructor callback to release head */
- if (is_vmalloc_addr(skb->head))
- nskb->destructor = skb->destructor;
-
- return nskb;
-}
-
/*
* skb should fit one page. This choice is good for headerless malloc.
* But we should limit to 8K so that userspace does not have to
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index bf6e766..a0a32f4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1872,6 +1872,87 @@ out:
}
EXPORT_SYMBOL_GPL(netlink_alloc_skb);
+struct sk_buff *netlink_skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
+{
+#ifdef CONFIG_NETLINK_MMAP
+ if (netlink_skb_is_mmaped(skb)) {
+ struct sk_buff *n = alloc_skb(skb->len, gfp_mask);
+ if (!n)
+ return NULL;
+
+ skb_put(n, skb->len);
+ memcpy(n->data, skb->data, skb->len);
+ return n;
+ } else
+#endif
+ return skb_copy(skb, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(netlink_skb_copy);
+
+struct sk_buff *netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
+{
+ struct sk_buff *nskb;
+
+#ifdef CONFIG_NETLINK_MMAP
+ if (netlink_skb_is_mmaped(skb))
+ return netlink_skb_copy(skb, gfp_mask);
+#endif
+ nskb = skb_clone(skb, gfp_mask);
+ if (!nskb)
+ return NULL;
+
+ /* This is a large skb, set destructor callback to release head */
+ if (is_vmalloc_addr(skb->head))
+ nskb->destructor = skb->destructor;
+
+ return nskb;
+}
+EXPORT_SYMBOL_GPL(netlink_skb_clone);
+
+int
+netlink_skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
+{
+#ifdef CONFIG_NETLINK_MMAP
+ struct page *page;
+ unsigned int offset;
+
+ if (netlink_skb_is_mmaped(from)) {
+ if (!len)
+ return 0;
+
+ page = virt_to_head_page(from->head);
+ offset = from->data - (unsigned char *)page_address(page);
+ __skb_fill_page_desc(to, 0, page, offset, len);
+ get_page(page);
+ to->truesize += len;
+ to->len += len;
+ to->data_len += len;
+
+ return 0;
+ } else
+#endif
+
+ return skb_zerocopy(to, from, len, hlen);
+}
+EXPORT_SYMBOL_GPL(netlink_skb_zerocopy);
+
+void netlink_free_skb(struct sk_buff *skb)
+{
+ kfree_skb_partial(skb, netlink_skb_is_mmaped(skb));
+}
+EXPORT_SYMBOL_GPL(netlink_free_skb);
+
+void netlink_consume_skb(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETLINK_MMAP
+ if (netlink_skb_is_mmaped(skb))
+ kfree_skb_partial(skb, true);
+ else
+#endif
+ consume_skb(skb);
+}
+EXPORT_SYMBOL_GPL(netlink_consume_skb);
+
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
int res = 0;
--
1.7.10.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC PATCH 2/5] netlink: mmap: apply mmaped skb helper functions
2015-07-22 1:09 [RFC PATCH 0/5] netlink: mmap kernel panic and some issues Ken-ichirou MATSUZAWA
2015-07-22 1:10 ` [RFC PATCH 1/5] netlink: mmap: introduce mmaped skb helper functions Ken-ichirou MATSUZAWA
@ 2015-07-22 1:11 ` Ken-ichirou MATSUZAWA
2015-07-22 1:13 ` [RFC PATCH 3/5] netlink: mmap: fix status for not delivered skb Ken-ichirou MATSUZAWA
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Ken-ichirou MATSUZAWA @ 2015-07-22 1:11 UTC (permalink / raw)
To: The netfilter developer mailinglist
Signed-off-by: Ken-ichirou MATSUZAWA <chamas@h4.dion.ne.jp>
---
net/netfilter/nfnetlink_log.c | 2 +-
net/netfilter/nfnetlink_queue_core.c | 8 ++++----
net/netlink/af_netlink.c | 26 +++++++++++++-------------
3 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 4ef1fae..2294b02 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -357,7 +357,7 @@ __nfulnl_send(struct nfulnl_instance *inst)
0);
if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n",
inst->skb->len, skb_tailroom(inst->skb))) {
- kfree_skb(inst->skb);
+ netlink_free_skb(inst->skb);
goto out;
}
}
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 11c7682..ed1c9f5 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -364,7 +364,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
sizeof(struct nfgenmsg), 0);
if (!nlh) {
skb_tx_error(entskb);
- kfree_skb(skb);
+ netlink_free_skb(skb);
return NULL;
}
nfmsg = nlmsg_data(nlh);
@@ -499,7 +499,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
nla->nla_type = NFQA_PAYLOAD;
nla->nla_len = nla_attr_size(data_len);
- if (skb_zerocopy(skb, entskb, data_len, hlen))
+ if (netlink_skb_zerocopy(skb, entskb, data_len, hlen))
goto nla_put_failure;
}
@@ -508,7 +508,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
nla_put_failure:
skb_tx_error(entskb);
- kfree_skb(skb);
+ netlink_free_skb(skb);
net_err_ratelimited("nf_queue: error creating packet message\n");
return NULL;
}
@@ -556,7 +556,7 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
return 0;
err_out_free_nskb:
- kfree_skb(nskb);
+ netlink_free_skb(nskb);
err_out_unlock:
spin_unlock_bh(&queue->lock);
if (failopen)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index a0a32f4..5632ad0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -204,7 +204,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
int ret = -ENOMEM;
dev_hold(dev);
- nskb = skb_clone(skb, GFP_ATOMIC);
+ nskb = netlink_skb_clone(skb, GFP_ATOMIC);
if (nskb) {
nskb->dev = dev;
nskb->protocol = htons((u16) sk->sk_protocol);
@@ -747,7 +747,7 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
err = security_netlink_send(sk, skb);
if (err) {
- kfree_skb(skb);
+ kfree_skb_partial(skb, true);
goto out;
}
@@ -787,7 +787,7 @@ static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
- kfree_skb(skb);
+ kfree_skb_partial(skb, true);
}
static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
@@ -1782,7 +1782,7 @@ int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
retry:
sk = netlink_getsockbyportid(ssk, portid);
if (IS_ERR(sk)) {
- kfree_skb(skb);
+ netlink_free_skb(skb);
return PTR_ERR(sk);
}
if (netlink_is_kernel(sk))
@@ -1790,7 +1790,7 @@ retry:
if (sk_filter(sk, skb)) {
err = skb->len;
- kfree_skb(skb);
+ netlink_free_skb(skb);
sock_put(sk);
return err;
}
@@ -1854,7 +1854,7 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
return skb;
err2:
- kfree_skb(skb);
+ kfree_skb_partial(skb, true);
spin_unlock_bh(&sk->sk_receive_queue.lock);
netlink_overrun(sk);
err1:
@@ -1862,7 +1862,7 @@ err1:
return NULL;
out_free:
- kfree_skb(skb);
+ kfree_skb_partial(skb, true);
spin_unlock_bh(&sk->sk_receive_queue.lock);
out_put:
sock_put(sk);
@@ -2024,7 +2024,7 @@ static void do_one_broadcast(struct sock *sk,
sock_hold(sk);
if (p->skb2 == NULL) {
if (skb_shared(p->skb)) {
- p->skb2 = skb_clone(p->skb, p->allocation);
+ p->skb2 = netlink_skb_clone(p->skb, p->allocation);
} else {
p->skb2 = skb_get(p->skb);
/*
@@ -2090,7 +2090,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
do_one_broadcast(sk, &info);
- consume_skb(skb);
+ netlink_consume_skb(skb);
netlink_unlock_table();
@@ -2755,7 +2755,7 @@ static int netlink_dump(struct sock *sk)
mutex_unlock(nlk->cb_mutex);
if (sk_filter(sk, skb))
- kfree_skb(skb);
+ netlink_free_skb(skb);
else
__netlink_sendskb(sk, skb);
return 0;
@@ -2770,7 +2770,7 @@ static int netlink_dump(struct sock *sk)
memcpy(nlmsg_data(nlh), &len, sizeof(len));
if (sk_filter(sk, skb))
- kfree_skb(skb);
+ netlink_free_skb(skb);
else
__netlink_sendskb(sk, skb);
@@ -2785,7 +2785,7 @@ static int netlink_dump(struct sock *sk)
errout_skb:
mutex_unlock(nlk->cb_mutex);
- kfree_skb(skb);
+ netlink_free_skb(skb);
return err;
}
@@ -2803,7 +2803,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
* a reference to the skb.
*/
if (netlink_skb_is_mmaped(skb)) {
- skb = skb_copy(skb, GFP_KERNEL);
+ skb = netlink_skb_copy(skb, GFP_KERNEL);
if (skb == NULL)
return -ENOBUFS;
} else
--
1.7.10.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC PATCH 3/5] netlink: mmap: fix status for not delivered skb
2015-07-22 1:09 [RFC PATCH 0/5] netlink: mmap kernel panic and some issues Ken-ichirou MATSUZAWA
2015-07-22 1:10 ` [RFC PATCH 1/5] netlink: mmap: introduce mmaped skb helper functions Ken-ichirou MATSUZAWA
2015-07-22 1:11 ` [RFC PATCH 2/5] netlink: mmap: apply " Ken-ichirou MATSUZAWA
@ 2015-07-22 1:13 ` Ken-ichirou MATSUZAWA
2015-07-22 1:14 ` [RFC PATCH 4/5] netlink: mmap: update tx type check Ken-ichirou MATSUZAWA
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Ken-ichirou MATSUZAWA @ 2015-07-22 1:13 UTC (permalink / raw)
To: The netfilter developer mailinglist
Signed-off-by: Ken-ichirou MATSUZAWA <chamas@h4.dion.ne.jp>
---
net/netlink/af_netlink.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5632ad0..a6fba4c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -847,7 +847,7 @@ static void netlink_skb_destructor(struct sk_buff *skb)
} else {
if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
hdr->nm_len = 0;
- netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+ netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
}
ring = &nlk_sk(sk)->rx_ring;
}
--
1.7.10.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC PATCH 4/5] netlink: mmap: update tx type check
2015-07-22 1:09 [RFC PATCH 0/5] netlink: mmap kernel panic and some issues Ken-ichirou MATSUZAWA
` (2 preceding siblings ...)
2015-07-22 1:13 ` [RFC PATCH 3/5] netlink: mmap: fix status for not delivered skb Ken-ichirou MATSUZAWA
@ 2015-07-22 1:14 ` Ken-ichirou MATSUZAWA
2015-07-22 1:15 ` [RFC PATCH 5/5] netlink: rx mmap: notify only when NL_MMAP_STATUS_VALID frame exists Ken-ichirou MATSUZAWA
2015-07-22 12:50 ` [RFC PATCH 0/5] netlink: mmap kernel panic and some issues Florian Westphal
5 siblings, 0 replies; 7+ messages in thread
From: Ken-ichirou MATSUZAWA @ 2015-07-22 1:14 UTC (permalink / raw)
To: The netfilter developer mailinglist
We need to accept a msg_iter.type of 1 (WRITE), which is set in sendto/sendmsg.
Signed-off-by: Ken-ichirou MATSUZAWA <chamas@h4.dion.ne.jp>
---
net/netlink/af_netlink.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index a6fba4c..7e1610e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2394,7 +2394,7 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
* sendmsg(), but that's what we've got...
*/
if (netlink_tx_is_mmaped(sk) &&
- msg->msg_iter.type == ITER_IOVEC &&
+ !(msg->msg_iter.type & (ITER_KVEC | ITER_BVEC)) &&
msg->msg_iter.nr_segs == 1 &&
msg->msg_iter.iov->iov_base == NULL) {
err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
--
1.7.10.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC PATCH 5/5] netlink: rx mmap: notify only when NL_MMAP_STATUS_VALID frame exists
2015-07-22 1:09 [RFC PATCH 0/5] netlink: mmap kernel panic and some issues Ken-ichirou MATSUZAWA
` (3 preceding siblings ...)
2015-07-22 1:14 ` [RFC PATCH 4/5] netlink: mmap: update tx type check Ken-ichirou MATSUZAWA
@ 2015-07-22 1:15 ` Ken-ichirou MATSUZAWA
2015-07-22 12:50 ` [RFC PATCH 0/5] netlink: mmap kernel panic and some issues Florian Westphal
5 siblings, 0 replies; 7+ messages in thread
From: Ken-ichirou MATSUZAWA @ 2015-07-22 1:15 UTC (permalink / raw)
To: The netfilter developer mailinglist
Signed-off-by: Ken-ichirou MATSUZAWA <chamas@h4.dion.ne.jp>
---
net/netlink/af_netlink.c | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7e1610e..8901acd 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -576,16 +576,6 @@ netlink_current_frame(const struct netlink_ring *ring,
return netlink_lookup_frame(ring, ring->head, status);
}
-static struct nl_mmap_hdr *
-netlink_previous_frame(const struct netlink_ring *ring,
- enum nl_mmap_status status)
-{
- unsigned int prev;
-
- prev = ring->head ? ring->head - 1 : ring->frame_max;
- return netlink_lookup_frame(ring, prev, status);
-}
-
static void netlink_increment_head(struct netlink_ring *ring)
{
ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
@@ -606,6 +596,21 @@ static void netlink_forward_ring(struct netlink_ring *ring)
} while (ring->head != head);
}
+static bool netlink_has_valid_frame(struct netlink_ring *ring)
+{
+ unsigned int head = ring->head, pos = head;
+ const struct nl_mmap_hdr *hdr;
+
+ do {
+ hdr = __netlink_lookup_frame(ring, pos);
+ if (hdr->nm_status == NL_MMAP_STATUS_VALID)
+ return true;
+ pos = pos != ring->frame_max ? pos + 1 : 0;
+ } while (pos != head);
+
+ return false;
+}
+
static bool netlink_dump_space(struct netlink_sock *nlk)
{
struct netlink_ring *ring = &nlk->rx_ring;
@@ -653,8 +658,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,
spin_lock_bh(&sk->sk_receive_queue.lock);
if (nlk->rx_ring.pg_vec) {
- netlink_forward_ring(&nlk->rx_ring);
- if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
+ if (netlink_has_valid_frame(&nlk->rx_ring))
mask |= POLLIN | POLLRDNORM;
}
spin_unlock_bh(&sk->sk_receive_queue.lock);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [RFC PATCH 0/5] netlink: mmap kernel panic and some issues
2015-07-22 1:09 [RFC PATCH 0/5] netlink: mmap kernel panic and some issues Ken-ichirou MATSUZAWA
` (4 preceding siblings ...)
2015-07-22 1:15 ` [RFC PATCH 5/5] netlink: rx mmap: notify only when NL_MMAP_STATUS_VALID frame exists Ken-ichirou MATSUZAWA
@ 2015-07-22 12:50 ` Florian Westphal
5 siblings, 0 replies; 7+ messages in thread
From: Florian Westphal @ 2015-07-22 12:50 UTC (permalink / raw)
To: Ken-ichirou MATSUZAWA; +Cc: The netfilter developer mailinglist
Ken-ichirou MATSUZAWA <chamaken@gmail.com> wrote:
> I got the kernel panic below when I dumped using an mmaped netlink
> socket while monitoring it with an nlmon tap device. I realized it is
> because an mmaped netlink skb does not have skb_shared_info, but I don't
> know how to fix it in a sane way. This patch series seems to work fine
> for me, but I'm not sure whether it is right or not.
Could you submit this series to netdev@vger.kernel.org?
^ permalink raw reply [flat|nested] 7+ messages in thread