All of lore.kernel.org
 help / color / mirror / Atom feed
* v4.16-rc1 misaligned atomics in skb__clone / __napi_alloc_skb
@ 2018-02-15 17:04 Mark Rutland
  2018-02-15 17:20 ` Eric Dumazet
  0 siblings, 1 reply; 8+ messages in thread
From: Mark Rutland @ 2018-02-15 17:04 UTC (permalink / raw)
  To: netdev, linux-kernel; +Cc: davem, willemb, edumazet

Hi,

While fuzzing arm64 v4.16-rc1 with Syzkaller, I've been hitting a
misaligned atomic in __skb_clone:

	atomic_inc(&(skb_shinfo(skb)->dataref));

.. where dataref doesn't have the required natural alignment, and the
atomic operation faults. e.g. i often see it aligned to a single byte
boundary rather than a four byte boundary.

AFAICT, the skb_shared_info is misaligned at the instant it's allocated
in __napi_alloc_skb(). With the patch at the end of this mail, the
atomic_set() (which is a WRITE_ONCE()) in __build_skb() blows up, e.g.

WARNING: CPU: 0 PID: 8457 at mm/access_once.c:12 access_once_alignment_check+0x34/0x40 mm/access_once.c:12
Kernel panic - not syncing: panic_on_warn set ...

CPU: 0 PID: 8457 Comm: syz-executor1 Not tainted 4.16.0-rc1-00002-gb03ae7b8b0de #9
Hardware name: linux,dummy-virt (DT)
Call trace:
 dump_backtrace+0x0/0x390 arch/arm64/kernel/time.c:52
 show_stack+0x20/0x30 arch/arm64/kernel/traps.c:151
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0xd0/0x130 lib/dump_stack.c:53
 panic+0x220/0x3fc kernel/panic.c:183
 __warn+0x270/0x2bc kernel/panic.c:547
 report_bug+0x1dc/0x2d0 lib/bug.c:184
 bug_handler+0x7c/0x128 arch/arm64/kernel/traps.c:758
 call_break_hook arch/arm64/kernel/debug-monitors.c:305 [inline]
 brk_handler+0x1a0/0x300 arch/arm64/kernel/debug-monitors.c:320
 do_debug_exception+0x15c/0x408 arch/arm64/mm/fault.c:808
 el1_dbg+0x18/0x78
 access_once_alignment_check+0x34/0x40 mm/access_once.c:12
 __napi_alloc_skb+0x18c/0x2b8 net/core/skbuff.c:482
 napi_alloc_skb include/linux/skbuff.h:2643 [inline]
 napi_get_frags+0x68/0x120 net/core/dev.c:5108
 tun_napi_alloc_frags drivers/net/tun.c:1477 [inline]
 tun_get_user+0x13b0/0x3fe8 drivers/net/tun.c:1820
 tun_chr_write_iter+0xa8/0x158 drivers/net/tun.c:1988
 call_write_iter include/linux/fs.h:1781 [inline]
 do_iter_readv_writev+0x2f8/0x490 fs/read_write.c:653
 do_iter_write+0x14c/0x4b0 fs/read_write.c:932
 vfs_writev+0x130/0x288 fs/read_write.c:977
 do_writev+0xe0/0x248 fs/read_write.c:1012
 SYSC_writev fs/read_write.c:1085 [inline]
 SyS_writev+0x34/0x48 fs/read_write.c:1082
 el0_svc_naked+0x30/0x34
SMP: stopping secondary CPUs
Kernel Offset: disabled
CPU features: 0x1002082
Memory Limit: none
Rebooting in 86400 seconds..

... I see these splats with both tun and virtio-net.

I have some Syzkaller logs, and can reproduce the problem locally, but
unfortunately the C reproducer it generated doesn't seem to work on its
own.

Any ideas as to how this could happen?

Thanks,
Mark.

---->8----
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c2cc57a2f508..c06b810a3b3b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -163,6 +163,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 
 #include <uapi/linux/types.h>
 
+void access_once_alignment_check(const volatile void *ptr, int size);
+
 #define __READ_ONCE_SIZE                                               \
 ({                                                                     \
        switch (size) {                                                 \
@@ -180,6 +182,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 static __always_inline
 void __read_once_size(const volatile void *p, void *res, int size)
 {
+       access_once_alignment_check(p, size);
        __READ_ONCE_SIZE;
 }
 
@@ -203,6 +206,8 @@ void __read_once_size_nocheck(const volatile void *p, void *res, int size)
 
 static __always_inline void __write_once_size(volatile void *p, void *res, int size)
 {
+       access_once_alignment_check(p, size);
+
        switch (size) {
        case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
        case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
diff --git a/mm/Makefile b/mm/Makefile
index e669f02c5a54..604d269d7d57 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -3,6 +3,7 @@
 # Makefile for the linux memory manager.
 #
 
+KASAN_SANITIZE_access_once.o := n
 KASAN_SANITIZE_slab_common.o := n
 KASAN_SANITIZE_slab.o := n
 KASAN_SANITIZE_slub.o := n
@@ -10,6 +11,7 @@ KASAN_SANITIZE_slub.o := n
 # These files are disabled because they produce non-interesting and/or
 # flaky coverage that is not a function of syscall inputs. E.g. slab is out of
 # free pages, or a task is migrated between nodes.
+KCOV_INSTRUMENT_access_once.o := n
 KCOV_INSTRUMENT_slab_common.o := n
 KCOV_INSTRUMENT_slob.o := n
 KCOV_INSTRUMENT_slab.o := n
@@ -39,7 +41,7 @@ obj-y                 := filemap.o mempool.o oom_kill.o \
                           mm_init.o mmu_context.o percpu.o slab_common.o \
                           compaction.o vmacache.o swap_slots.o \
                           interval_tree.o list_lru.o workingset.o \
-                          debug.o $(mmu-y)
+                          debug.o access_once.o $(mmu-y)
 
 obj-y += init-mm.o
 
diff --git a/mm/access_once.c b/mm/access_once.c
new file mode 100644
index 000000000000..42ee35d171c4
--- /dev/null
+++ b/mm/access_once.c
@@ -0,0 +1,15 @@
+#include <linux/bug.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+
+void access_once_alignment_check(const volatile void *ptr, int size)
+{
+       switch (size) {
+       case 1:
+       case 2:
+       case 4:
+       case 8:
+               WARN_ON(!IS_ALIGNED((unsigned long)ptr, size));
+       }
+}
+EXPORT_SYMBOL(access_once_alignment_check);

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: v4.16-rc1 misaligned atomics in skb__clone / __napi_alloc_skb
  2018-02-15 17:04 v4.16-rc1 misaligned atomics in skb__clone / __napi_alloc_skb Mark Rutland
@ 2018-02-15 17:20 ` Eric Dumazet
  2018-02-15 17:24   ` Eric Dumazet
  0 siblings, 1 reply; 8+ messages in thread
From: Eric Dumazet @ 2018-02-15 17:20 UTC (permalink / raw)
  To: Mark Rutland; +Cc: netdev, LKML, David Miller, Willem de Bruijn

On Thu, Feb 15, 2018 at 9:04 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi,
>
> While fuzzing arm64 v4.16-rc1 with Syzkaller, I've been hitting a
> misaligned atomic in __skb_clone:
>
>         atomic_inc(&(skb_shinfo(skb)->dataref));
>
> .. where dataref doesn't have the required natural alignment, and the
> atomic operation faults. e.g. i often see it aligned to a single byte
> boundary rather than a four byte boundary.
>
> AFAICT, the skb_shared_info is misaligned at the instant it's allocated
> in __napi_alloc_skb(). With the patch at the end of this mail, the
> atomic_set() (which is a WRITE_ONCE()) in __build_skb() blows up, e.g.
>
> WARNING: CPU: 0 PID: 8457 at mm/access_once.c:12 access_once_alignment_check+0x34/0x40 mm/access_once.c:12
> Kernel panic - not syncing: panic_on_warn set ...
>
> CPU: 0 PID: 8457 Comm: syz-executor1 Not tainted 4.16.0-rc1-00002-gb03ae7b8b0de #9
> Hardware name: linux,dummy-virt (DT)
> Call trace:
>  dump_backtrace+0x0/0x390 arch/arm64/kernel/time.c:52
>  show_stack+0x20/0x30 arch/arm64/kernel/traps.c:151
>  __dump_stack lib/dump_stack.c:17 [inline]
>  dump_stack+0xd0/0x130 lib/dump_stack.c:53
>  panic+0x220/0x3fc kernel/panic.c:183
>  __warn+0x270/0x2bc kernel/panic.c:547
>  report_bug+0x1dc/0x2d0 lib/bug.c:184
>  bug_handler+0x7c/0x128 arch/arm64/kernel/traps.c:758
>  call_break_hook arch/arm64/kernel/debug-monitors.c:305 [inline]
>  brk_handler+0x1a0/0x300 arch/arm64/kernel/debug-monitors.c:320
>  do_debug_exception+0x15c/0x408 arch/arm64/mm/fault.c:808
>  el1_dbg+0x18/0x78
>  access_once_alignment_check+0x34/0x40 mm/access_once.c:12
>  __napi_alloc_skb+0x18c/0x2b8 net/core/skbuff.c:482
>  napi_alloc_skb include/linux/skbuff.h:2643 [inline]
>  napi_get_frags+0x68/0x120 net/core/dev.c:5108
>  tun_napi_alloc_frags drivers/net/tun.c:1477 [inline]
>  tun_get_user+0x13b0/0x3fe8 drivers/net/tun.c:1820
>  tun_chr_write_iter+0xa8/0x158 drivers/net/tun.c:1988
>  call_write_iter include/linux/fs.h:1781 [inline]
>  do_iter_readv_writev+0x2f8/0x490 fs/read_write.c:653
>  do_iter_write+0x14c/0x4b0 fs/read_write.c:932
>  vfs_writev+0x130/0x288 fs/read_write.c:977
>  do_writev+0xe0/0x248 fs/read_write.c:1012
>  SYSC_writev fs/read_write.c:1085 [inline]
>  SyS_writev+0x34/0x48 fs/read_write.c:1082
>  el0_svc_naked+0x30/0x34
> SMP: stopping secondary CPUs
> Kernel Offset: disabled
> CPU features: 0x1002082
> Memory Limit: none
> Rebooting in 86400 seconds..
>
> ... I see these splats with both tun and virtio-net.
>
> I have some Syzkaller logs, and can reproduce the problem locally, but
> unfortunately the C reproducer it generated doesn't seem to work on its
> own.
>
> Any ideas as to how this could happen?
>

Yes, it seems tun.c breaks the assumptions.

If it really wants to provide arbitrary fragments and alignments, it
should use a separate

Please try :

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 81e6cc951e7fc7c983919365c34842c34bcaedcf..92c6b6d02f7c18b63c42ffe1d9cb7286975e1263
100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1500,7 +1500,7 @@ static struct sk_buff
*tun_napi_alloc_frags(struct tun_file *tfile,
                }

                local_bh_disable();
-               data = napi_alloc_frag(fragsz);
+               data = napi_alloc_frag(SKB_DATA_ALIGN(fragsz));
                local_bh_enable();
                if (!data) {
                        err = -ENOMEM;



> Thanks,
> Mark.
>
> ---->8----
> diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> index c2cc57a2f508..c06b810a3b3b 100644
> --- a/include/linux/compiler.h
> +++ b/include/linux/compiler.h
> @@ -163,6 +163,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
>
>  #include <uapi/linux/types.h>
>
> +void access_once_alignment_check(const volatile void *ptr, int size);
> +
>  #define __READ_ONCE_SIZE                                               \
>  ({                                                                     \
>         switch (size) {                                                 \
> @@ -180,6 +182,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
>  static __always_inline
>  void __read_once_size(const volatile void *p, void *res, int size)
>  {
> +       access_once_alignment_check(p, size);
>         __READ_ONCE_SIZE;
>  }
>
> @@ -203,6 +206,8 @@ void __read_once_size_nocheck(const volatile void *p, void *res, int size)
>
>  static __always_inline void __write_once_size(volatile void *p, void *res, int size)
>  {
> +       access_once_alignment_check(p, size);
> +
>         switch (size) {
>         case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
>         case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
> diff --git a/mm/Makefile b/mm/Makefile
> index e669f02c5a54..604d269d7d57 100644
> --- a/mm/Makefile
> +++ b/mm/Makefile
> @@ -3,6 +3,7 @@
>  # Makefile for the linux memory manager.
>  #
>
> +KASAN_SANITIZE_access_once.o := n
>  KASAN_SANITIZE_slab_common.o := n
>  KASAN_SANITIZE_slab.o := n
>  KASAN_SANITIZE_slub.o := n
> @@ -10,6 +11,7 @@ KASAN_SANITIZE_slub.o := n
>  # These files are disabled because they produce non-interesting and/or
>  # flaky coverage that is not a function of syscall inputs. E.g. slab is out of
>  # free pages, or a task is migrated between nodes.
> +KCOV_INSTRUMENT_access_once.o := n
>  KCOV_INSTRUMENT_slab_common.o := n
>  KCOV_INSTRUMENT_slob.o := n
>  KCOV_INSTRUMENT_slab.o := n
> @@ -39,7 +41,7 @@ obj-y                 := filemap.o mempool.o oom_kill.o \
>                            mm_init.o mmu_context.o percpu.o slab_common.o \
>                            compaction.o vmacache.o swap_slots.o \
>                            interval_tree.o list_lru.o workingset.o \
> -                          debug.o $(mmu-y)
> +                          debug.o access_once.o $(mmu-y)
>
>  obj-y += init-mm.o
>
> diff --git a/mm/access_once.c b/mm/access_once.c
> new file mode 100644
> index 000000000000..42ee35d171c4
> --- /dev/null
> +++ b/mm/access_once.c
> @@ -0,0 +1,15 @@
> +#include <linux/bug.h>
> +#include <linux/export.h>
> +#include <linux/kernel.h>
> +
> +void access_once_alignment_check(const volatile void *ptr, int size)
> +{
> +       switch (size) {
> +       case 1:
> +       case 2:
> +       case 4:
> +       case 8:
> +               WARN_ON(!IS_ALIGNED((unsigned long)ptr, size));
> +       }
> +}
> +EXPORT_SYMBOL(access_once_alignment_check);

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: v4.16-rc1 misaligned atomics in skb__clone / __napi_alloc_skb
  2018-02-15 17:20 ` Eric Dumazet
@ 2018-02-15 17:24   ` Eric Dumazet
  2018-02-15 17:31     ` Mark Rutland
  2018-02-15 17:43     ` Eric Dumazet
  0 siblings, 2 replies; 8+ messages in thread
From: Eric Dumazet @ 2018-02-15 17:24 UTC (permalink / raw)
  To: Mark Rutland; +Cc: netdev, LKML, David Miller, Willem de Bruijn

On Thu, Feb 15, 2018 at 9:20 AM, Eric Dumazet <edumazet@google.com> wrote:
>
> Yes, it seems tun.c breaks the assumptions.
>
> If it really wants to provide arbitrary fragments and alignments, it
> should use a separate

Sorry, I have sent the message to soon.

tun.c should use a private 'struct page_frag_cache' to deliver
arbitrary frags/alignments,
so that syzkaller might catch interesting bugs in the stack.


>
> Please try :
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 81e6cc951e7fc7c983919365c34842c34bcaedcf..92c6b6d02f7c18b63c42ffe1d9cb7286975e1263
> 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1500,7 +1500,7 @@ static struct sk_buff
> *tun_napi_alloc_frags(struct tun_file *tfile,
>                 }
>
>                 local_bh_disable();
> -               data = napi_alloc_frag(fragsz);
> +               data = napi_alloc_frag(SKB_DATA_ALIGN(fragsz));
>                 local_bh_enable();
>                 if (!data) {
>                         err = -ENOMEM;

This patch should solve your immediate problem, but would lower fuzzer
abilities to find bugs.

I will send something more suited to original intent of these commits :

90e33d45940793def6f773b2d528e9f3c84ffdc7 tun: enable napi_gro_frags()
for TUN/TAP driver
943170998b200190f99d3fe7e771437e2c51f319 tun: enable NAPI for TUN/TAP driver

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: v4.16-rc1 misaligned atomics in skb__clone / __napi_alloc_skb
  2018-02-15 17:24   ` Eric Dumazet
@ 2018-02-15 17:31     ` Mark Rutland
  2018-02-15 17:43     ` Eric Dumazet
  1 sibling, 0 replies; 8+ messages in thread
From: Mark Rutland @ 2018-02-15 17:31 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, LKML, David Miller, Willem de Bruijn

On Thu, Feb 15, 2018 at 09:24:36AM -0800, Eric Dumazet wrote:
> On Thu, Feb 15, 2018 at 9:20 AM, Eric Dumazet <edumazet@google.com> wrote:
> >
> > Yes, it seems tun.c breaks the assumptions.
> >
> > If it really wants to provide arbitrary fragments and alignments, it
> > should use a separate
> 
> Sorry, I have sent the message to soon.
> 
> tun.c should use a private 'struct page_frag_cache' to deliver
> arbitrary frags/alignments,
> so that syzkaller might catch interesting bugs in the stack.
>
> > Please try :
> >
> > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > index 81e6cc951e7fc7c983919365c34842c34bcaedcf..92c6b6d02f7c18b63c42ffe1d9cb7286975e1263
> > 100644
> > --- a/drivers/net/tun.c
> > +++ b/drivers/net/tun.c
> > @@ -1500,7 +1500,7 @@ static struct sk_buff
> > *tun_napi_alloc_frags(struct tun_file *tfile,
> >                 }
> >
> >                 local_bh_disable();
> > -               data = napi_alloc_frag(fragsz);
> > +               data = napi_alloc_frag(SKB_DATA_ALIGN(fragsz));
> >                 local_bh_enable();
> >                 if (!data) {
> >                         err = -ENOMEM;
> 
> This patch should solve your immediate problem, but would lower fuzzer
> abilities to find bugs.

So far so good, it seems!

> I will send something more suited to original intent of these commits :
> 
> 90e33d45940793def6f773b2d528e9f3c84ffdc7 tun: enable napi_gro_frags()
> for TUN/TAP driver
> 943170998b200190f99d3fe7e771437e2c51f319 tun: enable NAPI for TUN/TAP driver

Thanks! I'd be more than happy to test any such patches.

As I mentioned, I'm also seeing similar in virtio-net, e.g.

WARNING: CPU: 0 PID: 8 at mm/access_once.c:12 access_once_alignment_check+0x34/0x40 mm/access_once.c:12
Kernel panic - not syncing: panic_on_warn set ...

CPU: 0 PID: 8 Comm: ksoftirqd/0 Not tainted 4.16.0-rc1-00002-gb03ae7b8b0de #9
Hardware name: linux,dummy-virt (DT)
Call trace:
 dump_backtrace+0x0/0x390 arch/arm64/kernel/time.c:52
 show_stack+0x20/0x30 arch/arm64/kernel/traps.c:151
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0xd0/0x130 lib/dump_stack.c:53
 panic+0x220/0x3fc kernel/panic.c:183
 __warn+0x270/0x2bc kernel/panic.c:547
 report_bug+0x1dc/0x2d0 lib/bug.c:184
 bug_handler+0x7c/0x128 arch/arm64/kernel/traps.c:758
 call_break_hook arch/arm64/kernel/debug-monitors.c:305 [inline]
 brk_handler+0x1a0/0x300 arch/arm64/kernel/debug-monitors.c:320
 do_debug_exception+0x15c/0x408 arch/arm64/mm/fault.c:808
 el1_dbg+0x18/0x78
 access_once_alignment_check+0x34/0x40 mm/access_once.c:12
 __napi_alloc_skb+0x18c/0x2b8 net/core/skbuff.c:482
 napi_alloc_skb include/linux/skbuff.h:2643 [inline]
 page_to_skb.isra.17+0x58/0x610 drivers/net/virtio_net.c:345
 receive_mergeable drivers/net/virtio_net.c:783 [inline]
 receive_buf+0x978/0x2a70 drivers/net/virtio_net.c:888
 virtnet_receive drivers/net/virtio_net.c:1160 [inline]
 virtnet_poll+0x24c/0x850 drivers/net/virtio_net.c:1240
 napi_poll net/core/dev.c:5690 [inline]
 net_rx_action+0x324/0xa50 net/core/dev.c:5756
 __do_softirq+0x318/0x734 kernel/softirq.c:285
 run_ksoftirqd+0x70/0xa8 kernel/softirq.c:666
 smpboot_thread_fn+0x544/0x9d0 kernel/smpboot.c:164
 kthread+0x2f8/0x380 kernel/kthread.c:238
 ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:1158
SMP: stopping secondary CPUs
Kernel Offset: disabled
CPU features: 0x1002082
Memory Limit: none
Rebooting in 86400 seconds..

... does similar apply there?

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: v4.16-rc1 misaligned atomics in skb__clone / __napi_alloc_skb
  2018-02-15 17:24   ` Eric Dumazet
  2018-02-15 17:31     ` Mark Rutland
@ 2018-02-15 17:43     ` Eric Dumazet
  2018-02-15 17:57       ` Mark Rutland
  2018-02-15 22:47       ` [PATCH net] tun: fix tun_napi_alloc_frags() frag allocator Eric Dumazet
  1 sibling, 2 replies; 8+ messages in thread
From: Eric Dumazet @ 2018-02-15 17:43 UTC (permalink / raw)
  To: Eric Dumazet, Mark Rutland; +Cc: netdev, LKML, David Miller, Willem de Bruijn

On Thu, 2018-02-15 at 09:24 -0800, Eric Dumazet wrote:
> 
> I will send something more suited to original intent of these commits :
> 
> 90e33d45940793def6f773b2d528e9f3c84ffdc7 tun: enable napi_gro_frags()
> for TUN/TAP driver
> 943170998b200190f99d3fe7e771437e2c51f319 tun: enable NAPI for TUN/TAP driver

Can you try this patch ?

Thanks !

 drivers/net/tun.c |   16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 81e6cc951e7fc7c983919365c34842c34bcaedcf..b52258c327d2e1d7c7476de345e49f082909c246 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1489,27 +1489,23 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
 	skb->truesize += skb->data_len;
 
 	for (i = 1; i < it->nr_segs; i++) {
+		struct page_frag *pfrag = &current->task_frag;
 		size_t fragsz = it->iov[i].iov_len;
-		unsigned long offset;
-		struct page *page;
-		void *data;
 
 		if (fragsz == 0 || fragsz > PAGE_SIZE) {
 			err = -EINVAL;
 			goto free;
 		}
 
-		local_bh_disable();
-		data = napi_alloc_frag(fragsz);
-		local_bh_enable();
-		if (!data) {
+		if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) {
 			err = -ENOMEM;
 			goto free;
 		}
 
-		page = virt_to_head_page(data);
-		offset = data - page_address(page);
-		skb_fill_page_desc(skb, i - 1, page, offset, fragsz);
+		skb_fill_page_desc(skb, i - 1, pfrag->page,
+				   pfrag->offset, fragsz);
+		page_ref_inc(pfrag->page);
+		pfrag->offset += fragsz;
 	}
 
 	return skb;

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: v4.16-rc1 misaligned atomics in skb__clone / __napi_alloc_skb
  2018-02-15 17:43     ` Eric Dumazet
@ 2018-02-15 17:57       ` Mark Rutland
  2018-02-15 22:47       ` [PATCH net] tun: fix tun_napi_alloc_frags() frag allocator Eric Dumazet
  1 sibling, 0 replies; 8+ messages in thread
From: Mark Rutland @ 2018-02-15 17:57 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Eric Dumazet, netdev, LKML, David Miller, Willem de Bruijn

On Thu, Feb 15, 2018 at 09:43:06AM -0800, Eric Dumazet wrote:
> On Thu, 2018-02-15 at 09:24 -0800, Eric Dumazet wrote:
> > 
> > I will send something more suited to original intent of these commits :
> > 
> > 90e33d45940793def6f773b2d528e9f3c84ffdc7 tun: enable napi_gro_frags()
> > for TUN/TAP driver
> > 943170998b200190f99d3fe7e771437e2c51f319 tun: enable NAPI for TUN/TAP driver
> 
> Can you try this patch ?

Looks good! No splats after 10 minutes with a test that usually fails in
a few seconds.

FWIW:

Tested-by: Mark Rutland <mark.rutland@arm.com>

Thanks,
Mark.

>  drivers/net/tun.c |   16 ++++++----------
>  1 file changed, 6 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 81e6cc951e7fc7c983919365c34842c34bcaedcf..b52258c327d2e1d7c7476de345e49f082909c246 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1489,27 +1489,23 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
>  	skb->truesize += skb->data_len;
>  
>  	for (i = 1; i < it->nr_segs; i++) {
> +		struct page_frag *pfrag = &current->task_frag;
>  		size_t fragsz = it->iov[i].iov_len;
> -		unsigned long offset;
> -		struct page *page;
> -		void *data;
>  
>  		if (fragsz == 0 || fragsz > PAGE_SIZE) {
>  			err = -EINVAL;
>  			goto free;
>  		}
>  
> -		local_bh_disable();
> -		data = napi_alloc_frag(fragsz);
> -		local_bh_enable();
> -		if (!data) {
> +		if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) {
>  			err = -ENOMEM;
>  			goto free;
>  		}
>  
> -		page = virt_to_head_page(data);
> -		offset = data - page_address(page);
> -		skb_fill_page_desc(skb, i - 1, page, offset, fragsz);
> +		skb_fill_page_desc(skb, i - 1, pfrag->page,
> +				   pfrag->offset, fragsz);
> +		page_ref_inc(pfrag->page);
> +		pfrag->offset += fragsz;
>  	}
>  
>  	return skb;
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH net] tun: fix tun_napi_alloc_frags() frag allocator
  2018-02-15 17:43     ` Eric Dumazet
  2018-02-15 17:57       ` Mark Rutland
@ 2018-02-15 22:47       ` Eric Dumazet
  2018-02-16 21:21         ` David Miller
  1 sibling, 1 reply; 8+ messages in thread
From: Eric Dumazet @ 2018-02-15 22:47 UTC (permalink / raw)
  To: Mark Rutland; +Cc: netdev, David Miller, Willem de Bruijn, Petar Penkov

From: Eric Dumazet <edumazet@google.com>

<Mark Rutland reported>
    While fuzzing arm64 v4.16-rc1 with Syzkaller, I've been hitting a
    misaligned atomic in __skb_clone:

        atomic_inc(&(skb_shinfo(skb)->dataref));

   where dataref doesn't have the required natural alignment, and the
   atomic operation faults. e.g. i often see it aligned to a single
   byte boundary rather than a four byte boundary.

   AFAICT, the skb_shared_info is misaligned at the instant it's
   allocated in __napi_alloc_skb()  __napi_alloc_skb()
</end of report>

Problem is caused by tun_napi_alloc_frags() using
napi_alloc_frag() with user provided seg sizes,
leading to other users of this API getting unaligned
page fragments.

Since we would like to not necessarily add paddings or alignments to
the frags that tun_napi_alloc_frags() attaches to the skb, switch to
another page frag allocator.

As a bonus skb_page_frag_refill() can use GFP_KERNEL allocations,
meaning that we can not deplete memory reserves as easily.

Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
---
 drivers/net/tun.c |   16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 81e6cc951e7fc7c983919365c34842c34bcaedcf..b52258c327d2e1d7c7476de345e49f082909c246 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1489,27 +1489,23 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
 	skb->truesize += skb->data_len;
 
 	for (i = 1; i < it->nr_segs; i++) {
+		struct page_frag *pfrag = &current->task_frag;
 		size_t fragsz = it->iov[i].iov_len;
-		unsigned long offset;
-		struct page *page;
-		void *data;
 
 		if (fragsz == 0 || fragsz > PAGE_SIZE) {
 			err = -EINVAL;
 			goto free;
 		}
 
-		local_bh_disable();
-		data = napi_alloc_frag(fragsz);
-		local_bh_enable();
-		if (!data) {
+		if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) {
 			err = -ENOMEM;
 			goto free;
 		}
 
-		page = virt_to_head_page(data);
-		offset = data - page_address(page);
-		skb_fill_page_desc(skb, i - 1, page, offset, fragsz);
+		skb_fill_page_desc(skb, i - 1, pfrag->page,
+				   pfrag->offset, fragsz);
+		page_ref_inc(pfrag->page);
+		pfrag->offset += fragsz;
 	}
 
 	return skb;

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH net] tun: fix tun_napi_alloc_frags() frag allocator
  2018-02-15 22:47       ` [PATCH net] tun: fix tun_napi_alloc_frags() frag allocator Eric Dumazet
@ 2018-02-16 21:21         ` David Miller
  0 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2018-02-16 21:21 UTC (permalink / raw)
  To: eric.dumazet; +Cc: mark.rutland, netdev, willemb, peterpenkov96

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 15 Feb 2018 14:47:15 -0800

> From: Eric Dumazet <edumazet@google.com>
> 
> <Mark Rutland reported>
>     While fuzzing arm64 v4.16-rc1 with Syzkaller, I've been hitting a
>     misaligned atomic in __skb_clone:
> 
>         atomic_inc(&(skb_shinfo(skb)->dataref));
> 
>    where dataref doesn't have the required natural alignment, and the
>    atomic operation faults. e.g. i often see it aligned to a single
>    byte boundary rather than a four byte boundary.
> 
>    AFAICT, the skb_shared_info is misaligned at the instant it's
>    allocated in __napi_alloc_skb()  __napi_alloc_skb()
> </end of report>
> 
> Problem is caused by tun_napi_alloc_frags() using
> napi_alloc_frag() with user provided seg sizes,
> leading to other users of this API getting unaligned
> page fragments.
> 
> Since we would like to not necessarily add paddings or alignments to
> the frags that tun_napi_alloc_frags() attaches to the skb, switch to
> another page frag allocator.
> 
> As a bonus skb_page_frag_refill() can use GFP_KERNEL allocations,
> meaning that we can not deplete memory reserves as easily.
> 
> Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Mark Rutland <mark.rutland@arm.com>
> Tested-by: Mark Rutland <mark.rutland@arm.com>

Applied and queued up for -stable, thanks Eric.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-02-16 21:21 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-15 17:04 v4.16-rc1 misaligned atomics in skb__clone / __napi_alloc_skb Mark Rutland
2018-02-15 17:20 ` Eric Dumazet
2018-02-15 17:24   ` Eric Dumazet
2018-02-15 17:31     ` Mark Rutland
2018-02-15 17:43     ` Eric Dumazet
2018-02-15 17:57       ` Mark Rutland
2018-02-15 22:47       ` [PATCH net] tun: fix tun_napi_alloc_frags() frag allocator Eric Dumazet
2018-02-16 21:21         ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.