io-uring.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Helge Deller <deller@gmx.de>
To: matoro <matoro_mailinglist_kernel@matoro.tk>
Cc: Jens Axboe <axboe@kernel.dk>,
	io-uring@vger.kernel.org, Linux Ia64 <linux-ia64@vger.kernel.org>,
	glaubitz@physik.fu-berlin.de, Sam James <sam@gentoo.org>
Subject: Re: [PATCH 1/5] io_uring: Adjust mapping wrt architecture aliasing requirements
Date: Sun, 16 Jul 2023 22:54:16 +0200	[thread overview]
Message-ID: <7d3fb4b8-a7e6-8a28-0558-75c1c5a0518d@gmx.de> (raw)
In-Reply-To: <58aaccbd483c582b3bfd590c110d45c6@matoro.tk>

On 7/16/23 20:03, matoro wrote:
> On 2023-07-16 02:54, Helge Deller wrote:
>> * matoro <matoro_mailinglist_kernel@matoro.tk>:
>>> On 2023-07-13 03:27, Helge Deller wrote:
>>> > * matoro <matoro_mailinglist_kernel@matoro.tk>:
>>> > > On 2023-07-12 16:30, Helge Deller wrote:
>>> > > > On 7/12/23 21:05, Helge Deller wrote:
>>> > > > > On 7/12/23 19:28, matoro wrote:
>>> > > > > > On 2023-07-12 12:24, Helge Deller wrote:
>>> > > > > > > Hi Matoro,
>>> > > > > > >
>>> > > > > > > * matoro <matoro_mailinglist_kernel@matoro.tk>:
>>> > > > > > > > On 2023-03-14 13:16, Jens Axboe wrote:
>>> > > > > > > > > From: Helge Deller <deller@gmx.de>
>>> > > > > > > > >
>>> > > > > > > > > Some architectures have memory cache aliasing requirements (e.g. parisc)
>>> > > > > > > > > if memory is shared between userspace and kernel. This patch fixes the
>>> > > > > > > > > kernel to return an aliased address when asked by userspace via mmap().
>>> > > > > > > > >
>>> > > > > > > > > Signed-off-by: Helge Deller <deller@gmx.de>
>>> > > > > > > > > Signed-off-by: Jens Axboe <axboe@kernel.dk>
>>> > > > > > > > > ---
>>> > > > > > > > >  io_uring/io_uring.c | 51 +++++++++++++++++++++++++++++++++++++++++++++
>>> > > > > > > > >  1 file changed, 51 insertions(+)
>>> > > > > > > > >
>>> > > > > > > > > diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>> > > > > > > > > index 722624b6d0dc..3adecebbac71 100644
>>> > > > > > > > > --- a/io_uring/io_uring.c
>>> > > > > > > > > +++ b/io_uring/io_uring.c
>>> > > > > > > > > @@ -72,6 +72,7 @@
>>> > > > > > > > >  #include <linux/io_uring.h>
>>> > > > > > > > >  #include <linux/audit.h>
>>> > > > > > > > >  #include <linux/security.h>
>>> > > > > > > > > +#include <asm/shmparam.h>
>>> > > > > > > > >
>>> > > > > > > > >  #define CREATE_TRACE_POINTS
>>> > > > > > > > >  #include <trace/events/io_uring.h>
>>> > > > > > > > > @@ -3317,6 +3318,54 @@ static __cold int io_uring_mmap(struct file
>>> > > > > > > > > *file, struct vm_area_struct *vma)
>>> > > > > > > > >      return remap_pfn_range(vma, vma->vm_start, pfn, sz,
>>> > > > > > > > > vma->vm_page_prot);
>>> > > > > > > > >  }
>>> > > > > > > > >
>>> > > > > > > > > +static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
>>> > > > > > > > > +            unsigned long addr, unsigned long len,
>>> > > > > > > > > +            unsigned long pgoff, unsigned long flags)
>>> > > > > > > > > +{
>>> > > > > > > > > +    const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
>>> > > > > > > > > +    struct vm_unmapped_area_info info;
>>> > > > > > > > > +    void *ptr;
>>> > > > > > > > > +
>>> > > > > > > > > +    /*
>>> > > > > > > > > +     * Do not allow to map to user-provided address to avoid breaking the
>>> > > > > > > > > +     * aliasing rules. Userspace is not able to guess the offset address
>>> > > > > > > > > of
>>> > > > > > > > > +     * kernel kmalloc()ed memory area.
>>> > > > > > > > > +     */
>>> > > > > > > > > +    if (addr)
>>> > > > > > > > > +        return -EINVAL;
>>> > > > > > > > > +
>>> > > > > > > > > +    ptr = io_uring_validate_mmap_request(filp, pgoff, len);
>>> > > > > > > > > +    if (IS_ERR(ptr))
>>> > > > > > > > > +        return -ENOMEM;
>>> > > > > > > > > +
>>> > > > > > > > > +    info.flags = VM_UNMAPPED_AREA_TOPDOWN;
>>> > > > > > > > > +    info.length = len;
>>> > > > > > > > > +    info.low_limit = max(PAGE_SIZE, mmap_min_addr);
>>> > > > > > > > > +    info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
>>> > > > > > > > > +#ifdef SHM_COLOUR
>>> > > > > > > > > +    info.align_mask = PAGE_MASK & (SHM_COLOUR - 1UL);
>>> > > > > > > > > +#else
>>> > > > > > > > > +    info.align_mask = PAGE_MASK & (SHMLBA - 1UL);
>>> > > > > > > > > +#endif
>>> > > > > > > > > +    info.align_offset = (unsigned long) ptr;
>>> > > > > > > > > +
>>> > > > > > > > > +    /*
>>> > > > > > > > > +     * A failed mmap() very likely causes application failure,
>>> > > > > > > > > +     * so fall back to the bottom-up function here. This scenario
>>> > > > > > > > > +     * can happen with large stack limits and large mmap()
>>> > > > > > > > > +     * allocations.
>>> > > > > > > > > +     */
>>> > > > > > > > > +    addr = vm_unmapped_area(&info);
>>> > > > > > > > > +    if (offset_in_page(addr)) {
>>> > > > > > > > > +        info.flags = 0;
>>> > > > > > > > > +        info.low_limit = TASK_UNMAPPED_BASE;
>>> > > > > > > > > +        info.high_limit = mmap_end;
>>> > > > > > > > > +        addr = vm_unmapped_area(&info);
>>> > > > > > > > > +    }
>>> > > > > > > > > +
>>> > > > > > > > > +    return addr;
>>> > > > > > > > > +}
>>> > > > > > > > > +
>>> > > > > > > > >  #else /* !CONFIG_MMU */
>>> > > > > > > > >
>>> > > > > > > > >  static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
>>> > > > > > > > > @@ -3529,6 +3578,8 @@ static const struct file_operations io_uring_fops
>>> > > > > > > > > = {
>>> > > > > > > > >  #ifndef CONFIG_MMU
>>> > > > > > > > >      .get_unmapped_area = io_uring_nommu_get_unmapped_area,
>>> > > > > > > > >      .mmap_capabilities = io_uring_nommu_mmap_capabilities,
>>> > > > > > > > > +#else
>>> > > > > > > > > +    .get_unmapped_area = io_uring_mmu_get_unmapped_area,
>>> > > > > > > > >  #endif
>>> > > > > > > > >      .poll        = io_uring_poll,
>>> > > > > > > > >  #ifdef CONFIG_PROC_FS
>>> > > > > > > >
>>> > > > > > > > Hi Jens, Helge - I've bisected a regression with
>>> > > > > > > > io_uring on ia64 to this
>>> > > > > > > > patch in 6.4.  Unfortunately this breaks userspace
>>> > > > > > > > programs using io_uring,
>>> > > > > > > > the easiest one to test is cmake with an io_uring
>>> > > > > > > > enabled libuv (i.e., libuv
>>> > > > > > > > >= 1.45.0) which will hang.
>>> > > > > > > >
>>> > > > > > > > I am aware that ia64 is in a vulnerable place right now
>>> > > > > > > > which is why I am
>>> > > > > > > > keeping this spread limited.  Since this clearly involves
>>> > > > > > > > architecture-specific changes for parisc,
>>> > > > > > >
>>> > > > > > > it isn't so much architecture-specific... (just one ifdef)
>>> > > > > > >
>>> > > > > > > > is there any chance of looking at
>>> > > > > > > > what is required to do the same for ia64?  I looked at
>>> > > > > > > > 0ef36bd2b37815719e31a72d2beecc28ca8ecd26 ("parisc:
>>> > > > > > > > change value of SHMLBA
>>> > > > > > > > from 0x00400000 to PAGE_SIZE") and tried to replicate the SHMLBA ->
>>> > > > > > > > SHM_COLOUR change, but it made no difference.
>>> > > > > > > >
>>> > > > > > > > If hardware is necessary for testing, I can provide it,
>>> > > > > > > > including remote BMC
>>> > > > > > > > access for restarts/kernel debugging.  Any takers?
>>> > > > > > >
>>> > > > > > > I won't have time to test myself, but maybe you could test?
>>> > > > > > >
>>> > > > > > > Basically we should try to find out why
>>> > > > > > > io_uring_mmu_get_unmapped_area()
>>> > > > > > > doesn't return valid addresses, while arch_get_unmapped_area()
>>> > > > > > > [in arch/ia64/kernel/sys_ia64.c] does.
>>> > > > > > >
>>> > > > > > > You could apply this patch first:
>>> > > > > > > It introduces a memory leak (as it requests memory twice),
>>> > > > > > > but maybe we
>>> > > > > > > get an idea?
>>> > > > > > > The ia64 arch_get_unmapped_area() searches for memory from bottom
>>> > > > > > > (flags=0), while io_uring function tries top-down first.
>>> > > > > > > Maybe that's
>>> > > > > > > the problem. And I don't understand the offset_in_page() check right
>>> > > > > > > now.
>>> > > > > > >
>>> > > > > > > diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>> > > > > > > index 3bca7a79efda..93b1964d2bbb 100644
>>> > > > > > > --- a/io_uring/io_uring.c
>>> > > > > > > +++ b/io_uring/io_uring.c
>>> > > > > > > @@ -3431,13 +3431,17 @@ static unsigned long
>>> > > > > > > io_uring_mmu_get_unmapped_area(struct file *filp,
>>> > > > > > >       * can happen with large stack limits and large mmap()
>>> > > > > > >       * allocations.
>>> > > > > > >       */
>>> > > > > > > +/* compare to arch_get_unmapped_area() in
>>> > > > > > > arch/ia64/kernel/sys_ia64.c */
>>> > > > > > >      addr = vm_unmapped_area(&info);
>>> > > > > > > -    if (offset_in_page(addr)) {
>>> > > > > > > +printk("io_uring_mmu_get_unmapped_area() address 1 is:
>>> > > > > > > %px\n", addr);
>>> > > > > > > +    addr = NULL;
>>> > > > > > > +    if (!addr) {
>>> > > > > > >          info.flags = 0;
>>> > > > > > >          info.low_limit = TASK_UNMAPPED_BASE;
>>> > > > > > >          info.high_limit = mmap_end;
>>> > > > > > >          addr = vm_unmapped_area(&info);
>>> > > > > > >      }
>>> > > > > > > +printk("io_uring_mmu_get_unmapped_area() returns address
>>> > > > > > > %px\n", addr);
>>> > > > > > >
>>> > > > > > >      return addr;
>>> > > > > > >  }
>>> > > > > > >
>>> > > > > > >
>>> > > > > > > Another option is to disable the call to
>>> > > > > > > io_uring_mmu_get_unmapped_area()
>>> > > > > > > with the next patch. Maybe you could add printks() to ia64's
>>> > > > > > > arch_get_unmapped_area()
>>> > > > > > > and check what it returns there?
>>> > > > > > >
>>> > > > > > > @@ -3654,6 +3658,8 @@ static const struct file_operations
>>> > > > > > > io_uring_fops = {
>>> > > > > > >  #ifndef CONFIG_MMU
>>> > > > > > >      .get_unmapped_area = io_uring_nommu_get_unmapped_area,
>>> > > > > > >      .mmap_capabilities = io_uring_nommu_mmap_capabilities,
>>> > > > > > > +#elif 0    /* IS_ENABLED(CONFIG_IA64) */
>>> > > > > > > +    .get_unmapped_area = NULL,
>>> > > > > > >  #else
>>> > > > > > >      .get_unmapped_area = io_uring_mmu_get_unmapped_area,
>>> > > > > > >  #endif
>>> > > > > > >
>>> > > > > > > Helge
>>> > > > > >
>>> > > > > > Thanks Helge.  Sample output from that first patch:
>>> > > > > >
>>> > > > > > [Wed Jul 12 13:09:50 2023] io_uring_mmu_get_unmapped_area()
>>> > > > > > address 1 is: 1ffffffffff40000
>>> > > > > > [Wed Jul 12 13:09:50 2023] io_uring_mmu_get_unmapped_area()
>>> > > > > > returns address 2000000001e40000
>>> > > > > > [Wed Jul 12 13:09:50 2023] io_uring_mmu_get_unmapped_area()
>>> > > > > > address 1 is: 1ffffffffff20000
>>> > > > > > [Wed Jul 12 13:09:50 2023] io_uring_mmu_get_unmapped_area()
>>> > > > > > returns address 2000000001f20000
>>> > > > > > [Wed Jul 12 13:09:50 2023] io_uring_mmu_get_unmapped_area()
>>> > > > > > address 1 is: 1ffffffffff30000
>>> > > > > > [Wed Jul 12 13:09:50 2023] io_uring_mmu_get_unmapped_area()
>>> > > > > > returns address 2000000001f30000
>>> > > > > > [Wed Jul 12 13:09:50 2023] io_uring_mmu_get_unmapped_area()
>>> > > > > > address 1 is: 1ffffffffff90000
>>> > > > > > [Wed Jul 12 13:09:50 2023] io_uring_mmu_get_unmapped_area()
>>> > > > > > returns address 2000000001f90000
>>> > > > > >
>>> > > > > > This pattern seems to be pretty stable, I tried instead just
>>> > > > > > directly returning the result of a call to
>>> > > > > > arch_get_unmapped_area() at the end of the function and it seems
>>> > > > > > similar:
>>> > > > > >
>>> > > > > > [Wed Jul 12 13:27:07 2023] io_uring_mmu_get_unmapped_area()
>>> > > > > > would return address 1ffffffffffd0000
>>> > > > > > [Wed Jul 12 13:27:07 2023] but arch_get_unmapped_area() would
>>> > > > > > return address 2000000001f00000
>>> > > > > > [Wed Jul 12 13:27:07 2023] io_uring_mmu_get_unmapped_area()
>>> > > > > > would return address 1ffffffffff00000
>>> > > > > > [Wed Jul 12 13:27:07 2023] but arch_get_unmapped_area() would
>>> > > > > > return address 1ffffffffff00000
>>> > > > > > [Wed Jul 12 13:27:07 2023] io_uring_mmu_get_unmapped_area()
>>> > > > > > would return address 1fffffffffe20000
>>> > > > > > [Wed Jul 12 13:27:07 2023] but arch_get_unmapped_area() would
>>> > > > > > return address 2000000002000000
>>> > > > > > [Wed Jul 12 13:27:07 2023] io_uring_mmu_get_unmapped_area()
>>> > > > > > would return address 1fffffffffe30000
>>> > > > > > [Wed Jul 12 13:27:07 2023] but arch_get_unmapped_area() would
>>> > > > > > return address 2000000002100000
>>> > > > > >
>>> > > > > > Is that enough of a clue to go on?
>>> > > > >
>>> > > > > SHMLBA on ia64 is 0x100000:
>>> > > > > arch/ia64/include/asm/shmparam.h:#define        SHMLBA  (1024*1024)
>>> > > > > but the values returned by io_uring_mmu_get_unmapped_area() do not
>>> > > > > fulfill this.
>>> > > > >
>>> > > > > So, probably ia64's SHMLBA isn't pulled in correctly in
>>> > > > > io_uring/io_uring.c.
>>> > > > > Check value of this line:
>>> > > > >      info.align_mask = PAGE_MASK & (SHMLBA - 1UL);
>>> > > > >
>>> > > > > You could also add
>>> > > > > #define SHM_COLOUR  0x100000
>>> > > > > in front of the
>>> > > > >      #ifdef SHM_COLOUR
>>> > > > > (define SHM_COLOUR in io_uring/kbuf.c too).
>>> > > >
>>> > > > What is the value of PAGE_SIZE and "ptr" on your machine?
>>> > > > For 4k page size I get:
>>> > > > SHMLBA -1   ->        FFFFF
>>> > > > PAGE_MASK   -> FFFFFFFFF000
>>> > > > so,
>>> > > > info.align_mask = PAGE_MASK & (SHMLBA - 1UL) = 0xFF000;
>>> > > > You could try to set info.align_mask = 0xfffff;
>>> > > >
>>> > > > Helge
>>> > >
>>> > > Using 64KiB (65536) PAGE_SIZE here.  64-bit pointers.
>>> > >
>>> > > Tried both #define SHM_COLOUR 0x100000, as well as info.align_mask =
>>> > > 0xFFFFF, but both of them made the problem change from 100%
>>> > > reproducible, to
>>> > > intermittent.
>>> > >
>>> > > After inspecting the output I observed that it hangs only when the
>>> > > first
>>> > > allocation returns an address below 0x2000000000000000, and the second
>>> > > returns an address above it.  When both addresses are above it, it
>>> > > does not
>>> > > hang.  Examples:
>>> > >
>>> > > When it works:
>>> > > $ cmake --version
>>> > > cmake version 3.26.4
>>> > >
>>> > > CMake suite maintained and supported by Kitware (kitware.com/cmake).
>>> > > $ dmesg --color=always -T | tail -n 4
>>> > > [Wed Jul 12 20:32:37 2023] io_uring_mmu_get_unmapped_area() would
>>> > > return
>>> > > address 1fffffffffe20000
>>> > > [Wed Jul 12 20:32:37 2023] but arch_get_unmapped_area() would return
>>> > > address
>>> > > 2000000002000000
>>> > > [Wed Jul 12 20:32:37 2023] io_uring_mmu_get_unmapped_area() would
>>> > > return
>>> > > address 1fffffffffe50000
>>> > > [Wed Jul 12 20:32:37 2023] but arch_get_unmapped_area() would return
>>> > > address
>>> > > 2000000002100000
>>> > >
>>> > >
>>> > > When it hangs:
>>> > > $ cmake --version
>>> > > cmake version 3.26.4
>>> > >
>>> > > CMake suite maintained and supported by Kitware (kitware.com/cmake).
>>> > > ^C
>>> > > $ dmesg --color=always -T | tail -n 4
>>> > > [Wed Jul 12 20:33:12 2023] io_uring_mmu_get_unmapped_area() would
>>> > > return
>>> > > address 1ffffffffff00000
>>> > > [Wed Jul 12 20:33:12 2023] but arch_get_unmapped_area() would return
>>> > > address
>>> > > 1ffffffffff00000
>>> > > [Wed Jul 12 20:33:12 2023] io_uring_mmu_get_unmapped_area() would
>>> > > return
>>> > > address 1fffffffffe60000
>>> > > [Wed Jul 12 20:33:12 2023] but arch_get_unmapped_area() would return
>>> > > address
>>> > > 2000000001f00000
>>> > >
>>> > > Is io_uring_mmu_get_unmapped_area supposed to always return
>>> > > addresses above
>>> > > 0x2000000000000000?
>>> >
>>> > Yes, with the patch below.
>>> >
>>> > > Any reason why it is not doing so sometimes?
>>> >
>>> > It depends on the parameters for vm_unmapped_area(). Specifically
>>> > info.flags=0.
>>> >
>>> > Try this patch:
>>> >
>>> > diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>> > index 3bca7a79efda..b259794ab53b 100644
>>> > --- a/io_uring/io_uring.c
>>> > +++ b/io_uring/io_uring.c
>>> > @@ -3429,10 +3429,13 @@ static unsigned long
>>> > io_uring_mmu_get_unmapped_area(struct file *filp,
>>> >       * A failed mmap() very likely causes application failure,
>>> >       * so fall back to the bottom-up function here. This scenario
>>> >       * can happen with large stack limits and large mmap()
>>> > -     * allocations.
>>> > +     * allocations. Use bottom-up on IA64 for correct aliasing.
>>> >       */
>>> > -    addr = vm_unmapped_area(&info);
>>> > -    if (offset_in_page(addr)) {
>>> > +    if (IS_ENABLED(CONFIG_IA64))
>>> > +        addr = NULL;
>>> > +    else
>>> > +        addr = vm_unmapped_area(&info);
>>> > +    if (!addr) {
>>> >          info.flags = 0;
>>> >          info.low_limit = TASK_UNMAPPED_BASE;
>>> >          info.high_limit = mmap_end;
>>> >
>>> > Helge
>>>
>>> This patch does do the trick, but I am a little unsure if it's the right one
>>> to go in:
>>>
>>> * Adding an arch-specific conditional feels like a bad hack, why is it not
>>> working with the other vm_unmapped_area_info settings?
>>
>> because it tries to map below TASK_UNMAPPED_BASE, for which (I assume) IA-64
>> has different aliasing/caching rules. There are some comments in the arch/ia64
>> files, but I'm not an IA-64 expert...
>>
>>> * What happened to the offset_in_page check for other arches?
>>
>> I thought it's not necessary.
>>
>> But below is another (and much better) approach, which you may test.
>> I see quite some errors with the liburing testcases on hppa, but I think
>> they are not related to this function.
>>
>> Can you test and report back?
>>
>> Helge
>>
>>
>> From 457f2c2db984bc159119bfb4426d9dc6c2779ed6 Mon Sep 17 00:00:00 2001
>> From: Helge Deller <deller@gmx.de>
>> Date: Sun, 16 Jul 2023 08:45:17 +0200
>> Subject: [PATCH] io_uring: Adjust mapping wrt architecture aliasing
>>  requirements
>>
>> When mapping memory to userspace use the architecture-provided
>> get_unmapped_area() function instead of the own copy which fails on
>> IA-64 since it doesn't allow mappings below TASK_UNMAPPED_BASE.
>>
>> Additionally make sure to flag the requested memory as MAP_SHARED so
>> that any architecture-specific aliasing rules will be applied.
>>
>> Reported-by: matoro <matoro_mailinglist_kernel@matoro.tk>
>> Signed-off-by: Helge Deller <deller@gmx.de>
>>
>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>> index 3bca7a79efda..2e7dd93e45d0 100644
>> --- a/io_uring/io_uring.c
>> +++ b/io_uring/io_uring.c
>> @@ -3398,48 +3398,27 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
>>              unsigned long addr, unsigned long len,
>>              unsigned long pgoff, unsigned long flags)
>>  {
>> -    const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
>> -    struct vm_unmapped_area_info info;
>>      void *ptr;
>>
>>      /*
>>       * Do not allow to map to user-provided address to avoid breaking the
>> -     * aliasing rules. Userspace is not able to guess the offset address of
>> -     * kernel kmalloc()ed memory area.
>> +     * aliasing rules of various architectures. Userspace is not able to
>> +     * guess the offset address of kernel kmalloc()ed memory area.
>>       */
>> -    if (addr)
>> +    if (addr | (flags & MAP_FIXED))
>>          return -EINVAL;
>>
>> +    /*
>> +     * The requested memory region is required to be shared between kernel
>> +     * and userspace application.
>> +     */
>> +    flags |= MAP_SHARED;
>> +
>>      ptr = io_uring_validate_mmap_request(filp, pgoff, len);
>>      if (IS_ERR(ptr))
>>          return -ENOMEM;
>>
>> -    info.flags = VM_UNMAPPED_AREA_TOPDOWN;
>> -    info.length = len;
>> -    info.low_limit = max(PAGE_SIZE, mmap_min_addr);
>> -    info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
>> -#ifdef SHM_COLOUR
>> -    info.align_mask = PAGE_MASK & (SHM_COLOUR - 1UL);
>> -#else
>> -    info.align_mask = PAGE_MASK & (SHMLBA - 1UL);
>> -#endif
>> -    info.align_offset = (unsigned long) ptr;
>> -
>> -    /*
>> -     * A failed mmap() very likely causes application failure,
>> -     * so fall back to the bottom-up function here. This scenario
>> -     * can happen with large stack limits and large mmap()
>> -     * allocations.
>> -     */
>> -    addr = vm_unmapped_area(&info);
>> -    if (offset_in_page(addr)) {
>> -        info.flags = 0;
>> -        info.low_limit = TASK_UNMAPPED_BASE;
>> -        info.high_limit = mmap_end;
>> -        addr = vm_unmapped_area(&info);
>> -    }
>> -
>> -    return addr;
>> +    return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
>>  }
>>
>>  #else /* !CONFIG_MMU */
>
> This seems really close.  It worked for the trivial test case, so I ran the test suite from https://github.com/axboe/liburing to compare.  With kernel 6.3, I get a 100% pass; after applying this patch, I get one failure:
> Running test read-write.t cqe->res=33, expected=32
> test_rem_buf_single(BUFFERS + 1) failed
> Not root, skipping test_write_efbig
> Test read-write.t failed with ret 1
>
> Trying this patch out on other arches to see if it also affects them or is ia64-specific.

I'm sorry, but this patch does break parisc heavily...

I'll need to think more...

Helge




  reply	other threads:[~2023-07-16 20:54 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-14 17:16 [PATCHSET 0/5] User mapped provided buffer rings Jens Axboe
2023-03-14 17:16 ` [PATCH 1/5] io_uring: Adjust mapping wrt architecture aliasing requirements Jens Axboe
2023-07-12  4:43   ` matoro
2023-07-12 16:24     ` Helge Deller
2023-07-12 17:28       ` matoro
2023-07-12 19:05         ` Helge Deller
2023-07-12 20:30           ` Helge Deller
2023-07-13  0:35             ` matoro
2023-07-13  7:27               ` Helge Deller
2023-07-13 23:57                 ` matoro
2023-07-16  6:54                   ` Helge Deller
2023-07-16 18:03                     ` matoro
2023-07-16 20:54                       ` Helge Deller [this message]
2023-03-14 17:16 ` [PATCH 2/5] io_uring/kbuf: move pinning of provided buffer ring into helper Jens Axboe
2023-03-14 17:16 ` [PATCH 3/5] io_uring/kbuf: add buffer_list->is_mapped member Jens Axboe
2023-03-14 17:16 ` [PATCH 4/5] io_uring/kbuf: rename struct io_uring_buf_reg 'pad' to'flags' Jens Axboe
2023-03-14 17:16 ` [PATCH 5/5] io_uring: add support for user mapped provided buffer ring Jens Axboe
2023-03-16 18:07   ` Ammar Faizi
2023-03-16 18:42     ` Jens Axboe
2023-03-15 20:03 ` [PATCHSET 0/5] User mapped provided buffer rings Helge Deller
2023-03-15 20:07   ` Helge Deller
2023-03-15 20:38     ` Jens Axboe
2023-03-15 21:04       ` John David Anglin
2023-03-15 21:08         ` Jens Axboe
2023-03-15 21:18       ` Jens Axboe
2023-03-16 10:18         ` Helge Deller
2023-03-16 17:00           ` Jens Axboe
2023-03-16 19:08         ` John David Anglin
2023-03-16 19:46           ` Jens Axboe
2023-03-17  2:09             ` Jens Axboe
2023-03-17  2:17               ` Jens Axboe
2023-03-17 15:36                 ` John David Anglin
2023-03-17 15:57                   ` Jens Axboe
2023-03-17 16:15                     ` John David Anglin
2023-03-17 16:37                       ` Jens Axboe
2023-03-15 20:11   ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7d3fb4b8-a7e6-8a28-0558-75c1c5a0518d@gmx.de \
    --to=deller@gmx.de \
    --cc=axboe@kernel.dk \
    --cc=glaubitz@physik.fu-berlin.de \
    --cc=io-uring@vger.kernel.org \
    --cc=linux-ia64@vger.kernel.org \
    --cc=matoro_mailinglist_kernel@matoro.tk \
    --cc=sam@gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).