All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] Add preadv and pwritev system calls.
@ 2008-12-12 14:00 Gerd Hoffmann
  2008-12-12 15:29 ` Matthew Wilcox
                   ` (3 more replies)
  0 siblings, 4 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 14:00 UTC (permalink / raw)
  To: linux-kernel, linux-arch; +Cc: Gerd Hoffmann

This patch adds preadv and pwritev system calls.  These syscalls are a
pretty straightforward combination of pread and readv (same for write).
They are quite useful for doing vectored I/O in threaded applications.
Using lseek+readv instead opens race windows you'll have to plug with
locking.

Other systems have such system calls too, for example NetBSD, check
here: http://www.daemon-systems.org/man/preadv.2.html

The patch sports the actual system call implementation and the windup in
the x86 system call tables.  Other archs are TBD.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 arch/x86/ia32/ia32entry.S          |    2 +
 arch/x86/include/asm/unistd_32.h   |    2 +
 arch/x86/include/asm/unistd_64.h   |    4 ++
 arch/x86/kernel/syscall_table_32.S |    2 +
 fs/compat.c                        |   61 ++++++++++++++++++++++++++++++++++++
 fs/read_write.c                    |   48 ++++++++++++++++++++++++++++
 6 files changed, 119 insertions(+), 0 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 256b00b..9a8501b 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -826,4 +826,6 @@ ia32_sys_call_table:
 	.quad sys_dup3			/* 330 */
 	.quad sys_pipe2
 	.quad sys_inotify_init1
+	.quad compat_sys_preadv
+	.quad compat_sys_pwritev
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index f2bba78..6e72d74 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -338,6 +338,8 @@
 #define __NR_dup3		330
 #define __NR_pipe2		331
 #define __NR_inotify_init1	332
+#define __NR_preadv		333
+#define __NR_pwritev		334
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index d2e415e..f818294 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -653,6 +653,10 @@ __SYSCALL(__NR_dup3, sys_dup3)
 __SYSCALL(__NR_pipe2, sys_pipe2)
 #define __NR_inotify_init1			294
 __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
+#define __NR_preadv				295
+__SYSCALL(__NR_preadv, sys_preadv)
+#define __NR_pwritev				296
+__SYSCALL(__NR_pwritev, sys_pwritev)
 
 
 #ifndef __NO_STUBS
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d44395f..a1a5506 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -332,3 +332,5 @@ ENTRY(sys_call_table)
 	.long sys_dup3			/* 330 */
 	.long sys_pipe2
 	.long sys_inotify_init1
+	.long sys_preadv
+	.long sys_pwritev
diff --git a/fs/compat.c b/fs/compat.c
index e5f49f5..3a25cf3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1214,6 +1214,67 @@ out:
 	return ret;
 }
 
+asmlinkage ssize_t
+compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
+                  unsigned long vlen, loff_t pos)
+{
+	struct file *file;
+	ssize_t ret = -EBADF;
+
+	if (pos < 0)
+		return -EINVAL;
+
+	file = fget(fd);
+	if (!file)
+		return -EBADF;
+
+	if (!(file->f_mode & FMODE_READ))
+		goto out;
+
+	ret = -EINVAL;
+	if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
+		goto out;
+
+	ret = compat_do_readv_writev(READ, file, vec, vlen, &pos);
+
+out:
+	if (ret > 0)
+		add_rchar(current, ret);
+	inc_syscr(current);
+	fput(file);
+	return ret;
+}
+
+asmlinkage ssize_t
+compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
+                   unsigned long vlen, loff_t pos)
+{
+	struct file *file;
+	ssize_t ret = -EBADF;
+
+	if (pos < 0)
+		return -EINVAL;
+
+	file = fget(fd);
+	if (!file)
+		return -EBADF;
+	if (!(file->f_mode & FMODE_WRITE))
+		goto out;
+
+	ret = -EINVAL;
+	if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
+		goto out;
+
+	ret = compat_do_readv_writev(WRITE, file, vec, vlen, &pos);
+
+out:
+	if (ret > 0)
+		add_wchar(current, ret);
+	inc_syscw(current);
+	fput(file);
+	return ret;
+}
+
 asmlinkage long
 compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
 		    unsigned int nr_segs, unsigned int flags)
diff --git a/fs/read_write.c b/fs/read_write.c
index 969a6d9..89f273d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -701,6 +701,54 @@ sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
 	return ret;
 }
 
+asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
+                              unsigned long vlen, loff_t pos)
+{
+	struct file *file;
+	ssize_t ret = -EBADF;
+	int fput_needed;
+
+	if (pos < 0)
+		return -EINVAL;
+
+	file = fget_light(fd, &fput_needed);
+	if (file) {
+		ret = -ESPIPE;
+		if (file->f_mode & FMODE_PREAD)
+			ret = vfs_readv(file, vec, vlen, &pos);
+		fput_light(file, fput_needed);
+	}
+
+	if (ret > 0)
+		add_rchar(current, ret);
+	inc_syscr(current);
+	return ret;
+}
+
+asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
+                               unsigned long vlen, loff_t pos)
+{
+	struct file *file;
+	ssize_t ret = -EBADF;
+	int fput_needed;
+
+	if (pos < 0)
+		return -EINVAL;
+
+	file = fget_light(fd, &fput_needed);
+	if (file) {
+		ret = -ESPIPE;
+		if (file->f_mode & FMODE_PWRITE)
+			ret = vfs_writev(file, vec, vlen, &pos);
+		fput_light(file, fput_needed);
+	}
+
+	if (ret > 0)
+		add_wchar(current, ret);
+	inc_syscw(current);
+	return ret;
+}
+
 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 			   size_t count, loff_t max)
 {
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 14:00 [PATCH v2] Add preadv and pwritev system calls Gerd Hoffmann
@ 2008-12-12 15:29 ` Matthew Wilcox
  2008-12-12 15:48   ` Gerd Hoffmann
                     ` (3 more replies)
  2008-12-12 15:40 ` Ralf Baechle
                   ` (2 subsequent siblings)
  3 siblings, 4 replies; 29+ messages in thread
From: Matthew Wilcox @ 2008-12-12 15:29 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch

On Fri, Dec 12, 2008 at 03:00:40PM +0100, Gerd Hoffmann wrote:
> The patch sports the actual system call implementation and the windup in
> the x86 system call tables.  Other archs are TBD.

> +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> +                              unsigned long vlen, loff_t pos)
> +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> +                               unsigned long vlen, loff_t pos)

Are these prototypes required?  MIPS and PARISC will need wrappers to
fix them if they are.  These two architectures have an ABI which
requires 64-bit arguments to be passed in aligned pairs of registers,
but glibc doesn't know that (and given the existence of syscall(3),
can't do much about it even if it knew), so some of the arguments end up
in the wrong registers.

Things will go much better if we can prototype these as:

asmlinkage ssize_t sys_preadv(unsigned int fd, const struct iovec __user *vec,
				loff_t pos, unsigned long vlen);
asmlinkage ssize_t sys_pwritev(unsigned int fd, const struct iovec __user *vec,
				loff_t pos, unsigned long vlen);

That way 'pos' ends up split between arg2 and arg3 and vlen ends up in
arg4 instead of having vlen in arg2 and pos in arg3 and arg4 which then
have to be munged to be in arg4 and arg5 by a compat wrapper.

I seem to recall the s390 folks having some concerns with this kind of
thing too, but I forget what they are, so I'll let them weigh in on
this.

By the way, why did you make 'fd' an unsigned long?  The rest of the
kernel uses unsigned int.

-- 
Matthew Wilcox				Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 14:00 [PATCH v2] Add preadv and pwritev system calls Gerd Hoffmann
  2008-12-12 15:29 ` Matthew Wilcox
@ 2008-12-12 15:40 ` Ralf Baechle
  2008-12-12 16:59 ` Russell King
  2008-12-13  1:18 ` Michael Kerrisk
  3 siblings, 0 replies; 29+ messages in thread
From: Ralf Baechle @ 2008-12-12 15:40 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch

On Fri, Dec 12, 2008 at 03:00:40PM +0100, Gerd Hoffmann wrote:

> This patch adds preadv and pwritev system calls.  These syscalls are a
> pretty straightforward combination of pread and readv (same for write).
> They are quite useful for doing vectored I/O in threaded applications.
> Using lseek+readv instead opens race windows you'll have to plug with
> locking.
> 
> Other systems have such system calls too, for example NetBSD, check
> here: http://www.daemon-systems.org/man/preadv.2.html
> 
> The patch sports the actual system call implementation and the windup in
> the x86 system call tables.  Other archs are TBD.

> +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> +                              unsigned long vlen, loff_t pos)

> +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> +                               unsigned long vlen, loff_t pos)

As so often before the devil is in the function prototype.  On some
architectures - 32-bit MIPS and PARISC, maybe others - 64-bit arguments
such as loff_t need to be passed in an _aligned_ pair of 32-bit
arguments which effectively requires another wrapper like this around
your compat wrapper:

asmlinkage int sys32_preadv(unsigned long fd,
       const struct compat_iovec __user *vec,
       unsigned long vlen, int dummy, unsigned a5, unsigned a6)
{
	return compat_sys_preadv(fd, vec, vlen, merge_64(a5, a6));
}

merge_64() takes two 32-bit halves of a 64-bit argument and combines them
into a 64-bit argument again.

I wonder, does that merging happen magically on x86 or?

  Ralf

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 15:29 ` Matthew Wilcox
@ 2008-12-12 15:48   ` Gerd Hoffmann
  2008-12-12 15:51     ` Matthew Wilcox
  2008-12-12 19:47   ` Arnd Bergmann
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 15:48 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-kernel, linux-arch, Ulrich Drepper

Matthew Wilcox wrote:
>> +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
>> +                              unsigned long vlen, loff_t pos)
>> +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
>> +                               unsigned long vlen, loff_t pos)
> 
> Are these prototypes required?  MIPS and PARISC will need wrappers to
> fix them if they are.  These two architectures have an ABI which
> requires 64-bit arguments to be passed in aligned pairs of registers,
> but glibc doesn't know that (and given the existence of syscall(3),
> can't do much about it even if it knew), so some of the arguments end up
> in the wrong registers.
>
> Things will go much better if we can prototype these as:
>
> asmlinkage ssize_t sys_preadv(unsigned int fd,
>                               const struct iovec __user *vec,
> 				loff_t pos, unsigned long vlen);
> asmlinkage ssize_t sys_pwritev(unsigned int fd,
>                                const struct iovec __user *vec,
> 				 loff_t pos, unsigned long vlen);

Hmm.  It is the argument ordering used by NetBSD, that's why I used that
too.  It certainly should be the application-visible ordering.  We'll
have glibc between apps and kernel though, so I think we can reorder the
arguments at syscall level if that helps on these archs.  Cc'ing Ulrich
Drepper for comments on that.

> By the way, why did you make 'fd' an unsigned long?  The rest of the
> kernel uses unsigned int.

sys_{readv,writev} have unsigned long too, this is where I got it from.
 Don't know what the reason for this is, it looks a bit odd indeed.

cheers,
  Gerd


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 15:48   ` Gerd Hoffmann
@ 2008-12-12 15:51     ` Matthew Wilcox
  2008-12-12 16:02       ` Gerd Hoffmann
  0 siblings, 1 reply; 29+ messages in thread
From: Matthew Wilcox @ 2008-12-12 15:51 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch, Ulrich Drepper

On Fri, Dec 12, 2008 at 04:48:36PM +0100, Gerd Hoffmann wrote:
> Hmm.  It is the argument ordering used by NetBSD, thats why I used that
> too.  It certainly should be the application-visible ordering.  We'll
> have glibc between apps and kernel though, so I think we can reorder the
> arguments at syscall level if that helps on these archs.  Cc'ing Ulrich
> Drepper for comments on that.

On the other hand, NetBSD have approximately 0% market share.
We shouldn't let them lock us into making a bad decision.  Is there
anyone other than NetBSD who has added these syscalls?

-- 
Matthew Wilcox				Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 15:51     ` Matthew Wilcox
@ 2008-12-12 16:02       ` Gerd Hoffmann
  2008-12-12 17:03         ` Matthew Wilcox
                           ` (2 more replies)
  0 siblings, 3 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 16:02 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-kernel, linux-arch, Ulrich Drepper

Matthew Wilcox wrote:
> On the other hand, NetBSD have approximately 0% market share.
> We shouldn't let them lock us into making a bad decision.  Is there
> anyone other than NetBSD who has added these syscalls?

Free- and OpenBSD have it too.  For Solaris I've found a feature request
only.  Dunno about MacOS/Darwin.  Other un*xes which are important these
days?

I'd *really* hate it to have the same system call with different
argument ordering on different systems though.  Especially when swapping
two integer values, so gcc wouldn't error out on wrong usage.

cheers,
  Gerd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 14:00 [PATCH v2] Add preadv and pwritev system calls Gerd Hoffmann
  2008-12-12 15:29 ` Matthew Wilcox
  2008-12-12 15:40 ` Ralf Baechle
@ 2008-12-12 16:59 ` Russell King
  2008-12-13  1:18 ` Michael Kerrisk
  3 siblings, 0 replies; 29+ messages in thread
From: Russell King @ 2008-12-12 16:59 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch

On Fri, Dec 12, 2008 at 03:00:40PM +0100, Gerd Hoffmann wrote:
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 969a6d9..89f273d 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -701,6 +701,54 @@ sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
>  	return ret;
>  }
>  
> +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> +                              unsigned long vlen, loff_t pos)

For ARM, I'd prefer this to be:

	unsigned long fd, const struct iovec __user *vec,
		loff_t pos, unsigned long vlen

to avoid any variance in ABIness which will occur with the argument
layout as it currently stands, but that creates a silly argument
order.  The other option would be for us to define our own version
in an ARM ABI independent manner:

ssize_t sys_arm_preadv(unsigned long fd, const struct iovec __user *vec,
	unsigned long vlen, unsigned long low_pos, unsigned long high_pos)

-- 
Russell King
 Linux kernel    2.6 ARM Linux   - http://www.arm.linux.org.uk/
 maintainer of:

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 16:02       ` Gerd Hoffmann
@ 2008-12-12 17:03         ` Matthew Wilcox
  2008-12-12 18:21           ` Alan Cox
  2008-12-12 18:29         ` Scott Lurndal
  2008-12-15 16:37         ` Jennifer Pioch
  2 siblings, 1 reply; 29+ messages in thread
From: Matthew Wilcox @ 2008-12-12 17:03 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch, Ulrich Drepper

On Fri, Dec 12, 2008 at 05:02:05PM +0100, Gerd Hoffmann wrote:
> I'd *really* hate it to have the same system call with different
> argument ordering on different systems though.  Especially when swapping
> two integer values, so gcc wouldn't error out on wrong usage.

We can always permute it further:

int fd, int vlen, loff_t pos, const struct *

-- 
Matthew Wilcox				Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 17:03         ` Matthew Wilcox
@ 2008-12-12 18:21           ` Alan Cox
  2008-12-12 19:02             ` Russell King
  0 siblings, 1 reply; 29+ messages in thread
From: Alan Cox @ 2008-12-12 18:21 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Gerd Hoffmann, linux-kernel, linux-arch, Ulrich Drepper

On Fri, 12 Dec 2008 10:03:47 -0700
Matthew Wilcox <matthew@wil.cx> wrote:

> On Fri, Dec 12, 2008 at 05:02:05PM +0100, Gerd Hoffmann wrote:
> > I'd *really* hate it to have the same system call with different
> > argument ordering on different systems though.  Especially when swapping
> > two integer values, so gcc wouldn't error out on wrong usage.
> 
> We can always permute it further:
> 
> int fd, int vlen, loff_t pos, const struct *

Or you could add cobol calling syntax or pass the arguments in XML
format ?

Any particular reason you want to make things hell for programmers and
the standard people. Follow the BSD one at least to user space. Anything
else will just lead to pain and suffering later on the standardisation
front.

Alan

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 16:02       ` Gerd Hoffmann
  2008-12-12 17:03         ` Matthew Wilcox
@ 2008-12-12 18:29         ` Scott Lurndal
  2008-12-12 19:07           ` Russell King
  2008-12-15 16:37         ` Jennifer Pioch
  2 siblings, 1 reply; 29+ messages in thread
From: Scott Lurndal @ 2008-12-12 18:29 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Matthew Wilcox, linux-kernel, linux-arch, Ulrich Drepper

On Fri, Dec 12, 2008 at 05:02:05PM +0100, Gerd Hoffmann wrote:
> Matthew Wilcox wrote:
> > On the other hand, NetBSD have approximately 0% market share.
> > We shouldn't let them lock us into making a bad decision.  Is there
> > anyone other than NetBSD who has added these syscalls?
> 
> Free- and OpenBSD have it too.  For Solaris I've found a feature request
> only.  Dunno about MacOS/Darwin.  Other un*xes which are important these
> days?
> 
> I'd *really* hate it to have the same system call with different
> argument ordering on different systems though.  Especially when swapping
> two integer values, so gcc wouldn't error out on wrong usage.

I would suggest that from the end-users perspective, the user-mode API
should be similar to pread/pwrite, e.g:

    int preadv(fd, iovec, iovec_size, offset)

scott

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 18:21           ` Alan Cox
@ 2008-12-12 19:02             ` Russell King
  0 siblings, 0 replies; 29+ messages in thread
From: Russell King @ 2008-12-12 19:02 UTC (permalink / raw)
  To: Alan Cox
  Cc: Matthew Wilcox, Gerd Hoffmann, linux-kernel, linux-arch, Ulrich Drepper

On Fri, Dec 12, 2008 at 06:21:42PM +0000, Alan Cox wrote:
> On Fri, 12 Dec 2008 10:03:47 -0700
> Matthew Wilcox <matthew@wil.cx> wrote:
> 
> > On Fri, Dec 12, 2008 at 05:02:05PM +0100, Gerd Hoffmann wrote:
> > > I'd *really* hate it to have the same system call with different
> > > argument ordering on different systems though.  Especially when swapping
> > > two integer values, so gcc wouldn't error out on wrong usage.
> > 
> > We can always permute it further:
> > 
> > int fd, int vlen, loff_t pos, const struct *
> 
> Or you could add cobol calling syntax or pass the arguments in XML
> format ?
> 
> Any particular reason you want to make things hell for programmers and
> the standard people. Follow the BSD one at least to user space. Anything
> else will just lead to pain and suffering later on the standardisation
> front.

Yes - non-aligned 64-bit arguments in registers really really really
sucks.  On ARM, we will change the syscall argument ordering to avoid
having additional compatibility implementations for EABI vs OABI.

The same goes for similar ABIs with restrictions on 64-bit arguments.

There are cases where a syscall just cannot be used on such ABIs.  Eg,

	int a1, unsigned long long a2, int a3, unsigned long long a4

is an impossible syscall argument order for ARM - we run out of registers
for the upper word of 'a4'.

-- 
Russell King
 Linux kernel    2.6 ARM Linux   - http://www.arm.linux.org.uk/
 maintainer of:

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 18:29         ` Scott Lurndal
@ 2008-12-12 19:07           ` Russell King
  2008-12-12 19:56               ` Gerd Hoffmann
  0 siblings, 1 reply; 29+ messages in thread
From: Russell King @ 2008-12-12 19:07 UTC (permalink / raw)
  To: Scott Lurndal
  Cc: Gerd Hoffmann, Matthew Wilcox, linux-kernel, linux-arch, Ulrich Drepper

On Fri, Dec 12, 2008 at 10:29:29AM -0800, Scott Lurndal wrote:
> On Fri, Dec 12, 2008 at 05:02:05PM +0100, Gerd Hoffmann wrote:
> > Matthew Wilcox wrote:
> > > On the other hand, NetBSD have approximately 0% market share.
> > > We shouldn't let them lock us into making a bad decision.  Is there
> > > anyone other than NetBSD who has added these syscalls?
> > 
> > Free- and OpenBSD have it too.  For Solaris I've found a feature request
> > only.  Dunno about MacOS/Darwin.  Other un*xes which are important these
> > days?
> > 
> > I'd *really* hate it to have the same system call with different
> > argument ordering on different systems though.  Especially when swapping
> > two integer values, so gcc wouldn't error out on wrong usage.
> 
> I would suggest that from the end-users perspective, the user-mode API
> should be similar to pread/pwrite, e.g:
> 
>     int preadv(fd, iovec, iovec_size, offset)

Yes, and that's easy for glibc to achieve.

What's hard is that the user <-> kernel API firstly has a limited number
of registers available to it for passing arguments without indirection
from user space into kernel space.

Secondly, the user <-> kernel argument register allocation can vary
depending on the ABI version which user space or kernel space is built
for.  On ARM we have two ABIs, one where 64-bit arguments can be placed
in any two consecutive registers, and one where 64-bit arguments must
be placed in an even,odd register pair (not an odd,even pair.)

That leads to the above being:

fd	r0	r0
iovec	r1	r1
vecsz	r2	r2
offset	r3,r4	r4,r5

Notice the different register allocation for the 64-bit offset.

This problem of register-aligned argument placement is not limited
to just ARM, but several other Linux supported architectures.

-- 
Russell King
 Linux kernel    2.6 ARM Linux   - http://www.arm.linux.org.uk/
 maintainer of:

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 15:29 ` Matthew Wilcox
  2008-12-12 15:48   ` Gerd Hoffmann
@ 2008-12-12 19:47   ` Arnd Bergmann
  2008-12-12 20:02     ` Gerd Hoffmann
  2008-12-14 11:49   ` Heiko Carstens
  2008-12-15  4:14   ` Paul Mackerras
  3 siblings, 1 reply; 29+ messages in thread
From: Arnd Bergmann @ 2008-12-12 19:47 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Gerd Hoffmann, linux-kernel, linux-arch

On Friday 12 December 2008, Matthew Wilcox wrote:
> Things will go much better if we can prototype these as:
> 
> asmlinkage ssize_t sys_preadv(unsigned int fd, const struct iovec __user *vec,
>                                 loff_t pos, unsigned long vlen);
> asmlinkage ssize_t sys_pwritev(unsigned int fd, const struct iovec __user *vec,
>                                 loff_t pos, unsigned long vlen);

I would vote for doing it the same way as sys_llseek, which avoids
this issue entirely by passing the upper half of pos separately:


asmlinkage ssize_t sys_preadv(unsigned long fd,
                               const struct iovec __user *vec,
                               unsigned long vlen,
			       unsigned long pos_high, unsigned long pos_low);
asmlinkage ssize_t sys_pwritev(unsigned long fd,
                               const struct iovec __user *vec,
                               unsigned long vlen,
			       unsigned long pos_high, unsigned long pos_low);

This is the only way I can see that lets us use a shared 
compat_sys_preadv/pwritev across all 64 bit architectures.

The libc can then add a trivial wrapper around the syscalls
to get the regular calling conventions.

Aside from that, have you considered doing something even more flexible,
like this?

struct piovec {
	void __user *iov_base;
	__kernel_size_t	iov_len;
	__kernel_loff_t pos;
};

asmlinkage ssize_t sys_pwritev(unsigned long fd,
                               const struct piovec __user *vec,
                               unsigned long vlen);

	Arnd <><

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 19:07           ` Russell King
@ 2008-12-12 19:56               ` Gerd Hoffmann
  0 siblings, 0 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 19:56 UTC (permalink / raw)
  To: Scott Lurndal, Gerd Hoffmann, Matthew Wilcox, linux-kernel,
	linux-arch, Ulrich Drepper

Russell King wrote:
>> should be similar to pread/pwrite, e.g:
>>
>>     int preadv(fd, iovec, iovec_size, offset)
> 
> Yes, and that's easy for glibc to achieve.

This hints the ABI problem exists at syscall level only.  Is that
correct?  So we can have

	preadv(fd, vec, vlen, off)

argument ordering at app <-> glibc level and

	preadv(fd, vec, off, vlen)

ordering at glibc <-> kernel (aka syscall) level and it works fine for
ARM + MIPS + PARISC?

thanks,
  Gerd



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
@ 2008-12-12 19:56               ` Gerd Hoffmann
  0 siblings, 0 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 19:56 UTC (permalink / raw)
  To: Scott Lurndal, Gerd Hoffmann, Matthew Wilcox, linux-kernel,
	linux-arch, Ulrich

Russell King wrote:
>> should be similar to pread/pwrite, e.g:
>>
>>     int preadv(fd, iovec, iovec_size, offset)
> 
> Yes, and that's easy for glibc to achieve.

This hints the ABI problem exists at syscall level only.  Is that
correct?  So we can have

	preadv(fd, vec, vlen, off)

argument ordering at app <-> glibc level and

	preadv(fd, vec, off, vlen)

ordering at glibc <-> kernel (aka syscall) level and it works fine for
ARM + MIPS + PARISC?

thanks,
  Gerd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 19:47   ` Arnd Bergmann
@ 2008-12-12 20:02     ` Gerd Hoffmann
  0 siblings, 0 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 20:02 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: Matthew Wilcox, linux-kernel, linux-arch

Arnd Bergmann wrote:
> Aside from that, have you considered doing something even more flexible,
> like this?
> 
> struct piovec {
> 	void __user *iov_base;
> 	__kernel_size_t	iov_len;
> 	__kernel_loff_t pos;
> };
> 
> asmlinkage ssize_t sys_pwritev(unsigned long fd,
>                                const struct piovec __user *vec,
>                                unsigned long vlen);

There is little point in doing so because I *really* want the
user-visible API being identical to the existing ones in the *BSD
family.  Anything else is just a PITA for the applications using this.

cheers,
  Gerd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 19:56               ` Gerd Hoffmann
  (?)
@ 2008-12-12 20:12               ` Russell King
  2008-12-12 20:39                   ` Gerd Hoffmann
  -1 siblings, 1 reply; 29+ messages in thread
From: Russell King @ 2008-12-12 20:12 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Scott Lurndal, Matthew Wilcox, linux-kernel, linux-arch, Ulrich Drepper

On Fri, Dec 12, 2008 at 08:56:00PM +0100, Gerd Hoffmann wrote:
> Russell King wrote:
> >> should be similar to pread/pwrite, e.g:
> >>
> >>     int preadv(fd, iovec, iovec_size, offset)
> > 
> > Yes, and that's easy for glibc to achieve.
> 
> This hints the ABI problem exists at syscall level only.  Is that
> correct?  So we can have
> 
> 	preadv(fd, vec, vlen, off)
> 
> argument ordering at app <-> glibc level and
> 
> 	preadv(fd, vec, off, vlen)
> 
> ordering at glibc <-> kernel (aka syscall) level and it works fine for
> ARM + MIPS + PARISC?

Fine for ARM - and yes, the user visible API should be unchanged from the
BSD standard.  I don't think anyone in this thread was suggesting that
the user visible argument ordering should be any different from the
original.

Having it in a different order from *BSD at the libc visible interface
is just crazy from the OS portability point of view.

-- 
Russell King
 Linux kernel    2.6 ARM Linux   - http://www.arm.linux.org.uk/
 maintainer of:

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 20:12               ` Russell King
@ 2008-12-12 20:39                   ` Gerd Hoffmann
  0 siblings, 0 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 20:39 UTC (permalink / raw)
  To: Gerd Hoffmann, Scott Lurndal, Matthew Wilcox, linux-kernel,
	linux-arch, Ulrich Drepper

Russell King wrote:
> On Fri, Dec 12, 2008 at 08:56:00PM +0100, Gerd Hoffmann wrote:
>> Russell King wrote:
>>>> should be similar to pread/pwrite, e.g:
>>>>
>>>>     int preadv(fd, iovec, iovec_size, offset)
>>> Yes, and that's easy for glibc to achieve.
>> This hints the ABI problem exists at syscall level only.  Is that
>> correct?  So we can have
>>
>> 	preadv(fd, vec, vlen, off)
>>
>> argument ordering at app <-> glibc level and
>>
>> 	preadv(fd, vec, off, vlen)
>>
>> ordering at glibc <-> kernel (aka syscall) level and it works fine for
>> ARM + MIPS + PARISC?
> 
> Fine for ARM

Great.  I'll happily switch the ordering then.  /me goes wait for acks
from the other archs and plans for a new patch revision early next week.

thanks,
  Gerd



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
@ 2008-12-12 20:39                   ` Gerd Hoffmann
  0 siblings, 0 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 20:39 UTC (permalink / raw)
  To: Gerd Hoffmann, Scott Lurndal, Matthew Wilcox, linux-kernel,
	linux-arch, Ulrich Drepper

Russell King wrote:
> On Fri, Dec 12, 2008 at 08:56:00PM +0100, Gerd Hoffmann wrote:
>> Russell King wrote:
>>>> should be similar to pread/pwrite, e.g:
>>>>
>>>>     int preadv(fd, iovec, iovec_size, offset)
>>> Yes, and that's easy for glibc to achieve.
>> This hints the ABI problem exists at syscall level only.  Is that
>> correct?  So we can have
>>
>> 	preadv(fd, vec, vlen, off)
>>
>> argument ordering at app <-> glibc level and
>>
>> 	preadv(fd, vec, off, vlen)
>>
>> ordering at glibc <-> kernel (aka syscall) level and it works fine for
>> ARM + MIPS + PARISC?
> 
> Fine for ARM

Great.  I'll happily switch the ordering then.  /me goes wait for acks
from the other archs and plans for a new patch revision early next week.

thanks,
  Gerd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 14:00 [PATCH v2] Add preadv and pwritev system calls Gerd Hoffmann
                   ` (2 preceding siblings ...)
  2008-12-12 16:59 ` Russell King
@ 2008-12-13  1:18 ` Michael Kerrisk
  3 siblings, 0 replies; 29+ messages in thread
From: Michael Kerrisk @ 2008-12-13  1:18 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch, Linux API

Gerd,

Please CC linux-api on patches that change the API!

Cheers,

Michael

On Fri, Dec 12, 2008 at 9:00 AM, Gerd Hoffmann <kraxel@redhat.com> wrote:
> This patch adds preadv and pwritev system calls.  These syscalls are a
> pretty straightforward combination of pread and readv (same for write).
> They are quite useful for doing vectored I/O in threaded applications.
> Using lseek+readv instead opens race windows you'll have to plug with
> locking.
>
> Other systems have such system calls too, for example NetBSD, check
> here: http://www.daemon-systems.org/man/preadv.2.html
>
> The patch sports the actual system call implementation and the windup in
> the x86 system call tables.  Other archs are TBD.
>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>  arch/x86/ia32/ia32entry.S          |    2 +
>  arch/x86/include/asm/unistd_32.h   |    2 +
>  arch/x86/include/asm/unistd_64.h   |    4 ++
>  arch/x86/kernel/syscall_table_32.S |    2 +
>  fs/compat.c                        |   61 ++++++++++++++++++++++++++++++++++++
>  fs/read_write.c                    |   48 ++++++++++++++++++++++++++++
>  6 files changed, 119 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
> index 256b00b..9a8501b 100644
> --- a/arch/x86/ia32/ia32entry.S
> +++ b/arch/x86/ia32/ia32entry.S
> @@ -826,4 +826,6 @@ ia32_sys_call_table:
>        .quad sys_dup3                  /* 330 */
>        .quad sys_pipe2
>        .quad sys_inotify_init1
> +       .quad compat_sys_preadv
> +       .quad compat_sys_pwritev
>  ia32_syscall_end:
> diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
> index f2bba78..6e72d74 100644
> --- a/arch/x86/include/asm/unistd_32.h
> +++ b/arch/x86/include/asm/unistd_32.h
> @@ -338,6 +338,8 @@
>  #define __NR_dup3              330
>  #define __NR_pipe2             331
>  #define __NR_inotify_init1     332
> +#define __NR_preadv            333
> +#define __NR_pwritev           334
>
>  #ifdef __KERNEL__
>
> diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
> index d2e415e..f818294 100644
> --- a/arch/x86/include/asm/unistd_64.h
> +++ b/arch/x86/include/asm/unistd_64.h
> @@ -653,6 +653,10 @@ __SYSCALL(__NR_dup3, sys_dup3)
>  __SYSCALL(__NR_pipe2, sys_pipe2)
>  #define __NR_inotify_init1                     294
>  __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
> +#define __NR_preadv                            295
> +__SYSCALL(__NR_preadv, sys_preadv)
> +#define __NR_pwritev                           296
> +__SYSCALL(__NR_pwritev, sys_pwritev)
>
>
>  #ifndef __NO_STUBS
> diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
> index d44395f..a1a5506 100644
> --- a/arch/x86/kernel/syscall_table_32.S
> +++ b/arch/x86/kernel/syscall_table_32.S
> @@ -332,3 +332,5 @@ ENTRY(sys_call_table)
>        .long sys_dup3                  /* 330 */
>        .long sys_pipe2
>        .long sys_inotify_init1
> +       .long sys_preadv
> +       .long sys_pwritev
> diff --git a/fs/compat.c b/fs/compat.c
> index e5f49f5..3a25cf3 100644
> --- a/fs/compat.c
> +++ b/fs/compat.c
> @@ -1214,6 +1214,67 @@ out:
>        return ret;
>  }
>
> +asmlinkage ssize_t
> +compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
> +                  unsigned long vlen, loff_t pos)
> +{
> +       struct file *file;
> +       ssize_t ret = -EBADF;
> +
> +       if (pos < 0)
> +               return -EINVAL;
> +
> +       file = fget(fd);
> +       if (!file)
> +               return -EBADF;
> +
> +       if (!(file->f_mode & FMODE_READ))
> +               goto out;
> +
> +       ret = -EINVAL;
> +       if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
> +               goto out;
> +
> +       ret = compat_do_readv_writev(READ, file, vec, vlen, &pos);
> +
> +out:
> +       if (ret > 0)
> +               add_rchar(current, ret);
> +       inc_syscr(current);
> +       fput(file);
> +       return ret;
> +}
> +
> +asmlinkage ssize_t
> +compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
> +                   unsigned long vlen, loff_t pos)
> +{
> +       struct file *file;
> +       ssize_t ret = -EBADF;
> +
> +       if (pos < 0)
> +               return -EINVAL;
> +
> +       file = fget(fd);
> +       if (!file)
> +               return -EBADF;
> +       if (!(file->f_mode & FMODE_WRITE))
> +               goto out;
> +
> +       ret = -EINVAL;
> +       if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
> +               goto out;
> +
> +       ret = compat_do_readv_writev(WRITE, file, vec, vlen, &pos);
> +
> +out:
> +       if (ret > 0)
> +               add_wchar(current, ret);
> +       inc_syscw(current);
> +       fput(file);
> +       return ret;
> +}
> +
>  asmlinkage long
>  compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
>                    unsigned int nr_segs, unsigned int flags)
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 969a6d9..89f273d 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -701,6 +701,54 @@ sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
>        return ret;
>  }
>
> +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> +                              unsigned long vlen, loff_t pos)
> +{
> +       struct file *file;
> +       ssize_t ret = -EBADF;
> +       int fput_needed;
> +
> +       if (pos < 0)
> +               return -EINVAL;
> +
> +       file = fget_light(fd, &fput_needed);
> +       if (file) {
> +               ret = -ESPIPE;
> +               if (file->f_mode & FMODE_PREAD)
> +                       ret = vfs_readv(file, vec, vlen, &pos);
> +               fput_light(file, fput_needed);
> +       }
> +
> +       if (ret > 0)
> +               add_rchar(current, ret);
> +       inc_syscr(current);
> +       return ret;
> +}
> +
> +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> +                               unsigned long vlen, loff_t pos)
> +{
> +       struct file *file;
> +       ssize_t ret = -EBADF;
> +       int fput_needed;
> +
> +       if (pos < 0)
> +               return -EINVAL;
> +
> +       file = fget_light(fd, &fput_needed);
> +       if (file) {
> +               ret = -ESPIPE;
> +               if (file->f_mode & FMODE_PWRITE)
> +                       ret = vfs_writev(file, vec, vlen, &pos);
> +               fput_light(file, fput_needed);
> +       }
> +
> +       if (ret > 0)
> +               add_wchar(current, ret);
> +       inc_syscw(current);
> +       return ret;
> +}
> +
>  static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
>                           size_t count, loff_t max)
>  {
> --
> 1.5.6.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-arch" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
git://git.kernel.org/pub/scm/docs/man-pages/man-pages.git
man-pages online: http://www.kernel.org/doc/man-pages/online_pages.html
Found a bug? http://www.kernel.org/doc/man-pages/reporting_bugs.html

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 15:29 ` Matthew Wilcox
  2008-12-12 15:48   ` Gerd Hoffmann
  2008-12-12 19:47   ` Arnd Bergmann
@ 2008-12-14 11:49   ` Heiko Carstens
  2008-12-15  4:14   ` Paul Mackerras
  3 siblings, 0 replies; 29+ messages in thread
From: Heiko Carstens @ 2008-12-14 11:49 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Gerd Hoffmann, linux-kernel, linux-arch

On Fri, Dec 12, 2008 at 08:29:29AM -0700, Matthew Wilcox wrote:
> On Fri, Dec 12, 2008 at 03:00:40PM +0100, Gerd Hoffmann wrote:
> > The patch sports the actual system call implementation and the windup in
> > the x86 system call tables.  Other archs are TBD.
> 
> > +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> > +                              unsigned long vlen, loff_t pos)
> > +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> > +                               unsigned long vlen, loff_t pos)
> 
> Are these prototypes required?  MIPS and PARISC will need wrappers to
> fix them if they are.  These two architectures have an ABI which
> requires 64-bit arguments to be passed in aligned pairs of registers,
> but glibc doesn't know that (and given the existence of syscall(3),
> can't do much about it even if it knew), so some of the arguments end up
> in the wrong registers.
> 
> Things will go much better if we can prototype these as:
> 
> asmlinkage ssize_t sys_preadv(unsigned int fd, const struct iovec __user *vec,
> 				loff_t pos, unsigned long vlen);
> asmlinkage ssize_t sys_pwritev(unsigned int fd, const struct iovec __user *vec,
> 				loff_t pos, unsigned long vlen);
> 
> That way 'pos' ends up split between arg2 and arg3 and vlen ends up in
> arg4 instead of having vlen in arg2 and pos in arg3 and arg4 which then
> have to be munged to be in arg4 and arg5 by a compat wrapper.
> 
> I seem to recall the s390 folks having some concerns with this kind of
> thing too, but I forget what they are, so I'll let them weigh in on
> this.

I think a lot of stuff that needs to be known for new system calls was
written up with these two postings:

http://marc.info/?l=linux-arch&m=118277150812137&w=2
http://lkml.org/lkml/2007/8/1/354

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 19:56               ` Gerd Hoffmann
  (?)
  (?)
@ 2008-12-14 18:19               ` Pavel Machek
  -1 siblings, 0 replies; 29+ messages in thread
From: Pavel Machek @ 2008-12-14 18:19 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Scott Lurndal, Matthew Wilcox, linux-kernel, linux-arch, Ulrich Drepper

On Fri 2008-12-12 20:56:00, Gerd Hoffmann wrote:
> Russell King wrote:
> >> should be similar to pread/pwrite, e.g:
> >>
> >>     int preadv(fd, iovec, iovec_size, offset)
> > 
> > Yes, and that's easy for glibc to achieve.
> 
> This hints the ABI problem exists at syscall level only.  Is that
> correct?  So we can have
> 
> 	preadv(fd, vec, vlen, off)
> 
> argument ordering at app <-> glibc level and
> 
> 	preadv(fd, vec, off, vlen)
> 
> ordering at glibc <-> kernel (aka syscall) level and it works fine for
> ARM + MIPS + PARISC?

ltrace and strace would show different values; very misleading :-(.

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 15:29 ` Matthew Wilcox
                     ` (2 preceding siblings ...)
  2008-12-14 11:49   ` Heiko Carstens
@ 2008-12-15  4:14   ` Paul Mackerras
  2008-12-15  6:20     ` David Miller
  3 siblings, 1 reply; 29+ messages in thread
From: Paul Mackerras @ 2008-12-15  4:14 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: Gerd Hoffmann, linux-kernel, linux-arch

Matthew Wilcox writes:

> On Fri, Dec 12, 2008 at 03:00:40PM +0100, Gerd Hoffmann wrote:
> > The patch sports the actual system call implementation and the windup in
> > the x86 system call tables.  Other archs are TBD.
> 
> > +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> > +                              unsigned long vlen, loff_t pos)
> > +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> > +                               unsigned long vlen, loff_t pos)
> 
> Are these prototypes required?  MIPS and PARISC will need wrappers to
> fix them if they are.  These two architectures have an ABI which
> requires 64-bit arguments to be passed in aligned pairs of registers,

As does 32-bit PowerPC, so I also would prefer the alternate argument
order for the syscall (pos as the 3rd argument).

Paul.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-15  4:14   ` Paul Mackerras
@ 2008-12-15  6:20     ` David Miller
  0 siblings, 0 replies; 29+ messages in thread
From: David Miller @ 2008-12-15  6:20 UTC (permalink / raw)
  To: paulus; +Cc: matthew, kraxel, linux-kernel, linux-arch

From: Paul Mackerras <paulus@samba.org>
Date: Mon, 15 Dec 2008 15:14:18 +1100

> Matthew Wilcox writes:
> 
> > On Fri, Dec 12, 2008 at 03:00:40PM +0100, Gerd Hoffmann wrote:
> > > The patch sports the actual system call implementation and the windup in
> > > the x86 system call tables.  Other archs are TBD.
> > 
> > > +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> > > +                              unsigned long vlen, loff_t pos)
> > > +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> > > +                               unsigned long vlen, loff_t pos)
> > 
> > Are these prototypes required?  MIPS and PARISC will need wrappers to
> > fix them if they are.  These two architectures have an ABI which
> > requires 64-bit arguments to be passed in aligned pairs of registers,
> 
> As does 32-bit PowerPC, so I also would prefer the alternate argument
> order for the syscall (pos as the 3rd argument).

FWIW 32-bit sparc does not have the aligned register requirement
for 64-bit arguments.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-12 16:02       ` Gerd Hoffmann
  2008-12-12 17:03         ` Matthew Wilcox
  2008-12-12 18:29         ` Scott Lurndal
@ 2008-12-15 16:37         ` Jennifer Pioch
  2008-12-15 20:43           ` Gerd Hoffmann
  2 siblings, 1 reply; 29+ messages in thread
From: Jennifer Pioch @ 2008-12-15 16:37 UTC (permalink / raw)
  To: Gerd Hoffmann, opensolaris-code, opensolaris-rfe
  Cc: Matthew Wilcox, linux-kernel, linux-arch, Ulrich Drepper

On 12/12/08, Gerd Hoffmann <kraxel@redhat.com> wrote:
> Matthew Wilcox wrote:
>  > On the other hand, NetBSD have approximately 0% market share.
>  > We shouldn't let them lock us into making a bad decision.  Is there
>  > anyone other than NetBSD who has added these syscalls?
>
>
> Free- and OpenBSD have it too.  For Solaris I've found a feature request
>  only.  Dunno about MacOS/Darwin.  Other un*xes which are important these
>  days?

Do you know the ID of the feature request?

Jenny
-- 
Jennifer Pioch, Uni Frankfurt

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-15 16:37         ` Jennifer Pioch
@ 2008-12-15 20:43           ` Gerd Hoffmann
  2008-12-16  9:57             ` Arnd Bergmann
  0 siblings, 1 reply; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-15 20:43 UTC (permalink / raw)
  To: Jennifer Pioch
  Cc: opensolaris-code, opensolaris-rfe, Matthew Wilcox, linux-kernel,
	linux-arch, Ulrich Drepper

Jennifer Pioch wrote:
> On 12/12/08, Gerd Hoffmann <kraxel@redhat.com> wrote:
>> Free- and OpenBSD have it too.  For Solaris I've found a feature request
>>  only.  Dunno about MacOS/Darwin.  Other un*xes which are important these
>>  days?
> 
> Do you know the ID of the feature request?

#1167819 @ bugs.opensolaris.org

HTH,
  Gerd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
  2008-12-15 20:43           ` Gerd Hoffmann
@ 2008-12-16  9:57             ` Arnd Bergmann
  2008-12-17  1:45                 ` Dan Mick
  0 siblings, 1 reply; 29+ messages in thread
From: Arnd Bergmann @ 2008-12-16  9:57 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: Jennifer Pioch, opensolaris-code, opensolaris-rfe,
	Matthew Wilcox, linux-kernel, linux-arch, Ulrich Drepper

On Monday 15 December 2008, Gerd Hoffmann wrote:
> Jennifer Pioch wrote:
> > On 12/12/08, Gerd Hoffmann <kraxel@redhat.com> wrote:
> >> Free- and OpenBSD have it too.  For Solaris I've found a feature request
> >>  only.  Dunno about MacOS/Darwin.  Other un*xes which are important these
> >>  days?
> > 
> > Do you know the ID of the feature request?
> 
> #1167819 @ bugs.opensolaris.org
> 

Looks like they've been working on it for some time:
"Submit Date   25-MAY-1994"!

	Arnd <><

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [osol-code] [PATCH v2] Add preadv and pwritev system calls.
@ 2008-12-17  1:45                 ` Dan Mick
  0 siblings, 0 replies; 29+ messages in thread
From: Dan Mick @ 2008-12-17  1:45 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Gerd Hoffmann, linux-arch, Matthew Wilcox, opensolaris-code,
	linux-kernel, opensolaris-rfe, Ulrich Drepper

Arnd Bergmann wrote:
> On Monday 15 December 2008, Gerd Hoffmann wrote:
>> Jennifer Pioch wrote:
>>> On 12/12/08, Gerd Hoffmann <kraxel@redhat.com> wrote:
>>>> Free- and OpenBSD have it too.  For Solaris I've found a feature request
>>>>  only.  Dunno about MacOS/Darwin.  Other un*xes which are important these
>>>>  days?
>>> Do you know the ID of the feature request?
>> #1167819 @ bugs.opensolaris.org
>>
> 
> Looks like they've been working on it for some time:
> "Submit Date   25-MAY-1994"!
> 
> 	Arnd <><

No one's expended any serious time on it at all, it looks like, other than 
discussion.

I see the one comment says that it can be done with lio_listio(3RT) (apparently 
in LIO_WAIT mode), and that interface does seem to offer all the preadv/pwritev
functionality.  That's not to say that we shouldn't implement preadv/pwritev 
literally, just that there should be a way to get that sort of functionality today.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] Add preadv and pwritev system calls.
@ 2008-12-17  1:45                 ` Dan Mick
  0 siblings, 0 replies; 29+ messages in thread
From: Dan Mick @ 2008-12-17  1:45 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: linux-arch-u79uwXL29TY76Z2rM5mHXA, Matthew Wilcox,
	opensolaris-code-xZgeD5Kw2fzokhkdeNNY6A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	opensolaris-rfe-xZgeD5Kw2fzokhkdeNNY6A, Gerd Hoffmann,
	Ulrich Drepper

Arnd Bergmann wrote:
> On Monday 15 December 2008, Gerd Hoffmann wrote:
>> Jennifer Pioch wrote:
>>> On 12/12/08, Gerd Hoffmann <kraxel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:
>>>> Free- and OpenBSD have it too.  For Solaris I've found a feature request
>>>>  only.  Dunno about MacOS/Darwin.  Other un*xes which are important these
>>>>  days?
>>> Do you know the ID of the feature request?
>> #1167819 @ bugs.opensolaris.org
>>
> 
> Looks like they've been working on it for some time:
> "Submit Date   25-MAY-1994"!
> 
> 	Arnd <><

No one's expended any serious time on it at all, it looks like, other than 
discussion.

I see the one comment says that it can be done with lio_listio(3RT) (apparently 
in LIO_WAIT mode), and that interface does seem to offer all the preadv/pwritev
functionality.  That's not to say that we shouldn't implement preadv/pwritev 
literally, just that there should be a way to get that sort of functionality today.

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2008-12-17  2:12 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-12-12 14:00 [PATCH v2] Add preadv and pwritev system calls Gerd Hoffmann
2008-12-12 15:29 ` Matthew Wilcox
2008-12-12 15:48   ` Gerd Hoffmann
2008-12-12 15:51     ` Matthew Wilcox
2008-12-12 16:02       ` Gerd Hoffmann
2008-12-12 17:03         ` Matthew Wilcox
2008-12-12 18:21           ` Alan Cox
2008-12-12 19:02             ` Russell King
2008-12-12 18:29         ` Scott Lurndal
2008-12-12 19:07           ` Russell King
2008-12-12 19:56             ` Gerd Hoffmann
2008-12-12 19:56               ` Gerd Hoffmann
2008-12-12 20:12               ` Russell King
2008-12-12 20:39                 ` Gerd Hoffmann
2008-12-12 20:39                   ` Gerd Hoffmann
2008-12-14 18:19               ` Pavel Machek
2008-12-15 16:37         ` Jennifer Pioch
2008-12-15 20:43           ` Gerd Hoffmann
2008-12-16  9:57             ` Arnd Bergmann
2008-12-17  1:45               ` [osol-code] " Dan Mick
2008-12-17  1:45                 ` Dan Mick
2008-12-12 19:47   ` Arnd Bergmann
2008-12-12 20:02     ` Gerd Hoffmann
2008-12-14 11:49   ` Heiko Carstens
2008-12-15  4:14   ` Paul Mackerras
2008-12-15  6:20     ` David Miller
2008-12-12 15:40 ` Ralf Baechle
2008-12-12 16:59 ` Russell King
2008-12-13  1:18 ` Michael Kerrisk

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.