* [PATCH v2] Add preadv and pwritev system calls.
@ 2008-12-12 14:00 Gerd Hoffmann
2008-12-12 15:29 ` Matthew Wilcox
` (3 more replies)
0 siblings, 4 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 14:00 UTC (permalink / raw)
To: linux-kernel, linux-arch; +Cc: Gerd Hoffmann
This patch adds preadv and pwritev system calls. These syscalls are a
pretty straightforward combination of pread and readv (same for write).
They are quite useful for doing vectored I/O in threaded applications.
Using lseek+readv instead opens race windows you'll have to plug with
locking.
Other systems have such system calls too, for example NetBSD, check
here: http://www.daemon-systems.org/man/preadv.2.html
The patch sports the actual system call implementation and the wire-up in
the x86 system call tables. Other archs are TBD.
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
arch/x86/ia32/ia32entry.S | 2 +
arch/x86/include/asm/unistd_32.h | 2 +
arch/x86/include/asm/unistd_64.h | 4 ++
arch/x86/kernel/syscall_table_32.S | 2 +
fs/compat.c | 61 ++++++++++++++++++++++++++++++++++++
fs/read_write.c | 48 ++++++++++++++++++++++++++++
6 files changed, 119 insertions(+), 0 deletions(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 256b00b..9a8501b 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -826,4 +826,6 @@ ia32_sys_call_table:
.quad sys_dup3 /* 330 */
.quad sys_pipe2
.quad sys_inotify_init1
+ .quad compat_sys_preadv
+ .quad compat_sys_pwritev
ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index f2bba78..6e72d74 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -338,6 +338,8 @@
#define __NR_dup3 330
#define __NR_pipe2 331
#define __NR_inotify_init1 332
+#define __NR_preadv 333
+#define __NR_pwritev 334
#ifdef __KERNEL__
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index d2e415e..f818294 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -653,6 +653,10 @@ __SYSCALL(__NR_dup3, sys_dup3)
__SYSCALL(__NR_pipe2, sys_pipe2)
#define __NR_inotify_init1 294
__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
+#define __NR_preadv 295
+__SYSCALL(__NR_preadv, sys_preadv)
+#define __NR_pwritev 296
+__SYSCALL(__NR_pwritev, sys_pwritev)
#ifndef __NO_STUBS
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d44395f..a1a5506 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -332,3 +332,5 @@ ENTRY(sys_call_table)
.long sys_dup3 /* 330 */
.long sys_pipe2
.long sys_inotify_init1
+ .long sys_preadv
+ .long sys_pwritev
diff --git a/fs/compat.c b/fs/compat.c
index e5f49f5..3a25cf3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1214,6 +1214,67 @@ out:
return ret;
}
+asmlinkage ssize_t
+compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
+ unsigned long vlen, loff_t pos)
+{
+ struct file *file;
+ ssize_t ret = -EBADF;
+
+ if (pos < 0)
+ return -EINVAL;
+
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+
+ if (!(file->f_mode & FMODE_READ))
+ goto out;
+
+ ret = -EINVAL;
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
+ goto out;
+
+ ret = compat_do_readv_writev(READ, file, vec, vlen, &pos);
+
+out:
+ if (ret > 0)
+ add_rchar(current, ret);
+ inc_syscr(current);
+ fput(file);
+ return ret;
+}
+
+asmlinkage ssize_t
+compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
+ unsigned long vlen, loff_t pos)
+{
+ struct file *file;
+ ssize_t ret = -EBADF;
+
+ if (pos < 0)
+ return -EINVAL;
+
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+ if (!(file->f_mode & FMODE_WRITE))
+ goto out;
+
+ ret = -EINVAL;
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
+ goto out;
+
+ ret = compat_do_readv_writev(WRITE, file, vec, vlen, &pos);
+
+out:
+ if (ret > 0)
+ add_wchar(current, ret);
+ inc_syscw(current);
+ fput(file);
+ return ret;
+}
+
asmlinkage long
compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
unsigned int nr_segs, unsigned int flags)
diff --git a/fs/read_write.c b/fs/read_write.c
index 969a6d9..89f273d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -701,6 +701,54 @@ sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
return ret;
}
+asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, loff_t pos)
+{
+ struct file *file;
+ ssize_t ret = -EBADF;
+ int fput_needed;
+
+ if (pos < 0)
+ return -EINVAL;
+
+ file = fget_light(fd, &fput_needed);
+ if (file) {
+ ret = -ESPIPE;
+ if (file->f_mode & FMODE_PREAD)
+ ret = vfs_readv(file, vec, vlen, &pos);
+ fput_light(file, fput_needed);
+ }
+
+ if (ret > 0)
+ add_rchar(current, ret);
+ inc_syscr(current);
+ return ret;
+}
+
+asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, loff_t pos)
+{
+ struct file *file;
+ ssize_t ret = -EBADF;
+ int fput_needed;
+
+ if (pos < 0)
+ return -EINVAL;
+
+ file = fget_light(fd, &fput_needed);
+ if (file) {
+ ret = -ESPIPE;
+ if (file->f_mode & FMODE_PWRITE)
+ ret = vfs_writev(file, vec, vlen, &pos);
+ fput_light(file, fput_needed);
+ }
+
+ if (ret > 0)
+ add_wchar(current, ret);
+ inc_syscw(current);
+ return ret;
+}
+
static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
size_t count, loff_t max)
{
--
1.5.6.5
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 14:00 [PATCH v2] Add preadv and pwritev system calls Gerd Hoffmann
@ 2008-12-12 15:29 ` Matthew Wilcox
2008-12-12 15:48 ` Gerd Hoffmann
` (3 more replies)
2008-12-12 15:40 ` Ralf Baechle
` (2 subsequent siblings)
3 siblings, 4 replies; 29+ messages in thread
From: Matthew Wilcox @ 2008-12-12 15:29 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch
On Fri, Dec 12, 2008 at 03:00:40PM +0100, Gerd Hoffmann wrote:
> The patch sports the actual system call implementation and the windup in
> the x86 system call tables. Other archs are TBD.
> +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> + unsigned long vlen, loff_t pos)
> +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> + unsigned long vlen, loff_t pos)
Are these prototypes required? MIPS and PARISC will need wrappers to
fix them if they are. These two architectures have an ABI which
requires 64-bit arguments to be passed in aligned pairs of registers,
but glibc doesn't know that (and given the existence of syscall(3),
can't do much about it even if it knew), so some of the arguments end up
in the wrong registers.
Things will go much better if we can prototype these as:
asmlinkage ssize_t sys_preadv(unsigned int fd, const struct iovec __user *vec,
loff_t pos, unsigned long vlen);
asmlinkage ssize_t sys_pwritev(unsigned int fd, const struct iovec __user *vec,
loff_t pos, unsigned long vlen);
That way 'pos' ends up split between arg2 and arg3 and vlen ends up in
arg4 instead of having vlen in arg2 and pos in arg3 and arg4 which then
have to be munged to be in arg4 and arg5 by a compat wrapper.
I seem to recall the s390 folks having some concerns with this kind of
thing too, but I forget what they are, so I'll let them weigh in on
this.
By the way, why did you make 'fd' an unsigned long? The rest of the
kernel uses unsigned int.
--
Matthew Wilcox Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 14:00 [PATCH v2] Add preadv and pwritev system calls Gerd Hoffmann
2008-12-12 15:29 ` Matthew Wilcox
@ 2008-12-12 15:40 ` Ralf Baechle
2008-12-12 16:59 ` Russell King
2008-12-13 1:18 ` Michael Kerrisk
3 siblings, 0 replies; 29+ messages in thread
From: Ralf Baechle @ 2008-12-12 15:40 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch
On Fri, Dec 12, 2008 at 03:00:40PM +0100, Gerd Hoffmann wrote:
> This patch adds preadv and pwritev system calls. These syscalls are a
> pretty straightforward combination of pread and readv (same for write).
> They are quite useful for doing vectored I/O in threaded applications.
> Using lseek+readv instead opens race windows you'll have to plug with
> locking.
>
> Other systems have such system calls too, for example NetBSD, check
> here: http://www.daemon-systems.org/man/preadv.2.html
>
> The patch sports the actual system call implementation and the windup in
> the x86 system call tables. Other archs are TBD.
> +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> + unsigned long vlen, loff_t pos)
> +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> + unsigned long vlen, loff_t pos)
As so often before the devil is in the function prototype. On some
architectures - 32-bit MIPS and PARISC, maybe others - 64-bit arguments
such as loff_t need to be passed in an _aligned_ pair of 32-bit
arguments which effectively requires another wrapper like this around
your compat wrapper:
asmlinkage int sys32_preadv(unsigned long fd,
const struct compat_iovec __user *vec,
unsigned long vlen, int dummy, unsigned a5, unsigned a6)
{
return compat_sys_preadv(fd, vec, vlen, merge_64(a5, a6));
}
merge_64() takes two 32-bit halves of a 64-bit argument and combines them
into a 64-bit argument again.
I wonder, does that merging happen magically on x86 or?
Ralf
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 15:29 ` Matthew Wilcox
@ 2008-12-12 15:48 ` Gerd Hoffmann
2008-12-12 15:51 ` Matthew Wilcox
2008-12-12 19:47 ` Arnd Bergmann
` (2 subsequent siblings)
3 siblings, 1 reply; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 15:48 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: linux-kernel, linux-arch, Ulrich Drepper
Matthew Wilcox wrote:
>> +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
>> + unsigned long vlen, loff_t pos)
>> +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
>> + unsigned long vlen, loff_t pos)
>
> Are these prototypes required? MIPS and PARISC will need wrappers to
> fix them if they are. These two architectures have an ABI which
> requires 64-bit arguments to be passed in aligned pairs of registers,
> but glibc doesn't know that (and given the existence of syscall(3),
> can't do much about it even if it knew), so some of the arguments end up
> in the wrong registers.
>
> Things will go much better if we can prototype these as:
>
> asmlinkage ssize_t sys_preadv(unsigned int fd,
> const struct iovec __user *vec,
> loff_t pos, unsigned long vlen);
> asmlinkage ssize_t sys_pwritev(unsigned int fd,
> const struct iovec __user *vec,
> loff_t pos, unsigned long vlen);
Hmm. It is the argument ordering used by NetBSD, thats why I used that
too. It certainly should be the application-visible ordering. We'll
have glibc between apps and kernel though, so I think we can reorder the
arguments at syscall level if that helps on these archs. Cc'ing Ulrich
Drepper for comments on that.
> By the way, why did you make 'fd' an unsigned long? The rest of the
> kernel uses unsigned int.
sys_{readv,writev} have unsigned long too, this is where I got it from.
Don't know what the reason for this is, it looks a bit odd indeed.
cheers,
Gerd
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 15:48 ` Gerd Hoffmann
@ 2008-12-12 15:51 ` Matthew Wilcox
2008-12-12 16:02 ` Gerd Hoffmann
0 siblings, 1 reply; 29+ messages in thread
From: Matthew Wilcox @ 2008-12-12 15:51 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch, Ulrich Drepper
On Fri, Dec 12, 2008 at 04:48:36PM +0100, Gerd Hoffmann wrote:
> Hmm. It is the argument ordering used by NetBSD, thats why I used that
> too. It certainly should be the application-visible ordering. We'll
> have glibc between apps and kernel though, so I think we can reorder the
> arguments at syscall level if that helps on these archs. Cc'ing Ulrich
> Drepper for comments on that.
On the other hand, NetBSD have approximately 0% market share.
We shouldn't let them lock us into making a bad decision. Is there
anyone other than NetBSD who has added these syscalls?
--
Matthew Wilcox Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 15:51 ` Matthew Wilcox
@ 2008-12-12 16:02 ` Gerd Hoffmann
2008-12-12 17:03 ` Matthew Wilcox
` (2 more replies)
0 siblings, 3 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 16:02 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: linux-kernel, linux-arch, Ulrich Drepper
Matthew Wilcox wrote:
> On the other hand, NetBSD have approximately 0% market share.
> We shouldn't let them lock us into making a bad decision. Is there
> anyone other than NetBSD who has added these syscalls?
Free- and OpenBSD have it too. For Solaris I've found a feature request
only. Dunno about MacOS/Darwin. Other un*xes which are important these
days?
I'd *really* hate it to have the same system call with different
argument ordering on different systems though. Especially when swapping
two integer values, so gcc wouldn't error out on wrong usage.
cheers,
Gerd
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 14:00 [PATCH v2] Add preadv and pwritev system calls Gerd Hoffmann
2008-12-12 15:29 ` Matthew Wilcox
2008-12-12 15:40 ` Ralf Baechle
@ 2008-12-12 16:59 ` Russell King
2008-12-13 1:18 ` Michael Kerrisk
3 siblings, 0 replies; 29+ messages in thread
From: Russell King @ 2008-12-12 16:59 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch
On Fri, Dec 12, 2008 at 03:00:40PM +0100, Gerd Hoffmann wrote:
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 969a6d9..89f273d 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -701,6 +701,54 @@ sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
> return ret;
> }
>
> +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> + unsigned long vlen, loff_t pos)
For ARM, I'd prefer this to be:
unsigned long fd, const struct iovec __user *vec,
loff_t pos, unsigned long vlen
to avoid any variance in ABIness which will occur with the argument
layout as it currently stands, but that creates a silly argument
order. The other option would be for us to define our own version
in an ARM ABI independent manner:
ssize_t sys_arm_preadv(unsigned long fd, const struct iovec __user *vec,
unsigned long vlen, unsigned long low_pos, unsigned long high_pos)
--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of:
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 16:02 ` Gerd Hoffmann
@ 2008-12-12 17:03 ` Matthew Wilcox
2008-12-12 18:21 ` Alan Cox
2008-12-12 18:29 ` Scott Lurndal
2008-12-15 16:37 ` Jennifer Pioch
2 siblings, 1 reply; 29+ messages in thread
From: Matthew Wilcox @ 2008-12-12 17:03 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch, Ulrich Drepper
On Fri, Dec 12, 2008 at 05:02:05PM +0100, Gerd Hoffmann wrote:
> I'd *really* hate it to have the same system call with different
> argument ordering on different systems though. Especially when swapping
> two integer values, so gcc wouldn't error out on wrong usage.
We can always permute it further:
int fd, int vlen, loff_t pos, const struct *
--
Matthew Wilcox Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 17:03 ` Matthew Wilcox
@ 2008-12-12 18:21 ` Alan Cox
2008-12-12 19:02 ` Russell King
0 siblings, 1 reply; 29+ messages in thread
From: Alan Cox @ 2008-12-12 18:21 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: Gerd Hoffmann, linux-kernel, linux-arch, Ulrich Drepper
On Fri, 12 Dec 2008 10:03:47 -0700
Matthew Wilcox <matthew@wil.cx> wrote:
> On Fri, Dec 12, 2008 at 05:02:05PM +0100, Gerd Hoffmann wrote:
> > I'd *really* hate it to have the same system call with different
> > argument ordering on different systems though. Especially when swapping
> > two integer values, so gcc wouldn't error out on wrong usage.
>
> We can always permute it further:
>
> int fd, int vlen, loff_t pos, const struct *
Or you could add cobol calling syntax or pass the arguments in XML
format ?
Any particular reason you want to make things hell for programmers and
the standard people. Follow the BSD one at least to user space. Anything
else will just lead to pain and suffering later on the standardisation
front.
Alan
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 16:02 ` Gerd Hoffmann
2008-12-12 17:03 ` Matthew Wilcox
@ 2008-12-12 18:29 ` Scott Lurndal
2008-12-12 19:07 ` Russell King
2008-12-15 16:37 ` Jennifer Pioch
2 siblings, 1 reply; 29+ messages in thread
From: Scott Lurndal @ 2008-12-12 18:29 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: Matthew Wilcox, linux-kernel, linux-arch, Ulrich Drepper
On Fri, Dec 12, 2008 at 05:02:05PM +0100, Gerd Hoffmann wrote:
> Matthew Wilcox wrote:
> > On the other hand, NetBSD have approximately 0% market share.
> > We shouldn't let them lock us into making a bad decision. Is there
> > anyone other than NetBSD who has added these syscalls?
>
> Free- and OpenBSD have it too. For Solaris I've found a feature request
> only. Dunno about MacOS/Darwin. Other un*xes which are important these
> days?
>
> I'd *really* hate it to have the same system call with different
> argument ordering on different systems though. Especially when swapping
> two integer values, so gcc wouldn't error out on wrong usage.
I would suggest that from the end-users perspective, the user-mode API
should be similar to pread/pwrite, e.g:
int preadv(fd, iovec, iovec_size, offset)
scott
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 18:21 ` Alan Cox
@ 2008-12-12 19:02 ` Russell King
0 siblings, 0 replies; 29+ messages in thread
From: Russell King @ 2008-12-12 19:02 UTC (permalink / raw)
To: Alan Cox
Cc: Matthew Wilcox, Gerd Hoffmann, linux-kernel, linux-arch, Ulrich Drepper
On Fri, Dec 12, 2008 at 06:21:42PM +0000, Alan Cox wrote:
> On Fri, 12 Dec 2008 10:03:47 -0700
> Matthew Wilcox <matthew@wil.cx> wrote:
>
> > On Fri, Dec 12, 2008 at 05:02:05PM +0100, Gerd Hoffmann wrote:
> > > I'd *really* hate it to have the same system call with different
> > > argument ordering on different systems though. Especially when swapping
> > > two integer values, so gcc wouldn't error out on wrong usage.
> >
> > We can always permute it further:
> >
> > int fd, int vlen, loff_t pos, const struct *
>
> Or you could add cobol calling syntax or pass the arguments in XML
> format ?
>
> Any particular reason you want to make things hell for programmers and
> the standard people. Follow the BSD one at least to user space. Anything
> else will just lead to pain and suffering later on the standardisation
> front.
Yes - non-aligned 64-bit arguments in registers really really really
sucks. On ARM, we will change the syscall argument ordering to avoid
having additional compatibility implementations for EABI vs OABI.
The same goes for similar ABIs with restrictions on 64-bit arguments.
There are cases where a syscall just can not be used on such ABIs. Eg,
int a1, unsigned long long a2, int a3, unsigned long long a4
is an impossible syscall argument order for ARM - we run out of registers
for the upper word of 'a4'.
--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of:
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 18:29 ` Scott Lurndal
@ 2008-12-12 19:07 ` Russell King
2008-12-12 19:56 ` Gerd Hoffmann
0 siblings, 1 reply; 29+ messages in thread
From: Russell King @ 2008-12-12 19:07 UTC (permalink / raw)
To: Scott Lurndal
Cc: Gerd Hoffmann, Matthew Wilcox, linux-kernel, linux-arch, Ulrich Drepper
On Fri, Dec 12, 2008 at 10:29:29AM -0800, Scott Lurndal wrote:
> On Fri, Dec 12, 2008 at 05:02:05PM +0100, Gerd Hoffmann wrote:
> > Matthew Wilcox wrote:
> > > On the other hand, NetBSD have approximately 0% market share.
> > > We shouldn't let them lock us into making a bad decision. Is there
> > > anyone other than NetBSD who has added these syscalls?
> >
> > Free- and OpenBSD have it too. For Solaris I've found a feature request
> > only. Dunno about MacOS/Darwin. Other un*xes which are important these
> > days?
> >
> > I'd *really* hate it to have the same system call with different
> > argument ordering on different systems though. Especially when swapping
> > two integer values, so gcc wouldn't error out on wrong usage.
>
> I would suggest that from the end-users perspective, the user-mode API
> should be similar to pread/pwrite, e.g:
>
> int preadv(fd, iovec, iovec_size, offset)
Yes, and that's easy for glibc to achieve.
What's hard is that the user <-> kernel API firstly has a limited number
of registers available to it for passing arguments without indirection
from user space into kernel space.
Secondly, the user <-> kernel argument register allocation can vary
depending on the ABI version which user space or kernel space is built
for. On ARM we have two ABIs, one where 64-bit arguments can be placed
in any two consecutive registers, and one where 64-bit arguments must
be placed in an even,odd register pair (not an odd,even pair.)
That leads to the above being:
fd r0 r0
iovec r1 r1
vecsz r2 r2
offset r3,r4 r4,r5
Notice the different register allocation for the 64-bit offset.
This problem of register-aligned argument placement is not limited
to just ARM, but several other Linux supported architectures.
--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of:
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 15:29 ` Matthew Wilcox
2008-12-12 15:48 ` Gerd Hoffmann
@ 2008-12-12 19:47 ` Arnd Bergmann
2008-12-12 20:02 ` Gerd Hoffmann
2008-12-14 11:49 ` Heiko Carstens
2008-12-15 4:14 ` Paul Mackerras
3 siblings, 1 reply; 29+ messages in thread
From: Arnd Bergmann @ 2008-12-12 19:47 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: Gerd Hoffmann, linux-kernel, linux-arch
On Friday 12 December 2008, Matthew Wilcox wrote:
> Things will go much better if we can prototype these as:
>
> asmlinkage ssize_t sys_preadv(unsigned int fd, const struct iovec __user *vec,
> loff_t pos, unsigned long vlen);
> asmlinkage ssize_t sys_pwritev(unsigned int fd, const struct iovec __user *vec,
> loff_t pos, unsigned long vlen);
I would vote for doing it the same way as sys_llseek, which avoids
this issue entirely by passing the upper half of pos separately:
asmlinkage ssize_t sys_preadv(unsigned long fd,
const struct iovec __user *vec,
unsigned long vlen,
unsigned long pos_high, unsigned long pos_low);
asmlinkage ssize_t sys_pwritev(unsigned long fd,
const struct iovec __user *vec,
unsigned long vlen,
unsigned long pos_high, unsigned long pos_low);
This is the only way I can see that lets us use a shared
compat_sys_preadv/pwritev across all 64 bit architectures.
The libc can then add a trivial wrapper around the syscalls
to get the regular calling conventions.
Aside from that, have you considered doing something even more flexible,
like this?
struct piovec {
void __user *iov_base;
__kernel_size_t iov_len;
__kernel_loff_t pos;
};
asmlinkage ssize_t sys_pwritev(unsigned long fd,
const struct piovec __user *vec,
unsigned long vlen);
Arnd <><
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 19:07 ` Russell King
@ 2008-12-12 19:56 ` Gerd Hoffmann
0 siblings, 0 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 19:56 UTC (permalink / raw)
To: Scott Lurndal, Gerd Hoffmann, Matthew Wilcox, linux-kernel,
linux-arch, Ulrich Drepper
Russell King wrote:
>> should be similar to pread/pwrite, e.g:
>>
>> int preadv(fd, iovec, iovec_size, offset)
>
> Yes, and that's easy for glibc to achieve.
This hints the ABI problem exists at syscall level only. Is that
correct? So we can have
preadv(fd, vec, vlen, off)
argument ordering at app <-> glibc level and
preadv(fd, vec, off, vlen)
ordering at glibc <-> kernel (aka syscall) level and it works fine for
ARM + MIPS + PARISC?
thanks,
Gerd
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
@ 2008-12-12 19:56 ` Gerd Hoffmann
0 siblings, 0 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 19:56 UTC (permalink / raw)
To: Scott Lurndal, Gerd Hoffmann, Matthew Wilcox, linux-kernel,
linux-arch, Ulrich
Russell King wrote:
>> should be similar to pread/pwrite, e.g:
>>
>> int preadv(fd, iovec, iovec_size, offset)
>
> Yes, and that's easy for glibc to achieve.
This hints the ABI problem exists at syscall level only. Is that
correct? So we can have
preadv(fd, vec, vlen, off)
argument ordering at app <-> glibc level and
preadv(fd, vec, off, vlen)
ordering at glibc <-> kernel (aka syscall) level and it works fine for
ARM + MIPS + PARISC?
thanks,
Gerd
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 19:47 ` Arnd Bergmann
@ 2008-12-12 20:02 ` Gerd Hoffmann
0 siblings, 0 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 20:02 UTC (permalink / raw)
To: Arnd Bergmann; +Cc: Matthew Wilcox, linux-kernel, linux-arch
Arnd Bergmann wrote:
> Aside from that, have you considered doing something even more flexible,
> like this?
>
> struct piovec {
> void __user *iov_base;
> __kernel_size_t iov_len;
> __kernel_loff_t pos;
> };
>
> asmlinkage ssize_t sys_pwritev(unsigned long fd,
> const struct piovec __user *vec,
> unsigned long vlen);
There is little point in doing so because I *really* want the
user-visible API being identical to the existing ones in the *BSD
family. Anything else is just a PITA for the applications using this.
cheers,
Gerd
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 19:56 ` Gerd Hoffmann
(?)
@ 2008-12-12 20:12 ` Russell King
2008-12-12 20:39 ` Gerd Hoffmann
-1 siblings, 1 reply; 29+ messages in thread
From: Russell King @ 2008-12-12 20:12 UTC (permalink / raw)
To: Gerd Hoffmann
Cc: Scott Lurndal, Matthew Wilcox, linux-kernel, linux-arch, Ulrich Drepper
On Fri, Dec 12, 2008 at 08:56:00PM +0100, Gerd Hoffmann wrote:
> Russell King wrote:
> >> should be similar to pread/pwrite, e.g:
> >>
> >> int preadv(fd, iovec, iovec_size, offset)
> >
> > Yes, and that's easy for glibc to achieve.
>
> This hints the ABI problem exists at syscall level only. Is that
> correct? So we can have
>
> preadv(fd, vec, vlen, off)
>
> argument ordering at app <-> glibc level and
>
> preadv(fd, vec, off, vlen)
>
> ordering at glibc <-> kernel (aka syscall) level and it works fine for
> ARM + MIPS + PARISC?
Fine for ARM - and yes, the user visible API should be unchanged from the
BSD standard. I don't think anyone in this thread was suggesting that
the user visible argument ordering should be any different from the
original.
Having it in a different order from *BSD at the libc visible interface
is just crazy from the OS portability point of view.
--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of:
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 20:12 ` Russell King
@ 2008-12-12 20:39 ` Gerd Hoffmann
0 siblings, 0 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 20:39 UTC (permalink / raw)
To: Gerd Hoffmann, Scott Lurndal, Matthew Wilcox, linux-kernel,
linux-arch, Ulrich Drepper
Russell King wrote:
> On Fri, Dec 12, 2008 at 08:56:00PM +0100, Gerd Hoffmann wrote:
>> Russell King wrote:
>>>> should be similar to pread/pwrite, e.g:
>>>>
>>>> int preadv(fd, iovec, iovec_size, offset)
>>> Yes, and that's easy for glibc to achieve.
>> This hints the ABI problem exists at syscall level only. Is that
>> correct? So we can have
>>
>> preadv(fd, vec, vlen, off)
>>
>> argument ordering at app <-> glibc level and
>>
>> preadv(fd, vec, off, vlen)
>>
>> ordering at glibc <-> kernel (aka syscall) level and it works fine for
>> ARM + MIPS + PARISC?
>
> Fine for ARM
Great. I'll happily switch the ordering then. /me goes wait for acks
from the other archs and plans for a new patch revision early next week.
thanks,
Gerd
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
@ 2008-12-12 20:39 ` Gerd Hoffmann
0 siblings, 0 replies; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-12 20:39 UTC (permalink / raw)
To: Gerd Hoffmann, Scott Lurndal, Matthew Wilcox, linux-kernel,
linux-arch, Ulrich
Russell King wrote:
> On Fri, Dec 12, 2008 at 08:56:00PM +0100, Gerd Hoffmann wrote:
>> Russell King wrote:
>>>> should be similar to pread/pwrite, e.g:
>>>>
>>>> int preadv(fd, iovec, iovec_size, offset)
>>> Yes, and that's easy for glibc to achieve.
>> This hints the ABI problem exists at syscall level only. Is that
>> correct? So we can have
>>
>> preadv(fd, vec, vlen, off)
>>
>> argument ordering at app <-> glibc level and
>>
>> preadv(fd, vec, off, vlen)
>>
>> ordering at glibc <-> kernel (aka syscall) level and it works fine for
>> ARM + MIPS + PARISC?
>
> Fine for ARM
Great. I'll happily switch the ordering then. /me goes wait for acks
from the other archs and plans for a new patch revision early next week.
thanks,
Gerd
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 14:00 [PATCH v2] Add preadv and pwritev system calls Gerd Hoffmann
` (2 preceding siblings ...)
2008-12-12 16:59 ` Russell King
@ 2008-12-13 1:18 ` Michael Kerrisk
3 siblings, 0 replies; 29+ messages in thread
From: Michael Kerrisk @ 2008-12-13 1:18 UTC (permalink / raw)
To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch, Linux API
Gerd,
Please CC linux-api on patches that change the API!
Cheers,
Michael
On Fri, Dec 12, 2008 at 9:00 AM, Gerd Hoffmann <kraxel@redhat.com> wrote:
> This patch adds preadv and pwritev system calls. These syscalls are a
> pretty straightforward combination of pread and readv (same for write).
> They are quite useful for doing vectored I/O in threaded applications.
> Using lseek+readv instead opens race windows you'll have to plug with
> locking.
>
> Other systems have such system calls too, for example NetBSD, check
> here: http://www.daemon-systems.org/man/preadv.2.html
>
> The patch sports the actual system call implementation and the windup in
> the x86 system call tables. Other archs are TBD.
>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
> arch/x86/ia32/ia32entry.S | 2 +
> arch/x86/include/asm/unistd_32.h | 2 +
> arch/x86/include/asm/unistd_64.h | 4 ++
> arch/x86/kernel/syscall_table_32.S | 2 +
> fs/compat.c | 61 ++++++++++++++++++++++++++++++++++++
> fs/read_write.c | 48 ++++++++++++++++++++++++++++
> 6 files changed, 119 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
> index 256b00b..9a8501b 100644
> --- a/arch/x86/ia32/ia32entry.S
> +++ b/arch/x86/ia32/ia32entry.S
> @@ -826,4 +826,6 @@ ia32_sys_call_table:
> .quad sys_dup3 /* 330 */
> .quad sys_pipe2
> .quad sys_inotify_init1
> + .quad compat_sys_preadv
> + .quad compat_sys_pwritev
> ia32_syscall_end:
> diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
> index f2bba78..6e72d74 100644
> --- a/arch/x86/include/asm/unistd_32.h
> +++ b/arch/x86/include/asm/unistd_32.h
> @@ -338,6 +338,8 @@
> #define __NR_dup3 330
> #define __NR_pipe2 331
> #define __NR_inotify_init1 332
> +#define __NR_preadv 333
> +#define __NR_pwritev 334
>
> #ifdef __KERNEL__
>
> diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
> index d2e415e..f818294 100644
> --- a/arch/x86/include/asm/unistd_64.h
> +++ b/arch/x86/include/asm/unistd_64.h
> @@ -653,6 +653,10 @@ __SYSCALL(__NR_dup3, sys_dup3)
> __SYSCALL(__NR_pipe2, sys_pipe2)
> #define __NR_inotify_init1 294
> __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
> +#define __NR_preadv 295
> +__SYSCALL(__NR_preadv, sys_preadv)
> +#define __NR_pwritev 296
> +__SYSCALL(__NR_pwritev, sys_pwritev)
>
>
> #ifndef __NO_STUBS
> diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
> index d44395f..a1a5506 100644
> --- a/arch/x86/kernel/syscall_table_32.S
> +++ b/arch/x86/kernel/syscall_table_32.S
> @@ -332,3 +332,5 @@ ENTRY(sys_call_table)
> .long sys_dup3 /* 330 */
> .long sys_pipe2
> .long sys_inotify_init1
> + .long sys_preadv
> + .long sys_pwritev
> diff --git a/fs/compat.c b/fs/compat.c
> index e5f49f5..3a25cf3 100644
> --- a/fs/compat.c
> +++ b/fs/compat.c
> @@ -1214,6 +1214,67 @@ out:
> return ret;
> }
>
> +asmlinkage ssize_t
> +compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
> + unsigned long vlen, loff_t pos)
> +{
> + struct file *file;
> + ssize_t ret = -EBADF;
> +
> + if (pos < 0)
> + return -EINVAL;
> +
> + file = fget(fd);
> + if (!file)
> + return -EBADF;
> +
> + if (!(file->f_mode & FMODE_READ))
> + goto out;
> +
> + ret = -EINVAL;
> + if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
> + goto out;
> +
> + ret = compat_do_readv_writev(READ, file, vec, vlen, &pos);
> +
> +out:
> + if (ret > 0)
> + add_rchar(current, ret);
> + inc_syscr(current);
> + fput(file);
> + return ret;
> +}
> +
> +asmlinkage ssize_t
> +compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
> + unsigned long vlen, loff_t pos)
> +{
> + struct file *file;
> + ssize_t ret = -EBADF;
> +
> + if (pos < 0)
> + return -EINVAL;
> +
> + file = fget(fd);
> + if (!file)
> + return -EBADF;
> + if (!(file->f_mode & FMODE_WRITE))
> + goto out;
> +
> + ret = -EINVAL;
> + if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
> + goto out;
> +
> + ret = compat_do_readv_writev(WRITE, file, vec, vlen, &pos);
> +
> +out:
> + if (ret > 0)
> + add_wchar(current, ret);
> + inc_syscw(current);
> + fput(file);
> + return ret;
> +}
> +
> asmlinkage long
> compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
> unsigned int nr_segs, unsigned int flags)
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 969a6d9..89f273d 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -701,6 +701,54 @@ sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
> return ret;
> }
>
> +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> + unsigned long vlen, loff_t pos)
> +{
> + struct file *file;
> + ssize_t ret = -EBADF;
> + int fput_needed;
> +
> + if (pos < 0)
> + return -EINVAL;
> +
> + file = fget_light(fd, &fput_needed);
> + if (file) {
> + ret = -ESPIPE;
> + if (file->f_mode & FMODE_PREAD)
> + ret = vfs_readv(file, vec, vlen, &pos);
> + fput_light(file, fput_needed);
> + }
> +
> + if (ret > 0)
> + add_rchar(current, ret);
> + inc_syscr(current);
> + return ret;
> +}
> +
> +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> + unsigned long vlen, loff_t pos)
> +{
> + struct file *file;
> + ssize_t ret = -EBADF;
> + int fput_needed;
> +
> + if (pos < 0)
> + return -EINVAL;
> +
> + file = fget_light(fd, &fput_needed);
> + if (file) {
> + ret = -ESPIPE;
> + if (file->f_mode & FMODE_PWRITE)
> + ret = vfs_writev(file, vec, vlen, &pos);
> + fput_light(file, fput_needed);
> + }
> +
> + if (ret > 0)
> + add_wchar(current, ret);
> + inc_syscw(current);
> + return ret;
> +}
> +
> static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
> size_t count, loff_t max)
> {
> --
> 1.5.6.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-arch" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
git://git.kernel.org/pub/scm/docs/man-pages/man-pages.git
man-pages online: http://www.kernel.org/doc/man-pages/online_pages.html
Found a bug? http://www.kernel.org/doc/man-pages/reporting_bugs.html
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 15:29 ` Matthew Wilcox
2008-12-12 15:48 ` Gerd Hoffmann
2008-12-12 19:47 ` Arnd Bergmann
@ 2008-12-14 11:49 ` Heiko Carstens
2008-12-15 4:14 ` Paul Mackerras
3 siblings, 0 replies; 29+ messages in thread
From: Heiko Carstens @ 2008-12-14 11:49 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: Gerd Hoffmann, linux-kernel, linux-arch
On Fri, Dec 12, 2008 at 08:29:29AM -0700, Matthew Wilcox wrote:
> On Fri, Dec 12, 2008 at 03:00:40PM +0100, Gerd Hoffmann wrote:
> > The patch sports the actual system call implementation and the windup in
> > the x86 system call tables. Other archs are TBD.
>
> > +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> > + unsigned long vlen, loff_t pos)
> > +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> > + unsigned long vlen, loff_t pos)
>
> Are these prototypes required? MIPS and PARISC will need wrappers to
> fix them if they are. These two architectures have an ABI which
> requires 64-bit arguments to be passed in aligned pairs of registers,
> but glibc doesn't know that (and given the existence of syscall(3),
> can't do much about it even if it knew), so some of the arguments end up
> in the wrong registers.
>
> Things will go much better if we can prototype these as:
>
> asmlinkage ssize_t sys_preadv(unsigned int fd, const struct iovec __user *vec,
> loff_t pos, unsigned long vlen);
> asmlinkage ssize_t sys_pwritev(unsigned int fd, const struct iovec __user *vec,
> loff_t pos, unsigned long vlen);
>
> That way 'pos' ends up split between arg2 and arg3 and vlen ends up in
> arg4 instead of having vlen in arg2 and pos in arg3 and arg4 which then
> have to be munged to be in arg4 and arg5 by a compat wrapper.
>
> I seem to recall the s390 folks having some concerns with this kind of
> thing too, but I forget what they are, so I'll let them weigh in on
> this.
I think a lot of stuff that needs to be known for new system calls was
written up in these two postings:
http://marc.info/?l=linux-arch&m=118277150812137&w=2
http://lkml.org/lkml/2007/8/1/354
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 19:56 ` Gerd Hoffmann
(?)
(?)
@ 2008-12-14 18:19 ` Pavel Machek
-1 siblings, 0 replies; 29+ messages in thread
From: Pavel Machek @ 2008-12-14 18:19 UTC (permalink / raw)
To: Gerd Hoffmann
Cc: Scott Lurndal, Matthew Wilcox, linux-kernel, linux-arch, Ulrich Drepper
On Fri 2008-12-12 20:56:00, Gerd Hoffmann wrote:
> Russell King wrote:
> >> should be similar to pread/pwrite, e.g:
> >>
> >> int preadv(fd, iovec, iovec_size, offset)
> >
> > Yes, and that's easy for glibc to achieve.
>
> This hints the ABI problem exists at syscall level only. Is that
> correct? So we can have
>
> preadv(fd, vec, vlen, off)
>
> argument ordering at app <-> glibc level and
>
> preadv(fd, vec, off, vlen)
>
> ordering at glibc <-> kernel (aka syscall) level and it works fine for
> ARM + MIPS + PARISC?
ltrace and strace would show different values; very misleading :-(.
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 15:29 ` Matthew Wilcox
` (2 preceding siblings ...)
2008-12-14 11:49 ` Heiko Carstens
@ 2008-12-15 4:14 ` Paul Mackerras
2008-12-15 6:20 ` David Miller
3 siblings, 1 reply; 29+ messages in thread
From: Paul Mackerras @ 2008-12-15 4:14 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: Gerd Hoffmann, linux-kernel, linux-arch
Matthew Wilcox writes:
> On Fri, Dec 12, 2008 at 03:00:40PM +0100, Gerd Hoffmann wrote:
> > The patch sports the actual system call implementation and the windup in
> > the x86 system call tables. Other archs are TBD.
>
> > +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> > + unsigned long vlen, loff_t pos)
> > +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> > + unsigned long vlen, loff_t pos)
>
> Are these prototypes required? MIPS and PARISC will need wrappers to
> fix them if they are. These two architectures have an ABI which
> requires 64-bit arguments to be passed in aligned pairs of registers,
As does 32-bit PowerPC, so I also would prefer the alternate argument
order for the syscall (pos as the 3rd argument).
Paul.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-15 4:14 ` Paul Mackerras
@ 2008-12-15 6:20 ` David Miller
0 siblings, 0 replies; 29+ messages in thread
From: David Miller @ 2008-12-15 6:20 UTC (permalink / raw)
To: paulus; +Cc: matthew, kraxel, linux-kernel, linux-arch
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 15 Dec 2008 15:14:18 +1100
> Matthew Wilcox writes:
>
> > On Fri, Dec 12, 2008 at 03:00:40PM +0100, Gerd Hoffmann wrote:
> > > The patch sports the actual system call implementation and the windup in
> > > the x86 system call tables. Other archs are TBD.
> >
> > > +asmlinkage ssize_t sys_preadv(unsigned long fd, const struct iovec __user *vec,
> > > + unsigned long vlen, loff_t pos)
> > > +asmlinkage ssize_t sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> > > + unsigned long vlen, loff_t pos)
> >
> > Are these prototypes required? MIPS and PARISC will need wrappers to
> > fix them if they are. These two architectures have an ABI which
> > requires 64-bit arguments to be passed in aligned pairs of registers,
>
> As does 32-bit PowerPC, so I also would prefer the alternate argument
> order for the syscall (pos as the 3rd argument).
FWIW 32-bit sparc does not have the aligned register requirement
for 64-bit arguments.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-12 16:02 ` Gerd Hoffmann
2008-12-12 17:03 ` Matthew Wilcox
2008-12-12 18:29 ` Scott Lurndal
@ 2008-12-15 16:37 ` Jennifer Pioch
2008-12-15 20:43 ` Gerd Hoffmann
2 siblings, 1 reply; 29+ messages in thread
From: Jennifer Pioch @ 2008-12-15 16:37 UTC (permalink / raw)
To: Gerd Hoffmann, opensolaris-code, opensolaris-rfe
Cc: Matthew Wilcox, linux-kernel, linux-arch, Ulrich Drepper
On 12/12/08, Gerd Hoffmann <kraxel@redhat.com> wrote:
> Matthew Wilcox wrote:
> > On the other hand, NetBSD have approximately 0% market share.
> > We shouldn't let them lock us into making a bad decision. Is there
> > anyone other than NetBSD who has added these syscalls?
>
>
> Free- and OpenBSD have it too. For Solaris I've found a feature request
> only. Dunno about MacOS/Darwin. Other un*xes which are important these
> days?
Do you know the ID of the feature request?
Jenny
--
Jennifer Pioch, Uni Frankfurt
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-15 16:37 ` Jennifer Pioch
@ 2008-12-15 20:43 ` Gerd Hoffmann
2008-12-16 9:57 ` Arnd Bergmann
0 siblings, 1 reply; 29+ messages in thread
From: Gerd Hoffmann @ 2008-12-15 20:43 UTC (permalink / raw)
To: Jennifer Pioch
Cc: opensolaris-code, opensolaris-rfe, Matthew Wilcox, linux-kernel,
linux-arch, Ulrich Drepper
Jennifer Pioch wrote:
> On 12/12/08, Gerd Hoffmann <kraxel@redhat.com> wrote:
>> Free- and OpenBSD have it too. For Solaris I've found a feature request
>> only. Dunno about MacOS/Darwin. Other un*xes which are important these
>> days?
>
> Do you know the ID of the feature request?
#1167819 @ bugs.opensolaris.org
HTH,
Gerd
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
2008-12-15 20:43 ` Gerd Hoffmann
@ 2008-12-16 9:57 ` Arnd Bergmann
2008-12-17 1:45 ` Dan Mick
0 siblings, 1 reply; 29+ messages in thread
From: Arnd Bergmann @ 2008-12-16 9:57 UTC (permalink / raw)
To: Gerd Hoffmann
Cc: Jennifer Pioch, opensolaris-code, opensolaris-rfe,
Matthew Wilcox, linux-kernel, linux-arch, Ulrich Drepper
On Monday 15 December 2008, Gerd Hoffmann wrote:
> Jennifer Pioch wrote:
> > On 12/12/08, Gerd Hoffmann <kraxel@redhat.com> wrote:
> >> Free- and OpenBSD have it too. For Solaris I've found a feature request
> >> only. Dunno about MacOS/Darwin. Other un*xes which are important these
> >> days?
> >
> > Do you know the ID of the feature request?
>
> #1167819 @ bugs.opensolaris.org
>
Looks like they've been working on it for some time:
"Submit Date 25-MAY-1994"!
Arnd <><
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [osol-code] [PATCH v2] Add preadv and pwritev system calls.
@ 2008-12-17 1:45 ` Dan Mick
0 siblings, 0 replies; 29+ messages in thread
From: Dan Mick @ 2008-12-17 1:45 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Gerd Hoffmann, linux-arch, Matthew Wilcox, opensolaris-code,
linux-kernel, opensolaris-rfe, Ulrich Drepper
Arnd Bergmann wrote:
> On Monday 15 December 2008, Gerd Hoffmann wrote:
>> Jennifer Pioch wrote:
>>> On 12/12/08, Gerd Hoffmann <kraxel@redhat.com> wrote:
>>>> Free- and OpenBSD have it too. For Solaris I've found a feature request
>>>> only. Dunno about MacOS/Darwin. Other un*xes which are important these
>>>> days?
>>> Do you know the ID of the feature request?
>> #1167819 @ bugs.opensolaris.org
>>
>
> Looks like they've been working on it for some time:
> "Submit Date 25-MAY-1994"!
>
> Arnd <><
No one's expended any serious time on it at all, it looks like, other than
discussion.
I see the one comment says that it can be done with lio_listio(3RT) (apparently
in LIO_WAIT mode), and that interface does seem to offer all the preadv/pwritev
functionality. That's not to say that we shouldn't implement preadv/pwritev
literally, just that there should be a way to get that sort of functionality today.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH v2] Add preadv and pwritev system calls.
@ 2008-12-17 1:45 ` Dan Mick
0 siblings, 0 replies; 29+ messages in thread
From: Dan Mick @ 2008-12-17 1:45 UTC (permalink / raw)
To: Arnd Bergmann
Cc: linux-arch-u79uwXL29TY76Z2rM5mHXA, Matthew Wilcox,
opensolaris-code-xZgeD5Kw2fzokhkdeNNY6A,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
opensolaris-rfe-xZgeD5Kw2fzokhkdeNNY6A, Gerd Hoffmann,
Ulrich Drepper
Arnd Bergmann wrote:
> On Monday 15 December 2008, Gerd Hoffmann wrote:
>> Jennifer Pioch wrote:
>>> On 12/12/08, Gerd Hoffmann <kraxel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:
>>>> Free- and OpenBSD have it too. For Solaris I've found a feature request
>>>> only. Dunno about MacOS/Darwin. Other un*xes which are important these
>>>> days?
>>> Do you know the ID of the feature request?
>> #1167819 @ bugs.opensolaris.org
>>
>
> Looks like they've been working on it for some time:
> "Submit Date 25-MAY-1994"!
>
> Arnd <><
No one's expended any serious time on it at all, it looks like, other than
discussion.
I see the one comment says that it can be done with lio_listio(3RT) (apparently
in LIO_WAIT mode), and that interface does seem to offer all the preadv/pwritev
functionality. That's not to say that we shouldn't implement preadv/pwritev
literally, just that there should be a way to get that sort of functionality today.
^ permalink raw reply [flat|nested] 29+ messages in thread
end of thread, other threads:[~2008-12-17 2:12 UTC | newest]
Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-12-12 14:00 [PATCH v2] Add preadv and pwritev system calls Gerd Hoffmann
2008-12-12 15:29 ` Matthew Wilcox
2008-12-12 15:48 ` Gerd Hoffmann
2008-12-12 15:51 ` Matthew Wilcox
2008-12-12 16:02 ` Gerd Hoffmann
2008-12-12 17:03 ` Matthew Wilcox
2008-12-12 18:21 ` Alan Cox
2008-12-12 19:02 ` Russell King
2008-12-12 18:29 ` Scott Lurndal
2008-12-12 19:07 ` Russell King
2008-12-12 19:56 ` Gerd Hoffmann
2008-12-12 19:56 ` Gerd Hoffmann
2008-12-12 20:12 ` Russell King
2008-12-12 20:39 ` Gerd Hoffmann
2008-12-12 20:39 ` Gerd Hoffmann
2008-12-14 18:19 ` Pavel Machek
2008-12-15 16:37 ` Jennifer Pioch
2008-12-15 20:43 ` Gerd Hoffmann
2008-12-16 9:57 ` Arnd Bergmann
2008-12-17 1:45 ` [osol-code] " Dan Mick
2008-12-17 1:45 ` Dan Mick
2008-12-12 19:47 ` Arnd Bergmann
2008-12-12 20:02 ` Gerd Hoffmann
2008-12-14 11:49 ` Heiko Carstens
2008-12-15 4:14 ` Paul Mackerras
2008-12-15 6:20 ` David Miller
2008-12-12 15:40 ` Ralf Baechle
2008-12-12 16:59 ` Russell King
2008-12-13 1:18 ` Michael Kerrisk
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.