linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v7 0/5] Add preadv & pwritev system calls.
@ 2009-01-26 13:26 Gerd Hoffmann
  2009-01-26 13:26 ` [PATCH v7 1/5] create compat_readv() Gerd Hoffmann
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: Gerd Hoffmann @ 2009-01-26 13:26 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-api; +Cc: aarcange, Gerd Hoffmann

  Hi folks,

Next round of the preadv & pwritev patch series.  The whitespace bugs
should finally be gone now, checkpatch has nothing to complain about.

There is now a git tree with the patches (branch 'preadv'):
  git://git.et.redhat.com/kernel-kraxel.git
  http://git.et.redhat.com/?p=kernel-kraxel.git;a=shortlog;h=refs/heads/preadv

The plan is to get this into linux-next once it comes back to life (after
LCA?).

As for userspace bits someone asked for:

Qemu wants to use that to handle virtual disk I/O.  A patch for qemu can be
found here: http://article.gmane.org/gmane.comp.emulators.qemu/36851
The code can't be put into use yet though, as the qemu block layer needs
some changes to pass iovecs through to the disk image format driver code.

Note this isn't the first attempt to get preadv support into the kernel,
therefore I expect others (databases?) will quickly use that too.

Also, *BSD has had this syscall for quite some time, thus there is likely
code using this in the wild already ...

cheers,
  Gerd


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v7 1/5] create compat_readv()
  2009-01-26 13:26 [PATCH v7 0/5] Add preadv & pwritev system calls Gerd Hoffmann
@ 2009-01-26 13:26 ` Gerd Hoffmann
  2009-01-26 13:26 ` [PATCH v7 2/5] create compat_writev() Gerd Hoffmann
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Gerd Hoffmann @ 2009-01-26 13:26 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-api; +Cc: aarcange, Gerd Hoffmann

Factor out some code from compat_sys_readv() which can be shared with the
upcoming compat_sys_preadv().

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 fs/compat.c |   26 ++++++++++++++++++--------
 1 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/compat.c b/fs/compat.c
index 65a070e..75c1b46 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1167,16 +1167,12 @@ out:
 	return ret;
 }
 
-asmlinkage ssize_t
-compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen)
+static size_t compat_readv(struct file *file,
+			   const struct compat_iovec __user *vec,
+			   unsigned long vlen, loff_t *pos)
 {
-	struct file *file;
 	ssize_t ret = -EBADF;
 
-	file = fget(fd);
-	if (!file)
-		return -EBADF;
-
 	if (!(file->f_mode & FMODE_READ))
 		goto out;
 
@@ -1184,12 +1180,26 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsign
 	if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
 		goto out;
 
-	ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos);
+	ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
 
 out:
 	if (ret > 0)
 		add_rchar(current, ret);
 	inc_syscr(current);
+	return ret;
+}
+
+asmlinkage ssize_t
+compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
+		 unsigned long vlen)
+{
+	struct file *file;
+	ssize_t ret;
+
+	file = fget(fd);
+	if (!file)
+		return -EBADF;
+	ret = compat_readv(file, vec, vlen, &file->f_pos);
 	fput(file);
 	return ret;
 }
-- 
1.6.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v7 2/5] create compat_writev()
  2009-01-26 13:26 [PATCH v7 0/5] Add preadv & pwritev system calls Gerd Hoffmann
  2009-01-26 13:26 ` [PATCH v7 1/5] create compat_readv() Gerd Hoffmann
@ 2009-01-26 13:26 ` Gerd Hoffmann
  2009-01-26 13:26 ` [PATCH v7 3/5] Add preadv and pwritev system calls Gerd Hoffmann
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Gerd Hoffmann @ 2009-01-26 13:26 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-api; +Cc: aarcange, Gerd Hoffmann

Factor out some code from compat_sys_writev() which can be shared with the
upcoming compat_sys_pwritev().

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 fs/compat.c |   25 ++++++++++++++++++-------
 1 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/fs/compat.c b/fs/compat.c
index 75c1b46..4e65644 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1204,15 +1204,12 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
 	return ret;
 }
 
-asmlinkage ssize_t
-compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen)
+static size_t compat_writev(struct file *file,
+			    const struct compat_iovec __user *vec,
+			    unsigned long vlen, loff_t *pos)
 {
-	struct file *file;
 	ssize_t ret = -EBADF;
 
-	file = fget(fd);
-	if (!file)
-		return -EBADF;
 	if (!(file->f_mode & FMODE_WRITE))
 		goto out;
 
@@ -1220,12 +1217,26 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsig
 	if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
 		goto out;
 
-	ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos);
+	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
 
 out:
 	if (ret > 0)
 		add_wchar(current, ret);
 	inc_syscw(current);
+	return ret;
+}
+
+asmlinkage ssize_t
+compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
+		  unsigned long vlen)
+{
+	struct file *file;
+	ssize_t ret;
+
+	file = fget(fd);
+	if (!file)
+		return -EBADF;
+	ret = compat_writev(file, vec, vlen, &file->f_pos);
 	fput(file);
 	return ret;
 }
-- 
1.6.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v7 3/5] Add preadv and pwritev system calls.
  2009-01-26 13:26 [PATCH v7 0/5] Add preadv & pwritev system calls Gerd Hoffmann
  2009-01-26 13:26 ` [PATCH v7 1/5] create compat_readv() Gerd Hoffmann
  2009-01-26 13:26 ` [PATCH v7 2/5] create compat_writev() Gerd Hoffmann
@ 2009-01-26 13:26 ` Gerd Hoffmann
  2009-01-26 23:42   ` Michael Kerrisk
  2009-01-26 13:26 ` [PATCH v7 4/5] MIPS: Add preadv(2) and pwritev(2) syscalls Gerd Hoffmann
  2009-01-26 13:26 ` [PATCH v7 5/5] switch compat readv/preadv/writev/pwritev from fget to fget_light Gerd Hoffmann
  4 siblings, 1 reply; 8+ messages in thread
From: Gerd Hoffmann @ 2009-01-26 13:26 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-api; +Cc: aarcange, Gerd Hoffmann

This patch adds preadv and pwritev system calls.  These syscalls are a
pretty straightforward combination of pread and readv (same for write).
They are quite useful for doing vectored I/O in threaded applications.
Using lseek+readv instead opens race windows you'll have to plug with
locking.

Other systems have such system calls too, for example NetBSD, check
here: http://www.daemon-systems.org/man/preadv.2.html

The application-visible interface provided by glibc should look like
this to be compatible to the existing implementations in the *BSD family:

  ssize_t preadv(int d, const struct iovec *iov, int iovcnt, off_t offset);
  ssize_t pwritev(int d, const struct iovec *iov, int iovcnt, off_t offset);

This prototype has one problem though:  On 32bit archs the (64bit)
offset argument is unaligned, which the syscall ABI of several archs
doesn't allow.  At least s390 needs a wrapper in glibc to handle
this.  As we'll need wrappers in glibc anyway I've decided to push the
problem to glibc entirely and use a syscall prototype which works
without arch-specific wrappers inside the kernel:  The offset argument
is explicitly split into two 32bit values.

The patch sports the actual system call implementation and the windup in
the x86 system call tables.  Other archs follow as separate patches.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 arch/x86/ia32/ia32entry.S          |    2 +
 arch/x86/include/asm/unistd_32.h   |    2 +
 arch/x86/include/asm/unistd_64.h   |    4 +++
 arch/x86/kernel/syscall_table_32.S |    2 +
 fs/compat.c                        |   36 ++++++++++++++++++++++++++
 fs/read_write.c                    |   50 ++++++++++++++++++++++++++++++++++++
 include/linux/compat.h             |    6 ++++
 include/linux/syscalls.h           |    4 +++
 8 files changed, 106 insertions(+), 0 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 256b00b..9a8501b 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -826,4 +826,6 @@ ia32_sys_call_table:
 	.quad sys_dup3			/* 330 */
 	.quad sys_pipe2
 	.quad sys_inotify_init1
+	.quad compat_sys_preadv
+	.quad compat_sys_pwritev
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index f2bba78..6e72d74 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -338,6 +338,8 @@
 #define __NR_dup3		330
 #define __NR_pipe2		331
 #define __NR_inotify_init1	332
+#define __NR_preadv		333
+#define __NR_pwritev		334
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index d2e415e..f818294 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -653,6 +653,10 @@ __SYSCALL(__NR_dup3, sys_dup3)
 __SYSCALL(__NR_pipe2, sys_pipe2)
 #define __NR_inotify_init1			294
 __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
+#define __NR_preadv				295
+__SYSCALL(__NR_preadv, sys_preadv)
+#define __NR_pwritev				296
+__SYSCALL(__NR_pwritev, sys_pwritev)
 
 
 #ifndef __NO_STUBS
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index e2e86a0..106204c 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -332,3 +332,5 @@ ENTRY(sys_call_table)
 	.long sys_dup3			/* 330 */
 	.long sys_pipe2
 	.long sys_inotify_init1
+	.long sys_preadv
+	.long sys_pwritev
diff --git a/fs/compat.c b/fs/compat.c
index 4e65644..1a67eee 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1204,6 +1204,24 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
 	return ret;
 }
 
+asmlinkage ssize_t
+compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
+		  unsigned long vlen, u32 pos_high, u32 pos_low)
+{
+	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+	struct file *file;
+	ssize_t ret;
+
+	if (pos < 0)
+		return -EINVAL;
+	file = fget(fd);
+	if (!file)
+		return -EBADF;
+	ret = compat_readv(file, vec, vlen, &pos);
+	fput(file);
+	return ret;
+}
+
 static size_t compat_writev(struct file *file,
 			    const struct compat_iovec __user *vec,
 			    unsigned long vlen, loff_t *pos)
@@ -1241,6 +1259,24 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
 	return ret;
 }
 
+asmlinkage ssize_t
+compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
+		   unsigned long vlen, u32 pos_high, u32 pos_low)
+{
+	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+	struct file *file;
+	ssize_t ret;
+
+	if (pos < 0)
+		return -EINVAL;
+	file = fget(fd);
+	if (!file)
+		return -EBADF;
+	ret = compat_writev(file, vec, vlen, &pos);
+	fput(file);
+	return ret;
+}
+
 asmlinkage long
 compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
 		    unsigned int nr_segs, unsigned int flags)
diff --git a/fs/read_write.c b/fs/read_write.c
index 400fe81..6d5d8ff 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -731,6 +731,56 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
 	return ret;
 }
 
+SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
+		unsigned long, vlen, u32, pos_high, u32, pos_low)
+{
+	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+	struct file *file;
+	ssize_t ret = -EBADF;
+	int fput_needed;
+
+	if (pos < 0)
+		return -EINVAL;
+
+	file = fget_light(fd, &fput_needed);
+	if (file) {
+		ret = -ESPIPE;
+		if (file->f_mode & FMODE_PREAD)
+			ret = vfs_readv(file, vec, vlen, &pos);
+		fput_light(file, fput_needed);
+	}
+
+	if (ret > 0)
+		add_rchar(current, ret);
+	inc_syscr(current);
+	return ret;
+}
+
+SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
+		unsigned long, vlen, u32, pos_high, u32, pos_low)
+{
+	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+	struct file *file;
+	ssize_t ret = -EBADF;
+	int fput_needed;
+
+	if (pos < 0)
+		return -EINVAL;
+
+	file = fget_light(fd, &fput_needed);
+	if (file) {
+		ret = -ESPIPE;
+		if (file->f_mode & FMODE_PWRITE)
+			ret = vfs_writev(file, vec, vlen, &pos);
+		fput_light(file, fput_needed);
+	}
+
+	if (ret > 0)
+		add_wchar(current, ret);
+	inc_syscw(current);
+	return ret;
+}
+
 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 			   size_t count, loff_t max)
 {
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 3fd2194..79dba49 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -183,6 +183,12 @@ asmlinkage ssize_t compat_sys_readv(unsigned long fd,
 		const struct compat_iovec __user *vec, unsigned long vlen);
 asmlinkage ssize_t compat_sys_writev(unsigned long fd,
 		const struct compat_iovec __user *vec, unsigned long vlen);
+asmlinkage ssize_t compat_sys_preadv(unsigned long fd,
+		const struct compat_iovec __user *vec,
+		unsigned long vlen, u32 pos_high, u32 pos_low);
+asmlinkage ssize_t compat_sys_pwritev(unsigned long fd,
+		const struct compat_iovec __user *vec,
+		unsigned long vlen, u32 pos_high, u32 pos_low);
 
 int compat_do_execve(char * filename, compat_uptr_t __user *argv,
 	        compat_uptr_t __user *envp, struct pt_regs * regs);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 16875f8..b63e93d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -456,6 +456,10 @@ asmlinkage long sys_pread64(unsigned int fd, char __user *buf,
 			    size_t count, loff_t pos);
 asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf,
 			     size_t count, loff_t pos);
+asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec,
+			   unsigned long vlen, u32 pos_high, u32 pos_low);
+asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec,
+			    unsigned long vlen, u32 pos_high, u32 pos_low);
 asmlinkage long sys_getcwd(char __user *buf, unsigned long size);
 asmlinkage long sys_mkdir(const char __user *pathname, int mode);
 asmlinkage long sys_chdir(const char __user *filename);
-- 
1.6.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v7 4/5] MIPS: Add preadv(2) and pwritev(2) syscalls.
  2009-01-26 13:26 [PATCH v7 0/5] Add preadv & pwritev system calls Gerd Hoffmann
                   ` (2 preceding siblings ...)
  2009-01-26 13:26 ` [PATCH v7 3/5] Add preadv and pwritev system calls Gerd Hoffmann
@ 2009-01-26 13:26 ` Gerd Hoffmann
  2009-01-26 13:26 ` [PATCH v7 5/5] switch compat readv/preadv/writev/pwritev from fget to fget_light Gerd Hoffmann
  4 siblings, 0 replies; 8+ messages in thread
From: Gerd Hoffmann @ 2009-01-26 13:26 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-api; +Cc: aarcange, Ralf Baechle, Gerd Hoffmann

From: Ralf Baechle <ralf@linux-mips.org>

From: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 arch/mips/include/asm/unistd.h |   18 ++++++++++++------
 arch/mips/kernel/scall32-o32.S |    2 ++
 arch/mips/kernel/scall64-64.S  |    2 ++
 arch/mips/kernel/scall64-n32.S |    2 ++
 arch/mips/kernel/scall64-o32.S |    2 ++
 5 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index a73e153..4000501 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -350,16 +350,18 @@
 #define __NR_dup3			(__NR_Linux + 327)
 #define __NR_pipe2			(__NR_Linux + 328)
 #define __NR_inotify_init1		(__NR_Linux + 329)
+#define __NR_preadv			(__NR_Linux + 330)
+#define __NR_pwritev			(__NR_Linux + 331)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		329
+#define __NR_Linux_syscalls		331
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		329
+#define __NR_O32_Linux_syscalls		331
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -656,16 +658,18 @@
 #define __NR_dup3			(__NR_Linux + 286)
 #define __NR_pipe2			(__NR_Linux + 287)
 #define __NR_inotify_init1		(__NR_Linux + 288)
+#define __NR_preadv			(__NR_Linux + 289)
+#define __NR_pwritev			(__NR_Linux + 290)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		288
+#define __NR_Linux_syscalls		290
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		288
+#define __NR_64_Linux_syscalls		290
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -966,16 +970,18 @@
 #define __NR_dup3			(__NR_Linux + 290)
 #define __NR_pipe2			(__NR_Linux + 291)
 #define __NR_inotify_init1		(__NR_Linux + 292)
+#define __NR_preadv			(__NR_Linux + 293)
+#define __NR_pwritev			(__NR_Linux + 294)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		292
+#define __NR_Linux_syscalls		294
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		292
+#define __NR_N32_Linux_syscalls		294
 
 #ifdef __KERNEL__
 
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 51d1ba4..0198a9c 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -650,6 +650,8 @@ einval:	li	v0, -ENOSYS
 	sys	sys_dup3		3
 	sys	sys_pipe2		2
 	sys	sys_inotify_init1	1
+	sys	sys_preadv		6	/* 4330 */
+	sys	sys_pwritev		6
 	.endm
 
 	/* We pre-compute the number of _instruction_ bytes needed to
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index a9e1716..217e3ce 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -487,4 +487,6 @@ sys_call_table:
 	PTR	sys_dup3
 	PTR	sys_pipe2
 	PTR	sys_inotify_init1
+	PTR	sys_preadv
+	PTR	sys_pwritev			/* 5390 */
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 30f3b63..f340963 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -413,4 +413,6 @@ EXPORT(sysn32_call_table)
 	PTR	sys_dup3			/* 5290 */
 	PTR	sys_pipe2
 	PTR	sys_inotify_init1
+	PTR	sys_preadv
+	PTR	sys_pwritev
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index fefef4a..b1d281a 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -533,4 +533,6 @@ sys_call_table:
 	PTR	sys_dup3
 	PTR	sys_pipe2
 	PTR	sys_inotify_init1
+	PTR	compat_sys_preadv		/* 4330 */
+	PTR	compat_sys_pwritev
 	.size	sys_call_table,.-sys_call_table
-- 
1.6.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v7 5/5] switch compat readv/preadv/writev/pwritev from fget to fget_light
  2009-01-26 13:26 [PATCH v7 0/5] Add preadv & pwritev system calls Gerd Hoffmann
                   ` (3 preceding siblings ...)
  2009-01-26 13:26 ` [PATCH v7 4/5] MIPS: Add preadv(2) and pwritev(2) syscalls Gerd Hoffmann
@ 2009-01-26 13:26 ` Gerd Hoffmann
  4 siblings, 0 replies; 8+ messages in thread
From: Gerd Hoffmann @ 2009-01-26 13:26 UTC (permalink / raw)
  To: linux-kernel, linux-arch, linux-api; +Cc: aarcange, Gerd Hoffmann


Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 fs/compat.c |   20 ++++++++++++--------
 1 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/fs/compat.c b/fs/compat.c
index 1a67eee..ad442ec 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1194,13 +1194,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
 		 unsigned long vlen)
 {
 	struct file *file;
+	int fput_needed;
 	ssize_t ret;
 
-	file = fget(fd);
+	file = fget_light(fd, &fput_needed);
 	if (!file)
 		return -EBADF;
 	ret = compat_readv(file, vec, vlen, &file->f_pos);
-	fput(file);
+	fput_light(file, fput_needed);
 	return ret;
 }
 
@@ -1210,15 +1211,16 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
 {
 	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
 	struct file *file;
+	int fput_needed;
 	ssize_t ret;
 
 	if (pos < 0)
 		return -EINVAL;
-	file = fget(fd);
+	file = fget_light(fd, &fput_needed);
 	if (!file)
 		return -EBADF;
 	ret = compat_readv(file, vec, vlen, &pos);
-	fput(file);
+	fput_light(file, fput_needed);
 	return ret;
 }
 
@@ -1249,13 +1251,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
 		  unsigned long vlen)
 {
 	struct file *file;
+	int fput_needed;
 	ssize_t ret;
 
-	file = fget(fd);
+	file = fget_light(fd, &fput_needed);
 	if (!file)
 		return -EBADF;
 	ret = compat_writev(file, vec, vlen, &file->f_pos);
-	fput(file);
+	fput_light(file, fput_needed);
 	return ret;
 }
 
@@ -1265,15 +1268,16 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
 {
 	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
 	struct file *file;
+	int fput_needed;
 	ssize_t ret;
 
 	if (pos < 0)
 		return -EINVAL;
-	file = fget(fd);
+	file = fget_light(fd, &fput_needed);
 	if (!file)
 		return -EBADF;
 	ret = compat_writev(file, vec, vlen, &pos);
-	fput(file);
+	fput_light(file, fput_needed);
 	return ret;
 }
 
-- 
1.6.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v7 3/5] Add preadv and pwritev system calls.
  2009-01-26 13:26 ` [PATCH v7 3/5] Add preadv and pwritev system calls Gerd Hoffmann
@ 2009-01-26 23:42   ` Michael Kerrisk
  2009-01-28 15:04     ` Gerd Hoffmann
  0 siblings, 1 reply; 8+ messages in thread
From: Michael Kerrisk @ 2009-01-26 23:42 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: linux-kernel, linux-arch, linux-api, aarcange

Gerd,

On Mon, Jan 26, 2009 at 2:26 PM, Gerd Hoffmann <kraxel@redhat.com> wrote:
> This patch adds preadv and pwritev system calls.  These syscalls are a
> pretty straightforward combination of pread and readv (same for write).
> They are quite useful for doing vectored I/O in threaded applications.
> Using lseek+readv instead opens race windows you'll have to plug with
> locking.
>
> Other systems have such system calls too, for example NetBSD, check
> here: http://www.daemon-systems.org/man/preadv.2.html
>
> The application-visible interface provided by glibc should look like
> this to be compatible to the existing implementations in the *BSD family:
>
>  ssize_t preadv(int d, const struct iovec *iov, int iovcnt, off_t offset);
>  ssize_t pwritev(int d, const struct iovec *iov, int iovcnt, off_t offset);

I asked earlier if you could provide some userspace example code using
this API.  If there was a response, I missed it.  Could you please
provide a working test using this interface?

Cheers,

Michael

> This prototype has one problem though:  On 32bit archs the (64bit)
> offset argument is unaligned, which the syscall ABI of several archs
> doesn't allow.  At least s390 needs a wrapper in glibc to handle
> this.  As we'll need wrappers in glibc anyway I've decided to push the
> problem to glibc entirely and use a syscall prototype which works
> without arch-specific wrappers inside the kernel:  The offset argument
> is explicitly split into two 32bit values.
>
> The patch sports the actual system call implementation and the windup in
> the x86 system call tables.  Other archs follow as separate patches.
>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>  arch/x86/ia32/ia32entry.S          |    2 +
>  arch/x86/include/asm/unistd_32.h   |    2 +
>  arch/x86/include/asm/unistd_64.h   |    4 +++
>  arch/x86/kernel/syscall_table_32.S |    2 +
>  fs/compat.c                        |   36 ++++++++++++++++++++++++++
>  fs/read_write.c                    |   50 ++++++++++++++++++++++++++++++++++++
>  include/linux/compat.h             |    6 ++++
>  include/linux/syscalls.h           |    4 +++
>  8 files changed, 106 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
> index 256b00b..9a8501b 100644
> --- a/arch/x86/ia32/ia32entry.S
> +++ b/arch/x86/ia32/ia32entry.S
> @@ -826,4 +826,6 @@ ia32_sys_call_table:
>        .quad sys_dup3                  /* 330 */
>        .quad sys_pipe2
>        .quad sys_inotify_init1
> +       .quad compat_sys_preadv
> +       .quad compat_sys_pwritev
>  ia32_syscall_end:
> diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
> index f2bba78..6e72d74 100644
> --- a/arch/x86/include/asm/unistd_32.h
> +++ b/arch/x86/include/asm/unistd_32.h
> @@ -338,6 +338,8 @@
>  #define __NR_dup3              330
>  #define __NR_pipe2             331
>  #define __NR_inotify_init1     332
> +#define __NR_preadv            333
> +#define __NR_pwritev           334
>
>  #ifdef __KERNEL__
>
> diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
> index d2e415e..f818294 100644
> --- a/arch/x86/include/asm/unistd_64.h
> +++ b/arch/x86/include/asm/unistd_64.h
> @@ -653,6 +653,10 @@ __SYSCALL(__NR_dup3, sys_dup3)
>  __SYSCALL(__NR_pipe2, sys_pipe2)
>  #define __NR_inotify_init1                     294
>  __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
> +#define __NR_preadv                            295
> +__SYSCALL(__NR_preadv, sys_preadv)
> +#define __NR_pwritev                           296
> +__SYSCALL(__NR_pwritev, sys_pwritev)
>
>
>  #ifndef __NO_STUBS
> diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
> index e2e86a0..106204c 100644
> --- a/arch/x86/kernel/syscall_table_32.S
> +++ b/arch/x86/kernel/syscall_table_32.S
> @@ -332,3 +332,5 @@ ENTRY(sys_call_table)
>        .long sys_dup3                  /* 330 */
>        .long sys_pipe2
>        .long sys_inotify_init1
> +       .long sys_preadv
> +       .long sys_pwritev
> diff --git a/fs/compat.c b/fs/compat.c
> index 4e65644..1a67eee 100644
> --- a/fs/compat.c
> +++ b/fs/compat.c
> @@ -1204,6 +1204,24 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
>        return ret;
>  }
>
> +asmlinkage ssize_t
> +compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
> +                 unsigned long vlen, u32 pos_high, u32 pos_low)
> +{
> +       loff_t pos = ((loff_t)pos_high << 32) | pos_low;
> +       struct file *file;
> +       ssize_t ret;
> +
> +       if (pos < 0)
> +               return -EINVAL;
> +       file = fget(fd);
> +       if (!file)
> +               return -EBADF;
> +       ret = compat_readv(file, vec, vlen, &pos);
> +       fput(file);
> +       return ret;
> +}
> +
>  static size_t compat_writev(struct file *file,
>                            const struct compat_iovec __user *vec,
>                            unsigned long vlen, loff_t *pos)
> @@ -1241,6 +1259,24 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
>        return ret;
>  }
>
> +asmlinkage ssize_t
> +compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
> +                  unsigned long vlen, u32 pos_high, u32 pos_low)
> +{
> +       loff_t pos = ((loff_t)pos_high << 32) | pos_low;
> +       struct file *file;
> +       ssize_t ret;
> +
> +       if (pos < 0)
> +               return -EINVAL;
> +       file = fget(fd);
> +       if (!file)
> +               return -EBADF;
> +       ret = compat_writev(file, vec, vlen, &pos);
> +       fput(file);
> +       return ret;
> +}
> +
>  asmlinkage long
>  compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
>                    unsigned int nr_segs, unsigned int flags)
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 400fe81..6d5d8ff 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -731,6 +731,56 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
>        return ret;
>  }
>
> +SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
> +               unsigned long, vlen, u32, pos_high, u32, pos_low)
> +{
> +       loff_t pos = ((loff_t)pos_high << 32) | pos_low;
> +       struct file *file;
> +       ssize_t ret = -EBADF;
> +       int fput_needed;
> +
> +       if (pos < 0)
> +               return -EINVAL;
> +
> +       file = fget_light(fd, &fput_needed);
> +       if (file) {
> +               ret = -ESPIPE;
> +               if (file->f_mode & FMODE_PREAD)
> +                       ret = vfs_readv(file, vec, vlen, &pos);
> +               fput_light(file, fput_needed);
> +       }
> +
> +       if (ret > 0)
> +               add_rchar(current, ret);
> +       inc_syscr(current);
> +       return ret;
> +}
> +
> +SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
> +               unsigned long, vlen, u32, pos_high, u32, pos_low)
> +{
> +       loff_t pos = ((loff_t)pos_high << 32) | pos_low;
> +       struct file *file;
> +       ssize_t ret = -EBADF;
> +       int fput_needed;
> +
> +       if (pos < 0)
> +               return -EINVAL;
> +
> +       file = fget_light(fd, &fput_needed);
> +       if (file) {
> +               ret = -ESPIPE;
> +               if (file->f_mode & FMODE_PWRITE)
> +                       ret = vfs_writev(file, vec, vlen, &pos);
> +               fput_light(file, fput_needed);
> +       }
> +
> +       if (ret > 0)
> +               add_wchar(current, ret);
> +       inc_syscw(current);
> +       return ret;
> +}
> +
>  static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
>                           size_t count, loff_t max)
>  {
> diff --git a/include/linux/compat.h b/include/linux/compat.h
> index 3fd2194..79dba49 100644
> --- a/include/linux/compat.h
> +++ b/include/linux/compat.h
> @@ -183,6 +183,12 @@ asmlinkage ssize_t compat_sys_readv(unsigned long fd,
>                const struct compat_iovec __user *vec, unsigned long vlen);
>  asmlinkage ssize_t compat_sys_writev(unsigned long fd,
>                const struct compat_iovec __user *vec, unsigned long vlen);
> +asmlinkage ssize_t compat_sys_preadv(unsigned long fd,
> +               const struct compat_iovec __user *vec,
> +               unsigned long vlen, u32 pos_high, u32 pos_low);
> +asmlinkage ssize_t compat_sys_pwritev(unsigned long fd,
> +               const struct compat_iovec __user *vec,
> +               unsigned long vlen, u32 pos_high, u32 pos_low);
>
>  int compat_do_execve(char * filename, compat_uptr_t __user *argv,
>                compat_uptr_t __user *envp, struct pt_regs * regs);
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index 16875f8..b63e93d 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -456,6 +456,10 @@ asmlinkage long sys_pread64(unsigned int fd, char __user *buf,
>                            size_t count, loff_t pos);
>  asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf,
>                             size_t count, loff_t pos);
> +asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec,
> +                          unsigned long vlen, u32 pos_high, u32 pos_low);
> +asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec,
> +                           unsigned long vlen, u32 pos_high, u32 pos_low);
>  asmlinkage long sys_getcwd(char __user *buf, unsigned long size);
>  asmlinkage long sys_mkdir(const char __user *pathname, int mode);
>  asmlinkage long sys_chdir(const char __user *filename);
> --
> 1.6.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-api" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>



-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
git://git.kernel.org/pub/scm/docs/man-pages/man-pages.git
man-pages online: http://www.kernel.org/doc/man-pages/online_pages.html
Found a bug? http://www.kernel.org/doc/man-pages/reporting_bugs.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v7 3/5] Add preadv and pwritev system calls.
  2009-01-26 23:42   ` Michael Kerrisk
@ 2009-01-28 15:04     ` Gerd Hoffmann
  0 siblings, 0 replies; 8+ messages in thread
From: Gerd Hoffmann @ 2009-01-28 15:04 UTC (permalink / raw)
  To: mtk.manpages; +Cc: linux-kernel, linux-arch, linux-api, aarcange

[-- Attachment #1: Type: text/plain, Size: 629 bytes --]

Michael Kerrisk wrote:
>> The application-visible interface provided by glibc should look like
>> this to be compatible to the existing implementations in the *BSD family:
>>
>>  ssize_t preadv(int d, const struct iovec *iov, int iovcnt, off_t offset);
>>  ssize_t pwritev(int d, const struct iovec *iov, int iovcnt, off_t offset);
> 
> I earlier asked if you could provide some userspace example code using
> this API.  If there was a response, I missed it.  Could you please
> provide some working test using this interface.

I had some pointers in the patch series intro text.
A standalone test app is attached now.

cheers,
  Gerd

[-- Attachment #2: preadv.c --]
[-- Type: text/x-csrc, Size: 2670 bytes --]

#if 0
set -x
gcc -Wall -O2 -o preadv $0
exit 0
#endif
/*
 * preadv demo / test
 *
 * (c) 2008 Gerd Hoffmann <kraxel@redhat.com>
 *
 * build with "sh $thisfile"
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/uio.h>

/* ----------------------------------------------------------------- */
/* syscall windup                                                    */

#include <sys/syscall.h>
#if 0
/* WARNING: Be sure you know what you are doing if you enable this.
 * linux syscall code isn't upstream yet, syscall numbers are subject
 * to change */
# ifndef __NR_preadv
#  ifdef __i386__
#   define __NR_preadv  333
#   define __NR_pwritev 334
#  endif
#  ifdef __x86_64__
#   define __NR_preadv  295
#   define __NR_pwritev 296
#  endif
# endif
#endif
#ifndef __NR_preadv
# error preadv/pwritev syscall numbers are unknown
#endif

/*
 * Userspace wrapper for the raw preadv system call.
 *
 * The syscall takes the file position as two 32-bit halves (pos_high,
 * pos_low), so the off_t is split here before being handed to syscall().
 *
 * Returns whatever syscall() returns: the number of bytes read, or -1
 * with errno set on failure.
 */
static ssize_t preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
    /*
     * Widen before shifting: off_t may be only 32 bits wide (e.g. i386
     * without _FILE_OFFSET_BITS=64), and shifting a 32-bit value by 32
     * is undefined behaviour.
     */
    uint64_t pos      = (uint64_t)offset;
    uint32_t pos_high = (uint32_t)(pos >> 32);
    uint32_t pos_low  = (uint32_t)(pos & 0xffffffff);

    return syscall(__NR_preadv, fd, iov, iovcnt, pos_high, pos_low);
}

/*
 * Userspace wrapper for the raw pwritev system call.
 *
 * The syscall takes the file position as two 32-bit halves (pos_high,
 * pos_low), so the off_t is split here before being handed to syscall().
 *
 * Returns whatever syscall() returns: the number of bytes written, or -1
 * with errno set on failure.
 */
static ssize_t pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
    /*
     * Widen before shifting: off_t may be only 32 bits wide (e.g. i386
     * without _FILE_OFFSET_BITS=64), and shifting a 32-bit value by 32
     * is undefined behaviour.
     */
    uint64_t pos      = (uint64_t)offset;
    uint32_t pos_high = (uint32_t)(pos >> 32);
    uint32_t pos_low  = (uint32_t)(pos & 0xffffffff);

    return syscall(__NR_pwritev, fd, iov, iovcnt, pos_high, pos_low);
}

/* ----------------------------------------------------------------- */
/* demo/test app                                                     */

/* Name template handed to mkstemp() in main(); removed again by cleanup(). */
static char filename[] = "/tmp/preadv-XXXXXX";
/* Source data for the write test (10 digits plus terminating NUL). */
static char outbuf[11] = "0123456789";
/* Destination for the read test; pre-filled with '-' so that bytes the
 * read does not touch stay visible in the printed result. */
static char inbuf[11]  = "----------";

/* Gather list for pwritev(): second half of outbuf first ("56789"),
 * then the first half ("01234"). */
static struct iovec ovec[2] = {{
        .iov_base = outbuf + 5,
        .iov_len  = 5,
    },{
        .iov_base = outbuf + 0,
        .iov_len  = 5,
    }};

/* Scatter list for preadv(): three 2-byte segments that land at inbuf
 * offsets 6, 4 and 2 in that order, leaving inbuf[0..1] and inbuf[8..9]
 * untouched. */
static struct iovec ivec[3] = {{
        .iov_base = inbuf + 6,
        .iov_len  = 2,
    },{
        .iov_base = inbuf + 4,
        .iov_len  = 2,
    },{
        .iov_base = inbuf + 2,
        .iov_len  = 2,
    }};

/* Exit handler: delete the temporary file named by `filename`. */
void cleanup(void)
{
    /* Best effort only; nothing useful can be done if the unlink fails. */
    (void)unlink(filename);
}

/*
 * Test driver: writes outbuf to a temporary file with pwritev(), reads
 * part of it back with preadv() into inbuf, prints the result next to the
 * expected string and reports the comparison through the exit status.
 *
 * Exit status: 0 when the data read back matches the expected pattern,
 * 1 on any syscall failure or on a mismatch.
 */
int main(int argc, char **argv)
{
    static const char expected[] = "--129078--";
    ssize_t rc;
    int fd;

    /* No command line arguments are used. */
    (void)argc;
    (void)argv;

    fd = mkstemp(filename);
    if (fd == -1) {
        perror("mkstemp");
        exit(1);
    }
    /* Register only after mkstemp() succeeded, so a real file exists. */
    atexit(cleanup);

    /* write to file: "56789" followed by "01234", at offset 0 */
    rc = pwritev(fd, ovec, 2, 0);
    if (rc < 0) {
        perror("pwritev");
        exit(1);
    }

    /* read from file at offset 2: "78", "90", "12" */
    rc = preadv(fd, ivec, 3, 2);
    if (rc < 0) {
        perror("preadv");
        exit(1);
    }
    close(fd);

    printf("result  : %s\n", inbuf);
    printf("expected: %s\n", expected);

    /* A short transfer or wrong offset shows up as a mismatch here. */
    exit(strcmp(inbuf, expected) == 0 ? 0 : 1);
}

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2009-01-28 15:05 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-26 13:26 [PATCH v7 0/5] Add preadv & pwritev system calls Gerd Hoffmann
2009-01-26 13:26 ` [PATCH v7 1/5] create compat_readv() Gerd Hoffmann
2009-01-26 13:26 ` [PATCH v7 2/5] create compat_writev() Gerd Hoffmann
2009-01-26 13:26 ` [PATCH v7 3/5] Add preadv and pwritev system calls Gerd Hoffmann
2009-01-26 23:42   ` Michael Kerrisk
2009-01-28 15:04     ` Gerd Hoffmann
2009-01-26 13:26 ` [PATCH v7 4/5] MIPS: Add preadv(2) and pwritev(2) syscalls Gerd Hoffmann
2009-01-26 13:26 ` [PATCH v7 5/5] switch compat readv/preadv/writev/pwritev from fget to fget_light Gerd Hoffmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).