linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH 1/3] readfile: implement readfile syscall
@ 2020-07-04 20:50 Alexey Dobriyan
  2020-07-04 21:15 ` Miklos Szeredi
  0 siblings, 1 reply; 8+ messages in thread
From: Alexey Dobriyan @ 2020-07-04 20:50 UTC (permalink / raw)
  To: viro; +Cc: linux-kernel, gregkh, geert, willy, miklos, linux-fsdevel

Al wrote:

> > On Sat, Jul 04, 2020 at 09:41:09PM +0200, Miklos Szeredi wrote:
> >  1) just leave the first explanation (it's an open + read + close
> > equivalent) and leave out the rest
> >
> >  2) add a loop around the vfs_read() in the code.
> 
> 3) don't bother with the entire thing, until somebody manages to demonstrate
> a setup where it does make a real difference (compared to the obvious
> sequence of syscalls, that is).

Ehh? System call overhead is trivially measurable.
https://lwn.net/Articles/814175/

> At which point we'll need to figure out
> what's going on and deal with the underlying problem of that setup.

Run top?

Teach userspace to read 1 page minimum?

	192803 read(4, "cpu  3718263 4417 342808 7127674"..., 1024) = 1024
	192803 read(4, " 0 21217 21617 21954 10201 15425"..., 1024) = 1024
	192803 read(4, " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0"..., 1024) = 1024
	192803 read(4, "", 1024)

Teach userspace to pread?

	192803 openat(AT_FDCWD, "/proc/uptime", O_RDONLY) = 5
	192803 lseek(5, 0, SEEK_SET)            = 0
	192803 read(5, "47198.56 713699.82\n", 8191) = 19

Rhetorical question: what is harder: ditch the main source of overhead
(VFS, seq_file, text) or teach userspace how to read files?

Here is open+read /proc/cpuinfo in python2 and python3.
Python2 case is terrifying.

BTW is there something broken with seqfiles and record keeping?
Why does it return only 2 records per read?

Python 3:

openat(AT_FDCWD, "/proc/cpuinfo", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
ioctl(3, TCGETS, 0x7ffe6f1f0850) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR)            = 0
ioctl(3, TCGETS, 0x7ffe6f1f0710) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR)            = 0
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(3, "processor\t: 0\nvendor_id\t: Genuin"..., 8192) = 3038
read(3, "processor\t: 2\nvendor_id\t: Genuin"..., 5154) = 3038
read(3, "processor\t: 4\nvendor_id\t: Genuin"..., 2116) = 2116
read(3, "clmulqdq dtes64 monitor ds_cpl v"..., 8448) = 3966
read(3, "processor\t: 8\nvendor_id\t: Genuin"..., 4482) = 3038
read(3, "processor\t: 10\nvendor_id\t: Genui"..., 1444) = 1444
read(3, "t\t: 64\naddress sizes\t: 46 bits p"..., 16896) = 3116
read(3, "processor\t: 13\nvendor_id\t: Genui"..., 13780) = 3044
read(3, "processor\t: 15\nvendor_id\t: Genui"..., 10736) = 1522
read(3, "", 9214)                = 0


Python 2

openat(AT_FDCWD, "/proc/cpuinfo", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 0
lseek(3, 0, SEEK_CUR)            = 0
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(3, "processor\t: 0\nvendor_id\t: Genuin"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 1024
lseek(3, 0, SEEK_CUR)            = 1024
read(3, " cqm_occup_llc cqm_mbm_total cqm"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 2048
lseek(3, 0, SEEK_CUR)            = 2048
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 2048
lseek(3, 0, SEEK_CUR)            = 2048
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 2048
lseek(3, 0, SEEK_CUR)            = 2048
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 2048
lseek(3, 0, SEEK_CUR)            = 2048
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 2048
lseek(3, 0, SEEK_CUR)            = 2048
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 2048
lseek(3, 0, SEEK_CUR)            = 2048
read(3, "ebs bts rep_good nopl xtopology "..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 3072
lseek(3, 0, SEEK_CUR)            = 3072
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 3072
lseek(3, 0, SEEK_CUR)            = 3072
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 3072
lseek(3, 0, SEEK_CUR)            = 3072
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 3072
lseek(3, 0, SEEK_CUR)            = 3072
read(3, "ntel\ncpu family\t: 6\nmodel\t\t: 79\n"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 4096
lseek(3, 0, SEEK_CUR)            = 4096
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 4096
lseek(3, 0, SEEK_CUR)            = 4096
read(3, "bm_local dtherm ida arat pln pts"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 5120
lseek(3, 0, SEEK_CUR)            = 5120
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 5120
lseek(3, 0, SEEK_CUR)            = 5120
read(3, "nstop_tsc cpuid aperfmperf pni p"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 6144
lseek(3, 0, SEEK_CUR)            = 6144
read(3, "del name\t: Intel(R) Xeon(R) CPU "..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 7168
lseek(3, 0, SEEK_CUR)            = 7168
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 7168
lseek(3, 0, SEEK_CUR)            = 7168
read(3, "d_clear flush_l1d\nvmx flags\t: vn"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 8192
lseek(3, 0, SEEK_CUR)            = 8192
read(3, "clmulqdq dtes64 monitor ds_cpl v"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 9216
lseek(3, 0, SEEK_CUR)            = 9216
read(3, "E5-2620 v4 @ 2.10GHz\nstepping\t: "..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 10240
lseek(3, 0, SEEK_CUR)            = 10240
read(3, "vnmi preemption_timer posted_int"..., 1024) = 1024
read(3, " vmx smx est tm2 ssse3 sdbg fma "..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 12288
lseek(3, 0, SEEK_CUR)            = 12288
read(3, ": 1\nmicrocode\t: 0xb000038\ncpu MH"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 13312
lseek(3, 0, SEEK_CUR)            = 13312
read(3, "r invvpid ept_x_only ept_ad ept_"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 14336
lseek(3, 0, SEEK_CUR)            = 14336
read(3, "16 xtpr pdcm pcid dca sse4_1 sse"..., 1024) = 1024
read(3, "\t\t: 1326.352\ncache size\t: 20480 "..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 16384
lseek(3, 0, SEEK_CUR)            = 16384
read(3, "gb flexpriority apicv tsc_offset"..., 1024) = 1024
read(3, "4_2 x2apic movbe popcnt tsc_dead"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 18432
lseek(3, 0, SEEK_CUR)            = 18432
read(3, "KB\nphysical id\t: 0\nsiblings\t: 16"..., 1024) = 1024
read(3, " vtpr mtf vapic ept vpid unrestr"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 20480
lseek(3, 0, SEEK_CUR)            = 20480
read(3, "adline_timer aes xsave avx f16c "..., 2048) = 2048
read(3, "estricted_guest vapic_reg vid pl"..., 1024) = 1024
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
lseek(3, 0, SEEK_CUR)            = 23552
lseek(3, 0, SEEK_CUR)            = 23552
read(3, "16c rdrand lahf_lm abm 3dnowpref"..., 2048) = 770
read(3, "", 1024)                = 0
close(3)                         = 0

^ permalink raw reply	[flat|nested] 8+ messages in thread
* [PATCH 0/3] readfile(2): a new syscall to make open/read/close faster
@ 2020-07-04 14:02 Greg Kroah-Hartman
  2020-07-04 14:02 ` [PATCH 1/3] readfile: implement readfile syscall Greg Kroah-Hartman
  0 siblings, 1 reply; 8+ messages in thread
From: Greg Kroah-Hartman @ 2020-07-04 14:02 UTC (permalink / raw)
  To: viro, mtk.manpages, shuah, linux-api
  Cc: linux-fsdevel, linux-kernel, linux-man, linux-kselftest,
	Greg Kroah-Hartman

Here is a tiny new syscall, readfile, that makes it simpler to read
small/medium sized files all in one shot, no need to do open/read/close.
This is especially helpful for tools that poke around in procfs or
sysfs, putting a little less load on the system than before, especially
as syscall overheads go up over time due to various CPU bugs being
addressed.

There are 4 patches in this series, the first 3 are against the kernel
tree, adding the syscall logic, wiring up the syscall, and adding some
tests for it.

The last patch is against the man-pages project, adding a tiny man page
to try to describe the new syscall.

Greg Kroah-Hartman (3):
  readfile: implement readfile syscall
  arch: wire up the readfile syscall
  selftests: add readfile(2) selftests

 arch/alpha/kernel/syscalls/syscall.tbl        |   1 +
 arch/arm/tools/syscall.tbl                    |   1 +
 arch/arm64/include/asm/unistd.h               |   2 +-
 arch/arm64/include/asm/unistd32.h             |   2 +
 arch/ia64/kernel/syscalls/syscall.tbl         |   1 +
 arch/m68k/kernel/syscalls/syscall.tbl         |   1 +
 arch/microblaze/kernel/syscalls/syscall.tbl   |   1 +
 arch/mips/kernel/syscalls/syscall_n32.tbl     |   1 +
 arch/mips/kernel/syscalls/syscall_n64.tbl     |   1 +
 arch/mips/kernel/syscalls/syscall_o32.tbl     |   1 +
 arch/parisc/kernel/syscalls/syscall.tbl       |   1 +
 arch/powerpc/kernel/syscalls/syscall.tbl      |   1 +
 arch/s390/kernel/syscalls/syscall.tbl         |   1 +
 arch/sh/kernel/syscalls/syscall.tbl           |   1 +
 arch/sparc/kernel/syscalls/syscall.tbl        |   1 +
 arch/x86/entry/syscalls/syscall_32.tbl        |   1 +
 arch/x86/entry/syscalls/syscall_64.tbl        |   1 +
 arch/xtensa/kernel/syscalls/syscall.tbl       |   1 +
 fs/open.c                                     |  50 +++
 include/linux/syscalls.h                      |   2 +
 include/uapi/asm-generic/unistd.h             |   4 +-
 tools/testing/selftests/Makefile              |   1 +
 tools/testing/selftests/readfile/.gitignore   |   3 +
 tools/testing/selftests/readfile/Makefile     |   7 +
 tools/testing/selftests/readfile/readfile.c   | 285 +++++++++++++++++
 .../selftests/readfile/readfile_speed.c       | 301 ++++++++++++++++++
 26 files changed, 671 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/readfile/.gitignore
 create mode 100644 tools/testing/selftests/readfile/Makefile
 create mode 100644 tools/testing/selftests/readfile/readfile.c
 create mode 100644 tools/testing/selftests/readfile/readfile_speed.c

-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-07-04 21:15 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-04 20:50 [PATCH 1/3] readfile: implement readfile syscall Alexey Dobriyan
2020-07-04 21:15 ` Miklos Szeredi
  -- strict thread matches above, loose matches on Subject: below --
2020-07-04 14:02 [PATCH 0/3] readfile(2): a new syscall to make open/read/close faster Greg Kroah-Hartman
2020-07-04 14:02 ` [PATCH 1/3] readfile: implement readfile syscall Greg Kroah-Hartman
2020-07-04 18:35   ` Geert Uytterhoeven
2020-07-04 19:14   ` Matthew Wilcox
2020-07-04 19:41   ` Miklos Szeredi
2020-07-04 20:12     ` Al Viro
2020-07-04 20:16       ` Al Viro

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).