* [git pull] VFS patches, the first series @ 2008-07-27 1:22 Al Viro 2008-08-21 7:40 ` Eric W. Biederman 0 siblings, 1 reply; 7+ messages in thread From: Al Viro @ 2008-07-27 1:22 UTC (permalink / raw) To: Linus Torvalds; +Cc: linux-kernel The first part of huge pile. Mostly it's untangling nameidata handling, digging towards the pieces that kill intents and cleaning pathname resolution in general. ->permission() sanitizing and sysctl procfs treatment rewrite needed for it. A bunch of descriptor handling fixes. Plus part of assorted patched from the last cycle sent by other folks. A _lot_ more is still pending; this is what I'd managed to pull into a series by this point. Please, pull from git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6.git/ for-linus Shortlog: Al Viro (24): beginning of sysctl cleanup - ctl_table_set allow delayed freeing of ctl_table_header sysctl: make sure that /proc/sys/net/ipv4 appears before per-ns ones sysctl: keep track of tree relationships sanitize proc_sysctl sanitize ->permission() prototype permission checks for chdir need special treatment only on the last step kill altroot fix MAY_CHDIR/MAY_ACCESS/LOOKUP_ACCESS mess pass MAY_OPEN to vfs_permission() explicitly more nameidata removal: exec_permission_lite() doesn't need it take noexec checks to very few callers that care kill nameidata passing to permission(), rename to inode_permission() preparation to __user_walk_fd cleanup sanitize __user_walk_fd() et.al. new (local) helper: user_path_parent() don't pass nameidata to gfs2_lookupi() don't pass nameidata to __ncp_lookup_validate() f_count may wrap around get rid of __user_path_lookup_open get rid of indirect users of namei.h remove remaining namei_{32,64}.h crap get rid of corner case in dup3() entirely fix RLIM_NOFILE handling Christoph Hellwig (1): Re: [PATCH 3/6] vfs: open_exec cleanup Denys Vlasenko (1): reuse xxx_fifo_fops for xxx_pipe_fops Li Zefan (1): vfs: use kstrdup() and check failing allocation Miklos Szeredi (10): [patch] vfs: fix lookup on deleted directory [patch] hppfs: remove hppfs_permission [patch 05/14] hpfs: dont call permission() [patch 1/5] vfs: truncate: dont check immutable twice [patch 3/5] vfs: change remove_suid() to file_remove_suid() [patch 5/5] vfs: remove mode parameter from vfs_symlink() [patch 1/4] vfs: utimes: move owner check into inode_change_ok() [patch 2/4] vfs: utimes cleanup [patch 3/4] fat: dont call notify_change [patch 4/4] vfs: immutable inode checking cleanup Tetsuo Handa (1): [patch 4/5] vfs: reuse local variable in vfs_link() Ulrich Drepper (1): dup3 fix Diffstat: arch/alpha/kernel/osf_sys.c | 10 +- arch/parisc/hpux/sys_hpux.c | 10 +- drivers/net/ppp_generic.c | 6 +- fs/affs/file.c | 4 - fs/afs/internal.h | 4 +- fs/afs/security.c | 2 +- fs/aio.c | 6 +- fs/attr.c | 7 +- fs/bad_inode.c | 3 +- fs/cifs/cifsfs.c | 2 +- fs/coda/dir.c | 4 +- fs/coda/pioctl.c | 20 +- fs/compat.c | 20 +- fs/ecryptfs/inode.c | 21 +-- fs/exec.c | 81 +++++--- fs/ext2/acl.c | 2 +- fs/ext2/acl.h | 2 +- fs/ext3/acl.c | 2 +- fs/ext3/acl.h | 2 +- fs/ext4/acl.c | 2 +- fs/ext4/acl.h | 2 +- fs/fat/file.c | 15 ++- fs/fcntl.c | 33 ++-- fs/fifo.c | 8 +- fs/file.c | 9 + fs/file_table.c | 10 +- fs/fuse/dir.c | 6 +- fs/fuse/file.c | 2 +- fs/gfs2/inode.c | 6 +- fs/gfs2/inode.h | 2 +- fs/gfs2/ops_export.c | 2 +- fs/gfs2/ops_inode.c | 16 +- fs/gfs2/super.c | 2 +- fs/hfs/inode.c | 7 +- fs/hfsplus/inode.c | 6 +- fs/hostfs/hostfs_kern.c | 2 +- fs/hpfs/namei.c | 2 +- fs/hppfs/hppfs.c | 7 - fs/inotify_user.c | 22 +- fs/jffs2/acl.c | 2 +- fs/jffs2/acl.h | 2 +- fs/jfs/acl.c | 2 +- fs/jfs/jfs_acl.h | 2 +- fs/namei.c | 354 +++++++++++---------------------- fs/namespace.c | 106 +++++------ fs/ncpfs/dir.c | 4 +- fs/nfs/dir.c | 11 +- fs/nfsd/nfsctl.c | 1 + fs/nfsd/nfsfh.c | 2 +- fs/nfsd/vfs.c | 14 +- fs/ntfs/file.c | 2 +- fs/ocfs2/file.c | 2 +- fs/ocfs2/file.h | 3 +- fs/open.c | 179 ++++++++--------- fs/pipe.c | 51 +---- fs/proc/base.c | 3 +- fs/proc/inode.c | 5 + fs/proc/proc_sysctl.c | 429 ++++++++++++++++++---------------------- fs/reiserfs/xattr.c | 2 +- fs/smbfs/file.c | 4 +- fs/splice.c | 4 +- fs/stat.c | 32 ++-- fs/ubifs/file.c | 1 + fs/utimes.c | 139 +++++++------- fs/xattr.c | 98 +++++----- fs/xfs/linux-2.6/xfs_ioctl.c | 14 +- fs/xfs/linux-2.6/xfs_iops.c | 3 +- fs/xfs/linux-2.6/xfs_lrw.c | 2 +- include/asm-alpha/namei.h | 17 -- include/asm-arm/namei.h | 25 --- include/asm-avr32/namei.h | 7 - include/asm-blackfin/namei.h | 19 -- include/asm-cris/namei.h | 17 -- include/asm-frv/namei.h | 18 -- include/asm-h8300/namei.h | 17 -- include/asm-ia64/namei.h | 25 --- include/asm-m32r/namei.h | 17 -- include/asm-m68k/namei.h | 17 -- include/asm-m68knommu/namei.h | 1 - include/asm-mips/namei.h | 11 - include/asm-mn10300/namei.h | 22 -- include/asm-parisc/namei.h | 17 -- include/asm-powerpc/namei.h | 20 -- include/asm-s390/namei.h | 21 -- include/asm-sh/namei.h | 17 -- include/asm-sparc/namei.h | 8 - include/asm-sparc/namei_32.h | 13 -- include/asm-sparc/namei_64.h | 13 -- include/asm-sparc64/namei.h | 1 - include/asm-um/namei.h | 6 - include/asm-v850/namei.h | 17 -- include/asm-x86/namei.h | 11 - include/asm-xtensa/namei.h | 26 --- include/linux/coda_linux.h | 2 +- include/linux/fs.h | 57 +++--- include/linux/fs_struct.h | 3 +- include/linux/mount.h | 2 +- include/linux/namei.h | 19 +- include/linux/nfs_fs.h | 3 +- include/linux/proc_fs.h | 5 + include/linux/reiserfs_xattr.h | 2 +- include/linux/security.h | 7 +- include/linux/shmem_fs.h | 2 +- include/linux/sysctl.h | 25 +++- include/net/af_unix.h | 2 +- include/net/ip.h | 2 + include/net/net_namespace.h | 4 +- ipc/mqueue.c | 2 +- kernel/cgroup.c | 1 + kernel/exec_domain.c | 1 - kernel/exit.c | 2 - kernel/fork.c | 7 - kernel/sysctl.c | 166 ++++++++++++++-- mm/filemap.c | 7 +- mm/filemap_xip.c | 2 +- mm/shmem_acl.c | 2 +- net/ipv4/af_inet.c | 4 + net/ipv4/sysctl_net_ipv4.c | 7 + net/sched/sch_atm.c | 4 +- net/sysctl_net.c | 22 +-- net/unix/af_unix.c | 2 +- net/unix/garbage.c | 18 +- security/capability.c | 3 +- security/security.c | 5 +- security/selinux/hooks.c | 5 +- security/smack/smack_lsm.c | 3 +- 126 files changed, 1088 insertions(+), 1535 deletions(-) delete mode 100644 include/asm-alpha/namei.h delete mode 100644 include/asm-arm/namei.h delete mode 100644 include/asm-avr32/namei.h delete mode 100644 include/asm-blackfin/namei.h delete mode 100644 include/asm-cris/namei.h delete mode 100644 include/asm-frv/namei.h delete mode 100644 include/asm-h8300/namei.h delete mode 100644 include/asm-ia64/namei.h delete mode 100644 include/asm-m32r/namei.h delete mode 100644 include/asm-m68k/namei.h delete mode 100644 include/asm-m68knommu/namei.h delete mode 100644 include/asm-mips/namei.h delete mode 100644 include/asm-mn10300/namei.h delete mode 100644 include/asm-parisc/namei.h delete mode 100644 include/asm-powerpc/namei.h delete mode 100644 include/asm-s390/namei.h delete mode 100644 include/asm-sh/namei.h delete mode 100644 include/asm-sparc/namei.h delete mode 100644 include/asm-sparc/namei_32.h delete mode 100644 include/asm-sparc/namei_64.h delete mode 100644 include/asm-sparc64/namei.h delete mode 100644 include/asm-um/namei.h delete mode 100644 include/asm-v850/namei.h delete mode 100644 include/asm-x86/namei.h delete mode 100644 include/asm-xtensa/namei.h ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [git pull] VFS patches, the first series 2008-07-27 1:22 [git pull] VFS patches, the first series Al Viro @ 2008-08-21 7:40 ` Eric W. Biederman 2008-08-21 17:14 ` Eric W. Biederman 0 siblings, 1 reply; 7+ messages in thread From: Eric W. Biederman @ 2008-08-21 7:40 UTC (permalink / raw) To: Al Viro; +Cc: Linus Torvalds, linux-kernel Al Viro <viro@ZenIV.linux.org.uk> writes: > The first part of huge pile. Mostly it's untangling nameidata handling, > digging towards the pieces that kill intents and cleaning pathname > resolution in general. ->permission() sanitizing and sysctl procfs > treatment rewrite needed for it. A bunch of descriptor handling fixes. > Plus part of assorted patched from the last cycle sent by other folks. > A _lot_ more is still pending; this is what I'd managed to pull into > a series by this point. Please, pull from > git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6.git/ for-linus Al a quick heads up. In testing movement of network devices between namespaces I hit the recently added WARN_ON in unregister_sysctl_table. More tomorrow when I get a after I have dug into this farther. Eric ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [git pull] VFS patches, the first series 2008-08-21 7:40 ` Eric W. Biederman @ 2008-08-21 17:14 ` Eric W. Biederman 2008-08-22 0:08 ` Eric W. Biederman 0 siblings, 1 reply; 7+ messages in thread From: Eric W. Biederman @ 2008-08-21 17:14 UTC (permalink / raw) To: Al Viro; +Cc: Linus Torvalds, linux-kernel ebiederm@xmission.com (Eric W. Biederman) writes: > Al Viro <viro@ZenIV.linux.org.uk> writes: > >> The first part of huge pile. Mostly it's untangling nameidata handling, >> digging towards the pieces that kill intents and cleaning pathname >> resolution in general. ->permission() sanitizing and sysctl procfs >> treatment rewrite needed for it. A bunch of descriptor handling fixes. >> Plus part of assorted patched from the last cycle sent by other folks. >> A _lot_ more is still pending; this is what I'd managed to pull into >> a series by this point. Please, pull from >> git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6.git/ for-linus > > Al a quick heads up. In testing movement of network devices between > namespaces I hit the recently added WARN_ON in unregister_sysctl_table. It seems to be some new oddness when destroying a network namespace. If I don't have network devices to push out of the network namespace when I clean it up nothing happens. When I do I get this nice beautiful backtrace. I will dig into the sysctl code in a bit and see if I can understand why this is happening. ------------[ cut here ]------------ WARNING: at /home/eric/projects/linux/linux-2.6-arastra-ns/kernel/sysctl.c:1929 unregister_sysctl_table+0xb5/0x e5() Modules linked in: Pid: 22, comm: netns Tainted: G W 2.6.27-rc3x86_64 #48 Call Trace: [<ffffffff802361da>] warn_on_slowpath+0x51/0x77 [<ffffffff8023c631>] unregister_sysctl_table+0x34/0xe5 [<ffffffff8023c6b2>] unregister_sysctl_table+0xb5/0xe5 [<ffffffff80523f8c>] neigh_sysctl_unregister+0x1a/0x31 [<ffffffff80559635>] inetdev_event+0x2b4/0x3d1 [<ffffffff8024b850>] notifier_call_chain+0x29/0x56 [<ffffffff8051fa2a>] dev_change_net_namespace+0x1bb/0x1da [<ffffffff8051fa9d>] default_device_exit+0x54/0xa2 [<ffffffff8052105e>] netdev_run_todo+0x1fd/0x206 [<ffffffff8051ce0b>] cleanup_net+0x0/0x95 [<ffffffff8051ce6f>] cleanup_net+0x64/0x95 [<ffffffff80244e58>] run_workqueue+0xf1/0x1ee [<ffffffff80244e02>] run_workqueue+0x9b/0x1ee [<ffffffff802459ee>] worker_thread+0xd8/0xe3 [<ffffffff80248426>] autoremove_wake_function+0x0/0x2e [<ffffffff80245916>] worker_thread+0x0/0xe3 [<ffffffff80248311>] kthread+0x47/0x76 [<ffffffff805c7529>] trace_hardirqs_on_thunk+0x3a/0x3f [<ffffffff8020cdc9>] child_rip+0xa/0x11 [<ffffffff8020c3ff>] restore_args+0x0/0x30 [<ffffffff80230bcd>] finish_task_switch+0x0/0xc4 [<ffffffff802482ca>] kthread+0x0/0x76 [<ffffffff8020cdbf>] child_rip+0x0/0x11 ---[ end trace f9cc56de378eb3ce ]--- ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [git pull] VFS patches, the first series 2008-08-21 17:14 ` Eric W. Biederman @ 2008-08-22 0:08 ` Eric W. Biederman 2008-08-23 3:33 ` Al Viro 2008-08-23 7:24 ` Al Viro 0 siblings, 2 replies; 7+ messages in thread From: Eric W. Biederman @ 2008-08-22 0:08 UTC (permalink / raw) To: Al Viro; +Cc: Linus Torvalds, linux-kernel, netdev, Denis V. Lunev ebiederm@xmission.com (Eric W. Biederman) writes: > ebiederm@xmission.com (Eric W. Biederman) writes: > >> Al Viro <viro@ZenIV.linux.org.uk> writes: >> >>> The first part of huge pile. Mostly it's untangling nameidata handling, >>> digging towards the pieces that kill intents and cleaning pathname >>> resolution in general. ->permission() sanitizing and sysctl procfs >>> treatment rewrite needed for it. A bunch of descriptor handling fixes. >>> Plus part of assorted patched from the last cycle sent by other folks. >>> A _lot_ more is still pending; this is what I'd managed to pull into >>> a series by this point. Please, pull from >>> git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6.git/ for-linus >> >> Al a quick heads up. In testing movement of network devices between >> namespaces I hit the recently added WARN_ON in unregister_sysctl_table. > > It seems to be some new oddness when destroying a network namespace. > > If I don't have network devices to push out of the network namespace > when I clean it up nothing happens. When I do I get this nice beautiful > backtrace. > > I will dig into the sysctl code in a bit and see if I can understand > why this is happening. Ok. The situation is now clear. /proc/sys/net/ipv4/neigh/default does not currently exist in network namespaces. This looks like an oversight. In my network namespace I have the interfaces lo, sit, veth0 We have the result that /proc/sys/net/ipv4/neigh/veth0 has /proc/sys/net/ipv4/neigh/lo. lo gets unregistered before veth0 when we bring the network namespaces down. Then when veth gets unregistered we have a problem. I'm not certain what to do about it. The semantics of the how the sysctl tables are access have changed significantly. Now the first sysctl table to describe a directory must remain until there are no other tables that have entries in that directory and a sysctl table must have a pure path of directories for any portion of the address space it shares with an earlier sysctl table. This is noticeably different from the union mount semantics we have had previously for the sysctl tables. If it doesn't look to bad to maintain the new semantics it looks like the right thing to do is to add some additional checks so we get more precise warnings (who knows what out of tree sysctl code will do) and to find someplace I can insert a net/ipv4/neigh sysctl directory into (ipv4_net_table looks like it will work) to keep the network namespace code working safely. Al btw nice trick using compare to keep the dentries separate allowing us to cache everything in /proc. I feel silly for missing that one. Want to get together in the next couple of weeks and build a tree that updates the sysctls infrastructure to suck less? Eric > ------------[ cut here ]------------ > WARNING: at /home/eric/projects/linux/linux-2.6-arastra-ns/kernel/sysctl.c:1929 > unregister_sysctl_table+0xb5/0x > e5() > Modules linked in: > Pid: 22, comm: netns Tainted: G W 2.6.27-rc3x86_64 #48 > > Call Trace: > [<ffffffff802361da>] warn_on_slowpath+0x51/0x77 > [<ffffffff8023c631>] unregister_sysctl_table+0x34/0xe5 > [<ffffffff8023c6b2>] unregister_sysctl_table+0xb5/0xe5 > [<ffffffff80523f8c>] neigh_sysctl_unregister+0x1a/0x31 > [<ffffffff80559635>] inetdev_event+0x2b4/0x3d1 > [<ffffffff8024b850>] notifier_call_chain+0x29/0x56 > [<ffffffff8051fa2a>] dev_change_net_namespace+0x1bb/0x1da > [<ffffffff8051fa9d>] default_device_exit+0x54/0xa2 > [<ffffffff8052105e>] netdev_run_todo+0x1fd/0x206 > [<ffffffff8051ce0b>] cleanup_net+0x0/0x95 > [<ffffffff8051ce6f>] cleanup_net+0x64/0x95 > [<ffffffff80244e58>] run_workqueue+0xf1/0x1ee > [<ffffffff80244e02>] run_workqueue+0x9b/0x1ee > [<ffffffff802459ee>] worker_thread+0xd8/0xe3 > [<ffffffff80248426>] autoremove_wake_function+0x0/0x2e > [<ffffffff80245916>] worker_thread+0x0/0xe3 > [<ffffffff80248311>] kthread+0x47/0x76 > [<ffffffff805c7529>] trace_hardirqs_on_thunk+0x3a/0x3f > [<ffffffff8020cdc9>] child_rip+0xa/0x11 > [<ffffffff8020c3ff>] restore_args+0x0/0x30 > [<ffffffff80230bcd>] finish_task_switch+0x0/0xc4 > [<ffffffff802482ca>] kthread+0x0/0x76 > [<ffffffff8020cdbf>] child_rip+0x0/0x11 > > ---[ end trace f9cc56de378eb3ce ]--- ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [git pull] VFS patches, the first series 2008-08-22 0:08 ` Eric W. Biederman @ 2008-08-23 3:33 ` Al Viro 2008-08-23 5:22 ` Eric W. Biederman 2008-08-23 7:24 ` Al Viro 1 sibling, 1 reply; 7+ messages in thread From: Al Viro @ 2008-08-23 3:33 UTC (permalink / raw) To: Eric W. Biederman; +Cc: Linus Torvalds, linux-kernel, netdev, Denis V. Lunev On Thu, Aug 21, 2008 at 05:08:25PM -0700, Eric W. Biederman wrote: > I'm not certain what to do about it. The semantics of the how the > sysctl tables are access have changed significantly. Now the first > sysctl table to describe a directory must remain until there are no > other tables that have entries in that directory and a sysctl table > must have a pure path of directories for any portion of the address > space it shares with an earlier sysctl table. This is noticeably > different from the union mount semantics we have had previously for > the sysctl tables. Note that the old semantics had a lovely inherent problem (leaving aside the utterly insane amount of walking and re-walking the trees, as you've found out the hard way - don't tell me you hadn't cursed it when writing the previous version of proc_sysctl.c): there's redundancy between the trees. At the very least, just what are we supposed to get when the stems do not match each other - either in permissions or in ctl_name? > If it doesn't look to bad to maintain the new semantics it looks like > the right thing to do is to add some additional checks so we get more > precise warnings (who knows what out of tree sysctl code will do) and > to find someplace I can insert a net/ipv4/neigh sysctl directory into > (ipv4_net_table looks like it will work) to keep the network namespace > code working safely. > > Al btw nice trick using compare to keep the dentries separate allowing > us to cache everything in /proc. I feel silly for missing that one. > Want to get together in the next couple of weeks and build a tree that > updates the sysctls infrastructure to suck less? Fine by me... BTW, fixing that particular crap is not hard - you need to have the entry in question show up before either interface, that's all. I missed that part of ordering mess, to be honest. I'll look into that, hopefully will post the fix later tonight. FWIW, I'd very much prefer ->d_compare() trick to the horrors you guys are doing around sysfs; it might or might not be feasible depending on what visibility rules you end up with there, but if it's feasible at all I'd rather go for it and avoid the entire 'separate backing store' mess. IIRC, I had described that scheme to you quite a few months ago in sysfs context; got no response back then... ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [git pull] VFS patches, the first series 2008-08-23 3:33 ` Al Viro @ 2008-08-23 5:22 ` Eric W. Biederman 0 siblings, 0 replies; 7+ messages in thread From: Eric W. Biederman @ 2008-08-23 5:22 UTC (permalink / raw) To: Al Viro; +Cc: Linus Torvalds, linux-kernel, netdev, Denis V. Lunev Al Viro <viro@ZenIV.linux.org.uk> writes: > On Thu, Aug 21, 2008 at 05:08:25PM -0700, Eric W. Biederman wrote: > >> I'm not certain what to do about it. The semantics of the how the >> sysctl tables are access have changed significantly. Now the first >> sysctl table to describe a directory must remain until there are no >> other tables that have entries in that directory and a sysctl table >> must have a pure path of directories for any portion of the address >> space it shares with an earlier sysctl table. This is noticeably >> different from the union mount semantics we have had previously for >> the sysctl tables. > > Note that the old semantics had a lovely inherent problem (leaving aside > the utterly insane amount of walking and re-walking the trees, as you've > found out the hard way - don't tell me you hadn't cursed it when writing > the previous version of proc_sysctl.c): there's redundancy between the > trees. At the very least, just what are we supposed to get when the > stems do not match each other - either in permissions or in ctl_name? That case is simple. We never allowed overlapping leaves, and all of the directories had essentially the same permissions. Beyond that I added checks in sysctl_check to make certain we are never out of sync. As for the walking and rewalking I was never fond of it but it was simple and worked. So far I am not a fan of the new semantics. >> If it doesn't look to bad to maintain the new semantics it looks like >> the right thing to do is to add some additional checks so we get more >> precise warnings (who knows what out of tree sysctl code will do) and >> to find someplace I can insert a net/ipv4/neigh sysctl directory into >> (ipv4_net_table looks like it will work) to keep the network namespace >> code working safely. >> >> Al btw nice trick using compare to keep the dentries separate allowing >> us to cache everything in /proc. I feel silly for missing that one. >> Want to get together in the next couple of weeks and build a tree that >> updates the sysctls infrastructure to suck less? > > Fine by me... BTW, fixing that particular crap is not hard - you need > to have the entry in question show up before either interface, that's all. > I missed that part of ordering mess, to be honest. I'll look into that, > hopefully will post the fix later tonight. Thanks for looking. The ordering problem is self inflicted as you introduced an ordering constraint where none existed previously, and it seems unnecessary. I'm currently tearing my hair out trying to think of a reasonable way to audit the current sysctl usage to see if there is anything else that was missed. > FWIW, I'd very much prefer ->d_compare() trick to the horrors you guys > are doing around sysfs; it might or might not be feasible depending on > what visibility rules you end up with there, but if it's feasible at all > I'd rather go for it and avoid the entire 'separate backing store' mess. > IIRC, I had described that scheme to you quite a few months ago in sysfs > context; got no response back then... Weird. I must have missed seeing it, as I don't have any recollection of it. There are two pieces of the problem. - How do we get a dentry tree that the vfs won't gag on. Without knowing how to successfully implement the dcompare trick it required 2 dentry trees. - Monitoring. It is desirable to be able to mount the filesystem such that someone outside the namespace can get a view of what the folks inside the namespace see. Roughly like is done with /proc/net today. Neither of those two cases requires multiple dentry trees and the tagged sysfs dirents can easily support an operation like is_seen. I don't think the dcompare trick is general enough to support discriminating on something besides the current process. Which leads to problems with monitoring. Eric ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [git pull] VFS patches, the first series 2008-08-22 0:08 ` Eric W. Biederman 2008-08-23 3:33 ` Al Viro @ 2008-08-23 7:24 ` Al Viro 1 sibling, 0 replies; 7+ messages in thread From: Al Viro @ 2008-08-23 7:24 UTC (permalink / raw) To: Eric W. Biederman; +Cc: Linus Torvalds, linux-kernel, netdev, Denis V. Lunev On Thu, Aug 21, 2008 at 05:08:25PM -0700, Eric W. Biederman wrote: > Ok. The situation is now clear. > > /proc/sys/net/ipv4/neigh/default does not currently exist in network > namespaces. This looks like an oversight. > > In my network namespace I have the interfaces lo, sit, veth0 > > We have the result that /proc/sys/net/ipv4/neigh/veth0 has /proc/sys/net/ipv4/neigh/lo. > > lo gets unregistered before veth0 when we bring the network namespaces down. > Then when veth gets unregistered we have a problem. > > I'm not certain what to do about it. diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 16fc6f4..d3c156e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3054,14 +3054,23 @@ static ctl_table ipv4_route_table[] = { { .ctl_name = 0 } }; -static __net_initdata struct ctl_path ipv4_route_path[] = { +static struct ctl_table empty[1]; + +static struct ctl_table ipv4_skeleton[] = +{ + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, + .child = ipv4_route_table}, + { .procname = "neigh", .ctl_name = NET_IPV4_NEIGH, + .child = empty}, + { } +}; + +static __net_initdata struct ctl_path ipv4_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "ipv4", .ctl_name = NET_IPV4, }, - { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, { }, }; - static struct ctl_table ipv4_route_flush_table[] = { { .ctl_name = NET_IPV4_ROUTE_FLUSH, @@ -3074,6 +3083,13 @@ static struct ctl_table ipv4_route_flush_table[] = { { .ctl_name = 0 }, }; +static __net_initdata struct ctl_path ipv4_route_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, + { }, +}; + static __net_init int sysctl_route_net_init(struct net *net) { struct ctl_table *tbl; @@ -3223,7 +3239,7 @@ int __init ip_rt_init(void) */ void __init ip_static_sysctl_init(void) { - register_sysctl_paths(ipv4_route_path, ipv4_route_table); + register_sysctl_paths(ipv4_path, ipv4_skeleton); } #endif ^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2008-08-23 7:24 UTC | newest] Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2008-07-27 1:22 [git pull] VFS patches, the first series Al Viro 2008-08-21 7:40 ` Eric W. Biederman 2008-08-21 17:14 ` Eric W. Biederman 2008-08-22 0:08 ` Eric W. Biederman 2008-08-23 3:33 ` Al Viro 2008-08-23 5:22 ` Eric W. Biederman 2008-08-23 7:24 ` Al Viro
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).