keyrings.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: Christian Brauner <christian.brauner@ubuntu.com>
Cc: dhowells@redhat.com, torvalds@linux-foundation.org,
	viro@zeniv.linux.org.uk, dray@redhat.com, kzak@redhat.com,
	mszeredi@redhat.com, swhiteho@redhat.com, jlayton@redhat.com,
	raven@themaw.net, andres@anarazel.de, keyrings@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	lennart@poettering.net, cyphar@cyphar.com
Subject: Re: Upcoming: Notifications, FS notifications and fsinfo()
Date: Tue, 31 Mar 2020 21:52:52 +0000	[thread overview]
Message-ID: <2418286.1585691572@warthog.procyon.org.uk> (raw)
In-Reply-To: <20200330211700.g7evnuvvjenq3fzm@wittgenstein>

Christian Brauner <christian.brauner@ubuntu.com> wrote:

> querying all properties of a mount atomically all-at-once,

I don't actually offer that, per se.

Having an atomic all-at-once query for a single mount is actually quite a
burden on the system.  There's potentially a lot of state involved, much of
which you don't necessarily need.

I've tried to avoid the need to do that by adding change counters that can be
queried cheaply.  You read the counters, then you check mounts and superblocks
for which the counters have changed, and then you re-read the counters.  I've
added multiple counters, assigned to different purposes, to make it easier to
pin down what has changed - and so reduce the amount of checking required.

What I have added to fsinfo() is a way to atomically retrieve a list of all
the children of a mount, including, for each mount, the mount ID (which may
have been reused), a uniquifier (which shouldn't wrap over the kernel
lifetime) and the sum of the mount object and superblock change counters.

This should allow you to quickly rescan the mount tree as fsinfo() can look up
mounts by mount ID instead of by path or fd.

Below is a sample file from the kernel that scans by this method, displaying
an ASCII-art tree of all the mounts under a path or mount.

David
---
// SPDX-License-Identifier: GPL-2.0-or-later
/* Test the fsinfo() system call
 *
 * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#define _GNU_SOURCE
#define _ATFILE_SOURCE
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <errno.h>
#include <time.h>
#include <math.h>
#include <sys/syscall.h>
#include <linux/fsinfo.h>
#include <linux/socket.h>
#include <linux/fcntl.h>
#include <sys/stat.h>
#include <arpa/inet.h>

#ifndef __NR_fsinfo
#define __NR_fsinfo -1
#endif

/*
 * Thin wrapper around the raw fsinfo() system call.  All six arguments are
 * handed to syscall() unchanged and its return value is passed back to the
 * caller as-is.
 */
static __attribute__((unused))
ssize_t fsinfo(int dfd, const char *filename,
	       struct fsinfo_params *params, size_t params_size,
	       void *result_buffer, size_t result_buf_size)
{
	long rc;

	rc = syscall(__NR_fsinfo, dfd, filename, params, params_size,
		     result_buffer, result_buf_size);
	return rc;
}

/*
 * Text printed in front of every non-root mount line; display_mount() extends
 * it by four characters per level and truncates it again on the way out.
 */
static char tree_buf[4096];
/*
 * Per-level marker ('|' or ' ') that display_mount() copies into tree_buf to
 * show whether a level still has further children to print.
 */
static char bar_buf[4096];
/*
 * Distance in bytes between consecutive records in the
 * FSINFO_ATTR_MOUNT_CHILDREN list; set once in main() from
 * query_list_element_size().
 */
static unsigned int children_list_interval;

/*
 * Get an fsinfo attribute in a statically allocated buffer.
 */
static void get_attr(unsigned int mnt_id, unsigned int attr, unsigned int Nth,
		     void *buf, size_t buf_size)
{
	struct fsinfo_params params = {
		.flags		= FSINFO_FLAGS_QUERY_MOUNT,
		.request	= attr,
		.Nth		= Nth,
	};
	char file[32];
	long ret;

	sprintf(file, "%u", mnt_id);

	memset(buf, 0xbd, buf_size);

	ret = fsinfo(AT_FDCWD, file, &params, sizeof(params), buf, buf_size);
	if (ret == -1) {
		fprintf(stderr, "mount-%s: %m\n", file);
		exit(1);
	}
}

/*
 * Get an fsinfo attribute in a dynamically allocated buffer.
 *
 * @mnt_id: mount ID; passed to fsinfo() as a decimal string with
 *          FSINFO_FLAGS_QUERY_MOUNT set.
 * @attr:   attribute to request.
 * @Nth:    value placed in the Nth field of the request.
 * @_size:  set to the value returned by the successful fsinfo() call.
 *
 * Starts with a 4096-byte buffer.  A return value larger than the buffer is
 * treated as the size required: the buffer is discarded and the call retried
 * with that size rounded up to a multiple of 4096.
 *
 * Returns the buffer, which the caller must free().  On allocation or fsinfo()
 * failure an error is printed and the program exits with status 1.
 */
static void *get_attr_alloc(unsigned int mnt_id, unsigned int attr,
			    unsigned int Nth, size_t *_size)
{
	struct fsinfo_params params = {
		.flags		= FSINFO_FLAGS_QUERY_MOUNT,
		.request	= attr,
		.Nth		= Nth,
	};
	size_t buf_size = 4096;
	char file[32];
	void *r;
	long ret;

	snprintf(file, sizeof(file), "%u", mnt_id);

	for (;;) {
		r = malloc(buf_size);
		if (!r) {
			perror("malloc");
			exit(1);
		}
		/* Pre-fill so bytes not written by the call are recognisable. */
		memset(r, 0xbd, buf_size);

		ret = fsinfo(AT_FDCWD, file, &params, sizeof(params), r, buf_size);
		if (ret == -1) {
			fprintf(stderr, "mount-%s: %x,%x,%x %m\n",
				file, params.request, params.Nth, params.Mth);
			exit(1);
		}

		if ((size_t)ret <= buf_size) {
			*_size = ret;
			break;
		}

		/* Too small: release this attempt before retrying, otherwise
		 * every retry leaks the previous buffer.
		 */
		free(r);
		buf_size = ((size_t)ret + 4096 - 1) & ~(size_t)(4096 - 1);
	}

	return r;
}

/*
 * Display a mount and then recurse through its children.
 *
 * @mnt_id: ID of the mount to display.
 * @depth:  length of the tree prefix for this mount; 0 for the top of the
 *          tree, and each level of recursion adds 4.
 * @path:   label to print for this mount.
 *
 * Prints one line for the mount, then fetches its child list and calls itself
 * for each child.  The shared tree_buf/bar_buf buffers are modified for the
 * duration of the recursion and put back before returning, so the order of
 * the statements below matters.
 */
static void display_mount(unsigned int mnt_id, unsigned int depth, char *path)
{
	struct fsinfo_mount_topology top;
	struct fsinfo_mount_child child;
	struct fsinfo_mount_info info;
	struct fsinfo_ids ids;
	void *children;
	unsigned int d;
	size_t ch_size, p_size;
	char dev[64];
	int i, n, s;

	/* Gather everything needed for this mount's output line. */
	get_attr(mnt_id, FSINFO_ATTR_MOUNT_TOPOLOGY, 0, &top, sizeof(top));
	get_attr(mnt_id, FSINFO_ATTR_MOUNT_INFO, 0, &info, sizeof(info));
	get_attr(mnt_id, FSINFO_ATTR_IDS, 0, &ids, sizeof(ids));
	/* The tree prefix was prepared by the parent's invocation. */
	if (depth > 0)
		printf("%s", tree_buf);

	/* Print the label; an empty one is shown as a pair of double quotes. */
	s = strlen(path);
	printf("%s", !s ? "\"\"" : path);
	if (!s)
		s += 2;
	/* Pad prefix + label out to 38 columns, always emitting at least one
	 * space, so that the remaining fields line up under the header.
	 */
	s += depth;
	if (s < 38)
		s = 38 - s;
	else
		s = 1;
	printf("%*.*s", s, s, "");

	/* The CHANGE# column is the sum of all five change counters. */
	sprintf(dev, "%x:%x", ids.f_dev_major, ids.f_dev_minor);
	printf("%10u %8x %2x %x %5s %s",
	       info.mnt_id,
	       (info.sb_changes +
		info.sb_notifications +
		info.mnt_attr_changes +
		info.mnt_topology_changes +
		info.mnt_subtree_notifications),
	       info.attr, top.propagation,
	       dev, ids.f_fs_name);
	putchar('\n');

	/* Fetch the child list.  One record fewer than the list holds is
	 * walked - presumably the final record describes this mount itself;
	 * TODO confirm against the fsinfo() documentation.
	 */
	children = get_attr_alloc(mnt_id, FSINFO_ATTR_MOUNT_CHILDREN, 0, &ch_size);
	n = ch_size / children_list_interval - 1;

	/* Mark this level as having children still to come, and replace the
	 * connector that was drawn for this mount with the parent level's
	 * marker so that lines for descendants show '|' or ' ' there instead.
	 */
	bar_buf[depth + 1] = '|';
	if (depth > 0) {
		tree_buf[depth - 4 + 1] = bar_buf[depth - 4 + 1];
		tree_buf[depth - 4 + 2] = ' ';
	}

	/* Append the " \_ " connector that each child line starts with. */
	tree_buf[depth + 0] = ' ';
	tree_buf[depth + 1] = '\\';
	tree_buf[depth + 2] = '_';
	tree_buf[depth + 3] = ' ';
	tree_buf[depth + 4] = 0;
	d = depth + 4;

	memset(&child, 0, sizeof(child));
	for (i = 0; i < n; i++) {
		void *p = children + i * children_list_interval;

		/* Copy the smaller of the record stride and our structure
		 * size, so a differing record size neither overruns child nor
		 * reads past the record.
		 */
		if (sizeof(child) >= children_list_interval)
			memcpy(&child, p, children_list_interval);
		else
			memcpy(&child, p, sizeof(child));

		/* Last child: nothing more follows at this level, so blank the
		 * marker its descendants will copy.
		 */
		if (i == n - 1)
			bar_buf[depth + 1] = ' ';
		path = get_attr_alloc(child.mnt_id, FSINFO_ATTR_MOUNT_POINT,
				      0, &p_size);
		/* The first byte of the mount point string is skipped -
		 * presumably a leading '/'; TODO confirm.
		 */
		display_mount(child.mnt_id, d, path + 1);
		free(path);
	}

	free(children);
	/* Put the parent's connector back and cut the prefix down to the
	 * length it had on entry.
	 */
	if (depth > 0) {
		tree_buf[depth - 4 + 1] = '\\';
		tree_buf[depth - 4 + 2] = '_';
	}
	tree_buf[depth] = 0;
}

/*
 * Find the ID of whatever is at the nominated path.
 */
static unsigned int lookup_mnt_by_path(const char *path)
{
	struct fsinfo_mount_info mnt;
	struct fsinfo_params params = {
		.flags		= FSINFO_FLAGS_QUERY_PATH,
		.request	= FSINFO_ATTR_MOUNT_INFO,
	};

	if (fsinfo(AT_FDCWD, path, &params, sizeof(params), &mnt, sizeof(mnt)) == -1) {
		perror(path);
		exit(1);
	}

	return mnt.mnt_id;
}

/*
 * Determine the element size for the mount child list.
 */
static unsigned int query_list_element_size(int mnt_id, unsigned int attr)
{
	struct fsinfo_attribute_info attr_info;

	get_attr(mnt_id, FSINFO_ATTR_FSINFO_ATTRIBUTE_INFO, attr,
		 &attr_info, sizeof(attr_info));
	return attr_info.size;
}

/*
 * Print the command-line summary and exit with status 2.
 */
static __attribute__((noreturn)) void usage(void)
{
	printf("Format: test-mntinfo\n");
	printf("Format: test-mntinfo <path>\n");
	printf("Format: test-mntinfo -m <mnt_id>\n");
	exit(2);
}

/*
 * Display the mount tree below a starting point.
 *
 * With no argument the tree is rooted at whatever is mounted on "/" and
 * labelled "ROOT".  With one argument, the argument is either a path to look
 * up or, if -m was given, a numeric mount ID.  Any other argument count, or
 * an unrecognised option, prints the usage summary and exits with status 2.
 */
int main(int argc, char **argv)
{
	unsigned long val;
	unsigned int mnt_id;
	char *path, *end;
	bool use_mnt_id = false;
	int opt;

	/* getopt() signals the end of the options with -1, not 0. */
	while ((opt = getopt(argc, argv, "m")) != -1) {
		switch (opt) {
		case 'm':
			use_mnt_id = true;
			break;
		default:
			/* Unknown option: getopt() has already complained. */
			usage();
		}
	}

	argc -= optind;
	argv += optind;

	switch (argc) {
	case 0:
		mnt_id = lookup_mnt_by_path("/");
		path = "ROOT";
		break;
	case 1:
		path = argv[0];
		if (use_mnt_id) {
			/* Reject empty, partially numeric and out-of-range
			 * IDs rather than querying an unintended mount.
			 */
			errno = 0;
			val = strtoul(argv[0], &end, 0);
			if (errno || end == argv[0] || *end ||
			    val != (unsigned int)val) {
				fprintf(stderr, "Bad mount ID '%s'\n", argv[0]);
				exit(2);
			}
			mnt_id = val;
			break;
		}

		mnt_id = lookup_mnt_by_path(argv[0]);
		break;
	default:
		usage();
	}

	/* The child-list record size is needed before walking the tree. */
	children_list_interval =
		query_list_element_size(mnt_id, FSINFO_ATTR_MOUNT_CHILDREN);

	printf("MOUNT                                 MOUNT ID   CHANGE#  AT P DEV   TYPE\n");
	printf("------------------------------------- ---------- -------- -- - ----- --------\n");
	display_mount(mnt_id, 0, path);
	return 0;
}

  parent reply	other threads:[~2020-03-31 21:52 UTC|newest]

Thread overview: 97+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-30 13:58 Upcoming: Notifications, FS notifications and fsinfo() David Howells
2020-03-30 14:31 ` [GIT PULL] General notification queue and key notifications David Howells
2020-03-31  6:51   ` Stephen Rothwell
2020-06-02 15:55   ` David Howells
2020-06-03  2:15     ` Ian Kent
2020-06-08  0:49       ` Ian Kent
2020-06-10  9:56     ` Christian Brauner
2020-06-10 11:12     ` Karel Zak
2020-06-12 21:32       ` Linus Torvalds
2020-06-12 22:01       ` Linus Torvalds
2020-06-13 13:04       ` David Howells
2020-06-13 16:47         ` Linus Torvalds
2020-06-13 17:03           ` Linus Torvalds
2020-06-13 19:22         ` Miklos Szeredi
2020-06-13 13:24       ` David Howells
2020-06-13 18:00     ` pr-tracker-bot
2020-06-17  1:15     ` Williams, Dan J
2020-06-23 23:38       ` Dan Williams
2020-06-24  0:55       ` David Howells
2020-06-24  1:03         ` Dan Williams
2020-06-24  1:17         ` David Howells
2020-03-30 14:36 ` [GIT PULL] Mount and superblock notifications David Howells
2020-04-04 21:13   ` Linus Torvalds
2020-04-05 22:52     ` Andres Freund
2020-03-30 14:43 ` [GIT PULL] fsinfo: Filesystem information query David Howells
2020-03-30 20:28 ` Upcoming: Notifications, FS notifications and fsinfo() Miklos Szeredi
2020-03-31  9:21   ` Karel Zak
2020-03-30 21:17 ` Christian Brauner
2020-03-31  5:11   ` Miklos Szeredi
2020-03-31  8:15     ` Christian Brauner
2020-03-31  8:34       ` Miklos Szeredi
2020-03-31  8:34     ` Karel Zak
2020-03-31  8:56       ` Miklos Szeredi
2020-03-31  9:49         ` Karel Zak
2020-03-31 12:25         ` Lennart Poettering
2020-03-31 15:10           ` Miklos Szeredi
2020-03-31 15:24             ` Lennart Poettering
2020-03-31 21:56         ` David Howells
2020-03-31 21:54     ` David Howells
2020-04-01  8:43       ` Karel Zak
2020-03-31  7:22   ` Lennart Poettering
2020-03-31 17:31 ` David Howells
2020-03-31 19:42   ` Miklos Szeredi
2020-03-31 19:47   ` David Howells
2020-03-31 21:14   ` David Howells
2020-03-31 21:23   ` David Howells
2020-03-31 21:52 ` David Howells [this message]
2020-04-01  9:04   ` Karel Zak
2020-04-01 13:34     ` Miklos Szeredi
2020-04-01 13:55     ` David Howells
2020-04-01 13:58     ` David Howells
2020-04-01 15:25       ` Miklos Szeredi
2020-04-03  9:11         ` Karel Zak
2020-04-01 16:01       ` David Howells
2020-04-01 16:30         ` Miklos Szeredi
2020-04-02 15:22         ` David Howells
2020-04-02 15:24           ` Miklos Szeredi
2020-04-02 15:42           ` David Howells
2020-04-02 15:24         ` David Howells
2020-04-01 14:41   ` Lennart Poettering
2020-04-01 15:33     ` Miklos Szeredi
2020-04-01 16:06     ` David Howells
2020-04-01 16:40       ` Miklos Szeredi
2020-04-02  2:52         ` Ian Kent
2020-04-02 13:52           ` Miklos Szeredi
2020-04-02 14:36             ` Lennart Poettering
2020-04-02 15:22               ` Miklos Szeredi
2020-04-02 15:28                 ` Lennart Poettering
2020-04-02 15:35                   ` Miklos Szeredi
2020-04-02 15:50                     ` Lennart Poettering
2020-04-02 17:20                       ` Miklos Szeredi
2020-04-03 11:08                         ` Lennart Poettering
2020-04-03 11:48                           ` Miklos Szeredi
2020-04-03 15:01                             ` Lennart Poettering
2020-04-06  9:22                               ` Miklos Szeredi
2020-04-06 17:29                                 ` Lennart Poettering
2020-04-07  2:21                                   ` Ian Kent
2020-04-07 13:59                                     ` Miklos Szeredi
2020-04-07 15:53                                       ` Lennart Poettering
2020-04-07 16:06                                         ` Miklos Szeredi
2020-04-02 15:51                 ` David Howells
2020-04-02 15:56                 ` David Howells
2020-04-03  1:44             ` Ian Kent
2020-04-03 11:11               ` Lennart Poettering
2020-04-03 11:38                 ` Miklos Szeredi
2020-04-03 12:05                   ` Richard Weinberger
2020-04-03 15:12                   ` Lennart Poettering
2020-04-03 20:30                     ` J. Bruce Fields
2020-04-06  8:35                       ` Miklos Szeredi
2020-04-06 16:07                         ` J. Bruce Fields
2020-04-06  9:17                       ` Karel Zak
2020-04-06 16:34                         ` Linus Torvalds
2020-04-06 18:46                           ` J. Bruce Fields
2020-04-06 18:48                           ` Lennart Poettering
2020-04-08  3:36                             ` Linus Torvalds
2020-04-03 15:36                   ` David Howells
2020-04-03 15:41                     ` Lennart Poettering

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2418286.1585691572@warthog.procyon.org.uk \
    --to=dhowells@redhat.com \
    --cc=andres@anarazel.de \
    --cc=christian.brauner@ubuntu.com \
    --cc=cyphar@cyphar.com \
    --cc=dray@redhat.com \
    --cc=jlayton@redhat.com \
    --cc=keyrings@vger.kernel.org \
    --cc=kzak@redhat.com \
    --cc=lennart@poettering.net \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mszeredi@redhat.com \
    --cc=raven@themaw.net \
    --cc=swhiteho@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).