From: Slavomir Kaslev <kaslevs@vmware.com>
To: Steven Rostedt <rostedt@goodmis.org>
Cc: "linux-trace-devel@vger.kernel.org"
<linux-trace-devel@vger.kernel.org>,
Yordan Karadzhov <ykaradzhov@vmware.com>,
Tzvetomir Stoyanov <tstoyanov@vmware.com>
Subject: Re: [PATCH v3 6/6] trace-cmd: Add VM kernel tracing over vsock sockets transport
Date: Tue, 15 Jan 2019 15:00:15 +0000 [thread overview]
Message-ID: <CAE0o1NvTM3nk2jVdYirK6D6CXvf4eNqqsEgK_5U-UGA44k2w=Q@mail.gmail.com> (raw)
In-Reply-To: <20190114174603.09287a2c@gandalf.local.home>
On Tue, Jan 15, 2019 at 12:46 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Mon, 14 Jan 2019 17:28:00 +0200
> Slavomir Kaslev <kaslevs@vmware.com> wrote:
> \
> > --- /dev/null
> > +++ b/tracecmd/trace-agent.c
> > @@ -0,0 +1,229 @@
> > +// SPDX-License-Identifier: LGPL-2.1
> > +/*
> > + * Copyright (C) 2018 VMware Inc, Slavomir Kaslev <kaslevs@vmware.com>
> > + *
> > + * based on prior implementation by Yoshihiro Yunomae
> > + * Copyright (C) 2013 Hitachi, Ltd.
> > + * Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
> > + */
> > +
> > +#include <errno.h>
> > +#include <fcntl.h>
> > +#include <getopt.h>
> > +#include <signal.h>
> > +#include <stdbool.h>
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <sys/ioctl.h>
> > +#include <sys/socket.h>
> > +#include <sys/wait.h>
> > +#include <unistd.h>
> > +#include <linux/vm_sockets.h>
> > +
> > +#include "trace-local.h"
> > +#include "trace-msg.h"
> > +
> > +static int make_vsock(unsigned port)
> > +{
> > + struct sockaddr_vm addr = {
> > + .svm_family = AF_VSOCK,
> > + .svm_cid = VMADDR_CID_ANY,
> > + .svm_port = port,
> > + };
> > + int sd;
> > +
> > + sd = socket(AF_VSOCK, SOCK_STREAM, 0);
> > + if (sd < 0)
> > + return -1;
> > +
> > + setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &(int){1}, sizeof(int));
> > +
> > + if (bind(sd, (struct sockaddr *)&addr, sizeof(addr)))
> > + return -1;
> > +
> > + if (listen(sd, SOMAXCONN))
> > + return -1;
> > +
> > + return sd;
> > +}
> > +
> > +static int get_vsock_port(int sd)
> > +{
> > + struct sockaddr_vm addr;
> > + socklen_t addr_len = sizeof(addr);
> > +
> > + if (getsockname(sd, (struct sockaddr *)&addr, &addr_len))
> > + return -1;
> > +
> > + if (addr.svm_family != AF_VSOCK)
> > + return -1;
> > +
> > + return addr.svm_port;
> > +}
> > +
> > +static void make_vsocks(int nr, int **fds, int **ports)
> > +{
> > + int i, fd, port;
> > +
> > + *fds = calloc(nr, sizeof(*fds));
> > + *ports = calloc(nr, sizeof(*ports));
> > + if (!*fds || !*ports)
> > + die("Failed to allocate memory");
> > +
> > + for (i = 0; i < nr; i++) {
> > + fd = make_vsock(VMADDR_PORT_ANY);
> > + if (fd < 0)
> > + die("Failed to open vsock socket");
> > +
> > + port = get_vsock_port(fd);
> > + if (port < 0)
> > + die("Failed to get vsock socket address");
> > +
> > + (*fds)[i] = fd;
> > + (*ports)[i] = port;
> > + }
> > +}
> > +
> > +static void free_vsocks(int nr, int *fds, int *ports)
> > +{
> > + int i;
> > +
> > + for (i = 0; i < nr; i++)
> > + close(fds[i]);
> > + free(fds);
> > + free(ports);
> > +}
> > +
> > +static void agent_handle(int sd, int nr_cpus, int page_size)
> > +{
> > + struct tracecmd_msg_handle *msg_handle;
> > + int *fds, *ports;
> > + char **argv = NULL;
> > + int argc = 0;
> > +
> > + msg_handle = tracecmd_msg_handle_alloc(sd, TRACECMD_MSG_FL_CLIENT);
> > + if (!msg_handle)
> > + die("Failed to allocate message handle");
> > +
> > + if (tracecmd_msg_recv_trace_req(msg_handle, &argc, &argv))
> > + die("Failed to receive trace request");
> > +
> > + make_vsocks(nr_cpus, &fds, &ports);
> > +
> > + if (tracecmd_msg_send_trace_resp(msg_handle, nr_cpus, page_size, ports))
> > + die("Failed to send trace response");
> > +
> > + trace_record_agent(msg_handle, nr_cpus, fds, argc, argv);
> > +
> > + free_vsocks(nr_cpus, fds, ports);
> > + free(argv[0]);
> > + free(argv);
> > + tracecmd_msg_handle_close(msg_handle);
> > + exit(0);
> > +}
> > +
> > +static volatile pid_t handler_pid;
> > +
> > +static void handle_sigchld(int sig)
> > +{
> > + int wstatus;
> > + pid_t pid;
> > +
> > + for (;;) {
> > + pid = waitpid(-1, &wstatus, WNOHANG);
> > + if (pid <= 0)
> > + break;
> > +
> > + if (pid == handler_pid)
> > + handler_pid = 0;
> > + }
> > +}
> > +
> > +static void agent_serve(unsigned port)
> > +{
> > + int sd, cd, nr_cpus;
> > + pid_t pid;
> > +
> > + signal(SIGCHLD, handle_sigchld);
> > +
> > + nr_cpus = count_cpus();
> > + page_size = getpagesize();
> > +
> > + sd = make_vsock(port);
> > + if (sd < 0)
> > + die("Failed to open vsock socket");
> > +
> > + for (;;) {
> > + cd = accept(sd, NULL, NULL);
> > + if (cd < 0) {
> > + if (errno == EINTR)
> > + continue;
> > + die("accept");
> > + }
> > +
> > + if (handler_pid)
> > + goto busy;
> > +
> > + pid = fork();
> > + if (pid == 0) {
> > + close(sd);
> > + signal(SIGCHLD, SIG_DFL);
> > + agent_handle(cd, nr_cpus, page_size);
> > + }
> > + if (pid > 0)
> > + handler_pid = pid;
> > +
> > + busy:
> > + close(cd);
> > + }
> > +
>
> Hmm, I don't see any break from the above for (;;) and that's an
> infinite loop. That makes this dead code below.
>
> > + close(sd);
> > + signal(SIGCHLD, SIG_DFL);
> > +}
>
>
>
> > +
> > +void trace_agent(int argc, char **argv)
> > +{
> > + bool do_daemon = false;
> > + unsigned port = TRACE_AGENT_DEFAULT_PORT;
> > +
> > + if (argc < 2)
> > + usage(argv);
> > +
> > + if (strcmp(argv[1], "agent") != 0)
> > + usage(argv);
> > +
> > + for (;;) {
> > + int c, option_index = 0;
> > + static struct option long_options[] = {
> > + {"port", required_argument, NULL, 'p'},
> > + {"help", no_argument, NULL, '?'},
> > + {NULL, 0, NULL, 0}
> > + };
> > +
> > + c = getopt_long(argc-1, argv+1, "+hp:D",
> > + long_options, &option_index);
> > + if (c == -1)
> > + break;
> > + switch (c) {
> > + case 'h':
> > + usage(argv);
> > + break;
> > + case 'p':
> > + port = atoi(optarg);
> > + break;
> > + case 'D':
> > + do_daemon = true;
> > + break;
> > + default:
> > + usage(argv);
> > + }
> > + }
> > +
> > + if ((argc - optind) >= 2)
> > + usage(argv);
> > +
> > + if (do_daemon && daemon(1, 0))
> > + die("daemon");
> > +
> > + agent_serve(port);
> > +}
> > diff --git a/tracecmd/trace-cmd.c b/tracecmd/trace-cmd.c
> > index 797b303..3ae5e2e 100644
> > --- a/tracecmd/trace-cmd.c
> > +++ b/tracecmd/trace-cmd.c
> > @@ -83,6 +83,9 @@ struct command commands[] = {
> > {"hist", trace_hist},
> > {"mem", trace_mem},
> > {"listen", trace_listen},
> > +#ifdef VSOCK
> > + {"agent", trace_agent},
> > +#endif
> > {"split", trace_split},
> > {"restore", trace_restore},
> > {"stack", trace_stack},
> > diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
> > index 012371c..6c8754e 100644
> > --- a/tracecmd/trace-record.c
> > +++ b/tracecmd/trace-record.c
> > @@ -33,6 +33,9 @@
> > #include <errno.h>
> > #include <limits.h>
> > #include <libgen.h>
> > +#ifdef VSOCK
> > +#include <linux/vm_sockets.h>
> > +#endif
> >
> > #include "trace-local.h"
> > #include "trace-msg.h"
> > @@ -74,8 +77,6 @@ static int buffers;
> > static int clear_function_filters;
> >
> > static char *host;
> > -static int *client_ports;
> > -static int sfd;
> >
> > /* Max size to let a per cpu file get */
> > static int max_kb;
> > @@ -171,6 +172,15 @@ static struct tracecmd_recorder *recorder;
> >
> > static int ignore_event_not_found = 0;
> >
> > +static inline size_t grow_cap(size_t old_cap)
> > +{
> > + size_t cap = 3 * old_cap / 2;
> > +
> > + if (cap < 16)
> > + cap = 16;
> > + return cap;
> > +}
>
> I'm a bit confused by what the grow_cap() is for.
I did some benchmarking awhile ago and calling realloc on each new
entry when appending is indeed linear (which implies that realloc
amortizes to constant).
grow_cap() grows the buffer exponentially and hence calls realloc only
log(n) times, which turns out to be again linear but with much smaller
constant.
Git uses something similar they call ALLOC_GROW:
https://github.com/git/git/blob/0f086e6dcabfbf059e7483ebd7d41080faec3d77/cache.h#L650
I agree that neither of the current uses of grow_cap are not on
performance critical paths so I'll drop it in the next version of this
series.
>
> > +
> > static inline int is_top_instance(struct buffer_instance *instance)
> > {
> > return instance == &top_instance;
> > @@ -518,6 +528,36 @@ static char *get_temp_file(struct buffer_instance *instance, int cpu)
> > return file;
> > }
> >
> > +static char *get_guest_file(const char *file, const char *guest)
> > +{
> > + size_t guest_len = strlen(guest);
> > + size_t file_len = strlen(file);
> > + size_t base_len, idx = 0;
> > + const char *p;
> > + char *out;
> > +
> > + out = malloc(file_len + guest_len + 2 /* dash and \0 */);
> > + if (!out)
> > + return NULL;
> > +
> > + p = strrchr(file, '.');
> > + if (p && p != file)
> > + base_len = p - file;
> > + else
> > + base_len = file_len;
> > +
> > + memcpy(out, file, base_len);
> > + idx += base_len;
> > + out[idx++] = '-';
> > + memcpy(out + idx, guest, guest_len);
> > + idx += guest_len;
> > + memcpy(out + idx, p, file_len - base_len);
> > + idx += file_len - base_len;
> > + out[idx] = '\0';
>
> Can't we pretty much replace this whole function with:
>
> p = strrchr(file, '.');
> if (p && p != file) {
> base_len = p - file;
> else
> base_len = strlen(file);
>
> asprintf(&out, "%.*s-%s%s", base_len, file, guest, file + base_len);
>
> ?
Good catch. This is a leftover from the virt-server branch which I
simplified but didn't know about the %.*s format magic.
>
>
> > +
> > + return out;
> > +}
> > +
> > static void put_temp_file(char *file)
> > {
> > free(file);
> > @@ -623,6 +663,16 @@ static void delete_thread_data(void)
> > }
> > }
> >
> > +static void tell_guests_to_stop(void)
> > +{
> > + struct buffer_instance *instance;
> > +
> > + for_all_instances(instance) {
> > + if (instance->flags & BUFFER_FL_GUEST)
> > + tracecmd_msg_handle_close(instance->msg_handle);
> > + }
> > +}
> > +
> > static void stop_threads(enum trace_type type)
> > {
> > struct timeval tv = { 0, 0 };
> > @@ -632,6 +682,8 @@ static void stop_threads(enum trace_type type)
> > if (!recorder_threads)
> > return;
> >
> > + tell_guests_to_stop();
> > +
> > /* Tell all threads to finish up */
> > for (i = 0; i < recorder_threads; i++) {
> > if (pids[i].pid > 0) {
> > @@ -2571,14 +2623,14 @@ static void flush(int sig)
> > tracecmd_stop_recording(recorder);
> > }
> >
> > -static void connect_port(int cpu)
> > +static int connect_port(const char *host, unsigned port)
> > {
> > struct addrinfo hints;
> > struct addrinfo *results, *rp;
> > - int s;
> > + int s, sfd;
> > char buf[BUFSIZ];
> >
> > - snprintf(buf, BUFSIZ, "%d", client_ports[cpu]);
> > + snprintf(buf, BUFSIZ, "%d", port);
> >
> > memset(&hints, 0, sizeof(hints));
> > hints.ai_family = AF_UNSPEC;
> > @@ -2605,7 +2657,190 @@ static void connect_port(int cpu)
> >
> > freeaddrinfo(results);
> >
> > - client_ports[cpu] = sfd;
> > + return sfd;
> > +}
> > +
> > +#ifdef VSOCK
> > +static int open_vsock(unsigned cid, unsigned port)
> > +{
> > + struct sockaddr_vm addr = {
> > + .svm_family = AF_VSOCK,
> > + .svm_cid = cid,
> > + .svm_port = port,
> > + };
> > + int sd;
> > +
> > + sd = socket(AF_VSOCK, SOCK_STREAM, 0);
> > + if (sd < 0)
> > + return -1;
> > +
> > + if (connect(sd, (struct sockaddr *)&addr, sizeof(addr)))
> > + return -1;
> > +
> > + return sd;
> > +}
> > +#else
> > +static inline int open_vsock(unsigned cid, unsigned port)
> > +{
> > + die("vsock is not supported");
> > + return -1;
> > +}
> > +#endif
> > +
> > +static int do_accept(int sd)
> > +{
> > + int cd;
> > +
> > + for (;;) {
> > + cd = accept(sd, NULL, NULL);
> > + if (cd < 0) {
> > + if (errno == EINTR)
> > + continue;
> > + die("accept");
> > + }
> > +
> > + return cd;
> > + }
> > +
> > + return -1;
> > +}
> > +
> > +static bool is_digits(const char *s)
> > +{
> > + const char *p;
> > +
> > + for (p = s; *p; p++)
> > + if (!isdigit(*p))
> > + return false;
> > +
> > + return true;
> > +}
> > +
> > +struct guest {
> > + char *name;
> > + int cid;
> > + int pid;
> > +};
> > +
> > +static size_t guests_cap, guests_len;
> > +static struct guest *guests;
> > +
> > +static char *get_qemu_guest_name(char *arg)
> > +{
> > + char *tok, *end = arg;
> > +
> > + while ((tok = strsep(&end, ","))) {
> > + if (strncmp(tok, "guest=", 6) == 0)
> > + return tok + 6;
> > + }
> > +
> > + return arg;
> > +}
> > +
> > +static void read_qemu_guests(void)
> > +{
> > + static bool initialized = false;
> > + struct dirent *entry;
> > + char path[PATH_MAX];
> > + DIR *dir;
> > +
> > + if (initialized)
> > + return;
> > +
> > + initialized = true;
> > + dir = opendir("/proc");
> > + if (!dir)
> > + die("opendir");
> > +
> > + for (entry = readdir(dir); entry; entry = readdir(dir)) {
> > + bool is_qemu = false, last_was_name = false;
> > + struct guest guest = {};
> > + char *p, *arg = NULL;
> > + size_t arg_size = 0;
> > + FILE *f;
> > +
> > + if (!(entry->d_type == DT_DIR && is_digits(entry->d_name)))
> > + continue;
> > +
> > + guest.pid = atoi(entry->d_name);
> > + snprintf(path, sizeof(path), "/proc/%s/cmdline", entry->d_name);
> > + f = fopen(path, "r");
> > + if (!f)
> > + continue;
> > +
> > + while (getdelim(&arg, &arg_size, 0, f) != -1) {
> > + if (!is_qemu && strstr(arg, "qemu-system-")) {
> > + is_qemu = true;
> > + continue;
> > + }
> > +
> > + if (!is_qemu)
> > + continue;
> > +
> > + if (strcmp(arg, "-name") == 0) {
> > + last_was_name = true;
> > + continue;
> > + }
> > +
> > + if (last_was_name) {
> > + guest.name = strdup(get_qemu_guest_name(arg));
> > + last_was_name = false;
> > + continue;
> > + }
> > +
> > + p = strstr(arg, "guest-cid=");
> > + if (p) {
> > + guest.cid = atoi(p + 10);
> > + continue;
> > + }
> > + }
> > +
> > + if (is_qemu) {
> > + if (guests_cap == guests_len) {
> > + guests_cap = grow_cap(guests_cap);
>
> This isn't a fast path is it? I'm not sure why we are using
> guests_cap() here. Note, realloc allocs more than required and is
> pretty much a nop if the allocated amount is still within its
> allocation that it made the first time.
>
> -- Steve
>
>
> > + guests = realloc(guests,
> > + guests_cap * sizeof(*guests));
> > + }
> > + guests[guests_len++] = guest;
> > + }
> > +
> > + free(arg);
> > + fclose(f);
> > + }
> > +
> > + closedir(dir);
> > +}
> > +
> >
prev parent reply other threads:[~2019-01-15 15:00 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-01-14 15:27 [PATCH v3 0/6] Add VM kernel tracing over vsock sockets Slavomir Kaslev
2019-01-14 15:27 ` [PATCH v3 1/6] trace-cmd: Minor refactoring Slavomir Kaslev
2019-01-14 15:27 ` [PATCH v3 2/6] trace-cmd: Detect if vsock sockets are available Slavomir Kaslev
2019-01-14 15:27 ` [PATCH v3 3/6] trace-cmd: Add tracecmd_create_recorder_virt function Slavomir Kaslev
2019-01-14 22:10 ` Steven Rostedt
2019-01-15 14:21 ` Slavomir Kaslev
2019-01-15 14:46 ` Steven Rostedt
2019-01-14 15:27 ` [PATCH v3 4/6] trace-cmd: Add TRACE_REQ and TRACE_RESP messages Slavomir Kaslev
2019-01-14 15:27 ` [PATCH v3 5/6] trace-cmd: Add buffer instance flags for tracing in guest and agent context Slavomir Kaslev
2019-01-14 15:28 ` [PATCH v3 6/6] trace-cmd: Add VM kernel tracing over vsock sockets transport Slavomir Kaslev
2019-01-14 22:46 ` Steven Rostedt
2019-01-15 15:00 ` Slavomir Kaslev [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAE0o1NvTM3nk2jVdYirK6D6CXvf4eNqqsEgK_5U-UGA44k2w=Q@mail.gmail.com' \
--to=kaslevs@vmware.com \
--cc=linux-trace-devel@vger.kernel.org \
--cc=rostedt@goodmis.org \
--cc=tstoyanov@vmware.com \
--cc=ykaradzhov@vmware.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).