From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755953AbcLOSiE (ORCPT ); Thu, 15 Dec 2016 13:38:04 -0500 Received: from mx0b-001b2d01.pphosted.com ([148.163.158.5]:42052 "EHLO mx0a-001b2d01.pphosted.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1752715AbcLOSh7 (ORCPT ); Thu, 15 Dec 2016 13:37:59 -0500 Subject: [PATCH v4 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info From: Hari Bathini To: ast@fb.com, peterz@infradead.org, lkml , acme@kernel.org, alexander.shishkin@linux.intel.com, mingo@redhat.com Cc: daniel@iogearbox.net, rostedt@goodmis.org, Ananth N Mavinakayanahalli , ebiederm@xmission.com, sargun@sargun.me, Aravinda Prasad , brendan.d.gregg@gmail.com Date: Fri, 16 Dec 2016 00:07:06 +0530 In-Reply-To: <148182699546.5314.279803283347257825.stgit@hbathini.in.ibm.com> References: <148182699546.5314.279803283347257825.stgit@hbathini.in.ibm.com> User-Agent: StGit/0.17.1-dirty MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit X-TM-AS-MML: disable X-Content-Scanned: Fidelis XPS MAILER x-cbid: 16121518-0016-0000-0000-000002028094 X-IBM-AV-DETECTION: SAVI=unused REMOTE=unused XFE=unused x-cbparentid: 16121518-0017-0000-0000-00000612028A Message-Id: <148182702692.5314.556699668213767056.stgit@hbathini.in.ibm.com> X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10432:,, definitions=2016-12-15_13:,, signatures=0 X-Proofpoint-Spam-Details: rule=outbound_notspam policy=outbound score=0 spamscore=0 suspectscore=2 malwarescore=0 phishscore=0 adultscore=0 bulkscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.0.1-1612050000 definitions=main-1612150280 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org With the advert of container technologies like docker, that depend on namespaces for isolation, there is a need for tracing support for namespaces. This patch introduces new PERF_RECORD_NAMESPACES event for tracing based on namespaces related info. Signed-off-by: Hari Bathini --- include/linux/perf_event.h | 2 + include/uapi/linux/perf_event.h | 31 +++++++++ kernel/events/core.c | 135 +++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 3 + kernel/nsproxy.c | 5 + 5 files changed, 175 insertions(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4741ecd..42d8aa6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1110,6 +1110,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); +extern void perf_event_namespaces(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -1312,6 +1313,7 @@ static inline int perf_unregister_guest_info_callbacks static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } +static inline void perf_event_namespaces(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c66a485..80024f4 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + __reserved_1 : 35; union { __u32 wakeup_events; /* wakeup every n events */ @@ -610,6 +611,23 @@ struct perf_event_header { __u16 size; }; +struct perf_ns_link_info { + __u64 dev; + __u64 ino; +}; + +enum { + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NAMESPACES_MAX, /* maximum available namespaces */ +}; + enum perf_event_type { /* @@ -862,6 +880,17 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* + * struct { + * struct perf_event_header header; + * u32 pid; + * u32 tid; + * struct namespace_link_info link_info[NAMESPACES_MAX]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_NAMESPACES = 16, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index faf073d..c76485b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -46,6 +46,8 @@ #include #include #include +#include +#include #include "internal.h" @@ -375,6 +377,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; +static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; @@ -3882,6 +3885,8 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_mmap_events); if (event->attr.comm) atomic_dec(&nr_comm_events); + if (event->attr.namespaces) + atomic_dec(&nr_namespaces_events); if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) @@ -6382,6 +6387,7 @@ static void perf_event_task(struct task_struct *task, void perf_event_fork(struct task_struct *task) { perf_event_task(task, NULL, 1); + perf_event_namespaces(task); } /* @@ -6484,6 +6490,128 @@ void perf_event_comm(struct task_struct *task, bool exec) } /* + * namespaces tracking + */ + +struct namespaces_event_id { + struct perf_event_header header; + + u32 pid; + u32 tid; + struct perf_ns_link_info link_info[NAMESPACES_MAX]; +}; + +struct perf_namespaces_event { + struct task_struct *task; + + struct namespaces_event_id event_id; +}; + +static int perf_event_namespaces_match(struct perf_event *event) +{ + return event->attr.namespaces; +} + +static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info, + struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct path ns_path; + struct inode *ns_inode; + void *error; + + error = ns_get_path(&ns_path, task, ns_ops); + if (!error) { + ns_inode = ns_path.dentry->d_inode; + ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev); + ns_link_info->ino = ns_inode->i_ino; + } +} + +static void perf_event_namespaces_output(struct perf_event *event, + void *data) +{ + struct perf_namespaces_event *namespaces_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + struct namespaces_event_id *ei; + struct task_struct *task = namespaces_event->task; + int ret; + + if (!perf_event_namespaces_match(event)) + return; + + ei = &namespaces_event->event_id; + perf_event_header__init_id(&ei->header, &sample, event); + ret = perf_output_begin(&handle, event, ei->header.size); + if (ret) + return; + + ei->pid = perf_event_pid(event, task); + ei->tid = perf_event_tid(event, task); + + perf_fill_ns_link_info(&ei->link_info[MNT_NS_INDEX], + task, &mntns_operations); + +#ifdef CONFIG_USER_NS + perf_fill_ns_link_info(&ei->link_info[USER_NS_INDEX], + task, &userns_operations); +#endif +#ifdef CONFIG_NET_NS + perf_fill_ns_link_info(&ei->link_info[NET_NS_INDEX], + task, &netns_operations); +#endif +#ifdef CONFIG_UTS_NS + perf_fill_ns_link_info(&ei->link_info[UTS_NS_INDEX], + task, &utsns_operations); +#endif +#ifdef CONFIG_IPC_NS + perf_fill_ns_link_info(&ei->link_info[IPC_NS_INDEX], + task, &ipcns_operations); +#endif +#ifdef CONFIG_PID_NS + perf_fill_ns_link_info(&ei->link_info[PID_NS_INDEX], + task, &pidns_operations); +#endif +#ifdef CONFIG_CGROUPS + perf_fill_ns_link_info(&ei->link_info[CGROUP_NS_INDEX], + task, &cgroupns_operations); +#endif + + perf_output_put(&handle, namespaces_event->event_id); + + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +void perf_event_namespaces(struct task_struct *task) +{ + struct perf_namespaces_event namespaces_event; + + if (!atomic_read(&nr_namespaces_events)) + return; + + namespaces_event = (struct perf_namespaces_event){ + .task = task, + .event_id = { + .header = { + .type = PERF_RECORD_NAMESPACES, + .misc = 0, + .size = sizeof(namespaces_event.event_id), + }, + /* .pid */ + /* .tid */ + /* .link_info[NAMESPACES_MAX] */ + }, + }; + + perf_iterate_sb(perf_event_namespaces_output, + &namespaces_event, + NULL); +} + +/* * mmap tracking */ @@ -9028,6 +9156,8 @@ static void account_event(struct perf_event *event) atomic_inc(&nr_mmap_events); if (event->attr.comm) atomic_inc(&nr_comm_events); + if (event->attr.namespaces) + atomic_inc(&nr_namespaces_events); if (event->attr.task) atomic_inc(&nr_task_events); if (event->attr.freq) @@ -9542,6 +9672,11 @@ SYSCALL_DEFINE5(perf_event_open, return -EACCES; } + if (attr.namespaces) { + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + } + if (attr.freq) { if (attr.sample_freq > sysctl_perf_event_sample_rate) return -EINVAL; diff --git a/kernel/fork.c b/kernel/fork.c index 869b8cc..c7b71f0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2289,6 +2289,9 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) free_fs_struct(new_fs); bad_unshare_out: + if (!err) + perf_event_namespaces(current); + return err; } diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 782102e..4c25e6e 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -26,6 +26,7 @@ #include #include #include +#include static struct kmem_cache *nsproxy_cachep; @@ -264,6 +265,10 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) switch_task_namespaces(tsk, new_nsproxy); out: fput(file); + + if (!err) + perf_event_namespaces(tsk); + return err; }