linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Masami Hiramatsu <mhiramat@kernel.org>
To: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: oleg@redhat.com, peterz@infradead.org, srikar@linux.vnet.ibm.com,
	rostedt@goodmis.org, acme@kernel.org, ananth@linux.vnet.ibm.com,
	akpm@linux-foundation.org, alexander.shishkin@linux.intel.com,
	alexis.berlemont@gmail.com, corbet@lwn.net,
	dan.j.williams@intel.com, jolsa@redhat.com, kan.liang@intel.com,
	kjlx@templeofstupid.com, kstewart@linuxfoundation.org,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, milian.wolff@kdab.com, mingo@redhat.com,
	namhyung@kernel.org, naveen.n.rao@linux.vnet.ibm.com,
	pc@us.ibm.com, tglx@linutronix.de, yao.jin@linux.intel.com,
	fengguang.wu@intel.com, jglisse@redhat.com,
	Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Subject: Re: [PATCH v3 6/9] trace_uprobe: Support SDT markers having reference count (semaphore)
Date: Fri, 4 May 2018 13:48:16 +0900	[thread overview]
Message-ID: <20180504134816.8633a157dd036489d9b0f1db@kernel.org> (raw)
In-Reply-To: <20180417043244.7501-7-ravi.bangoria@linux.vnet.ibm.com>

Hi Ravi,

I have some comments, please see below.

On Tue, 17 Apr 2018 10:02:41 +0530
Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com> wrote:

> diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
> index 7bd2760..2db3ed1 100644
> --- a/include/linux/uprobes.h
> +++ b/include/linux/uprobes.h
> @@ -122,6 +122,8 @@ struct uprobe_map_info {
>  	unsigned long vaddr;
>  };
>  
> +extern void (*uprobe_mmap_callback)(struct vm_area_struct *vma);
> +
>  extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
>  extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
>  extern bool is_swbp_insn(uprobe_opcode_t *insn);
> @@ -136,6 +138,8 @@ struct uprobe_map_info {
>  extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
>  extern void uprobe_start_dup_mmap(void);
>  extern void uprobe_end_dup_mmap(void);
> +extern void uprobe_down_write_dup_mmap(void);
> +extern void uprobe_up_write_dup_mmap(void);
>  extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
>  extern void uprobe_free_utask(struct task_struct *t);
>  extern void uprobe_copy_process(struct task_struct *t, unsigned long flags);
> @@ -192,6 +196,12 @@ static inline void uprobe_start_dup_mmap(void)
>  static inline void uprobe_end_dup_mmap(void)
>  {
>  }
> +static inline void uprobe_down_write_dup_mmap(void)
> +{
> +}
> +static inline void uprobe_up_write_dup_mmap(void)
> +{
> +}
>  static inline void
>  uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
>  {
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 096d1e6..e26ad83 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -1044,6 +1044,9 @@ static void build_probe_list(struct inode *inode,
>  	spin_unlock(&uprobes_treelock);
>  }
>  
> +/* Rightnow the only user of this is trace_uprobe. */
> +void (*uprobe_mmap_callback)(struct vm_area_struct *vma);
> +
>  /*
>   * Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
>   *
> @@ -1056,7 +1059,13 @@ int uprobe_mmap(struct vm_area_struct *vma)
>  	struct uprobe *uprobe, *u;
>  	struct inode *inode;
>  
> -	if (no_uprobe_events() || !valid_vma(vma, true))
> +	if (no_uprobe_events())
> +		return 0;
> +
> +	if (uprobe_mmap_callback)
> +		uprobe_mmap_callback(vma);
> +
> +	if (!valid_vma(vma, true))
>  		return 0;
>  
>  	inode = file_inode(vma->vm_file);
> @@ -1247,6 +1256,16 @@ void uprobe_end_dup_mmap(void)
>  	percpu_up_read(&dup_mmap_sem);
>  }
>  
> +void uprobe_down_write_dup_mmap(void)
> +{
> +	percpu_down_write(&dup_mmap_sem);
> +}
> +
> +void uprobe_up_write_dup_mmap(void)
> +{
> +	percpu_up_write(&dup_mmap_sem);
> +}
> +

I'm not sure why these hunks were not included in the previous patch.
If you split out the "uprobe_map_info" export patch, these hunks
should also be split out. (Or both should be merged into this patch.)


>  void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
>  {
>  	if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) {
> diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
> index 0d450b4..1a48b04 100644
> --- a/kernel/trace/trace_uprobe.c
> +++ b/kernel/trace/trace_uprobe.c
> @@ -25,6 +25,8 @@
>  #include <linux/namei.h>
>  #include <linux/string.h>
>  #include <linux/rculist.h>
> +#include <linux/sched/mm.h>
> +#include <linux/highmem.h>
>  
>  #include "trace_probe.h"
>  
> @@ -58,6 +60,7 @@ struct trace_uprobe {
>  	struct inode			*inode;
>  	char				*filename;
>  	unsigned long			offset;
> +	unsigned long			ref_ctr_offset;
>  	unsigned long			nhit;
>  	struct trace_probe		tp;
>  };
> @@ -364,10 +367,10 @@ static int create_trace_uprobe(int argc, char **argv)
>  {
>  	struct trace_uprobe *tu;
>  	struct inode *inode;
> -	char *arg, *event, *group, *filename;
> +	char *arg, *event, *group, *filename, *rctr, *rctr_end;
>  	char buf[MAX_EVENT_NAME_LEN];
>  	struct path path;
> -	unsigned long offset;
> +	unsigned long offset, ref_ctr_offset;
>  	bool is_delete, is_return;
>  	int i, ret;
>  
> @@ -377,6 +380,7 @@ static int create_trace_uprobe(int argc, char **argv)
>  	is_return = false;
>  	event = NULL;
>  	group = NULL;
> +	ref_ctr_offset = 0;
>  
>  	/* argc must be >= 1 */
>  	if (argv[0][0] == '-')
> @@ -456,6 +460,26 @@ static int create_trace_uprobe(int argc, char **argv)
>  		goto fail_address_parse;
>  	}
>  
> +	/* Parse reference counter offset if specified. */
> +	rctr = strchr(arg, '(');
> +	if (rctr) {
> +		rctr_end = strchr(rctr, ')');
> +		if (rctr > rctr_end || *(rctr_end + 1) != 0) {
> +			ret = -EINVAL;
> +			pr_info("Invalid reference counter offset.\n");
> +			goto fail_address_parse;
> +		}
> +
> +		*rctr++ = '\0';
> +		*rctr_end = '\0';
> +		ret = kstrtoul(rctr, 0, &ref_ctr_offset);
> +		if (ret) {
> +			pr_info("Invalid reference counter offset.\n");
> +			goto fail_address_parse;
> +		}
> +	}
> +
> +	/* Parse uprobe offset. */
>  	ret = kstrtoul(arg, 0, &offset);
>  	if (ret)
>  		goto fail_address_parse;
> @@ -490,6 +514,7 @@ static int create_trace_uprobe(int argc, char **argv)
>  		goto fail_address_parse;
>  	}
>  	tu->offset = offset;
> +	tu->ref_ctr_offset = ref_ctr_offset;
>  	tu->inode = inode;
>  	tu->filename = kstrdup(filename, GFP_KERNEL);
>  
> @@ -622,6 +647,8 @@ static int probes_seq_show(struct seq_file *m, void *v)
>  			break;
>  		}
>  	}
> +	if (tu->ref_ctr_offset)
> +		seq_printf(m, "(0x%lx)", tu->ref_ctr_offset);
>  
>  	for (i = 0; i < tu->tp.nr_args; i++)
>  		seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);
> @@ -896,6 +923,129 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
>  	return trace_handle_return(s);
>  }
>  
> +static bool sdt_valid_vma(struct trace_uprobe *tu,
> +			  struct vm_area_struct *vma,
> +			  unsigned long vaddr)
> +{
> +	return tu->ref_ctr_offset &&
> +		vma->vm_file &&
> +		file_inode(vma->vm_file) == tu->inode &&
> +		vma->vm_flags & VM_WRITE &&
> +		vma->vm_start <= vaddr &&
> +		vma->vm_end > vaddr;
> +}
> +
> +static struct vm_area_struct *sdt_find_vma(struct trace_uprobe *tu,
> +					   struct mm_struct *mm,
> +					   unsigned long vaddr)
> +{
> +	struct vm_area_struct *vma = find_vma(mm, vaddr);
> +
> +	return (vma && sdt_valid_vma(tu, vma, vaddr)) ? vma : NULL;
> +}
> +
> +/*
> + * Reference counter gate the invocation of probe. If present,
> + * by default reference counter is 0. One needs to increment
> + * it before tracing the probe and decrement it when done.
> + */
> +static int
> +sdt_update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
> +{
> +	void *kaddr;
> +	struct page *page;
> +	struct vm_area_struct *vma;
> +	int ret = 0;
> +	unsigned short *ptr;
> +
> +	if (vaddr == 0)
> +		return -EINVAL;
> +
> +	ret = get_user_pages_remote(NULL, mm, vaddr, 1,
> +		FOLL_FORCE | FOLL_WRITE, &page, &vma, NULL);
> +	if (ret <= 0)
> +		return ret;

Hmm, the comment on get_user_pages_remote() says:

===
If nr_pages is 0 or negative, returns 0. If no pages were pinned, returns -errno.
===

And you've passed 1 for nr_pages, so it should return either 1 or -errno.

> +
> +	kaddr = kmap_atomic(page);
> +	ptr = kaddr + (vaddr & ~PAGE_MASK);
> +	*ptr += d;
> +	kunmap_atomic(kaddr);
> +
> +	put_page(page);
> +	return 0;

And obviously 0 means "success" for sdt_update_ref_ctr().
I think if get_user_pages_remote() returns 0, this should
return -EBUSY (*) or some other error code.

* It seems that if faultin_page() in __get_user_pages()
returns -EBUSY, get_user_pages_remote() can return 0.

> +}
> +
> +static void sdt_increment_ref_ctr(struct trace_uprobe *tu)
> +{
> +	struct uprobe_map_info *info;
> +
> +	uprobe_down_write_dup_mmap();
> +	info = uprobe_build_map_info(tu->inode->i_mapping,
> +				tu->ref_ctr_offset, false);
> +	if (IS_ERR(info))
> +		goto out;
> +
> +	while (info) {
> +		down_write(&info->mm->mmap_sem);
> +
> +		if (sdt_find_vma(tu, info->mm, info->vaddr))
> +			sdt_update_ref_ctr(info->mm, info->vaddr, 1);

Don't you have to handle the error from mapping the page here?

> +
> +		up_write(&info->mm->mmap_sem);
> +		info = uprobe_free_map_info(info);
> +	}
> +
> +out:
> +	uprobe_up_write_dup_mmap();
> +}
> +
> +/* Called with down_write(&vma->vm_mm->mmap_sem) */
> +static void trace_uprobe_mmap(struct vm_area_struct *vma)
> +{
> +	struct trace_uprobe *tu;
> +	unsigned long vaddr;
> +
> +	if (!(vma->vm_flags & VM_WRITE))
> +		return;
> +
> +	mutex_lock(&uprobe_lock);
> +	list_for_each_entry(tu, &uprobe_list, list) {
> +		if (!trace_probe_is_enabled(&tu->tp))
> +			continue;
> +
> +		vaddr = vma_offset_to_vaddr(vma, tu->ref_ctr_offset);
> +		if (!sdt_valid_vma(tu, vma, vaddr))
> +			continue;
> +
> +		sdt_update_ref_ctr(vma->vm_mm, vaddr, 1);

Same here.

> +	}
> +	mutex_unlock(&uprobe_lock);
> +}
> +
> +static void sdt_decrement_ref_ctr(struct trace_uprobe *tu)
> +{
> +	struct uprobe_map_info *info;
> +
> +	uprobe_down_write_dup_mmap();
> +	info = uprobe_build_map_info(tu->inode->i_mapping,
> +				tu->ref_ctr_offset, false);
> +	if (IS_ERR(info))
> +		goto out;
> +
> +	while (info) {
> +		down_write(&info->mm->mmap_sem);
> +
> +		if (sdt_find_vma(tu, info->mm, info->vaddr))
> +			sdt_update_ref_ctr(info->mm, info->vaddr, -1);

Ditto.

Thank you,

> +
> +		up_write(&info->mm->mmap_sem);
> +		info = uprobe_free_map_info(info);
> +	}
> +
> +out:
> +	uprobe_up_write_dup_mmap();
> +}
> +
>  typedef bool (*filter_func_t)(struct uprobe_consumer *self,
>  				enum uprobe_filter_ctx ctx,
>  				struct mm_struct *mm);
> @@ -941,6 +1091,9 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self,
>  	if (ret)
>  		goto err_buffer;
>  
> +	if (tu->ref_ctr_offset)
> +		sdt_increment_ref_ctr(tu);
> +
>  	return 0;
>  
>   err_buffer:
> @@ -981,6 +1134,9 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self,
>  
>  	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
>  
> +	if (tu->ref_ctr_offset)
> +		sdt_decrement_ref_ctr(tu);
> +
>  	uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
>  	tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
>  
> @@ -1425,6 +1581,8 @@ static __init int init_uprobe_trace(void)
>  	/* Profile interface */
>  	trace_create_file("uprobe_profile", 0444, d_tracer,
>  				    NULL, &uprobe_profile_ops);
> +
> +	uprobe_mmap_callback = trace_uprobe_mmap;
>  	return 0;
>  }
>  
> -- 
> 1.8.3.1
> 


-- 
Masami Hiramatsu <mhiramat@kernel.org>

  reply	other threads:[~2018-05-04  4:48 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-17  4:32 [PATCH v3 0/9] trace_uprobe: Support SDT markers having reference count (semaphore) Ravi Bangoria
2018-04-17  4:32 ` [PATCH v3 1/9] Uprobe: Export vaddr <-> offset conversion functions Ravi Bangoria
2018-04-17  4:32 ` [PATCH v3 2/9] mm: Prefix vma_ to vaddr_to_offset() and offset_to_vaddr() Ravi Bangoria
2018-05-03 14:59   ` Steven Rostedt
2018-04-17  4:32 ` [PATCH v3 3/9] Uprobe: Move mmput() into free_map_info() Ravi Bangoria
2018-04-17  4:32 ` [PATCH v3 4/9] Uprobe: Rename map_info to uprobe_map_info Ravi Bangoria
2018-04-17  4:32 ` [PATCH v3 5/9] Uprobe: Export uprobe_map_info along with uprobe_{build/free}_map_info() Ravi Bangoria
2018-04-17  4:32 ` [PATCH v3 6/9] trace_uprobe: Support SDT markers having reference count (semaphore) Ravi Bangoria
2018-05-04  4:48   ` Masami Hiramatsu [this message]
2018-05-04 14:21     ` Ravi Bangoria
2018-05-07  8:21       ` Ravi Bangoria
2018-05-07 15:56         ` Masami Hiramatsu
2018-05-08  9:46           ` Naveen N. Rao
2018-05-08 10:26           ` Ravi Bangoria
2018-05-24 16:26   ` Oleg Nesterov
2018-05-25  8:28     ` Ravi Bangoria
2018-04-17  4:32 ` [PATCH v3 7/9] trace_uprobe/sdt: Fix multiple update of same reference counter Ravi Bangoria
2018-04-17  4:32 ` [PATCH v3 8/9] trace_uprobe/sdt: Document about " Ravi Bangoria
2018-05-03 14:57   ` Masami Hiramatsu
2018-04-17  4:32 ` [PATCH v3 9/9] perf probe: Support SDT markers having reference counter (semaphore) Ravi Bangoria
2018-05-03 14:57   ` Masami Hiramatsu
2018-05-03  8:32 ` [PATCH v3 0/9] trace_uprobe: Support SDT markers having reference count (semaphore) Ravi Bangoria
2018-05-04 14:30   ` Oleg Nesterov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180504134816.8633a157dd036489d9b0f1db@kernel.org \
    --to=mhiramat@kernel.org \
    --cc=acme@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=alexis.berlemont@gmail.com \
    --cc=ananth@linux.vnet.ibm.com \
    --cc=corbet@lwn.net \
    --cc=dan.j.williams@intel.com \
    --cc=fengguang.wu@intel.com \
    --cc=jglisse@redhat.com \
    --cc=jolsa@redhat.com \
    --cc=kan.liang@intel.com \
    --cc=kjlx@templeofstupid.com \
    --cc=kstewart@linuxfoundation.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=milian.wolff@kdab.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=naveen.n.rao@linux.vnet.ibm.com \
    --cc=oleg@redhat.com \
    --cc=pc@us.ibm.com \
    --cc=peterz@infradead.org \
    --cc=ravi.bangoria@linux.ibm.com \
    --cc=ravi.bangoria@linux.vnet.ibm.com \
    --cc=rostedt@goodmis.org \
    --cc=srikar@linux.vnet.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=yao.jin@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).