From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: 
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1754605AbbA2NUC (ORCPT ); Thu, 29 Jan 2015 08:20:02 -0500
Received: from mail-pa0-f46.google.com ([209.85.220.46]:34246 "EHLO
	mail-pa0-f46.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1751793AbbA2NUA (ORCPT );
	Thu, 29 Jan 2015 08:20:00 -0500
Date: Thu, 29 Jan 2015 22:19:38 +0900
From: Namhyung Kim
To: Arnaldo Carvalho de Melo
Cc: Ingo Molnar, Peter Zijlstra, Jiri Olsa, LKML, David Ahern,
	Adrian Hunter, Andi Kleen, Stephane Eranian, Frederic Weisbecker
Subject: Re: [PATCH 29/42] perf tools: Protect dso cache fd with a mutex
Message-ID: <20150129131938.GA29130@danjae>
References: <1422518843-25818-1-git-send-email-namhyung@kernel.org>
	<1422518843-25818-30-git-send-email-namhyung@kernel.org>
	<20150129123107.GR7220@kernel.org>
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Disposition: inline
In-Reply-To: <20150129123107.GR7220@kernel.org>
User-Agent: Mutt/1.5.23 (2014-03-12)
Sender: linux-kernel-owner@vger.kernel.org
List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org

On Thu, Jan 29, 2015 at 09:31:07AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Thu, Jan 29, 2015 at 05:07:10PM +0900, Namhyung Kim escreveu:
> > When dso cache is accessed in multi-thread environment, it's possible
> > to close other dso->data.fd during operation due to open file limit.
> > Protect the file descriptors using a separate mutex.
> > 
> > Signed-off-by: Namhyung Kim
> > ---
> >  tools/perf/tests/dso-data.c |   5 ++
> >  tools/perf/util/dso.c       | 136 +++++++++++++++++++++++++++++---------------
> >  2 files changed, 94 insertions(+), 47 deletions(-)
> > 
> > diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
> > index caaf37f079b1..0276e7d2d41b 100644
> > --- a/tools/perf/tests/dso-data.c
> > +++ b/tools/perf/tests/dso-data.c
> > @@ -111,6 +111,9 @@ int test__dso_data(void)
> >  	memset(&machine, 0, sizeof(machine));
> >  
> >  	dso = dso__new((const char *)file);
> > +	TEST_ASSERT_VAL("failed to get dso", dso);
> > +
> > +	dso->binary_type = DSO_BINARY_TYPE__SYSTEM_PATH_DSO;
> >  
> >  	/* Basic 10 bytes tests. */
> >  	for (i = 0; i < ARRAY_SIZE(offsets); i++) {
> > @@ -199,6 +202,8 @@ static int dsos__create(int cnt, int size)
> >  
> >  		dsos[i] = dso__new(file);
> >  		TEST_ASSERT_VAL("failed to get dso", dsos[i]);
> > +
> > +		dsos[i]->binary_type = DSO_BINARY_TYPE__SYSTEM_PATH_DSO;
> 
> Those two are unrelated, please put them in a separate patch, one that I
> can even cherrypick ahead of the other patches.

It's a consequence of changing dso__data_read_offset() not to call
dso__data_fd() for performance reasons. The binary_type used to be
determined inside dso__data_fd(), but now it needs to be set explicitly
for this test.

In the original code, dso__data_fd() was called every time we accessed
the dso cache just to check for an error, I guess. But checking the
status field is enough for that.
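To make that concrete, here is the before/after of the entry check I
mean, condensed from the last hunk of the patch below (the __old/__new
suffixes are only for this mail, the function keeps its name in the
patch):

/* before: every read went through dso__data_fd(), which also resolved
 * dso->binary_type as a side effect */
ssize_t dso__data_read_offset__old(struct dso *dso, struct machine *machine,
				   u64 offset, u8 *data, ssize_t size)
{
	if (dso__data_fd(dso, machine) < 0)
		return -1;

	return data_read_offset(dso, offset, data, size);
}

/* after: only the cached status is checked on this path, so a user that
 * never goes through dso__data_fd() - like the test - has to set
 * dso->binary_type itself */
ssize_t dso__data_read_offset__new(struct dso *dso, struct machine *machine,
				   u64 offset, u8 *data, ssize_t size)
{
	if (dso->data.status == DSO_DATA_STATUS_ERROR)
		return -1;

	return data_read_offset(dso, machine, offset, data, size);
}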

> 
> > 	}
> > 
> > 	return 0;
> > diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
> > index 11ece224ef50..ae92046ae2c8 100644
> > --- a/tools/perf/util/dso.c
> > +++ b/tools/perf/util/dso.c
> > @@ -213,6 +213,7 @@ bool dso__needs_decompress(struct dso *dso)
> >   */
> >  static LIST_HEAD(dso__data_open);
> >  static long dso__data_open_cnt;
> > +static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
> >  
> >  static void dso__list_add(struct dso *dso)
> >  {
> > @@ -240,7 +241,7 @@ static int do_open(char *name)
> >  		if (fd >= 0)
> >  			return fd;
> >  
> > -		pr_debug("dso open failed, mmap: %s\n",
> > +		pr_debug("dso open failed: %s\n",
> >  			 strerror_r(errno, sbuf, sizeof(sbuf)));
> >  		if (!dso__data_open_cnt || errno != EMFILE)
> 
> Ditto, another unrelated patch, please separate.

Ah, okay. I kept it here since it's just a small change, but I'll
separate it if that helps reviewing.

> 
> > 			break;
> > @@ -382,7 +383,9 @@ static void check_data_close(void)
> >   */
> >  void dso__data_close(struct dso *dso)
> >  {
> > +	pthread_mutex_lock(&dso__data_open_lock);
> >  	close_dso(dso);
> > +	pthread_mutex_unlock(&dso__data_open_lock);
> >  }
> >  
> >  /**
> > @@ -405,6 +408,8 @@ int dso__data_fd(struct dso *dso, struct machine *machine)
> >  	if (dso->data.status == DSO_DATA_STATUS_ERROR)
> >  		return -1;
> >  
> > +	pthread_mutex_lock(&dso__data_open_lock);
> > +
> >  	if (dso->data.fd >= 0)
> >  		goto out;
> >  
> > @@ -427,6 +432,7 @@ int dso__data_fd(struct dso *dso, struct machine *machine)
> >  	else
> >  		dso->data.status = DSO_DATA_STATUS_ERROR;
> >  
> > +	pthread_mutex_unlock(&dso__data_open_lock);
> >  	return dso->data.fd;
> >  }
> >  
> > @@ -531,52 +537,66 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset,
> >  }
> >  
> >  static ssize_t
> > -dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
> > +dso_cache__read(struct dso *dso, struct machine *machine,
> > +		u64 offset, u8 *data, ssize_t size)
> >  {
> >  	struct dso_cache *cache;
> >  	struct dso_cache *old;
> > -	ssize_t ret;
> > -
> > -	do {
> > -		u64 cache_offset;
> 
> While I understand that there was no need for this do { } while (0)
> construct in the first place, removing it in this patch is not
> interesting, as it is both unrelated to this patch and makes it
> harder to review by just looking at the patch :-\ Please refrain from
> doing this in this patch.

Understood, sorry for bothering! :)

> 
> A later patch that does _just_ that could be done, if you feel like
> doing it.

Okay.

> 
> > +	ssize_t ret = -EINVAL;
> > +	u64 cache_offset;
> >  
> > -	ret = -ENOMEM;
> > +	cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
> > +	if (!cache)
> > +		return -ENOMEM;
> >  
> > -		cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
> > -		if (!cache)
> > -			break;
> > +	cache_offset = offset & DSO__DATA_CACHE_MASK;
> >  
> > -		cache_offset = offset & DSO__DATA_CACHE_MASK;
> > -		ret = -EINVAL;
> > +	pthread_mutex_lock(&dso__data_open_lock);
> >  
> > -		if (-1 == lseek(dso->data.fd, cache_offset, SEEK_SET))
> > -			break;
> > +	/*
> > +	 * dso->data.fd might be closed if other thread opened another
> > +	 * file (dso) due to open file limit (RLIMIT_NOFILE).
> > +	 */
> > +	if (dso->data.fd < 0) {
> > +		dso->data.fd = open_dso(dso, machine);
> 
> Also please consider adding a backpointer to machine in the dso object,
> since you need to reopen it, so that we don't have to go on passing
> machine around to dso_cache__read(), etc.

Yeah, it's a pain to pass a machine pointer around.
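Maybe something like this - a rough sketch only, with simplified types;
the dso->data.machine field and the dso__data_reopen() helper are made
up here, they are not part of the patch:

/*
 * Stash the machine pointer next to the fd when the dso data is first
 * opened, so the re-open paths don't need machine passed all the way
 * down the call chain.
 */
struct machine;					/* opaque here */

struct dso {
	struct {
		int		 fd;
		struct machine	*machine;	/* set on first open */
	} data;
};

int open_dso(struct dso *dso, struct machine *machine);

/* callers would hold dso__data_open_lock around this */
static int dso__data_reopen(struct dso *dso)
{
	if (dso->data.fd < 0)
		dso->data.fd = open_dso(dso, dso->data.machine);

	return dso->data.fd;
}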

> 
> This probably needs to be done in the patch that makes dso->data.fd to
> be closed due to limit.

I'm not sure which patch you are referring to. It already closes an fd
when it hits the limit - what this patch adds is protection for such
concurrent opens and closes when multiple threads are involved. (There
is a small generic sketch of the pattern at the end of this mail.)

Thanks,
Namhyung


> 
> > +		if (dso->data.fd < 0) {
> > +			ret = -errno;
> > +			dso->data.status = DSO_DATA_STATUS_ERROR;
> > +			goto err_unlock;
> > +		}
> > +	}
> >  
> > -		ret = read(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE);
> > -		if (ret <= 0)
> > -			break;
> > +	if (-1 == lseek(dso->data.fd, cache_offset, SEEK_SET))
> > +		goto err_unlock;
> >  
> > -		cache->offset = cache_offset;
> > -		cache->size = ret;
> > -		old = dso_cache__insert(dso, cache);
> > -		if (old) {
> > -			/* we lose the race */
> > -			free(cache);
> > -			cache = old;
> > -		}
> > +	ret = read(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE);
> > +	if (ret <= 0)
> > +		goto err_unlock;
> >  
> > -		ret = dso_cache__memcpy(cache, offset, data, size);
> > +	pthread_mutex_unlock(&dso__data_open_lock);
> >  
> > -	} while (0);
> > +	cache->offset = cache_offset;
> > +	cache->size = ret;
> > +	old = dso_cache__insert(dso, cache);
> > +	if (old) {
> > +		/* we lose the race */
> > +		free(cache);
> > +		cache = old;
> > +	}
> >  
> > +	ret = dso_cache__memcpy(cache, offset, data, size);
> >  	if (ret <= 0)
> >  		free(cache);
> >  
> >  	return ret;
> > +
> > +err_unlock:
> > +	pthread_mutex_unlock(&dso__data_open_lock);
> > +	return ret;
> >  }
> >  
> > -static ssize_t dso_cache_read(struct dso *dso, u64 offset,
> > -			      u8 *data, ssize_t size)
> > +static ssize_t dso_cache_read(struct dso *dso, struct machine *machine,
> > +			      u64 offset, u8 *data, ssize_t size)
> >  {
> >  	struct dso_cache *cache;
> >  
> > @@ -584,7 +604,7 @@ static ssize_t dso_cache_read(struct dso *dso, u64 offset,
> >  	if (cache)
> >  		return dso_cache__memcpy(cache, offset, data, size);
> >  	else
> > -		return dso_cache__read(dso, offset, data, size);
> > +		return dso_cache__read(dso, machine, offset, data, size);
> >  }
> >  
> >  /*
> > @@ -592,7 +612,8 @@ static ssize_t dso_cache_read(struct dso *dso, u64 offset,
> >   * in the rb_tree. Any read to already cached data is served
> >   * by cached data.
> >   */
> > -static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
> > +static ssize_t cached_read(struct dso *dso, struct machine *machine,
> > +			   u64 offset, u8 *data, ssize_t size)
> >  {
> >  	ssize_t r = 0;
> >  	u8 *p = data;
> > @@ -600,7 +621,7 @@ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
> >  	do {
> >  		ssize_t ret;
> >  
> > -		ret = dso_cache_read(dso, offset, p, size);
> > +		ret = dso_cache_read(dso, machine, offset, p, size);
> >  		if (ret < 0)
> >  			return ret;
> >  
> > @@ -620,21 +641,42 @@ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
> >  	return r;
> >  }
> >  
> > -static int data_file_size(struct dso *dso)
> > +static int data_file_size(struct dso *dso, struct machine *machine)
> >  {
> > +	int ret = 0;
> >  	struct stat st;
> >  	char sbuf[STRERR_BUFSIZE];
> >  
> > -	if (!dso->data.file_size) {
> > -		if (fstat(dso->data.fd, &st)) {
> > -			pr_err("dso mmap failed, fstat: %s\n",
> > -			       strerror_r(errno, sbuf, sizeof(sbuf)));
> > -			return -1;
> > +	if (dso->data.file_size)
> > +		return 0;
> > +
> > +	pthread_mutex_lock(&dso__data_open_lock);
> > +
> > +	/*
> > +	 * dso->data.fd might be closed if other thread opened another
> > +	 * file (dso) due to open file limit (RLIMIT_NOFILE).
> > +	 */
> > +	if (dso->data.fd < 0) {
> > +		dso->data.fd = open_dso(dso, machine);
> > +		if (dso->data.fd < 0) {
> > +			ret = -errno;
> > +			dso->data.status = DSO_DATA_STATUS_ERROR;
> > +			goto out;
> >  		}
> > -		dso->data.file_size = st.st_size;
> >  	}
> >  
> > -	return 0;
> > +	if (fstat(dso->data.fd, &st) < 0) {
> > +		ret = -errno;
> > +		pr_err("dso cache fstat failed: %s\n",
> > +		       strerror_r(errno, sbuf, sizeof(sbuf)));
> > +		dso->data.status = DSO_DATA_STATUS_ERROR;
> > +		goto out;
> > +	}
> > +	dso->data.file_size = st.st_size;
> > +
> > +out:
> > +	pthread_mutex_unlock(&dso__data_open_lock);
> > +	return ret;
> >  }
> >  
> >  /**
> > @@ -652,17 +694,17 @@ off_t dso__data_size(struct dso *dso, struct machine *machine)
> >  	if (fd < 0)
> >  		return fd;
> >  
> > -	if (data_file_size(dso))
> > +	if (data_file_size(dso, machine))
> >  		return -1;
> >  
> >  	/* For now just estimate dso data size is close to file size */
> >  	return dso->data.file_size;
> >  }
> >  
> > -static ssize_t data_read_offset(struct dso *dso, u64 offset,
> > -				u8 *data, ssize_t size)
> > +static ssize_t data_read_offset(struct dso *dso, struct machine *machine,
> > +				u64 offset, u8 *data, ssize_t size)
> >  {
> > -	if (data_file_size(dso))
> > +	if (data_file_size(dso, machine))
> >  		return -1;
> >  
> >  	/* Check the offset sanity. */
> > @@ -672,7 +714,7 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset,
> >  	if (offset + size < offset)
> >  		return -1;
> >  
> > -	return cached_read(dso, offset, data, size);
> > +	return cached_read(dso, machine, offset, data, size);
> >  }
> >  
> >  /**
> > @@ -689,10 +731,10 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset,
> >  ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
> >  			      u64 offset, u8 *data, ssize_t size)
> >  {
> > -	if (dso__data_fd(dso, machine) < 0)
> > +	if (dso->data.status == DSO_DATA_STATUS_ERROR)
> >  		return -1;
> >  
> > -	return data_read_offset(dso, offset, data, size);
> > +	return data_read_offset(dso, machine, offset, data, size);
> >  }
> >  
> >  /**
> > -- 
> > 2.2.2
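For completeness, here is the interleaving the new dso__data_open_lock
is meant to prevent, as a minimal generic sketch - this is not the perf
code itself, and all of the names (struct shared_file, read_locked(),
...) are made up for this mail:

#include <pthread.h>
#include <sys/types.h>
#include <unistd.h>

static pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER;

struct shared_file {
	int fd;					/* may be closed by any thread holding fd_lock */
	int (*reopen)(struct shared_file *sf);	/* re-open after such a close */
};

/*
 * Without the lock, another thread hitting RLIMIT_NOFILE can close (and
 * the kernel can reuse) sf->fd between the fd check and the pread(), so
 * we would read from a stale or wrong descriptor.  Holding fd_lock
 * across the check, the possible re-open and the read makes the sequence
 * atomic with respect to the close path, which takes the same lock.
 */
static ssize_t read_locked(struct shared_file *sf, void *buf,
			   size_t len, off_t off)
{
	ssize_t ret = -1;

	pthread_mutex_lock(&fd_lock);

	/* somebody closed our fd to free a slot - get it back */
	if (sf->fd < 0 && sf->reopen(sf) < 0)
		goto out;

	ret = pread(sf->fd, buf, len, off);
out:
	pthread_mutex_unlock(&fd_lock);
	return ret;
}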