All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] Improve the performance of --num-threads -d 31
@ 2016-02-17  7:05 Zhou Wenjian
  2016-02-22 16:58 ` Minfei Huang
                   ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Zhou Wenjian @ 2016-02-17  7:05 UTC (permalink / raw)
  To: kexec

v2:
        1. address Minoru Usui's comments about magic number and error message
        2. fix a bug in cyclic mode caused by optimizing

v1:
        1. change page_flag.ready's value to enum
        2. change the patch description
        3. cleanup some codes
        4. fix a bug in cyclic mode

multi-threads implementation will introduce extra cost when handling
each page. The origin implementation will also do the extra work for
filtered pages. So there is a big performance degradation in
--num-threads -d 31.
The new implementation won't do the extra work for filtered pages any
more. So the performance of -d 31 is close to that of serial processing.

The new implementation is just like the following:
        * The basic idea is producer producing page and consumer writing page.
        * Each producer has a page_flag_buf list which is used for storing
          a page's description.
        * The size of page_flag_buf is small, so it won't take too much memory.
        * And all producers will share a page_data_buf array which is
          used for storing page's compressed data.
        * The main thread is the consumer. It will find the next pfn and write
          it into file.
        * The next pfn is smallest pfn in all page_flag_buf.

Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com>
---
 makedumpfile.c | 266 ++++++++++++++++++++++++++++++++++++++-------------------
 makedumpfile.h |  31 ++++---
 2 files changed, 200 insertions(+), 97 deletions(-)

diff --git a/makedumpfile.c b/makedumpfile.c
index fa0b779..31329b5 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -3483,7 +3483,8 @@ initial_for_parallel()
 	unsigned long page_data_buf_size;
 	unsigned long limit_size;
 	int page_data_num;
-	int i;
+	struct page_flag *current;
+	int i, j;
 
 	len_buf_out = calculate_len_buf_out(info->page_size);
 
@@ -3562,8 +3563,10 @@ initial_for_parallel()
 		      - MAP_REGION * info->num_threads) * 0.6;
 
 	page_data_num = limit_size / page_data_buf_size;
+	info->num_buffers = 3 * info->num_threads;
 
-	info->num_buffers = MIN(NUM_BUFFERS, page_data_num);
+	info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS);
+	info->num_buffers = MIN(info->num_buffers, page_data_num);
 
 	DEBUG_MSG("Number of struct page_data for produce/consume: %d\n",
 			info->num_buffers);
@@ -3588,6 +3591,36 @@ initial_for_parallel()
 	}
 
 	/*
+	 * initial page_flag for each thread
+	 */
+	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
+	    == NULL) {
+		MSG("Can't allocate memory for page_flag_buf. %s\n",
+				strerror(errno));
+		return FALSE;
+	}
+	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
+
+	for (i = 0; i < info->num_threads; i++) {
+		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
+			MSG("Can't allocate memory for page_flag. %s\n",
+				strerror(errno));
+			return FALSE;
+		}
+		current = info->page_flag_buf[i];
+
+		for (j = 1; j < NUM_BUFFERS; j++) {
+			if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) {
+				MSG("Can't allocate memory for page_flag. %s\n",
+					strerror(errno));
+				return FALSE;
+			}
+			current = current->next;
+		}
+		current->next = info->page_flag_buf[i];
+	}
+
+	/*
 	 * initial fd_memory for threads
 	 */
 	for (i = 0; i < info->num_threads; i++) {
@@ -3612,7 +3645,8 @@ initial_for_parallel()
 void
 free_for_parallel()
 {
-	int i;
+	int i, j;
+	struct page_flag *current;
 
 	if (info->threads != NULL) {
 		for (i = 0; i < info->num_threads; i++) {
@@ -3655,6 +3689,19 @@ free_for_parallel()
 		free(info->page_data_buf);
 	}
 
+	if (info->page_flag_buf != NULL) {
+		for (i = 0; i < info->num_threads; i++) {
+			for (j = 0; j < NUM_BUFFERS; j++) {
+				if (info->page_flag_buf[i] != NULL) {
+					current = info->page_flag_buf[i];
+					info->page_flag_buf[i] = current->next;
+					free(current);
+				}
+			}
+		}
+		free(info->page_flag_buf);
+	}
+
 	if (info->parallel_info == NULL)
 		return;
 
@@ -7076,10 +7123,10 @@ kdump_thread_function_cyclic(void *arg) {
 	void *retval = PTHREAD_FAIL;
 	struct thread_args *kdump_thread_args = (struct thread_args *)arg;
 	struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
+	struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
 	struct cycle *cycle = kdump_thread_args->cycle;
-	int page_data_num = kdump_thread_args->page_data_num;
-	mdf_pfn_t pfn;
-	int index;
+	mdf_pfn_t pfn = cycle->start_pfn;
+	int index = kdump_thread_args->thread_num;
 	int buf_ready;
 	int dumpable;
 	int fd_memory = 0;
@@ -7125,47 +7172,46 @@ kdump_thread_function_cyclic(void *arg) {
 						kdump_thread_args->thread_num);
 	}
 
-	while (1) {
-		/* get next pfn */
-		pthread_mutex_lock(&info->current_pfn_mutex);
-		pfn = info->current_pfn;
-		info->current_pfn++;
-		pthread_mutex_unlock(&info->current_pfn_mutex);
+	/*
+	 * filtered page won't take anything
+	 * unfiltered zero page will only take a page_flag_buf
+	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
+	 */
+	while (pfn < kdump_thread_args->end_pfn) {
+		buf_ready = FALSE;
 
-		if (pfn >= kdump_thread_args->end_pfn)
-			break;
+		while (page_data_buf[index].used != FALSE ||
+				pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
+			index = (index + 1) % info->num_buffers;
 
-		index = -1;
-		buf_ready = FALSE;
+		page_data_buf[index].used = TRUE;
 
 		while (buf_ready == FALSE) {
 			pthread_testcancel();
-
-			index = pfn % page_data_num;
-
-			if (pfn - info->consumed_pfn > info->num_buffers)
-				continue;
-
-			if (page_data_buf[index].ready != 0)
+			if (page_flag_buf->ready == FLAG_READY)
 				continue;
 
-			pthread_mutex_lock(&page_data_buf[index].mutex);
-
-			if (page_data_buf[index].ready != 0)
-				goto unlock;
+			/* get next pfn */
+			pthread_mutex_lock(&info->current_pfn_mutex);
+			pfn = info->current_pfn;
+			info->current_pfn++;
+			page_flag_buf->ready = FLAG_FILLING;
+			pthread_mutex_unlock(&info->current_pfn_mutex);
 
-			buf_ready = TRUE;
+			page_flag_buf->pfn = pfn;
 
-			page_data_buf[index].pfn = pfn;
-			page_data_buf[index].ready = 1;
+			if (pfn >= kdump_thread_args->end_pfn) {
+				page_data_buf[index].used = FALSE;
+				page_flag_buf->ready = FLAG_READY;
+				break;
+			}
 
 			dumpable = is_dumpable(
 				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
 				pfn,
 				cycle);
-			page_data_buf[index].dumpable = dumpable;
 			if (!dumpable)
-				goto unlock;
+				continue;
 
 			if (!read_pfn_parallel(fd_memory, pfn, buf,
 					       &bitmap_memory_parallel,
@@ -7178,11 +7224,11 @@ kdump_thread_function_cyclic(void *arg) {
 
 			if ((info->dump_level & DL_EXCLUDE_ZERO)
 			    && is_zero_page(buf, info->page_size)) {
-				page_data_buf[index].zero = TRUE;
-				goto unlock;
+				page_flag_buf->zero = TRUE;
+				goto next;
 			}
 
-			page_data_buf[index].zero = FALSE;
+			page_flag_buf->zero = FALSE;
 
 			/*
 			 * Compress the page data.
@@ -7232,12 +7278,16 @@ kdump_thread_function_cyclic(void *arg) {
 				page_data_buf[index].size  = info->page_size;
 				memcpy(page_data_buf[index].buf, buf, info->page_size);
 			}
-unlock:
-			pthread_mutex_unlock(&page_data_buf[index].mutex);
+			page_flag_buf->index = index;
+			buf_ready = TRUE;
+next:
+			page_flag_buf->ready = FLAG_READY;
+			page_flag_buf = page_flag_buf->next;
 
 		}
-	}
 
+		pthread_mutex_unlock(&page_data_buf[index].mutex);
+	}
 	retval = NULL;
 
 fail:
@@ -7265,14 +7315,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 	struct page_desc pd;
 	struct timeval tv_start;
 	struct timeval last, new;
-	unsigned long long consuming_pfn;
 	pthread_t **threads = NULL;
 	struct thread_args *kdump_thread_args = NULL;
 	void *thread_result;
-	int page_data_num;
+	int page_buf_num;
 	struct page_data *page_data_buf = NULL;
 	int i;
 	int index;
+	int end_count, consuming, check_count;
+	mdf_pfn_t current_pfn, temp_pfn;
 
 	if (info->flag_elf_dumpfile)
 		return FALSE;
@@ -7319,16 +7370,11 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 	threads = info->threads;
 	kdump_thread_args = info->kdump_thread_args;
 
-	page_data_num = info->num_buffers;
+	page_buf_num = info->num_buffers;
 	page_data_buf = info->page_data_buf;
 
-	for (i = 0; i < page_data_num; i++) {
-		/*
-		 * producer will use pfn in page_data_buf to decide the
-		 * consumed pfn
-		 */
-		page_data_buf[i].pfn = start_pfn - 1;
-		page_data_buf[i].ready = 0;
+	for (i = 0; i < page_buf_num; i++) {
+		page_data_buf[i].used = FALSE;
 		res = pthread_mutex_init(&page_data_buf[i].mutex, NULL);
 		if (res != 0) {
 			ERRMSG("Can't initialize mutex of page_data_buf. %s\n",
@@ -7342,8 +7388,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 		kdump_thread_args[i].len_buf_out = len_buf_out;
 		kdump_thread_args[i].start_pfn = start_pfn;
 		kdump_thread_args[i].end_pfn = end_pfn;
-		kdump_thread_args[i].page_data_num = page_data_num;
+		kdump_thread_args[i].page_buf_num = page_buf_num;
 		kdump_thread_args[i].page_data_buf = page_data_buf;
+		kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
 		kdump_thread_args[i].cycle = cycle;
 
 		res = pthread_create(threads[i], NULL,
@@ -7356,55 +7403,101 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 		}
 	}
 
-	consuming_pfn = start_pfn;
-	index = -1;
+	end_count = 0;
+	while (1) {
+		consuming = 0;
+		check_count = 0;
 
-	gettimeofday(&last, NULL);
+		/*
+		 * The basic idea is producer producing page and consumer writing page.
+		 * Each producer have a page_flag_buf list which is used for storing page's description.
+		 * The size of page_flag_buf is little so it won't take too much memory.
+		 * And all producers will share a page_data_buf array which is used for storing page's compressed data.
+		 * The main thread is the consumer. It will find the next pfn and write it into file.
+		 * The next pfn is smallest pfn in all page_flag_buf.
+		 */
+		gettimeofday(&last, NULL);
+		while (1) {
+			current_pfn = end_pfn;
 
-	while (consuming_pfn < end_pfn) {
-		index = consuming_pfn % page_data_num;
+			/*
+			 * page_flag_buf is in circular linked list.
+			 * The array info->page_flag_buf[] records the current page_flag_buf in each thread's
+			 * page_flag_buf list.
+			 * consuming is used for recording in which thread the pfn is the smallest.
+			 * current_pfn is used for recording the value of pfn when checking the pfn.
+			 */
+			for (i = 0; i < info->num_threads; i++) {
+				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
+					continue;
+				temp_pfn = info->page_flag_buf[i]->pfn;
 
-		gettimeofday(&new, NULL);
-		if (new.tv_sec - last.tv_sec > WAIT_TIME) {
-			ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn);
-			goto out;
-		}
+				/*
+				 * count how many threads have reached the end.
+				 */
+				if (temp_pfn >= end_pfn) {
+					/*
+					 * prevent setting FLAG_UNUSED being optimized.
+					 */
+					MSG("-");
 
-		/*
-		 * check pfn first without mutex locked to reduce the time
-		 * trying to lock the mutex
-		 */
-		if (page_data_buf[index].pfn != consuming_pfn)
-			continue;
+					info->page_flag_buf[i]->ready = FLAG_UNUSED;
 
-		if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
-			continue;
+					info->current_pfn = end_pfn;
+					end_count++;
+					continue;
+				}
+
+				if (current_pfn < temp_pfn)
+					continue;
 
-		/* check whether the found one is ready to be consumed */
-		if (page_data_buf[index].pfn != consuming_pfn ||
-		    page_data_buf[index].ready != 1) {
-			goto unlock;
+				check_count++;
+				consuming = i;
+				current_pfn = temp_pfn;
+			}
+
+			/*
+			 * If all the threads have reached the end, we will finish writing.
+			 */
+			if (end_count >= info->num_threads)
+				goto finish;
+
+			/*
+			 * Since it has the probabilty that there is no page_flag_buf being ready,
+			 * we should recheck if it happens.
+			 */
+			if (check_count == 0)
+				continue;
+
+			/*
+			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
+			 * So we should recheck.
+			 */
+			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
+				gettimeofday(&new, NULL);
+				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
+					ERRMSG("Can't get data of pfn.\n");
+					goto out;
+				}
+				continue;
+			}
+
+			if (current_pfn == info->page_flag_buf[consuming]->pfn)
+				break;
 		}
 
 		if ((num_dumped % per) == 0)
 			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
 
-		/* next pfn is found, refresh last here */
-		last = new;
-		consuming_pfn++;
-		info->consumed_pfn++;
-		page_data_buf[index].ready = 0;
-
-		if (page_data_buf[index].dumpable == FALSE)
-			goto unlock;
-
 		num_dumped++;
 
-		if (page_data_buf[index].zero == TRUE) {
+
+		if (info->page_flag_buf[consuming]->zero == TRUE) {
 			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
 				goto out;
 			pfn_zero++;
 		} else {
+			index = info->page_flag_buf[consuming]->index;
 			pd.flags      = page_data_buf[index].flags;
 			pd.size       = page_data_buf[index].size;
 			pd.page_flags = 0;
@@ -7420,12 +7513,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 			 */
 			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
 				goto out;
-
+			page_data_buf[index].used = FALSE;
 		}
-unlock:
-		pthread_mutex_unlock(&page_data_buf[index].mutex);
+		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
+		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
 	}
-
+finish:
 	ret = TRUE;
 	/*
 	 * print [100 %]
@@ -7464,7 +7557,7 @@ out:
 	}
 
 	if (page_data_buf != NULL) {
-		for (i = 0; i < page_data_num; i++) {
+		for (i = 0; i < page_buf_num; i++) {
 			pthread_mutex_destroy(&page_data_buf[i].mutex);
 		}
 	}
@@ -7564,6 +7657,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag
 		num_dumped++;
 		if (!read_pfn(pfn, buf))
 			goto out;
+
 		filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size);
 
 		/*
diff --git a/makedumpfile.h b/makedumpfile.h
index e0b5bbf..8a9a5b2 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -977,7 +977,7 @@ typedef unsigned long long int ulonglong;
 #define PAGE_DATA_NUM	(50)
 #define WAIT_TIME	(60 * 10)
 #define PTHREAD_FAIL	((void *)-2)
-#define NUM_BUFFERS	(50)
+#define NUM_BUFFERS	(20)
 
 struct mmap_cache {
 	char	*mmap_buf;
@@ -985,28 +985,36 @@ struct mmap_cache {
 	off_t   mmap_end_offset;
 };
 
+enum {
+	FLAG_UNUSED,
+	FLAG_READY,
+	FLAG_FILLING
+};
+struct page_flag {
+	mdf_pfn_t pfn;
+	char zero;
+	char ready;
+	short index;
+	struct page_flag *next;
+};
+
 struct page_data
 {
-	mdf_pfn_t pfn;
-	int dumpable;
-	int zero;
-	unsigned int flags;
+	pthread_mutex_t mutex;
 	long size;
 	unsigned char *buf;
-	pthread_mutex_t mutex;
-	/*
-	 * whether the page_data is ready to be consumed
-	 */
-	int ready;
+	int flags;
+	int used;
 };
 
 struct thread_args {
 	int thread_num;
 	unsigned long len_buf_out;
 	mdf_pfn_t start_pfn, end_pfn;
-	int page_data_num;
+	int page_buf_num;
 	struct cycle *cycle;
 	struct page_data *page_data_buf;
+	struct page_flag *page_flag_buf;
 };
 
 /*
@@ -1295,6 +1303,7 @@ struct DumpInfo {
 	pthread_t **threads;
 	struct thread_args *kdump_thread_args;
 	struct page_data *page_data_buf;
+	struct page_flag **page_flag_buf;
 	pthread_rwlock_t usemmap_rwlock;
 	mdf_pfn_t current_pfn;
 	pthread_mutex_t current_pfn_mutex;
-- 
1.8.3.1




_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-17  7:05 [PATCH v2] Improve the performance of --num-threads -d 31 Zhou Wenjian
@ 2016-02-22 16:58 ` Minfei Huang
  2016-02-23  5:26   ` Minfei Huang
  2016-02-23  1:32 ` Minoru Usui
  2016-02-24  8:13 ` Minoru Usui
  2 siblings, 1 reply; 26+ messages in thread
From: Minfei Huang @ 2016-02-22 16:58 UTC (permalink / raw)
  To: Zhou Wenjian; +Cc: kexec

On 02/17/16 at 03:05pm, Zhou Wenjian wrote:
> The new implementation won't do the extra work for filtered pages any
> more. So the performance of -d 31 is close to that of serial processing.

Hi, Wenjian.

Command makedumpfile is complied with this patch on version 1.5.9, and
makedumpfile.backup is complied on version 1.5.7.

The compressed file vmcore is 67G filtered out from 4T memory.

kdump:/# time bash -x a.sh makedumpfile vmcore02 1 --num-threads 128
+ makedumpfile --num-threads -l --message-level 1 -d 31 /proc/vmcore /kdumproot/home//var/crash/127.0.0.1-2016-02-22-23:22:36/vmcore02
Copying data                       : [100.0 %] /

real    5m24.378s
user    2m3.572s
sys 0m57.133s
kdump:/# time bash -x a.sh makedumpfile vmcore04 1 --num-threads 0
+ makedumpfile --num-threads -l --message-level 1 -d 31 /proc/vmcore /kdumproot/home//var/crash/127.0.0.1-2016-02-22-23:22:36/vmcore04
Copying data                       : [100.0 %] /

real    5m17.519s
user    2m6.638s
sys 0m57.157s
kdump:/# time bash -x a.sh makedumpfile.backup vmcore05 1
+ makedumpfile.backup -l --message-level 1 -d 31 /proc/vmcore /kdumproot/home//var/crash/127.0.0.1-2016-02-22-23:22:36/vmcore05
Copying data                       : [100.0 %] \

real    5m51.227s
user    3m41.281s
sys 0m14.924s

The above test result seems to reach this goal.

>  makedumpfile.c | 266 ++++++++++++++++++++++++++++++++++++++-------------------
>  makedumpfile.h |  31 ++++---
>  2 files changed, 200 insertions(+), 97 deletions(-)
> 
> diff --git a/makedumpfile.c b/makedumpfile.c
> index fa0b779..31329b5 100644
> --- a/makedumpfile.c
> +++ b/makedumpfile.c
> @@ -3562,8 +3563,10 @@ initial_for_parallel()
>  		      - MAP_REGION * info->num_threads) * 0.6;
>  
>  	page_data_num = limit_size / page_data_buf_size;

3562         limit_size = (get_free_memory_size()
3563                       - MAP_REGION * info->num_threads) * 0.6;

limit_size gets its value from the above calculation. It would be better
to add a sanity check before computing limit_size, since the value
returned by get_free_memory_size may be smaller than MAP_REGION * info->num_threads
(which would make the result negative).

> +	for (i = 0; i < info->num_threads; i++) {
> +		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
> +			MSG("Can't allocate memory for page_flag. %s\n",
> +				strerror(errno));
> +			return FALSE;
> +		}
> +		current = info->page_flag_buf[i];
> +
> +		for (j = 1; j < NUM_BUFFERS; j++) {
> +			if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) {
> +				MSG("Can't allocate memory for page_flag. %s\n",
> +					strerror(errno));
> +				return FALSE;
> +			}
> +			current = current->next;
> +		}

The code would be more elegant and simpler if makedumpfile
used list.h.

Thanks
Minfei

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-17  7:05 [PATCH v2] Improve the performance of --num-threads -d 31 Zhou Wenjian
  2016-02-22 16:58 ` Minfei Huang
@ 2016-02-23  1:32 ` Minoru Usui
  2016-02-23  3:45   ` "Zhou, Wenjian/周文?"
  2016-02-24  8:13 ` Minoru Usui
  2 siblings, 1 reply; 26+ messages in thread
From: Minoru Usui @ 2016-02-23  1:32 UTC (permalink / raw)
  To: Zhou Wenjian, kexec

Hello, Zhou

Thank you for reflecting my comments.
I have other comments.

See below.

> -----Original Message-----
> From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of Zhou Wenjian
> Sent: Wednesday, February 17, 2016 4:06 PM
> To: kexec@lists.infradead.org
> Subject: [PATCH v2] Improve the performance of --num-threads -d 31
> 
> v2:
>         1. address Minoru Usui's comments about magic number and error message
>         2. fix a bug in cyclic mode caused by optimizing
> 
> v1:
>         1. change page_flag.ready's value to enum
>         2. change the patch description
>         3. cleanup some codes
>         4. fix a bug in cyclic mode
> 
> multi-threads implementation will introduce extra cost when handling
> each page. The origin implementation will also do the extra work for
> filtered pages. So there is a big performance degradation in
> --num-threads -d 31.
> The new implementation won't do the extra work for filtered pages any
> more. So the performance of -d 31 is close to that of serial processing.
> 
> The new implementation is just like the following:
>         * The basic idea is producer producing page and consumer writing page.
>         * Each producer have a page_flag_buf list which is used for storing
>           page's description.
>         * The size of page_flag_buf is little so it won't take too much memory.
>         * And all producers will share a page_data_buf array which is
>           used for storing page's compressed data.
>         * The main thread is the consumer. It will find the next pfn and write
>           it into file.
>         * The next pfn is smallest pfn in all page_flag_buf.
> 
> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com>
> ---
>  makedumpfile.c | 266 ++++++++++++++++++++++++++++++++++++++-------------------
>  makedumpfile.h |  31 ++++---
>  2 files changed, 200 insertions(+), 97 deletions(-)
> 
> diff --git a/makedumpfile.c b/makedumpfile.c
> index fa0b779..31329b5 100644
> --- a/makedumpfile.c
> +++ b/makedumpfile.c
> @@ -3483,7 +3483,8 @@ initial_for_parallel()
>  	unsigned long page_data_buf_size;
>  	unsigned long limit_size;
>  	int page_data_num;
> -	int i;
> +	struct page_flag *current;
> +	int i, j;
> 
>  	len_buf_out = calculate_len_buf_out(info->page_size);
> 
> @@ -3562,8 +3563,10 @@ initial_for_parallel()
>  		      - MAP_REGION * info->num_threads) * 0.6;
> 
>  	page_data_num = limit_size / page_data_buf_size;
> +	info->num_buffers = 3 * info->num_threads;
> 
> -	info->num_buffers = MIN(NUM_BUFFERS, page_data_num);
> +	info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS);
> +	info->num_buffers = MIN(info->num_buffers, page_data_num);
> 
>  	DEBUG_MSG("Number of struct page_data for produce/consume: %d\n",
>  			info->num_buffers);
> @@ -3588,6 +3591,36 @@ initial_for_parallel()
>  	}
> 
>  	/*
> +	 * initial page_flag for each thread
> +	 */
> +	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
> +	    == NULL) {
> +		MSG("Can't allocate memory for page_flag_buf. %s\n",
> +				strerror(errno));
> +		return FALSE;
> +	}
> +	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
> +
> +	for (i = 0; i < info->num_threads; i++) {
> +		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
> +			MSG("Can't allocate memory for page_flag. %s\n",
> +				strerror(errno));
> +			return FALSE;
> +		}
> +		current = info->page_flag_buf[i];
> +
> +		for (j = 1; j < NUM_BUFFERS; j++) {
> +			if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) {
> +				MSG("Can't allocate memory for page_flag. %s\n",
> +					strerror(errno));
> +				return FALSE;
> +			}
> +			current = current->next;
> +		}
> +		current->next = info->page_flag_buf[i];
> +	}
> +
> +	/*
>  	 * initial fd_memory for threads
>  	 */
>  	for (i = 0; i < info->num_threads; i++) {
> @@ -3612,7 +3645,8 @@ initial_for_parallel()
>  void
>  free_for_parallel()
>  {
> -	int i;
> +	int i, j;
> +	struct page_flag *current;
> 
>  	if (info->threads != NULL) {
>  		for (i = 0; i < info->num_threads; i++) {
> @@ -3655,6 +3689,19 @@ free_for_parallel()
>  		free(info->page_data_buf);
>  	}
> 
> +	if (info->page_flag_buf != NULL) {
> +		for (i = 0; i < info->num_threads; i++) {
> +			for (j = 0; j < NUM_BUFFERS; j++) {
> +				if (info->page_flag_buf[i] != NULL) {
> +					current = info->page_flag_buf[i];
> +					info->page_flag_buf[i] = current->next;
> +					free(current);
> +				}
> +			}
> +		}
> +		free(info->page_flag_buf);
> +	}
> +
>  	if (info->parallel_info == NULL)
>  		return;
> 
> @@ -7076,10 +7123,10 @@ kdump_thread_function_cyclic(void *arg) {
>  	void *retval = PTHREAD_FAIL;
>  	struct thread_args *kdump_thread_args = (struct thread_args *)arg;
>  	struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
> +	struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
>  	struct cycle *cycle = kdump_thread_args->cycle;
> -	int page_data_num = kdump_thread_args->page_data_num;
> -	mdf_pfn_t pfn;
> -	int index;
> +	mdf_pfn_t pfn = cycle->start_pfn;
> +	int index = kdump_thread_args->thread_num;
>  	int buf_ready;
>  	int dumpable;
>  	int fd_memory = 0;
> @@ -7125,47 +7172,46 @@ kdump_thread_function_cyclic(void *arg) {
>  						kdump_thread_args->thread_num);
>  	}
> 
> -	while (1) {
> -		/* get next pfn */
> -		pthread_mutex_lock(&info->current_pfn_mutex);
> -		pfn = info->current_pfn;
> -		info->current_pfn++;
> -		pthread_mutex_unlock(&info->current_pfn_mutex);
> +	/*
> +	 * filtered page won't take anything
> +	 * unfiltered zero page will only take a page_flag_buf
> +	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
> +	 */
> +	while (pfn < kdump_thread_args->end_pfn) {
> +		buf_ready = FALSE;
> 
> -		if (pfn >= kdump_thread_args->end_pfn)
> -			break;
> +		while (page_data_buf[index].used != FALSE ||
> +				pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
> +			index = (index + 1) % info->num_buffers;
> -		index = -1;
> -		buf_ready = FALSE;
> +		page_data_buf[index].used = TRUE;
> 
>  		while (buf_ready == FALSE) {
>  			pthread_testcancel();
> -
> -			index = pfn % page_data_num;
> -
> -			if (pfn - info->consumed_pfn > info->num_buffers)
> -				continue;
> -
> -			if (page_data_buf[index].ready != 0)
> +			if (page_flag_buf->ready == FLAG_READY)
>  				continue;
> 
> -			pthread_mutex_lock(&page_data_buf[index].mutex);
> -
> -			if (page_data_buf[index].ready != 0)
> -				goto unlock;
> +			/* get next pfn */
> +			pthread_mutex_lock(&info->current_pfn_mutex);
> +			pfn = info->current_pfn;
> +			info->current_pfn++;
> +			page_flag_buf->ready = FLAG_FILLING;
> +			pthread_mutex_unlock(&info->current_pfn_mutex);
> 
> -			buf_ready = TRUE;
> +			page_flag_buf->pfn = pfn;
> 
> -			page_data_buf[index].pfn = pfn;
> -			page_data_buf[index].ready = 1;
> +			if (pfn >= kdump_thread_args->end_pfn) {
> +				page_data_buf[index].used = FALSE;
> +				page_flag_buf->ready = FLAG_READY;
> +				break;
> +			}
> 
>  			dumpable = is_dumpable(
>  				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
>  				pfn,
>  				cycle);
> -			page_data_buf[index].dumpable = dumpable;
>  			if (!dumpable)
> -				goto unlock;
> +				continue;
> 
>  			if (!read_pfn_parallel(fd_memory, pfn, buf,
>  					       &bitmap_memory_parallel,
> @@ -7178,11 +7224,11 @@ kdump_thread_function_cyclic(void *arg) {
> 
>  			if ((info->dump_level & DL_EXCLUDE_ZERO)
>  			    && is_zero_page(buf, info->page_size)) {
> -				page_data_buf[index].zero = TRUE;
> -				goto unlock;
> +				page_flag_buf->zero = TRUE;
> +				goto next;
>  			}
> 
> -			page_data_buf[index].zero = FALSE;
> +			page_flag_buf->zero = FALSE;
> 
>  			/*
>  			 * Compress the page data.
> @@ -7232,12 +7278,16 @@ kdump_thread_function_cyclic(void *arg) {
>  				page_data_buf[index].size  = info->page_size;
>  				memcpy(page_data_buf[index].buf, buf, info->page_size);
>  			}
> -unlock:
> -			pthread_mutex_unlock(&page_data_buf[index].mutex);
> +			page_flag_buf->index = index;
> +			buf_ready = TRUE;
> +next:
> +			page_flag_buf->ready = FLAG_READY;
> +			page_flag_buf = page_flag_buf->next;
> 
>  		}
> -	}
> 
> +		pthread_mutex_unlock(&page_data_buf[index].mutex);
> +	}
>  	retval = NULL;
> 
>  fail:
> @@ -7265,14 +7315,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  	struct page_desc pd;
>  	struct timeval tv_start;
>  	struct timeval last, new;
> -	unsigned long long consuming_pfn;
>  	pthread_t **threads = NULL;
>  	struct thread_args *kdump_thread_args = NULL;
>  	void *thread_result;
> -	int page_data_num;
> +	int page_buf_num;
>  	struct page_data *page_data_buf = NULL;
>  	int i;
>  	int index;
> +	int end_count, consuming, check_count;
> +	mdf_pfn_t current_pfn, temp_pfn;
> 
>  	if (info->flag_elf_dumpfile)
>  		return FALSE;
> @@ -7319,16 +7370,11 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  	threads = info->threads;
>  	kdump_thread_args = info->kdump_thread_args;
> 
> -	page_data_num = info->num_buffers;
> +	page_buf_num = info->num_buffers;
>  	page_data_buf = info->page_data_buf;
> 
> -	for (i = 0; i < page_data_num; i++) {
> -		/*
> -		 * producer will use pfn in page_data_buf to decide the
> -		 * consumed pfn
> -		 */
> -		page_data_buf[i].pfn = start_pfn - 1;
> -		page_data_buf[i].ready = 0;
> +	for (i = 0; i < page_buf_num; i++) {
> +		page_data_buf[i].used = FALSE;
>  		res = pthread_mutex_init(&page_data_buf[i].mutex, NULL);
>  		if (res != 0) {
>  			ERRMSG("Can't initialize mutex of page_data_buf. %s\n",
> @@ -7342,8 +7388,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  		kdump_thread_args[i].len_buf_out = len_buf_out;
>  		kdump_thread_args[i].start_pfn = start_pfn;
>  		kdump_thread_args[i].end_pfn = end_pfn;
> -		kdump_thread_args[i].page_data_num = page_data_num;
> +		kdump_thread_args[i].page_buf_num = page_buf_num;
>  		kdump_thread_args[i].page_data_buf = page_data_buf;
> +		kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
>  		kdump_thread_args[i].cycle = cycle;
> 
>  		res = pthread_create(threads[i], NULL,
> @@ -7356,55 +7403,101 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  		}
>  	}
> 
> -	consuming_pfn = start_pfn;
> -	index = -1;
> +	end_count = 0;
> +	while (1) {
> +		consuming = 0;
> +		check_count = 0;
> 
> -	gettimeofday(&last, NULL);
> +		/*
> +		 * The basic idea is producer producing page and consumer writing page.
> +		 * Each producer have a page_flag_buf list which is used for storing page's description.
> +		 * The size of page_flag_buf is little so it won't take too much memory.
> +		 * And all producers will share a page_data_buf array which is used for storing page's compressed data.
> +		 * The main thread is the consumer. It will find the next pfn and write it into file.
> +		 * The next pfn is smallest pfn in all page_flag_buf.
> +		 */
> +		gettimeofday(&last, NULL);
> +		while (1) {
> +			current_pfn = end_pfn;
> 
> -	while (consuming_pfn < end_pfn) {
> -		index = consuming_pfn % page_data_num;
> +			/*
> +			 * page_flag_buf is in circular linked list.
> +			 * The array info->page_flag_buf[] records the current page_flag_buf in each thread's
> +			 * page_flag_buf list.
> +			 * consuming is used for recording in which thread the pfn is the smallest.
> +			 * current_pfn is used for recording the value of pfn when checking the pfn.
> +			 */
> +			for (i = 0; i < info->num_threads; i++) {
> +				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
> +					continue;
> +				temp_pfn = info->page_flag_buf[i]->pfn;
> 
> -		gettimeofday(&new, NULL);
> -		if (new.tv_sec - last.tv_sec > WAIT_TIME) {
> -			ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn);
> -			goto out;
> -		}
> +				/*
> +				 * count how many threads have reached the end.
> +				 */
> +				if (temp_pfn >= end_pfn) {
> +					/*
> +					 * prevent setting FLAG_UNUSED being optimized.
> +					 */
> +					MSG("-");

Why is this line needed?
Please clarify why the FLAG_UNUSED assignment would otherwise be optimized away.

> 
> -		/*
> -		 * check pfn first without mutex locked to reduce the time
> -		 * trying to lock the mutex
> -		 */
> -		if (page_data_buf[index].pfn != consuming_pfn)
> -			continue;
> +					info->page_flag_buf[i]->ready = FLAG_UNUSED;
> 
> -		if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
> -			continue;
> +					info->current_pfn = end_pfn;

This part doesn't get info->current_pfn_mutex.
It becomes bigger than end_pfn at the end of producer thread in following case.

===
  producer                   Consumer
---------------------------------------------------------
  pthread_mutex_lock()
  pfn = info->current_pfn
                             info->current_pfn = end_pfn
  info->current_pfn++
    -> end_pfn + 1
  pthread_mutex_unlock()
===

But I know that if this race happens, the other producer threads and the consumer thread
still work well in the current cycle.
Because each thread checks whether info->current_pfn >= end_pfn.

On the other hand, info->current_pfn is initialized to cycle->start_pfn before pthread_create() 
in write_kdump_pages_parallel_cyclic().
This means it does not cause a problem in next cycle, too.

In other words, my point is following.

  a) When info->current_pfn changes, you have to get info->current_pfn_mutex.
  b) If you allow info->current_pfn is bigger than end_pfn at the end of each cycle,
     "info->current_pfn = end_pfn;" is unnecessary.

Honestly, I don't like approach b).

Thanks,
Minoru Usui

> +					end_count++;
> +					continue;
> +				}
> +
> +				if (current_pfn < temp_pfn)
> +					continue;
> 
> -		/* check whether the found one is ready to be consumed */
> -		if (page_data_buf[index].pfn != consuming_pfn ||
> -		    page_data_buf[index].ready != 1) {
> -			goto unlock;
> +				check_count++;
> +				consuming = i;
> +				current_pfn = temp_pfn;
> +			}
> +
> +			/*
> +			 * If all the threads have reached the end, we will finish writing.
> +			 */
> +			if (end_count >= info->num_threads)
> +				goto finish;
> +
> +			/*
> +			 * Since it has the probabilty that there is no page_flag_buf being ready,
> +			 * we should recheck if it happens.
> +			 */
> +			if (check_count == 0)
> +				continue;
> +
> +			/*
> +			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
> +			 * So we should recheck.
> +			 */
> +			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
> +				gettimeofday(&new, NULL);
> +				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
> +					ERRMSG("Can't get data of pfn.\n");
> +					goto out;
> +				}
> +				continue;
> +			}
> +
> +			if (current_pfn == info->page_flag_buf[consuming]->pfn)
> +				break;
>  		}
> 
>  		if ((num_dumped % per) == 0)
>  			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
> 
> -		/* next pfn is found, refresh last here */
> -		last = new;
> -		consuming_pfn++;
> -		info->consumed_pfn++;
> -		page_data_buf[index].ready = 0;
> -
> -		if (page_data_buf[index].dumpable == FALSE)
> -			goto unlock;
> -
>  		num_dumped++;
> 
> -		if (page_data_buf[index].zero == TRUE) {
> +
> +		if (info->page_flag_buf[consuming]->zero == TRUE) {
>  			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
>  				goto out;
>  			pfn_zero++;
>  		} else {
> +			index = info->page_flag_buf[consuming]->index;
>  			pd.flags      = page_data_buf[index].flags;
>  			pd.size       = page_data_buf[index].size;
>  			pd.page_flags = 0;
> @@ -7420,12 +7513,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  			 */
>  			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
>  				goto out;
> -
> +			page_data_buf[index].used = FALSE;
>  		}
> -unlock:
> -		pthread_mutex_unlock(&page_data_buf[index].mutex);
> +		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
> +		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
>  	}
> -
> +finish:
>  	ret = TRUE;
>  	/*
>  	 * print [100 %]
> @@ -7464,7 +7557,7 @@ out:
>  	}
> 
>  	if (page_data_buf != NULL) {
> -		for (i = 0; i < page_data_num; i++) {
> +		for (i = 0; i < page_buf_num; i++) {
>  			pthread_mutex_destroy(&page_data_buf[i].mutex);
>  		}
>  	}
> @@ -7564,6 +7657,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag
>  		num_dumped++;
>  		if (!read_pfn(pfn, buf))
>  			goto out;
> +
>  		filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size);
> 
>  		/*
> diff --git a/makedumpfile.h b/makedumpfile.h
> index e0b5bbf..8a9a5b2 100644
> --- a/makedumpfile.h
> +++ b/makedumpfile.h
> @@ -977,7 +977,7 @@ typedef unsigned long long int ulonglong;
>  #define PAGE_DATA_NUM	(50)
>  #define WAIT_TIME	(60 * 10)
>  #define PTHREAD_FAIL	((void *)-2)
> -#define NUM_BUFFERS	(50)
> +#define NUM_BUFFERS	(20)
> 
>  struct mmap_cache {
>  	char	*mmap_buf;
> @@ -985,28 +985,36 @@ struct mmap_cache {
>  	off_t   mmap_end_offset;
>  };
> 
> +enum {
> +	FLAG_UNUSED,
> +	FLAG_READY,
> +	FLAG_FILLING
> +};
> +struct page_flag {
> +	mdf_pfn_t pfn;
> +	char zero;
> +	char ready;
> +	short index;
> +	struct page_flag *next;
> +};
> +
>  struct page_data
>  {
> -	mdf_pfn_t pfn;
> -	int dumpable;
> -	int zero;
> -	unsigned int flags;
> +	pthread_mutex_t mutex;
>  	long size;
>  	unsigned char *buf;
> -	pthread_mutex_t mutex;
> -	/*
> -	 * whether the page_data is ready to be consumed
> -	 */
> -	int ready;
> +	int flags;
> +	int used;
>  };
> 
>  struct thread_args {
>  	int thread_num;
>  	unsigned long len_buf_out;
>  	mdf_pfn_t start_pfn, end_pfn;
> -	int page_data_num;
> +	int page_buf_num;
>  	struct cycle *cycle;
>  	struct page_data *page_data_buf;
> +	struct page_flag *page_flag_buf;
>  };
> 
>  /*
> @@ -1295,6 +1303,7 @@ struct DumpInfo {
>  	pthread_t **threads;
>  	struct thread_args *kdump_thread_args;
>  	struct page_data *page_data_buf;
> +	struct page_flag **page_flag_buf;
>  	pthread_rwlock_t usemmap_rwlock;
>  	mdf_pfn_t current_pfn;
>  	pthread_mutex_t current_pfn_mutex;
> --
> 1.8.3.1
> 
> 
> 
> 
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-23  1:32 ` Minoru Usui
@ 2016-02-23  3:45   ` "Zhou, Wenjian/周文?"
  2016-02-23  8:00     ` Minoru Usui
  0 siblings, 1 reply; 26+ messages in thread
From: "Zhou, Wenjian/周文?" @ 2016-02-23  3:45 UTC (permalink / raw)
  To: Minoru Usui, kexec

Hello Usui,

On 02/23/2016 09:32 AM, Minoru Usui wrote:
> Hello, Zhou
> 
> Thank you for reflecting my comments.
> I have other comments.
> 
> See below.
> 
>> -----Original Message-----
>> From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of Zhou Wenjian
>> Sent: Wednesday, February 17, 2016 4:06 PM
>> To: kexec@lists.infradead.org
>> Subject: [PATCH v2] Improve the performance of --num-threads -d 31
>>
>> v2:
>>          1. address Minoru Usui's comments about magic number and error message
>>          2. fix a bug in cyclic mode caused by optimizing
>>
>> v1:
>>          1. change page_flag.ready's value to enum
>>          2. change the patch description
>>          3. cleanup some codes
>>          4. fix a bug in cyclic mode
>>
>> multi-threads implementation will introduce extra cost when handling
>> each page. The origin implementation will also do the extra work for
>> filtered pages. So there is a big performance degradation in
>> --num-threads -d 31.
>> The new implementation won't do the extra work for filtered pages any
>> more. So the performance of -d 31 is close to that of serial processing.
>>
>> The new implementation is just like the following:
>>          * The basic idea is producer producing page and consumer writing page.
>>          * Each producer have a page_flag_buf list which is used for storing
>>            page's description.
>>          * The size of page_flag_buf is little so it won't take too much memory.
>>          * And all producers will share a page_data_buf array which is
>>            used for storing page's compressed data.
>>          * The main thread is the consumer. It will find the next pfn and write
>>            it into file.
>>          * The next pfn is smallest pfn in all page_flag_buf.
>>
>> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com>
>> ---
>>   makedumpfile.c | 266 ++++++++++++++++++++++++++++++++++++++-------------------
>>   makedumpfile.h |  31 ++++---
>>   2 files changed, 200 insertions(+), 97 deletions(-)
>>
>> diff --git a/makedumpfile.c b/makedumpfile.c
>> index fa0b779..31329b5 100644
>> --- a/makedumpfile.c
>> +++ b/makedumpfile.c
>> @@ -3483,7 +3483,8 @@ initial_for_parallel()
>>   	unsigned long page_data_buf_size;
>>   	unsigned long limit_size;
>>   	int page_data_num;
>> -	int i;
>> +	struct page_flag *current;
>> +	int i, j;
>>
>>   	len_buf_out = calculate_len_buf_out(info->page_size);
>>
>> @@ -3562,8 +3563,10 @@ initial_for_parallel()
>>   		      - MAP_REGION * info->num_threads) * 0.6;
>>
>>   	page_data_num = limit_size / page_data_buf_size;
>> +	info->num_buffers = 3 * info->num_threads;
>>
>> -	info->num_buffers = MIN(NUM_BUFFERS, page_data_num);
>> +	info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS);
>> +	info->num_buffers = MIN(info->num_buffers, page_data_num);
>>
>>   	DEBUG_MSG("Number of struct page_data for produce/consume: %d\n",
>>   			info->num_buffers);
>> @@ -3588,6 +3591,36 @@ initial_for_parallel()
>>   	}
>>
>>   	/*
>> +	 * initial page_flag for each thread
>> +	 */
>> +	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
>> +	    == NULL) {
>> +		MSG("Can't allocate memory for page_flag_buf. %s\n",
>> +				strerror(errno));
>> +		return FALSE;
>> +	}
>> +	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
>> +
>> +	for (i = 0; i < info->num_threads; i++) {
>> +		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
>> +			MSG("Can't allocate memory for page_flag. %s\n",
>> +				strerror(errno));
>> +			return FALSE;
>> +		}
>> +		current = info->page_flag_buf[i];
>> +
>> +		for (j = 1; j < NUM_BUFFERS; j++) {
>> +			if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) {
>> +				MSG("Can't allocate memory for page_flag. %s\n",
>> +					strerror(errno));
>> +				return FALSE;
>> +			}
>> +			current = current->next;
>> +		}
>> +		current->next = info->page_flag_buf[i];
>> +	}
>> +
>> +	/*
>>   	 * initial fd_memory for threads
>>   	 */
>>   	for (i = 0; i < info->num_threads; i++) {
>> @@ -3612,7 +3645,8 @@ initial_for_parallel()
>>   void
>>   free_for_parallel()
>>   {
>> -	int i;
>> +	int i, j;
>> +	struct page_flag *current;
>>
>>   	if (info->threads != NULL) {
>>   		for (i = 0; i < info->num_threads; i++) {
>> @@ -3655,6 +3689,19 @@ free_for_parallel()
>>   		free(info->page_data_buf);
>>   	}
>>
>> +	if (info->page_flag_buf != NULL) {
>> +		for (i = 0; i < info->num_threads; i++) {
>> +			for (j = 0; j < NUM_BUFFERS; j++) {
>> +				if (info->page_flag_buf[i] != NULL) {
>> +					current = info->page_flag_buf[i];
>> +					info->page_flag_buf[i] = current->next;
>> +					free(current);
>> +				}
>> +			}
>> +		}
>> +		free(info->page_flag_buf);
>> +	}
>> +
>>   	if (info->parallel_info == NULL)
>>   		return;
>>
>> @@ -7076,10 +7123,10 @@ kdump_thread_function_cyclic(void *arg) {
>>   	void *retval = PTHREAD_FAIL;
>>   	struct thread_args *kdump_thread_args = (struct thread_args *)arg;
>>   	struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
>> +	struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
>>   	struct cycle *cycle = kdump_thread_args->cycle;
>> -	int page_data_num = kdump_thread_args->page_data_num;
>> -	mdf_pfn_t pfn;
>> -	int index;
>> +	mdf_pfn_t pfn = cycle->start_pfn;
>> +	int index = kdump_thread_args->thread_num;
>>   	int buf_ready;
>>   	int dumpable;
>>   	int fd_memory = 0;
>> @@ -7125,47 +7172,46 @@ kdump_thread_function_cyclic(void *arg) {
>>   						kdump_thread_args->thread_num);
>>   	}
>>
>> -	while (1) {
>> -		/* get next pfn */
>> -		pthread_mutex_lock(&info->current_pfn_mutex);
>> -		pfn = info->current_pfn;
>> -		info->current_pfn++;
>> -		pthread_mutex_unlock(&info->current_pfn_mutex);
>> +	/*
>> +	 * filtered page won't take anything
>> +	 * unfiltered zero page will only take a page_flag_buf
>> +	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
>> +	 */
>> +	while (pfn < kdump_thread_args->end_pfn) {
>> +		buf_ready = FALSE;
>>
>> -		if (pfn >= kdump_thread_args->end_pfn)
>> -			break;
>> +		while (page_data_buf[index].used != FALSE ||
>> +				pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
>> +			index = (index + 1) % info->num_buffers;
>> -		index = -1;
>> -		buf_ready = FALSE;
>> +		page_data_buf[index].used = TRUE;
>>
>>   		while (buf_ready == FALSE) {
>>   			pthread_testcancel();
>> -
>> -			index = pfn % page_data_num;
>> -
>> -			if (pfn - info->consumed_pfn > info->num_buffers)
>> -				continue;
>> -
>> -			if (page_data_buf[index].ready != 0)
>> +			if (page_flag_buf->ready == FLAG_READY)
>>   				continue;
>>
>> -			pthread_mutex_lock(&page_data_buf[index].mutex);
>> -
>> -			if (page_data_buf[index].ready != 0)
>> -				goto unlock;
>> +			/* get next pfn */
>> +			pthread_mutex_lock(&info->current_pfn_mutex);
>> +			pfn = info->current_pfn;
>> +			info->current_pfn++;
>> +			page_flag_buf->ready = FLAG_FILLING;
>> +			pthread_mutex_unlock(&info->current_pfn_mutex);
>>
>> -			buf_ready = TRUE;
>> +			page_flag_buf->pfn = pfn;
>>
>> -			page_data_buf[index].pfn = pfn;
>> -			page_data_buf[index].ready = 1;
>> +			if (pfn >= kdump_thread_args->end_pfn) {
>> +				page_data_buf[index].used = FALSE;
>> +				page_flag_buf->ready = FLAG_READY;
>> +				break;
>> +			}
>>
>>   			dumpable = is_dumpable(
>>   				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
>>   				pfn,
>>   				cycle);
>> -			page_data_buf[index].dumpable = dumpable;
>>   			if (!dumpable)
>> -				goto unlock;
>> +				continue;
>>
>>   			if (!read_pfn_parallel(fd_memory, pfn, buf,
>>   					       &bitmap_memory_parallel,
>> @@ -7178,11 +7224,11 @@ kdump_thread_function_cyclic(void *arg) {
>>
>>   			if ((info->dump_level & DL_EXCLUDE_ZERO)
>>   			    && is_zero_page(buf, info->page_size)) {
>> -				page_data_buf[index].zero = TRUE;
>> -				goto unlock;
>> +				page_flag_buf->zero = TRUE;
>> +				goto next;
>>   			}
>>
>> -			page_data_buf[index].zero = FALSE;
>> +			page_flag_buf->zero = FALSE;
>>
>>   			/*
>>   			 * Compress the page data.
>> @@ -7232,12 +7278,16 @@ kdump_thread_function_cyclic(void *arg) {
>>   				page_data_buf[index].size  = info->page_size;
>>   				memcpy(page_data_buf[index].buf, buf, info->page_size);
>>   			}
>> -unlock:
>> -			pthread_mutex_unlock(&page_data_buf[index].mutex);
>> +			page_flag_buf->index = index;
>> +			buf_ready = TRUE;
>> +next:
>> +			page_flag_buf->ready = FLAG_READY;
>> +			page_flag_buf = page_flag_buf->next;
>>
>>   		}
>> -	}
>>
>> +		pthread_mutex_unlock(&page_data_buf[index].mutex);
>> +	}
>>   	retval = NULL;
>>
>>   fail:
>> @@ -7265,14 +7315,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>   	struct page_desc pd;
>>   	struct timeval tv_start;
>>   	struct timeval last, new;
>> -	unsigned long long consuming_pfn;
>>   	pthread_t **threads = NULL;
>>   	struct thread_args *kdump_thread_args = NULL;
>>   	void *thread_result;
>> -	int page_data_num;
>> +	int page_buf_num;
>>   	struct page_data *page_data_buf = NULL;
>>   	int i;
>>   	int index;
>> +	int end_count, consuming, check_count;
>> +	mdf_pfn_t current_pfn, temp_pfn;
>>
>>   	if (info->flag_elf_dumpfile)
>>   		return FALSE;
>> @@ -7319,16 +7370,11 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>   	threads = info->threads;
>>   	kdump_thread_args = info->kdump_thread_args;
>>
>> -	page_data_num = info->num_buffers;
>> +	page_buf_num = info->num_buffers;
>>   	page_data_buf = info->page_data_buf;
>>
>> -	for (i = 0; i < page_data_num; i++) {
>> -		/*
>> -		 * producer will use pfn in page_data_buf to decide the
>> -		 * consumed pfn
>> -		 */
>> -		page_data_buf[i].pfn = start_pfn - 1;
>> -		page_data_buf[i].ready = 0;
>> +	for (i = 0; i < page_buf_num; i++) {
>> +		page_data_buf[i].used = FALSE;
>>   		res = pthread_mutex_init(&page_data_buf[i].mutex, NULL);
>>   		if (res != 0) {
>>   			ERRMSG("Can't initialize mutex of page_data_buf. %s\n",
>> @@ -7342,8 +7388,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>   		kdump_thread_args[i].len_buf_out = len_buf_out;
>>   		kdump_thread_args[i].start_pfn = start_pfn;
>>   		kdump_thread_args[i].end_pfn = end_pfn;
>> -		kdump_thread_args[i].page_data_num = page_data_num;
>> +		kdump_thread_args[i].page_buf_num = page_buf_num;
>>   		kdump_thread_args[i].page_data_buf = page_data_buf;
>> +		kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
>>   		kdump_thread_args[i].cycle = cycle;
>>
>>   		res = pthread_create(threads[i], NULL,
>> @@ -7356,55 +7403,101 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>   		}
>>   	}
>>
>> -	consuming_pfn = start_pfn;
>> -	index = -1;
>> +	end_count = 0;
>> +	while (1) {
>> +		consuming = 0;
>> +		check_count = 0;
>>
>> -	gettimeofday(&last, NULL);
>> +		/*
>> +		 * The basic idea is producer producing page and consumer writing page.
>> +		 * Each producer have a page_flag_buf list which is used for storing page's description.
>> +		 * The size of page_flag_buf is little so it won't take too much memory.
>> +		 * And all producers will share a page_data_buf array which is used for storing page's compressed data.
>> +		 * The main thread is the consumer. It will find the next pfn and write it into file.
>> +		 * The next pfn is smallest pfn in all page_flag_buf.
>> +		 */
>> +		gettimeofday(&last, NULL);
>> +		while (1) {
>> +			current_pfn = end_pfn;
>>
>> -	while (consuming_pfn < end_pfn) {
>> -		index = consuming_pfn % page_data_num;
>> +			/*
>> +			 * page_flag_buf is in circular linked list.
>> +			 * The array info->page_flag_buf[] records the current page_flag_buf in each thread's
>> +			 * page_flag_buf list.
>> +			 * consuming is used for recording in which thread the pfn is the smallest.
>> +			 * current_pfn is used for recording the value of pfn when checking the pfn.
>> +			 */
>> +			for (i = 0; i < info->num_threads; i++) {
>> +				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
>> +					continue;
>> +				temp_pfn = info->page_flag_buf[i]->pfn;
>>
>> -		gettimeofday(&new, NULL);
>> -		if (new.tv_sec - last.tv_sec > WAIT_TIME) {
>> -			ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn);
>> -			goto out;
>> -		}
>> +				/*
>> +				 * count how many threads have reached the end.
>> +				 */
>> +				if (temp_pfn >= end_pfn) {
>> +					/*
>> +					 * prevent setting FLAG_UNUSED being optimized.
>> +					 */
>> +					MSG("-");
> 
> Why is this line needed?
> Please clarify why setting FLAG_UNUSED would be optimized out.
> 

I tried but still can't figure out the reason or find a better solution.
I would be very happy if you could help me.

In gdb, I stepped through the code.
The line "info->page_flag_buf[i]->ready = FLAG_UNUSED;" is never reached.
However, if a breakpoint is set on that line, it is never skipped.

>>
>> -		/*
>> -		 * check pfn first without mutex locked to reduce the time
>> -		 * trying to lock the mutex
>> -		 */
>> -		if (page_data_buf[index].pfn != consuming_pfn)
>> -			continue;
>> +					info->page_flag_buf[i]->ready = FLAG_UNUSED;
>>
>> -		if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
>> -			continue;
>> +					info->current_pfn = end_pfn;
> 
> This part doesn't get info->current_pfn_mutex.
> It becomes bigger than end_pfn at the end of producer thread in following case.
> 
> ===
>    producer                   Consumer
> ---------------------------------------------------------
>    pthread_mutex_lock()
>    pfn = info->current_pfn
>                               info->current_pfn = end_pfn
>    info->current_pfn++
>      -> end_pfn + 1
>    pthread_mutex_unlock()
> ===
> 
> But I know, if this race is happened, processing other producer thread and consumer thread works well
> in current cycle.
> Because each thread checks whether info->current_pfn >= end_pfn.
> 
> On the other hand, info->current_pfn is initialized to cycle->start_pfn before pthread_create()
> in write_kdump_pages_parallel_cyclic().
> This means it does not cause a problem in next cycle, too.
> 
> In other words, my point is following.
> 
>    a) When info->current_pfn changes, you have to get info->current_pfn_mutex.
>    b) If you allow info->current_pfn is bigger than end_pfn at the end of each cycle,
>       "info->current_pfn = end_pfn;" is unnecessary.
> 
> Honestly, I don't like approach b).

You're right. Something I assumed was wrong.
But I'd rather avoid locking if I have another choice.
I will set info->current_pfn before returning.
How about it?

-- 
Thanks
Zhou

> 
> Thanks,
> Minoru Usui
> 
>> +					end_count++;
>> +					continue;
>> +				}
>> +
>> +				if (current_pfn < temp_pfn)
>> +					continue;
>>
>> -		/* check whether the found one is ready to be consumed */
>> -		if (page_data_buf[index].pfn != consuming_pfn ||
>> -		    page_data_buf[index].ready != 1) {
>> -			goto unlock;
>> +				check_count++;
>> +				consuming = i;
>> +				current_pfn = temp_pfn;
>> +			}
>> +
>> +			/*
>> +			 * If all the threads have reached the end, we will finish writing.
>> +			 */
>> +			if (end_count >= info->num_threads)
>> +				goto finish;
>> +
>> +			/*
>> +			 * Since it has the probabilty that there is no page_flag_buf being ready,
>> +			 * we should recheck if it happens.
>> +			 */
>> +			if (check_count == 0)
>> +				continue;
>> +
>> +			/*
>> +			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
>> +			 * So we should recheck.
>> +			 */
>> +			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
>> +				gettimeofday(&new, NULL);
>> +				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
>> +					ERRMSG("Can't get data of pfn.\n");
>> +					goto out;
>> +				}
>> +				continue;
>> +			}
>> +
>> +			if (current_pfn == info->page_flag_buf[consuming]->pfn)
>> +				break;
>>   		}
>>
>>   		if ((num_dumped % per) == 0)
>>   			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
>>
>> -		/* next pfn is found, refresh last here */
>> -		last = new;
>> -		consuming_pfn++;
>> -		info->consumed_pfn++;
>> -		page_data_buf[index].ready = 0;
>> -
>> -		if (page_data_buf[index].dumpable == FALSE)
>> -			goto unlock;
>> -
>>   		num_dumped++;
>>
>> -		if (page_data_buf[index].zero == TRUE) {
>> +
>> +		if (info->page_flag_buf[consuming]->zero == TRUE) {
>>   			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
>>   				goto out;
>>   			pfn_zero++;
>>   		} else {
>> +			index = info->page_flag_buf[consuming]->index;
>>   			pd.flags      = page_data_buf[index].flags;
>>   			pd.size       = page_data_buf[index].size;
>>   			pd.page_flags = 0;
>> @@ -7420,12 +7513,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>   			 */
>>   			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
>>   				goto out;
>> -
>> +			page_data_buf[index].used = FALSE;
>>   		}
>> -unlock:
>> -		pthread_mutex_unlock(&page_data_buf[index].mutex);
>> +		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
>> +		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
>>   	}
>> -
>> +finish:
>>   	ret = TRUE;
>>   	/*
>>   	 * print [100 %]
>> @@ -7464,7 +7557,7 @@ out:
>>   	}
>>
>>   	if (page_data_buf != NULL) {
>> -		for (i = 0; i < page_data_num; i++) {
>> +		for (i = 0; i < page_buf_num; i++) {
>>   			pthread_mutex_destroy(&page_data_buf[i].mutex);
>>   		}
>>   	}
>> @@ -7564,6 +7657,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag
>>   		num_dumped++;
>>   		if (!read_pfn(pfn, buf))
>>   			goto out;
>> +
>>   		filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size);
>>
>>   		/*
>> diff --git a/makedumpfile.h b/makedumpfile.h
>> index e0b5bbf..8a9a5b2 100644
>> --- a/makedumpfile.h
>> +++ b/makedumpfile.h
>> @@ -977,7 +977,7 @@ typedef unsigned long long int ulonglong;
>>   #define PAGE_DATA_NUM	(50)
>>   #define WAIT_TIME	(60 * 10)
>>   #define PTHREAD_FAIL	((void *)-2)
>> -#define NUM_BUFFERS	(50)
>> +#define NUM_BUFFERS	(20)
>>
>>   struct mmap_cache {
>>   	char	*mmap_buf;
>> @@ -985,28 +985,36 @@ struct mmap_cache {
>>   	off_t   mmap_end_offset;
>>   };
>>
>> +enum {
>> +	FLAG_UNUSED,
>> +	FLAG_READY,
>> +	FLAG_FILLING
>> +};
>> +struct page_flag {
>> +	mdf_pfn_t pfn;
>> +	char zero;
>> +	char ready;
>> +	short index;
>> +	struct page_flag *next;
>> +};
>> +
>>   struct page_data
>>   {
>> -	mdf_pfn_t pfn;
>> -	int dumpable;
>> -	int zero;
>> -	unsigned int flags;
>> +	pthread_mutex_t mutex;
>>   	long size;
>>   	unsigned char *buf;
>> -	pthread_mutex_t mutex;
>> -	/*
>> -	 * whether the page_data is ready to be consumed
>> -	 */
>> -	int ready;
>> +	int flags;
>> +	int used;
>>   };
>>
>>   struct thread_args {
>>   	int thread_num;
>>   	unsigned long len_buf_out;
>>   	mdf_pfn_t start_pfn, end_pfn;
>> -	int page_data_num;
>> +	int page_buf_num;
>>   	struct cycle *cycle;
>>   	struct page_data *page_data_buf;
>> +	struct page_flag *page_flag_buf;
>>   };
>>
>>   /*
>> @@ -1295,6 +1303,7 @@ struct DumpInfo {
>>   	pthread_t **threads;
>>   	struct thread_args *kdump_thread_args;
>>   	struct page_data *page_data_buf;
>> +	struct page_flag **page_flag_buf;
>>   	pthread_rwlock_t usemmap_rwlock;
>>   	mdf_pfn_t current_pfn;
>>   	pthread_mutex_t current_pfn_mutex;
>> --
>> 1.8.3.1
>>
>>
>>
>>
>> _______________________________________________
>> kexec mailing list
>> kexec@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/kexec




_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-22 16:58 ` Minfei Huang
@ 2016-02-23  5:26   ` Minfei Huang
  2016-02-23  5:47     ` "Zhou, Wenjian/周文剑"
  0 siblings, 1 reply; 26+ messages in thread
From: Minfei Huang @ 2016-02-23  5:26 UTC (permalink / raw)
  To: Zhou Wenjian; +Cc: kexec

On 02/23/16 at 12:58am, Minfei Huang wrote:
> On 02/17/16 at 03:05pm, Zhou Wenjian wrote:
> > The new implementation won't do the extra work for filtered pages any
> > more. So the performance of -d 31 is close to that of serial processing.
> 
> Hi, Wenjian.
> 
> kdump:/# time bash -x a.sh makedumpfile vmcore02 1 --num-threads 128
> + makedumpfile --num-threads -l --message-level 1 -d 31 /proc/vmcore /kdumproot/home//var/crash/127.0.0.1-2016-02-22-23:22:36/vmcore02

Please ignore this benchmark, since there is no parameter passed to
option num-threads. Following is my new test result which is generated
in 1st kernel.

127.0.0.1-2016-02-22-19\:08\:17/vmcore01 is generated by makedumpfile
with -d 31 in 2nd kernel.
> 
> Command makedumpfile is compiled with this patch on version 1.5.9, and
> makedumpfile.backup is compiled on version 1.5.7.
> 
> The compressed file vmcore is 67G filtered out from 4T memory.

[crash]# time makedumpfile -l --message-level 1 -d 31 127.0.0.1-2016-02-22-19\:08\:17/vmcore01 vmcore10
Copying data                       : [100.0 %] /

real    7m25.492s
user    6m37.386s
sys 0m47.801s
[crash]# time makedumpfile.backup -l --message-level 1 -d 31 127.0.0.1-2016-02-22-19\:08\:17/vmcore01 vmcore11
Copying data                       : [100.0 %] -

real    7m15.784s
user    6m28.829s
sys 0m46.656s

[crash]# time makedumpfile --num-threads 128 -l --message-level 1 -d 31 127.0.0.1-2016-02-22-19\:08\:17/vmcore01
vmcore14
Copying data                       : [ 99.7 %] |

makedumpfile never returns.

There are more than 128 cpus plugged in this machine.

Thanks
Minfei

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-23  5:26   ` Minfei Huang
@ 2016-02-23  5:47     ` "Zhou, Wenjian/周文剑"
  2016-02-24  1:43       ` Minfei Huang
  0 siblings, 1 reply; 26+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2016-02-23  5:47 UTC (permalink / raw)
  To: Minfei Huang; +Cc: kexec

Hello, Minfei,

Does it occur every time?
If not, I think I have known the reason.

-- 
Thanks
Zhou

On 02/23/2016 01:26 PM, Minfei Huang wrote:
> On 02/23/16 at 12:58am, Minfei Huang wrote:
>> On 02/17/16 at 03:05pm, Zhou Wenjian wrote:
>>> The new implementation won't do the extra work for filtered pages any
>>> more. So the performance of -d 31 is close to that of serial processing.
>>
>> Hi, Wenjian.
>>
>> kdump:/# time bash -x a.sh makedumpfile vmcore02 1 --num-threads 128
>> + makedumpfile --num-threads -l --message-level 1 -d 31 /proc/vmcore /kdumproot/home//var/crash/127.0.0.1-2016-02-22-23:22:36/vmcore02
>
> Please ignore this benchmark, since there is no parameter passed to
> option num-threads. Following is my new test result which is generated
> in 1st kernel.
>
> 127.0.0.1-2016-02-22-19\:08\:17/vmcore01 is generated by makedumpfile
> with -d 31 in 2nd kernel.
>>
>> Command makedumpfile is compiled with this patch on version 1.5.9, and
>> makedumpfile.backup is compiled on version 1.5.7.
>>
>> The compressed file vmcore is 67G filtered out from 4T memory.
>
> [crash]# time makedumpfile -l --message-level 1 -d 31 127.0.0.1-2016-02-22-19\:08\:17/vmcore01 vmcore10
> Copying data                       : [100.0 %] /
>
> real    7m25.492s
> user    6m37.386s
> sys 0m47.801s
> [crash]# time makedumpfile.backup -l --message-level 1 -d 31 127.0.0.1-2016-02-22-19\:08\:17/vmcore01 vmcore11
> Copying data                       : [100.0 %] -
>
> real    7m15.784s
> user    6m28.829s
> sys 0m46.656s
>
> [crash]# time makedumpfile --num-threads 128 -l --message-level 1 -d 31 127.0.0.1-2016-02-22-19\:08\:17/vmcore01
> vmcore14
> Copying data                       : [ 99.7 %] |
>
> Never return from makedumpfile.
>
> There are more than 128 cpus plugged in this machine.
>
> Thanks
> Minfei
>
>






_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-23  3:45   ` "Zhou, Wenjian/周文?"
@ 2016-02-23  8:00     ` Minoru Usui
  2016-02-23  8:29       ` "Zhou, Wenjian/周文?"
  0 siblings, 1 reply; 26+ messages in thread
From: Minoru Usui @ 2016-02-23  8:00 UTC (permalink / raw)
  To: "Zhou, Wenjian/周文?", kexec

 Hi,

> -----Original Message-----
> From: "Zhou, Wenjian/周文?" [mailto:zhouwj-fnst@cn.fujitsu.com]
> Sent: Tuesday, February 23, 2016 12:45 PM
> To: Usui Minoru(碓井 成) <min-usui@ti.jp.nec.com>; kexec@lists.infradead.org
> Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31
> 
> Hello Usui,
> 
> On 02/23/2016 09:32 AM, Minoru Usui wrote:
> > Hello, Zhou
> >
> > Thank you for reflecting my comments.
> > I have other comments.
> >
> > See below.
> >
> >> -----Original Message-----
> >> From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of Zhou Wenjian
> >> Sent: Wednesday, February 17, 2016 4:06 PM
> >> To: kexec@lists.infradead.org
> >> Subject: [PATCH v2] Improve the performance of --num-threads -d 31
> >>
> >> v2:
> >>          1. address Minoru Usui's comments about magic number and error message
> >>          2. fix a bug in cyclic mode caused by optimizing
> >>
> >> v1:
> >>          1. change page_flag.ready's value to enum
> >>          2. change the patch description
> >>          3. cleanup some codes
> >>          4. fix a bug in cyclic mode
> >>
> >> multi-threads implementation will introduce extra cost when handling
> >> each page. The origin implementation will also do the extra work for
> >> filtered pages. So there is a big performance degradation in
> >> --num-threads -d 31.
> >> The new implementation won't do the extra work for filtered pages any
> >> more. So the performance of -d 31 is close to that of serial processing.
> >>
> >> The new implementation is just like the following:
> >>          * The basic idea is producer producing page and consumer writing page.
> >>          * Each producer have a page_flag_buf list which is used for storing
> >>            page's description.
> >>          * The size of page_flag_buf is little so it won't take too much memory.
> >>          * And all producers will share a page_data_buf array which is
> >>            used for storing page's compressed data.
> >>          * The main thread is the consumer. It will find the next pfn and write
> >>            it into file.
> >>          * The next pfn is smallest pfn in all page_flag_buf.
> >>
> >> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com>
> >> ---
> >>   makedumpfile.c | 266 ++++++++++++++++++++++++++++++++++++++-------------------
> >>   makedumpfile.h |  31 ++++---
> >>   2 files changed, 200 insertions(+), 97 deletions(-)
> >>
> >> diff --git a/makedumpfile.c b/makedumpfile.c
> >> index fa0b779..31329b5 100644
> >> --- a/makedumpfile.c
> >> +++ b/makedumpfile.c
> >> @@ -3483,7 +3483,8 @@ initial_for_parallel()
> >>   	unsigned long page_data_buf_size;
> >>   	unsigned long limit_size;
> >>   	int page_data_num;
> >> -	int i;
> >> +	struct page_flag *current;
> >> +	int i, j;
> >>
> >>   	len_buf_out = calculate_len_buf_out(info->page_size);
> >>
> >> @@ -3562,8 +3563,10 @@ initial_for_parallel()
> >>   		      - MAP_REGION * info->num_threads) * 0.6;
> >>
> >>   	page_data_num = limit_size / page_data_buf_size;
> >> +	info->num_buffers = 3 * info->num_threads;
> >>
> >> -	info->num_buffers = MIN(NUM_BUFFERS, page_data_num);
> >> +	info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS);
> >> +	info->num_buffers = MIN(info->num_buffers, page_data_num);
> >>
> >>   	DEBUG_MSG("Number of struct page_data for produce/consume: %d\n",
> >>   			info->num_buffers);
> >> @@ -3588,6 +3591,36 @@ initial_for_parallel()
> >>   	}
> >>
> >>   	/*
> >> +	 * initial page_flag for each thread
> >> +	 */
> >> +	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
> >> +	    == NULL) {
> >> +		MSG("Can't allocate memory for page_flag_buf. %s\n",
> >> +				strerror(errno));
> >> +		return FALSE;
> >> +	}
> >> +	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
> >> +
> >> +	for (i = 0; i < info->num_threads; i++) {
> >> +		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
> >> +			MSG("Can't allocate memory for page_flag. %s\n",
> >> +				strerror(errno));
> >> +			return FALSE;
> >> +		}
> >> +		current = info->page_flag_buf[i];
> >> +
> >> +		for (j = 1; j < NUM_BUFFERS; j++) {
> >> +			if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) {
> >> +				MSG("Can't allocate memory for page_flag. %s\n",
> >> +					strerror(errno));
> >> +				return FALSE;
> >> +			}
> >> +			current = current->next;
> >> +		}
> >> +		current->next = info->page_flag_buf[i];
> >> +	}
> >> +
> >> +	/*
> >>   	 * initial fd_memory for threads
> >>   	 */
> >>   	for (i = 0; i < info->num_threads; i++) {
> >> @@ -3612,7 +3645,8 @@ initial_for_parallel()
> >>   void
> >>   free_for_parallel()
> >>   {
> >> -	int i;
> >> +	int i, j;
> >> +	struct page_flag *current;
> >>
> >>   	if (info->threads != NULL) {
> >>   		for (i = 0; i < info->num_threads; i++) {
> >> @@ -3655,6 +3689,19 @@ free_for_parallel()
> >>   		free(info->page_data_buf);
> >>   	}
> >>
> >> +	if (info->page_flag_buf != NULL) {
> >> +		for (i = 0; i < info->num_threads; i++) {
> >> +			for (j = 0; j < NUM_BUFFERS; j++) {
> >> +				if (info->page_flag_buf[i] != NULL) {
> >> +					current = info->page_flag_buf[i];
> >> +					info->page_flag_buf[i] = current->next;
> >> +					free(current);
> >> +				}
> >> +			}
> >> +		}
> >> +		free(info->page_flag_buf);
> >> +	}
> >> +
> >>   	if (info->parallel_info == NULL)
> >>   		return;
> >>
> >> @@ -7076,10 +7123,10 @@ kdump_thread_function_cyclic(void *arg) {
> >>   	void *retval = PTHREAD_FAIL;
> >>   	struct thread_args *kdump_thread_args = (struct thread_args *)arg;
> >>   	struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
> >> +	struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
> >>   	struct cycle *cycle = kdump_thread_args->cycle;
> >> -	int page_data_num = kdump_thread_args->page_data_num;
> >> -	mdf_pfn_t pfn;
> >> -	int index;
> >> +	mdf_pfn_t pfn = cycle->start_pfn;
> >> +	int index = kdump_thread_args->thread_num;
> >>   	int buf_ready;
> >>   	int dumpable;
> >>   	int fd_memory = 0;
> >> @@ -7125,47 +7172,46 @@ kdump_thread_function_cyclic(void *arg) {
> >>   						kdump_thread_args->thread_num);
> >>   	}
> >>
> >> -	while (1) {
> >> -		/* get next pfn */
> >> -		pthread_mutex_lock(&info->current_pfn_mutex);
> >> -		pfn = info->current_pfn;
> >> -		info->current_pfn++;
> >> -		pthread_mutex_unlock(&info->current_pfn_mutex);
> >> +	/*
> >> +	 * filtered page won't take anything
> >> +	 * unfiltered zero page will only take a page_flag_buf
> >> +	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
> >> +	 */
> >> +	while (pfn < kdump_thread_args->end_pfn) {
> >> +		buf_ready = FALSE;
> >>
> >> -		if (pfn >= kdump_thread_args->end_pfn)
> >> -			break;
> >> +		while (page_data_buf[index].used != FALSE ||
> >> +				pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
> >> +			index = (index + 1) % info->num_buffers;
> >> -		index = -1;
> >> -		buf_ready = FALSE;
> >> +		page_data_buf[index].used = TRUE;
> >>
> >>   		while (buf_ready == FALSE) {
> >>   			pthread_testcancel();
> >> -
> >> -			index = pfn % page_data_num;
> >> -
> >> -			if (pfn - info->consumed_pfn > info->num_buffers)
> >> -				continue;
> >> -
> >> -			if (page_data_buf[index].ready != 0)
> >> +			if (page_flag_buf->ready == FLAG_READY)
> >>   				continue;
> >>
> >> -			pthread_mutex_lock(&page_data_buf[index].mutex);
> >> -
> >> -			if (page_data_buf[index].ready != 0)
> >> -				goto unlock;
> >> +			/* get next pfn */
> >> +			pthread_mutex_lock(&info->current_pfn_mutex);
> >> +			pfn = info->current_pfn;
> >> +			info->current_pfn++;
> >> +			page_flag_buf->ready = FLAG_FILLING;
> >> +			pthread_mutex_unlock(&info->current_pfn_mutex);
> >>
> >> -			buf_ready = TRUE;
> >> +			page_flag_buf->pfn = pfn;
> >>
> >> -			page_data_buf[index].pfn = pfn;
> >> -			page_data_buf[index].ready = 1;
> >> +			if (pfn >= kdump_thread_args->end_pfn) {
> >> +				page_data_buf[index].used = FALSE;
> >> +				page_flag_buf->ready = FLAG_READY;
> >> +				break;
> >> +			}
> >>
> >>   			dumpable = is_dumpable(
> >>   				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
> >>   				pfn,
> >>   				cycle);
> >> -			page_data_buf[index].dumpable = dumpable;
> >>   			if (!dumpable)
> >> -				goto unlock;
> >> +				continue;
> >>
> >>   			if (!read_pfn_parallel(fd_memory, pfn, buf,
> >>   					       &bitmap_memory_parallel,
> >> @@ -7178,11 +7224,11 @@ kdump_thread_function_cyclic(void *arg) {
> >>
> >>   			if ((info->dump_level & DL_EXCLUDE_ZERO)
> >>   			    && is_zero_page(buf, info->page_size)) {
> >> -				page_data_buf[index].zero = TRUE;
> >> -				goto unlock;
> >> +				page_flag_buf->zero = TRUE;
> >> +				goto next;
> >>   			}
> >>
> >> -			page_data_buf[index].zero = FALSE;
> >> +			page_flag_buf->zero = FALSE;
> >>
> >>   			/*
> >>   			 * Compress the page data.
> >> @@ -7232,12 +7278,16 @@ kdump_thread_function_cyclic(void *arg) {
> >>   				page_data_buf[index].size  = info->page_size;
> >>   				memcpy(page_data_buf[index].buf, buf, info->page_size);
> >>   			}
> >> -unlock:
> >> -			pthread_mutex_unlock(&page_data_buf[index].mutex);
> >> +			page_flag_buf->index = index;
> >> +			buf_ready = TRUE;
> >> +next:
> >> +			page_flag_buf->ready = FLAG_READY;
> >> +			page_flag_buf = page_flag_buf->next;
> >>
> >>   		}
> >> -	}
> >>
> >> +		pthread_mutex_unlock(&page_data_buf[index].mutex);
> >> +	}
> >>   	retval = NULL;
> >>
> >>   fail:
> >> @@ -7265,14 +7315,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> >>   	struct page_desc pd;
> >>   	struct timeval tv_start;
> >>   	struct timeval last, new;
> >> -	unsigned long long consuming_pfn;
> >>   	pthread_t **threads = NULL;
> >>   	struct thread_args *kdump_thread_args = NULL;
> >>   	void *thread_result;
> >> -	int page_data_num;
> >> +	int page_buf_num;
> >>   	struct page_data *page_data_buf = NULL;
> >>   	int i;
> >>   	int index;
> >> +	int end_count, consuming, check_count;
> >> +	mdf_pfn_t current_pfn, temp_pfn;
> >>
> >>   	if (info->flag_elf_dumpfile)
> >>   		return FALSE;
> >> @@ -7319,16 +7370,11 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> >>   	threads = info->threads;
> >>   	kdump_thread_args = info->kdump_thread_args;
> >>
> >> -	page_data_num = info->num_buffers;
> >> +	page_buf_num = info->num_buffers;
> >>   	page_data_buf = info->page_data_buf;
> >>
> >> -	for (i = 0; i < page_data_num; i++) {
> >> -		/*
> >> -		 * producer will use pfn in page_data_buf to decide the
> >> -		 * consumed pfn
> >> -		 */
> >> -		page_data_buf[i].pfn = start_pfn - 1;
> >> -		page_data_buf[i].ready = 0;
> >> +	for (i = 0; i < page_buf_num; i++) {
> >> +		page_data_buf[i].used = FALSE;
> >>   		res = pthread_mutex_init(&page_data_buf[i].mutex, NULL);
> >>   		if (res != 0) {
> >>   			ERRMSG("Can't initialize mutex of page_data_buf. %s\n",
> >> @@ -7342,8 +7388,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> >>   		kdump_thread_args[i].len_buf_out = len_buf_out;
> >>   		kdump_thread_args[i].start_pfn = start_pfn;
> >>   		kdump_thread_args[i].end_pfn = end_pfn;
> >> -		kdump_thread_args[i].page_data_num = page_data_num;
> >> +		kdump_thread_args[i].page_buf_num = page_buf_num;
> >>   		kdump_thread_args[i].page_data_buf = page_data_buf;
> >> +		kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
> >>   		kdump_thread_args[i].cycle = cycle;
> >>
> >>   		res = pthread_create(threads[i], NULL,
> >> @@ -7356,55 +7403,101 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> >>   		}
> >>   	}
> >>
> >> -	consuming_pfn = start_pfn;
> >> -	index = -1;
> >> +	end_count = 0;
> >> +	while (1) {
> >> +		consuming = 0;
> >> +		check_count = 0;
> >>
> >> -	gettimeofday(&last, NULL);
> >> +		/*
> >> +		 * The basic idea is producer producing page and consumer writing page.
> >> +		 * Each producer have a page_flag_buf list which is used for storing page's description.
> >> +		 * The size of page_flag_buf is little so it won't take too much memory.
> >> +		 * And all producers will share a page_data_buf array which is used for storing page's compressed data.
> >> +		 * The main thread is the consumer. It will find the next pfn and write it into file.
> >> +		 * The next pfn is smallest pfn in all page_flag_buf.
> >> +		 */
> >> +		gettimeofday(&last, NULL);
> >> +		while (1) {
> >> +			current_pfn = end_pfn;
> >>
> >> -	while (consuming_pfn < end_pfn) {
> >> -		index = consuming_pfn % page_data_num;
> >> +			/*
> >> +			 * page_flag_buf is in circular linked list.
> >> +			 * The array info->page_flag_buf[] records the current page_flag_buf in each thread's
> >> +			 * page_flag_buf list.
> >> +			 * consuming is used for recording in which thread the pfn is the smallest.
> >> +			 * current_pfn is used for recording the value of pfn when checking the pfn.
> >> +			 */
> >> +			for (i = 0; i < info->num_threads; i++) {
> >> +				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
> >> +					continue;
> >> +				temp_pfn = info->page_flag_buf[i]->pfn;
> >>
> >> -		gettimeofday(&new, NULL);
> >> -		if (new.tv_sec - last.tv_sec > WAIT_TIME) {
> >> -			ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn);
> >> -			goto out;
> >> -		}
> >> +				/*
> >> +				 * count how many threads have reached the end.
> >> +				 */
> >> +				if (temp_pfn >= end_pfn) {
> >> +					/*
> >> +					 * prevent setting FLAG_UNUSED being optimized.
> >> +					 */
> >> +					MSG("-");
> >
> > Why does this line need?
> > Please clarify why FLAG_UNUSED is optimized.
> >
> 
> I tried but still can't figure out the reason or get better solution.
> I will be so happy if you can help me.
> 
> In gdb, I run it by step.
> The code "info->page_flag_buf[i]->ready = FLAG_UNUSED;" can never be reached.
> If a breakpoint is set here, the code will never be skipped.

In my environment (RHEL 7.2), gdb stops at this line even if MSG("-") is deleted.
See below.

===
[root@ark crash]# gdb makedumpfile-code-3d32567394bac85ee3247780bfa41072b0d9bfe9/makedumpfile
GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-80.el7                                           
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-redhat-linux-gnu".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /work/crash/makedumpfile-code-3d32567394bac85ee3247780bfa41072b0d9bfe9/makedumpfile...done.
(gdb) set args -l -d0 --num-threads=2 ./vmcore ./vmcore.new.2                                 
(gdb) l 7780                                                                                  
7775                                    if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
7776                                            continue;
7777                                    temp_pfn = info->page_flag_buf[i]->pfn;
7778    
7779                                    /*
7780                                     * count how many threads have reached the end.
7781                                     */
7782                                    if (temp_pfn >= end_pfn) {
7783                                            info->page_flag_buf[i]->ready = FLAG_UNUSED;
7784    
(gdb) b 7783
Breakpoint 1 at 0x433778: file makedumpfile.c, line 7783.
(gdb)
(gdb) run
Starting program: /work/crash/makedumpfile-code-3d32567394bac85ee3247780bfa41072b0d9bfe9/makedumpfile -l -d0 --num-threads=2 ./vmcore ./vmcore.new.2
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
The kernel version is not supported.
The makedumpfile operation may be incomplete.
Checking for memory holes          : [100.0 %] |[New Thread 0x7ffff6530700 (LWP 32123)]
[New Thread 0x7ffff5d2f700 (LWP 32124)]
Copying data                       : [ 98.8 %] |
Breakpoint 1, write_kdump_pages_parallel_cyclic (cd_header=<optimized out>, 
    cd_page=<optimized out>, pd_zero=<optimized out>, offset_data=<optimized out>, 
    cycle=<optimized out>) at makedumpfile.c:7783
7783                                            info->page_flag_buf[i]->ready = FLAG_UNUSED;
Missing separate debuginfos, use: debuginfo-install bzip2-libs-1.0.6-13.el7.x86_64 elfutils-libelf-0.163-3.el7.x86_64 elfutils-libs-0.163-3.el7.x86_64 glibc-2.17-105.el7.x86_64 lzo-2.06-8.el7.x86_64 xz-libs-5.1.2-12alpha.el7.x86_64 zlib-1.2.7-15.el7.x86_64
(gdb) bt
#0  write_kdump_pages_parallel_cyclic (cd_header=<optimized out>, cd_page=<optimized out>, 
    pd_zero=<optimized out>, offset_data=<optimized out>, cycle=<optimized out>)
    at makedumpfile.c:7783
#1  0x0000000000439a15 in write_kdump_pages_and_bitmap_cyclic (
    cd_header=cd_header@entry=0x7fffffffe2d0, cd_page=cd_page@entry=0x7fffffffe300)
    at makedumpfile.c:8501
#2  0x000000000043a918 in writeout_dumpfile () at makedumpfile.c:9497
#3  0x000000000044181c in create_dumpfile () at makedumpfile.c:9724
#4  0x00000000004086df in main (argc=<optimized out>, argv=0x7fffffffe468)
    at makedumpfile.c:11159
(gdb)
===

> >>
> >> -		/*
> >> -		 * check pfn first without mutex locked to reduce the time
> >> -		 * trying to lock the mutex
> >> -		 */
> >> -		if (page_data_buf[index].pfn != consuming_pfn)
> >> -			continue;
> >> +					info->page_flag_buf[i]->ready = FLAG_UNUSED;
> >>
> >> -		if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
> >> -			continue;
> >> +					info->current_pfn = end_pfn;
> >
> > This part doesn't get info->current_pfn_mutex.
> > It becomes bigger than end_pfn at the end of producer thread in following case.
> >
> > ===
> >    producer                   Consumer
> > ---------------------------------------------------------
> >    pthread_mutex_lock()
> >    pfn = info->current_pfn
> >                               info->current_pfn = end_pfn
> >    info->current_pfn++
> >      -> end_pfn + 1
> >    pthread_mutex_unlock()
> > ===
> >
> > But I know, if this race is happened, processing other producer thread and consumer thread works well
> > in current cycle.
> > Because each thread checks whether info->current_pfn >= end_pfn.
> >
> > On the other hand, info->current_pfn is initialized to cycle->start_pfn before pthread_create()
> > in write_kdump_pages_parallel_cyclic().
> > This means it does not cause a problem in next cycle, too.
> >
> > In other words, my point is following.
> >
> >    a) When info->current_pfn changes, you have to get info->current_pfn_mutex.
> >    b) If you allow info->current_pfn is bigger than end_pfn at the end of each cycle,
> >       "info->current_pfn = end_pfn;" is unnecessary.
> >
> > Honestly, I don't like approach b).
> 
> You're right. Some thing I thought is wrong.
> But I don't like lock if I have other choice.
> I will set info->current_pfn before returning.
> How about it?

If you mean you set info->current_pfn on the producer side, 
this race will occur between producer A and producer B.

I think you can't avoid taking the mutex lock if you change info->current_pfn.

Thanks,
Minoru Usui


> --
> Thanks
> Zhou
> 
> >
> > Thanks,
> > Minoru Usui
> >
> >> +					end_count++;
> >> +					continue;
> >> +				}
> >> +
> >> +				if (current_pfn < temp_pfn)
> >> +					continue;
> >>
> >> -		/* check whether the found one is ready to be consumed */
> >> -		if (page_data_buf[index].pfn != consuming_pfn ||
> >> -		    page_data_buf[index].ready != 1) {
> >> -			goto unlock;
> >> +				check_count++;
> >> +				consuming = i;
> >> +				current_pfn = temp_pfn;
> >> +			}
> >> +
> >> +			/*
> >> +			 * If all the threads have reached the end, we will finish writing.
> >> +			 */
> >> +			if (end_count >= info->num_threads)
> >> +				goto finish;
> >> +
> >> +			/*
> >> +			 * Since it has the probabilty that there is no page_flag_buf being ready,
> >> +			 * we should recheck if it happens.
> >> +			 */
> >> +			if (check_count == 0)
> >> +				continue;
> >> +
> >> +			/*
> >> +			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
> >> +			 * So we should recheck.
> >> +			 */
> >> +			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
> >> +				gettimeofday(&new, NULL);
> >> +				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
> >> +					ERRMSG("Can't get data of pfn.\n");
> >> +					goto out;
> >> +				}
> >> +				continue;
> >> +			}
> >> +
> >> +			if (current_pfn == info->page_flag_buf[consuming]->pfn)
> >> +				break;
> >>   		}
> >>
> >>   		if ((num_dumped % per) == 0)
> >>   			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
> >>
> >> -		/* next pfn is found, refresh last here */
> >> -		last = new;
> >> -		consuming_pfn++;
> >> -		info->consumed_pfn++;
> >> -		page_data_buf[index].ready = 0;
> >> -
> >> -		if (page_data_buf[index].dumpable == FALSE)
> >> -			goto unlock;
> >> -
> >>   		num_dumped++;
> >>
> >> -		if (page_data_buf[index].zero == TRUE) {
> >> +
> >> +		if (info->page_flag_buf[consuming]->zero == TRUE) {
> >>   			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
> >>   				goto out;
> >>   			pfn_zero++;
> >>   		} else {
> >> +			index = info->page_flag_buf[consuming]->index;
> >>   			pd.flags      = page_data_buf[index].flags;
> >>   			pd.size       = page_data_buf[index].size;
> >>   			pd.page_flags = 0;
> >> @@ -7420,12 +7513,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> >>   			 */
> >>   			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
> >>   				goto out;
> >> -
> >> +			page_data_buf[index].used = FALSE;
> >>   		}
> >> -unlock:
> >> -		pthread_mutex_unlock(&page_data_buf[index].mutex);
> >> +		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
> >> +		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
> >>   	}
> >> -
> >> +finish:
> >>   	ret = TRUE;
> >>   	/*
> >>   	 * print [100 %]
> >> @@ -7464,7 +7557,7 @@ out:
> >>   	}
> >>
> >>   	if (page_data_buf != NULL) {
> >> -		for (i = 0; i < page_data_num; i++) {
> >> +		for (i = 0; i < page_buf_num; i++) {
> >>   			pthread_mutex_destroy(&page_data_buf[i].mutex);
> >>   		}
> >>   	}
> >> @@ -7564,6 +7657,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag
> >>   		num_dumped++;
> >>   		if (!read_pfn(pfn, buf))
> >>   			goto out;
> >> +
> >>   		filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size);
> >>
> >>   		/*
> >> diff --git a/makedumpfile.h b/makedumpfile.h
> >> index e0b5bbf..8a9a5b2 100644
> >> --- a/makedumpfile.h
> >> +++ b/makedumpfile.h
> >> @@ -977,7 +977,7 @@ typedef unsigned long long int ulonglong;
> >>   #define PAGE_DATA_NUM	(50)
> >>   #define WAIT_TIME	(60 * 10)
> >>   #define PTHREAD_FAIL	((void *)-2)
> >> -#define NUM_BUFFERS	(50)
> >> +#define NUM_BUFFERS	(20)
> >>
> >>   struct mmap_cache {
> >>   	char	*mmap_buf;
> >> @@ -985,28 +985,36 @@ struct mmap_cache {
> >>   	off_t   mmap_end_offset;
> >>   };
> >>
> >> +enum {
> >> +	FLAG_UNUSED,
> >> +	FLAG_READY,
> >> +	FLAG_FILLING
> >> +};
> >> +struct page_flag {
> >> +	mdf_pfn_t pfn;
> >> +	char zero;
> >> +	char ready;
> >> +	short index;
> >> +	struct page_flag *next;
> >> +};
> >> +
> >>   struct page_data
> >>   {
> >> -	mdf_pfn_t pfn;
> >> -	int dumpable;
> >> -	int zero;
> >> -	unsigned int flags;
> >> +	pthread_mutex_t mutex;
> >>   	long size;
> >>   	unsigned char *buf;
> >> -	pthread_mutex_t mutex;
> >> -	/*
> >> -	 * whether the page_data is ready to be consumed
> >> -	 */
> >> -	int ready;
> >> +	int flags;
> >> +	int used;
> >>   };
> >>
> >>   struct thread_args {
> >>   	int thread_num;
> >>   	unsigned long len_buf_out;
> >>   	mdf_pfn_t start_pfn, end_pfn;
> >> -	int page_data_num;
> >> +	int page_buf_num;
> >>   	struct cycle *cycle;
> >>   	struct page_data *page_data_buf;
> >> +	struct page_flag *page_flag_buf;
> >>   };
> >>
> >>   /*
> >> @@ -1295,6 +1303,7 @@ struct DumpInfo {
> >>   	pthread_t **threads;
> >>   	struct thread_args *kdump_thread_args;
> >>   	struct page_data *page_data_buf;
> >> +	struct page_flag **page_flag_buf;
> >>   	pthread_rwlock_t usemmap_rwlock;
> >>   	mdf_pfn_t current_pfn;
> >>   	pthread_mutex_t current_pfn_mutex;
> >> --
> >> 1.8.3.1
> >>
> >>
> >>
> >>
> >> _______________________________________________
> >> kexec mailing list
> >> kexec@lists.infradead.org
> >> http://lists.infradead.org/mailman/listinfo/kexec
> 
> 


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-23  8:00     ` Minoru Usui
@ 2016-02-23  8:29       ` "Zhou, Wenjian/周文?"
  2016-02-24  0:45         ` Minoru Usui
  0 siblings, 1 reply; 26+ messages in thread
From: "Zhou, Wenjian/周文?" @ 2016-02-23  8:29 UTC (permalink / raw)
  To: Minoru Usui, kexec

Hi

On 02/23/2016 04:00 PM, Minoru Usui wrote:
>   Hi,
> 
>> -----Original Message-----
>> From: "Zhou, Wenjian/周文?" [mailto:zhouwj-fnst@cn.fujitsu.com]
>> Sent: Tuesday, February 23, 2016 12:45 PM
>> To: Usui Minoru(碓井 成) <min-usui@ti.jp.nec.com>; kexec@lists.infradead.org
>> Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31
>>
>> Hello Usui,
>>
>> On 02/23/2016 09:32 AM, Minoru Usui wrote:
>>> Hello, Zhou
>>>
>>> Thank you for reflecting my comments.
>>> I have other comments.
>>>
>>> See below.
>>>
>>>> -----Original Message-----
>>>> From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of Zhou Wenjian
>>>> Sent: Wednesday, February 17, 2016 4:06 PM
>>>> To: kexec@lists.infradead.org
>>>> Subject: [PATCH v2] Improve the performance of --num-threads -d 31
>>>>
>>>> v2:
>>>>           1. address Minoru Usui's comments about magic number and error message
>>>>           2. fix a bug in cyclic mode caused by optimizing
>>>>
>>>> v1:
>>>>           1. change page_flag.ready's value to enum
>>>>           2. change the patch description
>>>>           3. cleanup some codes
>>>>           4. fix a bug in cyclic mode
>>>>
>>>> multi-threads implementation will introduce extra cost when handling
>>>> each page. The origin implementation will also do the extra work for
>>>> filtered pages. So there is a big performance degradation in
>>>> --num-threads -d 31.
>>>> The new implementation won't do the extra work for filtered pages any
>>>> more. So the performance of -d 31 is close to that of serial processing.
>>>>
>>>> The new implementation is just like the following:
>>>>           * The basic idea is producer producing page and consumer writing page.
>>>>           * Each producer has a page_flag_buf list which is used for storing
>>>>             the page's description.
>>>>           * The size of page_flag_buf is little so it won't take too much memory.
>>>>           * And all producers will share a page_data_buf array which is
>>>>             used for storing page's compressed data.
>>>>           * The main thread is the consumer. It will find the next pfn and write
>>>>             it into file.
>>>>           * The next pfn is the smallest pfn in all page_flag_bufs.
>>>>
>>>> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com>
>>>> ---
>>>>    makedumpfile.c | 266 ++++++++++++++++++++++++++++++++++++++-------------------
>>>>    makedumpfile.h |  31 ++++---
>>>>    2 files changed, 200 insertions(+), 97 deletions(-)
>>>>
>>>> diff --git a/makedumpfile.c b/makedumpfile.c
>>>> index fa0b779..31329b5 100644
>>>> --- a/makedumpfile.c
>>>> +++ b/makedumpfile.c
>>>> @@ -3483,7 +3483,8 @@ initial_for_parallel()
>>>>    	unsigned long page_data_buf_size;
>>>>    	unsigned long limit_size;
>>>>    	int page_data_num;
>>>> -	int i;
>>>> +	struct page_flag *current;
>>>> +	int i, j;
>>>>
>>>>    	len_buf_out = calculate_len_buf_out(info->page_size);
>>>>
>>>> @@ -3562,8 +3563,10 @@ initial_for_parallel()
>>>>    		      - MAP_REGION * info->num_threads) * 0.6;
>>>>
>>>>    	page_data_num = limit_size / page_data_buf_size;
>>>> +	info->num_buffers = 3 * info->num_threads;
>>>>
>>>> -	info->num_buffers = MIN(NUM_BUFFERS, page_data_num);
>>>> +	info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS);
>>>> +	info->num_buffers = MIN(info->num_buffers, page_data_num);
>>>>
>>>>    	DEBUG_MSG("Number of struct page_data for produce/consume: %d\n",
>>>>    			info->num_buffers);
>>>> @@ -3588,6 +3591,36 @@ initial_for_parallel()
>>>>    	}
>>>>
>>>>    	/*
>>>> +	 * initial page_flag for each thread
>>>> +	 */
>>>> +	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
>>>> +	    == NULL) {
>>>> +		MSG("Can't allocate memory for page_flag_buf. %s\n",
>>>> +				strerror(errno));
>>>> +		return FALSE;
>>>> +	}
>>>> +	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
>>>> +
>>>> +	for (i = 0; i < info->num_threads; i++) {
>>>> +		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
>>>> +			MSG("Can't allocate memory for page_flag. %s\n",
>>>> +				strerror(errno));
>>>> +			return FALSE;
>>>> +		}
>>>> +		current = info->page_flag_buf[i];
>>>> +
>>>> +		for (j = 1; j < NUM_BUFFERS; j++) {
>>>> +			if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) {
>>>> +				MSG("Can't allocate memory for page_flag. %s\n",
>>>> +					strerror(errno));
>>>> +				return FALSE;
>>>> +			}
>>>> +			current = current->next;
>>>> +		}
>>>> +		current->next = info->page_flag_buf[i];
>>>> +	}
>>>> +
>>>> +	/*
>>>>    	 * initial fd_memory for threads
>>>>    	 */
>>>>    	for (i = 0; i < info->num_threads; i++) {
>>>> @@ -3612,7 +3645,8 @@ initial_for_parallel()
>>>>    void
>>>>    free_for_parallel()
>>>>    {
>>>> -	int i;
>>>> +	int i, j;
>>>> +	struct page_flag *current;
>>>>
>>>>    	if (info->threads != NULL) {
>>>>    		for (i = 0; i < info->num_threads; i++) {
>>>> @@ -3655,6 +3689,19 @@ free_for_parallel()
>>>>    		free(info->page_data_buf);
>>>>    	}
>>>>
>>>> +	if (info->page_flag_buf != NULL) {
>>>> +		for (i = 0; i < info->num_threads; i++) {
>>>> +			for (j = 0; j < NUM_BUFFERS; j++) {
>>>> +				if (info->page_flag_buf[i] != NULL) {
>>>> +					current = info->page_flag_buf[i];
>>>> +					info->page_flag_buf[i] = current->next;
>>>> +					free(current);
>>>> +				}
>>>> +			}
>>>> +		}
>>>> +		free(info->page_flag_buf);
>>>> +	}
>>>> +
>>>>    	if (info->parallel_info == NULL)
>>>>    		return;
>>>>
>>>> @@ -7076,10 +7123,10 @@ kdump_thread_function_cyclic(void *arg) {
>>>>    	void *retval = PTHREAD_FAIL;
>>>>    	struct thread_args *kdump_thread_args = (struct thread_args *)arg;
>>>>    	struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
>>>> +	struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
>>>>    	struct cycle *cycle = kdump_thread_args->cycle;
>>>> -	int page_data_num = kdump_thread_args->page_data_num;
>>>> -	mdf_pfn_t pfn;
>>>> -	int index;
>>>> +	mdf_pfn_t pfn = cycle->start_pfn;
>>>> +	int index = kdump_thread_args->thread_num;
>>>>    	int buf_ready;
>>>>    	int dumpable;
>>>>    	int fd_memory = 0;
>>>> @@ -7125,47 +7172,46 @@ kdump_thread_function_cyclic(void *arg) {
>>>>    						kdump_thread_args->thread_num);
>>>>    	}
>>>>
>>>> -	while (1) {
>>>> -		/* get next pfn */
>>>> -		pthread_mutex_lock(&info->current_pfn_mutex);
>>>> -		pfn = info->current_pfn;
>>>> -		info->current_pfn++;
>>>> -		pthread_mutex_unlock(&info->current_pfn_mutex);
>>>> +	/*
>>>> +	 * filtered page won't take anything
>>>> +	 * unfiltered zero page will only take a page_flag_buf
>>>> +	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
>>>> +	 */
>>>> +	while (pfn < kdump_thread_args->end_pfn) {
>>>> +		buf_ready = FALSE;
>>>>
>>>> -		if (pfn >= kdump_thread_args->end_pfn)
>>>> -			break;
>>>> +		while (page_data_buf[index].used != FALSE ||
>>>> +				pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
>>>> +			index = (index + 1) % info->num_buffers;
>>>> -		index = -1;
>>>> -		buf_ready = FALSE;
>>>> +		page_data_buf[index].used = TRUE;
>>>>
>>>>    		while (buf_ready == FALSE) {
>>>>    			pthread_testcancel();
>>>> -
>>>> -			index = pfn % page_data_num;
>>>> -
>>>> -			if (pfn - info->consumed_pfn > info->num_buffers)
>>>> -				continue;
>>>> -
>>>> -			if (page_data_buf[index].ready != 0)
>>>> +			if (page_flag_buf->ready == FLAG_READY)
>>>>    				continue;
>>>>
>>>> -			pthread_mutex_lock(&page_data_buf[index].mutex);
>>>> -
>>>> -			if (page_data_buf[index].ready != 0)
>>>> -				goto unlock;
>>>> +			/* get next pfn */
>>>> +			pthread_mutex_lock(&info->current_pfn_mutex);
>>>> +			pfn = info->current_pfn;
>>>> +			info->current_pfn++;
>>>> +			page_flag_buf->ready = FLAG_FILLING;
>>>> +			pthread_mutex_unlock(&info->current_pfn_mutex);
>>>>
>>>> -			buf_ready = TRUE;
>>>> +			page_flag_buf->pfn = pfn;
>>>>
>>>> -			page_data_buf[index].pfn = pfn;
>>>> -			page_data_buf[index].ready = 1;
>>>> +			if (pfn >= kdump_thread_args->end_pfn) {
>>>> +				page_data_buf[index].used = FALSE;
>>>> +				page_flag_buf->ready = FLAG_READY;
>>>> +				break;
>>>> +			}
>>>>
>>>>    			dumpable = is_dumpable(
>>>>    				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
>>>>    				pfn,
>>>>    				cycle);
>>>> -			page_data_buf[index].dumpable = dumpable;
>>>>    			if (!dumpable)
>>>> -				goto unlock;
>>>> +				continue;
>>>>
>>>>    			if (!read_pfn_parallel(fd_memory, pfn, buf,
>>>>    					       &bitmap_memory_parallel,
>>>> @@ -7178,11 +7224,11 @@ kdump_thread_function_cyclic(void *arg) {
>>>>
>>>>    			if ((info->dump_level & DL_EXCLUDE_ZERO)
>>>>    			    && is_zero_page(buf, info->page_size)) {
>>>> -				page_data_buf[index].zero = TRUE;
>>>> -				goto unlock;
>>>> +				page_flag_buf->zero = TRUE;
>>>> +				goto next;
>>>>    			}
>>>>
>>>> -			page_data_buf[index].zero = FALSE;
>>>> +			page_flag_buf->zero = FALSE;
>>>>
>>>>    			/*
>>>>    			 * Compress the page data.
>>>> @@ -7232,12 +7278,16 @@ kdump_thread_function_cyclic(void *arg) {
>>>>    				page_data_buf[index].size  = info->page_size;
>>>>    				memcpy(page_data_buf[index].buf, buf, info->page_size);
>>>>    			}
>>>> -unlock:
>>>> -			pthread_mutex_unlock(&page_data_buf[index].mutex);
>>>> +			page_flag_buf->index = index;
>>>> +			buf_ready = TRUE;
>>>> +next:
>>>> +			page_flag_buf->ready = FLAG_READY;
>>>> +			page_flag_buf = page_flag_buf->next;
>>>>
>>>>    		}
>>>> -	}
>>>>
>>>> +		pthread_mutex_unlock(&page_data_buf[index].mutex);
>>>> +	}
>>>>    	retval = NULL;
>>>>
>>>>    fail:
>>>> @@ -7265,14 +7315,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>>>    	struct page_desc pd;
>>>>    	struct timeval tv_start;
>>>>    	struct timeval last, new;
>>>> -	unsigned long long consuming_pfn;
>>>>    	pthread_t **threads = NULL;
>>>>    	struct thread_args *kdump_thread_args = NULL;
>>>>    	void *thread_result;
>>>> -	int page_data_num;
>>>> +	int page_buf_num;
>>>>    	struct page_data *page_data_buf = NULL;
>>>>    	int i;
>>>>    	int index;
>>>> +	int end_count, consuming, check_count;
>>>> +	mdf_pfn_t current_pfn, temp_pfn;
>>>>
>>>>    	if (info->flag_elf_dumpfile)
>>>>    		return FALSE;
>>>> @@ -7319,16 +7370,11 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>>>    	threads = info->threads;
>>>>    	kdump_thread_args = info->kdump_thread_args;
>>>>
>>>> -	page_data_num = info->num_buffers;
>>>> +	page_buf_num = info->num_buffers;
>>>>    	page_data_buf = info->page_data_buf;
>>>>
>>>> -	for (i = 0; i < page_data_num; i++) {
>>>> -		/*
>>>> -		 * producer will use pfn in page_data_buf to decide the
>>>> -		 * consumed pfn
>>>> -		 */
>>>> -		page_data_buf[i].pfn = start_pfn - 1;
>>>> -		page_data_buf[i].ready = 0;
>>>> +	for (i = 0; i < page_buf_num; i++) {
>>>> +		page_data_buf[i].used = FALSE;
>>>>    		res = pthread_mutex_init(&page_data_buf[i].mutex, NULL);
>>>>    		if (res != 0) {
>>>>    			ERRMSG("Can't initialize mutex of page_data_buf. %s\n",
>>>> @@ -7342,8 +7388,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>>>    		kdump_thread_args[i].len_buf_out = len_buf_out;
>>>>    		kdump_thread_args[i].start_pfn = start_pfn;
>>>>    		kdump_thread_args[i].end_pfn = end_pfn;
>>>> -		kdump_thread_args[i].page_data_num = page_data_num;
>>>> +		kdump_thread_args[i].page_buf_num = page_buf_num;
>>>>    		kdump_thread_args[i].page_data_buf = page_data_buf;
>>>> +		kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
>>>>    		kdump_thread_args[i].cycle = cycle;
>>>>
>>>>    		res = pthread_create(threads[i], NULL,
>>>> @@ -7356,55 +7403,101 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>>>    		}
>>>>    	}
>>>>
>>>> -	consuming_pfn = start_pfn;
>>>> -	index = -1;
>>>> +	end_count = 0;
>>>> +	while (1) {
>>>> +		consuming = 0;
>>>> +		check_count = 0;
>>>>
>>>> -	gettimeofday(&last, NULL);
>>>> +		/*
>>>> +		 * The basic idea is producer producing page and consumer writing page.
>>>> +		 * Each producer has a page_flag_buf list which is used for storing the page's description.
>>>> +		 * The size of page_flag_buf is small so it won't take too much memory.
>>>> +		 * And all producers will share a page_data_buf array which is used for storing the page's compressed data.
>>>> +		 * The main thread is the consumer. It will find the next pfn and write it into the file.
>>>> +		 * The next pfn is the smallest pfn in all page_flag_bufs.
>>>> +		 */
>>>> +		gettimeofday(&last, NULL);
>>>> +		while (1) {
>>>> +			current_pfn = end_pfn;
>>>>
>>>> -	while (consuming_pfn < end_pfn) {
>>>> -		index = consuming_pfn % page_data_num;
>>>> +			/*
>>>> +			 * page_flag_buf is in circular linked list.
>>>> +			 * The array info->page_flag_buf[] records the current page_flag_buf in each thread's
>>>> +			 * page_flag_buf list.
>>>> +			 * consuming is used for recording in which thread the pfn is the smallest.
>>>> +			 * current_pfn is used for recording the value of pfn when checking the pfn.
>>>> +			 */
>>>> +			for (i = 0; i < info->num_threads; i++) {
>>>> +				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
>>>> +					continue;
>>>> +				temp_pfn = info->page_flag_buf[i]->pfn;
>>>>
>>>> -		gettimeofday(&new, NULL);
>>>> -		if (new.tv_sec - last.tv_sec > WAIT_TIME) {
>>>> -			ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn);
>>>> -			goto out;
>>>> -		}
>>>> +				/*
>>>> +				 * count how many threads have reached the end.
>>>> +				 */
>>>> +				if (temp_pfn >= end_pfn) {
>>>> +					/*
>>>> +					 * prevent the store of FLAG_UNUSED from being optimized out.
>>>> +					 */
>>>> +					MSG("-");
>>>
>>> Why does this line need?
>>> Please clarify why FLAG_UNUSED is optimized.
>>>
>>
>> I tried but still can't figure out the reason or get a better solution.
>> I will be so happy if you can help me.
>>
>> In gdb, I run it by step.
>> The code "info->page_flag_buf[i]->ready = FLAG_UNUSED;" can never be reached.
>> If a breakpoint is set here, the code will never be skipped.
> 
> On my environment, RHEL7.2, gdb stops at this line even if MSG("-") is deleted.
> See below.
> 
> ===
> [root@ark crash]# gdb makedumpfile-code-3d32567394bac85ee3247780bfa41072b0d9bfe9/makedumpfile
> GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-80.el7
> Copyright (C) 2013 Free Software Foundation, Inc.
> License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
> This is free software: you are free to change and redistribute it.
> There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
> and "show warranty" for details.
> This GDB was configured as "x86_64-redhat-linux-gnu".
> For bug reporting instructions, please see:
> <http://www.gnu.org/software/gdb/bugs/>...
> Reading symbols from /work/crash/makedumpfile-code-3d32567394bac85ee3247780bfa41072b0d9bfe9/makedumpfile...done.
> (gdb) set args -l -d0 --num-threads=2 ./vmcore ./vmcore.new.2
> (gdb) l 7780
> 7775                                    if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
> 7776                                            continue;
> 7777                                    temp_pfn = info->page_flag_buf[i]->pfn;
> 7778
> 7779                                    /*
> 7780                                     * count how many threads have reached the end.
> 7781                                     */
> 7782                                    if (temp_pfn >= end_pfn) {
> 7783                                            info->page_flag_buf[i]->ready = FLAG_UNUSED;
> 7784
> (gdb) b 7783
> Breakpoint 1 at 0x433778: file makedumpfile.c, line 7783.
> (gdb)
> (gdb) run
> Starting program: /work/crash/makedumpfile-code-3d32567394bac85ee3247780bfa41072b0d9bfe9/makedumpfile -l -d0 --num-threads=2 ./vmcore ./vmcore.new.2
> [Thread debugging using libthread_db enabled]
> Using host libthread_db library "/lib64/libthread_db.so.1".
> The kernel version is not supported.
> The makedumpfile operation may be incomplete.
> Checking for memory holes          : [100.0 %] |[New Thread 0x7ffff6530700 (LWP 32123)]
> [New Thread 0x7ffff5d2f700 (LWP 32124)]
> Copying data                       : [ 98.8 %] |
> Breakpoint 1, write_kdump_pages_parallel_cyclic (cd_header=<optimized out>,
>      cd_page=<optimized out>, pd_zero=<optimized out>, offset_data=<optimized out>,
>      cycle=<optimized out>) at makedumpfile.c:7783
> 7783                                            info->page_flag_buf[i]->ready = FLAG_UNUSED;
> Missing separate debuginfos, use: debuginfo-install bzip2-libs-1.0.6-13.el7.x86_64 elfutils-libelf-0.163-3.el7.x86_64 elfutils-libs-0.163-3.el7.x86_64 glibc-2.17-105.el7.x86_64 lzo-2.06-8.el7.x86_64 xz-libs-5.1.2-12alpha.el7.x86_64 zlib-1.2.7-15.el7.x86_64
> (gdb) bt
> #0  write_kdump_pages_parallel_cyclic (cd_header=<optimized out>, cd_page=<optimized out>,
>      pd_zero=<optimized out>, offset_data=<optimized out>, cycle=<optimized out>)
>      at makedumpfile.c:7783
> #1  0x0000000000439a15 in write_kdump_pages_and_bitmap_cyclic (
>      cd_header=cd_header@entry=0x7fffffffe2d0, cd_page=cd_page@entry=0x7fffffffe300)
>      at makedumpfile.c:8501
> #2  0x000000000043a918 in writeout_dumpfile () at makedumpfile.c:9497
> #3  0x000000000044181c in create_dumpfile () at makedumpfile.c:9724
> #4  0x00000000004086df in main (argc=<optimized out>, argv=0x7fffffffe468)
>      at makedumpfile.c:11159
> (gdb)
> ===
> 

Yes, it won't be skipped every time without MSG("-").
But it did occur sometimes.
And unfortunately, MSG("-") doesn't help much either.

I guess it is the same problem that minfei meets.

I'm doing some further investigation.

>>>>
>>>> -		/*
>>>> -		 * check pfn first without mutex locked to reduce the time
>>>> -		 * trying to lock the mutex
>>>> -		 */
>>>> -		if (page_data_buf[index].pfn != consuming_pfn)
>>>> -			continue;
>>>> +					info->page_flag_buf[i]->ready = FLAG_UNUSED;
>>>>
>>>> -		if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
>>>> -			continue;
>>>> +					info->current_pfn = end_pfn;
>>>
>>> This part doesn't get info->current_pfn_mutex.
>>> It becomes bigger than end_pfn at the end of producer thread in following case.
>>>
>>> ===
>>>     producer                   Consumer
>>> ---------------------------------------------------------
>>>     pthread_mutex_lock()
>>>     pfn = info->current_pfn
>>>                                info->current_pfn = end_pfn
>>>     info->current_pfn++
>>>       -> end_pfn + 1
>>>     pthread_mutex_unlock()
>>> ===
>>>
>>> But I know, if this race is happened, processing other producer thread and consumer thread works well
>>> in current cycle.
>>> Because each thread checks whether info->current_pfn >= end_pfn.
>>>
>>> On the other hand, info->current_pfn is initialized to cycle->start_pfn before pthread_create()
>>> in write_kdump_pages_parallel_cyclic().
>>> This means it does not cause a problem in next cycle, too.
>>>
>>> In other words, my point is following.
>>>
>>>     a) When info->current_pfn changes, you have to get info->current_pfn_mutex.
>>>     b) If you allow info->current_pfn is bigger than end_pfn at the end of each cycle,
>>>        "info->current_pfn = end_pfn;" is unnecessary.
>>>
>>> Honestly, I don't like approach b).
>>
>> You're right. Something I thought was wrong.
>> But I don't like locking if I have another choice.
>> I will set info->current_pfn before returning.
>> How about it?
> 
> If you mean you set info->current_pfn by producer side,
> this race will occur between producer A and producer B.
> 
> I think you can't avoid getting mutex lock, if you will change info->current_pfn.
> 

I mean setting it by consumer.

-- 
Thanks
Zhou

> Thanks,
> Minoru Usui
> 
> 
>> --
>> Thanks
>> Zhou
>>
>>>
>>> Thanks,
>>> Minoru Usui
>>>
>>>> +					end_count++;
>>>> +					continue;
>>>> +				}
>>>> +
>>>> +				if (current_pfn < temp_pfn)
>>>> +					continue;
>>>>
>>>> -		/* check whether the found one is ready to be consumed */
>>>> -		if (page_data_buf[index].pfn != consuming_pfn ||
>>>> -		    page_data_buf[index].ready != 1) {
>>>> -			goto unlock;
>>>> +				check_count++;
>>>> +				consuming = i;
>>>> +				current_pfn = temp_pfn;
>>>> +			}
>>>> +
>>>> +			/*
>>>> +			 * If all the threads have reached the end, we will finish writing.
>>>> +			 */
>>>> +			if (end_count >= info->num_threads)
>>>> +				goto finish;
>>>> +
>>>> +			/*
>>>> +			 * Since there is a probability that no page_flag_buf is ready,
>>>> +			 * we should recheck if it happens.
>>>> +			 */
>>>> +			if (check_count == 0)
>>>> +				continue;
>>>> +
>>>> +			/*
>>>> +			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
>>>> +			 * So we should recheck.
>>>> +			 */
>>>> +			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
>>>> +				gettimeofday(&new, NULL);
>>>> +				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
>>>> +					ERRMSG("Can't get data of pfn.\n");
>>>> +					goto out;
>>>> +				}
>>>> +				continue;
>>>> +			}
>>>> +
>>>> +			if (current_pfn == info->page_flag_buf[consuming]->pfn)
>>>> +				break;
>>>>    		}
>>>>
>>>>    		if ((num_dumped % per) == 0)
>>>>    			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
>>>>
>>>> -		/* next pfn is found, refresh last here */
>>>> -		last = new;
>>>> -		consuming_pfn++;
>>>> -		info->consumed_pfn++;
>>>> -		page_data_buf[index].ready = 0;
>>>> -
>>>> -		if (page_data_buf[index].dumpable == FALSE)
>>>> -			goto unlock;
>>>> -
>>>>    		num_dumped++;
>>>>
>>>> -		if (page_data_buf[index].zero == TRUE) {
>>>> +
>>>> +		if (info->page_flag_buf[consuming]->zero == TRUE) {
>>>>    			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
>>>>    				goto out;
>>>>    			pfn_zero++;
>>>>    		} else {
>>>> +			index = info->page_flag_buf[consuming]->index;
>>>>    			pd.flags      = page_data_buf[index].flags;
>>>>    			pd.size       = page_data_buf[index].size;
>>>>    			pd.page_flags = 0;
>>>> @@ -7420,12 +7513,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>>>    			 */
>>>>    			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
>>>>    				goto out;
>>>> -
>>>> +			page_data_buf[index].used = FALSE;
>>>>    		}
>>>> -unlock:
>>>> -		pthread_mutex_unlock(&page_data_buf[index].mutex);
>>>> +		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
>>>> +		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
>>>>    	}
>>>> -
>>>> +finish:
>>>>    	ret = TRUE;
>>>>    	/*
>>>>    	 * print [100 %]
>>>> @@ -7464,7 +7557,7 @@ out:
>>>>    	}
>>>>
>>>>    	if (page_data_buf != NULL) {
>>>> -		for (i = 0; i < page_data_num; i++) {
>>>> +		for (i = 0; i < page_buf_num; i++) {
>>>>    			pthread_mutex_destroy(&page_data_buf[i].mutex);
>>>>    		}
>>>>    	}
>>>> @@ -7564,6 +7657,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag
>>>>    		num_dumped++;
>>>>    		if (!read_pfn(pfn, buf))
>>>>    			goto out;
>>>> +
>>>>    		filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size);
>>>>
>>>>    		/*
>>>> diff --git a/makedumpfile.h b/makedumpfile.h
>>>> index e0b5bbf..8a9a5b2 100644
>>>> --- a/makedumpfile.h
>>>> +++ b/makedumpfile.h
>>>> @@ -977,7 +977,7 @@ typedef unsigned long long int ulonglong;
>>>>    #define PAGE_DATA_NUM	(50)
>>>>    #define WAIT_TIME	(60 * 10)
>>>>    #define PTHREAD_FAIL	((void *)-2)
>>>> -#define NUM_BUFFERS	(50)
>>>> +#define NUM_BUFFERS	(20)
>>>>
>>>>    struct mmap_cache {
>>>>    	char	*mmap_buf;
>>>> @@ -985,28 +985,36 @@ struct mmap_cache {
>>>>    	off_t   mmap_end_offset;
>>>>    };
>>>>
>>>> +enum {
>>>> +	FLAG_UNUSED,
>>>> +	FLAG_READY,
>>>> +	FLAG_FILLING
>>>> +};
>>>> +struct page_flag {
>>>> +	mdf_pfn_t pfn;
>>>> +	char zero;
>>>> +	char ready;
>>>> +	short index;
>>>> +	struct page_flag *next;
>>>> +};
>>>> +
>>>>    struct page_data
>>>>    {
>>>> -	mdf_pfn_t pfn;
>>>> -	int dumpable;
>>>> -	int zero;
>>>> -	unsigned int flags;
>>>> +	pthread_mutex_t mutex;
>>>>    	long size;
>>>>    	unsigned char *buf;
>>>> -	pthread_mutex_t mutex;
>>>> -	/*
>>>> -	 * whether the page_data is ready to be consumed
>>>> -	 */
>>>> -	int ready;
>>>> +	int flags;
>>>> +	int used;
>>>>    };
>>>>
>>>>    struct thread_args {
>>>>    	int thread_num;
>>>>    	unsigned long len_buf_out;
>>>>    	mdf_pfn_t start_pfn, end_pfn;
>>>> -	int page_data_num;
>>>> +	int page_buf_num;
>>>>    	struct cycle *cycle;
>>>>    	struct page_data *page_data_buf;
>>>> +	struct page_flag *page_flag_buf;
>>>>    };
>>>>
>>>>    /*
>>>> @@ -1295,6 +1303,7 @@ struct DumpInfo {
>>>>    	pthread_t **threads;
>>>>    	struct thread_args *kdump_thread_args;
>>>>    	struct page_data *page_data_buf;
>>>> +	struct page_flag **page_flag_buf;
>>>>    	pthread_rwlock_t usemmap_rwlock;
>>>>    	mdf_pfn_t current_pfn;
>>>>    	pthread_mutex_t current_pfn_mutex;
>>>> --
>>>> 1.8.3.1
>>>>
>>>>
>>>>
>>>>
>>>> _______________________________________________
>>>> kexec mailing list
>>>> kexec@lists.infradead.org
>>>> http://lists.infradead.org/mailman/listinfo/kexec
>>
>>
> 
> 
> 



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-23  8:29       ` "Zhou, Wenjian/周文?"
@ 2016-02-24  0:45         ` Minoru Usui
  2016-03-01  7:49           ` "Zhou, Wenjian/周文剑"
  0 siblings, 1 reply; 26+ messages in thread
From: Minoru Usui @ 2016-02-24  0:45 UTC (permalink / raw)
  To: "Zhou, Wenjian/周文剑", kexec

Hi, Zhou

> -----Original Message-----
> From: "Zhou, Wenjian/周文剑" [mailto:zhouwj-fnst@cn.fujitsu.com]
> Sent: Tuesday, February 23, 2016 5:30 PM
> To: Usui Minoru(碓井 成) <min-usui@ti.jp.nec.com>; kexec@lists.infradead.org
> Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31
> 
> Hi
> 
> On 02/23/2016 04:00 PM, Minoru Usui wrote:
> >   Hi,
> >
> >> -----Original Message-----
> >> From: "Zhou, Wenjian/周文剑" [mailto:zhouwj-fnst@cn.fujitsu.com]
> >> Sent: Tuesday, February 23, 2016 12:45 PM
> >> To: Usui Minoru(碓井 成) <min-usui@ti.jp.nec.com>; kexec@lists.infradead.org
> >> Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31
> >>
> >> Hello Usui,
> >>
> >> On 02/23/2016 09:32 AM, Minoru Usui wrote:
> >>> Hello, Zhou
> >>>
> >>> Thank you for reflecting my comments.
> >>> I have other comments.
> >>>
> >>> See below.
> >>>
> >>>> -----Original Message-----
> >>>> From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of Zhou Wenjian
> >>>> Sent: Wednesday, February 17, 2016 4:06 PM
> >>>> To: kexec@lists.infradead.org
> >>>> Subject: [PATCH v2] Improve the performance of --num-threads -d 31
> >>>>
> >>>> v2:
> >>>>           1. address Minoru Usui's comments about magic number and error message
> >>>>           2. fix a bug in cyclic mode caused by optimizing
> >>>>
> >>>> v1:
> >>>>           1. change page_flag.ready's value to enum
> >>>>           2. change the patch description
> >>>>           3. cleanup some codes
> >>>>           4. fix a bug in cyclic mode
> >>>>
> >>>> The multi-threads implementation will introduce extra cost when handling
> >>>> each page. The original implementation will also do the extra work for
> >>>> filtered pages. So there is a big performance degradation in
> >>>> --num-threads -d 31.
> >>>> The new implementation won't do the extra work for filtered pages any
> >>>> more. So the performance of -d 31 is close to that of serial processing.
> >>>>
> >>>> The new implementation is just like the following:
> >>>>           * The basic idea is producers producing pages and a consumer writing pages.
> >>>>           * Each producer has a page_flag_buf list which is used for storing
> >>>>             a page's description.
> >>>>           * The size of page_flag_buf is small so it won't take too much memory.
> >>>>           * And all producers will share a page_data_buf array which is
> >>>>             used for storing pages' compressed data.
> >>>>           * The main thread is the consumer. It will find the next pfn and write
> >>>>             it into the file.
> >>>>           * The next pfn is the smallest pfn in all page_flag_buf lists.
> >>>>
> >>>> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com>
> >>>> ---
> >>>>    makedumpfile.c | 266 ++++++++++++++++++++++++++++++++++++++-------------------
> >>>>    makedumpfile.h |  31 ++++---
> >>>>    2 files changed, 200 insertions(+), 97 deletions(-)
> >>>>
> >>>> diff --git a/makedumpfile.c b/makedumpfile.c
> >>>> index fa0b779..31329b5 100644
> >>>> --- a/makedumpfile.c
> >>>> +++ b/makedumpfile.c
> >>>> @@ -3483,7 +3483,8 @@ initial_for_parallel()
> >>>>    	unsigned long page_data_buf_size;
> >>>>    	unsigned long limit_size;
> >>>>    	int page_data_num;
> >>>> -	int i;
> >>>> +	struct page_flag *current;
> >>>> +	int i, j;
> >>>>
> >>>>    	len_buf_out = calculate_len_buf_out(info->page_size);
> >>>>
> >>>> @@ -3562,8 +3563,10 @@ initial_for_parallel()
> >>>>    		      - MAP_REGION * info->num_threads) * 0.6;
> >>>>
> >>>>    	page_data_num = limit_size / page_data_buf_size;
> >>>> +	info->num_buffers = 3 * info->num_threads;
> >>>>
> >>>> -	info->num_buffers = MIN(NUM_BUFFERS, page_data_num);
> >>>> +	info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS);
> >>>> +	info->num_buffers = MIN(info->num_buffers, page_data_num);
> >>>>
> >>>>    	DEBUG_MSG("Number of struct page_data for produce/consume: %d\n",
> >>>>    			info->num_buffers);
> >>>> @@ -3588,6 +3591,36 @@ initial_for_parallel()
> >>>>    	}
> >>>>
> >>>>    	/*
> >>>> +	 * initial page_flag for each thread
> >>>> +	 */
> >>>> +	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
> >>>> +	    == NULL) {
> >>>> +		MSG("Can't allocate memory for page_flag_buf. %s\n",
> >>>> +				strerror(errno));
> >>>> +		return FALSE;
> >>>> +	}
> >>>> +	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
> >>>> +
> >>>> +	for (i = 0; i < info->num_threads; i++) {
> >>>> +		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
> >>>> +			MSG("Can't allocate memory for page_flag. %s\n",
> >>>> +				strerror(errno));
> >>>> +			return FALSE;
> >>>> +		}
> >>>> +		current = info->page_flag_buf[i];
> >>>> +
> >>>> +		for (j = 1; j < NUM_BUFFERS; j++) {
> >>>> +			if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) {
> >>>> +				MSG("Can't allocate memory for page_flag. %s\n",
> >>>> +					strerror(errno));
> >>>> +				return FALSE;
> >>>> +			}
> >>>> +			current = current->next;
> >>>> +		}
> >>>> +		current->next = info->page_flag_buf[i];
> >>>> +	}
> >>>> +
> >>>> +	/*
> >>>>    	 * initial fd_memory for threads
> >>>>    	 */
> >>>>    	for (i = 0; i < info->num_threads; i++) {
> >>>> @@ -3612,7 +3645,8 @@ initial_for_parallel()
> >>>>    void
> >>>>    free_for_parallel()
> >>>>    {
> >>>> -	int i;
> >>>> +	int i, j;
> >>>> +	struct page_flag *current;
> >>>>
> >>>>    	if (info->threads != NULL) {
> >>>>    		for (i = 0; i < info->num_threads; i++) {
> >>>> @@ -3655,6 +3689,19 @@ free_for_parallel()
> >>>>    		free(info->page_data_buf);
> >>>>    	}
> >>>>
> >>>> +	if (info->page_flag_buf != NULL) {
> >>>> +		for (i = 0; i < info->num_threads; i++) {
> >>>> +			for (j = 0; j < NUM_BUFFERS; j++) {
> >>>> +				if (info->page_flag_buf[i] != NULL) {
> >>>> +					current = info->page_flag_buf[i];
> >>>> +					info->page_flag_buf[i] = current->next;
> >>>> +					free(current);
> >>>> +				}
> >>>> +			}
> >>>> +		}
> >>>> +		free(info->page_flag_buf);
> >>>> +	}
> >>>> +
> >>>>    	if (info->parallel_info == NULL)
> >>>>    		return;
> >>>>
> >>>> @@ -7076,10 +7123,10 @@ kdump_thread_function_cyclic(void *arg) {
> >>>>    	void *retval = PTHREAD_FAIL;
> >>>>    	struct thread_args *kdump_thread_args = (struct thread_args *)arg;
> >>>>    	struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
> >>>> +	struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
> >>>>    	struct cycle *cycle = kdump_thread_args->cycle;
> >>>> -	int page_data_num = kdump_thread_args->page_data_num;
> >>>> -	mdf_pfn_t pfn;
> >>>> -	int index;
> >>>> +	mdf_pfn_t pfn = cycle->start_pfn;
> >>>> +	int index = kdump_thread_args->thread_num;
> >>>>    	int buf_ready;
> >>>>    	int dumpable;
> >>>>    	int fd_memory = 0;
> >>>> @@ -7125,47 +7172,46 @@ kdump_thread_function_cyclic(void *arg) {
> >>>>    						kdump_thread_args->thread_num);
> >>>>    	}
> >>>>
> >>>> -	while (1) {
> >>>> -		/* get next pfn */
> >>>> -		pthread_mutex_lock(&info->current_pfn_mutex);
> >>>> -		pfn = info->current_pfn;
> >>>> -		info->current_pfn++;
> >>>> -		pthread_mutex_unlock(&info->current_pfn_mutex);
> >>>> +	/*
> >>>> +	 * filtered page won't take anything
> >>>> +	 * unfiltered zero page will only take a page_flag_buf
> >>>> +	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
> >>>> +	 */
> >>>> +	while (pfn < kdump_thread_args->end_pfn) {
> >>>> +		buf_ready = FALSE;
> >>>>
> >>>> -		if (pfn >= kdump_thread_args->end_pfn)
> >>>> -			break;
> >>>> +		while (page_data_buf[index].used != FALSE ||
> >>>> +				pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
> >>>> +			index = (index + 1) % info->num_buffers;
> >>>> -		index = -1;
> >>>> -		buf_ready = FALSE;
> >>>> +		page_data_buf[index].used = TRUE;
> >>>>
> >>>>    		while (buf_ready == FALSE) {
> >>>>    			pthread_testcancel();
> >>>> -
> >>>> -			index = pfn % page_data_num;
> >>>> -
> >>>> -			if (pfn - info->consumed_pfn > info->num_buffers)
> >>>> -				continue;
> >>>> -
> >>>> -			if (page_data_buf[index].ready != 0)
> >>>> +			if (page_flag_buf->ready == FLAG_READY)
> >>>>    				continue;
> >>>>
> >>>> -			pthread_mutex_lock(&page_data_buf[index].mutex);
> >>>> -
> >>>> -			if (page_data_buf[index].ready != 0)
> >>>> -				goto unlock;
> >>>> +			/* get next pfn */
> >>>> +			pthread_mutex_lock(&info->current_pfn_mutex);
> >>>> +			pfn = info->current_pfn;
> >>>> +			info->current_pfn++;
> >>>> +			page_flag_buf->ready = FLAG_FILLING;
> >>>> +			pthread_mutex_unlock(&info->current_pfn_mutex);
> >>>>
> >>>> -			buf_ready = TRUE;
> >>>> +			page_flag_buf->pfn = pfn;
> >>>>
> >>>> -			page_data_buf[index].pfn = pfn;
> >>>> -			page_data_buf[index].ready = 1;
> >>>> +			if (pfn >= kdump_thread_args->end_pfn) {
> >>>> +				page_data_buf[index].used = FALSE;
> >>>> +				page_flag_buf->ready = FLAG_READY;
> >>>> +				break;
> >>>> +			}
> >>>>
> >>>>    			dumpable = is_dumpable(
> >>>>    				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
> >>>>    				pfn,
> >>>>    				cycle);
> >>>> -			page_data_buf[index].dumpable = dumpable;
> >>>>    			if (!dumpable)
> >>>> -				goto unlock;
> >>>> +				continue;
> >>>>
> >>>>    			if (!read_pfn_parallel(fd_memory, pfn, buf,
> >>>>    					       &bitmap_memory_parallel,
> >>>> @@ -7178,11 +7224,11 @@ kdump_thread_function_cyclic(void *arg) {
> >>>>
> >>>>    			if ((info->dump_level & DL_EXCLUDE_ZERO)
> >>>>    			    && is_zero_page(buf, info->page_size)) {
> >>>> -				page_data_buf[index].zero = TRUE;
> >>>> -				goto unlock;
> >>>> +				page_flag_buf->zero = TRUE;
> >>>> +				goto next;
> >>>>    			}
> >>>>
> >>>> -			page_data_buf[index].zero = FALSE;
> >>>> +			page_flag_buf->zero = FALSE;
> >>>>
> >>>>    			/*
> >>>>    			 * Compress the page data.
> >>>> @@ -7232,12 +7278,16 @@ kdump_thread_function_cyclic(void *arg) {
> >>>>    				page_data_buf[index].size  = info->page_size;
> >>>>    				memcpy(page_data_buf[index].buf, buf, info->page_size);
> >>>>    			}
> >>>> -unlock:
> >>>> -			pthread_mutex_unlock(&page_data_buf[index].mutex);
> >>>> +			page_flag_buf->index = index;
> >>>> +			buf_ready = TRUE;
> >>>> +next:
> >>>> +			page_flag_buf->ready = FLAG_READY;
> >>>> +			page_flag_buf = page_flag_buf->next;
> >>>>
> >>>>    		}
> >>>> -	}
> >>>>
> >>>> +		pthread_mutex_unlock(&page_data_buf[index].mutex);
> >>>> +	}
> >>>>    	retval = NULL;
> >>>>
> >>>>    fail:
> >>>> @@ -7265,14 +7315,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> >>>>    	struct page_desc pd;
> >>>>    	struct timeval tv_start;
> >>>>    	struct timeval last, new;
> >>>> -	unsigned long long consuming_pfn;
> >>>>    	pthread_t **threads = NULL;
> >>>>    	struct thread_args *kdump_thread_args = NULL;
> >>>>    	void *thread_result;
> >>>> -	int page_data_num;
> >>>> +	int page_buf_num;
> >>>>    	struct page_data *page_data_buf = NULL;
> >>>>    	int i;
> >>>>    	int index;
> >>>> +	int end_count, consuming, check_count;
> >>>> +	mdf_pfn_t current_pfn, temp_pfn;
> >>>>
> >>>>    	if (info->flag_elf_dumpfile)
> >>>>    		return FALSE;
> >>>> @@ -7319,16 +7370,11 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> >>>>    	threads = info->threads;
> >>>>    	kdump_thread_args = info->kdump_thread_args;
> >>>>
> >>>> -	page_data_num = info->num_buffers;
> >>>> +	page_buf_num = info->num_buffers;
> >>>>    	page_data_buf = info->page_data_buf;
> >>>>
> >>>> -	for (i = 0; i < page_data_num; i++) {
> >>>> -		/*
> >>>> -		 * producer will use pfn in page_data_buf to decide the
> >>>> -		 * consumed pfn
> >>>> -		 */
> >>>> -		page_data_buf[i].pfn = start_pfn - 1;
> >>>> -		page_data_buf[i].ready = 0;
> >>>> +	for (i = 0; i < page_buf_num; i++) {
> >>>> +		page_data_buf[i].used = FALSE;
> >>>>    		res = pthread_mutex_init(&page_data_buf[i].mutex, NULL);
> >>>>    		if (res != 0) {
> >>>>    			ERRMSG("Can't initialize mutex of page_data_buf. %s\n",
> >>>> @@ -7342,8 +7388,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> >>>>    		kdump_thread_args[i].len_buf_out = len_buf_out;
> >>>>    		kdump_thread_args[i].start_pfn = start_pfn;
> >>>>    		kdump_thread_args[i].end_pfn = end_pfn;
> >>>> -		kdump_thread_args[i].page_data_num = page_data_num;
> >>>> +		kdump_thread_args[i].page_buf_num = page_buf_num;
> >>>>    		kdump_thread_args[i].page_data_buf = page_data_buf;
> >>>> +		kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
> >>>>    		kdump_thread_args[i].cycle = cycle;
> >>>>
> >>>>    		res = pthread_create(threads[i], NULL,
> >>>> @@ -7356,55 +7403,101 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> >>>>    		}
> >>>>    	}
> >>>>
> >>>> -	consuming_pfn = start_pfn;
> >>>> -	index = -1;
> >>>> +	end_count = 0;
> >>>> +	while (1) {
> >>>> +		consuming = 0;
> >>>> +		check_count = 0;
> >>>>
> >>>> -	gettimeofday(&last, NULL);
> >>>> +		/*
> >>>> +		 * The basic idea is producer producing page and consumer writing page.
> >>>> +		 * Each producer have a page_flag_buf list which is used for storing page's description.
> >>>> +		 * The size of page_flag_buf is little so it won't take too much memory.
> >>>> +		 * And all producers will share a page_data_buf array which is used for storing page's compressed data.
> >>>> +		 * The main thread is the consumer. It will find the next pfn and write it into file.
> >>>> +		 * The next pfn is smallest pfn in all page_flag_buf.
> >>>> +		 */
> >>>> +		gettimeofday(&last, NULL);
> >>>> +		while (1) {
> >>>> +			current_pfn = end_pfn;
> >>>>
> >>>> -	while (consuming_pfn < end_pfn) {
> >>>> -		index = consuming_pfn % page_data_num;
> >>>> +			/*
> >>>> +			 * page_flag_buf is in circular linked list.
> >>>> +			 * The array info->page_flag_buf[] records the current page_flag_buf in each thread's
> >>>> +			 * page_flag_buf list.
> >>>> +			 * consuming is used for recording in which thread the pfn is the smallest.
> >>>> +			 * current_pfn is used for recording the value of pfn when checking the pfn.
> >>>> +			 */
> >>>> +			for (i = 0; i < info->num_threads; i++) {
> >>>> +				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
> >>>> +					continue;
> >>>> +				temp_pfn = info->page_flag_buf[i]->pfn;
> >>>>
> >>>> -		gettimeofday(&new, NULL);
> >>>> -		if (new.tv_sec - last.tv_sec > WAIT_TIME) {
> >>>> -			ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn);
> >>>> -			goto out;
> >>>> -		}
> >>>> +				/*
> >>>> +				 * count how many threads have reached the end.
> >>>> +				 */
> >>>> +				if (temp_pfn >= end_pfn) {
> >>>> +					/*
> >>>> +					 * prevent setting FLAG_UNUSED being optimized.
> >>>> +					 */
> >>>> +					MSG("-");
> >>>
> >>> Why does this line need?
> >>> Please clarify why FLAG_UNUSED is optimized.
> >>>
> >>
> >> I tried but still can't figure out the reason or get a better solution.
> >> I will be so happy if you can help me.
> >>
> >> In gdb, I run it by step.
> >> The code "info->page_flag_buf[i]->ready = FLAG_UNUSED;" can never be reached.
> >> If a breakpoint is set here, the code will never be skipped.
> >
> > On my environment, RHEL7.2, gdb stops at this line even if MSG("-") is deleted.
> > See below.
> >
> > ===
> > [root@ark crash]# gdb makedumpfile-code-3d32567394bac85ee3247780bfa41072b0d9bfe9/makedumpfile
> > GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-80.el7
> > Copyright (C) 2013 Free Software Foundation, Inc.
> > License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
> > This is free software: you are free to change and redistribute it.
> > There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
> > and "show warranty" for details.
> > This GDB was configured as "x86_64-redhat-linux-gnu".
> > For bug reporting instructions, please see:
> > <http://www.gnu.org/software/gdb/bugs/>...
> > Reading symbols from /work/crash/makedumpfile-code-3d32567394bac85ee3247780bfa41072b0d9bfe9/makedumpfile...done.
> > (gdb) set args -l -d0 --num-threads=2 ./vmcore ./vmcore.new.2
> > (gdb) l 7780
> > 7775                                    if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
> > 7776                                            continue;
> > 7777                                    temp_pfn = info->page_flag_buf[i]->pfn;
> > 7778
> > 7779                                    /*
> > 7780                                     * count how many threads have reached the end.
> > 7781                                     */
> > 7782                                    if (temp_pfn >= end_pfn) {
> > 7783                                            info->page_flag_buf[i]->ready = FLAG_UNUSED;
> > 7784
> > (gdb) b 7783
> > Breakpoint 1 at 0x433778: file makedumpfile.c, line 7783.
> > (gdb)
> > (gdb) run
> > Starting program: /work/crash/makedumpfile-code-3d32567394bac85ee3247780bfa41072b0d9bfe9/makedumpfile -l -d0
> --num-threads=2 ./vmcore ./vmcore.new.2
> > [Thread debugging using libthread_db enabled]
> > Using host libthread_db library "/lib64/libthread_db.so.1".
> > The kernel version is not supported.
> > The makedumpfile operation may be incomplete.
> > Checking for memory holes          : [100.0 %] |[New Thread 0x7ffff6530700 (LWP 32123)]
> > [New Thread 0x7ffff5d2f700 (LWP 32124)]
> > Copying data                       : [ 98.8 %] |
> > Breakpoint 1, write_kdump_pages_parallel_cyclic (cd_header=<optimized out>,
> >      cd_page=<optimized out>, pd_zero=<optimized out>, offset_data=<optimized out>,
> >      cycle=<optimized out>) at makedumpfile.c:7783
> > 7783                                            info->page_flag_buf[i]->ready = FLAG_UNUSED;
> > Missing separate debuginfos, use: debuginfo-install bzip2-libs-1.0.6-13.el7.x86_64 elfutils-libelf-0.163-3.el7.x86_64
> elfutils-libs-0.163-3.el7.x86_64 glibc-2.17-105.el7.x86_64 lzo-2.06-8.el7.x86_64 xz-libs-5.1.2-12alpha.el7.x86_64
> zlib-1.2.7-15.el7.x86_64
> > (gdb) bt
> > #0  write_kdump_pages_parallel_cyclic (cd_header=<optimized out>, cd_page=<optimized out>,
> >      pd_zero=<optimized out>, offset_data=<optimized out>, cycle=<optimized out>)
> >      at makedumpfile.c:7783
> > #1  0x0000000000439a15 in write_kdump_pages_and_bitmap_cyclic (
> >      cd_header=cd_header@entry=0x7fffffffe2d0, cd_page=cd_page@entry=0x7fffffffe300)
> >      at makedumpfile.c:8501
> > #2  0x000000000043a918 in writeout_dumpfile () at makedumpfile.c:9497
> > #3  0x000000000044181c in create_dumpfile () at makedumpfile.c:9724
> > #4  0x00000000004086df in main (argc=<optimized out>, argv=0x7fffffffe468)
> >      at makedumpfile.c:11159
> > (gdb)
> > ===
> >
> 
> Yes, it won't be skipped every time without MSG("-").
> But it did occur sometimes.
> And unluckily, MSG("-") can't do much help either.
> 
> I guess it is the same problem that minfei meets.
> 
> I'm doing some further investigation.
> 
> >>>>
> >>>> -		/*
> >>>> -		 * check pfn first without mutex locked to reduce the time
> >>>> -		 * trying to lock the mutex
> >>>> -		 */
> >>>> -		if (page_data_buf[index].pfn != consuming_pfn)
> >>>> -			continue;
> >>>> +					info->page_flag_buf[i]->ready = FLAG_UNUSED;
> >>>>
> >>>> -		if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
> >>>> -			continue;
> >>>> +					info->current_pfn = end_pfn;
> >>>
> >>> This part doesn't get info->current_pfn_mutex.
> >>> It becomes bigger than end_pfn at the end of producer thread in following case.
> >>>
> >>> ===
> >>>     producer                   Consumer
> >>> ---------------------------------------------------------
> >>>     pthread_mutex_lock()
> >>>     pfn = info->current_pfn
> >>>                                info->current_pfn = end_pfn
> >>>     info->current_pfn++
> >>>       -> end_pfn + 1
> >>>     pthread_mutex_unlock()
> >>> ===
> >>>
> >>> But I know, if this race happens, processing of the other producer threads and the consumer thread works well
> >>> in current cycle.
> >>> Because each thread checks whether info->current_pfn >= end_pfn.
> >>>
> >>> On the other hand, info->current_pfn is initialized to cycle->start_pfn before pthread_create()
> >>> in write_kdump_pages_parallel_cyclic().
> >>> This means it does not cause a problem in next cycle, too.
> >>>
> >>> In other words, my point is following.
> >>>
> >>>     a) When info->current_pfn changes, you have to get info->current_pfn_mutex.
> >>>     b) If you allow info->current_pfn is bigger than end_pfn at the end of each cycle,
> >>>        "info->current_pfn = end_pfn;" is unnecessary.
> >>>
> >>> Honestly, I don't like approach b).
> >>
> >> You're right. Some thing I thought is wrong.
> >> But I don't like lock if I have other choice.
> >> I will set info->current_pfn before returning.
> >> How about it?
> >
> > If you mean you set info->current_pfn by producer side,
> > this race will occur between producer A and producer B.
> >
> > I think you can't avoid getting mutex lock, if you will change info->current_pfn.
> >
> 
> I mean setting it by consumer.

I'm sorry, I can't imagine your proposal.
What do you change?

Please show me the code.

---
Thanks
Minoru Usui


> --
> Thanks
> Zhou
> 
> > Thanks,
> > Minoru Usui
> >
> >
> >> --
> >> Thanks
> >> Zhou
> >>
> >>>
> >>> Thanks,
> >>> Minoru Usui
> >>>
> >>>> +					end_count++;
> >>>> +					continue;
> >>>> +				}
> >>>> +
> >>>> +				if (current_pfn < temp_pfn)
> >>>> +					continue;
> >>>>
> >>>> -		/* check whether the found one is ready to be consumed */
> >>>> -		if (page_data_buf[index].pfn != consuming_pfn ||
> >>>> -		    page_data_buf[index].ready != 1) {
> >>>> -			goto unlock;
> >>>> +				check_count++;
> >>>> +				consuming = i;
> >>>> +				current_pfn = temp_pfn;
> >>>> +			}
> >>>> +
> >>>> +			/*
> >>>> +			 * If all the threads have reached the end, we will finish writing.
> >>>> +			 */
> >>>> +			if (end_count >= info->num_threads)
> >>>> +				goto finish;
> >>>> +
> >>>> +			/*
> >>>> +			 * Since it has the probability that there is no page_flag_buf being ready,
> >>>> +			 * we should recheck if it happens.
> >>>> +			 */
> >>>> +			if (check_count == 0)
> >>>> +				continue;
> >>>> +
> >>>> +			/*
> >>>> +			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
> >>>> +			 * So we should recheck.
> >>>> +			 */
> >>>> +			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
> >>>> +				gettimeofday(&new, NULL);
> >>>> +				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
> >>>> +					ERRMSG("Can't get data of pfn.\n");
> >>>> +					goto out;
> >>>> +				}
> >>>> +				continue;
> >>>> +			}
> >>>> +
> >>>> +			if (current_pfn == info->page_flag_buf[consuming]->pfn)
> >>>> +				break;
> >>>>    		}
> >>>>
> >>>>    		if ((num_dumped % per) == 0)
> >>>>    			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
> >>>>
> >>>> -		/* next pfn is found, refresh last here */
> >>>> -		last = new;
> >>>> -		consuming_pfn++;
> >>>> -		info->consumed_pfn++;
> >>>> -		page_data_buf[index].ready = 0;
> >>>> -
> >>>> -		if (page_data_buf[index].dumpable == FALSE)
> >>>> -			goto unlock;
> >>>> -
> >>>>    		num_dumped++;
> >>>>
> >>>> -		if (page_data_buf[index].zero == TRUE) {
> >>>> +
> >>>> +		if (info->page_flag_buf[consuming]->zero == TRUE) {
> >>>>    			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
> >>>>    				goto out;
> >>>>    			pfn_zero++;
> >>>>    		} else {
> >>>> +			index = info->page_flag_buf[consuming]->index;
> >>>>    			pd.flags      = page_data_buf[index].flags;
> >>>>    			pd.size       = page_data_buf[index].size;
> >>>>    			pd.page_flags = 0;
> >>>> @@ -7420,12 +7513,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> >>>>    			 */
> >>>>    			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
> >>>>    				goto out;
> >>>> -
> >>>> +			page_data_buf[index].used = FALSE;
> >>>>    		}
> >>>> -unlock:
> >>>> -		pthread_mutex_unlock(&page_data_buf[index].mutex);
> >>>> +		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
> >>>> +		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
> >>>>    	}
> >>>> -
> >>>> +finish:
> >>>>    	ret = TRUE;
> >>>>    	/*
> >>>>    	 * print [100 %]
> >>>> @@ -7464,7 +7557,7 @@ out:
> >>>>    	}
> >>>>
> >>>>    	if (page_data_buf != NULL) {
> >>>> -		for (i = 0; i < page_data_num; i++) {
> >>>> +		for (i = 0; i < page_buf_num; i++) {
> >>>>    			pthread_mutex_destroy(&page_data_buf[i].mutex);
> >>>>    		}
> >>>>    	}
> >>>> @@ -7564,6 +7657,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag
> >>>>    		num_dumped++;
> >>>>    		if (!read_pfn(pfn, buf))
> >>>>    			goto out;
> >>>> +
> >>>>    		filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size);
> >>>>
> >>>>    		/*
> >>>> diff --git a/makedumpfile.h b/makedumpfile.h
> >>>> index e0b5bbf..8a9a5b2 100644
> >>>> --- a/makedumpfile.h
> >>>> +++ b/makedumpfile.h
> >>>> @@ -977,7 +977,7 @@ typedef unsigned long long int ulonglong;
> >>>>    #define PAGE_DATA_NUM	(50)
> >>>>    #define WAIT_TIME	(60 * 10)
> >>>>    #define PTHREAD_FAIL	((void *)-2)
> >>>> -#define NUM_BUFFERS	(50)
> >>>> +#define NUM_BUFFERS	(20)
> >>>>
> >>>>    struct mmap_cache {
> >>>>    	char	*mmap_buf;
> >>>> @@ -985,28 +985,36 @@ struct mmap_cache {
> >>>>    	off_t   mmap_end_offset;
> >>>>    };
> >>>>
> >>>> +enum {
> >>>> +	FLAG_UNUSED,
> >>>> +	FLAG_READY,
> >>>> +	FLAG_FILLING
> >>>> +};
> >>>> +struct page_flag {
> >>>> +	mdf_pfn_t pfn;
> >>>> +	char zero;
> >>>> +	char ready;
> >>>> +	short index;
> >>>> +	struct page_flag *next;
> >>>> +};
> >>>> +
> >>>>    struct page_data
> >>>>    {
> >>>> -	mdf_pfn_t pfn;
> >>>> -	int dumpable;
> >>>> -	int zero;
> >>>> -	unsigned int flags;
> >>>> +	pthread_mutex_t mutex;
> >>>>    	long size;
> >>>>    	unsigned char *buf;
> >>>> -	pthread_mutex_t mutex;
> >>>> -	/*
> >>>> -	 * whether the page_data is ready to be consumed
> >>>> -	 */
> >>>> -	int ready;
> >>>> +	int flags;
> >>>> +	int used;
> >>>>    };
> >>>>
> >>>>    struct thread_args {
> >>>>    	int thread_num;
> >>>>    	unsigned long len_buf_out;
> >>>>    	mdf_pfn_t start_pfn, end_pfn;
> >>>> -	int page_data_num;
> >>>> +	int page_buf_num;
> >>>>    	struct cycle *cycle;
> >>>>    	struct page_data *page_data_buf;
> >>>> +	struct page_flag *page_flag_buf;
> >>>>    };
> >>>>
> >>>>    /*
> >>>> @@ -1295,6 +1303,7 @@ struct DumpInfo {
> >>>>    	pthread_t **threads;
> >>>>    	struct thread_args *kdump_thread_args;
> >>>>    	struct page_data *page_data_buf;
> >>>> +	struct page_flag **page_flag_buf;
> >>>>    	pthread_rwlock_t usemmap_rwlock;
> >>>>    	mdf_pfn_t current_pfn;
> >>>>    	pthread_mutex_t current_pfn_mutex;
> >>>> --
> >>>> 1.8.3.1
> >>>>
> >>>>
> >>>>
> >>>>
> >>>> _______________________________________________
> >>>> kexec mailing list
> >>>> kexec@lists.infradead.org
> >>>> http://lists.infradead.org/mailman/listinfo/kexec
> >>
> >>
> >
> >
> >
> 


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-23  5:47     ` "Zhou, Wenjian/周文剑"
@ 2016-02-24  1:43       ` Minfei Huang
  2016-02-24  2:20         ` "Zhou, Wenjian/周文剑"
  0 siblings, 1 reply; 26+ messages in thread
From: Minfei Huang @ 2016-02-24  1:43 UTC (permalink / raw)
  To: "Zhou, Wenjian/周文剑"; +Cc: kexec

On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
> Hello, Minfei,
> 
> Does it occur every time?
> If not, I think I have known the reason.

Hi, Wenjian.

This patch is applied directly on version 1.5.9. And makedumpfile hangs
if option num-thread is appended.

Thanks
Minfei

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-24  1:43       ` Minfei Huang
@ 2016-02-24  2:20         ` "Zhou, Wenjian/周文剑"
  2016-02-24  2:24           ` Minfei Huang
  0 siblings, 1 reply; 26+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2016-02-24  2:20 UTC (permalink / raw)
  To: Minfei Huang; +Cc: kexec

Hi,

On 02/24/2016 09:43 AM, Minfei Huang wrote:
> On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
>> Hello, Minfei,
>>
>> Does it occur every time?
>> If not, I think I have known the reason.
>
> Hi, Wenjian.
>
> This patch is applied directly on version 1.5.9. And makedumpfile hangs
> if option num-thread is appended.
>

I see.
I'm working on it.

BTW, did you only test it with --num-threads 128?
How is it with --num-threads 32(or smaller value)?

-- 
Thanks
Zhou



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-24  2:20         ` "Zhou, Wenjian/周文剑"
@ 2016-02-24  2:24           ` Minfei Huang
  2016-03-01  6:59             ` "Zhou, Wenjian/周文剑"
  2016-03-01  7:20             ` "Zhou, Wenjian/周文剑"
  0 siblings, 2 replies; 26+ messages in thread
From: Minfei Huang @ 2016-02-24  2:24 UTC (permalink / raw)
  To: "Zhou, Wenjian/周文剑"; +Cc: kexec


> On Feb 24, 2016, at 10:20, Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> wrote:
> 
> Hi,
> 
> On 02/24/2016 09:43 AM, Minfei Huang wrote:
>> On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
>>> Hello, Minfei,
>>> 
>>> Does it occur every time?
>>> If not, I think I have known the reason.
>> 
>> Hi, Wenjian.
>> 
>> This patch is applied directly on version 1.5.9. And makedumpfile hangs
>> if option num-thread is appended.
>> 
> 
> I see.
> I'm working on it.
> 
> BTW, did you only test it with --num-threads 128?
> How is it with --num-threads 32(or smaller value)?

Yes. I have tested with num-thread 1, 2, 8 and 32. Except for —num-threads 1,
all of the test fail.

Thanks
Minfei
> 


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-17  7:05 [PATCH v2] Improve the performance of --num-threads -d 31 Zhou Wenjian
  2016-02-22 16:58 ` Minfei Huang
  2016-02-23  1:32 ` Minoru Usui
@ 2016-02-24  8:13 ` Minoru Usui
  2016-03-01  7:34   ` "Zhou, Wenjian/周文?"
  2 siblings, 1 reply; 26+ messages in thread
From: Minoru Usui @ 2016-02-24  8:13 UTC (permalink / raw)
  To: Zhou Wenjian, kexec

Hi, Zhou

I have one more comment.
I found unused variables.

> -----Original Message-----
> From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of Zhou Wenjian
> Sent: Wednesday, February 17, 2016 4:06 PM
> To: kexec@lists.infradead.org
> Subject: [PATCH v2] Improve the performance of --num-threads -d 31
> 
> v2:
>         1. address Minoru Usui's comments about magic number and error message
>         2. fix a bug in cyclic mode caused by optimizing
> 
> v1:
>         1. change page_flag.ready's value to enum
>         2. change the patch description
>         3. cleanup some codes
>         4. fix a bug in cyclic mode
> 
> multi-threads implementation will introduce extra cost when handling
> each page. The origin implementation will also do the extra work for
> filtered pages. So there is a big performance degradation in
> --num-threads -d 31.
> The new implementation won't do the extra work for filtered pages any
> more. So the performance of -d 31 is close to that of serial processing.
> 
> The new implementation is just like the following:
>         * The basic idea is producer producing page and consumer writing page.
>         * Each producer have a page_flag_buf list which is used for storing
>           page's description.
>         * The size of page_flag_buf is little so it won't take too much memory.
>         * And all producers will share a page_data_buf array which is
>           used for storing page's compressed data.
>         * The main thread is the consumer. It will find the next pfn and write
>           it into file.
>         * The next pfn is smallest pfn in all page_flag_buf.
> 
> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com>
> ---
>  makedumpfile.c | 266 ++++++++++++++++++++++++++++++++++++++-------------------
>  makedumpfile.h |  31 ++++---
>  2 files changed, 200 insertions(+), 97 deletions(-)
> 
> diff --git a/makedumpfile.c b/makedumpfile.c
> index fa0b779..31329b5 100644
> --- a/makedumpfile.c
> +++ b/makedumpfile.c
> @@ -3483,7 +3483,8 @@ initial_for_parallel()
>  	unsigned long page_data_buf_size;
>  	unsigned long limit_size;
>  	int page_data_num;
> -	int i;
> +	struct page_flag *current;
> +	int i, j;
> 
>  	len_buf_out = calculate_len_buf_out(info->page_size);
> 
> @@ -3562,8 +3563,10 @@ initial_for_parallel()
>  		      - MAP_REGION * info->num_threads) * 0.6;
> 
>  	page_data_num = limit_size / page_data_buf_size;
> +	info->num_buffers = 3 * info->num_threads;
> 
> -	info->num_buffers = MIN(NUM_BUFFERS, page_data_num);
> +	info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS);
> +	info->num_buffers = MIN(info->num_buffers, page_data_num);
> 
>  	DEBUG_MSG("Number of struct page_data for produce/consume: %d\n",
>  			info->num_buffers);
> @@ -3588,6 +3591,36 @@ initial_for_parallel()
>  	}
> 
>  	/*
> +	 * initial page_flag for each thread
> +	 */
> +	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
> +	    == NULL) {
> +		MSG("Can't allocate memory for page_flag_buf. %s\n",
> +				strerror(errno));
> +		return FALSE;
> +	}
> +	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
> +
> +	for (i = 0; i < info->num_threads; i++) {
> +		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
> +			MSG("Can't allocate memory for page_flag. %s\n",
> +				strerror(errno));
> +			return FALSE;
> +		}
> +		current = info->page_flag_buf[i];
> +
> +		for (j = 1; j < NUM_BUFFERS; j++) {
> +			if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) {
> +				MSG("Can't allocate memory for page_flag. %s\n",
> +					strerror(errno));
> +				return FALSE;
> +			}
> +			current = current->next;
> +		}
> +		current->next = info->page_flag_buf[i];
> +	}
> +
> +	/*
>  	 * initial fd_memory for threads
>  	 */
>  	for (i = 0; i < info->num_threads; i++) {
> @@ -3612,7 +3645,8 @@ initial_for_parallel()
>  void
>  free_for_parallel()
>  {
> -	int i;
> +	int i, j;
> +	struct page_flag *current;
> 
>  	if (info->threads != NULL) {
>  		for (i = 0; i < info->num_threads; i++) {
> @@ -3655,6 +3689,19 @@ free_for_parallel()
>  		free(info->page_data_buf);
>  	}
> 
> +	if (info->page_flag_buf != NULL) {
> +		for (i = 0; i < info->num_threads; i++) {
> +			for (j = 0; j < NUM_BUFFERS; j++) {
> +				if (info->page_flag_buf[i] != NULL) {
> +					current = info->page_flag_buf[i];
> +					info->page_flag_buf[i] = current->next;
> +					free(current);
> +				}
> +			}
> +		}
> +		free(info->page_flag_buf);
> +	}
> +
>  	if (info->parallel_info == NULL)
>  		return;
> 
> @@ -7076,10 +7123,10 @@ kdump_thread_function_cyclic(void *arg) {
>  	void *retval = PTHREAD_FAIL;
>  	struct thread_args *kdump_thread_args = (struct thread_args *)arg;
>  	struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
> +	struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
>  	struct cycle *cycle = kdump_thread_args->cycle;
> -	int page_data_num = kdump_thread_args->page_data_num;
> -	mdf_pfn_t pfn;
> -	int index;
> +	mdf_pfn_t pfn = cycle->start_pfn;
> +	int index = kdump_thread_args->thread_num;
>  	int buf_ready;
>  	int dumpable;
>  	int fd_memory = 0;
> @@ -7125,47 +7172,46 @@ kdump_thread_function_cyclic(void *arg) {
>  						kdump_thread_args->thread_num);
>  	}
> 
> -	while (1) {
> -		/* get next pfn */
> -		pthread_mutex_lock(&info->current_pfn_mutex);
> -		pfn = info->current_pfn;
> -		info->current_pfn++;
> -		pthread_mutex_unlock(&info->current_pfn_mutex);
> +	/*
> +	 * filtered page won't take anything
> +	 * unfiltered zero page will only take a page_flag_buf
> +	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
> +	 */
> +	while (pfn < kdump_thread_args->end_pfn) {
> +		buf_ready = FALSE;
> 
> -		if (pfn >= kdump_thread_args->end_pfn)
> -			break;
> +		while (page_data_buf[index].used != FALSE ||
> +				pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
> +			index = (index + 1) % info->num_buffers;
> 
> -		index = -1;
> -		buf_ready = FALSE;
> +		page_data_buf[index].used = TRUE;
> 
>  		while (buf_ready == FALSE) {
>  			pthread_testcancel();
> -
> -			index = pfn % page_data_num;
> -
> -			if (pfn - info->consumed_pfn > info->num_buffers)
> -				continue;
> -
> -			if (page_data_buf[index].ready != 0)
> +			if (page_flag_buf->ready == FLAG_READY)
>  				continue;
> 
> -			pthread_mutex_lock(&page_data_buf[index].mutex);
> -
> -			if (page_data_buf[index].ready != 0)
> -				goto unlock;
> +			/* get next pfn */
> +			pthread_mutex_lock(&info->current_pfn_mutex);
> +			pfn = info->current_pfn;
> +			info->current_pfn++;
> +			page_flag_buf->ready = FLAG_FILLING;
> +			pthread_mutex_unlock(&info->current_pfn_mutex);
> 
> -			buf_ready = TRUE;
> +			page_flag_buf->pfn = pfn;
> 
> -			page_data_buf[index].pfn = pfn;
> -			page_data_buf[index].ready = 1;
> +			if (pfn >= kdump_thread_args->end_pfn) {
> +				page_data_buf[index].used = FALSE;
> +				page_flag_buf->ready = FLAG_READY;
> +				break;
> +			}
> 
>  			dumpable = is_dumpable(
>  				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
>  				pfn,
>  				cycle);
> -			page_data_buf[index].dumpable = dumpable;
>  			if (!dumpable)
> -				goto unlock;
> +				continue;
> 
>  			if (!read_pfn_parallel(fd_memory, pfn, buf,
>  					       &bitmap_memory_parallel,
> @@ -7178,11 +7224,11 @@ kdump_thread_function_cyclic(void *arg) {
> 
>  			if ((info->dump_level & DL_EXCLUDE_ZERO)
>  			    && is_zero_page(buf, info->page_size)) {
> -				page_data_buf[index].zero = TRUE;
> -				goto unlock;
> +				page_flag_buf->zero = TRUE;
> +				goto next;
>  			}
> 
> -			page_data_buf[index].zero = FALSE;
> +			page_flag_buf->zero = FALSE;
> 
>  			/*
>  			 * Compress the page data.
> @@ -7232,12 +7278,16 @@ kdump_thread_function_cyclic(void *arg) {
>  				page_data_buf[index].size  = info->page_size;
>  				memcpy(page_data_buf[index].buf, buf, info->page_size);
>  			}
> -unlock:
> -			pthread_mutex_unlock(&page_data_buf[index].mutex);
> +			page_flag_buf->index = index;
> +			buf_ready = TRUE;
> +next:
> +			page_flag_buf->ready = FLAG_READY;
> +			page_flag_buf = page_flag_buf->next;
> 
>  		}
> -	}
> 
> +		pthread_mutex_unlock(&page_data_buf[index].mutex);
> +	}
>  	retval = NULL;
> 
>  fail:
> @@ -7265,14 +7315,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  	struct page_desc pd;
>  	struct timeval tv_start;
>  	struct timeval last, new;
> -	unsigned long long consuming_pfn;
>  	pthread_t **threads = NULL;
>  	struct thread_args *kdump_thread_args = NULL;
>  	void *thread_result;
> -	int page_data_num;
> +	int page_buf_num;
>  	struct page_data *page_data_buf = NULL;
>  	int i;
>  	int index;
> +	int end_count, consuming, check_count;
> +	mdf_pfn_t current_pfn, temp_pfn;
> 
>  	if (info->flag_elf_dumpfile)
>  		return FALSE;
> @@ -7319,16 +7370,11 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  	threads = info->threads;
>  	kdump_thread_args = info->kdump_thread_args;
> 
> -	page_data_num = info->num_buffers;
> +	page_buf_num = info->num_buffers;
>  	page_data_buf = info->page_data_buf;
> 
> -	for (i = 0; i < page_data_num; i++) {
> -		/*
> -		 * producer will use pfn in page_data_buf to decide the
> -		 * consumed pfn
> -		 */
> -		page_data_buf[i].pfn = start_pfn - 1;
> -		page_data_buf[i].ready = 0;
> +	for (i = 0; i < page_buf_num; i++) {
> +		page_data_buf[i].used = FALSE;
>  		res = pthread_mutex_init(&page_data_buf[i].mutex, NULL);
>  		if (res != 0) {
>  			ERRMSG("Can't initialize mutex of page_data_buf. %s\n",
> @@ -7342,8 +7388,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  		kdump_thread_args[i].len_buf_out = len_buf_out;
>  		kdump_thread_args[i].start_pfn = start_pfn;
>  		kdump_thread_args[i].end_pfn = end_pfn;
> -		kdump_thread_args[i].page_data_num = page_data_num;
> +		kdump_thread_args[i].page_buf_num = page_buf_num;
>  		kdump_thread_args[i].page_data_buf = page_data_buf;
> +		kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
>  		kdump_thread_args[i].cycle = cycle;

The main thread passes start_pfn and end_pfn, but it also passes cycle.
start_pfn and end_pfn are equal to cycle->start_pfn and cycle->end_pfn,
so the start_pfn and end_pfn members should be removed.

It also passes page_buf_num, but that isn't used by the producer.

>  		res = pthread_create(threads[i], NULL,
> @@ -7356,55 +7403,101 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  		}
>  	}
> 
> -	consuming_pfn = start_pfn;
> -	index = -1;
> +	end_count = 0;
> +	while (1) {
> +		consuming = 0;
> +		check_count = 0;
> 
> -	gettimeofday(&last, NULL);
> +		/*
> +		 * The basic idea is producer producing page and consumer writing page.
> +		 * Each producer have a page_flag_buf list which is used for storing page's description.
> +		 * The size of page_flag_buf is little so it won't take too much memory.
> +		 * And all producers will share a page_data_buf array which is used for storing page's compressed data.
> +		 * The main thread is the consumer. It will find the next pfn and write it into file.
> +		 * The next pfn is smallest pfn in all page_flag_buf.
> +		 */
> +		gettimeofday(&last, NULL);
> +		while (1) {
> +			current_pfn = end_pfn;
> 
> -	while (consuming_pfn < end_pfn) {
> -		index = consuming_pfn % page_data_num;
> +			/*
> +			 * page_flag_buf is in circular linked list.
> +			 * The array info->page_flag_buf[] records the current page_flag_buf in each thread's
> +			 * page_flag_buf list.
> +			 * consuming is used for recording in which thread the pfn is the smallest.
> +			 * current_pfn is used for recording the value of pfn when checking the pfn.
> +			 */
> +			for (i = 0; i < info->num_threads; i++) {
> +				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
> +					continue;
> +				temp_pfn = info->page_flag_buf[i]->pfn;
> 
> -		gettimeofday(&new, NULL);
> -		if (new.tv_sec - last.tv_sec > WAIT_TIME) {
> -			ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn);
> -			goto out;
> -		}
> +				/*
> +				 * count how many threads have reached the end.
> +				 */
> +				if (temp_pfn >= end_pfn) {
> +					/*
> +					 * prevent setting FLAG_UNUSED being optimized.
> +					 */
> +					MSG("-");
> 
> -		/*
> -		 * check pfn first without mutex locked to reduce the time
> -		 * trying to lock the mutex
> -		 */
> -		if (page_data_buf[index].pfn != consuming_pfn)
> -			continue;
> +					info->page_flag_buf[i]->ready = FLAG_UNUSED;
> 
> -		if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
> -			continue;
> +					info->current_pfn = end_pfn;
> +					end_count++;
> +					continue;
> +				}
> +
> +				if (current_pfn < temp_pfn)
> +					continue;
> 
> -		/* check whether the found one is ready to be consumed */
> -		if (page_data_buf[index].pfn != consuming_pfn ||
> -		    page_data_buf[index].ready != 1) {
> -			goto unlock;
> +				check_count++;
> +				consuming = i;
> +				current_pfn = temp_pfn;
> +			}
> +
> +			/*
> +			 * If all the threads have reached the end, we will finish writing.
> +			 */
> +			if (end_count >= info->num_threads)
> +				goto finish;
> +
> +			/*
> +			 * Since it has the probabilty that there is no page_flag_buf being ready,
> +			 * we should recheck if it happens.
> +			 */
> +			if (check_count == 0)
> +				continue;
> +
> +			/*
> +			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
> +			 * So we should recheck.
> +			 */
> +			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
> +				gettimeofday(&new, NULL);
> +				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
> +					ERRMSG("Can't get data of pfn.\n");
> +					goto out;
> +				}
> +				continue;
> +			}
> +
> +			if (current_pfn == info->page_flag_buf[consuming]->pfn)
> +				break;
>  		}
> 
>  		if ((num_dumped % per) == 0)
>  			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
> 
> -		/* next pfn is found, refresh last here */
> -		last = new;
> -		consuming_pfn++;
> -		info->consumed_pfn++;

info->consumed_pfn is no longer used anywhere,
so the consumed_pfn and consumed_pfn_mutex members should be removed.

---
Thanks,
Minoru Usui

> -		page_data_buf[index].ready = 0;
> -
> -		if (page_data_buf[index].dumpable == FALSE)
> -			goto unlock;
> -
>  		num_dumped++;
> 
> -		if (page_data_buf[index].zero == TRUE) {
> +
> +		if (info->page_flag_buf[consuming]->zero == TRUE) {
>  			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
>  				goto out;
>  			pfn_zero++;
>  		} else {
> +			index = info->page_flag_buf[consuming]->index;
>  			pd.flags      = page_data_buf[index].flags;
>  			pd.size       = page_data_buf[index].size;
>  			pd.page_flags = 0;
> @@ -7420,12 +7513,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  			 */
>  			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
>  				goto out;
> -
> +			page_data_buf[index].used = FALSE;
>  		}
> -unlock:
> -		pthread_mutex_unlock(&page_data_buf[index].mutex);
> +		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
> +		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
>  	}
> -
> +finish:
>  	ret = TRUE;
>  	/*
>  	 * print [100 %]
> @@ -7464,7 +7557,7 @@ out:
>  	}
> 
>  	if (page_data_buf != NULL) {
> -		for (i = 0; i < page_data_num; i++) {
> +		for (i = 0; i < page_buf_num; i++) {
>  			pthread_mutex_destroy(&page_data_buf[i].mutex);
>  		}
>  	}
> @@ -7564,6 +7657,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag
>  		num_dumped++;
>  		if (!read_pfn(pfn, buf))
>  			goto out;
> +
>  		filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size);
> 
>  		/*
> diff --git a/makedumpfile.h b/makedumpfile.h
> index e0b5bbf..8a9a5b2 100644
> --- a/makedumpfile.h
> +++ b/makedumpfile.h
> @@ -977,7 +977,7 @@ typedef unsigned long long int ulonglong;
>  #define PAGE_DATA_NUM	(50)
>  #define WAIT_TIME	(60 * 10)
>  #define PTHREAD_FAIL	((void *)-2)
> -#define NUM_BUFFERS	(50)
> +#define NUM_BUFFERS	(20)
> 
>  struct mmap_cache {
>  	char	*mmap_buf;
> @@ -985,28 +985,36 @@ struct mmap_cache {
>  	off_t   mmap_end_offset;
>  };
> 
> +enum {
> +	FLAG_UNUSED,
> +	FLAG_READY,
> +	FLAG_FILLING
> +};
> +struct page_flag {
> +	mdf_pfn_t pfn;
> +	char zero;
> +	char ready;
> +	short index;
> +	struct page_flag *next;
> +};
> +
>  struct page_data
>  {
> -	mdf_pfn_t pfn;
> -	int dumpable;
> -	int zero;
> -	unsigned int flags;
> +	pthread_mutex_t mutex;
>  	long size;
>  	unsigned char *buf;
> -	pthread_mutex_t mutex;
> -	/*
> -	 * whether the page_data is ready to be consumed
> -	 */
> -	int ready;
> +	int flags;
> +	int used;
>  };
> 
>  struct thread_args {
>  	int thread_num;
>  	unsigned long len_buf_out;
>  	mdf_pfn_t start_pfn, end_pfn;
> -	int page_data_num;
> +	int page_buf_num;
>  	struct cycle *cycle;
>  	struct page_data *page_data_buf;
> +	struct page_flag *page_flag_buf;
>  };
> 
>  /*
> @@ -1295,6 +1303,7 @@ struct DumpInfo {
>  	pthread_t **threads;
>  	struct thread_args *kdump_thread_args;
>  	struct page_data *page_data_buf;
> +	struct page_flag **page_flag_buf;
>  	pthread_rwlock_t usemmap_rwlock;
>  	mdf_pfn_t current_pfn;
>  	pthread_mutex_t current_pfn_mutex;
> --
> 1.8.3.1
> 
> 
> 
> 
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-24  2:24           ` Minfei Huang
@ 2016-03-01  6:59             ` "Zhou, Wenjian/周文剑"
  2016-03-01  8:16               ` Minfei Huang
  2016-03-01  7:20             ` "Zhou, Wenjian/周文剑"
  1 sibling, 1 reply; 26+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2016-03-01  6:59 UTC (permalink / raw)
  To: Minfei Huang; +Cc: kexec

[-- Attachment #1: Type: text/plain, Size: 1057 bytes --]

Hi Minfei,

Could you help me test the patch in the attachment?

I have run it a thousand times and haven't got a failure.
Previously, I could always get a failure after running it hundreds of times.

But I'm not sure if it is the same problem you have met.

-- 
Thanks
Zhou

On 02/24/2016 10:24 AM, Minfei Huang wrote:
>
>> On Feb 24, 2016, at 10:20, Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> wrote:
>>
>> Hi,
>>
>> On 02/24/2016 09:43 AM, Minfei Huang wrote:
>>> On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
>>>> Hello, Minfei,
>>>>
>>>> Does it occur every time?
>>>> If not, I think I have known the reason.
>>>
>>> Hi, Wenjian.
>>>
>>> This patch is applied directly on version 1.5.9. And makedumpfile hangs
>>> if option num-thread is appended.
>>>
>>
>> I see.
>> I'm working on it.
>>
>> BTW, did you only test it with --num-threads 128?
>> How is it with --num-threads 32(or smaller value)?
>
> Yes. I have tested with --num-threads 1, 2, 8 and 32. Except for --num-threads 1,
> all of the tests fail.
>
> Thanks
> Minfei
>>
>
>
>



[-- Attachment #2: 0001-improve-the-performance.patch --]
[-- Type: text/x-patch, Size: 15289 bytes --]

From 42239fea17e5b4b19d9a9210bb335e5984b32aea Mon Sep 17 00:00:00 2001
From: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com>
Date: Tue, 1 Mar 2016 13:35:00 +0800
Subject: [PATCH] improve the performance

---
 makedumpfile.c | 271 ++++++++++++++++++++++++++++++++++++++-------------------
 makedumpfile.h |  36 +++++---
 2 files changed, 207 insertions(+), 100 deletions(-)

diff --git a/makedumpfile.c b/makedumpfile.c
index fa0b779..c437f77 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -3483,7 +3483,8 @@ initial_for_parallel()
 	unsigned long page_data_buf_size;
 	unsigned long limit_size;
 	int page_data_num;
-	int i;
+	struct page_flag *current;
+	int i, j;
 
 	len_buf_out = calculate_len_buf_out(info->page_size);
 
@@ -3562,8 +3563,10 @@ initial_for_parallel()
 		      - MAP_REGION * info->num_threads) * 0.6;
 
 	page_data_num = limit_size / page_data_buf_size;
+	info->num_buffers = 3 * info->num_threads;
 
-	info->num_buffers = MIN(NUM_BUFFERS, page_data_num);
+	info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS);
+	info->num_buffers = MIN(info->num_buffers, page_data_num);
 
 	DEBUG_MSG("Number of struct page_data for produce/consume: %d\n",
 			info->num_buffers);
@@ -3588,6 +3591,36 @@ initial_for_parallel()
 	}
 
 	/*
+	 * initial page_flag for each thread
+	 */
+	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
+	    == NULL) {
+		MSG("Can't allocate memory for page_flag_buf. %s\n",
+				strerror(errno));
+		return FALSE;
+	}
+	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
+
+	for (i = 0; i < info->num_threads; i++) {
+		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
+			MSG("Can't allocate memory for page_flag. %s\n",
+				strerror(errno));
+			return FALSE;
+		}
+		current = info->page_flag_buf[i];
+
+		for (j = 1; j < NUM_BUFFERS; j++) {
+			if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) {
+				MSG("Can't allocate memory for page_flag. %s\n",
+					strerror(errno));
+				return FALSE;
+			}
+			current = current->next;
+		}
+		current->next = info->page_flag_buf[i];
+	}
+
+	/*
 	 * initial fd_memory for threads
 	 */
 	for (i = 0; i < info->num_threads; i++) {
@@ -3612,7 +3645,8 @@ initial_for_parallel()
 void
 free_for_parallel()
 {
-	int i;
+	int i, j;
+	struct page_flag *current;
 
 	if (info->threads != NULL) {
 		for (i = 0; i < info->num_threads; i++) {
@@ -3655,6 +3689,19 @@ free_for_parallel()
 		free(info->page_data_buf);
 	}
 
+	if (info->page_flag_buf != NULL) {
+		for (i = 0; i < info->num_threads; i++) {
+			for (j = 0; j < NUM_BUFFERS; j++) {
+				if (info->page_flag_buf[i] != NULL) {
+					current = info->page_flag_buf[i];
+					info->page_flag_buf[i] = current->next;
+					free(current);
+				}
+			}
+		}
+		free(info->page_flag_buf);
+	}
+
 	if (info->parallel_info == NULL)
 		return;
 
@@ -7075,11 +7122,11 @@ void *
 kdump_thread_function_cyclic(void *arg) {
 	void *retval = PTHREAD_FAIL;
 	struct thread_args *kdump_thread_args = (struct thread_args *)arg;
-	struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
+	struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
+	struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
 	struct cycle *cycle = kdump_thread_args->cycle;
-	int page_data_num = kdump_thread_args->page_data_num;
-	mdf_pfn_t pfn;
-	int index;
+	mdf_pfn_t pfn = cycle->start_pfn;
+	int index = kdump_thread_args->thread_num;
 	int buf_ready;
 	int dumpable;
 	int fd_memory = 0;
@@ -7125,47 +7172,46 @@ kdump_thread_function_cyclic(void *arg) {
 						kdump_thread_args->thread_num);
 	}
 
-	while (1) {
-		/* get next pfn */
-		pthread_mutex_lock(&info->current_pfn_mutex);
-		pfn = info->current_pfn;
-		info->current_pfn++;
-		pthread_mutex_unlock(&info->current_pfn_mutex);
-
-		if (pfn >= kdump_thread_args->end_pfn)
-			break;
-
-		index = -1;
+	/*
+	 * filtered page won't take anything
+	 * unfiltered zero page will only take a page_flag_buf
+	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
+	 */
+	while (pfn < kdump_thread_args->end_pfn) {
 		buf_ready = FALSE;
 
+		pthread_mutex_lock(&info->page_data_mutex);
+		while (page_data_buf[index].used != FALSE) {
+			index = (index + 1) % info->num_buffers;
+		}
+		page_data_buf[index].used = TRUE;
+		pthread_mutex_unlock(&info->page_data_mutex);
+
 		while (buf_ready == FALSE) {
 			pthread_testcancel();
-
-			index = pfn % page_data_num;
-
-			if (pfn - info->consumed_pfn > info->num_buffers)
-				continue;
-
-			if (page_data_buf[index].ready != 0)
+			if (page_flag_buf->ready == FLAG_READY)
 				continue;
 
-			pthread_mutex_lock(&page_data_buf[index].mutex);
+			/* get next pfn */
+			pthread_mutex_lock(&info->current_pfn_mutex);
+			pfn = info->current_pfn;
+			info->current_pfn++;
+			page_flag_buf->ready = FLAG_FILLING;
+			pthread_mutex_unlock(&info->current_pfn_mutex);
 
-			if (page_data_buf[index].ready != 0)
-				goto unlock;
+			page_flag_buf->pfn = pfn;
 
-			buf_ready = TRUE;
-
-			page_data_buf[index].pfn = pfn;
-			page_data_buf[index].ready = 1;
+			if (pfn >= kdump_thread_args->end_pfn) {
+				page_data_buf[index].used = FALSE;
+				break;
+			}
 
 			dumpable = is_dumpable(
 				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
 				pfn,
 				cycle);
-			page_data_buf[index].dumpable = dumpable;
 			if (!dumpable)
-				goto unlock;
+				continue;
 
 			if (!read_pfn_parallel(fd_memory, pfn, buf,
 					       &bitmap_memory_parallel,
@@ -7178,11 +7224,11 @@ kdump_thread_function_cyclic(void *arg) {
 
 			if ((info->dump_level & DL_EXCLUDE_ZERO)
 			    && is_zero_page(buf, info->page_size)) {
-				page_data_buf[index].zero = TRUE;
-				goto unlock;
+				page_flag_buf->zero = TRUE;
+				goto next;
 			}
 
-			page_data_buf[index].zero = FALSE;
+			page_flag_buf->zero = FALSE;
 
 			/*
 			 * Compress the page data.
@@ -7210,6 +7256,8 @@ kdump_thread_function_cyclic(void *arg) {
 				page_data_buf[index].flags =
 							DUMP_DH_COMPRESSED_LZO;
 				page_data_buf[index].size  = size_out;
+			page_data_buf[index].pre=page_data_buf[index].pfn;
+			page_data_buf[index].pfn=pfn;
 				memcpy(page_data_buf[index].buf, buf_out, size_out);
 #endif
 #ifdef USESNAPPY
@@ -7230,14 +7278,20 @@ kdump_thread_function_cyclic(void *arg) {
 			} else {
 				page_data_buf[index].flags = 0;
 				page_data_buf[index].size  = info->page_size;
+			page_data_buf[index].pre=page_data_buf[index].pfn;
+			page_data_buf[index].pfn=pfn;
 				memcpy(page_data_buf[index].buf, buf, info->page_size);
 			}
-unlock:
-			pthread_mutex_unlock(&page_data_buf[index].mutex);
+			page_flag_buf->index = index;
+			buf_ready = TRUE;
+next:
+			page_flag_buf->ready = FLAG_READY;
+			page_flag_buf = page_flag_buf->next;
 
 		}
-	}
 
+		pthread_mutex_unlock(&page_data_buf[index].mutex);
+	}
 	retval = NULL;
 
 fail:
@@ -7265,14 +7319,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 	struct page_desc pd;
 	struct timeval tv_start;
 	struct timeval last, new;
-	unsigned long long consuming_pfn;
 	pthread_t **threads = NULL;
 	struct thread_args *kdump_thread_args = NULL;
 	void *thread_result;
-	int page_data_num;
+	int page_buf_num;
 	struct page_data *page_data_buf = NULL;
 	int i;
 	int index;
+	int end_count, consuming, check_count;
+	mdf_pfn_t current_pfn, temp_pfn;
 
 	if (info->flag_elf_dumpfile)
 		return FALSE;
@@ -7319,16 +7374,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 	threads = info->threads;
 	kdump_thread_args = info->kdump_thread_args;
 
-	page_data_num = info->num_buffers;
+	page_buf_num = info->num_buffers;
 	page_data_buf = info->page_data_buf;
+	pthread_mutex_init(&info->page_data_mutex, NULL);
 
-	for (i = 0; i < page_data_num; i++) {
-		/*
-		 * producer will use pfn in page_data_buf to decide the
-		 * consumed pfn
-		 */
-		page_data_buf[i].pfn = start_pfn - 1;
-		page_data_buf[i].ready = 0;
+	for (i = 0; i < page_buf_num; i++) {
+		page_data_buf[i].used = FALSE;
 		res = pthread_mutex_init(&page_data_buf[i].mutex, NULL);
 		if (res != 0) {
 			ERRMSG("Can't initialize mutex of page_data_buf. %s\n",
@@ -7342,8 +7393,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 		kdump_thread_args[i].len_buf_out = len_buf_out;
 		kdump_thread_args[i].start_pfn = start_pfn;
 		kdump_thread_args[i].end_pfn = end_pfn;
-		kdump_thread_args[i].page_data_num = page_data_num;
+		kdump_thread_args[i].page_buf_num = page_buf_num;
 		kdump_thread_args[i].page_data_buf = page_data_buf;
+		kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
 		kdump_thread_args[i].cycle = cycle;
 
 		res = pthread_create(threads[i], NULL,
@@ -7356,60 +7408,102 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 		}
 	}
 
-	consuming_pfn = start_pfn;
-	index = -1;
+	end_count = 0;
+	while (1) {
+		consuming = 0;
+		check_count = 0;
 
-	gettimeofday(&last, NULL);
+		/*
+		 * The basic idea is producer producing page and consumer writing page.
+		 * Each producer have a page_flag_buf list which is used for storing page's description.
+		 * The size of page_flag_buf is little so it won't take too much memory.
+		 * And all producers will share a page_data_buf array which is used for storing page's compressed data.
+		 * The main thread is the consumer. It will find the next pfn and write it into file.
+		 * The next pfn is smallest pfn in all page_flag_buf.
+		 */
+		gettimeofday(&last, NULL);
+		while (1) {
+			current_pfn = end_pfn;
 
-	while (consuming_pfn < end_pfn) {
-		index = consuming_pfn % page_data_num;
+			/*
+			 * page_flag_buf is in circular linked list.
+			 * The array info->page_flag_buf[] records the current page_flag_buf in each thread's
+			 * page_flag_buf list.
+			 * consuming is used for recording in which thread the pfn is the smallest.
+			 * current_pfn is used for recording the value of pfn when checking the pfn.
+			 */
+			for (i = 0; i < info->num_threads; i++) {
+				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
+					continue;
+				temp_pfn = info->page_flag_buf[i]->pfn;
 
-		gettimeofday(&new, NULL);
-		if (new.tv_sec - last.tv_sec > WAIT_TIME) {
-			ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn);
-			goto out;
-		}
+				/*
+				 * count how many threads have reached the end.
+				 */
+				if (temp_pfn >= end_pfn) {
+					info->page_flag_buf[i]->ready = FLAG_UNUSED;
 
-		/*
-		 * check pfn first without mutex locked to reduce the time
-		 * trying to lock the mutex
-		 */
-		if (page_data_buf[index].pfn != consuming_pfn)
-			continue;
+					info->current_pfn = end_pfn;
+					end_count++;
+					continue;
+				}
 
-		if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
-			continue;
+				if (current_pfn < temp_pfn)
+					continue;
 
-		/* check whether the found one is ready to be consumed */
-		if (page_data_buf[index].pfn != consuming_pfn ||
-		    page_data_buf[index].ready != 1) {
-			goto unlock;
+				check_count++;
+				consuming = i;
+				current_pfn = temp_pfn;
+			}
+
+			/*
+			 * If all the threads have reached the end, we will finish writing.
+			 */
+			if (end_count >= info->num_threads)
+				goto finish;
+
+			/*
+			 * Since it has the probabilty that there is no page_flag_buf being ready,
+			 * we should recheck if it happens.
+			 */
+			if (check_count == 0)
+				continue;
+
+			/*
+			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
+			 * So we should recheck.
+			 */
+			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
+				gettimeofday(&new, NULL);
+				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
+					ERRMSG("Can't get data of pfn.\n");
+					goto out;
+				}
+				continue;
+			}
+
+			if (current_pfn == info->page_flag_buf[consuming]->pfn)
+				break;
 		}
 
 		if ((num_dumped % per) == 0)
 			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
 
-		/* next pfn is found, refresh last here */
-		last = new;
-		consuming_pfn++;
-		info->consumed_pfn++;
-		page_data_buf[index].ready = 0;
-
-		if (page_data_buf[index].dumpable == FALSE)
-			goto unlock;
-
 		num_dumped++;
 
-		if (page_data_buf[index].zero == TRUE) {
+
+		if (info->page_flag_buf[consuming]->zero == TRUE) {
 			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
 				goto out;
 			pfn_zero++;
 		} else {
+			index = info->page_flag_buf[consuming]->index;
 			pd.flags      = page_data_buf[index].flags;
 			pd.size       = page_data_buf[index].size;
 			pd.page_flags = 0;
 			pd.offset     = *offset_data;
 			*offset_data  += pd.size;
+//MSG("expectpfn:%d pfn:%d, pd.size:%d, pre:%d\n", info->page_flag_buf[consuming]->pfn, page_data_buf[index].pfn, pd.size, page_data_buf[index].pre);
 			/*
 			 * Write the page header.
 			 */
@@ -7420,12 +7514,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 			 */
 			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
 				goto out;
-
+			page_data_buf[index].used = FALSE;
 		}
-unlock:
-		pthread_mutex_unlock(&page_data_buf[index].mutex);
+		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
+		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
 	}
-
+finish:
 	ret = TRUE;
 	/*
 	 * print [100 %]
@@ -7464,7 +7558,7 @@ out:
 	}
 
 	if (page_data_buf != NULL) {
-		for (i = 0; i < page_data_num; i++) {
+		for (i = 0; i < page_buf_num; i++) {
 			pthread_mutex_destroy(&page_data_buf[i].mutex);
 		}
 	}
@@ -7564,6 +7658,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag
 		num_dumped++;
 		if (!read_pfn(pfn, buf))
 			goto out;
+
 		filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size);
 
 		/*
diff --git a/makedumpfile.h b/makedumpfile.h
index e0b5bbf..2de6afe 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -977,7 +977,7 @@ typedef unsigned long long int ulonglong;
 #define PAGE_DATA_NUM	(50)
 #define WAIT_TIME	(60 * 10)
 #define PTHREAD_FAIL	((void *)-2)
-#define NUM_BUFFERS	(50)
+#define NUM_BUFFERS	(20)
 
 struct mmap_cache {
 	char	*mmap_buf;
@@ -985,28 +985,38 @@ struct mmap_cache {
 	off_t   mmap_end_offset;
 };
 
+enum {
+	FLAG_UNUSED,
+	FLAG_READY,
+	FLAG_FILLING
+};
+struct page_flag {
+	mdf_pfn_t pfn;
+	char zero;
+	char ready;
+	short index;
+	struct page_flag *next;
+};
+
 struct page_data
 {
-	mdf_pfn_t pfn;
-	int dumpable;
-	int zero;
-	unsigned int flags;
+	pthread_mutex_t mutex;
 	long size;
 	unsigned char *buf;
-	pthread_mutex_t mutex;
-	/*
-	 * whether the page_data is ready to be consumed
-	 */
-	int ready;
+	int flags;
+	int used;
+mdf_pfn_t pfn;
+mdf_pfn_t pre;
 };
 
 struct thread_args {
 	int thread_num;
 	unsigned long len_buf_out;
 	mdf_pfn_t start_pfn, end_pfn;
-	int page_data_num;
+	int page_buf_num;
 	struct cycle *cycle;
 	struct page_data *page_data_buf;
+	struct page_flag *page_flag_buf;
 };
 
 /*
@@ -1294,10 +1304,12 @@ struct DumpInfo {
 	int num_buffers;
 	pthread_t **threads;
 	struct thread_args *kdump_thread_args;
-	struct page_data *page_data_buf;
+	struct page_data *page_data_buf;
+	struct page_flag **page_flag_buf;
 	pthread_rwlock_t usemmap_rwlock;
 	mdf_pfn_t current_pfn;
 	pthread_mutex_t current_pfn_mutex;
+	pthread_mutex_t page_data_mutex;
 	mdf_pfn_t consumed_pfn;
 	pthread_mutex_t consumed_pfn_mutex;
 	pthread_mutex_t filter_mutex;
-- 
1.8.3.1


[-- Attachment #3: Type: text/plain, Size: 143 bytes --]

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-24  2:24           ` Minfei Huang
  2016-03-01  6:59             ` "Zhou, Wenjian/周文剑"
@ 2016-03-01  7:20             ` "Zhou, Wenjian/周文剑"
  2016-03-01  8:17               ` Minfei Huang
  1 sibling, 1 reply; 26+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2016-03-01  7:20 UTC (permalink / raw)
  To: Minfei Huang; +Cc: kexec

Hello Minfei,

The mail I sent to you and kexec list is held by the kexec list.
Have you received it?
Should I resend?

-- 
Thanks
Zhou

On 02/24/2016 10:24 AM, Minfei Huang wrote:
>
>> On Feb 24, 2016, at 10:20, Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> wrote:
>>
>> Hi,
>>
>> On 02/24/2016 09:43 AM, Minfei Huang wrote:
>>> On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
>>>> Hello, Minfei,
>>>>
>>>> Does it occur every time?
>>>> If not, I think I have known the reason.
>>>
>>> Hi, Wenjian.
>>>
>>> This patch is applied directly on version 1.5.9. And makedumpfile hangs
>>> if option num-thread is appended.
>>>
>>
>> I see.
>> I'm working on it.
>>
>> BTW, did you only test it with --num-threads 128?
>> How is it with --num-threads 32(or smaller value)?
>
> Yes. I have tested with num-thread 1, 2, 8 and 32. Except for —num-threads 1,
> all of the test fail.
>
> Thanks
> Minfei
>>
>
>
>





_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-24  8:13 ` Minoru Usui
@ 2016-03-01  7:34   ` "Zhou, Wenjian/周文?"
  0 siblings, 0 replies; 26+ messages in thread
From: "Zhou, Wenjian/周文?" @ 2016-03-01  7:34 UTC (permalink / raw)
  To: Minoru Usui, kexec

On 02/24/2016 04:13 PM, Minoru Usui wrote:
> Hi, Zhou
> 
> I have one more comment.
> I found unused variables.
> 
Hi Usui,

Thanks a lot!
I will fix it in the next version.

-- 
Thanks
Zhou
>> -----Original Message-----
>> From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of Zhou Wenjian
>> Sent: Wednesday, February 17, 2016 4:06 PM
>> To: kexec@lists.infradead.org
>> Subject: [PATCH v2] Improve the performance of --num-threads -d 31
>>
>> v2:
>>          1. address Minoru Usui's comments about magic number and error message
>>          2. fix a bug in cyclic mode caused by optimizing
>>
>> v1:
>>          1. change page_flag.ready's value to enum
>>          2. change the patch description
>>          3. cleanup some codes
>>          4. fix a bug in cyclic mode
>>
>> multi-threads implementation will introduce extra cost when handling
>> each page. The origin implementation will also do the extra work for
>> filtered pages. So there is a big performance degradation in
>> --num-threads -d 31.
>> The new implementation won't do the extra work for filtered pages any
>> more. So the performance of -d 31 is close to that of serial processing.
>>
>> The new implementation is just like the following:
>>          * The basic idea is producer producing page and consumer writing page.
>>          * Each producer have a page_flag_buf list which is used for storing
>>            page's description.
>>          * The size of page_flag_buf is little so it won't take too much memory.
>>          * And all producers will share a page_data_buf array which is
>>            used for storing page's compressed data.
>>          * The main thread is the consumer. It will find the next pfn and write
>>            it into file.
>>          * The next pfn is smallest pfn in all page_flag_buf.
>>
>> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com>
>> ---
>>   makedumpfile.c | 266 ++++++++++++++++++++++++++++++++++++++-------------------
>>   makedumpfile.h |  31 ++++---
>>   2 files changed, 200 insertions(+), 97 deletions(-)
>>
>> diff --git a/makedumpfile.c b/makedumpfile.c
>> index fa0b779..31329b5 100644
>> --- a/makedumpfile.c
>> +++ b/makedumpfile.c
>> @@ -3483,7 +3483,8 @@ initial_for_parallel()
>>   	unsigned long page_data_buf_size;
>>   	unsigned long limit_size;
>>   	int page_data_num;
>> -	int i;
>> +	struct page_flag *current;
>> +	int i, j;
>>
>>   	len_buf_out = calculate_len_buf_out(info->page_size);
>>
>> @@ -3562,8 +3563,10 @@ initial_for_parallel()
>>   		      - MAP_REGION * info->num_threads) * 0.6;
>>
>>   	page_data_num = limit_size / page_data_buf_size;
>> +	info->num_buffers = 3 * info->num_threads;
>>
>> -	info->num_buffers = MIN(NUM_BUFFERS, page_data_num);
>> +	info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS);
>> +	info->num_buffers = MIN(info->num_buffers, page_data_num);
>>
>>   	DEBUG_MSG("Number of struct page_data for produce/consume: %d\n",
>>   			info->num_buffers);
>> @@ -3588,6 +3591,36 @@ initial_for_parallel()
>>   	}
>>
>>   	/*
>> +	 * initial page_flag for each thread
>> +	 */
>> +	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
>> +	    == NULL) {
>> +		MSG("Can't allocate memory for page_flag_buf. %s\n",
>> +				strerror(errno));
>> +		return FALSE;
>> +	}
>> +	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
>> +
>> +	for (i = 0; i < info->num_threads; i++) {
>> +		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
>> +			MSG("Can't allocate memory for page_flag. %s\n",
>> +				strerror(errno));
>> +			return FALSE;
>> +		}
>> +		current = info->page_flag_buf[i];
>> +
>> +		for (j = 1; j < NUM_BUFFERS; j++) {
>> +			if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) {
>> +				MSG("Can't allocate memory for page_flag. %s\n",
>> +					strerror(errno));
>> +				return FALSE;
>> +			}
>> +			current = current->next;
>> +		}
>> +		current->next = info->page_flag_buf[i];
>> +	}
>> +
>> +	/*
>>   	 * initial fd_memory for threads
>>   	 */
>>   	for (i = 0; i < info->num_threads; i++) {
>> @@ -3612,7 +3645,8 @@ initial_for_parallel()
>>   void
>>   free_for_parallel()
>>   {
>> -	int i;
>> +	int i, j;
>> +	struct page_flag *current;
>>
>>   	if (info->threads != NULL) {
>>   		for (i = 0; i < info->num_threads; i++) {
>> @@ -3655,6 +3689,19 @@ free_for_parallel()
>>   		free(info->page_data_buf);
>>   	}
>>
>> +	if (info->page_flag_buf != NULL) {
>> +		for (i = 0; i < info->num_threads; i++) {
>> +			for (j = 0; j < NUM_BUFFERS; j++) {
>> +				if (info->page_flag_buf[i] != NULL) {
>> +					current = info->page_flag_buf[i];
>> +					info->page_flag_buf[i] = current->next;
>> +					free(current);
>> +				}
>> +			}
>> +		}
>> +		free(info->page_flag_buf);
>> +	}
>> +
>>   	if (info->parallel_info == NULL)
>>   		return;
>>
>> @@ -7076,10 +7123,10 @@ kdump_thread_function_cyclic(void *arg) {
>>   	void *retval = PTHREAD_FAIL;
>>   	struct thread_args *kdump_thread_args = (struct thread_args *)arg;
>>   	struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
>> +	struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
>>   	struct cycle *cycle = kdump_thread_args->cycle;
>> -	int page_data_num = kdump_thread_args->page_data_num;
>> -	mdf_pfn_t pfn;
>> -	int index;
>> +	mdf_pfn_t pfn = cycle->start_pfn;
>> +	int index = kdump_thread_args->thread_num;
>>   	int buf_ready;
>>   	int dumpable;
>>   	int fd_memory = 0;
>> @@ -7125,47 +7172,46 @@ kdump_thread_function_cyclic(void *arg) {
>>   						kdump_thread_args->thread_num);
>>   	}
>>
>> -	while (1) {
>> -		/* get next pfn */
>> -		pthread_mutex_lock(&info->current_pfn_mutex);
>> -		pfn = info->current_pfn;
>> -		info->current_pfn++;
>> -		pthread_mutex_unlock(&info->current_pfn_mutex);
>> +	/*
>> +	 * filtered page won't take anything
>> +	 * unfiltered zero page will only take a page_flag_buf
>> +	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
>> +	 */
>> +	while (pfn < kdump_thread_args->end_pfn) {
>> +		buf_ready = FALSE;
>>
>> -		if (pfn >= kdump_thread_args->end_pfn)
>> -			break;
>> +		while (page_data_buf[index].used != FALSE ||
>> +				pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
>> +			index = (index + 1) % info->num_buffers;
>>
>> -		index = -1;
>> -		buf_ready = FALSE;
>> +		page_data_buf[index].used = TRUE;
>>
>>   		while (buf_ready == FALSE) {
>>   			pthread_testcancel();
>> -
>> -			index = pfn % page_data_num;
>> -
>> -			if (pfn - info->consumed_pfn > info->num_buffers)
>> -				continue;
>> -
>> -			if (page_data_buf[index].ready != 0)
>> +			if (page_flag_buf->ready == FLAG_READY)
>>   				continue;
>>
>> -			pthread_mutex_lock(&page_data_buf[index].mutex);
>> -
>> -			if (page_data_buf[index].ready != 0)
>> -				goto unlock;
>> +			/* get next pfn */
>> +			pthread_mutex_lock(&info->current_pfn_mutex);
>> +			pfn = info->current_pfn;
>> +			info->current_pfn++;
>> +			page_flag_buf->ready = FLAG_FILLING;
>> +			pthread_mutex_unlock(&info->current_pfn_mutex);
>>
>> -			buf_ready = TRUE;
>> +			page_flag_buf->pfn = pfn;
>>
>> -			page_data_buf[index].pfn = pfn;
>> -			page_data_buf[index].ready = 1;
>> +			if (pfn >= kdump_thread_args->end_pfn) {
>> +				page_data_buf[index].used = FALSE;
>> +				page_flag_buf->ready = FLAG_READY;
>> +				break;
>> +			}
>>
>>   			dumpable = is_dumpable(
>>   				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
>>   				pfn,
>>   				cycle);
>> -			page_data_buf[index].dumpable = dumpable;
>>   			if (!dumpable)
>> -				goto unlock;
>> +				continue;
>>
>>   			if (!read_pfn_parallel(fd_memory, pfn, buf,
>>   					       &bitmap_memory_parallel,
>> @@ -7178,11 +7224,11 @@ kdump_thread_function_cyclic(void *arg) {
>>
>>   			if ((info->dump_level & DL_EXCLUDE_ZERO)
>>   			    && is_zero_page(buf, info->page_size)) {
>> -				page_data_buf[index].zero = TRUE;
>> -				goto unlock;
>> +				page_flag_buf->zero = TRUE;
>> +				goto next;
>>   			}
>>
>> -			page_data_buf[index].zero = FALSE;
>> +			page_flag_buf->zero = FALSE;
>>
>>   			/*
>>   			 * Compress the page data.
>> @@ -7232,12 +7278,16 @@ kdump_thread_function_cyclic(void *arg) {
>>   				page_data_buf[index].size  = info->page_size;
>>   				memcpy(page_data_buf[index].buf, buf, info->page_size);
>>   			}
>> -unlock:
>> -			pthread_mutex_unlock(&page_data_buf[index].mutex);
>> +			page_flag_buf->index = index;
>> +			buf_ready = TRUE;
>> +next:
>> +			page_flag_buf->ready = FLAG_READY;
>> +			page_flag_buf = page_flag_buf->next;
>>
>>   		}
>> -	}
>>
>> +		pthread_mutex_unlock(&page_data_buf[index].mutex);
>> +	}
>>   	retval = NULL;
>>
>>   fail:
>> @@ -7265,14 +7315,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>   	struct page_desc pd;
>>   	struct timeval tv_start;
>>   	struct timeval last, new;
>> -	unsigned long long consuming_pfn;
>>   	pthread_t **threads = NULL;
>>   	struct thread_args *kdump_thread_args = NULL;
>>   	void *thread_result;
>> -	int page_data_num;
>> +	int page_buf_num;
>>   	struct page_data *page_data_buf = NULL;
>>   	int i;
>>   	int index;
>> +	int end_count, consuming, check_count;
>> +	mdf_pfn_t current_pfn, temp_pfn;
>>
>>   	if (info->flag_elf_dumpfile)
>>   		return FALSE;
>> @@ -7319,16 +7370,11 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>   	threads = info->threads;
>>   	kdump_thread_args = info->kdump_thread_args;
>>
>> -	page_data_num = info->num_buffers;
>> +	page_buf_num = info->num_buffers;
>>   	page_data_buf = info->page_data_buf;
>>
>> -	for (i = 0; i < page_data_num; i++) {
>> -		/*
>> -		 * producer will use pfn in page_data_buf to decide the
>> -		 * consumed pfn
>> -		 */
>> -		page_data_buf[i].pfn = start_pfn - 1;
>> -		page_data_buf[i].ready = 0;
>> +	for (i = 0; i < page_buf_num; i++) {
>> +		page_data_buf[i].used = FALSE;
>>   		res = pthread_mutex_init(&page_data_buf[i].mutex, NULL);
>>   		if (res != 0) {
>>   			ERRMSG("Can't initialize mutex of page_data_buf. %s\n",
>> @@ -7342,8 +7388,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>   		kdump_thread_args[i].len_buf_out = len_buf_out;
>>   		kdump_thread_args[i].start_pfn = start_pfn;
>>   		kdump_thread_args[i].end_pfn = end_pfn;
>> -		kdump_thread_args[i].page_data_num = page_data_num;
>> +		kdump_thread_args[i].page_buf_num = page_buf_num;
>>   		kdump_thread_args[i].page_data_buf = page_data_buf;
>> +		kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
>>   		kdump_thread_args[i].cycle = cycle;
> 
> Main thread passes to start_pfn and end_pfn, but it also passes cycle.
> start_pfn and end_pfn equal to cycle->start_pfn and cycle->end_pfn,
> so start_pfn and end_pfn members should be removed.
> 
> And also, it passes page_buf_num but it isn't used by producer.
> 
>>   		res = pthread_create(threads[i], NULL,
>> @@ -7356,55 +7403,101 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>   		}
>>   	}
>>
>> -	consuming_pfn = start_pfn;
>> -	index = -1;
>> +	end_count = 0;
>> +	while (1) {
>> +		consuming = 0;
>> +		check_count = 0;
>>
>> -	gettimeofday(&last, NULL);
>> +		/*
>> +		 * The basic idea is producer producing page and consumer writing page.
>> +		 * Each producer have a page_flag_buf list which is used for storing page's description.
>> +		 * The size of page_flag_buf is little so it won't take too much memory.
>> +		 * And all producers will share a page_data_buf array which is used for storing page's compressed data.
>> +		 * The main thread is the consumer. It will find the next pfn and write it into file.
>> +		 * The next pfn is smallest pfn in all page_flag_buf.
>> +		 */
>> +		gettimeofday(&last, NULL);
>> +		while (1) {
>> +			current_pfn = end_pfn;
>>
>> -	while (consuming_pfn < end_pfn) {
>> -		index = consuming_pfn % page_data_num;
>> +			/*
>> +			 * page_flag_buf is in circular linked list.
>> +			 * The array info->page_flag_buf[] records the current page_flag_buf in each thread's
>> +			 * page_flag_buf list.
>> +			 * consuming is used for recording in which thread the pfn is the smallest.
>> +			 * current_pfn is used for recording the value of pfn when checking the pfn.
>> +			 */
>> +			for (i = 0; i < info->num_threads; i++) {
>> +				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
>> +					continue;
>> +				temp_pfn = info->page_flag_buf[i]->pfn;
>>
>> -		gettimeofday(&new, NULL);
>> -		if (new.tv_sec - last.tv_sec > WAIT_TIME) {
>> -			ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn);
>> -			goto out;
>> -		}
>> +				/*
>> +				 * count how many threads have reached the end.
>> +				 */
>> +				if (temp_pfn >= end_pfn) {
>> +					/*
>> +					 * prevent setting FLAG_UNUSED being optimized.
>> +					 */
>> +					MSG("-");
>>
>> -		/*
>> -		 * check pfn first without mutex locked to reduce the time
>> -		 * trying to lock the mutex
>> -		 */
>> -		if (page_data_buf[index].pfn != consuming_pfn)
>> -			continue;
>> +					info->page_flag_buf[i]->ready = FLAG_UNUSED;
>>
>> -		if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
>> -			continue;
>> +					info->current_pfn = end_pfn;
>> +					end_count++;
>> +					continue;
>> +				}
>> +
>> +				if (current_pfn < temp_pfn)
>> +					continue;
>>
>> -		/* check whether the found one is ready to be consumed */
>> -		if (page_data_buf[index].pfn != consuming_pfn ||
>> -		    page_data_buf[index].ready != 1) {
>> -			goto unlock;
>> +				check_count++;
>> +				consuming = i;
>> +				current_pfn = temp_pfn;
>> +			}
>> +
>> +			/*
>> +			 * If all the threads have reached the end, we will finish writing.
>> +			 */
>> +			if (end_count >= info->num_threads)
>> +				goto finish;
>> +
>> +			/*
>> +			 * Since there is a possibility that no page_flag_buf is ready,
>> +			 * we should recheck if that happens.
>> +			 */
>> +			if (check_count == 0)
>> +				continue;
>> +
>> +			/*
>> +			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
>> +			 * So we should recheck.
>> +			 */
>> +			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
>> +				gettimeofday(&new, NULL);
>> +				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
>> +					ERRMSG("Can't get data of pfn.\n");
>> +					goto out;
>> +				}
>> +				continue;
>> +			}
>> +
>> +			if (current_pfn == info->page_flag_buf[consuming]->pfn)
>> +				break;
>>   		}
>>
>>   		if ((num_dumped % per) == 0)
>>   			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
>>
>> -		/* next pfn is found, refresh last here */
>> -		last = new;
>> -		consuming_pfn++;
>> -		info->consumed_pfn++;
> 
> info->consumed_pfn is no longer used by anywhere,
> so consumed_pfn and consumed_pfn_mutex member should be removed.
> 
> ---
> Thanks,
> Minoru Usui
> 
>> -		page_data_buf[index].ready = 0;
>> -
>> -		if (page_data_buf[index].dumpable == FALSE)
>> -			goto unlock;
>> -
>>   		num_dumped++;
>>
>> -		if (page_data_buf[index].zero == TRUE) {
>> +
>> +		if (info->page_flag_buf[consuming]->zero == TRUE) {
>>   			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
>>   				goto out;
>>   			pfn_zero++;
>>   		} else {
>> +			index = info->page_flag_buf[consuming]->index;
>>   			pd.flags      = page_data_buf[index].flags;
>>   			pd.size       = page_data_buf[index].size;
>>   			pd.page_flags = 0;
>> @@ -7420,12 +7513,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>>   			 */
>>   			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
>>   				goto out;
>> -
>> +			page_data_buf[index].used = FALSE;
>>   		}
>> -unlock:
>> -		pthread_mutex_unlock(&page_data_buf[index].mutex);
>> +		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
>> +		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
>>   	}
>> -
>> +finish:
>>   	ret = TRUE;
>>   	/*
>>   	 * print [100 %]
>> @@ -7464,7 +7557,7 @@ out:
>>   	}
>>
>>   	if (page_data_buf != NULL) {
>> -		for (i = 0; i < page_data_num; i++) {
>> +		for (i = 0; i < page_buf_num; i++) {
>>   			pthread_mutex_destroy(&page_data_buf[i].mutex);
>>   		}
>>   	}
>> @@ -7564,6 +7657,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag
>>   		num_dumped++;
>>   		if (!read_pfn(pfn, buf))
>>   			goto out;
>> +
>>   		filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size);
>>
>>   		/*
>> diff --git a/makedumpfile.h b/makedumpfile.h
>> index e0b5bbf..8a9a5b2 100644
>> --- a/makedumpfile.h
>> +++ b/makedumpfile.h
>> @@ -977,7 +977,7 @@ typedef unsigned long long int ulonglong;
>>   #define PAGE_DATA_NUM	(50)
>>   #define WAIT_TIME	(60 * 10)
>>   #define PTHREAD_FAIL	((void *)-2)
>> -#define NUM_BUFFERS	(50)
>> +#define NUM_BUFFERS	(20)
>>
>>   struct mmap_cache {
>>   	char	*mmap_buf;
>> @@ -985,28 +985,36 @@ struct mmap_cache {
>>   	off_t   mmap_end_offset;
>>   };
>>
>> +enum {
>> +	FLAG_UNUSED,
>> +	FLAG_READY,
>> +	FLAG_FILLING
>> +};
>> +struct page_flag {
>> +	mdf_pfn_t pfn;
>> +	char zero;
>> +	char ready;
>> +	short index;
>> +	struct page_flag *next;
>> +};
>> +
>>   struct page_data
>>   {
>> -	mdf_pfn_t pfn;
>> -	int dumpable;
>> -	int zero;
>> -	unsigned int flags;
>> +	pthread_mutex_t mutex;
>>   	long size;
>>   	unsigned char *buf;
>> -	pthread_mutex_t mutex;
>> -	/*
>> -	 * whether the page_data is ready to be consumed
>> -	 */
>> -	int ready;
>> +	int flags;
>> +	int used;
>>   };
>>
>>   struct thread_args {
>>   	int thread_num;
>>   	unsigned long len_buf_out;
>>   	mdf_pfn_t start_pfn, end_pfn;
>> -	int page_data_num;
>> +	int page_buf_num;
>>   	struct cycle *cycle;
>>   	struct page_data *page_data_buf;
>> +	struct page_flag *page_flag_buf;
>>   };
>>
>>   /*
>> @@ -1295,6 +1303,7 @@ struct DumpInfo {
>>   	pthread_t **threads;
>>   	struct thread_args *kdump_thread_args;
>>   	struct page_data *page_data_buf;
>> +	struct page_flag **page_flag_buf;
>>   	pthread_rwlock_t usemmap_rwlock;
>>   	mdf_pfn_t current_pfn;
>>   	pthread_mutex_t current_pfn_mutex;
>> --
>> 1.8.3.1
>>
>>
>>
>>
>> _______________________________________________
>> kexec mailing list
>> kexec@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/kexec




_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-02-24  0:45         ` Minoru Usui
@ 2016-03-01  7:49           ` "Zhou, Wenjian/周文?"
  2016-03-02  3:05             ` Minoru Usui
  0 siblings, 1 reply; 26+ messages in thread
From: "Zhou, Wenjian/周文?" @ 2016-03-01  7:49 UTC (permalink / raw)
  To: Minoru Usui, kexec

Hi,

On 02/24/2016 08:45 AM, Minoru Usui wrote:
>>>>>>
>>>>>> -		/*
>>>>>> -		 * check pfn first without mutex locked to reduce the time
>>>>>> -		 * trying to lock the mutex
>>>>>> -		 */
>>>>>> -		if (page_data_buf[index].pfn != consuming_pfn)
>>>>>> -			continue;
>>>>>> +					info->page_flag_buf[i]->ready = FLAG_UNUSED;
>>>>>>
>>>>>> -		if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
>>>>>> -			continue;
>>>>>> +					info->current_pfn = end_pfn;
>>>>>
>>>>> This part doesn't get info->current_pfn_mutex.
>>>>> It becomes bigger than end_pfn at the end of producer thread in following case.
>>>>>
>>>>> ===
>>>>>      producer                   Consumer
>>>>> ---------------------------------------------------------
>>>>>      pthread_mutex_lock()
>>>>>      pfn = info->current_pfn
>>>>>                                 info->current_pfn = end_pfn
>>>>>      info->current_pfn++
>>>>>        -> end_pfn + 1
>>>>>      pthread_mutex_unlock()
>>>>> ===
>>>>>
>>>>> But I know, if this race is happened, processing other producer thread and consumer thread works well
>>>>> in current cycle.
>>>>> Because each thread checks whether info->current_pfn >= end_pfn.
>>>>>
>>>>> On the other hand, info->current_pfn is initialized to cycle->start_pfn before pthread_create()
>>>>> in write_kdump_pages_parallel_cyclic().
>>>>> This means it does not cause a problem in next cycle, too.
>>>>>
>>>>> In other words, my point is following.
>>>>>
>>>>>      a) When info->current_pfn changes, you have to get info->current_pfn_mutex.
>>>>>      b) If you allow info->current_pfn is bigger than end_pfn at the end of each cycle,
>>>>>         "info->current_pfn = end_pfn;" is unnecessary.
>>>>>
>>>>> Honestly, I don't like approach b).
>>>>
>>>> You're right. Some thing I thought is wrong.
>>>> But I don't like lock if I have other choice.
>>>> I will set info->current_pfn before returning.
>>>> How about it?
>>>
>>> If you mean you set info->current_pfn by producer side,
>>> this race will occur between producer A and producer B.
>>>
>>> I think you can't avoid getting mutex lock, if you will change info->current_pfn.
>>>
>>
>> I mean setting it by consumer.
> 
> I'm sorry, I can't imagine your proposal.
> What do you change?
> 
> Please show me the code.
> 

At that time, I meant setting the current_pfn at the end of the function
write_kdump_pages_parallel_cyclic().
But I don't think it is good choice now.

>>>>> ===
>>>>>      producer                   Consumer
>>>>> ---------------------------------------------------------
>>>>>      pthread_mutex_lock()
>>>>>      pfn = info->current_pfn
>>>>>                                 info->current_pfn = end_pfn
>>>>>      info->current_pfn++
>>>>>        -> end_pfn + 1
>>>>>      pthread_mutex_unlock()
>>>>> ===

How about just changing "info->current_pfn = end_pfn" to "info->current_pfn--" ?
Just like the first version of the patch.

-- 
Thanks
Zhou



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-03-01  6:59             ` "Zhou, Wenjian/周文剑"
@ 2016-03-01  8:16               ` Minfei Huang
  2016-03-02 10:25                 ` Minfei Huang
  0 siblings, 1 reply; 26+ messages in thread
From: Minfei Huang @ 2016-03-01  8:16 UTC (permalink / raw)
  To: "Zhou, Wenjian/周文剑"; +Cc: kexec

On 03/01/16 at 02:59pm, "Zhou, Wenjian/周文剑" wrote:
> Hi Minfei,
> 
> Could you help me test the patch in the attachment?

Ok. I will verify it with this patch. Feedback will be given in next few
days. Thanks for your effort.

Thanks
Minfei

> 
> I have run it a thousand times and haven't got a failure.
> Previously, I can always get a failure after running hundreds of times.
> 
> But I'm not sure if it is the same problem you have met.
> 
> -- 
> Thanks
> Zhou
> 
> On 02/24/2016 10:24 AM, Minfei Huang wrote:
> >
> >>On Feb 24, 2016, at 10:20, Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> wrote:
> >>
> >>Hi,
> >>
> >>On 02/24/2016 09:43 AM, Minfei Huang wrote:
> >>>On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
> >>>>Hello, Minfei,
> >>>>
> >>>>Does it occur every time?
> >>>>If not, I think I have known the reason.
> >>>
> >>>Hi, Wenjian.
> >>>
> >>>This patch is applied directly on version 1.5.9. And makedumpfile hangs
> >>>if option num-thread is appended.
> >>>
> >>
> >>I see.
> >>I'm working on it.
> >>
> >>BTW, did you only test it with --num-threads 128?
> >>How is it with --num-threads 32(or smaller value)?
> >
> >Yes. I have tested with num-thread 1, 2, 8 and 32. Except for —num-threads 1,
> >all of the test fail.
> >
> >Thanks
> >Minfei

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-03-01  7:20             ` "Zhou, Wenjian/周文剑"
@ 2016-03-01  8:17               ` Minfei Huang
  0 siblings, 0 replies; 26+ messages in thread
From: Minfei Huang @ 2016-03-01  8:17 UTC (permalink / raw)
  To: "Zhou, Wenjian/周文剑"; +Cc: kexec

On 03/01/16 at 03:20pm, "Zhou, Wenjian/周文剑" wrote:
> Hello Minfei,
> 
> The mail I sent to you and kexec list is held by the kexec list.
> Have you received it?
> Should I resend?

It's fine, since I have received your request.

Thanks
Minfei

> 
> -- 
> Thanks
> Zhou
> 
> On 02/24/2016 10:24 AM, Minfei Huang wrote:
> >
> >>On Feb 24, 2016, at 10:20, Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> wrote:
> >>
> >>Hi,
> >>
> >>On 02/24/2016 09:43 AM, Minfei Huang wrote:
> >>>On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
> >>>>Hello, Minfei,
> >>>>
> >>>>Does it occur every time?
> >>>>If not, I think I have known the reason.
> >>>
> >>>Hi, Wenjian.
> >>>
> >>>This patch is applied directly on version 1.5.9. And makedumpfile hangs
> >>>if option num-thread is appended.
> >>>
> >>
> >>I see.
> >>I'm working on it.
> >>
> >>BTW, did you only test it with --num-threads 128?
> >>How is it with --num-threads 32(or smaller value)?
> >
> >Yes. I have tested with num-thread 1, 2, 8 and 32. Except for —num-threads 1,
> >all of the test fail.
> >
> >Thanks
> >Minfei
> >>
> >
> >
> >
> 
> 
> 
> 

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-03-01  7:49           ` "Zhou, Wenjian/周文?"
@ 2016-03-02  3:05             ` Minoru Usui
  2016-03-02  3:16               ` Minoru Usui
  2016-03-02  3:59               ` "Zhou, Wenjian/周文?"
  0 siblings, 2 replies; 26+ messages in thread
From: Minoru Usui @ 2016-03-02  3:05 UTC (permalink / raw)
  To: "Zhou, Wenjian/周文?", kexec

Hi, Zhou

> >>>>> ===
> >>>>>      producer                   Consumer
> >>>>> ---------------------------------------------------------
> >>>>>      pthread_mutex_lock()
> >>>>>      pfn = info->current_pfn
> >>>>>                                 info->current_pfn = end_pfn
> >>>>>      info->current_pfn++
> >>>>>        -> end_pfn + 1
> >>>>>      pthread_mutex_unlock()
> >>>>> ===
> 
> How about just changing "info->current_pfn = end_pfn" to "info->current_pfn--" ?
> Just like the first version of the patch.

If you don't take the mutex lock on the consumer side, this change is meaningless.
Of course, info->current_pfn may be equal to end_pfn at the end of the cycle,
but there is a window in which info->current_pfn is bigger than end_pfn while the producer thread is running.

The root cause is that the producer increments info->current_pfn every time, even if info->current_pfn == end_pfn,
in the following code.

===
>>>> +			/* get next pfn */
>>>> +			pthread_mutex_lock(&info->current_pfn_mutex);
>>>> +			pfn = info->current_pfn;
>>>> +			info->current_pfn++;                        # increment everytime
>>>> +			page_flag_buf->ready = FLAG_FILLING;
>>>> +			pthread_mutex_unlock(&info->current_pfn_mutex);
>>>>
>>>> -			buf_ready = TRUE;
>>>> +			page_flag_buf->pfn = pfn;
>>>>
>>>> -			page_data_buf[index].pfn = pfn;
>>>> -			page_data_buf[index].ready = 1;
>>>> +			if (pfn >= kdump_thread_args->end_pfn) {
>>>> +				page_data_buf[index].used = FALSE;
>>>> +				page_flag_buf->ready = FLAG_READY;
>>>> +				break;                             # not decrement
>>>> +			}
===

If you don't allow info->current_pfn is bigger than end_pfn,
you don't need to increment info->current_pfn when pfn >= kdump_thread_args->end_pfn like following.

===
                        /* get next pfn */
                        pthread_mutex_lock(&info->current_pfn_mutex);
                        pfn = info->current_pfn;
                        page_flag_buf->pfn = pfn;
                        if (pfn >= kdump_thread_args->end_pfn) {
                                page_data_buf[index].used = FALSE;
                                page_flag_buf->ready = FLAG_READY;
                                pthread_mutex_unlock(&info->current_pfn_mutex);
                                break;
                        }
                        page_flag_buf->ready = FLAG_FILLING;
                        info->current_pfn++;
                        pthread_mutex_unlock(&info->current_pfn_mutex);
===

If you allow info->current_pfn is bigger than end_pfn, producer doesn't need to change info->current_pfn.

---
Thanks
Minoru Usui

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-03-02  3:05             ` Minoru Usui
@ 2016-03-02  3:16               ` Minoru Usui
  2016-03-02  3:59               ` "Zhou, Wenjian/周文剑"
  1 sibling, 0 replies; 26+ messages in thread
From: Minoru Usui @ 2016-03-02  3:16 UTC (permalink / raw)
  To: "Zhou, Wenjian/周文剑", kexec

Hi, Zhou

I wrote a typo, sorry.

> -----Original Message-----
> From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of Minoru Usui
> Sent: Wednesday, March 02, 2016 12:06 PM
> To: "Zhou, Wenjian/周文剑" <zhouwj-fnst@cn.fujitsu.com>; kexec@lists.infradead.org
> Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31
> 
> Hi, Zhou
> 
> > >>>>> ===
> > >>>>>      producer                   Consumer
> > >>>>> ---------------------------------------------------------
> > >>>>>      pthread_mutex_lock()
> > >>>>>      pfn = info->current_pfn
> > >>>>>                                 info->current_pfn = end_pfn
> > >>>>>      info->current_pfn++
> > >>>>>        -> end_pfn + 1
> > >>>>>      pthread_mutex_unlock()
> > >>>>> ===
> >
> > How about just changing "info->current_pfn = end_pfn" to "info->current_pfn--" ?
> > Just like the first version of the patch.
> 
> If you don't get mutex lock in consumer side, this change is meaningless.
> Of course, info->current_pfn may equal to end_pfn at the end of the cycle,
> but there is a timing that info->current_pfn is bigger than end_pfn in processing producer thread.
> 
> The root cause is producer increments info->current_pfn everytime, even if info->current_pfn == end_pfn
> in following code.
> 
> ===
> >>>> +			/* get next pfn */
> >>>> +			pthread_mutex_lock(&info->current_pfn_mutex);
> >>>> +			pfn = info->current_pfn;
> >>>> +			info->current_pfn++;                        # increment everytime
> >>>> +			page_flag_buf->ready = FLAG_FILLING;
> >>>> +			pthread_mutex_unlock(&info->current_pfn_mutex);
> >>>>
> >>>> -			buf_ready = TRUE;
> >>>> +			page_flag_buf->pfn = pfn;
> >>>>
> >>>> -			page_data_buf[index].pfn = pfn;
> >>>> -			page_data_buf[index].ready = 1;
> >>>> +			if (pfn >= kdump_thread_args->end_pfn) {
> >>>> +				page_data_buf[index].used = FALSE;
> >>>> +				page_flag_buf->ready = FLAG_READY;
> >>>> +				break;                             # not decrement
> >>>> +			}
> ===
> 
> If you don't allow info->current_pfn is bigger than end_pfn,
> you don't need to increment info->current_pfn when pfn >= kdump_thread_args->end_pfn like following.
> 
> ===
>                         /* get next pfn */
>                         pthread_mutex_lock(&info->current_pfn_mutex);
>                         pfn = info->current_pfn;
>                         page_flag_buf->pfn = pfn;
>                         if (pfn >= kdump_thread_args->end_pfn) {
>                                 page_data_buf[index].used = FALSE;
>                                 page_flag_buf->ready = FLAG_READY;
>                                 pthread_mutex_unlock(&info->current_pfn_mutex);
>                                 break;
>                         }
>                         page_flag_buf->ready = FLAG_FILLING;
>                         info->current_pfn++;
>                         pthread_mutex_unlock(&info->current_pfn_mutex);
> ===
> 
> If you allow info->current_pfn is bigger than end_pfn, producer doesn't need to change info->current_pfn.
                                                         ^^^^^^^^ consumer

Consumer doesn't need to change info->current_pfn.
---
Thanks
Minoru Usui


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-03-02  3:05             ` Minoru Usui
  2016-03-02  3:16               ` Minoru Usui
@ 2016-03-02  3:59               ` "Zhou, Wenjian/周文剑"
  2016-03-02  6:23                 ` Minoru Usui
  1 sibling, 1 reply; 26+ messages in thread
From: "Zhou, Wenjian/周文剑" @ 2016-03-02  3:59 UTC (permalink / raw)
  To: Minoru Usui, kexec

Hi,

On 03/02/2016 11:05 AM, Minoru Usui wrote:
> Hi, Zhou
> 
>>>>>>> ===
>>>>>>>       producer                   Consumer
>>>>>>> ---------------------------------------------------------
>>>>>>>       pthread_mutex_lock()
>>>>>>>       pfn = info->current_pfn
>>>>>>>                                  info->current_pfn = end_pfn
>>>>>>>       info->current_pfn++
>>>>>>>         -> end_pfn + 1
>>>>>>>       pthread_mutex_unlock()
>>>>>>> ===
>>
>> How about just changing "info->current_pfn = end_pfn" to "info->current_pfn--" ?
>> Just like the first version of the patch.
> 
> If you don't get mutex lock in consumer side, this change is meaningless.
> Of course, info->current_pfn may equal to end_pfn at the end of the cycle,
> but there is a timing that info->current_pfn is bigger than end_pfn in processing producer thread.
> 
> The root cause is producer increments info->current_pfn everytime, even if info->current_pfn == end_pfn
> in following code.
> 

Actually, I didn't understand exactly what you meant until this mail...

I think there is no problem if the info->current_pfn is larger than the end_pfn
in the function write_kdump_pages_parallel_cyclic(), for no other one will use
current_pfn here.
Since we can't and needn't prevent using info->current_pfn outside the function,
we should keep info->current_pfn correct before returning from the function.

> ===
>>>>> +			/* get next pfn */
>>>>> +			pthread_mutex_lock(&info->current_pfn_mutex);
>>>>> +			pfn = info->current_pfn;
>>>>> +			info->current_pfn++;                        # increment everytime
>>>>> +			page_flag_buf->ready = FLAG_FILLING;
>>>>> +			pthread_mutex_unlock(&info->current_pfn_mutex);
>>>>>
>>>>> -			buf_ready = TRUE;
>>>>> +			page_flag_buf->pfn = pfn;
>>>>>
>>>>> -			page_data_buf[index].pfn = pfn;
>>>>> -			page_data_buf[index].ready = 1;
>>>>> +			if (pfn >= kdump_thread_args->end_pfn) {
>>>>> +				page_data_buf[index].used = FALSE;
>>>>> +				page_flag_buf->ready = FLAG_READY;
>>>>> +				break;                             # not decrement
>>>>> +			}
> ===
> 
> If you don't allow info->current_pfn is bigger than end_pfn,
> you don't need to increment info->current_pfn when pfn >= kdump_thread_args->end_pfn like following.
> 
> ===
>                          /* get next pfn */
>                          pthread_mutex_lock(&info->current_pfn_mutex);
>                          pfn = info->current_pfn;
>                          page_flag_buf->pfn = pfn;
>                          if (pfn >= kdump_thread_args->end_pfn) {
>                                  page_data_buf[index].used = FALSE;
>                                  page_flag_buf->ready = FLAG_READY;
>                                  pthread_mutex_unlock(&info->current_pfn_mutex);
>                                  break;
>                          }
>                          page_flag_buf->ready = FLAG_FILLING;
>                          info->current_pfn++;
>                          pthread_mutex_unlock(&info->current_pfn_mutex);
> ===
> 
> If you allow info->current_pfn is bigger than end_pfn, producer doesn't need to change info->current_pfn.
> 

I also have thought about it.
It can keep current_pfn never larger than the end.
But it also makes the code a bit more complex.
If there isn't any special reason, I don't think it's worth doing.

-- 
Thanks
Zhou



_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-03-02  3:59               ` "Zhou, Wenjian/周文剑"
@ 2016-03-02  6:23                 ` Minoru Usui
  0 siblings, 0 replies; 26+ messages in thread
From: Minoru Usui @ 2016-03-02  6:23 UTC (permalink / raw)
  To: "Zhou, Wenjian/周文剑", kexec

Hi, Zhou

> -----Original Message-----
> From: "Zhou, Wenjian/周文剑" [mailto:zhouwj-fnst@cn.fujitsu.com]
> Sent: Wednesday, March 02, 2016 12:59 PM
> To: Usui Minoru(碓井 成) <min-usui@ti.jp.nec.com>; kexec@lists.infradead.org
> Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31
> 
> Hi,
> 
> On 03/02/2016 11:05 AM, Minoru Usui wrote:
> > Hi, Zhou
> >
> >>>>>>> ===
> >>>>>>>       producer                   Consumer
> >>>>>>> ---------------------------------------------------------
> >>>>>>>       pthread_mutex_lock()
> >>>>>>>       pfn = info->current_pfn
> >>>>>>>                                  info->current_pfn = end_pfn
> >>>>>>>       info->current_pfn++
> >>>>>>>         -> end_pfn + 1
> >>>>>>>       pthread_mutex_unlock()
> >>>>>>> ===
> >>
> >> How about just changing "info->current_pfn = end_pfn" to "info->current_pfn--" ?
> >> Just like the first version of the patch.
> >
> > If you don't get mutex lock in consumer side, this change is meaningless.
> > Of course, info->current_pfn may equal to end_pfn at the end of the cycle,
> > but there is a timing that info->current_pfn is bigger than end_pfn in processing producer thread.
> >
> > The root cause is producer increments info->current_pfn everytime, even if info->current_pfn == end_pfn
> > in following code.
> >
> 
> Actually, I didn't get what you mean exactly until this letter...
> 
> I think there is no problem if the info->current_pfn is larger than the end_pfn
> in the function write_kdump_pages_parallel_cyclic(), for no other one will use
> current_pfn here.

Right.
As I said previously, your code works well.
But I simply don't like this, even if there is no problem.

If you allow info->current_pfn is bigger than end_pfn, I think it's ok.

> Since we can't and needn't prevent using info->current_pfn outside the function,
> we should keep info->current_pfn correct before returning from the function.

As I also said previously, in the head of write_kdump_pages_parallel_cyclic(), 
info->current_pfn is initialized by start_pfn.
I think we don't need to keep info->current_pfn correct before returning from write_kdump_pages_parallel_cyclic().

> > ===
> >>>>> +			/* get next pfn */
> >>>>> +			pthread_mutex_lock(&info->current_pfn_mutex);
> >>>>> +			pfn = info->current_pfn;
> >>>>> +			info->current_pfn++;                        # increment everytime
> >>>>> +			page_flag_buf->ready = FLAG_FILLING;
> >>>>> +			pthread_mutex_unlock(&info->current_pfn_mutex);
> >>>>>
> >>>>> -			buf_ready = TRUE;
> >>>>> +			page_flag_buf->pfn = pfn;
> >>>>>
> >>>>> -			page_data_buf[index].pfn = pfn;
> >>>>> -			page_data_buf[index].ready = 1;
> >>>>> +			if (pfn >= kdump_thread_args->end_pfn) {
> >>>>> +				page_data_buf[index].used = FALSE;
> >>>>> +				page_flag_buf->ready = FLAG_READY;
> >>>>> +				break;                             # not decrement
> >>>>> +			}
> > ===
> >
> > If you don't allow info->current_pfn is bigger than end_pfn,
> > you don't need to increment info->current_pfn when pfn >= kdump_thread_args->end_pfn like following.
> >
> > ===
> >                          /* get next pfn */
> >                          pthread_mutex_lock(&info->current_pfn_mutex);
> >                          pfn = info->current_pfn;
> >                          page_flag_buf->pfn = pfn;
> >                          if (pfn >= kdump_thread_args->end_pfn) {
> >                                  page_data_buf[index].used = FALSE;
> >                                  page_flag_buf->ready = FLAG_READY;
> >                                  pthread_mutex_unlock(&info->current_pfn_mutex);
> >                                  break;
> >                          }
> >                          page_flag_buf->ready = FLAG_FILLING;
> >                          info->current_pfn++;
> >                          pthread_mutex_unlock(&info->current_pfn_mutex);
> > ===
> >
> > If you allow info->current_pfn is bigger than end_pfn, producer doesn't need to change info->current_pfn.
> >
> 
> I also have thought about it.
> It can keep current_pfn never larger than the end.
> But it also makes the code a bit more complex.
> If there aren't any special reason, I don't think it's worth to do it.

Personally, I think the above code isn't complex, and locking period is almost unchanged except pfn >= end_pfn case.

But the original works well, so I don't stick to it.

Thanks,
Minoru Usui

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-03-01  8:16               ` Minfei Huang
@ 2016-03-02 10:25                 ` Minfei Huang
  2016-03-04  0:59                   ` Minoru Usui
  0 siblings, 1 reply; 26+ messages in thread
From: Minfei Huang @ 2016-03-02 10:25 UTC (permalink / raw)
  To: "Zhou, Wenjian/周文剑"; +Cc: kexec

On 03/01/16 at 04:16pm, Minfei Huang wrote:
> On 03/01/16 at 02:59pm, "Zhou, Wenjian/周文剑" wrote:
> > Hi Minfei,
> > 
> > Could you help me test the patch in the attachment?
> 
> Ok. I will verify it with this patch. Feedback will be given in next few
> days. Thanks for your effort.
> 
> Thanks
> Minfei
> 
> > 
> > I have run it a thousand times and haven't got a failure.
> > Previously, I can always get a failure after running hundreds of times.
> > 
> > But I'm not sure if it is the same problem you have met.

Hi.

I have verified this patch based on 1.5.9. makedumpfile works well both
with and without option --num-threads after applying this patch.

There are 128 active CPUs and 4T memory in kdump kernel.

with option -d 31
real    3m45.916s

with option --num-threads 1 -d 31
real    6m30.286s

with option --num-threads 2 -d 31
real    7m45.597s

with option --num-threads 8 -d 31
real    13m7.914s

with option --num-threads 32 -d 31
real    10m10.248s

original makedumpfile with option -d 31
real    4m27.617s

It seems that more time is taken as the thread number increases.

Thanks
Minfei

> > 
> > -- 
> > Thanks
> > Zhou
> > 
> > On 02/24/2016 10:24 AM, Minfei Huang wrote:
> > >
> > >>On Feb 24, 2016, at 10:20, Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> wrote:
> > >>
> > >>Hi,
> > >>
> > >>On 02/24/2016 09:43 AM, Minfei Huang wrote:
> > >>>On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
> > >>>>Hello, Minfei,
> > >>>>
> > >>>>Does it occur every time?
> > >>>>If not, I think I have known the reason.
> > >>>
> > >>>Hi, Wenjian.
> > >>>
> > >>>This patch is applied directly on version 1.5.9. And makedumpfile hangs
> > >>>if option num-thread is appended.
> > >>>
> > >>
> > >>I see.
> > >>I'm working on it.
> > >>
> > >>BTW, did you only test it with --num-threads 128?
> > >>How is it with --num-threads 32(or smaller value)?
> > >
> > >Yes. I have tested with num-thread 1, 2, 8 and 32. Except for --num-threads 1,
> > >all of the tests fail.
> > >
> > >Thanks
> > >Minfei

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-03-02 10:25                 ` Minfei Huang
@ 2016-03-04  0:59                   ` Minoru Usui
  2016-03-04  4:17                     ` Minfei Huang
  0 siblings, 1 reply; 26+ messages in thread
From: Minoru Usui @ 2016-03-04  0:59 UTC (permalink / raw)
  To: Minfei Huang, "Zhou, Wenjian/周文剑"; +Cc: kexec

Hi, Zhou, Minfei

> -----Original Message-----
> From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of Minfei Huang
> Sent: Wednesday, February 24, 2016 11:25 AM
> To: "Zhou, Wenjian/周文剑" <zhouwj-fnst@cn.fujitsu.com>
> Cc: kexec@lists.infradead.org
> Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31
> 
> 
> > On Feb 24, 2016, at 10:20, Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> wrote:
> >
> > Hi,
> >
> > On 02/24/2016 09:43 AM, Minfei Huang wrote:
> >> On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
> >>> Hello, Minfei,
> >>>
> >>> Does it occur every time?
> >>> If not, I think I have known the reason.
> >>
> >> Hi, Wenjian.
> >>
> >> This patch is applied directly on version 1.5.9. And makedumpfile hangs
> >> if option num-thread is appended.
> >>
> >
> > I see.
> > I'm working on it.
> >
> > BTW, did you only test it with --num-threads 128?
> > How is it with --num-threads 32(or smaller value)?
> 
> Yes. I have tested with num-thread 1, 2, 8 and 32. Except for --num-threads 1,
> all of the tests fail.

I wrote an RFC patch which fixed this problem.
This patch is incremental one from the Zhou's original v2 patch.

I think the problem was caused by page_flag_buf race between consumer and producers.
So, I add the mutex to each page_flag_buf.
Change summary is as follows.

  * Add mutex to each page_flag_buf.
  * Producer:
      - Keeping page_flag_buf mutex until producer finish writing all data to page_flag_buf. 
      - Fix the issue that info->current_pfn becomes larger than end_pfn.
      - Keeping info->current_pfn_mutex until producer gets dumpable pfn.
  * Consumer:
      - Keeping page_flag_buf mutex until consumer finish writing all data to page_flag_buf. 

The result of my test in 5GB dumpfile is following.

===
Common Option: -c --cyclic-buffer=10 --num-threads

[-d0]
num-thread    real     vs num-threads 0
----------------------------------------
  0          209.621      100.0%
  1          217.921      104.0%
  2           84.539       40.3%
  4           51.787       24.7%
  8           37.260       17.8%

[-d31]
num-thread    real     vs num-threads 0
----------------------------------------
  0           11.492      100.0%
  1            8.572       74.6%
  2            4.928       42.9%
  4            3.115       27.1%
  8            2.259       19.7% 
===

How about this approach?
Could you test this?

===
diff --git a/makedumpfile.c b/makedumpfile.c
index 8a0c636..c697f93 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -7521,7 +7521,7 @@ kdump_thread_function_cyclic(void *arg) {
 	 * unfiltered zero page will only take a page_flag_buf
 	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
 	 */
-	while (pfn < kdump_thread_args->end_pfn) {
+	while (1) {
 		buf_ready = FALSE;
 
 		while (page_data_buf[index].used != FALSE ||
@@ -7532,35 +7532,46 @@ kdump_thread_function_cyclic(void *arg) {
 
 		while (buf_ready == FALSE) {
 			pthread_testcancel();
-			if (page_flag_buf->ready == FLAG_READY)
+			pthread_mutex_lock(&page_flag_buf->mutex);
+			if (page_flag_buf->ready == FLAG_READY) {
+				pthread_mutex_unlock(&page_flag_buf->mutex);
 				continue;
+			}
 
-			/* get next pfn */
+			/* get next dumpable pfn */
 			pthread_mutex_lock(&info->current_pfn_mutex);
 			pfn = info->current_pfn;
-			info->current_pfn++;
-			page_flag_buf->ready = FLAG_FILLING;
-			pthread_mutex_unlock(&info->current_pfn_mutex);
-
-			page_flag_buf->pfn = pfn;
+			while(1) {
+				if (pfn >= kdump_thread_args->end_pfn) {
+					pthread_mutex_unlock(&info->current_pfn_mutex);
+					page_flag_buf->pfn = pfn;
+					page_flag_buf->ready = FLAG_READY;
+					pthread_mutex_unlock(&page_flag_buf->mutex);
+					page_data_buf[index].used = FALSE;
+					pthread_mutex_unlock(&page_data_buf[index].mutex);
+					goto finish;
+				}
+				dumpable = is_dumpable(
+					info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
+					pfn, cycle);
+				if (dumpable)
+					break;
 
-			if (pfn >= kdump_thread_args->end_pfn) {
-				page_data_buf[index].used = FALSE;
-				page_flag_buf->ready = FLAG_READY;
-				break;
+				pfn++;
 			}
-
-			dumpable = is_dumpable(
-				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
-				pfn,
-				cycle);
-			if (!dumpable)
-				continue;
+			info->current_pfn = pfn + 1;
+			pthread_mutex_unlock(&info->current_pfn_mutex);
+			page_flag_buf->pfn = pfn;
+			page_flag_buf->ready = FLAG_FILLING;
 
 			if (!read_pfn_parallel(fd_memory, pfn, buf,
 					       &bitmap_memory_parallel,
-					       mmap_cache))
+					       mmap_cache)) {
+					pthread_mutex_unlock(&page_flag_buf->mutex);
+					page_data_buf[index].used = FALSE;
+					pthread_mutex_unlock(&page_data_buf[index].mutex);
 					goto fail;
+			}
 
 			filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
 							info->page_size,
@@ -7626,12 +7637,12 @@ kdump_thread_function_cyclic(void *arg) {
 			buf_ready = TRUE;
 next:
 			page_flag_buf->ready = FLAG_READY;
+			pthread_mutex_unlock(&page_flag_buf->mutex);
 			page_flag_buf = page_flag_buf->next;
-
 		}
-
 		pthread_mutex_unlock(&page_data_buf[index].mutex);
 	}
+finish:
 	retval = NULL;
 
 fail:
@@ -7658,15 +7669,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 	mdf_pfn_t start_pfn, end_pfn;
 	struct page_desc pd;
 	struct timeval tv_start;
-	struct timeval last, new;
 	pthread_t **threads = NULL;
 	struct thread_args *kdump_thread_args = NULL;
 	void *thread_result;
 	int page_buf_num;
 	struct page_data *page_data_buf = NULL;
+	struct page_flag *page_flag_buf = NULL;
 	int i;
 	int index;
-	int end_count, consuming, check_count;
+	int end_count, consuming, check_count, prevlocked;
 	mdf_pfn_t current_pfn, temp_pfn;
 
 	if (info->flag_elf_dumpfile)
@@ -7728,6 +7739,19 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 	}
 
 	for (i = 0; i < info->num_threads; i++) {
+		for (page_flag_buf = info->page_flag_buf[i];
+		     page_flag_buf->next != info->page_flag_buf[i];
+		     page_flag_buf = page_flag_buf->next) {
+			page_flag_buf->ready = FLAG_UNUSED;
+			page_flag_buf->pfn   = start_pfn;
+			res = pthread_mutex_init(&page_flag_buf->mutex, NULL);
+			if (res != 0) {
+				ERRMSG("Can't initialize mutex of page_flag_buf. %s\n",
+						strerror(res));
+				goto out;
+			}
+		}
+
 		kdump_thread_args[i].thread_num = i;
 		kdump_thread_args[i].len_buf_out = len_buf_out;
 		kdump_thread_args[i].start_pfn = start_pfn;
@@ -7749,9 +7773,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 
 	end_count = 0;
 	while (1) {
-		consuming = 0;
-		check_count = 0;
-
 		/*
 		 * The basic idea is producer producing page and consumer writing page.
 		 * Each producer have a page_flag_buf list which is used for storing page's description.
@@ -7760,8 +7781,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 		 * The main thread is the consumer. It will find the next pfn and write it into file.
 		 * The next pfn is smallest pfn in all page_flag_buf.
 		 */
-		gettimeofday(&last, NULL);
-		while (1) {
+		do {
+			consuming = prevlocked = -1;
+			check_count = 0;
 			current_pfn = end_pfn;
 
 			/*
@@ -7772,32 +7794,36 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 			 * current_pfn is used for recording the value of pfn when checking the pfn.
 			 */
 			for (i = 0; i < info->num_threads; i++) {
+				pthread_mutex_lock(&info->page_flag_buf[i]->mutex);
 				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
-					continue;
+					goto next;
 				temp_pfn = info->page_flag_buf[i]->pfn;
 
 				/*
 				 * count how many threads have reached the end.
 				 */
 				if (temp_pfn >= end_pfn) {
-					/*
-					 * prevent setting FLAG_UNUSED being optimized.
-					 */
-					MSG("-");
-
 					info->page_flag_buf[i]->ready = FLAG_UNUSED;
-
-					info->current_pfn = end_pfn;
 					end_count++;
-					continue;
+					goto next;
 				}
 
 				if (current_pfn < temp_pfn)
-					continue;
+					goto next;
 
 				check_count++;
+				prevlocked = consuming;
 				consuming = i;
 				current_pfn = temp_pfn;
+next:
+				/*
+				 * Keep mutex of page_flag which has minimum pfn
+				 */
+				if (consuming != i) {            // unlock page_flag which is not minimum pfn
+					pthread_mutex_unlock(&info->page_flag_buf[i]->mutex);
+				} else if (prevlocked != -1) {   // unlock page_flag which previously locked
+					pthread_mutex_unlock(&info->page_flag_buf[prevlocked]->mutex);
+				}
 			}
 
 			/*
@@ -7805,40 +7831,18 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 			 */
 			if (end_count >= info->num_threads)
 				goto finish;
-
-			/*
-			 * Since it has the probabilty that there is no page_flag_buf being ready,
-			 * we should recheck if it happens.
-			 */
-			if (check_count == 0)
-				continue;
-
-			/*
-			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
-			 * So we should recheck.
-			 */
-			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
-				gettimeofday(&new, NULL);
-				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
-					ERRMSG("Can't get data of pfn.\n");
-					goto out;
-				}
-				continue;
-			}
-
-			if (current_pfn == info->page_flag_buf[consuming]->pfn)
-				break;
-		}
+		} while (check_count == 0);
 
 		if ((num_dumped % per) == 0)
 			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
 
 		num_dumped++;
 
-
 		if (info->page_flag_buf[consuming]->zero == TRUE) {
-			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
+			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) {
+				pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
 				goto out;
+			}
 			pfn_zero++;
 		} else {
 			index = info->page_flag_buf[consuming]->index;
@@ -7850,16 +7854,21 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 			/*
 			 * Write the page header.
 			 */
-			if (!write_cache(cd_header, &pd, sizeof(page_desc_t)))
+			if (!write_cache(cd_header, &pd, sizeof(page_desc_t))) {
+				pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
 				goto out;
+			}
 			/*
 			 * Write the page data.
 			 */
-			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
+			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) {
+				pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
 				goto out;
+			}
 			page_data_buf[index].used = FALSE;
 		}
 		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
+		pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
 		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
 	}
 finish:
@@ -7906,6 +7915,14 @@ out:
 		}
 	}
 
+	for (i = 0; i < info->num_threads; i++) {
+		for (page_flag_buf = info->page_flag_buf[i];
+		     page_flag_buf->next != info->page_flag_buf[i];
+		     page_flag_buf = page_flag_buf->next) {
+			pthread_mutex_destroy(&page_flag_buf->mutex);
+		}
+	}
+
 	pthread_rwlock_destroy(&info->usemmap_rwlock);
 	pthread_mutex_destroy(&info->filter_mutex);
 	pthread_mutex_destroy(&info->consumed_pfn_mutex);
diff --git a/makedumpfile.h b/makedumpfile.h
index 80ce23a..188ec17 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -971,7 +971,6 @@ typedef unsigned long long int ulonglong;
  */
 
 #define PAGE_DATA_NUM	(50)
-#define WAIT_TIME	(60 * 10)
 #define PTHREAD_FAIL	((void *)-2)
 #define NUM_BUFFERS	(20)
 
@@ -987,6 +986,7 @@ enum {
 	FLAG_FILLING
 };
 struct page_flag {
+	pthread_mutex_t mutex;
 	mdf_pfn_t pfn;
 	char zero;
 	char ready;
===


Thanks
Minoru Usui
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH v2] Improve the performance of --num-threads -d 31
  2016-03-04  0:59                   ` Minoru Usui
@ 2016-03-04  4:17                     ` Minfei Huang
  0 siblings, 0 replies; 26+ messages in thread
From: Minfei Huang @ 2016-03-04  4:17 UTC (permalink / raw)
  To: Minoru Usui; +Cc: "Zhou, Wenjian/周文剑", kexec

Hi, Minoru.

Sure. I will test this patch.

Thanks
Minfei

On 03/04/16 at 12:59am, Minoru Usui wrote:
> Hi, Zhou, Minfei
> 
> > -----Original Message-----
> > From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of Minfei Huang
> > Sent: Wednesday, February 24, 2016 11:25 AM
> > To: "Zhou, Wenjian/周文剑" <zhouwj-fnst@cn.fujitsu.com>
> > Cc: kexec@lists.infradead.org
> > Subject: Re: [PATCH v2] Improve the performance of --num-threads -d 31
> > 
> > 
> > > On Feb 24, 2016, at 10:20, Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> wrote:
> > >
> > > Hi,
> > >
> > > On 02/24/2016 09:43 AM, Minfei Huang wrote:
> > >> On 02/23/16 at 01:47pm, "Zhou, Wenjian/周文剑" wrote:
> > >>> Hello, Minfei,
> > >>>
> > >>> Does it occur every time?
> > >>> If not, I think I have known the reason.
> > >>
> > >> Hi, Wenjian.
> > >>
> > >> This patch is applied directly on version 1.5.9. And makedumpfile hangs
> > >> if option num-thread is appended.
> > >>
> > >
> > > I see.
> > > I'm working on it.
> > >
> > > BTW, did you only test it with --num-threads 128?
> > > How is it with --num-threads 32(or smaller value)?
> > 
> > Yes. I have tested with --num-threads 1, 2, 8 and 32. Except for --num-threads 1,
> > all of the tests fail.
> 
> I wrote an RFC patch which fixed this problem.
> This patch is incremental one from the Zhou's original v2 patch.
> 
> I think the problem was caused by a race on page_flag_buf between the consumer and the producers.
> So, I added a mutex to each page_flag_buf.
> Change summary is as follows.
> 
>   * Add a mutex to each page_flag_buf.
>   * Producer:
>       - Keep the page_flag_buf mutex held until the producer finishes writing all data to page_flag_buf.
>       - Fix the issue that info->current_pfn becomes larger than end_pfn.
>       - Keep info->current_pfn_mutex held until the producer gets a dumpable pfn.
>   * Consumer:
>       - Keep the page_flag_buf mutex held until the consumer finishes writing all data to page_flag_buf.
> 
> The result of my test in 5GB dumpfile is following.
> 
> ===
> Common Option: -c --cyclic-buffer=10 --num-threads
> 
> [-d0]
> num-thread    real     vs num-threads 0
> ----------------------------------------
>   0          209.621      100.0%
>   1          217.921      104.0%
>   2           84.539       40.3%
>   4           51.787       24.7%
>   8           37.260       17.8%
> 
> [-d31]
> num-thread    real     vs num-threads 0
> ----------------------------------------
>   0           11.492      100.0%
>   1            8.572       74.6%
>   2            4.928       42.9%
>   4            3.115       27.1%
>   8            2.259       19.7% 
> ===
> 
> How about this approach?
> Could you test this?
> 
> ===
> diff --git a/makedumpfile.c b/makedumpfile.c
> index 8a0c636..c697f93 100644
> --- a/makedumpfile.c
> +++ b/makedumpfile.c
> @@ -7521,7 +7521,7 @@ kdump_thread_function_cyclic(void *arg) {
>  	 * unfiltered zero page will only take a page_flag_buf
>  	 * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
>  	 */
> -	while (pfn < kdump_thread_args->end_pfn) {
> +	while (1) {
>  		buf_ready = FALSE;
>  
>  		while (page_data_buf[index].used != FALSE ||
> @@ -7532,35 +7532,46 @@ kdump_thread_function_cyclic(void *arg) {
>  
>  		while (buf_ready == FALSE) {
>  			pthread_testcancel();
> -			if (page_flag_buf->ready == FLAG_READY)
> +			pthread_mutex_lock(&page_flag_buf->mutex);
> +			if (page_flag_buf->ready == FLAG_READY) {
> +				pthread_mutex_unlock(&page_flag_buf->mutex);
>  				continue;
> +			}
>  
> -			/* get next pfn */
> +			/* get next dumpable pfn */
>  			pthread_mutex_lock(&info->current_pfn_mutex);
>  			pfn = info->current_pfn;
> -			info->current_pfn++;
> -			page_flag_buf->ready = FLAG_FILLING;
> -			pthread_mutex_unlock(&info->current_pfn_mutex);
> -
> -			page_flag_buf->pfn = pfn;
> +			while(1) {
> +				if (pfn >= kdump_thread_args->end_pfn) {
> +					pthread_mutex_unlock(&info->current_pfn_mutex);
> +					page_flag_buf->pfn = pfn;
> +					page_flag_buf->ready = FLAG_READY;
> +					pthread_mutex_unlock(&page_flag_buf->mutex);
> +					page_data_buf[index].used = FALSE;
> +					pthread_mutex_unlock(&page_data_buf[index].mutex);
> +					goto finish;
> +				}
> +				dumpable = is_dumpable(
> +					info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
> +					pfn, cycle);
> +				if (dumpable)
> +					break;
>  
> -			if (pfn >= kdump_thread_args->end_pfn) {
> -				page_data_buf[index].used = FALSE;
> -				page_flag_buf->ready = FLAG_READY;
> -				break;
> +				pfn++;
>  			}
> -
> -			dumpable = is_dumpable(
> -				info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
> -				pfn,
> -				cycle);
> -			if (!dumpable)
> -				continue;
> +			info->current_pfn = pfn + 1;
> +			pthread_mutex_unlock(&info->current_pfn_mutex);
> +			page_flag_buf->pfn = pfn;
> +			page_flag_buf->ready = FLAG_FILLING;
>  
>  			if (!read_pfn_parallel(fd_memory, pfn, buf,
>  					       &bitmap_memory_parallel,
> -					       mmap_cache))
> +					       mmap_cache)) {
> +					pthread_mutex_unlock(&page_flag_buf->mutex);
> +					page_data_buf[index].used = FALSE;
> +					pthread_mutex_unlock(&page_data_buf[index].mutex);
>  					goto fail;
> +			}
>  
>  			filter_data_buffer_parallel(buf, pfn_to_paddr(pfn),
>  							info->page_size,
> @@ -7626,12 +7637,12 @@ kdump_thread_function_cyclic(void *arg) {
>  			buf_ready = TRUE;
>  next:
>  			page_flag_buf->ready = FLAG_READY;
> +			pthread_mutex_unlock(&page_flag_buf->mutex);
>  			page_flag_buf = page_flag_buf->next;
> -
>  		}
> -
>  		pthread_mutex_unlock(&page_data_buf[index].mutex);
>  	}
> +finish:
>  	retval = NULL;
>  
>  fail:
> @@ -7658,15 +7669,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  	mdf_pfn_t start_pfn, end_pfn;
>  	struct page_desc pd;
>  	struct timeval tv_start;
> -	struct timeval last, new;
>  	pthread_t **threads = NULL;
>  	struct thread_args *kdump_thread_args = NULL;
>  	void *thread_result;
>  	int page_buf_num;
>  	struct page_data *page_data_buf = NULL;
> +	struct page_flag *page_flag_buf = NULL;
>  	int i;
>  	int index;
> -	int end_count, consuming, check_count;
> +	int end_count, consuming, check_count, prevlocked;
>  	mdf_pfn_t current_pfn, temp_pfn;
>  
>  	if (info->flag_elf_dumpfile)
> @@ -7728,6 +7739,19 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  	}
>  
>  	for (i = 0; i < info->num_threads; i++) {
> +		for (page_flag_buf = info->page_flag_buf[i];
> +		     page_flag_buf->next != info->page_flag_buf[i];
> +		     page_flag_buf = page_flag_buf->next) {
> +			page_flag_buf->ready = FLAG_UNUSED;
> +			page_flag_buf->pfn   = start_pfn;
> +			res = pthread_mutex_init(&page_flag_buf->mutex, NULL);
> +			if (res != 0) {
> +				ERRMSG("Can't initialize mutex of page_flag_buf. %s\n",
> +						strerror(res));
> +				goto out;
> +			}
> +		}
> +
>  		kdump_thread_args[i].thread_num = i;
>  		kdump_thread_args[i].len_buf_out = len_buf_out;
>  		kdump_thread_args[i].start_pfn = start_pfn;
> @@ -7749,9 +7773,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  
>  	end_count = 0;
>  	while (1) {
> -		consuming = 0;
> -		check_count = 0;
> -
>  		/*
>  		 * The basic idea is producer producing page and consumer writing page.
>  		 * Each producer have a page_flag_buf list which is used for storing page's description.
> @@ -7760,8 +7781,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  		 * The main thread is the consumer. It will find the next pfn and write it into file.
>  		 * The next pfn is smallest pfn in all page_flag_buf.
>  		 */
> -		gettimeofday(&last, NULL);
> -		while (1) {
> +		do {
> +			consuming = prevlocked = -1;
> +			check_count = 0;
>  			current_pfn = end_pfn;
>  
>  			/*
> @@ -7772,32 +7794,36 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  			 * current_pfn is used for recording the value of pfn when checking the pfn.
>  			 */
>  			for (i = 0; i < info->num_threads; i++) {
> +				pthread_mutex_lock(&info->page_flag_buf[i]->mutex);
>  				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
> -					continue;
> +					goto next;
>  				temp_pfn = info->page_flag_buf[i]->pfn;
>  
>  				/*
>  				 * count how many threads have reached the end.
>  				 */
>  				if (temp_pfn >= end_pfn) {
> -					/*
> -					 * prevent setting FLAG_UNUSED being optimized.
> -					 */
> -					MSG("-");
> -
>  					info->page_flag_buf[i]->ready = FLAG_UNUSED;
> -
> -					info->current_pfn = end_pfn;
>  					end_count++;
> -					continue;
> +					goto next;
>  				}
>  
>  				if (current_pfn < temp_pfn)
> -					continue;
> +					goto next;
>  
>  				check_count++;
> +				prevlocked = consuming;
>  				consuming = i;
>  				current_pfn = temp_pfn;
> +next:
> +				/*
> +				 * Keep the mutex of the page_flag which has the minimum pfn
> +				 */
> +				if (consuming != i) {            // unlock page_flag which is not minimum pfn
> +					pthread_mutex_unlock(&info->page_flag_buf[i]->mutex);
> +				} else if (prevlocked != -1) {   // unlock page_flag which previously locked
> +					pthread_mutex_unlock(&info->page_flag_buf[prevlocked]->mutex);
> +				}
>  			}
>  
>  			/*
> @@ -7805,40 +7831,18 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  			 */
>  			if (end_count >= info->num_threads)
>  				goto finish;
> -
> -			/*
> -			 * Since it has the probabilty that there is no page_flag_buf being ready,
> -			 * we should recheck if it happens.
> -			 */
> -			if (check_count == 0)
> -				continue;
> -
> -			/*
> -			 * If the page_flag_buf is not ready, the pfn recorded may be changed.
> -			 * So we should recheck.
> -			 */
> -			if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
> -				gettimeofday(&new, NULL);
> -				if (new.tv_sec - last.tv_sec > WAIT_TIME) {
> -					ERRMSG("Can't get data of pfn.\n");
> -					goto out;
> -				}
> -				continue;
> -			}
> -
> -			if (current_pfn == info->page_flag_buf[consuming]->pfn)
> -				break;
> -		}
> +		} while (check_count == 0);
>  
>  		if ((num_dumped % per) == 0)
>  			print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
>  
>  		num_dumped++;
>  
> -
>  		if (info->page_flag_buf[consuming]->zero == TRUE) {
> -			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
> +			if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) {
> +				pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
>  				goto out;
> +			}
>  			pfn_zero++;
>  		} else {
>  			index = info->page_flag_buf[consuming]->index;
> @@ -7850,16 +7854,21 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>  			/*
>  			 * Write the page header.
>  			 */
> -			if (!write_cache(cd_header, &pd, sizeof(page_desc_t)))
> +			if (!write_cache(cd_header, &pd, sizeof(page_desc_t))) {
> +				pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
>  				goto out;
> +			}
>  			/*
>  			 * Write the page data.
>  			 */
> -			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
> +			if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) {
> +				pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
>  				goto out;
> +			}
>  			page_data_buf[index].used = FALSE;
>  		}
>  		info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
> +		pthread_mutex_unlock(&info->page_flag_buf[consuming]->mutex);
>  		info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
>  	}
>  finish:
> @@ -7906,6 +7915,14 @@ out:
>  		}
>  	}
>  
> +	for (i = 0; i < info->num_threads; i++) {
> +		for (page_flag_buf = info->page_flag_buf[i];
> +		     page_flag_buf->next != info->page_flag_buf[i];
> +		     page_flag_buf = page_flag_buf->next) {
> +			pthread_mutex_destroy(&page_flag_buf->mutex);
> +		}
> +	}
> +
>  	pthread_rwlock_destroy(&info->usemmap_rwlock);
>  	pthread_mutex_destroy(&info->filter_mutex);
>  	pthread_mutex_destroy(&info->consumed_pfn_mutex);
> diff --git a/makedumpfile.h b/makedumpfile.h
> index 80ce23a..188ec17 100644
> --- a/makedumpfile.h
> +++ b/makedumpfile.h
> @@ -971,7 +971,6 @@ typedef unsigned long long int ulonglong;
>   */
>  
>  #define PAGE_DATA_NUM	(50)
> -#define WAIT_TIME	(60 * 10)
>  #define PTHREAD_FAIL	((void *)-2)
>  #define NUM_BUFFERS	(20)
>  
> @@ -987,6 +986,7 @@ enum {
>  	FLAG_FILLING
>  };
>  struct page_flag {
> +	pthread_mutex_t mutex;
>  	mdf_pfn_t pfn;
>  	char zero;
>  	char ready;
> ===
> 
> 
> Thanks
> Minoru Usui

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2016-03-04  4:14 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-17  7:05 [PATCH v2] Improve the performance of --num-threads -d 31 Zhou Wenjian
2016-02-22 16:58 ` Minfei Huang
2016-02-23  5:26   ` Minfei Huang
2016-02-23  5:47     ` "Zhou, Wenjian/周文剑"
2016-02-24  1:43       ` Minfei Huang
2016-02-24  2:20         ` "Zhou, Wenjian/周文剑"
2016-02-24  2:24           ` Minfei Huang
2016-03-01  6:59             ` "Zhou, Wenjian/周文剑"
2016-03-01  8:16               ` Minfei Huang
2016-03-02 10:25                 ` Minfei Huang
2016-03-04  0:59                   ` Minoru Usui
2016-03-04  4:17                     ` Minfei Huang
2016-03-01  7:20             ` "Zhou, Wenjian/周文剑"
2016-03-01  8:17               ` Minfei Huang
2016-02-23  1:32 ` Minoru Usui
2016-02-23  3:45   ` "Zhou, Wenjian/周文?"
2016-02-23  8:00     ` Minoru Usui
2016-02-23  8:29       ` "Zhou, Wenjian/周文?"
2016-02-24  0:45         ` Minoru Usui
2016-03-01  7:49           ` "Zhou, Wenjian/周文?"
2016-03-02  3:05             ` Minoru Usui
2016-03-02  3:16               ` Minoru Usui
2016-03-02  3:59               ` "Zhou, Wenjian/周文?"
2016-03-02  6:23                 ` Minoru Usui
2016-02-24  8:13 ` Minoru Usui
2016-03-01  7:34   ` "Zhou, Wenjian/周文?"

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.