From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail203.messagelabs.com (mail203.messagelabs.com [216.82.254.243]) by kanga.kvack.org (Postfix) with SMTP id AF7FC62008B for ; Wed, 7 Apr 2010 22:57:13 -0400 (EDT) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [PATCH 53 of 67] Export unusable free space index via /proc/unusable_index Message-Id: <2929ff46bf9c71ab001b.1270691496@v2.random> In-Reply-To: References: Date: Thu, 08 Apr 2010 03:51:36 +0200 From: Andrea Arcangeli Sender: owner-linux-mm@kvack.org To: linux-mm@kvack.org, Andrew Morton Cc: Marcelo Tosatti , Adam Litke , Avi Kivity , Izik Eidus , Hugh Dickins , Nick Piggin , Rik van Riel , Mel Gorman , Dave Hansen , Benjamin Herrenschmidt , Ingo Molnar , Mike Travis , KAMEZAWA Hiroyuki , Christoph Lameter , Chris Wright , bpicco@redhat.com, KOSAKI Motohiro , Balbir Singh , Arnd Bergmann , "Michael S. Tsirkin" , Peter Zijlstra , Johannes Weiner , Daisuke Nishimura , Chris Mason List-ID: From: Mel Gorman Unusable free space index is a measure of external fragmentation that takes the allocation size into account. For the most part, the huge page size will be the size of interest but not necessarily so it is exported on a per-order and per-zone basis via /proc/unusable_index. The index is a value between 0 and 1. It can be expressed as a percentage by multiplying by 100 as documented in Documentation/filesystems/proc.txt. Signed-off-by: Mel Gorman Reviewed-by: Minchan Kim Reviewed-by: KOSAKI Motohiro Acked-by: Rik van Riel Reviewed-by: Christoph Lameter --- diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -451,6 +451,7 @@ Table 1-5: Kernel info in /proc sys See chapter 2 sysvipc Info of SysVIPC Resources (msg, sem, shm) (2.4) tty Info of tty drivers + unusable_index Additional page allocator information (see text)(2.5) uptime System uptime version Kernel version video bttv info of video resources (2.4) @@ -604,7 +605,7 @@ ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in Z available in ZONE_NORMAL, etc... More information relevant to external fragmentation can be found in -pagetypeinfo. +pagetypeinfo and unusable_index > cat /proc/pagetypeinfo Page block order: 9 @@ -645,6 +646,16 @@ unless memory has been mlock()'d. Some o also be allocatable although a lot of filesystem metadata may have to be reclaimed to achieve this. +> cat /proc/unusable_index +Node 0, zone DMA 0.000 0.000 0.000 0.001 0.005 0.013 0.021 0.037 0.037 0.101 0.230 +Node 0, zone Normal 0.000 0.000 0.000 0.001 0.002 0.002 0.005 0.015 0.028 0.028 0.054 + +The unusable free space index measures how much of the available free +memory cannot be used to satisfy an allocation of a given size and is a +value between 0 and 1. The higher the value, the more of free memory is +unusable and by implication, the worse the external fragmentation is. This +can be expressed as a percentage by multiplying by 100. + .............................................................................. meminfo: diff --git a/mm/vmstat.c b/mm/vmstat.c --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -454,6 +454,106 @@ static int frag_show(struct seq_file *m, return 0; } + +struct contig_page_info { + unsigned long free_pages; + unsigned long free_blocks_total; + unsigned long free_blocks_suitable; +}; + +/* + * Calculate the number of free pages in a zone, how many contiguous + * pages are free and how many are large enough to satisfy an allocation of + * the target size. Note that this function makes no attempt to estimate + * how many suitable free blocks there *might* be if MOVABLE pages were + * migrated. Calculating that is possible, but expensive and can be + * figured out from userspace + */ +static void fill_contig_page_info(struct zone *zone, + unsigned int suitable_order, + struct contig_page_info *info) +{ + unsigned int order; + + info->free_pages = 0; + info->free_blocks_total = 0; + info->free_blocks_suitable = 0; + + for (order = 0; order < MAX_ORDER; order++) { + unsigned long blocks; + + /* Count number of free blocks */ + blocks = zone->free_area[order].nr_free; + info->free_blocks_total += blocks; + + /* Count free base pages */ + info->free_pages += blocks << order; + + /* Count the suitable free blocks */ + if (order >= suitable_order) + info->free_blocks_suitable += blocks << + (order - suitable_order); + } +} + +/* + * Return an index indicating how much of the available free memory is + * unusable for an allocation of the requested size. + */ +static int unusable_free_index(unsigned int order, + struct contig_page_info *info) +{ + /* No free memory is interpreted as all free memory is unusable */ + if (info->free_pages == 0) + return 1000; + + /* + * Index should be a value between 0 and 1. Return a value to 3 + * decimal places. + * + * 0 => no fragmentation + * 1 => high fragmentation + */ + return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages); + +} + +static void unusable_show_print(struct seq_file *m, + pg_data_t *pgdat, struct zone *zone) +{ + unsigned int order; + int index; + struct contig_page_info info; + + seq_printf(m, "Node %d, zone %8s ", + pgdat->node_id, + zone->name); + for (order = 0; order < MAX_ORDER; ++order) { + fill_contig_page_info(zone, order, &info); + index = unusable_free_index(order, &info); + seq_printf(m, "%d.%03d ", index / 1000, index % 1000); + } + + seq_putc(m, '\n'); +} + +/* + * Display unusable free space index + * XXX: Could be a lot more efficient, but it's not a critical path + */ +static int unusable_show(struct seq_file *m, void *arg) +{ + pg_data_t *pgdat = (pg_data_t *)arg; + + /* check memoryless node */ + if (!node_state(pgdat->node_id, N_HIGH_MEMORY)) + return 0; + + walk_zones_in_node(m, pgdat, unusable_show_print); + + return 0; +} + static void pagetypeinfo_showfree_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone) { @@ -604,6 +704,25 @@ static const struct file_operations page .release = seq_release, }; +static const struct seq_operations unusable_op = { + .start = frag_start, + .next = frag_next, + .stop = frag_stop, + .show = unusable_show, +}; + +static int unusable_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &unusable_op); +} + +static const struct file_operations unusable_file_ops = { + .open = unusable_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + #ifdef CONFIG_ZONE_DMA #define TEXT_FOR_DMA(xx) xx "_dma", #else @@ -951,6 +1070,7 @@ static int __init setup_vmstat(void) #ifdef CONFIG_PROC_FS proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops); + proc_create("unusable_index", S_IRUGO, NULL, &unusable_file_ops); proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); #endif -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org