All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] e2freefrag utility
@ 2009-07-21  0:17 Andreas Dilger
  2009-07-22  7:43 ` Theodore Tso
  0 siblings, 1 reply; 24+ messages in thread
From: Andreas Dilger @ 2009-07-21  0:17 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: linux-ext4

[-- Attachment #1: Type: text/plain, Size: 441 bytes --]

Attached is the e2freefrag tool.  It grabs the block bitmaps, creates
buddy bitmaps from them and displays the total/free chunks (default
1MB chunk size), and a histogram of free space.

It could probably be enhanced to print the chunk sizes based on the
RAID chunk size stored in the superblock, but I just thought of that
this minute...

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


[-- Attachment #2: e2fsprogs-e2freefrag.patch --]
[-- Type: text/plain, Size: 14337 bytes --]

Index: e2fsprogs-1.41.4/misc/e2freefrag.8.in
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ e2fsprogs-1.41.4/misc/e2freefrag.8.in	2009-04-21 13:24:09.000000000 -0600
@@ -0,0 +1,99 @@
+.\" -*- nroff -*-
+.TH E2FREEFRAG 8
+.SH NAME
+e2freefrag \- report free space fragmentation information
+.SH SYNOPSIS
+.B e2freefrag
+[
+.B \-c chunk_kb
+]
+[
+.B \-h
+]
+.B filesys
+
+.SH DESCRIPTION
+.B e2freefrag
+is used to report free space fragmentation on ext2/3/4 file systems.
+.I filesys
+is the filesystem device name (e.g.
+.IR /dev/hdc1 ", " /dev/md0 ).
+The
+.B e2freefrag
+program will scan the block bitmap information to check how many free blocks
+are present as contiguous and aligned free space. The percentage of contiguous
+free blocks of size and of alignment
+.IR chunk_kb
+is reported.  It also displays the minimum/maximum/average free chunk size in
+the filesystem, along with a histogram of all free chunks.  This information
+can be used to gauge the level of free space fragmentation in the filesystem.
+.SH OPTIONS
+.TP
+.BI \-c " chunk_kb"
+Desired size of chunk. It is specified in units of kilobytes (KB). If no
+.I chunk_kb
+is specified on the command line, then the default value is 1024KB.
+.TP
+.BI \-h
+Print the usage of the program.
+.SH EXAMPLE
+# e2freefrag /dev/vgroot/lvhome
+.br
+Device: /dev/vgroot/lvhome
+.br
+Blocksize: 4096 bytes
+.br
+Total blocks: 5120710
+.br
+Free blocks: 831744 (16.2%)
+.br
+Chunk size: 1048576 bytes (256 blocks)
+.br
+Total chunks: 20003
+.br
+Free chunks: 2174 (10.9%)
+.br
+
+Min free chunk: 4 KB
+.br
+Max free chunk: 24576 KB
+.br
+Avg. free chunk: 340 KB
+.br
+
+HISTOGRAM OF FREE CHUNK SIZES:
+.br
+          Range         Free chunks
+.br
+    4K...    8K- :        2824
+.br
+    8K...   16K- :        1760
+.br
+   16K...   32K- :        1857
+.br
+   32K...   64K- :        1003
+.br
+   64K...  128K- :         616
+.br
+  128K...  256K- :         479
+.br
+  256K...  512K- :         302
+.br
+  512K... 1024K- :         238
+.br
+    1M...    2M- :         213
+.br
+    2M...    4M- :         173
+.br
+    4M...    8M- :         287
+.br
+    8M...   16M- :           4
+.br
+   16M...   32M- :           1
+.SH AUTHOR
+This version of e2freefrag was written by Rupesh Thakare, and modified by
+Andreas Dilger <adilger@sun.com>, and Kalpak Shah.
+.SH SEE ALSO
+.IR debugfs (8),
+.IR dumpe2fs (8),
+.IR e2fsck (8)
Index: e2fsprogs-1.41.4/misc/e2freefrag.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ e2fsprogs-1.41.4/misc/e2freefrag.c	2009-04-21 13:18:30.000000000 -0600
@@ -0,0 +1,275 @@
+/*
+ * e2freefrag - report filesystem free-space fragmentation
+ *
+ * Copyright (C) 2009 Sun Microsystems, Inc.
+ *
+ * Author: Rupesh Thakare <rupesh@sun.com>
+ *         Andreas Dilger <adilger@sun.com>
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Public
+ * License version 2.
+ * %End-Header%
+ */
+#include <stdio.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#else
+extern char *optarg;
+extern int optind;
+#endif
+
+#include "ext2fs/ext2_fs.h"
+#include "ext2fs/ext2fs.h"
+#include "e2freefrag.h"
+
+void usage(const char *prog)
+{
+	fprintf(stderr, "usage: %s [-c chunksize in kb] [-h] "
+		"device_name\n", prog);
+	exit(1);
+}
+
+static int ul_log2(unsigned long arg)
+{
+        int     l = 0;
+
+        arg >>= 1;
+        while (arg) {
+                l++;
+                arg >>= 1;
+        }
+        return l;
+}
+
+void init_chunk_info(ext2_filsys fs, struct chunk_info *info)
+{
+	int i;
+
+	info->chunkbits = ul_log2(info->chunkbytes);
+	info->blocksize_bits = ul_log2((unsigned long)fs->blocksize);
+	info->blks_in_chunk = info->chunkbytes >> info->blocksize_bits;
+
+	info->min = ~0UL;
+	info->max = info->avg = 0;
+	info->real_free_chunks = 0;
+
+	for (i = 0; i < MAX_HIST; i++)
+		info->histogram.fc_buckets[i] = 0;
+}
+
+void scan_block_bitmap(ext2_filsys fs, struct chunk_info *info)
+{
+	unsigned long long blocks_count = fs->super->s_blocks_count;
+	unsigned long long chunks = (blocks_count + info->blks_in_chunk) >>
+				(info->chunkbits - info->blocksize_bits);
+	unsigned long long chunk_num;
+	unsigned long last_chunk_size = 0;
+	unsigned long long chunk_start_blk = 0;
+
+	for (chunk_num = 0; chunk_num < chunks; chunk_num++) {
+		unsigned long long blk, num_blks;
+		int chunk_free;
+
+		/* Last chunk may be smaller */
+		if (chunk_start_blk + info->blks_in_chunk > blocks_count)
+			num_blks = blocks_count - chunk_start_blk;
+		else
+			num_blks = info->blks_in_chunk;
+
+		chunk_free = 0;
+
+		/* Initialize starting block for first chunk correctly else
+		 * there is a segfault when blocksize = 1024 in which case
+		 * block_map->start = 1 */
+		for (blk = (chunk_num == 0 ? fs->super->s_first_data_block : 0);
+		     blk < num_blks; blk++, chunk_start_blk++) {
+			int used = ext2fs_fast_test_block_bitmap(fs->block_map,
+							       chunk_start_blk);
+			if (!used) {
+				last_chunk_size++;
+				chunk_free++;
+			}
+
+			if (used && last_chunk_size != 0) {
+				unsigned long index;
+
+				index = ul_log2(last_chunk_size) + 1;
+				info->histogram.fc_buckets[index]++;
+
+				if (last_chunk_size > info->max)
+					info->max = last_chunk_size;
+				if (last_chunk_size < info->min)
+					info->min = last_chunk_size;
+				info->avg += last_chunk_size;
+
+				info->real_free_chunks++;
+				last_chunk_size = 0;
+			}
+		}
+
+		if (chunk_free == info->blks_in_chunk)
+			info->free_chunks++;
+	}
+}
+
+errcode_t get_chunk_info(ext2_filsys fs, struct chunk_info *info)
+{
+	unsigned long total_chunks;
+	char *unitp = "KMGTPEZY";
+	int units = 10;
+	unsigned long start = 0, end, cum;
+	int i, retval = 0;
+
+	scan_block_bitmap(fs, info);
+
+	printf("Total blocks: %lu\nFree blocks: %lu (%0.1f%%)\n",
+	       fs->super->s_blocks_count, fs->super->s_free_blocks_count,
+	       (double)fs->super->s_free_blocks_count * 100 /
+						fs->super->s_blocks_count);
+
+	printf("\nChunksize: %u bytes (%u blocks)\n",
+	       info->chunkbytes, info->blks_in_chunk);
+	total_chunks = (fs->super->s_blocks_count + info->blks_in_chunk) >>
+                                       (info->chunkbits - info->blocksize_bits);
+	printf("Total chunks: %lu\nFree chunks: %lu (%0.1f%%)\n",
+	       total_chunks, info->free_chunks,
+	       (double)info->free_chunks * 100 / total_chunks);
+
+	/* Display chunk information in KB */
+	if (info->real_free_chunks) {
+		info->min = (info->min * fs->blocksize) >> 10;
+		info->max = (info->max * fs->blocksize) >> 10;
+		info->avg = (info->avg / info->real_free_chunks *
+			     fs->blocksize) >> 10;
+	} else {
+		info->min = 0;
+	}
+
+	printf("\nMin free chunk: %lu KB \nMax free chunk: %lu KB\n"
+	       "Avg free chunk: %lu KB\n", info->min, info->max, info->avg);
+
+	printf("\nHISTOGRAM OF FREE CHUNK SIZES:\n");
+	printf("%s\t%10s\n", "Chunk Size Range :", "Free chunks");
+	for (i = 0; i < MAX_HIST; i++) {
+		end = 1 << (i + info->blocksize_bits - units);
+		if (info->histogram.fc_buckets[i] != 0)
+			printf("%5lu%c...%5lu%c- :  %10lu\n", start, *unitp,
+			       end, *unitp, info->histogram.fc_buckets[i]);
+		start = end;
+		if (start == 1<<10) {
+			start = 1;
+			units += 10;
+			unitp++;
+		}
+	}
+
+	return retval;
+}
+
+void close_device(char *device_name, ext2_filsys fs)
+{
+	int retval = ext2fs_close(fs);
+
+	if (retval)
+		com_err(device_name, retval, "while closing the filesystem.\n");
+}
+
+void collect_info(ext2_filsys fs, struct chunk_info *chunk_info)
+{
+	unsigned int retval = 0, i, free_blks;
+
+	printf("Device: %s\n", fs->device_name);
+	printf("Blocksize: %u bytes\n", fs->blocksize);
+
+	retval = ext2fs_read_block_bitmap(fs);
+	if (retval) {
+		com_err(fs->device_name, retval, "while reading block bitmap");
+		close_device(fs->device_name, fs);
+		exit(1);
+	}
+
+	init_chunk_info(fs, chunk_info);
+
+	retval = get_chunk_info(fs, chunk_info);
+	if (retval) {
+		com_err(fs->device_name, retval, "while collecting chunk info");
+                close_device(fs->device_name, fs);
+		exit(1);
+	}
+}
+
+void open_device(char *device_name, ext2_filsys *fs)
+{
+	int retval;
+	int flag = EXT2_FLAG_FORCE;
+
+	retval = ext2fs_open(device_name, flag, 0, 0, unix_io_manager, fs);
+	if (retval) {
+		com_err(device_name, retval, "while opening filesystem");
+		exit(1);
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct chunk_info chunk_info = { .chunkbytes = DEFAULT_CHUNKSIZE };
+	errcode_t retval = 0;
+	ext2_filsys fs = NULL;
+	char *device_name;
+	char *progname;
+	char c, *end;
+
+	progname = argv[0];
+
+	while ((c = getopt(argc, argv, "c:h")) != EOF) {
+		switch (c) {
+		case 'c':
+			chunk_info.chunkbytes = strtoull(optarg, &end, 0);
+			if (*end != '\0') {
+				fprintf(stderr, "%s: bad chunk size '%s'\n",
+					progname, optarg);
+				usage(progname);
+			}
+			if (chunk_info.chunkbytes &
+			    (chunk_info.chunkbytes - 1)) {
+				fprintf(stderr, "%s: chunk size must be a "
+					"power of 2.");
+				usage(progname);
+			}
+			chunk_info.chunkbytes *= 1024;
+			break;
+		default:
+			fprintf(stderr, "%s: bad option '%c'\n",
+				progname, c);
+		case 'h':
+			usage(progname);
+			break;
+		}
+	}
+
+	if (optind == argc) {
+		fprintf(stderr, "%s: missing device name.\n", progname);
+		usage(progname);
+	}
+
+	device_name = argv[optind];
+
+	open_device(device_name, &fs);
+
+	if (chunk_info.chunkbytes < fs->blocksize) {
+		fprintf(stderr, "%s: chunksize must be greater than or equal "
+			"to filesystem blocksize.\n", progname);
+		exit(1);
+	}
+	collect_info(fs, &chunk_info);
+	close_device(device_name, fs);
+
+	return retval;
+}
Index: e2fsprogs-1.41.4/misc/e2freefrag.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ e2fsprogs-1.41.4/misc/e2freefrag.h	2009-04-21 13:19:48.000000000 -0600
@@ -0,0 +1,19 @@
+#include <sys/types.h>
+
+#define DEFAULT_CHUNKSIZE (1024*1024)
+
+#define MAX_HIST	32
+struct free_chunk_histogram {
+	unsigned long fc_buckets[MAX_HIST];
+};
+
+struct chunk_info {
+	unsigned long chunkbytes;	/* chunk size in bytes */
+	int chunkbits;			/* chunk size in bits */
+	unsigned long free_chunks;	/* total free chunks of given size */
+	unsigned long real_free_chunks; /* free chunks of any size */
+	int blocksize_bits;		/* fs blocksize in bits */
+	int blks_in_chunk;		/* number of blocks in a chunk */
+	unsigned long min, max, avg;	/* chunk size stats */
+	struct free_chunk_histogram histogram; /* histogram of all chunk sizes*/
+};
Index: e2fsprogs-1.41.4/e2fsprogs.spec.in
===================================================================
--- e2fsprogs-1.41.4.orig/e2fsprogs.spec.in	2009-04-14 05:56:43.000000000 -0600
+++ e2fsprogs-1.41.4/e2fsprogs.spec.in	2009-04-21 13:25:19.000000000 -0600
@@ -143,6 +143,7 @@
 %{_root_sbindir}/tune2fs
 %{_sbindir}/filefrag
 %{_sbindir}/mklost+found
+%{_sbindir}/e2freefrag
 
 %{_root_libdir}/libblkid.so.*
 %{_root_libdir}/libcom_err.so.*
@@ -187,6 +188,7 @@
 %{_mandir}/man8/resize2fs.8*
 %{_mandir}/man8/tune2fs.8*
 %{_mandir}/man8/filefrag.8*
+%{_mandir}/man8/e2freefrag.8*
 
 %files devel
 %defattr(-,root,root)
Index: e2fsprogs-1.41.4/misc/Makefile.in
===================================================================
--- e2fsprogs-1.41.4.orig/misc/Makefile.in	2009-04-14 05:56:43.000000000 -0600
+++ e2fsprogs-1.41.4/misc/Makefile.in	2009-04-14 06:09:57.000000000 -0600
@@ -19,10 +19,10 @@
 
 SPROGS=		mke2fs badblocks tune2fs dumpe2fs $(BLKID_PROG) logsave \
 			$(E2IMAGE_PROG) @FSCK_PROG@ e2undo
-USPROGS=	mklost+found filefrag $(UUIDD_PROG)
+USPROGS=	mklost+found filefrag e2freefrag $(UUIDD_PROG)
 SMANPAGES=	tune2fs.8 mklost+found.8 mke2fs.8 dumpe2fs.8 badblocks.8 \
 			e2label.8 $(FINDFS_MAN) $(BLKID_MAN) $(E2IMAGE_MAN) \
-			logsave.8 filefrag.8 e2undo.8 $(UUIDD_MAN) @FSCK_MAN@
+			logsave.8 filefrag.8 e2freefrag.8 e2undo.8 $(UUIDD_MAN) @FSCK_MAN@
 FMANPAGES=	mke2fs.conf.5
 
 UPROGS=		chattr lsattr uuidgen
@@ -44,6 +44,7 @@
 BLKID_OBJS=	blkid.o
 FILEFRAG_OBJS=	filefrag.o
 E2UNDO_OBJS=  e2undo.o
+E2FREEFRAG_OBJS= e2freefrag.o
 
 PROFILED_TUNE2FS_OBJS=	profiled/tune2fs.o profiled/util.o
 PROFILED_MKLPF_OBJS=	profiled/mklost+found.o
@@ -71,7 +72,7 @@
 		$(srcdir)/uuidgen.c $(srcdir)/blkid.c $(srcdir)/logsave.c \
 		$(srcdir)/filefrag.c $(srcdir)/base_device.c \
 		$(srcdir)/ismounted.c $(srcdir)/../e2fsck/profile.c \
-		$(srcdir)/e2undo.c
+		$(srcdir)/e2undo.c $(srcdir)/e2freefrag.c
 
 LIBS= $(LIBEXT2FS) $(LIBCOM_ERR) 
 DEPLIBS= $(LIBEXT2FS) $(LIBCOM_ERR) 
@@ -276,6 +277,10 @@
 	@echo "	LD $@"
 	@$(CC) $(ALL_LDFLAGS) -g -pg -o logsave.profiled profiled/logsave.o
 
+e2freefrag: $(E2FREEFRAG_OBJS)
+	@echo "LD $@"
+	@$(CC) $(ALL_LDFLAGS) -o e2freefrag $(E2FREEFRAG_OBJS) $(LIBS)
+
 filefrag: $(FILEFRAG_OBJS)
 	@echo "	LD $@"
 	@$(CC) $(ALL_LDFLAGS) -o filefrag $(FILEFRAG_OBJS) 
@@ -361,6 +366,10 @@
 	@echo "	SUBST $@"
 	@$(SUBSTITUTE_UPTIME) $(srcdir)/blkid.1.in blkid.1 
 
+e2freefrag.8: $(DEP_SUBSTITUTE) $(srcdir)/e2freefrag.8.in
+	@echo "	SUBST $@"
+	@$(SUBSTITUTE_UPTIME) $(srcdir)/e2freefrag.8.in e2freefrag.8
+
 filefrag.8: $(DEP_SUBSTITUTE) $(srcdir)/filefrag.8.in
 	@echo "	SUBST $@"
 	@$(SUBSTITUTE_UPTIME) $(srcdir)/filefrag.8.in filefrag.8
@@ -522,7 +531,7 @@
 clean:
 	$(RM) -f $(SPROGS) $(USPROGS) $(UPROGS) $(UMANPAGES) $(SMANPAGES) \
 		$(FMANPAGES) \
-		base_device base_device.out mke2fs.static filefrag \
+		base_device base_device.out mke2fs.static filefrag e2freefrag \
 		e2initrd_helper partinfo prof_err.[ch] default_profile.c \
 		uuidd e2image tune2fs.static tst_ismounted fsck.profiled \
 		blkid.profiled tune2fs.profiled e2image.profiled \
@@ -603,6 +612,9 @@
 blkid.o: $(srcdir)/blkid.c $(top_srcdir)/lib/blkid/blkid.h \
  $(top_builddir)/lib/blkid/blkid_types.h
 logsave.o: $(srcdir)/logsave.c
+e2freefrag.o: $(srcdir)/e2freefrag.c e2freefrag.h \
+ $(top_srcdir)/lib/ext2fs/ext2_fs.h $(top_srcdir)/lib/ext2fs/ext2fs.h \
+ $(top_srcdir)/lib/ext2fs/bitops.h
 filefrag.o: $(srcdir)/filefrag.c
 base_device.o: $(srcdir)/base_device.c $(srcdir)/fsck.h
 ismounted.o: $(srcdir)/ismounted.c $(top_srcdir)/lib/et/com_err.h

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] e2freefrag utility
  2009-07-21  0:17 [PATCH] e2freefrag utility Andreas Dilger
@ 2009-07-22  7:43 ` Theodore Tso
  2009-07-23  4:59   ` Eric Sandeen
  0 siblings, 1 reply; 24+ messages in thread
From: Theodore Tso @ 2009-07-22  7:43 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: linux-ext4

On Mon, Jul 20, 2009 at 06:17:50PM -0600, Andreas Dilger wrote:
> Attached is the e2freefrag tool.  It grabs the block bitmaps, creates
> buddy bitmaps from them and displays the total/free chunks (default
> 1MB chunk size), and a histogram of free space.
> 
> It could probably be enhanced to print the chunk sizes based on the
> RAID chunk size stored in the superblock, but I just thought of that
> this minute...

Thanks, checked in with some minor changes to fix some printf
warnings.

Here's the output on my root filesystem (which has been in use since
February):

Device: /dev/ssd/root
Blocksize: 4096 bytes
Total blocks: 18350080
Free blocks: 10774142 (58.7%)

Chunksize: 1048576 bytes (256 blocks)
Total chunks: 71681
Free chunks: 21792 (30.4%)

Min free chunk: 4 KB 
Max free chunk: 568232 KB
Avg free chunk: 184 KB

HISTOGRAM OF FREE CHUNK SIZES:
Chunk Size Range :	Free chunks
    4K...    8K- :       35005
    8K...   16K- :       33639
   16K...   32K- :       31419
   32K...   64K- :       33953
   64K...  128K- :       26397
  128K...  256K- :        7314
  256K...  512K- :        1855
  512K... 1024K- :        1612
    1M...    2M- :        1160
    2M...    4M- :         567
    4M...    8M- :         303
    8M...   16M- :         106
   16M...   32M- :          40
   32M...   64M- :          51
   64M...  128M- :         123
  128M...  256M- :           8
  512M... 1024M- :           1

Yeah.... pretty fragmented.   :-(

						- Ted

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] e2freefrag utility
  2009-07-22  7:43 ` Theodore Tso
@ 2009-07-23  4:59   ` Eric Sandeen
  2009-07-23 13:45     ` How to fix up mballoc Theodore Tso
  2009-07-23 17:07     ` [PATCH] e2freefrag utility Andreas Dilger
  0 siblings, 2 replies; 24+ messages in thread
From: Eric Sandeen @ 2009-07-23  4:59 UTC (permalink / raw)
  To: Theodore Tso; +Cc: Andreas Dilger, linux-ext4

Theodore Tso wrote:
...

> Here's the output on my root filesystem (which has been in use since
> February):
> 
> Device: /dev/ssd/root
> Blocksize: 4096 bytes
> Total blocks: 18350080
> Free blocks: 10774142 (58.7%)
> 
> Chunksize: 1048576 bytes (256 blocks)
> Total chunks: 71681
> Free chunks: 21792 (30.4%)
> 
> Min free chunk: 4 KB 
> Max free chunk: 568232 KB
> Avg free chunk: 184 KB
> 
> HISTOGRAM OF FREE CHUNK SIZES:
> Chunk Size Range :	Free chunks
>     4K...    8K- :       35005
>     8K...   16K- :       33639
>    16K...   32K- :       31419
>    32K...   64K- :       33953
>    64K...  128K- :       26397
>   128K...  256K- :        7314
>   256K...  512K- :        1855
>   512K... 1024K- :        1612
>     1M...    2M- :        1160
>     2M...    4M- :         567
>     4M...    8M- :         303
>     8M...   16M- :         106
>    16M...   32M- :          40
>    32M...   64M- :          51
>    64M...  128M- :         123
>   128M...  256M- :           8
>   512M... 1024M- :           1
> 
> Yeah.... pretty fragmented.   :-(
> 


Just for comparison, here's a 30G xfs root that has run for a year or
two, currently about 70% full:

xfs_db> freesp -s
   from      to extents  blocks    pct
      1       1    1849    1849   0.08
      2       3    1383    3293   0.14
      4       7    1034    5429   0.23
      8      15    1061   12260   0.53
     16      31     641   13261   0.57
     32      63     355   15601   0.67
     64     127     221   19940   0.86
    128     255     195   35841   1.54
    256     511     173   63066   2.71
    512    1023     122   89824   3.86
   1024    2047      51   70032   3.01
   2048    4095      22   60982   2.62
   4096    8191      20  116580   5.01
   8192   16383      10  109896   4.72
  16384   32767       7  152026   6.53
  32768   65535       4  206283   8.87
  65536  131071       3  285744  12.28
 262144  524287       1  509811  21.91
 524288 1048575       1  554838  23.85
total free extents 7153
total free blocks 2326556
average free extent size 325.256

from...to units are in 4k blocks.

Maybe the fancy ext4 defragger will have a good second use case in
cleaning up some of that freespace fragmentation.

-Eric

^ permalink raw reply	[flat|nested] 24+ messages in thread

* How to fix up mballoc
  2009-07-23  4:59   ` Eric Sandeen
@ 2009-07-23 13:45     ` Theodore Tso
  2009-07-23 17:43       ` Eric Sandeen
  2009-07-23 17:51       ` Mingming Cao
  2009-07-23 17:07     ` [PATCH] e2freefrag utility Andreas Dilger
  1 sibling, 2 replies; 24+ messages in thread
From: Theodore Tso @ 2009-07-23 13:45 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: Andreas Dilger, linux-ext4

So I started looking to see how we might be able to improve mballoc to
avoid freespace fragmentation, and I came up with the following high
level design.  Does this look sane?   Have I overlooked anything?

1) In ext4_mb_normalize_request(), if the inode that we are allocating
does not have any open file descriptors for write (i.e., it's already
closed and we're allocating via delalloc) _and_ the inode was
previously opened with O_CREAT and without O_APPEND (checked via a
flag in EXT4_I(inode)), then do not normalize the size to a power of
two, but rather to the filesystem blocksize.

The idea here is that we should be trying to find an exact fit, since
most of the time (except for log files, which get appended; hence the
O_CREAT && !O_APPEND test) once a file is written, that is probably
the final size for the file.  So normalizing the size for the
preallocation area to a power of two will be counterproductive for
most files.

2) If the there has been less than X files opened in Y jiffies the
parent directory (using the dentry path used to open the file), then
do not set EXT4_MB_HINT_GROUP_ALLOC in ext4_mb_group_or_file().  We
can simulate this for without creating this patch to test #1 by
setting mb_stream_request to 0 (which should completely disable group
preallocation).

						- Ted

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] e2freefrag utility
  2009-07-23  4:59   ` Eric Sandeen
  2009-07-23 13:45     ` How to fix up mballoc Theodore Tso
@ 2009-07-23 17:07     ` Andreas Dilger
  2009-07-23 17:18       ` Eric Sandeen
  2009-07-24 22:32       ` Theodore Tso
  1 sibling, 2 replies; 24+ messages in thread
From: Andreas Dilger @ 2009-07-23 17:07 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: Theodore Tso, linux-ext4

[-- Attachment #1: Type: text/plain, Size: 1263 bytes --]

On Jul 22, 2009  23:59 -0500, Eric Sandeen wrote:
> Theodore Tso wrote:
> > Here's the output on my root filesystem (which has been in use since
> > February):
> > 
> > Total chunks: 71681
> > Free chunks: 21792 (30.4%)
> > 
> > Min free chunk: 4 KB 
> > Max free chunk: 568232 KB
> > Avg free chunk: 184 KB
> > 
> > HISTOGRAM OF FREE CHUNK SIZES:
> > Chunk Size Range :	Free chunks
> >     4K...    8K- :       35005
> >     8K...   16K- :       33639
:
:
> >   128M...  256M- :           8
> >   512M... 1024M- :           1
> > 
> > Yeah.... pretty fragmented.   :-(
> > 
> 
> 
> Just for comparison, here's a 30G xfs root that has run for a year or
> two, currently about 70% full:
> 
> xfs_db> freesp -s
>    from      to extents  blocks    pct
>       1       1    1849    1849   0.08
>       2       3    1383    3293   0.14
:
:
>  262144  524287       1  509811  21.91
>  524288 1048575       1  554838  23.85
> total free extents 7153
> total free blocks 2326556
> average free extent size 325.256

I like the printing of the total blocks in each section and the
percent of blocks...  Attached is an incremental patch that adds
the same to e2freefrag.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


[-- Attachment #2: e2freefrag-pct.diff --]
[-- Type: text/plain, Size: 4786 bytes --]

--- ./misc/e2freefrag.c	2009-07-23 11:02:38.000000000 -0600
+++ ./misc/e2freefrag.c.new	2009-07-23 10:52:22.000000000 -0600
@@ -60,8 +60,10 @@ void init_chunk_info(ext2_filsys fs, str
 	info->max = info->avg = 0;
 	info->real_free_chunks = 0;
 
-	for (i = 0; i < MAX_HIST; i++)
-		info->histogram.fc_buckets[i] = 0;
+	for (i = 0; i < MAX_HIST; i++) {
+		info->histogram.fc_chunks[i] = 0;
+		info->histogram.fc_blocks[i] = 0;
+	}
 }
 
 void scan_block_bitmap(ext2_filsys fs, struct chunk_info *info)
@@ -101,7 +103,9 @@ void scan_block_bitmap(ext2_filsys fs, s
 				unsigned long index;
 
 				index = ul_log2(last_chunk_size) + 1;
-				info->histogram.fc_buckets[index]++;
+				info->histogram.fc_chunks[index]++;
+				info->histogram.fc_blocks[index] +=
+							last_chunk_size;
 
 				if (last_chunk_size > info->max)
 					info->max = last_chunk_size;
@@ -137,7 +141,7 @@ errcode_t get_chunk_info(ext2_filsys fs,
 	printf("\nChunksize: %u bytes (%u blocks)\n",
 	       info->chunkbytes, info->blks_in_chunk);
 	total_chunks = (fs->super->s_blocks_count + info->blks_in_chunk) >>
-                                       (info->chunkbits - info->blocksize_bits);
+				(info->chunkbits - info->blocksize_bits);
 	printf("Total chunks: %lu\nFree chunks: %lu (%0.1f%%)\n",
 	       total_chunks, info->free_chunks,
 	       (double)info->free_chunks * 100 / total_chunks);
@@ -156,12 +160,17 @@ errcode_t get_chunk_info(ext2_filsys fs,
 	       "Avg free chunk: %lu KB\n", info->min, info->max, info->avg);
 
 	printf("\nHISTOGRAM OF FREE CHUNK SIZES:\n");
-	printf("%15s\t\t%10s\n", "Range", "Free chunks");
+	printf("%s :  %12s  %12s  %7s\n", "Chunk Size Range", "Free chunks",
+	       "Free Blocks", "Percent");
 	for (i = 0; i < MAX_HIST; i++) {
 		end = 1 << (i + info->blocksize_bits - units);
-		if (info->histogram.fc_buckets[i] != 0)
-			printf("%5lu%c...%5lu%c- :  %10lu\n", start, *unitp,
-			       end, *unitp, info->histogram.fc_buckets[i]);
+		if (info->histogram.fc_chunks[i] != 0)
+			printf("%5lu%c...%5lu%c- :  %12lu  %12lu  %6.1f%%\n",
+			       start, *unitp, end, *unitp,
+			       info->histogram.fc_chunks[i],
+			       info->histogram.fc_blocks[i],
+			       (double)info->histogram.fc_blocks[i] * 100 /
+			       fs->super->s_free_blocks_count);
 		start = end;
 		if (start == 1<<10) {
 			start = 1;
--- ./misc/e2freefrag.h	2009-07-23 11:02:38.000000000 -0600
+++ ./misc/e2freefrag.h.new	2009-07-23 10:52:26.000000000 -0600
@@ -4,7 +4,8 @@
 
 #define MAX_HIST	32
 struct free_chunk_histogram {
-	unsigned long fc_buckets[MAX_HIST];
+	unsigned long fc_chunks[MAX_HIST];
+	unsigned long fc_blocks[MAX_HIST];
 };
 
 struct chunk_info {
--- ./misc/e2freefrag.8.in	2009-07-23 11:02:38.000000000 -0600
+++ ./misc/e2freefrag.8.in.new	2009-07-23 11:05:13.000000000 -0600
@@ -44,53 +44,53 @@ is specified on the command line, then t
 .br
 Blocksize: 4096 bytes
 .br
-Total blocks: 5120710
+Total blocks: 1504085
 .br
-Free blocks: 831744 (16.2%)
+Free blocks: 292995 (19.5%)
 .br
 Chunk size: 1048576 bytes (256 blocks)
 .br
-Total chunks: 20003
+Total chunks: 5876
 .br
-Free chunks: 2174 (10.9%)
+Free chunks: 463 (7.9%)
 .br
 
 Min free chunk: 4 KB
 .br
-Max free chunk: 24576 KB
+Max free chunk: 24008 KB
 .br
-Avg. free chunk: 340 KB
+Avg free chunk: 252 KB
 .br
 
 HISTOGRAM OF FREE CHUNK SIZES:
 .br
-          Range         Free chunks
+Chunk Size Range :   Free chunks   Free Blocks  Percent
 .br
-    4K...    8K- :        2824
+    4K...    8K- :           704           704     0.2%
 .br
-    8K...   16K- :        1760
+    8K...   16K- :           810          1979     0.7%
 .br
-   16K...   32K- :        1857
+   16K...   32K- :           843          4467     1.5%
 .br
-   32K...   64K- :        1003
+   32K...   64K- :           579          6263     2.1%
 .br
-   64K...  128K- :         616
+   64K...  128K- :           493         11067     3.8%
 .br
-  128K...  256K- :         479
+  128K...  256K- :           394         18097     6.2%
 .br
-  256K...  512K- :         302
+  256K...  512K- :           281         25477     8.7%
 .br
-  512K... 1024K- :         238
+  512K... 1024K- :           253         44914    15.3%
 .br
-    1M...    2M- :         213
+    1M...    2M- :           143         51897    17.7%
 .br
-    2M...    4M- :         173
+    2M...    4M- :            73         50683    17.3%
 .br
-    4M...    8M- :         287
+    4M...    8M- :            37         52417    17.9%
 .br
-    8M...   16M- :           4
+    8M...   16M- :             7         19028     6.5%
 .br
-   16M...   32M- :           1
+   16M...   32M- :             1          6002     2.0%
 .SH AUTHOR
 This version of e2freefrag was written by Rupesh Thakare, and modified by
 Andreas Dilger <adilger@sun.com>, and Kalpak Shah.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] e2freefrag utility
  2009-07-23 17:07     ` [PATCH] e2freefrag utility Andreas Dilger
@ 2009-07-23 17:18       ` Eric Sandeen
  2009-07-24 22:32       ` Theodore Tso
  1 sibling, 0 replies; 24+ messages in thread
From: Eric Sandeen @ 2009-07-23 17:18 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Theodore Tso, linux-ext4

Andreas Dilger wrote:

> I like the printing of the total blocks in each section and the
> percent of blocks...  Attached is an incremental patch that adds
> the same to e2freefrag.

Thanks, I was going to suggest that :)

-Eric

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: How to fix up mballoc
  2009-07-23 13:45     ` How to fix up mballoc Theodore Tso
@ 2009-07-23 17:43       ` Eric Sandeen
  2009-07-24  0:23         ` Theodore Tso
  2009-07-23 17:51       ` Mingming Cao
  1 sibling, 1 reply; 24+ messages in thread
From: Eric Sandeen @ 2009-07-23 17:43 UTC (permalink / raw)
  To: Theodore Tso; +Cc: Andreas Dilger, linux-ext4

Theodore Tso wrote:
> So I started looking to see how we might be able to improve mballoc to
> avoid freespace fragmentation, and I came up with the following high
> level design.  Does this look sane?   Have I overlooked anything?
> 
> 1) In ext4_mb_normalize_request(), if the inode that we are allocating
> does not have any open file descriptors for write (i.e., it's already
> closed and we're allocating via delalloc) _and_ the inode was
> previously opened with O_CREAT and without O_APPEND (checked via a
> flag in EXT4_I(inode)), then do not normalize the size to a power of
> two, but rather to the filesystem blocksize.
> 
> The idea here is that we should be trying to find an exact fit, since
> most of the time (except for log files, which get appended; hence the
> O_CREAT && !O_APPEND test) once a file is written, that is probably
> the final size for the file.  So normalizing the size for the
> preallocation area to a power of two will be counterproductive for
> most files.

I'm sort of woefully ignorant of a lot of the mballoc stuff.

When you say once a file is written that's probably the final size... do
you mean when writes are done and it's closed, or when the first write
to the file is complete?

I think an awful lot of normal cases write to a file in sub-file-sized
chunks (think mp3 or flac encoding, file downloading, etc).

Also, I get the !O_APPEND test, but is O_CREAT necessary?  I wonder how
much of a hint that really gives us.

> 2) If the there has been less than X files opened in Y jiffies the
> parent directory (using the dentry path used to open the file), then
> do not set EXT4_MB_HINT_GROUP_ALLOC in ext4_mb_group_or_file().  We
> can simulate this for without creating this patch to test #1 by
> setting mb_stream_request to 0 (which should completely disable group
> preallocation).

Hm have to try hard to parse that ;)  But that sounds reasonable I think.

I'm talking to the Fedora infrastructure folks to see if there's a way
to recreate snapshots of, say, the F10 repos from initial release to
today, to be able to sort of fast-forward root filesystem updates.  It'd
be a nice way to do accelerated aging tests for any changes we make, at
least for one usecase ...

-Eric

> 						- Ted

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: How to fix up mballoc
  2009-07-23 13:45     ` How to fix up mballoc Theodore Tso
  2009-07-23 17:43       ` Eric Sandeen
@ 2009-07-23 17:51       ` Mingming Cao
  2009-07-24  0:43         ` Theodore Tso
  1 sibling, 1 reply; 24+ messages in thread
From: Mingming Cao @ 2009-07-23 17:51 UTC (permalink / raw)
  To: Theodore Tso; +Cc: Eric Sandeen, Andreas Dilger, linux-ext4

Theodore Tso wrote:
> So I started looking to see how we might be able to improve mballoc to
> avoid freespace fragmentation, and I came up with the following high
> level design.  Does this look sane?   Have I overlooked anything?
>
> 1) In ext4_mb_normalize_request(), if the inode that we are allocating
> does not have any open file descriptors for write (i.e., it's already
> closed and we're allocating via delalloc) _and_ the inode was
> previously opened with O_CREAT and without O_APPEND (checked via a
> flag in EXT4_I(inode)), then do not normalize the size to a power of
> two, but rather to the filesystem blocksize.
>
> The idea here is that we should be trying to find an exact fit, since
> most of the time (except for log files, which get appended; hence the
> O_CREAT && !O_APPEND test) once a file is written, that is probably
> the final size for the file.  So normalizing the size for the
> preallocation area to a power of two will be counterproductive for
> most files.
>
>   
I am trying to understand what user cases prefer normalize allocation 
request size? If they are uncommon cases, perhaps
we should disable the normalize the allocation size disabled by default, 
unless the apps opens the files with O_APPEND?
> 2) If the there has been less than X files opened in Y jiffies the
> parent directory (using the dentry path used to open the file), then
> do not set EXT4_MB_HINT_GROUP_ALLOC in ext4_mb_group_or_file().  We
> can simulate this for without creating this patch to test #1 by
> setting mb_stream_request to 0 (which should completely disable group
> preallocation).
>
> 						- Ted
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>   



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: How to fix up mballoc
  2009-07-23 17:43       ` Eric Sandeen
@ 2009-07-24  0:23         ` Theodore Tso
  2009-07-24  2:18           ` Eric Sandeen
  2009-07-24  2:30           ` Andreas Dilger
  0 siblings, 2 replies; 24+ messages in thread
From: Theodore Tso @ 2009-07-24  0:23 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: Andreas Dilger, linux-ext4

On Thu, Jul 23, 2009 at 12:43:47PM -0500, Eric Sandeen wrote:
> > 1) In ext4_mb_normalize_request(), if the inode that we are allocating
> > does not have any open file descriptors for write (i.e., it's already
> > closed and we're allocating via delalloc) _and_ the inode was
> > previously opened with O_CREAT and without O_APPEND (checked via a
> > flag in EXT4_I(inode)), then do not normalize the size to a power of
> > two, but rather to the filesystem blocksize.
> 
> I'm sort of woefully ignorant of a lot of the mballoc stuff.
> 
> When you say once a file is written that's probably the final size... do
> you mean when writes are done and it's closed, or when the first write
> to the file is complete?
> 
> I think an awful lot of normal cases write to a file in sub-file-sized
> chunks (think mp3 or flac encoding, file downloading, etc).

I meant when the writes are done and the files are closed; hence my
proposal that we do this do #1 above only if there are no open file
descriptors for write.  That is, if the file can be written and closed
by the userspace process before any delayed allocation blocks are
attempted to be written by the filesystem, we can probably safely
assume that the file won't grown in size later on.

> Also, I get the !O_APPEND test, but is O_CREAT necessary?  I wonder how
> much of a hint that really gives us.

Well, it probably should be O_CREAT || O_TRUNC.  The basic idea here is
to distinguish between a file which gets appended to via syslog, or
via a mail delivery program that writes 4k of data to the end of a
mail spool file.  In some cases, such as the mail delivery program, it
might not use O_APPEND, but instead it might lock the file, seek to
end of the file, and then right the 4k worth of e-mail.  So if the
file wasn't freshly created (or truncated) at the last open, maybe we
should use a more aggressive preallocation --- and in the case of
/var/mail spool delivery, perhaps the preallocation should persist
beyond the file getting closed.  (In the future we might want to have
some hueristics where if we notice that the pattern of file writes is
a repeated open, write-causing-block-allocation, close, maybe we
should do some kind of block reservation style scheme while the
filesystem is mounted and the inode stays in the inode cache.)

	      	      	      	    	     - Ted

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: How to fix up mballoc
  2009-07-23 17:51       ` Mingming Cao
@ 2009-07-24  0:43         ` Theodore Tso
  0 siblings, 0 replies; 24+ messages in thread
From: Theodore Tso @ 2009-07-24  0:43 UTC (permalink / raw)
  To: Mingming Cao; +Cc: Eric Sandeen, Andreas Dilger, linux-ext4

On Thu, Jul 23, 2009 at 10:51:58AM -0700, Mingming Cao wrote:
> I am trying to understand what user cases prefer normalize allocation  
> request size? If they are uncommon cases, perhaps
> we should disable the normalize the allocation size disabled by default,  
> unless the apps opens the files with O_APPEND?

The case where we would want to round the allocation size up would be
if we are writing a large file (say, like a large mp3 or mpeg4 file),
which takes a while for the audio/video encoder to write out the
blocks.   In that case, doing file-based preallocation is a good thing.

Normally, if we are doing block allocations for files greater than 16
blocks (i.e, 64k), we use file-based preallocation.  Otherwise we use
block group allocations.  The problem with using block group
allocations is that way it works is that first time we try to allocate
out of a block group, we try to find a free extent which is 512 blocks
long.  If we can't find a free extent which is 512 blocks long, we'll
try another block group.  Hence, for small files, once a block group
gets fragmented to the point where there isn't a free chunk which is
512 blocks long, we'll try to find another block group --- even if
that means finding another block group far, FAR away from the block
group where the directory is contained.

Worse yet, if we unmount and remount the filesystem, we forget the
fact that we were using a particular (now-partially filled)
preallocation group, so the next time we try to allocate space for a
small file, we will find *another* free 512 block chunk to allocate
small files.  Given that there is 32,768 blocks in block group, after
64 interations of "mount, write one 4k file in a directory, unmount",
that block group will have 64 files, each separated by 511 blocks, and
that block group will no longer have any free 512 chunks for block
allocations.  (And given that the block preallocation is per-CPU, it
becomes even worse on an SMP system.)

Put this baldly, it may be that we need to do a fundamental rethink on
how we do per-cpu, per-blockgroup preallocations for small files.
Maybe instead of trying to find a 512 extent which is completely full,
we should instead be looking for a 512 extent which has at least
mb_stream_req free blocks (i.e. by default 16 free blocks).

	      	   	  	   	      	   - Ted

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: How to fix up mballoc
  2009-07-24  0:23         ` Theodore Tso
@ 2009-07-24  2:18           ` Eric Sandeen
  2009-07-24  2:25             ` Eric Sandeen
  2009-07-24  2:30           ` Andreas Dilger
  1 sibling, 1 reply; 24+ messages in thread
From: Eric Sandeen @ 2009-07-24  2:18 UTC (permalink / raw)
  To: Theodore Tso; +Cc: Andreas Dilger, linux-ext4

Theodore Tso wrote:
> On Thu, Jul 23, 2009 at 12:43:47PM -0500, Eric Sandeen wrote:
>>> 1) In ext4_mb_normalize_request(), if the inode that we are allocating
>>> does not have any open file descriptors for write (i.e., it's already
>>> closed and we're allocating via delalloc) _and_ the inode was
>>> previously opened with O_CREAT and without O_APPEND (checked via a
>>> flag in EXT4_I(inode)), then do not normalize the size to a power of
>>> two, but rather to the filesystem blocksize.
>> I'm sort of woefully ignorant of a lot of the mballoc stuff.
>>
>> When you say once a file is written that's probably the final size... do
>> you mean when writes are done and it's closed, or when the first write
>> to the file is complete?
>>
>> I think an awful lot of normal cases write to a file in sub-file-sized
>> chunks (think mp3 or flac encoding, file downloading, etc).
> 
> I meant when the writes are done and the files are closed; hence my
> proposal that we do this do #1 above only if there are no open file
> descriptors for write.  That is, if the file can be written and closed
> by the userspace process before any delayed allocation blocks are
> attempted to be written by the filesystem, we can probably safely
> assume that the file won't grown in size later on.

Ah, ok.  Sorry, I misunderstood.  Yep, that seems reasonable.

It should probably get tested with workloads like video transcoding,
where there will be incremental writes that span many minutes or hours.

>> Also, I get the !O_APPEND test, but is O_CREAT necessary?  I wonder how
>> much of a hint that really gives us.
> 
> Well, it probably should be O_CREAT || O_TRUNC.  The basic idea here is
> to distinguish between a file which gets appended to via syslog, or
> via a mail delivery program that writes 4k of data to the end of a
> mail spool file.  In some cases, such as the mail delivery program, it
> might not use O_APPEND, but instead it might lock the file, seek to
> end of the file, and then right the 4k worth of e-mail.  So if the
> file wasn't freshly created (or truncated) at the last open, maybe we
> should use a more aggressive preallocation --- and in the case of
> /var/mail spool delivery, perhaps the preallocation should persist
> beyond the file getting closed.  (In the future we might want to have
> some hueristics where if we notice that the pattern of file writes is
> a repeated open, write-causing-block-allocation, close, maybe we
> should do some kind of block reservation style scheme while the
> filesystem is mounted and the inode stays in the inode cache.)


sounds fancy ;)

-Eric

> 	      	      	      	    	     - Ted


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: How to fix up mballoc
  2009-07-24  2:18           ` Eric Sandeen
@ 2009-07-24  2:25             ` Eric Sandeen
  0 siblings, 0 replies; 24+ messages in thread
From: Eric Sandeen @ 2009-07-24  2:25 UTC (permalink / raw)
  To: Theodore Tso; +Cc: Andreas Dilger, linux-ext4

Eric Sandeen wrote:
> Theodore Tso wrote:
>> On Thu, Jul 23, 2009 at 12:43:47PM -0500, Eric Sandeen wrote:
>>>> 1) In ext4_mb_normalize_request(), if the inode that we are allocating
>>>> does not have any open file descriptors for write (i.e., it's already
>>>> closed and we're allocating via delalloc) _and_ the inode was
>>>> previously opened with O_CREAT and without O_APPEND (checked via a
>>>> flag in EXT4_I(inode)), then do not normalize the size to a power of
>>>> two, but rather to the filesystem blocksize.
>>> I'm sort of woefully ignorant of a lot of the mballoc stuff.
>>>
>>> When you say once a file is written that's probably the final size... do
>>> you mean when writes are done and it's closed, or when the first write
>>> to the file is complete?
>>>
>>> I think an awful lot of normal cases write to a file in sub-file-sized
>>> chunks (think mp3 or flac encoding, file downloading, etc).
>> I meant when the writes are done and the files are closed; hence my
>> proposal that we do this do #1 above only if there are no open file
>> descriptors for write.  That is, if the file can be written and closed
>> by the userspace process before any delayed allocation blocks are
>> attempted to be written by the filesystem, we can probably safely
>> assume that the file won't grown in size later on.
> 
> Ah, ok.  Sorry, I misunderstood.  Yep, that seems reasonable.
> 
> It should probably get tested with workloads like video transcoding,
> where there will be incremental writes that span many minutes or hours.

Ugh right after I sent this I think I'm finally making sense of it.  :)
 In that case, come allocation time there =would= be file descriptors
open, and we'd go back to the old method of normalizing the allocation.
 You're just talking about changing things where an entire series of
file writes have come & gone, everything is closed & done, and -now-
we're allocating.

Sorry for being slow.  :)

-Eric

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: How to fix up mballoc
  2009-07-24  0:23         ` Theodore Tso
  2009-07-24  2:18           ` Eric Sandeen
@ 2009-07-24  2:30           ` Andreas Dilger
  1 sibling, 0 replies; 24+ messages in thread
From: Andreas Dilger @ 2009-07-24  2:30 UTC (permalink / raw)
  To: Theodore Tso; +Cc: Eric Sandeen, linux-ext4

On Jul 23, 2009  20:23 -0400, Theodore Ts'o wrote:
> On Thu, Jul 23, 2009 at 12:43:47PM -0500, Eric Sandeen wrote:
> > > 1) In ext4_mb_normalize_request(), if the inode that we are allocating
> > > does not have any open file descriptors for write (i.e., it's already
> > > closed and we're allocating via delalloc) _and_ the inode was
> > > previously opened with O_CREAT and without O_APPEND (checked via a
> > > flag in EXT4_I(inode)), then do not normalize the size to a power of
> > > two, but rather to the filesystem blocksize.
> > 
> > I'm sort of woefully ignorant of a lot of the mballoc stuff.
> > 
> > When you say once a file is written that's probably the final size... do
> > you mean when writes are done and it's closed, or when the first write
> > to the file is complete?
> > 
> > I think an awful lot of normal cases write to a file in sub-file-sized
> > chunks (think mp3 or flac encoding, file downloading, etc).
> 
> I meant when the writes are done and the files are closed; hence my
> proposal that we do this do #1 above only if there are no open file
> descriptors for write.  That is, if the file can be written and closed
> by the userspace process before any delayed allocation blocks are
> attempted to be written by the filesystem, we can probably safely
> assume that the file won't grown in size later on.

Right, this is a reasonable default I think.

> > Also, I get the !O_APPEND test, but is O_CREAT necessary?  I wonder how
> > much of a hint that really gives us.
> 
> Well, it probably should be O_CREAT || O_TRUNC.  The basic idea here is
> to distinguish between a file which gets appended to via syslog, or
> via a mail delivery program that writes 4k of data to the end of a
> mail spool file.  In some cases, such as the mail delivery program, it
> might not use O_APPEND, but instead it might lock the file, seek to
> end of the file, and then right the 4k worth of e-mail.  So if the
> file wasn't freshly created (or truncated) at the last open, maybe we
> should use a more aggressive preallocation --- and in the case of
> /var/mail spool delivery, perhaps the preallocation should persist
> beyond the file getting closed.  (In the future we might want to have
> some hueristics where if we notice that the pattern of file writes is
> a repeated open, write-causing-block-allocation, close, maybe we
> should do some kind of block reservation style scheme while the
> filesystem is mounted and the inode stays in the inode cache.)

I think you are on the right track with the !O_TRUNC check.  Namely,
any file which is a non-zero size and gets an extending write at a
non-zero offset should probably get some persistent preallocation
(fallocate).

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] e2freefrag utility
  2009-07-23 17:07     ` [PATCH] e2freefrag utility Andreas Dilger
  2009-07-23 17:18       ` Eric Sandeen
@ 2009-07-24 22:32       ` Theodore Tso
  2009-07-24 23:14         ` Andreas Dilger
  1 sibling, 1 reply; 24+ messages in thread
From: Theodore Tso @ 2009-07-24 22:32 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Eric Sandeen, linux-ext4

On Thu, Jul 23, 2009 at 11:07:59AM -0600, Andreas Dilger wrote:
> 
> I like the printing of the total blocks in each section and the
> percent of blocks...  Attached is an incremental patch that adds
> the same to e2freefrag.

Thanks, applied.

One question --- right now the chunksize (as specified by -c) only
affects these lines printed by e2freefrag, right?

Chunksize: 1048576 bytes (256 blocks)
Total chunks: 71681
Free chunks: 21657 (30.2%)

They are a little confusing since "chunk" as used here is different
from "chunk" used in the next part of the output:

Min free chunk: 4 KB 
Max free chunk: 568232 KB
Avg free chunk: 188 KB

How useful is it to print the "total chunks / free chunks" in the
general case?  I'm guessing this relates to Lutsre's chunking and
chunksize?  Would it make sense to only print the "Chunksize / Total
chunks / Free Chunks" if a chunksize is specified explicitly via the
-c option, and to do a s/chunk/extent/ in the next part of the output,
i.e.:

Min free extent: 4 KB 
Max free extent: 568232 KB
Avg free extent size: 188 KB

							- Ted

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] e2freefrag utility
  2009-07-24 22:32       ` Theodore Tso
@ 2009-07-24 23:14         ` Andreas Dilger
  2009-07-25  0:18           ` Theodore Tso
  0 siblings, 1 reply; 24+ messages in thread
From: Andreas Dilger @ 2009-07-24 23:14 UTC (permalink / raw)
  To: Theodore Tso; +Cc: Eric Sandeen, linux-ext4

On Jul 24, 2009  18:32 -0400, Theodore Ts'o wrote:
> One question --- right now the chunksize (as specified by -c) only
> affects these lines printed by e2freefrag, right?
> 
> Chunksize: 1048576 bytes (256 blocks)
> Total chunks: 71681
> Free chunks: 21657 (30.2%)

Yes, pretty much.

> They are a little confusing since "chunk" as used here is different
> from "chunk" used in the next part of the output:
> 
> Min free chunk: 4 KB 
> Max free chunk: 568232 KB
> Avg free chunk: 188 KB

You're right.  "free extent" is better.

> How useful is it to print the "total chunks / free chunks" in the
> general case?  I'm guessing this relates to Lutsre's chunking and
> chunksize?

Well, it was important for the hardware RAID setups, to see how many
stripe-aligned free chunks are available in the filesystem.  Since
mballoc will also try to allocate/align on "chunk" boundaries this
is useful to know.  If this chunksize depended on the superblock
s_raid_stripe_width then it would be more useful for the general public.

> Would it make sense to only print the "Chunksize / Total
> chunks / Free Chunks" if a chunksize is specified explicitly via the
> -c option, and to do a s/chunk/extent/ in the next part of the output,

I don't have a big objection.  I don't think there are any tools that
depend on this output.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] e2freefrag utility
  2009-07-24 23:14         ` Andreas Dilger
@ 2009-07-25  0:18           ` Theodore Tso
  2009-07-27 18:36             ` Andreas Dilger
  0 siblings, 1 reply; 24+ messages in thread
From: Theodore Tso @ 2009-07-25  0:18 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Eric Sandeen, linux-ext4

On Fri, Jul 24, 2009 at 05:14:25PM -0600, Andreas Dilger wrote:
> 
> Well, it was important for the hardware RAID setups, to see how many
> stripe-aligned free chunks are available in the filesystem.  Since
> mballoc will also try to allocate/align on "chunk" boundaries this
> is useful to know.  If this chunksize depended on the superblock
> s_raid_stripe_width then it would be more useful for the general public.

Yeah, I think what we should try to do is to display the chunk
information if s_raid_stripe_width if it is set, and otherwise only
print it if a chunk size is explicitly specified.  Sounds like a plan?

      	      	    	    	       - Ted

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] e2freefrag utility
  2009-07-25  0:18           ` Theodore Tso
@ 2009-07-27 18:36             ` Andreas Dilger
  2009-08-10  3:31               ` [PATCH 0/6] Patches to improve/fix e2freefrag Theodore Ts'o
                                 ` (6 more replies)
  0 siblings, 7 replies; 24+ messages in thread
From: Andreas Dilger @ 2009-07-27 18:36 UTC (permalink / raw)
  To: Theodore Tso; +Cc: Eric Sandeen, linux-ext4

On Jul 24, 2009  20:18 -0400, Theodore Ts'o wrote:
> On Fri, Jul 24, 2009 at 05:14:25PM -0600, Andreas Dilger wrote:
> > Well, it was important for the hardware RAID setups, to see how many
> > stripe-aligned free chunks are available in the filesystem.  Since
> > mballoc will also try to allocate/align on "chunk" boundaries this
> > is useful to know.  If this chunksize depended on the superblock
> > s_raid_stripe_width then it would be more useful for the general public.
> 
> Yeah, I think what we should try to do is to display the chunk
> information if s_raid_stripe_width if it is set, and otherwise only
> print it if a chunk size is explicitly specified.  Sounds like a plan?

Yes, sounds reasonable.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 0/6] Patches to improve/fix e2freefrag
  2009-07-27 18:36             ` Andreas Dilger
@ 2009-08-10  3:31               ` Theodore Ts'o
  2009-08-10  3:31               ` [PATCH 1/6] e2freefrag: Clarify e2freefrag's messages Theodore Ts'o
                                 ` (5 subsequent siblings)
  6 siblings, 0 replies; 24+ messages in thread
From: Theodore Ts'o @ 2009-08-10  3:31 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Andreas Dilger, Theodore Ts'o

As we had discussed earlier, here are the patches to make the output of
e2freefrag either to understand.  While I was working with the program,
I found a number of bugs that caused it to behave incorrectly on file
systems with 1k block size, and to give incorrect information in some
circumstances.  These patches fix all of this bugs I was able to find.

Theodore Ts'o (6):
  e2freefrag: Clarify e2freefrag's messages
  e2freefrag: Do not print chunk-related information by default
  e2freefrag: Fix to work correctly for file systems with 1kb block
    sizes
  e2freefrag: Take into account the last free extent in the file system
  Add V=1 support when linking e2freefrag in misc/Makefile.in
  libext2fs: Treat uninitialized parts of bitmaps as unallocated

 lib/ext2fs/rw_bitmaps.c |    4 +-
 misc/Makefile.in        |    4 +-
 misc/e2freefrag.8.in    |   23 +++++------
 misc/e2freefrag.c       |   99 +++++++++++++++++++++++++++++-----------------
 tests/m_uninit/expect.1 |   48 +++++++++++-----------
 5 files changed, 100 insertions(+), 78 deletions(-)


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/6] e2freefrag: Clarify e2freefrag's messages
  2009-07-27 18:36             ` Andreas Dilger
  2009-08-10  3:31               ` [PATCH 0/6] Patches to improve/fix e2freefrag Theodore Ts'o
@ 2009-08-10  3:31               ` Theodore Ts'o
  2009-08-10  3:31               ` [PATCH 2/6] e2freefrag: Do not print chunk-related information by default Theodore Ts'o
                                 ` (4 subsequent siblings)
  6 siblings, 0 replies; 24+ messages in thread
From: Theodore Ts'o @ 2009-08-10  3:31 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Andreas Dilger, Theodore Ts'o

"Free chunks" is confusing since it has nothing to do with the
chunksize; use "free extents" instead.

Also add a missing newline in an error message.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 misc/e2freefrag.8.in |   10 +++++-----
 misc/e2freefrag.c    |   12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/misc/e2freefrag.8.in b/misc/e2freefrag.8.in
index 9c47e97..77fadd7 100644
--- a/misc/e2freefrag.8.in
+++ b/misc/e2freefrag.8.in
@@ -54,16 +54,16 @@ Total chunks: 5876
 Free chunks: 463 (7.9%)
 .br
 
-Min free chunk: 4 KB
+Min. free extent: 4 KB
 .br
-Max free chunk: 24008 KB
+Max. free extent: 24008 KB
 .br
-Avg free chunk: 252 KB
+Avg. free extent: 252 KB
 .br
 
-HISTOGRAM OF FREE CHUNK SIZES:
+HISTOGRAM OF FREE EXTENT SIZES:
 .br
-Chunk Size Range :   Free chunks   Free Blocks  Percent
+Extent Size Range :   Free extents   Free Blocks  Percent
 .br
     4K...    8K- :           704           704     0.2%
 .br
diff --git a/misc/e2freefrag.c b/misc/e2freefrag.c
index df41853..9e7b617 100644
--- a/misc/e2freefrag.c
+++ b/misc/e2freefrag.c
@@ -156,16 +156,16 @@ errcode_t get_chunk_info(ext2_filsys fs, struct chunk_info *info)
 		info->min = 0;
 	}
 
-	printf("\nMin free chunk: %lu KB \nMax free chunk: %lu KB\n"
-	       "Avg free chunk: %lu KB\n", info->min, info->max, info->avg);
+	printf("\nMin. free extent: %lu KB \nMax. free extent: %lu KB\n"
+	       "Avg. free extent: %lu KB\n", info->min, info->max, info->avg);
 
-	printf("\nHISTOGRAM OF FREE CHUNK SIZES:\n");
-	printf("%s :  %12s  %12s  %7s\n", "Chunk Size Range", "Free chunks",
+	printf("\nHISTOGRAM OF FREE EXTENT SIZES:\n");
+	printf("%s :  %12s  %12s  %7s\n", "Extent Size Range", "Free extents",
 	       "Free Blocks", "Percent");
 	for (i = 0; i < MAX_HIST; i++) {
 		end = 1 << (i + info->blocksize_bits - units);
 		if (info->histogram.fc_chunks[i] != 0)
-			printf("%5lu%c...%5lu%c- :  %12lu  %12lu  %6.2f%%\n",
+			printf("%5lu%c...%5lu%c-  :  %12lu  %12lu  %6.2f%%\n",
 			       start, *unitp, end, *unitp,
 			       info->histogram.fc_chunks[i],
 			       info->histogram.fc_blocks[i],
@@ -250,7 +250,7 @@ int main(int argc, char *argv[])
 			if (chunk_info.chunkbytes &
 			    (chunk_info.chunkbytes - 1)) {
 				fprintf(stderr, "%s: chunk size must be a "
-					"power of 2.", argv[0]);
+					"power of 2.\n", argv[0]);
 				usage(progname);
 			}
 			chunk_info.chunkbytes *= 1024;
-- 
1.6.3.2.1.gb9f7d.dirty


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/6] e2freefrag: Do not print chunk-related information by default
  2009-07-27 18:36             ` Andreas Dilger
  2009-08-10  3:31               ` [PATCH 0/6] Patches to improve/fix e2freefrag Theodore Ts'o
  2009-08-10  3:31               ` [PATCH 1/6] e2freefrag: Clarify e2freefrag's messages Theodore Ts'o
@ 2009-08-10  3:31               ` Theodore Ts'o
  2009-08-10  3:31               ` [PATCH 3/6] e2freefrag: Fix to work correctly for file systems with 1kb block sizes Theodore Ts'o
                                 ` (3 subsequent siblings)
  6 siblings, 0 replies; 24+ messages in thread
From: Theodore Ts'o @ 2009-08-10  3:31 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Andreas Dilger, Theodore Ts'o

Only print information related to chunk sizes if a chunksize is
printed.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 misc/e2freefrag.8.in |   13 +++++--------
 misc/e2freefrag.c    |   30 +++++++++++++++++++-----------
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/misc/e2freefrag.8.in b/misc/e2freefrag.8.in
index 77fadd7..56fdbff 100644
--- a/misc/e2freefrag.8.in
+++ b/misc/e2freefrag.8.in
@@ -30,9 +30,12 @@ can be used to gauge the level of free space fragmentation in the filesystem.
 .SH OPTIONS
 .TP
 .BI \-c " chunk_kb"
-Desired size of chunk. It is specified in units of kilobytes (KB). If no
+If a chunk size is specified, then
+.B e2freefrag
+will print how many free chunks of size
 .I chunk_kb
-is specified on the command line, then the default value is 1024KB.
+are available in units of kilobytes (Kb).  The chunk size must be a
+power of two and be larger than filesystem block size.
 .TP
 .BI \-h
 Print the usage of the program.
@@ -47,12 +50,6 @@ Total blocks: 1504085
 .br
 Free blocks: 292995 (19.5%)
 .br
-Chunk size: 1048576 bytes (256 blocks)
-.br
-Total chunks: 5876
-.br
-Free chunks: 463 (7.9%)
-.br
 
 Min. free extent: 4 KB
 .br
diff --git a/misc/e2freefrag.c b/misc/e2freefrag.c
index 9e7b617..274bf55 100644
--- a/misc/e2freefrag.c
+++ b/misc/e2freefrag.c
@@ -52,9 +52,14 @@ void init_chunk_info(ext2_filsys fs, struct chunk_info *info)
 {
 	int i;
 
-	info->chunkbits = ul_log2(info->chunkbytes);
 	info->blocksize_bits = ul_log2((unsigned long)fs->blocksize);
-	info->blks_in_chunk = info->chunkbytes >> info->blocksize_bits;
+	if (info->chunkbytes) {
+		info->chunkbits = ul_log2(info->chunkbytes);
+		info->blks_in_chunk = info->chunkbytes >> info->blocksize_bits;
+	} else {
+		info->chunkbits = ul_log2(DEFAULT_CHUNKSIZE);
+		info->blks_in_chunk = DEFAULT_CHUNKSIZE >> info->blocksize_bits;
+	}
 
 	info->min = ~0UL;
 	info->max = info->avg = 0;
@@ -138,13 +143,16 @@ errcode_t get_chunk_info(ext2_filsys fs, struct chunk_info *info)
 	       (double)fs->super->s_free_blocks_count * 100 /
 						fs->super->s_blocks_count);
 
-	printf("\nChunksize: %lu bytes (%u blocks)\n",
-	       info->chunkbytes, info->blks_in_chunk);
-	total_chunks = (fs->super->s_blocks_count + info->blks_in_chunk) >>
-				(info->chunkbits - info->blocksize_bits);
-	printf("Total chunks: %lu\nFree chunks: %lu (%0.1f%%)\n",
-	       total_chunks, info->free_chunks,
-	       (double)info->free_chunks * 100 / total_chunks);
+	if (info->chunkbytes) {
+		printf("\nChunksize: %lu bytes (%u blocks)\n",
+		       info->chunkbytes, info->blks_in_chunk);
+		total_chunks = (fs->super->s_blocks_count +
+				info->blks_in_chunk) >>
+			(info->chunkbits - info->blocksize_bits);
+		printf("Total chunks: %lu\nFree chunks: %lu (%0.1f%%)\n",
+		       total_chunks, info->free_chunks,
+		       (double)info->free_chunks * 100 / total_chunks);
+	}
 
 	/* Display chunk information in KB */
 	if (info->real_free_chunks) {
@@ -228,7 +236,7 @@ void open_device(char *device_name, ext2_filsys *fs)
 
 int main(int argc, char *argv[])
 {
-	struct chunk_info chunk_info = { .chunkbytes = DEFAULT_CHUNKSIZE };
+	struct chunk_info chunk_info = { };
 	errcode_t retval = 0;
 	ext2_filsys fs = NULL;
 	char *device_name;
@@ -273,7 +281,7 @@ int main(int argc, char *argv[])
 
 	open_device(device_name, &fs);
 
-	if (chunk_info.chunkbytes < fs->blocksize) {
+	if (chunk_info.chunkbytes && (chunk_info.chunkbytes < fs->blocksize)) {
 		fprintf(stderr, "%s: chunksize must be greater than or equal "
 			"to filesystem blocksize.\n", progname);
 		exit(1);
-- 
1.6.3.2.1.gb9f7d.dirty


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 3/6] e2freefrag: Fix to work correctly for file systems with 1kb block sizes
  2009-07-27 18:36             ` Andreas Dilger
                                 ` (2 preceding siblings ...)
  2009-08-10  3:31               ` [PATCH 2/6] e2freefrag: Do not print chunk-related information by default Theodore Ts'o
@ 2009-08-10  3:31               ` Theodore Ts'o
  2009-08-10  3:31               ` [PATCH 4/6] e2freefrag: Take into account the last free extent in the file system Theodore Ts'o
                                 ` (2 subsequent siblings)
  6 siblings, 0 replies; 24+ messages in thread
From: Theodore Ts'o @ 2009-08-10  3:31 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Andreas Dilger, Theodore Ts'o

If the file system has a non-zero s_first_data_block, as is the case
when the block size is 1kb, e2freefrag would incorrectly try to
reference invalid data blocks in the block allocation bitmap.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 misc/e2freefrag.c |   12 ++++++++----
 1 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/misc/e2freefrag.c b/misc/e2freefrag.c
index 274bf55..10a48ad 100644
--- a/misc/e2freefrag.c
+++ b/misc/e2freefrag.c
@@ -79,6 +79,7 @@ void scan_block_bitmap(ext2_filsys fs, struct chunk_info *info)
 	unsigned long long chunk_num;
 	unsigned long last_chunk_size = 0;
 	unsigned long long chunk_start_blk = 0;
+	int used;
 
 	for (chunk_num = 0; chunk_num < chunks; chunk_num++) {
 		unsigned long long blk, num_blks;
@@ -95,10 +96,13 @@ void scan_block_bitmap(ext2_filsys fs, struct chunk_info *info)
 		/* Initialize starting block for first chunk correctly else
 		 * there is a segfault when blocksize = 1024 in which case
 		 * block_map->start = 1 */
-		for (blk = (chunk_num == 0 ? fs->super->s_first_data_block : 0);
-		     blk < num_blks; blk++, chunk_start_blk++) {
-			int used = ext2fs_fast_test_block_bitmap(fs->block_map,
-							       chunk_start_blk);
+		for (blk = 0; blk < num_blks; blk++, chunk_start_blk++) {
+			if (chunk_num == 0 && blk == 0) {
+				blk = fs->super->s_first_data_block;
+				chunk_start_blk = blk;
+			}
+			used = ext2fs_fast_test_block_bitmap(fs->block_map,
+							     chunk_start_blk);
 			if (!used) {
 				last_chunk_size++;
 				chunk_free++;
-- 
1.6.3.2.1.gb9f7d.dirty


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 4/6] e2freefrag: Take into account the last free extent in the file system
  2009-07-27 18:36             ` Andreas Dilger
                                 ` (3 preceding siblings ...)
  2009-08-10  3:31               ` [PATCH 3/6] e2freefrag: Fix to work correctly for file systems with 1kb block sizes Theodore Ts'o
@ 2009-08-10  3:31               ` Theodore Ts'o
  2009-08-10  3:31               ` [PATCH 5/6] Add V=1 support when linking e2freefrag in misc/Makefile.in Theodore Ts'o
  2009-08-10  3:31               ` [PATCH 6/6] libext2fs: Treat uninitialized parts of bitmaps as unallocated Theodore Ts'o
  6 siblings, 0 replies; 24+ messages in thread
From: Theodore Ts'o @ 2009-08-10  3:31 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Andreas Dilger, Theodore Ts'o

Fix a bug in e2freefrag where if the last free extent is at the very
end of the filesystem, it would be disregarded.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 misc/e2freefrag.c |   47 ++++++++++++++++++++++++++++++-----------------
 1 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/misc/e2freefrag.c b/misc/e2freefrag.c
index 10a48ad..a4ab994 100644
--- a/misc/e2freefrag.c
+++ b/misc/e2freefrag.c
@@ -71,6 +71,24 @@ void init_chunk_info(ext2_filsys fs, struct chunk_info *info)
 	}
 }
 
+void update_chunk_stats(struct chunk_info *info, unsigned long chunk_size)
+{
+	unsigned long index;
+
+	index = ul_log2(chunk_size) + 1;
+	if (index >= MAX_HIST)
+		index = MAX_HIST-1;
+	info->histogram.fc_chunks[index]++;
+	info->histogram.fc_blocks[index] += chunk_size;
+
+	if (chunk_size > info->max)
+		info->max = chunk_size;
+	if (chunk_size < info->min)
+		info->min = chunk_size;
+	info->avg += chunk_size;
+	info->real_free_chunks++;
+}
+
 void scan_block_bitmap(ext2_filsys fs, struct chunk_info *info)
 {
 	unsigned long long blocks_count = fs->super->s_blocks_count;
@@ -109,20 +127,7 @@ void scan_block_bitmap(ext2_filsys fs, struct chunk_info *info)
 			}
 
 			if (used && last_chunk_size != 0) {
-				unsigned long index;
-
-				index = ul_log2(last_chunk_size) + 1;
-				info->histogram.fc_chunks[index]++;
-				info->histogram.fc_blocks[index] +=
-							last_chunk_size;
-
-				if (last_chunk_size > info->max)
-					info->max = last_chunk_size;
-				if (last_chunk_size < info->min)
-					info->min = last_chunk_size;
-				info->avg += last_chunk_size;
-
-				info->real_free_chunks++;
+				update_chunk_stats(info, last_chunk_size);
 				last_chunk_size = 0;
 			}
 		}
@@ -130,6 +135,8 @@ void scan_block_bitmap(ext2_filsys fs, struct chunk_info *info)
 		if (chunk_free == info->blks_in_chunk)
 			info->free_chunks++;
 	}
+	if (last_chunk_size != 0)
+		update_chunk_stats(info, last_chunk_size);
 }
 
 errcode_t get_chunk_info(ext2_filsys fs, struct chunk_info *info)
@@ -176,13 +183,19 @@ errcode_t get_chunk_info(ext2_filsys fs, struct chunk_info *info)
 	       "Free Blocks", "Percent");
 	for (i = 0; i < MAX_HIST; i++) {
 		end = 1 << (i + info->blocksize_bits - units);
-		if (info->histogram.fc_chunks[i] != 0)
-			printf("%5lu%c...%5lu%c-  :  %12lu  %12lu  %6.2f%%\n",
-			       start, *unitp, end, *unitp,
+		if (info->histogram.fc_chunks[i] != 0) {
+			char end_str[32];
+
+			sprintf(end_str, "%5lu%c-", end, *unitp);
+			if (i == MAX_HIST-1)
+				strcpy(end_str, "max ");
+			printf("%5lu%c...%7s  :  %12lu  %12lu  %6.2f%%\n",
+			       start, *unitp, end_str,
 			       info->histogram.fc_chunks[i],
 			       info->histogram.fc_blocks[i],
 			       (double)info->histogram.fc_blocks[i] * 100 /
 			       fs->super->s_free_blocks_count);
+		}
 		start = end;
 		if (start == 1<<10) {
 			start = 1;
-- 
1.6.3.2.1.gb9f7d.dirty


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 5/6] Add V=1 support when linking e2freefrag in misc/Makefile.in
  2009-07-27 18:36             ` Andreas Dilger
                                 ` (4 preceding siblings ...)
  2009-08-10  3:31               ` [PATCH 4/6] e2freefrag: Take into account the last free extent in the file system Theodore Ts'o
@ 2009-08-10  3:31               ` Theodore Ts'o
  2009-08-10  3:31               ` [PATCH 6/6] libext2fs: Treat uninitialized parts of bitmaps as unallocated Theodore Ts'o
  6 siblings, 0 replies; 24+ messages in thread
From: Theodore Ts'o @ 2009-08-10  3:31 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Andreas Dilger, Theodore Ts'o

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 misc/Makefile.in |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/misc/Makefile.in b/misc/Makefile.in
index 3076f67..a5ed0c0 100644
--- a/misc/Makefile.in
+++ b/misc/Makefile.in
@@ -289,8 +289,8 @@ logsave.profiled: profiled/logsave.o
 	$(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o logsave.profiled profiled/logsave.o
 
 e2freefrag: $(E2FREEFRAG_OBJS)
-	@echo "	LD $@"
-	@$(CC) $(ALL_LDFLAGS) -o e2freefrag $(E2FREEFRAG_OBJS) $(LIBS)
+	$(E) "	LD $@"
+	$(Q) $(CC) $(ALL_LDFLAGS) -o e2freefrag $(E2FREEFRAG_OBJS) $(LIBS)
 
 filefrag: $(FILEFRAG_OBJS)
 	$(E) "	LD $@"
-- 
1.6.3.2.1.gb9f7d.dirty


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 6/6] libext2fs: Treat uninitialized parts of bitmaps as unallocated
  2009-07-27 18:36             ` Andreas Dilger
                                 ` (5 preceding siblings ...)
  2009-08-10  3:31               ` [PATCH 5/6] Add V=1 support when linking e2freefrag in misc/Makefile.in Theodore Ts'o
@ 2009-08-10  3:31               ` Theodore Ts'o
  6 siblings, 0 replies; 24+ messages in thread
From: Theodore Ts'o @ 2009-08-10  3:31 UTC (permalink / raw)
  To: Ext4 Developers List; +Cc: Andreas Dilger, Theodore Ts'o

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 lib/ext2fs/rw_bitmaps.c |    4 +-
 tests/m_uninit/expect.1 |   48 +++++++++++++++++++++++-----------------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/lib/ext2fs/rw_bitmaps.c b/lib/ext2fs/rw_bitmaps.c
index 341f834..56fcfd6 100644
--- a/lib/ext2fs/rw_bitmaps.c
+++ b/lib/ext2fs/rw_bitmaps.c
@@ -248,7 +248,7 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 					goto cleanup;
 				}
 			} else
-				memset(block_bitmap, 0xff, block_nbytes);
+				memset(block_bitmap, 0, block_nbytes);
 			cnt = block_nbytes << 3;
 			retval = ext2fs_set_block_bitmap_range(fs->block_map,
 					       blk_itr, cnt, block_bitmap);
@@ -270,7 +270,7 @@ static errcode_t read_bitmaps(ext2_filsys fs, int do_inode, int do_block)
 					goto cleanup;
 				}
 			} else
-				memset(inode_bitmap, 0xff, inode_nbytes);
+				memset(inode_bitmap, 0, inode_nbytes);
 			cnt = inode_nbytes << 3;
 			retval = ext2fs_set_inode_bitmap_range(fs->inode_map,
 					       ino_itr, cnt, inode_bitmap);
diff --git a/tests/m_uninit/expect.1 b/tests/m_uninit/expect.1
index 549aced..67cd33d 100644
--- a/tests/m_uninit/expect.1
+++ b/tests/m_uninit/expect.1
@@ -71,13 +71,13 @@ Group 1: (Blocks 8193-16384) [INODE_UNINIT, ITABLE_ZEROED]
   Inode table at 8453-8708 (+260)
   7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
   Free blocks: 8709-16384
-  Free inodes: 
+  Free inodes: 2049-4096
 Group 2: (Blocks 16385-24576) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
   Block bitmap at 16385 (+0), Inode bitmap at 16386 (+1)
   Inode table at 16387-16642 (+2)
   7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
-  Free blocks: 
-  Free inodes: 
+  Free blocks: 16385-24576
+  Free inodes: 4097-6144
 Group 3: (Blocks 24577-32768) [INODE_UNINIT, ITABLE_ZEROED]
   Backup superblock at 24577, Group descriptors at 24578-24578
   Reserved GDT blocks at 24579-24834
@@ -85,13 +85,13 @@ Group 3: (Blocks 24577-32768) [INODE_UNINIT, ITABLE_ZEROED]
   Inode table at 24837-25092 (+260)
   7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
   Free blocks: 25093-32768
-  Free inodes: 
+  Free inodes: 6145-8192
 Group 4: (Blocks 32769-40960) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
   Block bitmap at 32769 (+0), Inode bitmap at 32770 (+1)
   Inode table at 32771-33026 (+2)
   7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
-  Free blocks: 
-  Free inodes: 
+  Free blocks: 32769-40960
+  Free inodes: 8193-10240
 Group 5: (Blocks 40961-49152) [INODE_UNINIT, ITABLE_ZEROED]
   Backup superblock at 40961, Group descriptors at 40962-40962
   Reserved GDT blocks at 40963-41218
@@ -99,13 +99,13 @@ Group 5: (Blocks 40961-49152) [INODE_UNINIT, ITABLE_ZEROED]
   Inode table at 41221-41476 (+260)
   7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
   Free blocks: 41477-49152
-  Free inodes: 
+  Free inodes: 10241-12288
 Group 6: (Blocks 49153-57344) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
   Block bitmap at 49153 (+0), Inode bitmap at 49154 (+1)
   Inode table at 49155-49410 (+2)
   7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
-  Free blocks: 
-  Free inodes: 
+  Free blocks: 49153-57344
+  Free inodes: 12289-14336
 Group 7: (Blocks 57345-65536) [INODE_UNINIT, ITABLE_ZEROED]
   Backup superblock at 57345, Group descriptors at 57346-57346
   Reserved GDT blocks at 57347-57602
@@ -113,13 +113,13 @@ Group 7: (Blocks 57345-65536) [INODE_UNINIT, ITABLE_ZEROED]
   Inode table at 57605-57860 (+260)
   7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
   Free blocks: 57861-65536
-  Free inodes: 
+  Free inodes: 14337-16384
 Group 8: (Blocks 65537-73728) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
   Block bitmap at 65537 (+0), Inode bitmap at 65538 (+1)
   Inode table at 65539-65794 (+2)
   7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
-  Free blocks: 
-  Free inodes: 
+  Free blocks: 65537-73728
+  Free inodes: 16385-18432
 Group 9: (Blocks 73729-81920) [INODE_UNINIT, ITABLE_ZEROED]
   Backup superblock at 73729, Group descriptors at 73730-73730
   Reserved GDT blocks at 73731-73986
@@ -127,40 +127,40 @@ Group 9: (Blocks 73729-81920) [INODE_UNINIT, ITABLE_ZEROED]
   Inode table at 73989-74244 (+260)
   7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
   Free blocks: 74245-81920
-  Free inodes: 
+  Free inodes: 18433-20480
 Group 10: (Blocks 81921-90112) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
   Block bitmap at 81921 (+0), Inode bitmap at 81922 (+1)
   Inode table at 81923-82178 (+2)
   7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
-  Free blocks: 
-  Free inodes: 
+  Free blocks: 81921-90112
+  Free inodes: 20481-22528
 Group 11: (Blocks 90113-98304) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
   Block bitmap at 90113 (+0), Inode bitmap at 90114 (+1)
   Inode table at 90115-90370 (+2)
   7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
-  Free blocks: 
-  Free inodes: 
+  Free blocks: 90113-98304
+  Free inodes: 22529-24576
 Group 12: (Blocks 98305-106496) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
   Block bitmap at 98305 (+0), Inode bitmap at 98306 (+1)
   Inode table at 98307-98562 (+2)
   7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
-  Free blocks: 
-  Free inodes: 
+  Free blocks: 98305-106496
+  Free inodes: 24577-26624
 Group 13: (Blocks 106497-114688) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
   Block bitmap at 106497 (+0), Inode bitmap at 106498 (+1)
   Inode table at 106499-106754 (+2)
   7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
-  Free blocks: 
-  Free inodes: 
+  Free blocks: 106497-114688
+  Free inodes: 26625-28672
 Group 14: (Blocks 114689-122880) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED]
   Block bitmap at 114689 (+0), Inode bitmap at 114690 (+1)
   Inode table at 114691-114946 (+2)
   7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
-  Free blocks: 
-  Free inodes: 
+  Free blocks: 114689-122880
+  Free inodes: 28673-30720
 Group 15: (Blocks 122881-131071) [INODE_UNINIT, ITABLE_ZEROED]
   Block bitmap at 122881 (+0), Inode bitmap at 122882 (+1)
   Inode table at 122883-123138 (+2)
   7933 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
   Free blocks: 123139-131071
-  Free inodes: 
+  Free inodes: 30721-32768
-- 
1.6.3.2.1.gb9f7d.dirty


^ permalink raw reply related	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2009-08-10  3:32 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-07-21  0:17 [PATCH] e2freefrag utility Andreas Dilger
2009-07-22  7:43 ` Theodore Tso
2009-07-23  4:59   ` Eric Sandeen
2009-07-23 13:45     ` How to fix up mballoc Theodore Tso
2009-07-23 17:43       ` Eric Sandeen
2009-07-24  0:23         ` Theodore Tso
2009-07-24  2:18           ` Eric Sandeen
2009-07-24  2:25             ` Eric Sandeen
2009-07-24  2:30           ` Andreas Dilger
2009-07-23 17:51       ` Mingming Cao
2009-07-24  0:43         ` Theodore Tso
2009-07-23 17:07     ` [PATCH] e2freefrag utility Andreas Dilger
2009-07-23 17:18       ` Eric Sandeen
2009-07-24 22:32       ` Theodore Tso
2009-07-24 23:14         ` Andreas Dilger
2009-07-25  0:18           ` Theodore Tso
2009-07-27 18:36             ` Andreas Dilger
2009-08-10  3:31               ` [PATCH 0/6] Patches to improve/fix e2freefrag Theodore Ts'o
2009-08-10  3:31               ` [PATCH 1/6] e2freefrag: Clarify e2freefrag's messages Theodore Ts'o
2009-08-10  3:31               ` [PATCH 2/6] e2freefrag: Do not print chunk-related information by default Theodore Ts'o
2009-08-10  3:31               ` [PATCH 3/6] e2freefrag: Fix to work correctly for file systems with 1kb block sizes Theodore Ts'o
2009-08-10  3:31               ` [PATCH 4/6] e2freefrag: Take into account the last free extent in the file system Theodore Ts'o
2009-08-10  3:31               ` [PATCH 5/6] Add V=1 support when linking e2freefrag in misc/Makefile.in Theodore Ts'o
2009-08-10  3:31               ` [PATCH 6/6] libext2fs: Treat uninitialized parts of bitmaps as unallocated Theodore Ts'o

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.