linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andreas Dilger <adilger@whamcloud.com>
To: tytso@mit.edu
Cc: linux-ext4@vger.kernel.org, Andreas Dilger <adilger@whamcloud.com>
Subject: [PATCH] e2fsck: avoid overflow with very large dirs
Date: Tue, 11 Feb 2020 18:07:21 -0700	[thread overview]
Message-ID: <1581469641-85696-1-git-send-email-adilger@whamcloud.com> (raw)
In-Reply-To: <1581037786-62789-1-git-send-email-adilger@whamcloud.com>

In alloc_size_dir() it multiples signed ints when allocating the
buffer for rehashing an htree-indexed directory.  This will overflow
when the directory size is above 4GB, which is possible with largedir
directories having about 100M entries, assuming an average 3/4 leaf
fullness and 24-byte filenames, or fewer with longer filenames.
The same problem exisgs in get_next_block().

Similarly, the out_dir struct used a signed int for the number of
blocks in the directory, which may result in a negative size if the
directory is over 2GB (about 50M entries or fewer).

Use appropriate unsigned variables for block counts, and use larger
types for calculating the byte count for memory offsets/sizes.

Such large directories not been seen yet, but are not too far away.
The ext2fs_get_array() function will properly calculate the needed
memory allocation, and detect overflow on 32-bit systems.
Add ext2fs_resize_array() to do the same for array resize.

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Lustre-bug-id: https://jira.whamcloud.com/browse/LU-13197
---
 e2fsck/pass2.c      | 10 ++++----
 e2fsck/rehash.c     | 72 +++++++++++++++++++++++++++++------------------------
 lib/ext2fs/ext2fs.h | 44 ++++++++++++++++++++++++++------
 3 files changed, 81 insertions(+), 45 deletions(-)

diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
index 0fa6233..a280619 100644
--- a/e2fsck/pass2.c
+++ b/e2fsck/pass2.c
@@ -88,7 +88,7 @@ struct check_dir_struct {
 static void update_parents(struct dx_dir_info *dx_dir, int type)
 {
 	struct dx_dirblock_info *dx_db, *dx_parent, *dx_previous;
-	int b;
+	blk_t b;
 
 	for (b = 0, dx_db = dx_dir->dx_block;
 	     b < dx_dir->numblocks;
@@ -130,7 +130,7 @@ void e2fsck_pass2(e2fsck_t ctx)
 	struct check_dir_struct cd;
 	struct dx_dir_info	*dx_dir;
 	struct dx_dirblock_info	*dx_db;
-	int			b;
+	blk_t			b;
 	ext2_ino_t		i;
 	short			depth;
 	problem_t		code;
@@ -570,8 +570,8 @@ static void parse_int_node(ext2_filsys fs,
 			   struct dx_dir_info	*dx_dir,
 			   char *block_buf, int failed_csum)
 {
-	struct 		ext2_dx_root_info  *root;
-	struct 		ext2_dx_entry *ent;
+	struct		ext2_dx_root_info  *root;
+	struct		ext2_dx_entry *ent;
 	struct		ext2_dx_countlimit *limit;
 	struct dx_dirblock_info	*dx_db;
 	int		i, expect_limit, count;
@@ -646,7 +646,7 @@ static void parse_int_node(ext2_filsys fs,
 #endif
 		blk = ext2fs_le32_to_cpu(ent[i].block) & EXT4_DX_BLOCK_MASK;
 		/* Check to make sure the block is valid */
-		if (blk >= (blk_t) dx_dir->numblocks) {
+		if (blk >= dx_dir->numblocks) {
 			cd->pctx.blk = blk;
 			if (fix_problem(cd->ctx, PR_2_HTREE_BADBLK,
 					&cd->pctx))
diff --git a/e2fsck/rehash.c b/e2fsck/rehash.c
index c9d667b..6cb1c8e 100644
--- a/e2fsck/rehash.c
+++ b/e2fsck/rehash.c
@@ -80,8 +80,8 @@ struct fill_dir_struct {
 	errcode_t err;
 	e2fsck_t ctx;
 	struct hash_entry *harray;
-	int max_array, num_array;
-	unsigned int dir_size;
+	blk_t max_array, num_array;
+	ext2_off64_t dir_size;
 	int compress;
 	ext2_ino_t parent;
 	ext2_ino_t dir;
@@ -95,8 +95,8 @@ struct hash_entry {
 };
 
 struct out_dir {
-	int		num;
-	int		max;
+	blk_t		num;
+	blk_t		max;
 	char		*buf;
 	ext2_dirhash_t	*hashes;
 };
@@ -169,13 +169,16 @@ static int fill_dir_block(ext2_filsys fs,
 			continue;
 		}
 		if (fd->num_array >= fd->max_array) {
-			new_array = realloc(fd->harray,
-			    sizeof(struct hash_entry) * (fd->max_array+500));
-			if (!new_array) {
-				fd->err = ENOMEM;
+			errcode_t retval;
+
+			retval = ext2fs_resize_array(sizeof(struct hash_entry),
+						     fd->max_array,
+						     fd->max_array + 500,
+						     &fd->harray);
+			if (retval) {
+				fd->err = retval;
 				return BLOCK_ABORT;
 			}
-			fd->harray = new_array;
 			fd->max_array += 500;
 		}
 		ent = fd->harray + fd->num_array++;
@@ -256,23 +259,28 @@ static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b)
 }
 
 static errcode_t alloc_size_dir(ext2_filsys fs, struct out_dir *outdir,
-				int blocks)
+				blk_t blocks)
 {
-	void			*new_mem;
+	errcode_t retval;
 
 	if (outdir->max) {
-		new_mem = realloc(outdir->buf, blocks * fs->blocksize);
-		if (!new_mem)
-			return ENOMEM;
-		outdir->buf = new_mem;
-		new_mem = realloc(outdir->hashes,
-				  blocks * sizeof(ext2_dirhash_t));
-		if (!new_mem)
-			return ENOMEM;
-		outdir->hashes = new_mem;
+		retval = ext2fs_resize_array(fs->blocksize, outdir->max, blocks,
+					     &outdir->buf);
+		if (retval)
+			return retval;
+		retval = ext2fs_resize_array(sizeof(ext2_dirhash_t),
+					     outdir->max, blocks,
+					     &outdir->hashes);
+		if (retval)
+			return retval;
 	} else {
-		outdir->buf = malloc(blocks * fs->blocksize);
-		outdir->hashes = malloc(blocks * sizeof(ext2_dirhash_t));
+		retval = ext2fs_get_array(fs->blocksize, blocks, &outdir->buf);
+		if (retval)
+			return retval;
+		retval = ext2fs_get_array(sizeof(ext2_dirhash_t), blocks,
+					  &outdir->hashes);
+		if (retval)
+			return retval;
 		outdir->num = 0;
 	}
 	outdir->max = blocks;
@@ -297,7 +305,7 @@ static errcode_t get_next_block(ext2_filsys fs, struct out_dir *outdir,
 		if (retval)
 			return retval;
 	}
-	*ret = outdir->buf + (outdir->num++ * fs->blocksize);
+	*ret = outdir->buf + (size_t)outdir->num++ * fs->blocksize;
 	memset(*ret, 0, fs->blocksize);
 	return 0;
 }
@@ -367,8 +375,8 @@ static int duplicate_search_and_fix(e2fsck_t ctx, ext2_filsys fs,
 				    struct fill_dir_struct *fd)
 {
 	struct problem_context	pctx;
-	struct hash_entry 	*ent, *prev;
-	int			i, j;
+	struct hash_entry	*ent, *prev;
+	blk_t			i, j;
 	int			fixed = 0;
 	char			new_name[256];
 	unsigned int		new_len;
@@ -869,14 +877,14 @@ errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino,
 	   (inode.i_flags & EXT4_INLINE_DATA_FL))
 		return 0;
 
-	retval = ENOMEM;
-	dir_buf = malloc(inode.i_size);
-	if (!dir_buf)
+	retval = ext2fs_get_mem(inode.i_size, &dir_buf);
+	if (retval)
 		goto errout;
 
 	fd.max_array = inode.i_size / 32;
-	fd.harray = malloc(fd.max_array * sizeof(struct hash_entry));
-	if (!fd.harray)
+	retval = ext2fs_get_array(sizeof(struct hash_entry),
+				  fd.max_array, &fd.harray);
+	if (retval)
 		goto errout;
 
 	fd.ino = ino;
@@ -965,8 +973,8 @@ resort:
 	else
 		retval = e2fsck_check_rebuild_extents(ctx, ino, &inode, pctx);
 errout:
-	free(dir_buf);
-	free(fd.harray);
+	ext2fs_free_mem(&dir_buf);
+	ext2fs_free_mem(&fd.harray);
 
 	free_out_dir(&outdir);
 	return retval;
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 59fd974..452af6a 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1760,6 +1760,8 @@ extern errcode_t ext2fs_get_arrayzero(unsigned long count,
 extern errcode_t ext2fs_free_mem(void *ptr);
 extern errcode_t ext2fs_resize_mem(unsigned long old_size,
 				   unsigned long size, void *ptr);
+extern errcode_t ext2fs_resize_array(unsigned long old_count, unsigned long count,
+				     unsigned long size, void *ptr);
 extern void ext2fs_mark_super_dirty(ext2_filsys fs);
 extern void ext2fs_mark_changed(ext2_filsys fs);
 extern int ext2fs_test_changed(ext2_filsys fs);
@@ -1837,7 +1839,8 @@ _INLINE_ errcode_t ext2fs_get_memzero(unsigned long size, void *ptr)
 	return 0;
 }
 
-_INLINE_ errcode_t ext2fs_get_array(unsigned long count, unsigned long size, void *ptr)
+_INLINE_ errcode_t ext2fs_get_array(unsigned long count, unsigned long size,
+				    void *ptr)
 {
 	if (count && (~0UL)/count < size)
 		return EXT2_ET_NO_MEMORY;
@@ -1847,15 +1850,10 @@ _INLINE_ errcode_t ext2fs_get_array(unsigned long count, unsigned long size, voi
 _INLINE_ errcode_t ext2fs_get_arrayzero(unsigned long count,
 					unsigned long size, void *ptr)
 {
-	void *pp;
-
 	if (count && (~0UL)/count < size)
 		return EXT2_ET_NO_MEMORY;
-	pp = calloc(count, size);
-	if (!pp)
-		return EXT2_ET_NO_MEMORY;
-	memcpy(ptr, &pp, sizeof(pp));
-	return 0;
+
+	return ext2fs_get_memzero((size_t)count * size, ptr);
 }
 
 /*
@@ -1889,6 +1887,36 @@ _INLINE_ errcode_t ext2fs_resize_mem(unsigned long EXT2FS_ATTR((unused)) old_siz
 	memcpy(ptr, &p, sizeof(p));
 	return 0;
 }
+
+/*
+ *  Resize array.  The 'ptr' arg must point to a pointer.
+ */
+_INLINE_ errcode_t ext2fs_resize_array(unsigned long size,
+				       unsigned long old_count,
+				       unsigned long count, void *ptr)
+{
+	unsigned long old_size;
+	errcode_t retval;
+
+	if (count && (~0UL)/count < size)
+		return EXT2_ET_NO_MEMORY;
+
+	size *= count;
+	old_size = size * old_count;
+	retval = ext2fs_resize_mem(old_size, size, ptr);
+	if (retval)
+		return retval;
+
+	if (size > old_size) {
+		void *p;
+
+		memcpy(&p, ptr, sizeof(p));
+		memset((char *)p + old_size, 0, size - old_size);
+		memcpy(ptr, &p, sizeof(p));
+	}
+
+	return 0;
+}
 #endif	/* Custom memory routines */
 
 /*
-- 
1.8.0


  parent reply	other threads:[~2020-02-12  1:07 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-07  1:09 [PATCH 1/9] e2fsck: fix e2fsck_allocate_memory() overflow Andreas Dilger
2020-02-07  1:09 ` [PATCH 2/9] e2fsck: use proper types for variables Andreas Dilger
2020-02-29 23:27   ` Theodore Y. Ts'o
2020-02-07  1:09 ` [PATCH 3/9] e2fsck: avoid mallinfo() if over 2GB allocated Andreas Dilger
2020-02-29 23:28   ` Theodore Y. Ts'o
2020-02-07  1:09 ` [PATCH 4/9] e2fsck: reduce memory usage for many directories Andreas Dilger
2020-02-29 23:29   ` Theodore Y. Ts'o
2020-02-07  1:09 ` [PATCH 5/9] debugfs: allow comment lines in command file Andreas Dilger
2020-02-29 23:32   ` Theodore Y. Ts'o
2020-02-07  1:09 ` [PATCH 6/9] debugfs: print inode numbers as unsigned Andreas Dilger
2020-02-29 23:34   ` Theodore Y. Ts'o
2020-02-07  1:09 ` [PATCH 7/9] e2fsck: fix overflow if more than 4B inodes Andreas Dilger
2020-02-29 23:35   ` Theodore Y. Ts'o
2020-02-07  1:09 ` [PATCH 8/9] e2fsck: consistently use ext2fs_get_mem() Andreas Dilger
2020-02-29 23:36   ` Theodore Y. Ts'o
2020-03-04 23:23     ` Theodore Y. Ts'o
2020-02-07  1:09 ` [PATCH 9/9] misc: handle very large files with filefrag Andreas Dilger
2020-03-04 23:27   ` Theodore Y. Ts'o
2020-02-12  0:58 ` [PATCH] " Andreas Dilger
2020-02-12  1:09   ` Andreas Dilger
2020-02-12  1:07 ` Andreas Dilger [this message]
2020-03-04 23:39   ` [PATCH] e2fsck: avoid overflow with very large dirs Theodore Y. Ts'o
2020-02-29 23:25 ` [PATCH 1/9] e2fsck: fix e2fsck_allocate_memory() overflow Theodore Y. Ts'o

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1581469641-85696-1-git-send-email-adilger@whamcloud.com \
    --to=adilger@whamcloud.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).