linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Port SquashFS to 2.6
@ 2003-07-19 22:59 junkio
  2003-07-19 23:35 ` David Dillow
  2003-07-20  1:50 ` Bernd Eckenfels
  0 siblings, 2 replies; 13+ messages in thread
From: junkio @ 2003-07-19 22:59 UTC (permalink / raw)
  To: linux-kernel; +Cc: Phillip Lougher

The following is a straight port of Phillip Lougher's SquashFS
(http://squashfs.sf.net).  The port is based on squashfs 1.2 for
2.4.20.

The patch is on top of, and tested with, 2.6.0-test1-ac2.  It
applies cleanly on vanilla 2.6.0-test1 (since -ac2 is mostly
bugfixes, there is not much point testing with vanilla, though).

There are four things that are different from the 2.4 version
(is there "notes on porting filesystems from 2.4 to 2.6"
somewhere, btw?).

 - Hooks in init/do_mounts.c to mount things in initramfs seems
   to have removed (there is no mention of CRAMFS there
   anymore), so that part of the patch has been removed.

 - 2.6 vfs calls dir_ops.readdir without BKL held, and expected
   to do that itself if needed.  The attached port mimics what
   cramfs/inode.c does differently between 2.4 and 2.6.

 - struct super_block in 2.6 vfs has removed the embedded union
   for filesystem private data.  Filesystems are expected to use
   void *s_fs_info member in it to hang private data in
   kmalloc'ed memory if needed.  The attached port again mimics
   what cramfs/inode.c does differently between 2.4 and 2.6.

 - struct inode in 2.6 vfs has removed the embedded union for
   filesystem private data.  Filesystems with private data now
   define its own xxx_inode_info struct that embeds struct
   inode.  IOW, containment has become inside out.  The attached
   port defines alloc_inode (and corresponding destroy_inode)
   members in the super_operations struct, mimicking what
   ext3/super.c does differently between 2.4 and 2.6.


diff -Naur linux-2.6.0-test1-ac2/fs/Kconfig linux-2.6.0-test1-ac2-sfs/fs/Kconfig
--- linux-2.6.0-test1-ac2/fs/Kconfig	2003-07-10 13:10:54.000000000 -0700
+++ linux-2.6.0-test1-ac2/fs/Kconfig	2003-07-16 13:44:53.000000000 -0700
@@ -1114,6 +1114,13 @@
 
 	  If unsure, say N.
 
+config SQUASHFS
+	tristate "SquashFs file system support"
+	help
+	  Saying Y here includes support for SquashFs.
+
+	  If unsure, say N.
+
 config VXFS_FS
 	tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
 	help
diff -Naur linux-2.6.0-test1-ac2/fs/Makefile linux-2.6.0-test1-ac2-sfs/fs/Makefile
--- linux-2.6.0-test1-ac2/fs/Makefile	2003-07-10 13:10:55.000000000 -0700
+++ linux-2.6.0-test1-ac2/fs/Makefile	2003-07-16 13:45:34.000000000 -0700
@@ -53,6 +53,7 @@
 obj-$(CONFIG_JBD)		+= jbd/
 obj-$(CONFIG_EXT2_FS)		+= ext2/
 obj-$(CONFIG_CRAMFS)		+= cramfs/
+obj-$(CONFIG_SQUASHFS)		+= squashfs/
 obj-$(CONFIG_RAMFS)		+= ramfs/
 obj-$(CONFIG_HUGETLBFS)		+= hugetlbfs/
 obj-$(CONFIG_CODA_FS)		+= coda/
diff -Naur linux-2.6.0-test1-ac2/fs/squashfs/Makefile linux-2.6.0-test1-ac2-sfs/fs/squashfs/Makefile
--- linux-2.6.0-test1-ac2/fs/squashfs/Makefile	1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.0-test1-ac2/fs/squashfs/Makefile	2003-07-15 12:17:43.000000000 -0700
@@ -0,0 +1,8 @@
+#
+# Makefile for the linux squashfs routines.
+#
+
+obj-$(CONFIG_SQUASHFS) += squashfs.o
+
+squashfs-objs := inode.o
+
diff -Naur linux-2.6.0-test1-ac2/fs/squashfs/inode.c linux-2.6.0-test1-ac2-sfs/fs/squashfs/inode.c
--- linux-2.6.0-test1-ac2/fs/squashfs/inode.c	1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.0-test1-ac2/fs/squashfs/inode.c	2003-07-17 22:21:41.000000000 -0700
@@ -0,0 +1,1020 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002 Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * inode.c
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/dcache.h>
+#include <asm/uaccess.h>
+#include <linux/wait.h>
+#include <asm/semaphore.h>
+#include <linux/zlib.h>
+#include <linux/blkdev.h>
+#include <linux/vmalloc.h>
+#include <linux/smp_lock.h>
+#include <linux/buffer_head.h>
+#include <linux/squashfs_fs_sb.h>
+#include <linux/vfs.h>
+
+#ifdef SQUASHFS_TRACE
+#define TRACE(s, args...)				printk(KERN_NOTICE "SQUASHFS: "s, ## args)
+#else
+#define TRACE(s, args...)				{}
+#endif
+
+#define ERROR(s, args...)				printk(KERN_ERR "SQUASHFS error: "s, ## args)
+
+#define SERROR(s, args...)				if(!silent) printk(KERN_ERR "SQUASHFS error: "s, ## args)
+#define WARNING(s, args...)				printk(KERN_WARNING "SQUASHFS: "s, ## args)
+
+typedef struct squashfs_inode_info {
+	unsigned int	start_block;
+	unsigned int	block_list_start;
+	unsigned int	offset;
+
+	struct inode	vfs_inode;
+} squashfs_inode_info;
+
+static inline struct squashfs_inode_info *SQUASH_I(struct inode *inode)
+{
+	return container_of(inode, struct squashfs_inode_info, vfs_inode);
+}
+
+static int squashfs_fill_super(struct super_block *, void *, int);
+static void squashfs_put_super(struct super_block *);
+static int squashfs_statfs(struct super_block *, struct kstatfs *);
+static int squashfs_symlink_readpage(struct file *file, struct page *page);
+static int squashfs_readpage(struct file *file, struct page *page);
+static int squashfs_readdir(struct file *, void *, filldir_t);
+static struct dentry *squashfs_lookup(struct inode *, struct dentry *, struct nameidata *);
+static unsigned int read_data(struct super_block *s, char *buffer,
+		unsigned int index, int length, unsigned int *next_index);
+static int squashfs_get_cached_block(struct super_block *s, char *buffer,
+		unsigned int block, unsigned int offset, int length,
+		unsigned int *next_block, unsigned int *next_offset);
+static struct inode *squashfs_iget(struct super_block *s, squashfs_inode inode);
+static void squashfs_put_super(struct super_block *s);
+static struct super_block *squashfs_get_sb(struct file_system_type *, int, const char *, void *);
+static struct inode *squashfs_alloc_inode(struct super_block *);
+static void squashfs_destroy_inode(struct inode *);
+
+DECLARE_MUTEX(read_data_mutex);
+
+static z_stream stream;
+
+static unsigned char squashfs_filetype_table[] = {
+	DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR
+};
+
+static struct super_operations squashfs_ops = {
+	.alloc_inode	= squashfs_alloc_inode,
+	.destroy_inode	= squashfs_destroy_inode,
+	.statfs		= squashfs_statfs,
+	.put_super	= squashfs_put_super,
+};
+
+static struct address_space_operations squashfs_symlink_aops = {
+	.readpage	= squashfs_symlink_readpage
+};
+
+static struct address_space_operations squashfs_aops = {
+	.readpage	= squashfs_readpage
+};
+
+static struct file_operations squashfs_dir_ops = {
+	.read		= generic_read_dir,
+	.readdir	= squashfs_readdir
+};
+
+static struct inode_operations squashfs_dir_inode_ops = {
+	.lookup		= squashfs_lookup
+};
+
+static struct file_system_type squashfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "squashfs",
+	.get_sb		= squashfs_get_sb,
+	.kill_sb	= kill_block_super,
+	.fs_flags	= FS_REQUIRES_DEV,
+};
+
+static unsigned int read_data(struct super_block *s, char *buffer,
+		unsigned int index, int length, unsigned int *next_index)
+{
+	squashfs_sb_info *msBlk = s->s_fs_info;
+	struct buffer_head *bh[((SQUASHFS_FILE_MAX_SIZE - 1) >> msBlk->devblksize_log2) + 2];
+	unsigned short c_byte;
+	unsigned int offset = index & ((1 << msBlk->devblksize_log2) - 1);
+	unsigned int cur_index = index >> msBlk->devblksize_log2;
+	int bytes, avail_bytes, b, k;
+	char *c_buffer;
+	unsigned int compressed;
+
+	if(!(bh[0] = sb_bread(s, cur_index)))
+		goto read_failure;
+
+	if(length)
+		c_byte = length;
+	else {
+		if(msBlk->devblksize - offset == 1) {
+			if(msBlk->swap)
+				((unsigned char *) &c_byte)[1] = *((unsigned char *) (bh[0]->b_data + offset));
+			else
+				((unsigned char *) &c_byte)[0] = *((unsigned char *) (bh[0]->b_data + offset));
+			brelse(bh[0]);
+			if(!(bh[0] = sb_bread(s, ++cur_index)))
+				goto read_failure;
+			if(msBlk->swap)
+				((unsigned char *) &c_byte)[0] = *((unsigned char *) bh[0]->b_data); 
+			else
+				((unsigned char *) &c_byte)[1] = *((unsigned char *) bh[0]->b_data); 
+			offset = 1;
+		}
+		else {
+			if(msBlk->swap) {
+				((unsigned char *) &c_byte)[1] = *((unsigned char *) (bh[0]->b_data + offset));
+				((unsigned char *) &c_byte)[0] = *((unsigned char *) (bh[0]->b_data + offset + 1)); 
+			} else
+				c_byte = *((unsigned short *) (bh[0]->b_data + offset));
+			offset += 2;
+		}
+		if(SQUASHFS_CHECK_DATA(msBlk->sBlk.flags)) {
+			if(offset == msBlk->devblksize) {
+				brelse(bh[0]);
+				if(!(bh[0] = sb_bread(s, ++cur_index)))
+					goto read_failure;
+				offset = 0;
+			}
+			if(*((unsigned char *) (bh[0]->b_data + offset)) != SQUASHFS_MARKER_BYTE) {
+				ERROR("Metadata block marker corrupt @ %x\n", index);
+				brelse(bh[0]);
+				return 0;
+			}
+			offset ++;
+		}
+	}
+
+	bytes = msBlk->devblksize - offset;
+	c_buffer = (compressed = SQUASHFS_COMPRESSED(c_byte)) ? msBlk->read_data : buffer;
+	c_byte = SQUASHFS_COMPRESSED_SIZE(c_byte);
+
+	TRACE("Block @ 0x%x, %scompressed size %d\n", index, compressed ? "" : "un", (unsigned int) c_byte);
+
+	for(b = 1; bytes < c_byte; b++) {
+		if(!(bh[b] = sb_bread(s, ++cur_index)))
+			goto block_release;
+		bytes += msBlk->devblksize;
+	}
+
+	if(compressed)
+		down(&read_data_mutex);
+
+	for(bytes = 0, k = 0; k < b; k++) {
+		avail_bytes = (c_byte - bytes) > (msBlk->devblksize - offset) ? msBlk->devblksize - offset : c_byte - bytes;
+		memcpy(c_buffer + bytes, bh[k]->b_data + offset, avail_bytes);
+		bytes += avail_bytes;
+		offset = 0;
+		brelse(bh[k]);
+	}
+
+	/*
+	 * uncompress block
+	 */
+	if(compressed) {
+		int zlib_err;
+
+		stream.next_in = c_buffer;
+		stream.avail_in = c_byte;
+		stream.next_out = buffer;
+		stream.avail_out = msBlk->read_size;
+		if(((zlib_err = zlib_inflateInit(&stream)) != Z_OK) ||
+				((zlib_err = zlib_inflate(&stream, Z_FINISH)) != Z_STREAM_END) ||
+				((zlib_err = zlib_inflateEnd(&stream)) != Z_OK)) {
+			ERROR("zlib_fs returned unexpected result 0x%x\n", zlib_err);
+			bytes = 0;
+		} else
+			bytes = stream.total_out;
+		up(&read_data_mutex);
+	}
+
+	if(next_index)
+		*next_index = index + c_byte + (length ? 0 : (SQUASHFS_CHECK_DATA(msBlk->sBlk.flags) ? 3 : 2));
+
+	return bytes;
+
+block_release:
+	while(--b >= 0) brelse(bh[b]);
+
+read_failure:
+	ERROR("sb_bread failed reading block 0x%x\n", cur_index);
+	return 0;
+}
+
+
+static int squashfs_get_cached_block(struct super_block *s, char *buffer,
+		unsigned int block, unsigned int offset, int length,
+		unsigned int *next_block, unsigned int *next_offset)
+{
+	squashfs_sb_info *msBlk = s->s_fs_info;
+	int n, i, bytes, return_length = length;
+	unsigned int next_index;
+
+	TRACE("Entered squashfs_get_cached_block [%x:%x]\n", block, offset);
+
+	for(;;) {
+		for(i = 0; i < SQUASHFS_CACHED_BLKS; i++) 
+			if(msBlk->block_cache[i].block == block)
+				break; 
+		
+		down(&msBlk->block_cache_mutex);
+		if(i == SQUASHFS_CACHED_BLKS) {
+			/* read inode header block */
+			for(i = msBlk->next_cache, n = SQUASHFS_CACHED_BLKS; n ; n --, i = (i + 1) % SQUASHFS_CACHED_BLKS)
+				if(msBlk->block_cache[i].block != SQUASHFS_USED_BLK)
+					break;
+			if(n == 0) {
+				up(&msBlk->block_cache_mutex);
+				sleep_on(&msBlk->waitq);
+				continue;
+			}
+			msBlk->next_cache = (i + 1) % SQUASHFS_CACHED_BLKS;
+
+			if(msBlk->block_cache[i].block == SQUASHFS_INVALID_BLK) {
+				if(!(msBlk->block_cache[i].data = (unsigned char *)
+							kmalloc(SQUASHFS_METADATA_SIZE, GFP_KERNEL))) {
+					ERROR("Failed to allocate cache block\n");
+					up(&msBlk->block_cache_mutex);
+					return 0;
+				}
+			}
+	
+			msBlk->block_cache[i].block = SQUASHFS_USED_BLK;
+			up(&msBlk->block_cache_mutex);
+			if(!(msBlk->block_cache[i].length = read_data(s, msBlk->block_cache[i].data, block, 0,
+							&next_index))) {
+				ERROR("Unable to read cache block [%x:%x]\n", block, offset);
+				return 0;
+			}
+			down(&msBlk->block_cache_mutex);
+			wake_up(&msBlk->waitq);
+			msBlk->block_cache[i].block = block;
+			msBlk->block_cache[i].next_index = next_index;
+			TRACE("Read cache block [%x:%x]\n", block, offset);
+		}
+
+		if(msBlk->block_cache[i].block != block) {
+			up(&msBlk->block_cache_mutex);
+			continue;
+		}
+
+		if((bytes = msBlk->block_cache[i].length - offset) >= length) {
+			memcpy(buffer, msBlk->block_cache[i].data + offset, length);
+			if(msBlk->block_cache[i].length - offset == length) {
+				*next_block = msBlk->block_cache[i].next_index;
+				*next_offset = 0;
+			} else {
+				*next_block = block;
+				*next_offset = offset + length;
+			}
+	
+			up(&msBlk->block_cache_mutex);
+			return return_length;
+		} else {
+			memcpy(buffer, msBlk->block_cache[i].data + offset, bytes);
+			block = msBlk->block_cache[i].next_index;
+			up(&msBlk->block_cache_mutex);
+			length -= bytes;
+			offset = 0;
+			buffer += bytes;
+		}
+	}
+}
+
+
+static struct inode *squashfs_iget(struct super_block *s, squashfs_inode inode)
+{
+	struct inode *i = new_inode(s);
+	struct squashfs_inode_info *sqi = SQUASH_I(i);
+	squashfs_sb_info *msBlk = s->s_fs_info;
+	squashfs_super_block *sBlk = &msBlk->sBlk;
+	unsigned int block = SQUASHFS_INODE_BLK(inode) + sBlk->inode_table_start;
+	unsigned int offset = SQUASHFS_INODE_OFFSET(inode);
+	unsigned int next_block, next_offset;
+	squashfs_base_inode_header inodeb;
+
+	TRACE("Entered squashfs_iget\n");
+
+	if(msBlk->swap) {
+		squashfs_base_inode_header sinodeb;
+
+		if(!squashfs_get_cached_block(s, (char *) &sinodeb, block,  offset,
+					sizeof(sinodeb), &next_block, &next_offset))
+			goto failed_read;
+		SQUASHFS_SWAP_BASE_INODE_HEADER(&inodeb, &sinodeb, sizeof(sinodeb));
+	} else
+		if(!squashfs_get_cached_block(s, (char *) &inodeb, block,  offset,
+					sizeof(inodeb), &next_block, &next_offset))
+			goto failed_read;
+
+	i->i_nlink = 1;
+
+	i->i_mtime.tv_sec = sBlk->mkfs_time;
+	i->i_atime.tv_sec = sBlk->mkfs_time;
+	i->i_ctime.tv_sec = sBlk->mkfs_time;
+	i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
+	i->i_uid = msBlk->uid[((inodeb.inode_type - 1) / SQUASHFS_TYPES) * 16 + inodeb.uid];
+	i->i_ino = SQUASHFS_MK_VFS_INODE(block - sBlk->inode_table_start, offset);
+
+	if(inodeb.guid == SQUASHFS_GUIDS)
+		i->i_gid = i->i_uid;
+	else
+		i->i_gid = msBlk->guid[inodeb.guid];
+
+	i->i_mode = inodeb.mode;
+
+	switch((inodeb.inode_type - 1) % SQUASHFS_TYPES + 1) {
+		case SQUASHFS_FILE_TYPE: {
+			squashfs_reg_inode_header inodep;
+
+			if(msBlk->swap) {
+				squashfs_reg_inode_header sinodep;
+
+				if(!squashfs_get_cached_block(s, (char *) &sinodep, block,  offset, sizeof(sinodep),
+							&next_block, &next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_REG_INODE_HEADER(&inodep, &sinodep);
+			} else
+				if(!squashfs_get_cached_block(s, (char *) &inodep, block,  offset, sizeof(inodep),
+							&next_block, &next_offset))
+					goto failed_read;
+
+			i->i_size = inodep.file_size;
+			i->i_fop = &generic_ro_fops;
+			i->i_data.a_ops = &squashfs_aops;
+			i->i_mode |= S_IFREG;
+
+			i->i_mtime.tv_sec = inodep.mtime;
+			i->i_atime.tv_sec = inodep.mtime;
+			i->i_ctime.tv_sec = inodep.mtime;
+			i->i_mtime.tv_nsec = i->i_atime.tv_nsec =
+				i->i_ctime.tv_nsec = 0;
+			sqi->start_block = inodep.start_block;
+			sqi->block_list_start = next_block;
+			sqi->offset = next_offset;
+			TRACE("File inode %x:%x, start_block %x, block_list_start %x, offset %x\n",
+					SQUASHFS_INODE_BLK(inode), offset, inodep.start_block, next_block, next_offset);
+			break;
+		}
+		case SQUASHFS_DIR_TYPE: {
+			squashfs_dir_inode_header inodep;
+
+			if(msBlk->swap) {
+				squashfs_dir_inode_header sinodep;
+
+				if(!squashfs_get_cached_block(s, (char *) &sinodep, block,  offset, sizeof(sinodep),
+							&next_block, &next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_DIR_INODE_HEADER(&inodep, &sinodep);
+			} else
+				if(!squashfs_get_cached_block(s, (char *) &inodep, block,  offset, sizeof(inodep),
+							&next_block, &next_offset))
+					goto failed_read;
+
+			i->i_size = inodep.file_size;
+			i->i_op = &squashfs_dir_inode_ops;
+			i->i_fop = &squashfs_dir_ops;
+			i->i_mode |= S_IFDIR;
+			i->i_mtime.tv_sec = inodep.mtime;
+			i->i_atime.tv_sec = inodep.mtime;
+			i->i_ctime.tv_sec = inodep.mtime;
+			i->i_mtime.tv_nsec = i->i_atime.tv_nsec =
+				i->i_ctime.tv_nsec = 0;
+			sqi->start_block = inodep.start_block;
+			sqi->offset = inodep.offset;
+			TRACE("Directory inode %x:%x, start_block %x, offset %x\n", SQUASHFS_INODE_BLK(inode), offset,
+					inodep.start_block, inodep.offset);
+			break;
+		}
+		case SQUASHFS_SYMLINK_TYPE: {
+			squashfs_symlink_inode_header inodep;
+	
+			if(msBlk->swap) {
+				squashfs_symlink_inode_header sinodep;
+
+				if(!squashfs_get_cached_block(s, (char *) &sinodep, block,  offset, sizeof(sinodep),
+							&next_block, &next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_SYMLINK_INODE_HEADER(&inodep, &sinodep);
+			} else
+				if(!squashfs_get_cached_block(s, (char *) &inodep, block,  offset, sizeof(inodep),
+							&next_block, &next_offset))
+					goto failed_read;
+
+			i->i_size = inodep.symlink_size;
+			i->i_op = &page_symlink_inode_operations;
+			i->i_data.a_ops = &squashfs_symlink_aops;
+			i->i_mode |= S_IFLNK;
+			sqi->start_block = next_block;
+			sqi->offset = next_offset;
+			TRACE("Symbolic link inode %x:%x, start_block %x, offset %x\n",
+				SQUASHFS_INODE_BLK(inode), offset, next_block, next_offset);
+			break;
+		 }
+		 case SQUASHFS_BLKDEV_TYPE:
+		 case SQUASHFS_CHRDEV_TYPE: {
+			squashfs_dev_inode_header inodep;
+
+			if(msBlk->swap) {
+				squashfs_dev_inode_header sinodep;
+
+				if(!squashfs_get_cached_block(s, (char *) &sinodep, block,  offset, sizeof(sinodep),
+							&next_block, &next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_DEV_INODE_HEADER(&inodep, &sinodep);
+			} else	
+				if(!squashfs_get_cached_block(s, (char *) &inodep, block,  offset, sizeof(inodep),
+							&next_block, &next_offset))
+					goto failed_read;
+
+			i->i_size = 0;
+			i->i_mode |= (inodeb.inode_type == SQUASHFS_CHRDEV_TYPE) ? S_IFCHR : S_IFBLK;
+			init_special_inode(i, i->i_mode, inodep.rdev);
+			TRACE("Device inode %x:%x, rdev %x\n", SQUASHFS_INODE_BLK(inode), offset, inodep.rdev);
+			break;
+		 }
+		 default:
+			ERROR("Unknown inode type %d in squashfs_iget!\n", inodeb.inode_type);
+				goto failed_read1;
+	}
+	
+	return i;
+
+failed_read:
+	ERROR("Unable to read inode [%x:%x]\n", block, offset);
+
+failed_read1:
+	return NULL;
+}
+
+
+static int squashfs_fill_super(struct super_block *s, void *data, int silent)
+{
+	char bbuf[BDEVNAME_SIZE];
+	squashfs_sb_info *msBlk;
+	squashfs_super_block *sBlk;
+	int i;
+
+	TRACE("Entered squashfs_fill_super\n");
+
+	msBlk = kmalloc(sizeof(squashfs_sb_info), GFP_KERNEL);
+	if (!msBlk)
+		return -ENOMEM;
+	s->s_fs_info = msBlk;
+	sBlk = &(msBlk->sBlk);
+	memset(msBlk, 0, sizeof(squashfs_sb_info));
+
+	msBlk->devblksize = sb_min_blocksize(s, BLOCK_SIZE);
+	msBlk->devblksize_log2 = ffz(~msBlk->devblksize);
+
+	init_MUTEX(&msBlk->read_page_mutex);
+	init_MUTEX(&msBlk->block_cache_mutex);
+	
+	init_waitqueue_head(&msBlk->waitq);
+
+	if(!read_data(s, (char *) sBlk, SQUASHFS_START, sizeof(squashfs_super_block) | SQUASHFS_COMPRESSED_BIT, NULL)) {
+		SERROR("unable to read superblock\n");
+		goto failed_mount;
+	}
+
+	/* Check it is a SQUASHFS superblock */
+	msBlk->swap = 0;
+	if((s->s_magic = sBlk->s_magic) != SQUASHFS_MAGIC) {
+		if(sBlk->s_magic == SQUASHFS_MAGIC_SWAP) {
+			squashfs_super_block sblk;
+			WARNING("Mounting a different endian SQUASHFS filesystem on %s\n", __bdevname(s->s_dev, bbuf));
+			SQUASHFS_SWAP_SUPER_BLOCK(&sblk, sBlk);
+			memcpy(sBlk, &sblk, sizeof(squashfs_super_block));
+			msBlk->swap = 1;
+		} else  {
+			SERROR("Can't find a SQUASHFS superblock on %s\n", __bdevname(s->s_dev, bbuf));
+			goto failed_mount;
+		}
+	}
+
+	/* Check the MAJOR & MINOR versions */
+	if(sBlk->s_major != SQUASHFS_MAJOR || sBlk->s_minor > SQUASHFS_MINOR) {
+		SERROR("Major/Minor mismatch, filesystem is (%d:%d), I support (%d: <= %d)\n",
+				sBlk->s_major, sBlk->s_minor, SQUASHFS_MAJOR, SQUASHFS_MINOR);
+		goto failed_mount;
+	}
+
+	TRACE("Found valid superblock on %s\n", __bdevname(s->s_dev, bbuf));
+	TRACE("Inodes are %scompressed\n", SQUASHFS_UNCOMPRESSED_INODES(sBlk->flags) ? "un" : "");
+	TRACE("Data is %scompressed\n", SQUASHFS_UNCOMPRESSED_DATA(sBlk->flags) ? "un" : "");
+	TRACE("Check data is %s present in the filesystem\n", SQUASHFS_CHECK_DATA(sBlk->flags) ? "" : "not");
+	TRACE("Filesystem size %d bytes\n", sBlk->bytes_used);
+	TRACE("Block size %d\n", sBlk->block_size);
+	TRACE("Number of inodes %d\n", sBlk->inodes);
+	TRACE("Number of uids %d\n", sBlk->no_uids);
+	TRACE("Number of gids %d\n", sBlk->no_guids);
+	TRACE("sBlk->inode_table_start %x\n", sBlk->inode_table_start);
+	TRACE("sBlk->directory_table_start %x\n", sBlk->directory_table_start);
+	TRACE("sBlk->uid_start %x\n", sBlk->uid_start);
+
+	s->s_flags |= MS_RDONLY;
+	s->s_op = &squashfs_ops;
+
+	/* Init inode_table block pointer array */
+	if(!(msBlk->block_cache = (squashfs_cache *) kmalloc(sizeof(squashfs_cache) * SQUASHFS_CACHED_BLKS, GFP_KERNEL))) {
+		ERROR("Failed to allocate block cache\n");
+		goto failed_mount;
+	}
+
+	for(i = 0; i < SQUASHFS_CACHED_BLKS; i++)
+		msBlk->block_cache[i].block = SQUASHFS_INVALID_BLK;
+
+	msBlk->next_cache = 0;
+
+	/* Allocate read_data block */
+	msBlk->read_size = (sBlk->block_size < SQUASHFS_METADATA_SIZE) ? SQUASHFS_METADATA_SIZE : sBlk->block_size;
+	if(!(msBlk->read_data = (char *) kmalloc(msBlk->read_size, GFP_KERNEL))) {
+		ERROR("Failed to allocate read_data block\n");
+		goto failed_mount1;
+	}
+
+	/* Allocate read_page block */
+	if(sBlk->block_size > PAGE_CACHE_SIZE && 
+	   !(msBlk->read_page = (char *) kmalloc(sBlk->block_size, GFP_KERNEL))) {
+		ERROR("Failed to allocate read_page block\n");
+		goto failed_mount2;
+	}
+
+	/* Allocate uid and gid tables */
+	if(!(msBlk->uid = (squashfs_uid *) kmalloc((sBlk->no_uids +
+		sBlk->no_guids) * sizeof(squashfs_uid), GFP_KERNEL))) {
+		ERROR("Failed to allocate uid/gid table\n");
+		goto failed_mount3;
+	}
+	msBlk->guid = msBlk->uid + sBlk->no_uids;
+   
+	if(msBlk->swap) {
+		squashfs_uid suid[sBlk->no_uids + sBlk->no_guids];
+
+		if(!read_data(s, (char *) &suid, sBlk->uid_start, ((sBlk->no_uids + sBlk->no_guids) *
+				sizeof(squashfs_uid)) | SQUASHFS_COMPRESSED_BIT, NULL)) {
+			SERROR("unable to read uid/gid table\n");
+			goto failed_mount4;
+		}
+		SQUASHFS_SWAP_DATA(msBlk->uid, suid, (sBlk->no_uids + sBlk->no_guids), (sizeof(squashfs_uid) * 8));
+	} else
+		if(!read_data(s, (char *) msBlk->uid, sBlk->uid_start, ((sBlk->no_uids + sBlk->no_guids) *
+				sizeof(squashfs_uid)) | SQUASHFS_COMPRESSED_BIT, NULL)) {
+			SERROR("unable to read uid/gid table\n");
+			goto failed_mount4;
+		}
+
+	if(!(s->s_root = d_alloc_root(squashfs_iget(s, sBlk->root_inode)))) {
+		ERROR("Root inode create failed\n");
+		goto failed_mount4;
+	}
+
+	TRACE("Leaving squashfs_fill_super\n");
+	return 0;
+
+failed_mount4:
+	kfree(msBlk->uid);
+failed_mount3:
+	kfree(msBlk->read_page);
+failed_mount2:
+	kfree(msBlk->read_data);
+failed_mount1:
+	kfree(msBlk->block_cache);
+failed_mount:
+	kfree(msBlk);
+	s->s_fs_info = NULL;
+	return -EINVAL;
+}
+
+
+static int squashfs_statfs(struct super_block *s, struct kstatfs *buf)
+{
+	squashfs_super_block *sBlk = &((squashfs_sb_info*)(s->s_fs_info))->sBlk;
+
+	TRACE("Entered squashfs_statfs\n");
+	buf->f_type = SQUASHFS_MAGIC;
+	buf->f_bsize = sBlk->block_size;
+	buf->f_blocks = ((sBlk->bytes_used - 1) >> sBlk->block_log) + 1;
+	buf->f_bfree = buf->f_bavail = 0;
+	buf->f_files = sBlk->inodes;
+	buf->f_ffree = 0;
+	buf->f_namelen = SQUASHFS_NAME_LEN;
+	return 0;
+}
+
+
+static int squashfs_symlink_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct squashfs_inode_info *sqi = SQUASH_I(inode);
+	int index = page->index << PAGE_CACHE_SHIFT, length = 0, bytes;
+	int block = sqi->start_block;
+	int offset = sqi->offset;
+
+	TRACE("Entered squashfs_symlink_readpage, page index %d, start block %x, offset %x\n",
+		page->index, sqi->start_block, sqi->offset);
+
+	while(length < index) {
+		char buffer[PAGE_CACHE_SIZE];
+
+		if(length += bytes = squashfs_get_cached_block(inode->i_sb, buffer, block, offset,
+					PAGE_CACHE_SIZE, &block, &offset), !bytes) {
+			ERROR("Unable to read symbolic link [%x:%x]\n", block, offset);
+			goto skip_read;
+		}
+	}
+
+	if(length != index) {
+		ERROR("(squashfs_symlink_readpage) length != index\n");
+		return 0;
+	}
+
+	bytes = (inode->i_size - length) > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : inode->i_size - length;
+	if(!squashfs_get_cached_block(inode->i_sb, page_address(page), block, offset, bytes, &block, &offset))
+		ERROR("Unable to read symbolic link [%x:%x]\n", block, offset);
+
+skip_read:
+	memset(page_address(page) + bytes, 0, PAGE_CACHE_SIZE - bytes);
+	flush_dcache_page(page);
+	SetPageUptodate(page);
+	unlock_page(page);
+
+	return 0;
+}
+
+
+#define SIZE 1024
+static int squashfs_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct squashfs_inode_info *sqi = SQUASH_I(inode);
+	squashfs_sb_info *msBlk = inode->i_sb->s_fs_info;
+	squashfs_super_block *sBlk = &msBlk->sBlk;
+	unsigned char block_list[SIZE];
+	unsigned short *block_listp;
+	int index = sBlk->block_log > PAGE_CACHE_SHIFT ?
+		page->index >> (sBlk->block_log - PAGE_CACHE_SHIFT) :
+		page->index << (PAGE_CACHE_SHIFT - sBlk->block_log);
+	int block = sqi->start_block, i = 0;
+	int bytes = sqi->block_list_start;
+	int offset = sqi->offset;
+	int file_blocks = ((inode->i_size - 1) >> sBlk->block_log) + 1;
+	
+	TRACE("Entered squashfs_readpage, page index %d, start block %x\n", page->index,
+		sqi->start_block);
+
+	if(index > file_blocks)
+		goto skip_read;
+
+	for(;;) {
+		int blocks = (index + 1 - i) > (SIZE >> 1) ? SIZE >> 1 : (index + 1 - i);
+		if(msBlk->swap) {
+			unsigned char sblock_list[SIZE];
+			if(!squashfs_get_cached_block(inode->i_sb, (char *) sblock_list, bytes, offset, blocks << 1, &bytes, &offset)) {
+				ERROR("Unable to read block list [%d:%x]\n", bytes, offset);
+				goto skip_read;
+			}
+			SQUASHFS_SWAP_SHORTS(((unsigned short *)block_list), ((unsigned short *)sblock_list), blocks);
+		} else
+			if(!squashfs_get_cached_block(inode->i_sb, (char *) block_list, bytes, offset, blocks << 1, &bytes, &offset)) {
+				ERROR("Unable to read block list [%d:%x]\n", bytes, offset);
+				goto skip_read;
+			}
+		for(block_listp = (unsigned short *) block_list; i < index && blocks; i ++, block_listp ++, blocks --)
+			block += SQUASHFS_COMPRESSED_SIZE(*block_listp);
+		if(blocks)
+			break;
+	}
+
+	if(sBlk->block_log > PAGE_CACHE_SHIFT) {
+		int mask = (1 << (sBlk->block_log - PAGE_CACHE_SHIFT)) - 1;
+		int start_index = page->index & ~mask;
+		int end_index = start_index | mask;
+		int byte_offset = 0;
+
+		down(&msBlk->read_page_mutex);
+		if(!(bytes = read_data(inode->i_sb, msBlk->read_page, block, *block_listp, NULL))) {
+			ERROR("Unable to read page, block %x, size %x\n", block, (int) *block_listp);
+			goto skip_read;
+		}
+
+		for(i = start_index; i <= end_index && byte_offset < bytes; i++, byte_offset += PAGE_CACHE_SIZE) {
+			int available_bytes = (bytes - byte_offset) > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : bytes - byte_offset;
+
+			TRACE("bytes %d, i %d, byte_offset %d, available_bytes %d\n", bytes, i, byte_offset, available_bytes);
+
+			if(i == page->index)  {
+				memcpy(page_address(page), msBlk->read_page + byte_offset, available_bytes);
+				memset(page_address(page) + available_bytes, 0, PAGE_CACHE_SIZE - available_bytes);
+				flush_dcache_page(page);
+				SetPageUptodate(page);
+				unlock_page(page);
+			}  else {
+				struct page *push_page;
+
+				if((push_page = grab_cache_page_nowait(page->mapping, i))) {
+					memcpy(page_address(push_page), msBlk->read_page + byte_offset, available_bytes);
+					memset(page_address(push_page) + available_bytes, 0, PAGE_CACHE_SIZE - available_bytes);
+					flush_dcache_page(push_page);
+					SetPageUptodate(push_page);
+					unlock_page(push_page);
+					page_cache_release(push_page);
+				}
+			}
+		}
+		up( &msBlk->read_page_mutex);
+
+		return 0;
+
+	} else if(sBlk->block_log == PAGE_CACHE_SHIFT) {
+		if(!(bytes = read_data(inode->i_sb, page_address(page), block, *block_listp, NULL)))
+			ERROR("Unable to read page, block %x, size %x\n", block, (int) *block_listp);
+
+	} else {
+		int i_end = index + (1 << (PAGE_CACHE_SHIFT - sBlk->block_log));
+		char *p = (char *) page_address(page);
+		int byte;
+
+		if(i_end > file_blocks)
+			i_end = file_blocks;
+
+		while(index < i_end) {
+			if(!(byte = read_data(inode->i_sb, p, block, *block_listp, NULL))) {
+				ERROR("Unable to read page, block %x, size %x\n", block, (int) *block_listp);
+				goto skip_read;
+			}
+			block += SQUASHFS_COMPRESSED_SIZE(*block_listp);
+			p += byte;
+			bytes += byte;
+			index ++;
+			block_listp ++;
+		}
+	}
+
+skip_read:
+	memset(page_address(page) + bytes, 0, PAGE_CACHE_SIZE - bytes);
+	flush_dcache_page(page);
+	SetPageUptodate(page);
+	unlock_page(page);
+
+	return 0;
+}
+
+
+static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+	struct inode *i = file->f_dentry->d_inode;
+	struct squashfs_inode_info *sqi = SQUASH_I(i);
+	squashfs_sb_info *msBlk = i->i_sb->s_fs_info;
+	squashfs_super_block *sBlk = &msBlk->sBlk;
+	int next_block = sqi->start_block + sBlk->directory_table_start, next_offset =
+		sqi->offset, length = 0, dirs_read = 0, dir_count, bytes;
+	squashfs_dir_header dirh;
+	char buffer[sizeof(squashfs_dir_entry) + SQUASHFS_NAME_LEN + 1];
+	squashfs_dir_entry *dire = (squashfs_dir_entry *) buffer;
+
+	TRACE("Entered squashfs_readdir [%x:%x]\n", next_block, next_offset);
+
+	lock_kernel();
+
+	while(length < i->i_size) {
+		/* read directory header */
+		if(msBlk->swap) {
+			squashfs_dir_header sdirh;
+			if(length += bytes = squashfs_get_cached_block(i->i_sb, (char *) &sdirh, next_block,
+						next_offset, sizeof(sdirh), &next_block, &next_offset), !bytes)
+				goto failed_read;
+			SQUASHFS_SWAP_DIR_HEADER(&dirh, &sdirh);
+		} else
+			if(length += bytes = squashfs_get_cached_block(i->i_sb, (char *) &dirh, next_block,
+						next_offset, sizeof(dirh), &next_block, &next_offset), !bytes)
+				goto failed_read;
+
+		dir_count = dirh.count + 1;
+		while(dir_count--) {
+			if(msBlk->swap) {
+				squashfs_dir_entry sdire;
+				if(length += bytes = squashfs_get_cached_block(i->i_sb, (char *) &sdire, next_block,
+							next_offset, sizeof(sdire), &next_block, &next_offset), !bytes)
+					goto failed_read;
+				SQUASHFS_SWAP_DIR_ENTRY(dire, &sdire);
+			} else
+				if(length += bytes = squashfs_get_cached_block(i->i_sb, (char *) dire, next_block,
+							next_offset, sizeof(*dire), &next_block, &next_offset), !bytes)
+					goto failed_read;
+
+			if(length += bytes = squashfs_get_cached_block(i->i_sb, dire->name, next_block,
+						next_offset, dire->size + 1, &next_block, &next_offset), !bytes)
+				goto failed_read;
+
+			if(file->f_pos >= length)
+				continue;
+
+			dire->name[dire->size + 1] = '\0';
+
+			TRACE("Calling filldir(%x, %s, %d, %d, %x:%x, %d)\n", dirent,
+			dire->name, dire->size + 1, (int) file->f_pos,
+			dirh.start_block, dire->offset, squashfs_filetype_table[dire->type]);
+
+			if(filldir(dirent, dire->name, dire->size + 1, file->f_pos, SQUASHFS_MK_VFS_INODE(dirh.start_block,
+							dire->offset), squashfs_filetype_table[dire->type]) < 0) {
+				TRACE("Filldir returned less than 0\n");
+				unlock_kernel();
+				return dirs_read;
+			}
+
+			file->f_pos = length;
+			dirs_read ++;
+		}
+	}
+	unlock_kernel();
+	return dirs_read;
+
+failed_read:
+	ERROR("Unable to read directory block [%x:%x]\n", next_block, next_offset);
+	unlock_kernel();
+	return 0;
+}
+
+
+static struct dentry *squashfs_lookup(struct inode *i, struct dentry *dentry,
+				      struct nameidata *nd)
+{
+	const char *name =dentry->d_name.name;
+	int len = dentry->d_name.len;
+	struct inode *inode = NULL;
+	struct squashfs_inode_info *sqi = SQUASH_I(i);
+	squashfs_sb_info *msBlk = i->i_sb->s_fs_info;
+	squashfs_super_block *sBlk = &msBlk->sBlk;
+	int next_block = sqi->start_block + sBlk->directory_table_start, next_offset =
+		sqi->offset, length = 0, dir_count, bytes;
+	squashfs_dir_header dirh;
+	char buffer[sizeof(squashfs_dir_entry) + SQUASHFS_NAME_LEN];
+	squashfs_dir_entry *dire = (squashfs_dir_entry *) buffer;
+
+	TRACE("Entered squashfs_lookup [%x:%x]\n", next_block, next_offset);
+
+	while(length < i->i_size) {
+		/* read directory header */
+		if(msBlk->swap) {
+			squashfs_dir_header sdirh;
+			if(length += bytes = squashfs_get_cached_block(i->i_sb, (char *) &sdirh, next_block, next_offset,
+						sizeof(sdirh), &next_block, &next_offset), !bytes)
+				goto failed_read;
+			SQUASHFS_SWAP_DIR_HEADER(&dirh, &sdirh);
+		} else
+			if(length += bytes = squashfs_get_cached_block(i->i_sb, (char *) &dirh, next_block, next_offset,
+						sizeof(dirh), &next_block, &next_offset), !bytes)
+				goto failed_read;
+
+		dir_count = dirh.count + 1;
+		while(dir_count--) {
+			if(msBlk->swap) {
+				squashfs_dir_entry sdire;
+				if(length += bytes = squashfs_get_cached_block(i->i_sb, (char *) &sdire,
+							next_block,next_offset, sizeof(sdire), &next_block, &next_offset), !bytes)
+					goto failed_read;
+				SQUASHFS_SWAP_DIR_ENTRY(dire, &sdire);
+			} else
+				if(length += bytes = squashfs_get_cached_block(i->i_sb, (char *) dire,
+							next_block,next_offset, sizeof(*dire), &next_block, &next_offset), !bytes)
+					goto failed_read;
+
+			if(length += bytes = squashfs_get_cached_block(i->i_sb, dire->name,
+						next_block, next_offset, dire->size + 1, &next_block, &next_offset), !bytes)
+				goto failed_read;
+
+			if((len == dire->size + 1) && !strncmp(name, dire->name, len)) {
+				squashfs_inode ino = SQUASHFS_MKINODE(dirh.start_block, dire->offset);
+
+				TRACE("calling squashfs_iget for directory entry %s, inode %x:%x\n",
+						name, dirh.start_block, dire->offset);
+
+				inode = squashfs_iget(i->i_sb, ino);
+
+				goto exit_loop;
+			}
+		}
+	}
+
+exit_loop:
+	d_add(dentry, inode);
+	return ERR_PTR(0);
+
+failed_read:
+	ERROR("Unable to read directory block [%x:%x]\n", next_block, next_offset);
+	goto exit_loop;
+}
+
+
+static void squashfs_put_super(struct super_block *s)
+{
+	squashfs_sb_info *msBlk = s->s_fs_info;
+	if(msBlk->block_cache) kfree(msBlk->block_cache);
+	if(msBlk->read_data) kfree(msBlk->read_data);
+	if(msBlk->read_page) kfree(msBlk->read_page);
+	if(msBlk->uid) kfree(msBlk->uid);
+	kfree(msBlk);
+	s->s_fs_info = NULL;
+}
+
+static struct super_block *squashfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
+{
+	return get_sb_bdev(fs_type, flags, dev_name, data,
+			   squashfs_fill_super);
+}
+
+static kmem_cache_t * squashfs_inode_cachep;
+
+static struct inode *squashfs_alloc_inode(struct super_block *sb)
+{
+	struct squashfs_inode_info *ii;
+	ii = (struct squashfs_inode_info *)
+		kmem_cache_alloc(squashfs_inode_cachep, SLAB_KERNEL);
+	if (!ii)
+		return NULL;
+	ii->vfs_inode.i_version = 1;
+	return &ii->vfs_inode;
+}
+
+static void squashfs_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(squashfs_inode_cachep, SQUASH_I(inode));
+}
+
+static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+	struct squashfs_inode_info *sqi = (struct squashfs_inode_info *) foo;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR) {
+		inode_init_once(&sqi->vfs_inode);
+	}
+}
+
+static int init_inodecache(void)
+{
+	squashfs_inode_cachep =
+		kmem_cache_create("squashfs_inode_cache",
+				  sizeof(struct squashfs_inode_info),
+				  0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
+				  init_once, NULL);
+	if (squashfs_inode_cachep == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+static void destroy_inodecache(void)
+{
+	kmem_cache_destroy(squashfs_inode_cachep);
+}
+
+static int __init init_squashfs_fs(void)
+{
+
+	if(!(stream.workspace = (char *) vmalloc(zlib_inflate_workspacesize()))) {
+		ERROR("Failed to allocate zlib workspace\n");
+		return -ENOMEM;
+	}
+	if (init_inodecache())
+		return -ENOMEM;
+	return register_filesystem(&squashfs_fs_type);
+}
+
+
+static void __exit exit_squashfs_fs(void)
+{
+	destroy_inodecache();
+	vfree(stream.workspace);
+	unregister_filesystem(&squashfs_fs_type);
+}
+
+
+module_init(init_squashfs_fs);
+module_exit(exit_squashfs_fs);
+MODULE_DESCRIPTION("squashfs, a compressed read-only filesystem");
+MODULE_AUTHOR("Phillip Lougher <phillip@lougher.demon.co.uk>");
+MODULE_LICENSE("GPL");
diff -Naur linux-2.6.0-test1-ac2/include/linux/squashfs_fs.h linux-2.6.0-test1-ac2-sfs/include/linux/squashfs_fs.h
--- linux-2.6.0-test1-ac2/include/linux/squashfs_fs.h	1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.0-test1-ac2/include/linux/squashfs_fs.h	2003-07-18 16:12:47.000000000 -0700
@@ -0,0 +1,304 @@
+#ifndef SQUASHFS_FS
+#define SQUASHFS_FS
+/*
+ * Squashfs
+ *
+ * Copyright (c) 2002 Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * squashfs_fs.h
+ */
+
+#define SQUASHFS_MAJOR			1
+#define SQUASHFS_MINOR			0
+#define SQUASHFS_MAGIC			0x73717368
+#define SQUASHFS_MAGIC_SWAP		0x68737173
+#define SQUASHFS_START			0
+
+/* size of metadata (inode and directory) blocks */
+#define SQUASHFS_METADATA_SIZE		8192
+#define SQUASHFS_METADATA_LOG		13
+
+/* default size of data blocks */
+#define SQUASHFS_FILE_SIZE		32768
+#define SQUASHFS_FILE_LOG		15
+
+#define SQUASHFS_FILE_MAX_SIZE		32768
+
+/* Max number of uids and gids */
+#define SQUASHFS_UIDS			48
+#define SQUASHFS_GUIDS			15
+
+/* Max length of filename (not 255) */
+#define SQUASHFS_NAME_LEN		256
+
+#define SQUASHFS_INVALID		((long long) 0xffffffffffff)
+#define SQUASHFS_INVALID_BLK		((long long) 0xffffffff)
+#define SQUASHFS_USED_BLK		((long long) 0xfffffffe)
+
+/* Filesystem flags */
+#define SQUASHFS_NOI			1
+#define SQUASHFS_NOD			2
+#define SQUASHFS_CHECK			4
+#define SQUASHFS_UNCOMPRESSED_INODES(flags)	(flags & SQUASHFS_NOI)
+#define SQUASHFS_UNCOMPRESSED_DATA(flags)	(flags & SQUASHFS_NOD)
+#define SQUASHFS_CHECK_DATA(flags)		(flags & SQUASHFS_CHECK)
+#define SQUASHFS_MKFLAGS(noi, nod, check_data)	(noi | (nod << 1) | (check_data << 2))
+
+/* Max number of types and file types */
+#define SQUASHFS_TYPES			5
+#define SQUASHFS_DIR_TYPE		1
+#define SQUASHFS_FILE_TYPE		2
+#define SQUASHFS_SYMLINK_TYPE		3
+#define SQUASHFS_BLKDEV_TYPE		4
+#define SQUASHFS_CHRDEV_TYPE		5
+
+/* Flag whether block is compressed or uncompressed, bit is set if block is uncompressed */
+#define SQUASHFS_COMPRESSED_BIT		(1 << 15)
+#define SQUASHFS_COMPRESSED_SIZE(B)	(((B) & ~SQUASHFS_COMPRESSED_BIT) ? \
+					(B) & ~SQUASHFS_COMPRESSED_BIT : SQUASHFS_COMPRESSED_BIT)
+
+#define SQUASHFS_COMPRESSED(B)		(!((B) & SQUASHFS_COMPRESSED_BIT))
+
+/*
+ * Inode number ops.  Inodes consist of a compressed block number, and an uncompressed
+ * offset within that block
+ */
+#define SQUASHFS_INODE_BLK(a)		((unsigned int) ((a) >> 16))
+#define SQUASHFS_INODE_OFFSET(a)	((unsigned int) ((a) & 0xffff))
+#define SQUASHFS_MKINODE(A, B)		((squashfs_inode)(((squashfs_inode) (A) << 16)\
+					+ (B)))
+
+/* Compute 32 bit VFS inode number from squashfs inode number */
+#define SQUASHFS_MK_VFS_INODE(a, b)	((unsigned int) (((a) << 8) + ((b) >> 2) + 1))
+
+/* Translate between VFS mode and squashfs mode */
+#define SQUASHFS_MODE(a)		((a) & 0xfff)
+
+/* cached data constants for filesystem */
+#define SQUASHFS_CACHED_BLKS		8
+
+#define SQUASHFS_MAX_FILE_SIZE_LOG	32
+#define SQUASHFS_MAX_FILE_SIZE		((long long) 1 << SQUASHFS_MAX_FILE_SIZE_LOG - 1)
+
+#define SQUASHFS_MARKER_BYTE		0xff
+
+/*
+ * definitions for structures on disk
+ */
+
+typedef unsigned int		squashfs_block;
+typedef long long		squashfs_inode;
+
+typedef unsigned int		squashfs_uid;
+
+typedef struct squashfs_super_block {
+	unsigned int		s_magic __attribute__ ((packed));
+	unsigned int		inodes __attribute__ ((packed));
+	unsigned int		bytes_used __attribute__ ((packed));
+	unsigned int		uid_start __attribute__ ((packed));
+	unsigned int		guid_start __attribute__ ((packed));
+	unsigned int		inode_table_start __attribute__ ((packed));
+	unsigned int		directory_table_start __attribute__ ((packed));
+	unsigned int		s_major:16 __attribute__ ((packed));
+	unsigned int		s_minor:16 __attribute__ ((packed));
+	unsigned int		block_size:16 __attribute__ ((packed));
+	unsigned int		block_log:16 __attribute__ ((packed));
+	unsigned int		flags:8 __attribute__ ((packed));
+	unsigned int		no_uids:8 __attribute__ ((packed));
+	unsigned int		no_guids:8 __attribute__ ((packed));
+	time_t			mkfs_time /* time of filesystem creation */ __attribute__ ((packed));
+	squashfs_inode		root_inode __attribute__ ((packed));
+} squashfs_super_block;
+
+typedef struct {
+	unsigned int		inode_type:4 __attribute__ ((packed));
+	unsigned int		mode:12 __attribute__ ((packed)); /* protection */
+	unsigned int		uid:4 __attribute__ ((packed)); /* index into uid table */
+	unsigned int		guid:4 __attribute__ ((packed)); /* index into guid table */
+} squashfs_base_inode_header;
+
+typedef struct {
+	unsigned int		inode_type:4 __attribute__ ((packed));
+	unsigned int		mode:12 __attribute__ ((packed)); /* protection */
+	unsigned int		uid:4 __attribute__ ((packed)); /* index into uid table */
+	unsigned int		guid:4 __attribute__ ((packed)); /* index into guid table */
+	unsigned short		rdev __attribute__ ((packed));
+} squashfs_dev_inode_header;
+	
+typedef struct {
+	unsigned int		inode_type:4 __attribute__ ((packed));
+	unsigned int		mode:12 __attribute__ ((packed)); /* protection */
+	unsigned int		uid:4 __attribute__ ((packed)); /* index into uid table */
+	unsigned int		guid:4 __attribute__ ((packed)); /* index into guid table */
+	unsigned short		symlink_size __attribute__ ((packed));
+	char			symlink[0] __attribute__ ((packed));
+} squashfs_symlink_inode_header;
+
+typedef struct {
+	unsigned int		inode_type:4 __attribute__ ((packed));
+	unsigned int		mode:12 __attribute__ ((packed)); /* protection */
+	unsigned int		uid:4 __attribute__ ((packed)); /* index into uid table */
+	unsigned int		guid:4 __attribute__ ((packed)); /* index into guid table */
+	time_t			mtime __attribute__ ((packed));
+	squashfs_block		start_block __attribute__ ((packed));
+	unsigned int		file_size:SQUASHFS_MAX_FILE_SIZE_LOG __attribute__ ((packed));
+	unsigned short		block_list[0]  __attribute__ ((packed));
+} squashfs_reg_inode_header;
+
+typedef struct {
+	unsigned int		inode_type:4 __attribute__ ((packed));
+	unsigned int		mode:12 __attribute__ ((packed)); /* protection */
+	unsigned int		uid:4 __attribute__ ((packed)); /* index into uid table */
+	unsigned int		guid:4 __attribute__ ((packed)); /* index into guid table */
+	unsigned int		file_size:19 __attribute__ ((packed));
+	unsigned int		offset:13 __attribute__ ((packed));
+	time_t			mtime __attribute__ ((packed));
+	unsigned int		start_block:24 __attribute__ ((packed));
+} squashfs_dir_inode_header;
+
+typedef union {
+	squashfs_base_inode_header	base;
+	squashfs_dev_inode_header	dev;
+	squashfs_symlink_inode_header	symlink;
+	squashfs_reg_inode_header	reg;
+	squashfs_dir_inode_header	dir;
+} squashfs_inode_header;
+	
+typedef struct {
+	unsigned int		offset:13 __attribute__ ((packed));
+	unsigned int		type:3 __attribute__ ((packed));
+	unsigned int		size:8 __attribute__ ((packed));
+	char			name[0] __attribute__ ((packed));
+} squashfs_dir_entry;
+
+typedef struct {
+	unsigned int		count:8 __attribute__ ((packed));
+	unsigned int		start_block:24 __attribute__ ((packed));
+} squashfs_dir_header;
+
+
+extern int squashfs_uncompress_block(void *d, int dstlen, void *s, int srclen);
+extern int squashfs_uncompress_init(void);
+extern int squashfs_uncompress_exit(void);
+
+/*
+ * macros to convert each packed bitfield structure from little endian to big
+ * endian and vice versa.  These are needed when creating or using a filesystem on a
+ * machine with different byte ordering to the target architecture.
+ *
+ */
+
+#define SQUASHFS_SWAP_SUPER_BLOCK(s, d) {\
+	SQUASHFS_MEMSET(s, d, sizeof(squashfs_super_block));\
+	SQUASHFS_SWAP((s)->s_magic, d, 0, 32);\
+	SQUASHFS_SWAP((s)->inodes, d, 32, 32);\
+	SQUASHFS_SWAP((s)->bytes_used, d, 64, 32);\
+	SQUASHFS_SWAP((s)->uid_start, d, 96, 32);\
+	SQUASHFS_SWAP((s)->guid_start, d, 128, 32);\
+	SQUASHFS_SWAP((s)->inode_table_start, d, 160, 32);\
+	SQUASHFS_SWAP((s)->directory_table_start, d, 192, 32);\
+	SQUASHFS_SWAP((s)->s_major, d, 224, 16);\
+	SQUASHFS_SWAP((s)->s_minor, d, 240, 16);\
+	SQUASHFS_SWAP((s)->block_size, d, 256, 16);\
+	SQUASHFS_SWAP((s)->block_log, d, 272, 16);\
+	SQUASHFS_SWAP((s)->flags, d, 288, 8);\
+	SQUASHFS_SWAP((s)->no_uids, d, 296, 8);\
+	SQUASHFS_SWAP((s)->no_guids, d, 304, 8);\
+	SQUASHFS_SWAP((s)->mkfs_time, d, 312, 32);\
+	SQUASHFS_SWAP((s)->root_inode, d, 344, 64);\
+}
+
+#define SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, n) {\
+	SQUASHFS_MEMSET(s, d, n);\
+	SQUASHFS_SWAP((s)->inode_type, d, 0, 4);\
+	SQUASHFS_SWAP((s)->mode, d, 4, 12);\
+	SQUASHFS_SWAP((s)->uid, d, 16, 4);\
+	SQUASHFS_SWAP((s)->guid, d, 20, 4);\
+}
+
+#define SQUASHFS_SWAP_DEV_INODE_HEADER(s, d) {\
+	SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_dev_inode_header));\
+	SQUASHFS_SWAP((s)->rdev, d, 24, 16);\
+}
+
+#define SQUASHFS_SWAP_SYMLINK_INODE_HEADER(s, d) {\
+	SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_symlink_inode_header));\
+	SQUASHFS_SWAP((s)->symlink_size, d, 24, 16);\
+}
+
+#define SQUASHFS_SWAP_REG_INODE_HEADER(s, d) {\
+	SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_reg_inode_header));\
+	SQUASHFS_SWAP((s)->mtime, d, 24, 32);\
+	SQUASHFS_SWAP((s)->start_block, d, 56, 32);\
+	SQUASHFS_SWAP((s)->file_size, d, 88, SQUASHFS_MAX_FILE_SIZE_LOG);\
+}
+
+#define SQUASHFS_SWAP_DIR_INODE_HEADER(s, d) {\
+	SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_dir_inode_header));\
+	SQUASHFS_SWAP((s)->file_size, d, 24, 19);\
+	SQUASHFS_SWAP((s)->offset, d, 43, 13);\
+	SQUASHFS_SWAP((s)->mtime, d, 56, 32);\
+	SQUASHFS_SWAP((s)->start_block, d, 88, 24);\
+}
+
+#define SQUASHFS_SWAP_DIR_HEADER(s, d) {\
+	SQUASHFS_MEMSET(s, d, sizeof(squashfs_dir_header));\
+	SQUASHFS_SWAP((s)->count, d, 0, 8);\
+	SQUASHFS_SWAP((s)->start_block, d, 8, 24);\
+}
+
+#define SQUASHFS_SWAP_DIR_ENTRY(s, d) {\
+	SQUASHFS_MEMSET(s, d, sizeof(squashfs_dir_entry));\
+	SQUASHFS_SWAP((s)->offset, d, 0, 13);\
+	SQUASHFS_SWAP((s)->type, d, 13, 3);\
+	SQUASHFS_SWAP((s)->size, d, 16, 8);\
+}
+
+#define SQUASHFS_SWAP_SHORTS(s, d, n) {\
+	int entry;\
+	int bit_position;\
+	SQUASHFS_MEMSET(s, d, n * 2);\
+	for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += 16)\
+		SQUASHFS_SWAP(s[entry], d, bit_position, 16);\
+}
+
+#define SQUASHFS_SWAP_DATA(s, d, n, bits) {\
+	int entry;\
+	int bit_position;\
+	SQUASHFS_MEMSET(s, d, n * bits / 8);\
+	for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += bits)\
+		SQUASHFS_SWAP(s[entry], d, bit_position, bits);\
+}
+
+#ifdef __KERNEL__
+/*
+ * macros used to swap each structure entry, taking into account
+ * bitfields and different bitfield placing conventions on differing architectures
+ */
+#include <asm/byteorder.h>
+#ifdef __BIG_ENDIAN
+	/* convert from little endian to big endian */
+#define SQUASHFS_SWAP(value, p, pos, tbits) _SQUASHFS_SWAP(value, p, pos, tbits, b_pos)
+#else
+	/* convert from big endian to little endian */ 
+#define SQUASHFS_SWAP(value, p, pos, tbits) _SQUASHFS_SWAP(value, p, pos, tbits, 64 - tbits - b_pos)
+#endif
+
+#define _SQUASHFS_SWAP(value, p, pos, tbits, SHIFT) {\
+	int bits;\
+	int b_pos = pos % 8;\
+	unsigned long long val = 0;\
+	unsigned char *s = (unsigned char *)p + (pos / 8);\
+	unsigned char *d = ((unsigned char *) &val) + 7;\
+	for(bits = 0; bits < (tbits + b_pos); bits += 8) \
+		*d-- = *s++;\
+	value = val >> (SHIFT);\
+}
+#define SQUASHFS_MEMSET(s, d, n)	memset(s, 0, n);
+#endif
+#endif
diff -Naur linux-2.6.0-test1-ac2/include/linux/squashfs_fs_sb.h linux-2.6.0-test1-ac2-sfs/include/linux/squashfs_fs_sb.h
--- linux-2.6.0-test1-ac2/include/linux/squashfs_fs_sb.h	1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.0-test1-ac2/include/linux/squashfs_fs_sb.h	2003-07-18 16:12:47.000000000 -0700
@@ -0,0 +1,41 @@
+#ifndef SQUASHFS_FS_SB
+#define SQUASHFS_FS_SB
+/*
+ * Squashfs
+ *
+ * Copyright (c) 2002 Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * squashfs_fs_sb.h
+ */
+
+#include <linux/squashfs_fs.h>
+
+typedef struct {
+	unsigned int	block;
+	int		length;
+	unsigned int	next_index;
+	char		*data;
+	} squashfs_cache;
+
+typedef struct squashfs_sb_info {
+	squashfs_super_block	sBlk;
+	int			devblksize;
+	int			devblksize_log2;
+	int			swap;
+	squashfs_cache		*block_cache;
+	int			next_cache;
+	squashfs_uid		*uid;
+	squashfs_uid		*guid;
+	unsigned int		read_size;
+	char			*read_data;
+	char			*read_page;
+	struct semaphore	read_page_mutex;
+	struct semaphore	block_cache_mutex;
+	wait_queue_head_t	waitq;
+	} squashfs_sb_info;
+#endif
diff -Naur linux-2.6.0-test1-ac2/lib/Kconfig linux-2.6.0-test1-ac2-sfs/lib/Kconfig
--- linux-2.6.0-test1-ac2/lib/Kconfig	2003-05-04 16:53:32.000000000 -0700
+++ linux-2.6.0-test1-ac2/lib/Kconfig	2003-07-16 16:40:44.000000000 -0700
@@ -17,8 +17,8 @@
 #
 config ZLIB_INFLATE
 	tristate
-	default y if CRAMFS=y || PPP_DEFLATE=y || JFFS2_FS=y || ZISOFS_FS=y || BINFMT_ZFLAT=y || CRYPTO_DEFLATE=y
-	default m if CRAMFS=m || PPP_DEFLATE=m || JFFS2_FS=m || ZISOFS_FS=m || BINFMT_ZFLAT=m || CRYPTO_DEFLATE=m
+	default y if CRAMFS=y || SQUASHFS=y || PPP_DEFLATE=y || JFFS2_FS=y || ZISOFS_FS=y || BINFMT_ZFLAT=y || CRYPTO_DEFLATE=y
+	default m if CRAMFS=m || SQUASHFS=m || PPP_DEFLATE=m || JFFS2_FS=m || ZISOFS_FS=m || BINFMT_ZFLAT=m || CRYPTO_DEFLATE=m
 
 config ZLIB_DEFLATE
 	tristate


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Port SquashFS to 2.6
  2003-07-19 22:59 [PATCH] Port SquashFS to 2.6 junkio
@ 2003-07-19 23:35 ` David Dillow
  2003-07-20  5:40   ` junkio
  2003-07-20  1:50 ` Bernd Eckenfels
  1 sibling, 1 reply; 13+ messages in thread
From: David Dillow @ 2003-07-19 23:35 UTC (permalink / raw)
  To: junkio; +Cc: linux-kernel, Phillip Lougher

On Sat, 2003-07-19 at 18:59, junkio@cox.net wrote:

> +static int squashfs_symlink_readpage(struct file *file, struct page *page)
> +{
> +	struct inode *inode = page->mapping->host;
> +	struct squashfs_inode_info *sqi = SQUASH_I(inode);
> +	int index = page->index << PAGE_CACHE_SHIFT, length = 0, bytes;
> +	int block = sqi->start_block;
> +	int offset = sqi->offset;
> +
> +	TRACE("Entered squashfs_symlink_readpage, page index %d, start block %x, offset %x\n",
> +		page->index, sqi->start_block, sqi->offset);
> +
> +	while(length < index) {
> +		char buffer[PAGE_CACHE_SIZE];
 
Hmm, isn't that 4K allocated on the stack? Ouch.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Port SquashFS to 2.6
  2003-07-19 22:59 [PATCH] Port SquashFS to 2.6 junkio
  2003-07-19 23:35 ` David Dillow
@ 2003-07-20  1:50 ` Bernd Eckenfels
  1 sibling, 0 replies; 13+ messages in thread
From: Bernd Eckenfels @ 2003-07-20  1:50 UTC (permalink / raw)
  To: linux-kernel

In article <7vk7ae15ty.fsf@assigned-by-dhcp.cox.net> you wrote:
> +config SQUASHFS
> +       tristate "SquashFs file system support"
> +       help
> +         Saying Y here includes support for SquashFs.
> +
> +         If unsure, say N.
> +

This is not a useful help.

> + * Squashfs - a compressed read only filesystem for Linux

Please at least add this single sentence.

Greetings
Bernd
-- 
eckes privat - http://www.eckes.org/
Project Freefire - http://www.freefire.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Port SquashFS to 2.6
  2003-07-19 23:35 ` David Dillow
@ 2003-07-20  5:40   ` junkio
  2003-07-20  8:22     ` Jörn Engel
  0 siblings, 1 reply; 13+ messages in thread
From: junkio @ 2003-07-20  5:40 UTC (permalink / raw)
  To: David Dillow; +Cc: linux-kernel, Phillip Lougher

>>>>> "DD" == David Dillow <dave@thedillows.org> writes:

DD> On Sat, 2003-07-19 at 18:59, junkio@cox.net wrote:
>> ...
>> +	while(length < index) {
>> +		char buffer[PAGE_CACHE_SIZE];
 
DD> Hmm, isn't that 4K allocated on the stack? Ouch.

Ouch indeed.  I was not looking for these things (I was just
porting not fixing).  Thank you for pointing it out.  Have a
couple of questions:

 - Would it be an acceptable alternative here to use blocking
   kmalloc upon entry with matching kfree before leaving?

 - I would imagine that the acceptable stack usage for functions
   would depend on where they are called and what they call.
   Coulc you suggest a rule-of-thumb number for
   address_space_operations.readpage (say, would 1kB be OK but
   not 3kB?)

 - Would the same rule apply to 2.4 filesystem layer?


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Port SquashFS to 2.6
  2003-07-20  5:40   ` junkio
@ 2003-07-20  8:22     ` Jörn Engel
  2003-07-20 10:16       ` postmaster
  0 siblings, 1 reply; 13+ messages in thread
From: Jörn Engel @ 2003-07-20  8:22 UTC (permalink / raw)
  To: junkio; +Cc: David Dillow, linux-kernel, Phillip Lougher

On Sat, 19 July 2003 22:40:22 -0700, junkio@cox.net wrote:
> >>>>> "DD" == David Dillow <dave@thedillows.org> writes:
> 
> DD> Hmm, isn't that 4K allocated on the stack? Ouch.
> 
> Ouch indeed.  I was not looking for these things (I was just
> porting not fixing).  Thank you for pointing it out.  Have a
> couple of questions:
> 
>  - Would it be an acceptable alternative here to use blocking
>    kmalloc upon entry with matching kfree before leaving?
> 
>  - I would imagine that the acceptable stack usage for functions
>    would depend on where they are called and what they call.
>    Coulc you suggest a rule-of-thumb number for
>    address_space_operations.readpage (say, would 1kB be OK but
>    not 3kB?)

As a rule of thumb, stay below 1k or you will get regular email from
me. :)

Depending on where and what you do, a bit more could be ok, but this
is hard to prove and also depends a bit on the architecture.  s390 has
giant stacks because function call overhead is huge, i386 will likely
halve the kernel stack sometime during 2.7 and there is no point is
hiding more easter eggs now - there is enough hidden already.

Jörn

-- 
"Security vulnerabilities are here to stay."
-- Scott Culp, Manager of the Microsoft Security Response Center, 2001

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Port SquashFS to 2.6
  2003-07-20  8:22     ` Jörn Engel
@ 2003-07-20 10:16       ` postmaster
  2003-07-20 10:38         ` Jörn Engel
  0 siblings, 1 reply; 13+ messages in thread
From: postmaster @ 2003-07-20 10:16 UTC (permalink / raw)
  To: Jörn Engel; +Cc: linux-kernel, junkio

joern@wohnheim.fh-wedel.de wrote:
> On Sat, 19 July 2003 22:40:22 -0700, junkio@cox.net wrote:
> > >>>>> "DD" == David Dillow <dave@thedillows.org> writes:
> > 
> > DD> Hmm, isn't that 4K allocated on the stack? Ouch.
> > 
> > Ouch indeed.  I was not looking for these things (I was just
> > porting not fixing).  Thank you for pointing it out.  Have a
> > couple of questions:
> > 

Thanks for sending the 2.6 patch, due to work pressure, I have had very little
time to do these things recently.  I am still, however, actively developing
squashfs (a 1.3 with some improvements will be released soon), and I'd prefer
to do code fixes myself.
  
> >  - Would it be an acceptable alternative here to use blocking
> >    kmalloc upon entry with matching kfree before leaving?
> > 
> >  - I would imagine that the acceptable stack usage for functions
> >    would depend on where they are called and what they call.
> >    Coulc you suggest a rule-of-thumb number for
> >    address_space_operations.readpage (say, would 1kB be OK but
> >    not 3kB?)
> 
> As a rule of thumb, stay below 1k or you will get regular email from
> me. :)

I tend to allocate (small) buffers on the stack, when their size does not
seem to warrant either: a globally kmalloced buffer and consequent locking,
or a locally kmalloced buffer kfreed on exit from the function, which seems
wasteful. However, if 1K is the perceived wisdom on stack limits, then I will
alter the code.

Phillip



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Port SquashFS to 2.6
  2003-07-20 10:16       ` postmaster
@ 2003-07-20 10:38         ` Jörn Engel
  0 siblings, 0 replies; 13+ messages in thread
From: Jörn Engel @ 2003-07-20 10:38 UTC (permalink / raw)
  To: postmaster; +Cc: linux-kernel, junkio

On Sun, 20 July 2003 11:16:18 +0100, postmaster@lougher.demon.co.uk wrote:
> joern@wohnheim.fh-wedel.de wrote:
> > 
> > As a rule of thumb, stay below 1k or you will get regular email from
> > me. :)
> 
> I tend to allocate (small) buffers on the stack, when their size does not
> seem to warrant either: a globally kmalloced buffer and consequent locking,
> or a locally kmalloced buffer kfreed on exit from the function, which seems
> wasteful. However, if 1K is the perceived wisdom on stack limits, then I will
> alter the code.

At least you should think twice before going above.  Even with wli's
stack reduction work applied, you still have close to 4k for kernel
stack.  But measuring the stack consumption of all the possible call
chains in the kernel is still a hard problem, so you will have a hard
time proving that any one bigger stack allocation is fine.

Jörn

-- 
When people work hard for you for a pat on the back, you've got
to give them that pat.
-- Robert Heinlein

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Port SquashFS to 2.6
  2003-07-22 10:36 John Bradford
@ 2003-07-22 10:47 ` Jörn Engel
  0 siblings, 0 replies; 13+ messages in thread
From: Jörn Engel @ 2003-07-22 10:47 UTC (permalink / raw)
  To: John Bradford; +Cc: junkio, linux-kernel

On Tue, 22 July 2003 11:36:07 +0100, John Bradford wrote:
> 
> > If you look closely at the kernel, there is currently no way of
> > telling whether it contains stack overflows waiting to happen, or not.
> 
> It would be an interesting experiment to deliberately make the kernel
> stack smaller, and see what happens.  If no problems seem apparent
> with a reduced kernel stack, it gives more weight to the argument that
> the default one is OK.

Been there, done that. :)

Use the patch below, enable the stack checking in your .config and
watch the system log get filled.  5k is quite agressive, I agree, but
with 4k stacks in mind, it is just 1k of slack left, plus the
measurement is a bit fuzzy, so you really want some slack.

Maybe there is also a patch flying around that initializes the stack
with some magic number on a fork and logs the number of used bytes on
exit.  That way you get rid of the fuzzyness, but you loose the exact
call trace that led to this number.

Jörn

-- 
Measure. Don't tune for speed until you've measured, and even then
don't unless one part of the code overwhelms the rest.
-- Rob Pike

--- linux-2.5.67/arch/i386/kernel/irq.c~stack_overflow	2003-04-07 19:30:39.000000000 +0200
+++ linux-2.5.67/arch/i386/kernel/irq.c	2003-04-14 20:22:01.000000000 +0200
@@ -342,7 +342,15 @@
 
 		__asm__ __volatile__("andl %%esp,%0" :
 					"=r" (esp) : "0" (8191));
+#if 0
 		if (unlikely(esp < (sizeof(struct thread_info) + 1024))) {
+#else
+		/* We check for 5k for now. The kernel stack still is 8k,
+		 * but should shrink to 4k, so this test makes sense.
+		 * Once the stack is 4k, we go back to the old test.
+		 */
+		if (unlikely(esp < (sizeof(struct thread_info) + 5120))) {
+#endif
 			printk("do_IRQ: stack overflow: %ld\n",
 				esp - sizeof(struct thread_info));
 			dump_stack();

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Port SquashFS to 2.6
@ 2003-07-22 10:36 John Bradford
  2003-07-22 10:47 ` Jörn Engel
  0 siblings, 1 reply; 13+ messages in thread
From: John Bradford @ 2003-07-22 10:36 UTC (permalink / raw)
  To: joern, junkio; +Cc: linux-kernel

> If you look closely at the kernel, there is currently no way of
> telling whether it contains stack overflows waiting to happen, or not.

It would be an interesting experiment to deliberately make the kernel
stack smaller, and see what happens.  If no problems seem apparent
with a reduced kernel stack, it gives more weight to the argument that
the default one is OK.

John.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Port SquashFS to 2.6
  2003-07-22  3:42     ` Valdis.Kletnieks
@ 2003-07-22 10:20       ` Jörn Engel
  0 siblings, 0 replies; 13+ messages in thread
From: Jörn Engel @ 2003-07-22 10:20 UTC (permalink / raw)
  To: Valdis.Kletnieks; +Cc: junkio, linux-kernel

On Mon, 21 July 2003 23:42:37 -0400, Valdis.Kletnieks@vt.edu wrote:
> 
> Not necessarily.  It's quite possible (likely even) that one architecture might
> have N bytes overhead per call,  and is allowed a 4K stack, and some other
> architecture has (N+30%) overhead, so 4K isn't enough - 5K is needed. However,
> other considerations cause a whole-page allocation, so instead of allocating
> 5K, it goes to 8K, with a 3K wastage....

And even worse, for short call chains, 4.1k would be enough, but for
long ones, you need up to 5.2k.  How much is too much?  We don't know
and it depends, so make a pessimistic guess.

Joern

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Port SquashFS to 2.6
  2003-07-22  2:52   ` junkio
  2003-07-22  3:42     ` Valdis.Kletnieks
@ 2003-07-22 10:16     ` Jörn Engel
  1 sibling, 0 replies; 13+ messages in thread
From: Jörn Engel @ 2003-07-22 10:16 UTC (permalink / raw)
  To: junkio; +Cc: linux-kernel

On Mon, 21 July 2003 19:52:39 -0700, junkio@cox.net wrote:
> >>>>> "JE" == JörnEngel  <joern@wohnheim.fh-wedel.de> writes:
>
> JE> Depending on where and what you do,...
> 
> Well, isn't asking about address_space_operations.readpage
> specific enough?

Not for someone as lazy as me.  If you want to know for sure:
- Figure out all possible path leading to that function.
- Figure out all possible path called by that function.
- Sum up all the stack allocated variable size for both.
- Add the architecture-specific function call overhead, multiplied
  by the number of functions on both paths.

Now you have the stack consumption that you were not responsible for,
which depends on the architecture.  Substract that number from the
total stack size, which is also architecture-specific and you know how
much is left.  It is a lot of work and the hard part is finding all
the possible paths.  If you have a good idea how to automate that,
please tell me.  Else, we have to live with rules of thumb.

> JE> ... also depends a bit on the architecture.  s390 has
> JE> giant stacks because function call overhead is huge, ...
> 
> The discussion was about putting variables (or arrays or large
> structs) the kernel programmer defines on the stack, and I do
> not think architecture calling convention has much to do with
> this.
> 
> If an architecture has a big stack usage per call that is
> imposed by the ABI, and larger kernel stack is allocated
> compared to other architectures because of this reason,
> shouldn't there be about the same amount of usable space left
> for the kernel programs within the allocated per-process kernel
> stack space to use?  If that is not the case then the port to
> that particular architecture would not be optimal, wouldn't it?

You end up with all sorts of architecture dependent stuff when
allocating stack.  A long is 4 or 8 bytes, same for pointers, the
amount and size of saved registers differs, the size of the stack
differs, some architectures have a seperate interrupt stack.

If you look closely at the kernel, there is currently no way of
telling whether it contains stack overflows waiting to happen, or not.
We live with lots of hope and the comforting feeling that there were
not many stack overflows in the past.  I wish we had better tools, but
we don't.

Jörn

-- 
The cheapest, fastest and most reliable components of a computer
system are those that aren't there.
-- Gordon Bell, DEC labratories

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Port SquashFS to 2.6
  2003-07-22  2:52   ` junkio
@ 2003-07-22  3:42     ` Valdis.Kletnieks
  2003-07-22 10:20       ` Jörn Engel
  2003-07-22 10:16     ` Jörn Engel
  1 sibling, 1 reply; 13+ messages in thread
From: Valdis.Kletnieks @ 2003-07-22  3:42 UTC (permalink / raw)
  To: junkio; +Cc: =?iso-2022-jp-2?b?ShsuQRtOdnJuRW5nZWw=?=, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 896 bytes --]

On Mon, 21 Jul 2003 19:52:39 PDT, junkio@cox.net said:

> If an architecture has a big stack usage per call that is
> imposed by the ABI, and larger kernel stack is allocated
> compared to other architectures because of this reason,
> shouldn't there be about the same amount of usable space left
> for the kernel programs within the allocated per-process kernel
> stack space to use?  If that is not the case then the port to
> that particular architecture would not be optimal, wouldn't it?

Not necessarily.  It's quite possible (likely even) that one architecture might
have N bytes overhead per call,  and is allowed a 4K stack, and some other
architecture has (N+30%) overhead, so 4K isn't enough - 5K is needed. However,
other considerations cause a whole-page allocation, so instead of allocating
5K, it goes to 8K, with a 3K wastage....

So yes, you can end up with suboptimal results.


[-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Port SquashFS to 2.6
       [not found] ` <fa.hre90bn.e6k5pf@ifi.uio.no>
@ 2003-07-22  2:52   ` junkio
  2003-07-22  3:42     ` Valdis.Kletnieks
  2003-07-22 10:16     ` Jörn Engel
  0 siblings, 2 replies; 13+ messages in thread
From: junkio @ 2003-07-22  2:52 UTC (permalink / raw)
  To: =?iso-2022-jp-2?b?ShsuQRtOdnJuRW5nZWw=?=; +Cc: linux-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=iso-2022-jp-2, Size: 1243 bytes --]

>>>>> "JE" == J^[.A^[NvrnEngel  <joern@wohnheim.fh-wedel.de> writes:

JE> On Sat, 19 July 2003 22:40:22 -0700, junkio@cox.net wrote:
>> - I would imagine that the acceptable stack usage for functions
>> would depend on where they are called and what they call.
>> Coulc you suggest a rule-of-thumb number for
>> address_space_operations.readpage (say, would 1kB be OK but
>> not 3kB?)

JE> Depending on where and what you do,...

Well, isn't asking about address_space_operations.readpage
specific enough?

JE> ... also depends a bit on the architecture.  s390 has
JE> giant stacks because function call overhead is huge, ...

The discussion was about putting variables (or arrays or large
structs) the kernel programmer defines on the stack, and I do
not think architecture calling convention has much to do with
this.

If an architecture has a big stack usage per call that is
imposed by the ABI, and larger kernel stack is allocated
compared to other architectures because of this reason,
shouldn't there be about the same amount of usable space left
for the kernel programs within the allocated per-process kernel
stack space to use?  If that is not the case then the port to
that particular architecture would not be optimal, wouldn't it?


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2003-07-22 10:32 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-07-19 22:59 [PATCH] Port SquashFS to 2.6 junkio
2003-07-19 23:35 ` David Dillow
2003-07-20  5:40   ` junkio
2003-07-20  8:22     ` Jörn Engel
2003-07-20 10:16       ` postmaster
2003-07-20 10:38         ` Jörn Engel
2003-07-20  1:50 ` Bernd Eckenfels
     [not found] <fa.k0do8p6.ch6pps@ifi.uio.no>
     [not found] ` <fa.hre90bn.e6k5pf@ifi.uio.no>
2003-07-22  2:52   ` junkio
2003-07-22  3:42     ` Valdis.Kletnieks
2003-07-22 10:20       ` Jörn Engel
2003-07-22 10:16     ` Jörn Engel
2003-07-22 10:36 John Bradford
2003-07-22 10:47 ` Jörn Engel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).