All of lore.kernel.org
 help / color / mirror / Atom feed
* [CHECKER] 37 stack variables >= 1K in 2.4.17
@ 2002-06-10  3:56 Dawson Engler
  2002-06-12  8:43 ` Pavel Machek
  2002-06-12 21:51 ` Benjamin LaHaise
  0 siblings, 2 replies; 44+ messages in thread
From: Dawson Engler @ 2002-06-10  3:56 UTC (permalink / raw)
  To: linux-kernel; +Cc: mc

Here are 37 errors where variables >= 1024 bytes are allocated on a function's
stack.

Dawson

# BUGs	|	File Name
4	|	/drivers/cdrom.c
4	|	/message/i2o_proc.c
3	|	/net/airo.c
3	|	/../inflate.c
2	|	/fs/zlib.c
2	|	/drivers/zlib.c
2	|	/drivers/cpqfcTScontrol.c
2	|	/fs/compr_rtime.c
1	|	/char/sidewinder.c
1	|	/drivers/ide-cs.c
1	|	/drivers/iphase.c
1	|	/drivers/ixj_pcmcia.c
1	|	/i386/nmi.c
1	|	/drivers/parport_cs.c
1	|	/net/br_ioctl.c
1	|	/drivers/qlogicfc.c
1	|	/drivers/cpqarray.c
1	|	/drivers/optcd.c
1	|	/net/wanmain.c
1	|	/i386/setup.c
1	|	/fs/nfsroot.c
1	|	/net/cycx_x25.c
1	|	/fs/ioctl.c


############################################################
# 2.4.17 specific errors

#
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/scsi/qlogicfc.c:840:isp2x00_make_portdb: ERROR:VAR:840:840: suspicious var 'temp' = 3072 bytes:840 [nbytes=3072]
static int isp2x00_make_portdb(struct Scsi_Host *host)
{

	short param[8];
	int i, j;

Error --->
	struct id_name_map temp[QLOGICFC_MAX_ID + 1];
	struct isp2x00_hostdata *hostdata;

	isp2x00_disable_irqs(host);
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/message/i2o/i2o_proc.c:838:i2o_proc_read_ddm_table: ERROR:VAR:838:838: suspicious var 'result' = 2828 bytes:838 [nbytes=2828]
		u8  block_status;
		u8  error_info_size;
		u16 row_count;
		u16 more_flag;
		i2o_exec_execute_ddm_table ddm_table[MAX_I2O_MODULES];

Error --->
	} result;

	i2o_exec_execute_ddm_table ddm_table;

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/cdrom/optcd.c:1625:cdromread: ERROR:VAR:1625:1625: suspicious var 'buf' = 2646 bytes:1625 [nbytes=2646]

static int cdromread(unsigned long arg, int blocksize, int cmd)
{
	int status;
	struct cdrom_msf msf;

Error --->
	char buf[CD_FRAMESIZE_RAWER];

	status = verify_area(VERIFY_WRITE, (void *) arg, blocksize);
	if (status)
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/message/i2o/i2o_proc.c:2266:i2o_proc_read_lan_mcast_addr: ERROR:VAR:2266:2266: suspicious var 'result' = 2060 bytes:2266 [nbytes=2060]
		u8  block_status;
		u8  error_info_size;
		u16 row_count;
		u16 more_flag;
		u8  mc_addr[256][8];

Error --->
	} result;	

	spin_lock(&i2o_proc_lock);	
	len = 0;
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/message/i2o/i2o_proc.c:2497:i2o_proc_read_lan_alt_addr: ERROR:VAR:2497:2497: suspicious var 'result' = 2060 bytes:2497 [nbytes=2060]
		u8  block_status;
		u8  error_info_size;
		u16 row_count;
		u16 more_flag;
		u8  alt_addr[256][8];

Error --->
	} result;	

	spin_lock(&i2o_proc_lock);	
	len = 0;
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/message/i2o/i2o_proc.c:1049:i2o_proc_read_groups: ERROR:VAR:1049:1049: suspicious var 'result' = 2060 bytes:1049 [nbytes=2060]
		u8  block_status;
		u8  error_info_size;
		u16 row_count;
		u16 more_flag;
		i2o_group_info group[256];

Error --->
	} result;

	spin_lock(&i2o_proc_lock);

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/scsi/cpqfcTScontrol.c:721:CpqTsProcessIMQEntry: ERROR:VAR:721:721: suspicious var 'ulFibreFrame' = 2048 bytes:721 [nbytes=2048]
  int iStatus;
  USHORT i, RPCset, DPCset;
  ULONG x_ID;
  ULONG ulBuff, dwStatus;
  TachFCHDR_GCMND* fchs;

Error --->
  ULONG ulFibreFrame[2048/4];  // max number of DWORDS in incoming Fibre Frame
  UCHAR ucInboundMessageType;  // Inbound CM, dword 3 "type" field

  ENTER("ProcessIMQEntry");
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/atm/iphase.c:2772:ia_ioctl: ERROR:VAR:2772:2772: suspicious var 'regs_local' = 2048 bytes:2772 [nbytes=2048]
             ia_cmds.status = 0;
             ia_cmds.len = 0x80;
             break;
          case MEMDUMP_FFL:
          {  

Error --->
             ia_regs_t       regs_local;
             ffredn_t        *ffL = &regs_local.ffredn;
             rfredn_t        *rfL = &regs_local.rfredn;
                     
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/net/wireless/airo.c:4316:readrids: ERROR:VAR:4316:4316: suspicious var 'iobuf' = 2048 bytes:4316 [nbytes=2048]
 * as needed.  This represents the READ side of control I/O to
 * the card
 */
static int readrids(struct net_device *dev, aironet_ioctl *comp) {
	unsigned short ridcode;

Error --->
	unsigned char iobuf[2048];

	switch(comp->command)
	{
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/net/wireless/airo.c:4364:writerids: ERROR:VAR:4364:4364: suspicious var 'iobuf' = 2048 bytes:4364 [nbytes=2048]

static int writerids(struct net_device *dev, aironet_ioctl *comp) {
	int  ridcode;
	Resp      rsp;
	static int (* writer)(struct airo_info *, u16 rid, const void *, int);

Error --->
	unsigned char iobuf[2048];

	/* Only super-user can write RIDs */
	if (!capable(CAP_NET_ADMIN))
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/scsi/cpqfcTScontrol.c:610:PeekIMQEntry: ERROR:VAR:610:610: suspicious var 'ulFibreFrame' = 2048 bytes:610 [nbytes=2048]
      // If we find it, check the incoming frame payload (1st word)
      // for LILP frame
        if( (fcChip->IMQ->QEntry[CI].type & 0x1FF) == 0x104 )
        { 
          TachFCHDR_GCMND* fchs;

Error --->
          ULONG ulFibreFrame[2048/4];  // max DWORDS in incoming FC Frame
	  USHORT SFQpi = (USHORT)(fcChip->IMQ->QEntry[CI].word[0] & 0x0fffL);

	  CpqTsGetSFQEntry( fcChip,
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/arch/i386/kernel/setup.c:519:sanitize_e820_map: ERROR:VAR:519:519: function stack consumes 1840 bytes:519 [nbytes=1840]
		   ______________________4_
	*/

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)

Error --->
		return -1;

	old_nr = *pnr_map;

---------------------------------------------------------
[BUG] *think* so
/u2/engler/mc/oses/linux/2.4.17/fs/umsdos/ioctl.c:96:UMSDOS_ioctl_dir: ERROR:VAR:96:96: function stack consumes 1772 bytes:96 [nbytes=1772]
	    && cmd != UMSDOS_UNLINK_EMD
	    && cmd != UMSDOS_UNLINK_DOS
	    && cmd != UMSDOS_RMDIR_DOS
	    && cmd != UMSDOS_STAT_DOS
	    && cmd != UMSDOS_DOS_SETUP)

Error --->
		return fat_dir_ioctl (dir, filp, cmd, data_ptr);

	/* #Specification: ioctl / access
	 * Only root (effective id) is allowed to do IOCTL on directory
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/net/wireless/airo.c:3319:airo_ioctl: ERROR:VAR:3319:3319: function stack consumes 1676 bytes:3319 [nbytes=1676]
#endif /* CISCO_EXT */
	{
		/* If the command read some stuff, we better get it out of
		 * the card first... */
		if(IW_IS_GET(cmd))

Error --->
			readStatusRid(local, &status_rid);
		if(IW_IS_GET(cmd) || (cmd == SIOCSIWRATE) || (cmd == SIOCSIWENCODE))
			readCapabilityRid(local, &cap_rid);
		/* Get config in all cases, because SET will just modify it */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/block/cpqarray.c:1188:ida_ioctl: ERROR:VAR:1188:1188: suspicious var 'my_io' = 1296 bytes:1188 [nbytes=1296]
	int dsk  = MINOR(inode->i_rdev) >> NWD_SHIFT;
	int error;
	int diskinfo[4];
	struct hd_geometry *geo = (struct hd_geometry *)arg;
	ida_ioctl_t *io = (ida_ioctl_t*)arg;

Error --->
	ida_ioctl_t my_io;

	switch(cmd) {
	case HDIO_GETGEO:
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/net/wanrouter/wanmain.c:754:device_new_if: ERROR:VAR:754:754: suspicious var 'conf' = 1272 bytes:754 [nbytes=1272]
 *	o register network interface
 */

static int device_new_if (wan_device_t *wandev, wanif_conf_t *u_conf)
{

Error --->
	wanif_conf_t conf;
	netdevice_t *dev=NULL;
#ifdef CONFIG_WANPIPE_MULTPPP
	struct ppp_device *pppdev=NULL;
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/block/../../lib/inflate.c:750:inflate_dynamic: ERROR:VAR:750:750: suspicious var 'll' = 1264 bytes:750 [nbytes=1264]
  unsigned nl;          /* number of literal/length codes */
  unsigned nd;          /* number of distance codes */
#ifdef PKZIP_BUG_WORKAROUND
  unsigned ll[288+32];  /* literal/length and distance code lengths */
#else

Error --->
  unsigned ll[286+30];  /* literal/length and distance code lengths */
#endif
  register ulg b;       /* bit buffer */
  register unsigned k;  /* number of bits in bit buffer */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/net/zlib.c:4216:huft_build: ERROR:VAR:4216:4216: suspicious var 'v' = 1152 bytes:4216 [nbytes=1152]
  int l;                        /* bits per table (returned in m) */
  register uIntf *p;            /* pointer into c[], b[], or v[] */
  inflate_huft *q;              /* points to current table */
  struct inflate_huft_s r;      /* table entry for structure assignment */
  inflate_huft *u[BMAX];        /* table stack */

Error --->
  uInt v[N_MAX];                /* values in order of bit length */
  register int w;               /* bits before this table == (l * h) */
  uInt x[BMAX+1];               /* bit offsets, then code stack */
  uIntf *xp;                    /* pointer into x */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/fs/jffs2/zlib.c:4501:inflate_trees_fixed: ERROR:VAR:4501:4501: suspicious var 'c' = 1152 bytes:4501 [nbytes=1152]
{
  /* build fixed tables if not already (multiple overlapped executions ok) */
  if (!fixed_built)
  {
    int k;              /* temporary variable */

Error --->
    unsigned c[288];    /* length list for huft_build */
    z_stream z;         /* for falloc function */
    int f = FIXEDH;     /* number of hufts left in fixed_mem */

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/fs/jffs2/zlib.c:4216:huft_build: ERROR:VAR:4216:4216: suspicious var 'v' = 1152 bytes:4216 [nbytes=1152]
  int l;                        /* bits per table (returned in m) */
  register uIntf *p;            /* pointer into c[], b[], or v[] */
  inflate_huft *q;              /* points to current table */
  struct inflate_huft_s r;      /* table entry for structure assignment */
  inflate_huft *u[BMAX];        /* table stack */

Error --->
  uInt v[N_MAX];                /* values in order of bit length */
  register int w;               /* bits before this table == (l * h) */
  uInt x[BMAX+1];               /* bit offsets, then code stack */
  uIntf *xp;                    /* pointer into x */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/block/../../lib/inflate.c:688:inflate_fixed: ERROR:VAR:688:688: suspicious var 'l' = 1152 bytes:688 [nbytes=1152]
  int i;                /* temporary variable */
  struct huft *tl;      /* literal/length code table */
  struct huft *td;      /* distance code table */
  int bl;               /* lookup bits for tl */
  int bd;               /* lookup bits for td */

Error --->
  unsigned l[288];      /* length list for huft_build */

DEBG("<fix");

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/block/../../lib/inflate.c:301:huft_build: ERROR:VAR:301:301: suspicious var 'v' = 1152 bytes:301 [nbytes=1152]
  int l;                        /* bits per table (returned in m) */
  register unsigned *p;         /* pointer into c[], b[], or v[] */
  register struct huft *q;      /* points to current table */
  struct huft r;                /* table entry for structure assignment */
  struct huft *u[BMAX];         /* table stack */

Error --->
  unsigned v[N_MAX];            /* values in order of bit length */
  register int w;               /* bits before this table == (l * h) */
  unsigned x[BMAX+1];           /* bit offsets, then code stack */
  unsigned *xp;                 /* pointer into x */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/net/zlib.c:4501:inflate_trees_fixed: ERROR:VAR:4501:4501: suspicious var 'c' = 1152 bytes:4501 [nbytes=1152]
{
  /* build fixed tables if not already (multiple overlapped executions ok) */
  if (!fixed_built)
  {
    int k;              /* temporary variable */

Error --->
    unsigned c[288];    /* length list for huft_build */
    z_stream z;         /* for falloc function */
    int f = FIXEDH;     /* number of hufts left in fixed_mem */

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/ide/ide-cs.c:247:ide_config: ERROR:VAR:367:367: function stack consumes 1148 bytes:367 [nbytes=1148]
	   link->conf.Vpp1/10, link->conf.Vpp1%10);

    link->state &= ~DEV_CONFIG_PENDING;
    return;
    

Error --->
cs_failed:
    cs_error(link->handle, last_fn, last_ret);
failed:
    ide_release((u_long)link);
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/parport/parport_cs.c:252:parport_config: ERROR:VAR:327:327: function stack consumes 1136 bytes:327 [nbytes=1136]
    printk("]\n");
    
    link->state &= ~DEV_CONFIG_PENDING;
    return;
    

Error --->
cs_failed:
    cs_error(link->handle, last_fn, last_ret);
failed:
    parport_cs_release((u_long)link);
---------------------------------------------------------
[BUG]  this function just keeps showing up...
/u2/engler/mc/oses/linux/2.4.17/drivers/telephony/ixj_pcmcia.c:221:ixj_config: ERROR:VAR:266:266: function stack consumes 1132 bytes:266 [nbytes=1132]
	info->node.major = PHONE_MAJOR;
	link->dev = &info->node;
	ixj_get_serial(link, j);
	link->state &= ~DEV_CONFIG_PENDING;
	return;

Error --->
      cs_failed:
	cs_error(link->handle, last_fn, last_ret);
	ixj_cs_release((u_long) link);
}
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/char/joystick/sidewinder.c:572:sw_connect: ERROR:VAR:572:572: function stack consumes 1093 bytes:572 [nbytes=1093]
	unsigned char m = 1;
	char comment[40];

	comment[0] = 0;


Error --->
	if (!(sw = kmalloc(sizeof(struct sw), GFP_KERNEL))) return;
	memset(sw, 0, sizeof(struct sw));

	gameport->private = sw;
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/cdrom/cdrom.c:738:cdrom_number_of_slots: ERROR:VAR:738:738: suspicious var 'info' = 1032 bytes:738 [nbytes=1032]
 */
int cdrom_number_of_slots(struct cdrom_device_info *cdi) 
{
	int status;
	int nslots = 1;

Error --->
	struct cdrom_changer_info info;

	cdinfo(CD_CHANGER, "entering cdrom_number_of_slots()\n"); 
	/* cdrom_read_mech_status requires a valid value for capacity: */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/cdrom/cdrom.c:780:cdrom_select_disc: ERROR:VAR:780:780: suspicious var 'info' = 1032 bytes:780 [nbytes=1032]
	return cdi->ops->generic_packet(cdi, &cgc);
}

int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
{

Error --->
	struct cdrom_changer_info info;
	int curslot;
	int ret;

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/cdrom/cdrom.c:1526:cdrom_ioctl: ERROR:VAR:1526:1526: suspicious var 'info' = 1032 bytes:1526 [nbytes=1032]
			cdi->options |= CDO_AUTO_CLOSE | CDO_AUTO_EJECT;
		return 0;
		}

	case CDROM_MEDIA_CHANGED: {

Error --->
		struct cdrom_changer_info info;

		cdinfo(CD_DO_IOCTL, "entering CDROM_MEDIA_CHANGED\n"); 
		if (!CDROM_CAN(CDC_MEDIA_CHANGED))
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/cdrom/cdrom.c:714:cdrom_slot_status: ERROR:VAR:714:714: suspicious var 'info' = 1032 bytes:714 [nbytes=1032]
	return cdo->generic_packet(cdi, &cgc);
}

static int cdrom_slot_status(struct cdrom_device_info *cdi, int slot)
{

Error --->
	struct cdrom_changer_info info;
	int ret;

	cdinfo(CD_CHANGER, "entering cdrom_slot_status()\n"); 
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/fs/jffs2/compr_rtime.c:97:rtime_decompress: ERROR:VAR:97:97: suspicious var 'positions' = 1024 bytes:97 [nbytes=1024]


void rtime_decompress(unsigned char *data_in, unsigned char *cpage_out,
		      __u32 srclen, __u32 destlen)
{

Error --->
	int positions[256];
	int outpos = 0;
	int pos=0;
	
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/fs/nfs/nfsroot.c:238:root_nfs_name: ERROR:VAR:238:238: suspicious var 'buf' = 1024 bytes:238 [nbytes=1024]
/*
 *  Prepare the NFS data structure and parse all options.
 */
static int __init root_nfs_name(char *name)
{

Error --->
	char buf[NFS_MAXPATHLEN];
	char *cp;

	/* Set some default values */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/net/wan/cycx_x25.c:983:hex_dump: ERROR:VAR:983:983: suspicious var 'hex' = 1024 bytes:983 [nbytes=1024]
			 card->devname, cmd->command);
}
#ifdef CYCLOMX_X25_DEBUG
static void hex_dump(char *msg, unsigned char *p, int len)
{

Error --->
	unsigned char hex[1024],
	    	* phex = hex;

	if (len >= (sizeof(hex) / 2))
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/fs/jffs2/compr_rtime.c:57:rtime_compress: ERROR:VAR:57:57: suspicious var 'positions' = 1024 bytes:57 [nbytes=1024]

/* _compress returns the compressed size, -1 if bigger */
int rtime_compress(unsigned char *data_in, unsigned char *cpage_out, 
		   __u32 *sourcelen, __u32 *dstlen)
{

Error --->
	int positions[256];
	int outpos = 0;
	int pos=0;

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/arch/i386/kernel/nmi.c:48:check_nmi_watchdog: ERROR:VAR:48:48: suspicious var 'tmp' = 1024 bytes:48 [nbytes=1024]
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

int __init check_nmi_watchdog (void)
{

Error --->
	irq_cpustat_t tmp[NR_CPUS];
	int j, cpu;

	printk(KERN_INFO "testing NMI watchdog ... ");
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/net/bridge/br_ioctl.c:86:br_ioctl_device: ERROR:VAR:86:86: suspicious var 'indices' = 1024 bytes:86 [nbytes=1024]
	}

	case BRCTL_GET_PORT_LIST:
	{
		int i;

Error --->
		int indices[256];

		for (i=0;i<256;i++)
			indices[i] = 0;


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-10  3:56 [CHECKER] 37 stack variables >= 1K in 2.4.17 Dawson Engler
@ 2002-06-12  8:43 ` Pavel Machek
  2002-06-12 19:11   ` Nikita Danilov
  2002-06-12 21:51 ` Benjamin LaHaise
  1 sibling, 1 reply; 44+ messages in thread
From: Pavel Machek @ 2002-06-12  8:43 UTC (permalink / raw)
  To: Dawson Engler; +Cc: linux-kernel, mc

Hi!

> # BUGs	|	File Name
> 4	|	/drivers/cdrom.c
> 4	|	/message/i2o_proc.c
> 3	|	/net/airo.c
> 3	|	/../inflate.c
> 2	|	/fs/zlib.c
> 2	|	/drivers/zlib.c
> 2	|	/drivers/cpqfcTScontrol.c
		~~~~~~~~~~~~~~~~~~~~~~~~~

Actually, 3 bugs, the name is so ugly that it is a bug, too :-).
									Pavel
-- 
(about SSSCA) "I don't say this lightly.  However, I really think that the U.S.
no longer is classifiable as a democracy, but rather as a plutocracy." --hpa

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-12  8:43 ` Pavel Machek
@ 2002-06-12 19:11   ` Nikita Danilov
  0 siblings, 0 replies; 44+ messages in thread
From: Nikita Danilov @ 2002-06-12 19:11 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Dawson Engler, linux-kernel, mc

Pavel Machek writes:
 > Hi!
 > 
 > > # BUGs	|	File Name
 > > 4	|	/drivers/cdrom.c
 > > 4	|	/message/i2o_proc.c
 > > 3	|	/net/airo.c
 > > 3	|	/../inflate.c
 > > 2	|	/fs/zlib.c
 > > 2	|	/drivers/zlib.c
 > > 2	|	/drivers/cpqfcTScontrol.c
 > 		~~~~~~~~~~~~~~~~~~~~~~~~~

By the way, gcc has -Wlarger-than-NNNN option to do such checks.

 > 
 > Actually, 3 bugs, the name is so ugly that it is a bug, too :-).
 > 									Pavel

Nikita.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-10  3:56 [CHECKER] 37 stack variables >= 1K in 2.4.17 Dawson Engler
  2002-06-12  8:43 ` Pavel Machek
@ 2002-06-12 21:51 ` Benjamin LaHaise
  2002-06-12 22:26   ` Alexander Viro
  2002-06-13  6:36   ` Dawson Engler
  1 sibling, 2 replies; 44+ messages in thread
From: Benjamin LaHaise @ 2002-06-12 21:51 UTC (permalink / raw)
  To: Dawson Engler; +Cc: linux-kernel, mc

On Sun, Jun 09, 2002 at 08:56:30PM -0700, Dawson Engler wrote:
> Here are 37 errors where variables >= 1024 bytes are allocated on a function's
> stack.

Is it possible to get checker to determine the stack depth of a worst 
case call chain (excluding interrupts)?  I've found that deep call chains 
are far more likely to cause stack overflows than short and bounded paths.

		-ben

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-12 21:51 ` Benjamin LaHaise
@ 2002-06-12 22:26   ` Alexander Viro
  2002-06-12 22:38     ` Benjamin LaHaise
  2002-06-13  6:38     ` Dawson Engler
  2002-06-13  6:36   ` Dawson Engler
  1 sibling, 2 replies; 44+ messages in thread
From: Alexander Viro @ 2002-06-12 22:26 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: Dawson Engler, linux-kernel, mc



On Wed, 12 Jun 2002, Benjamin LaHaise wrote:

> On Sun, Jun 09, 2002 at 08:56:30PM -0700, Dawson Engler wrote:
> > Here are 37 errors where variables >= 1024 bytes are allocated on a function's
> > stack.
> 
> Is it possible to get checker to determine the stack depth of a worst 
> case call chain (excluding interrupts)?  I've found that deep call chains 
> are far more likely to cause stack overflows than short and bounded paths.

Not realistic - we have a recursion through the ->follow_link(), and
a lot of stuff can be called from ->follow_link().  We _do_ have a
limit on depth of recursion here, but it won't be fun to deal with.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-12 22:26   ` Alexander Viro
@ 2002-06-12 22:38     ` Benjamin LaHaise
  2002-06-12 22:44       ` Robert Love
  2002-06-13  0:20       ` [CHECKER] 37 stack variables >= 1K in 2.4.17 Alexander Viro
  2002-06-13  6:38     ` Dawson Engler
  1 sibling, 2 replies; 44+ messages in thread
From: Benjamin LaHaise @ 2002-06-12 22:38 UTC (permalink / raw)
  To: Alexander Viro; +Cc: Dawson Engler, linux-kernel, mc

On Wed, Jun 12, 2002 at 06:26:55PM -0400, Alexander Viro wrote:
> Not realistic - we have a recursion through the ->follow_link(), and
> a lot of stuff can be called from ->follow_link().  We _do_ have a
> limit on depth of recursion here, but it won't be fun to deal with.

Perfection isn't what I'm looking for, rather just an approximation.  
Any tool would have to give up on non-trivial recursion, or have 
additional rules imposed on the system.  Checker seems to be growing 
functionality in this area, so it seems like a useful feature request.

		-ben
-- 
"You will be reincarnated as a toad; and you will be much happier."

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-12 22:38     ` Benjamin LaHaise
@ 2002-06-12 22:44       ` Robert Love
  2002-06-12 22:55         ` procfs documentation Tom Bradley
  2002-06-13  0:20       ` [CHECKER] 37 stack variables >= 1K in 2.4.17 Alexander Viro
  1 sibling, 1 reply; 44+ messages in thread
From: Robert Love @ 2002-06-12 22:44 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: Alexander Viro, Dawson Engler, linux-kernel, mc

On Wed, 2002-06-12 at 15:38, Benjamin LaHaise wrote:

> Perfection isn't what I'm looking for, rather just an approximation.  
> Any tool would have to give up on non-trivial recursion, or have 
> additional rules imposed on the system.  Checker seems to be growing 
> functionality in this area, so it seems like a useful feature request.

I do not want to give up on this idea, either... if the implementation
needs to be "hackish" or even explicitly ignore certain functions, so be
it.  There is a _lot_ that can be done with detailed call chain analysis
-- the point you gave is one of many.

Checker already has some basic functionality here I suspect based on
what it is capable of reporting... imagine what more could be reported?

	Robert Love


^ permalink raw reply	[flat|nested] 44+ messages in thread

* procfs documentation
  2002-06-12 22:44       ` Robert Love
@ 2002-06-12 22:55         ` Tom Bradley
  2002-06-13 11:17           ` John Levon
  0 siblings, 1 reply; 44+ messages in thread
From: Tom Bradley @ 2002-06-12 22:55 UTC (permalink / raw)
  To: linux-kernel

Where can I find documentation that explains all the entries in procfs?

man 5 procfs is out of date and doesn't match.
Documentation/filesystems/proc.txt is incomplete.

I am writing a user-land GUI based program that allows user to easily read
the procfs but I can't find info on all the entries.

Tom



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-12 22:38     ` Benjamin LaHaise
  2002-06-12 22:44       ` Robert Love
@ 2002-06-13  0:20       ` Alexander Viro
  2002-06-13  8:30         ` Helge Hafting
  1 sibling, 1 reply; 44+ messages in thread
From: Alexander Viro @ 2002-06-13  0:20 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: Dawson Engler, linux-kernel, mc



On Wed, 12 Jun 2002, Benjamin LaHaise wrote:

> On Wed, Jun 12, 2002 at 06:26:55PM -0400, Alexander Viro wrote:
> > Not realistic - we have a recursion through the ->follow_link(), and
> > a lot of stuff can be called from ->follow_link().  We _do_ have a
> > limit on depth of recursion here, but it won't be fun to deal with.
> 
> Perfection isn't what I'm looking for, rather just an approximation.  
> Any tool would have to give up on non-trivial recursion, or have 

... in which case it will be useless - anything callable from path_walk()
will be out of its scope and that's a fairly large part of VFS, filesystems,
VM and upper halves of block devices.

> additional rules imposed on the system.  Checker seems to be growing 
> functionality in this area, so it seems like a useful feature request.

Just be careful with that loop - (mutual) recursion through ->follow_link()
must be special-cased if you want anything interesting to come out of that.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-12 21:51 ` Benjamin LaHaise
  2002-06-12 22:26   ` Alexander Viro
@ 2002-06-13  6:36   ` Dawson Engler
  1 sibling, 0 replies; 44+ messages in thread
From: Dawson Engler @ 2002-06-13  6:36 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: linux-kernel, mc

> 
> On Sun, Jun 09, 2002 at 08:56:30PM -0700, Dawson Engler wrote:
> > Here are 37 errors where variables >= 1024 bytes are allocated on a function's
> > stack.
> 
> Is it possible to get checker to determine the stack depth of a worst 
> case call chain (excluding interrupts)?  I've found that deep call chains 
> are far more likely to cause stack overflows than short and bounded paths.

Yeah, it's not that hard.  The main problem is determining if recursive
loops are feasible.  I'd released bugs from it before, but no one fixed any
so hadn't rerun it since.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-12 22:26   ` Alexander Viro
  2002-06-12 22:38     ` Benjamin LaHaise
@ 2002-06-13  6:38     ` Dawson Engler
  2002-06-13  6:59       ` Alexander Viro
  1 sibling, 1 reply; 44+ messages in thread
From: Dawson Engler @ 2002-06-13  6:38 UTC (permalink / raw)
  To: Alexander Viro; +Cc: Benjamin LaHaise, linux-kernel, mc

> On Wed, 12 Jun 2002, Benjamin LaHaise wrote:
> 
> > On Sun, Jun 09, 2002 at 08:56:30PM -0700, Dawson Engler wrote:
> > > Here are 37 errors where variables >= 1024 bytes are allocated on a function's
> > > stack.
> > 
> > Is it possible to get checker to determine the stack depth of a worst 
> > case call chain (excluding interrupts)?  I've found that deep call chains 
> > are far more likely to cause stack overflows than short and bounded paths.
> 
> Not realistic - we have a recursion through the ->follow_link(), and
> a lot of stuff can be called from ->follow_link().  We _do_ have a
> limit on depth of recursion here, but it won't be fun to deal with.

You mean following function pointers is not realistic?  Actually the
function pointers in linxu are pretty easy to deal with since, by
and large, they are set by static structure initialization and not
really fussed with afterwards.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13  6:38     ` Dawson Engler
@ 2002-06-13  6:59       ` Alexander Viro
  2002-06-13 17:41         ` Daniel Phillips
  2002-06-13 17:56         ` Andi Kleen
  0 siblings, 2 replies; 44+ messages in thread
From: Alexander Viro @ 2002-06-13  6:59 UTC (permalink / raw)
  To: Dawson Engler; +Cc: Benjamin LaHaise, linux-kernel, mc



On Wed, 12 Jun 2002, Dawson Engler wrote:

> > Not realistic - we have a recursion through the ->follow_link(), and
> > a lot of stuff can be called from ->follow_link().  We _do_ have a
> > limit on depth of recursion here, but it won't be fun to deal with.
> 
> You mean following function pointers is not realistic?  Actually the
> function pointers in linxu are pretty easy to deal with since, by
> and large, they are set by static structure initialization and not
> really fussed with afterwards.

I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
->vfs_follow_link->link_path_walk) you will get infinite maximal depth
for everything that can be called by any of these functions.  And that's
a _lot_ of stuff.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13  0:20       ` [CHECKER] 37 stack variables >= 1K in 2.4.17 Alexander Viro
@ 2002-06-13  8:30         ` Helge Hafting
  2002-06-13 13:24           ` Roger Larsson
  0 siblings, 1 reply; 44+ messages in thread
From: Helge Hafting @ 2002-06-13  8:30 UTC (permalink / raw)
  To: linux-kernel

Alexander Viro wrote:
> 
> On Wed, 12 Jun 2002, Benjamin LaHaise wrote:
> 
> > On Wed, Jun 12, 2002 at 06:26:55PM -0400, Alexander Viro wrote:
> > > Not realistic - we have a recursion through the ->follow_link(), and
> > > a lot of stuff can be called from ->follow_link().  We _do_ have a
> > > limit on depth of recursion here, but it won't be fun to deal with.
> >
> > Perfection isn't what I'm looking for, rather just an approximation.
> > Any tool would have to give up on non-trivial recursion, or have
> 
> ... in which case it will be useless - anything callable from path_walk()
> will be out of its scope and that's a fairly large part of VFS, filesystems,
> VM and upper halves of block devices.

The automated checker may use hard-coded limits for recursions with
limited depth.  If follow_link stops after n iterations, tell
the checker about it and it will use that in its computations.

Helge Hafting

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: procfs documentation
  2002-06-12 22:55         ` procfs documentation Tom Bradley
@ 2002-06-13 11:17           ` John Levon
  0 siblings, 0 replies; 44+ messages in thread
From: John Levon @ 2002-06-13 11:17 UTC (permalink / raw)
  To: linux-kernel

On Wed, Jun 12, 2002 at 04:55:49PM -0600, Tom Bradley wrote:

> Where can I find documentation that explains all the entries in procfs?
> 
> man 5 procfs is out of date and doesn't match.

Upgrade your man-pages.

> I am writing a user-land GUI based program that allows user to easily read
> the procfs but I can't find info on all the entries.

read the source

(and don't reply to irrelevant threads)

john

-- 
"All is change; all yields its place and goes"
	- Euripides

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13  8:30         ` Helge Hafting
@ 2002-06-13 13:24           ` Roger Larsson
  2002-06-14 10:06             ` Helge Hafting
  0 siblings, 1 reply; 44+ messages in thread
From: Roger Larsson @ 2002-06-13 13:24 UTC (permalink / raw)
  To: linux-kernel

On Thursday 13 June 2002 10.30, Helge Hafting wrote:
> Alexander Viro wrote:
> > 
> > On Wed, 12 Jun 2002, Benjamin LaHaise wrote:
> > 
> > > On Wed, Jun 12, 2002 at 06:26:55PM -0400, Alexander Viro wrote:
> > > > Not realistic - we have a recursion through the ->follow_link(), and
> > > > a lot of stuff can be called from ->follow_link().  We _do_ have a
> > > > limit on depth of recursion here, but it won't be fun to deal with.
> > >
> > > Perfection isn't what I'm looking for, rather just an approximation.
> > > Any tool would have to give up on non-trivial recursion, or have
> > 
> > ... in which case it will be useless - anything callable from path_walk()
> > will be out of its scope and that's a fairly large part of VFS, 
filesystems,
> > VM and upper halves of block devices.
> 
> The automated checker may use hard-coded limits for recursions with
> limited depth.  If follow_link stops after n iterations, tell
> the checker about it and it will use that in its computations.

Alexander Viro <viro@math.psu.edu> wrote:
> (link_path_walk->do_follow_link->foofs_follow_link->
> vfs_follow_link->link_path_walk)

It would not need to follow the recursion at all.

A simple warning "vfs_follow_link makes a recursive call back
to link_path_walk, stack space needed for each recursion is N bytes"

/RogerL


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13  6:59       ` Alexander Viro
@ 2002-06-13 17:41         ` Daniel Phillips
  2002-06-13 17:53           ` Alexander Viro
  2002-06-13 17:56         ` Andi Kleen
  1 sibling, 1 reply; 44+ messages in thread
From: Daniel Phillips @ 2002-06-13 17:41 UTC (permalink / raw)
  To: Alexander Viro, Dawson Engler; +Cc: Benjamin LaHaise, linux-kernel, mc

On Thursday 13 June 2002 08:59, Alexander Viro wrote:
> On Wed, 12 Jun 2002, Dawson Engler wrote:
> 
> > > Not realistic - we have a recursion through the ->follow_link(), and
> > > a lot of stuff can be called from ->follow_link().  We _do_ have a
> > > limit on depth of recursion here, but it won't be fun to deal with.
> > 
> > You mean following function pointers is not realistic?  Actually the
> > function pointers in linxu are pretty easy to deal with since, by
> > and large, they are set by static structure initialization and not
> > really fussed with afterwards.
> 
> I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> for everything that can be called by any of these functions.  And that's
> a _lot_ of stuff.

Then at the point of recursion a dynamic check for stack space is
needed, and [checker]'s role would be to determine the deepest static
depth, to plug into the stack check.  If we want to be sure about 
stack integrity there isn't any way around this.

-- 
Daniel

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13 17:41         ` Daniel Phillips
@ 2002-06-13 17:53           ` Alexander Viro
  2002-06-13 18:45             ` Daniel Phillips
  0 siblings, 1 reply; 44+ messages in thread
From: Alexander Viro @ 2002-06-13 17:53 UTC (permalink / raw)
  To: Daniel Phillips; +Cc: Dawson Engler, Benjamin LaHaise, linux-kernel, mc



On Thu, 13 Jun 2002, Daniel Phillips wrote:

> > I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> > ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> > for everything that can be called by any of these functions.  And that's
> > a _lot_ of stuff.
> 
> Then at the point of recursion a dynamic check for stack space is
> needed, and [checker]'s role would be to determine the deepest static
> depth, to plug into the stack check.  If we want to be sure about 
> stack integrity there isn't any way around this.

Wrong.  Check for stack _space_ will mean that maximal depth of nested
symlinks depends on syscall.  Definitely not what you want to see.
There is a static limit (no more than 5 nested), but it must be
explicitly known to checker - deducing it from code is easy for a
human, but hopeless for anything automatic.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13  6:59       ` Alexander Viro
  2002-06-13 17:41         ` Daniel Phillips
@ 2002-06-13 17:56         ` Andi Kleen
  2002-06-13 18:26           ` Alexander Viro
  1 sibling, 1 reply; 44+ messages in thread
From: Andi Kleen @ 2002-06-13 17:56 UTC (permalink / raw)
  To: Alexander Viro; +Cc: engler, linux-kernel

Alexander Viro <viro@math.psu.edu> writes:
> 
> I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> for everything that can be called by any of these functions.  And that's
> a _lot_ of stuff.

Surely an analysis pass can detect recursive function chains and flag them
(e.g. the global IPA alias analysis pass in open64 does this)

-Andi

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13 17:56         ` Andi Kleen
@ 2002-06-13 18:26           ` Alexander Viro
  2002-06-13 19:01             ` Andi Kleen
  2002-06-13 21:50             ` Dawson Engler
  0 siblings, 2 replies; 44+ messages in thread
From: Alexander Viro @ 2002-06-13 18:26 UTC (permalink / raw)
  To: Andi Kleen; +Cc: engler, linux-kernel



On 13 Jun 2002, Andi Kleen wrote:

> Alexander Viro <viro@math.psu.edu> writes:
> > 
> > I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> > ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> > for everything that can be called by any of these functions.  And that's
> > a _lot_ of stuff.
> 
> Surely an analysis pass can detect recursive function chains and flag them
> (e.g. the global IPA alias analysis pass in open64 does this)

Ugh...  OK, let me try again:

One bit of apriory information needed to get anything interesting from
this analysis:  there is a set of mutually recursive functions (see above)
and there is a limit (5) on the depth of recursion in that loop.

It has to be known to checker.  Explicitly, since
	a) automatically deducing it is not realistic
	b) cutting off the stuff behind that loop will cut off a _lot_ of
things - both in filesystems and in VFS (and in block layer, while we are
at it).

I'm not saying that checker can't be used for that analysis - it can, but
naive approach (find recursive stuff and cut it off) will not be too
interesting.  One of the main stumbling blocks - see above.  With explicit
knowledge of that one the thing will be definitely very interesting - no
arguments here.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13 17:53           ` Alexander Viro
@ 2002-06-13 18:45             ` Daniel Phillips
  0 siblings, 0 replies; 44+ messages in thread
From: Daniel Phillips @ 2002-06-13 18:45 UTC (permalink / raw)
  To: Alexander Viro; +Cc: Dawson Engler, Benjamin LaHaise, linux-kernel, mc

On Thursday 13 June 2002 19:53, Alexander Viro wrote:
> On Thu, 13 Jun 2002, Daniel Phillips wrote:
> 
> > > I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> > > ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> > > for everything that can be called by any of these functions.  And that's
> > > a _lot_ of stuff.
> > 
> > Then at the point of recursion a dynamic check for stack space is
> > needed, and [checker]'s role would be to determine the deepest static
> > depth, to plug into the stack check.  If we want to be sure about 
> > stack integrity there isn't any way around this.
> 
> Wrong.  Check for stack _space_ will mean that maximal depth of nested
> symlinks depends on syscall.  Definitely not what you want to see.
> There is a static limit (no more than 5 nested), but it must be
> explicitly known to checker - deducing it from code is easy for a
> human, but hopeless for anything automatic.

It's even hard to deduce the recursion in this case, and even if
checker is smart enough to spot it there's no way to know the
static requirement of out-of-tree filesystems.

Perhaps a BUG here is the right thing, in case the big chain of
assumptions here is inadvertently violated, in which case I'd much
rather have the system go down with a BUG than wig out in one of
the weird and wonderful ways typical of a stack overflow.

By the way:

327 /*
328  * This limits recursive symlink follows to 8, while
329  * limiting consecutive symlinks to 40.

Maybe:

327 /*
328  * This limits recursive symlink follows and consecutive symlinks.

-- 
Daniel

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13 18:26           ` Alexander Viro
@ 2002-06-13 19:01             ` Andi Kleen
  2002-06-14  0:05               ` William Lee Irwin III
  2002-06-13 21:50             ` Dawson Engler
  1 sibling, 1 reply; 44+ messages in thread
From: Andi Kleen @ 2002-06-13 19:01 UTC (permalink / raw)
  To: Alexander Viro; +Cc: Andi Kleen, engler, linux-kernel

On Thu, Jun 13, 2002 at 08:26:54PM +0200, Alexander Viro wrote:
> Ugh...  OK, let me try again:
> 
> One bit of apriory information needed to get anything interesting from
> this analysis:  there is a set of mutually recursive functions (see above)
> and there is a limit (5) on the depth of recursion in that loop.
> 
> It has to be known to checker.  Explicitly, since
> 	a) automatically deducing it is not realistic
> 	b) cutting off the stuff behind that loop will cut off a _lot_ of
> things - both in filesystems and in VFS (and in block layer, while we are
> at it).
> 
> I'm not saying that checker can't be used for that analysis - it can, but
> naive approach (find recursive stuff and cut it off) will not be too

if you see all possible paths through the program as a tree which branches 
for every decision then you only need to cut off the branches that are 
actually pointing upward the tree again. This would still allow to follow
down into the callees of the recursive function because there should be 
at least one path that is non recursive (if not Checker should definitely
complain ;) 

e.g.
       ----<-----------------+
	   v                     |
 IF   TRUE                RECURSION
-------+------ some path ----+
       |
	  ELSE                    non recursive path 
       +-------------------------- other functions ---------->


Other functions can be still checked, you only need to prune the cycle.
I have no idea if checker's algorithms actually work like this, but I could
imagine that it would be one possible implementation.

-Andi


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13 18:26           ` Alexander Viro
  2002-06-13 19:01             ` Andi Kleen
@ 2002-06-13 21:50             ` Dawson Engler
  2002-06-13 22:43               ` Oliver Xymoron
  2002-06-14  0:25               ` Alexander Viro
  1 sibling, 2 replies; 44+ messages in thread
From: Dawson Engler @ 2002-06-13 21:50 UTC (permalink / raw)
  To: Alexander Viro; +Cc: Andi Kleen, linux-kernel

> On 13 Jun 2002, Andi Kleen wrote:
> 
> > Alexander Viro <viro@math.psu.edu> writes:
> > > 
> > > I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> > > ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> > > for everything that can be called by any of these functions.  And that's
> > > a _lot_ of stuff.
> > 
> > Surely an analysis pass can detect recursive function chains and flag them
> > (e.g. the global IPA alias analysis pass in open64 does this)
> 
> Ugh...  OK, let me try again:
> 
> One bit of apriory information needed to get anything interesting from
> this analysis:  there is a set of mutually recursive functions (see above)
> and there is a limit (5) on the depth of recursion in that loop.
> 
> It has to be known to checker.  Explicitly, since
> 	a) automatically deducing it is not realistic
> 	b) cutting off the stuff behind that loop will cut off a _lot_ of
> things - both in filesystems and in VFS (and in block layer, while we are
> at it).

We're all about jamming system specific information into the checking
extensions.  It's usually just a few if statements.  So to be clear: we
can simply assume that the loop 
   link_path_walk
	->do_follow_link
		->foofs_follow_link
			->vfs_follow_link
				->link_path_walk
can happen exactly 5 times?

In general such restrictions are interesting, since they concretely
show how having a way to include system-specific knowlege into checking
is useful.  Are there any other such relationships?  The other example
I know of is the do_page_fault (sp?) routine that can potentially be
invoked at very copy_*_user site that page faults.  You need to know
this to detect certain classes of deadlock, but there's no way to
discern this path from the code without a priori knowlege.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13 21:50             ` Dawson Engler
@ 2002-06-13 22:43               ` Oliver Xymoron
  2002-06-14  0:25               ` Alexander Viro
  1 sibling, 0 replies; 44+ messages in thread
From: Oliver Xymoron @ 2002-06-13 22:43 UTC (permalink / raw)
  To: Dawson Engler; +Cc: Alexander Viro, Andi Kleen, linux-kernel

On Thu, 13 Jun 2002, Dawson Engler wrote:

> > On 13 Jun 2002, Andi Kleen wrote:
> >
> > > Alexander Viro <viro@math.psu.edu> writes:
> > > >
> > > > I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> > > > ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> > > > for everything that can be called by any of these functions.  And that's
> > > > a _lot_ of stuff.
> > >
> > > Surely an analysis pass can detect recursive function chains and flag them
> > > (e.g. the global IPA alias analysis pass in open64 does this)
> >
> > Ugh...  OK, let me try again:
> >
> > One bit of apriory information needed to get anything interesting from
> > this analysis:  there is a set of mutually recursive functions (see above)
> > and there is a limit (5) on the depth of recursion in that loop.
> >
> > It has to be known to checker.  Explicitly, since
> > 	a) automatically deducing it is not realistic
> > 	b) cutting off the stuff behind that loop will cut off a _lot_ of
> > things - both in filesystems and in VFS (and in block layer, while we are
> > at it).
>
> We're all about jamming system specific information into the checking
> extensions.  It's usually just a few if statements.  So to be clear: we
> can simply assume that the loop
>    link_path_walk
> 	->do_follow_link
> 		->foofs_follow_link
> 			->vfs_follow_link
> 				->link_path_walk
> can happen exactly 5 times?
>
> In general such restrictions are interesting, since they concretely
> show how having a way to include system-specific knowlege into checking
> is useful.  Are there any other such relationships?  The other example
> I know of is the do_page_fault (sp?) routine that can potentially be
> invoked at very copy_*_user site that page faults.  You need to know
> this to detect certain classes of deadlock, but there's no way to
> discern this path from the code without a priori knowlege.

There are various flags for memory allocation that prevent it (usually)
from being recursive (or livelocking).

Though I'm not really convinced by Al's argument. I think if you do a
breadth-first traversal of the possible call tree, as you get past a
certain predicted stack depth, you'll be able to manually prune things
that aren't of interest. Very few things should be potentially recursive
or mutally recursive and any that are probably bear closer manual
scrutiny.

How's Checker do on passing through function pointers?

-- 
 "Love the dolphins," she advised him. "Write by W.A.S.T.E.."


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13 19:01             ` Andi Kleen
@ 2002-06-14  0:05               ` William Lee Irwin III
  0 siblings, 0 replies; 44+ messages in thread
From: William Lee Irwin III @ 2002-06-14  0:05 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Alexander Viro, engler, linux-kernel

On Thu, Jun 13, 2002 at 09:01:30PM +0200, Andi Kleen wrote:
> if you see all possible paths through the program as a tree which branches 
> for every decision then you only need to cut off the branches that are 
> actually pointing upward the tree again. This would still allow to follow
> down into the callees of the recursive function because there should be 
> at least one path that is non recursive (if not Checker should definitely
> complain ;) 
> e.g.
>        ----<-----------------+
> 	   v                     |
>  IF   TRUE                RECURSION
> -------+------ some path ----+
>        |
> 	  ELSE                    non recursive path 
>        +-------------------------- other functions ---------->
> Other functions can be still checked, you only need to prune the cycle.
> I have no idea if checker's algorithms actually work like this, but I could
> imagine that it would be one possible implementation.

Why would you do it this way? AFAICT this is more naturally phrased as
cycle detection in a digraph.

Cheers,
Bill

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13 21:50             ` Dawson Engler
  2002-06-13 22:43               ` Oliver Xymoron
@ 2002-06-14  0:25               ` Alexander Viro
  1 sibling, 0 replies; 44+ messages in thread
From: Alexander Viro @ 2002-06-14  0:25 UTC (permalink / raw)
  To: Dawson Engler; +Cc: Linus Torvalds, Andi Kleen, linux-kernel



On Thu, 13 Jun 2002, Dawson Engler wrote:

> > It has to be known to checker.  Explicitly, since
> > 	a) automatically deducing it is not realistic
> > 	b) cutting off the stuff behind that loop will cut off a _lot_ of
> > things - both in filesystems and in VFS (and in block layer, while we are
> > at it).
> 
> We're all about jamming system specific information into the checking
> extensions.  It's usually just a few if statements.  So to be clear: we
> can simply assume that the loop 
>    link_path_walk
> 	->do_follow_link
> 		->foofs_follow_link
> 			->vfs_follow_link
> 				->link_path_walk
> can happen exactly 5 times?

static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
{   
        int err;
        if (current->link_count >= 5)
                goto loop;
...
        current->link_count++;
...
        err = dentry->d_inode->i_op->follow_link(dentry, nd);
        current->link_count--;
        return err;
loop:
        path_release(nd);
        return -ELOOP;
}

->link_count is modified only by the code above.

INIT_TASK has zero link_count.

new task inherits ->link_count of parent at the moment of do_fork()

It means that:
	* at any moment ->link_count is between 0 and 5
	* at any moment the call chain contains at most 6 instances of
do_follow_link()
	* if there are 6 such instances then the last of them is either the
last element of chain or it is followed by path_release().

	BTW, it shows a potential subtle problem - if we ever call do_fork()
while resolving a symlink (the only way that can happen is kernel_thread()
being called in process of symlink resolution) we will get a kernel thread
with limit for nested symlinks lower than 5.  Proposed fix (both 2.4 and 2.5):

ed kernel/fork.c <<EOF
/swappable/a
        p->link_count = 0;
.
w
EOF

Linus?


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-13 13:24           ` Roger Larsson
@ 2002-06-14 10:06             ` Helge Hafting
  0 siblings, 0 replies; 44+ messages in thread
From: Helge Hafting @ 2002-06-14 10:06 UTC (permalink / raw)
  To: Roger Larsson, linux-kernel

Roger Larsson wrote:

> > The automated checker may use hard-coded limits for recursions with
> > limited depth.  If follow_link stops after n iterations, tell
> > the checker about it and it will use that in its computations.
> 
> Alexander Viro <viro@math.psu.edu> wrote:
> > (link_path_walk->do_follow_link->foofs_follow_link->
> > vfs_follow_link->link_path_walk)
> 
> It would not need to follow the recursion at all.
> 
> A simple warning "vfs_follow_link makes a recursive call back
> to link_path_walk, stack space needed for each recursion is N bytes"
> 
That's all you can do with recursion of unknown depth, sure.

But the recursion here is known limited, and we want to know
"what is the deepest stack we can get into, following the
deepest call chain _after_ VFS recursed 5 levels of symlinks"
We know it won't recurse after that - but it might go
on calling x levels of non-recursive functions.

Hard-coding the limit for that particular call chain makes
sense as a lot of stuff may be called from it.  Or similiar,
various stuff may pile up on the stack and _then_ call into VFS
and recurse to the limit.

Printing the hardcoded assumption is probably a good idea when 
it is used to find some max depth - the kernel code might change
after all.


Helge Hafting

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-17 11:07 Andries.Brouwer
@ 2002-06-17 12:00 ` David Woodhouse
  0 siblings, 0 replies; 44+ messages in thread
From: David Woodhouse @ 2002-06-17 12:00 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: linux-kernel, viro


Andries.Brouwer@cwi.nl said:
>  Good. I think it would be worthwhile to find and submit such patches.
> Not just for this attempt of mine, but it is generally a good idea to
> keep things as uniform as possible.

Er, and to prevent us from having to go to the flash, read and possibly 
decompress the link target, every time someone opens a symlink such as 
/lib/ld-linux.so.2 :)

--
dwmw2



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
@ 2002-06-17 11:07 Andries.Brouwer
  2002-06-17 12:00 ` David Woodhouse
  0 siblings, 1 reply; 44+ messages in thread
From: Andries.Brouwer @ 2002-06-17 11:07 UTC (permalink / raw)
  To: Andries.Brouwer, dwmw2; +Cc: linux-kernel, viro

    From: David Woodhouse <dwmw2@infradead.org>

    +int jffs2_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
    +                  const char **llink, struct page **ppage)

    +    nd->flags |= LOOKUP_KFREE_NEEDED;

    Urgh. Don't do that on my behalf - we'll just switch to using 
    page_follow_link, which to be honest I thought we'd already done -- 
    there were definitely patches for it floating around.

Good. I think it would be worthwhile to find and submit such patches.
Not just for this attempt of mine, but it is generally a good idea
to keep things as uniform as possible.

I think these two ugly bits, and the entire nd argument of
prepare_follow_link can be eliminated, but apart from jffs2
that also requires a little work in proc.

Andries

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-16 20:41 Andries.Brouwer
  2002-06-16 21:33 ` Andreas Dilger
  2002-06-16 21:34 ` Alexander Viro
@ 2002-06-17 10:09 ` David Woodhouse
  2 siblings, 0 replies; 44+ messages in thread
From: David Woodhouse @ 2002-06-17 10:09 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: viro, linux-kernel


+int jffs2_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			      const char **llink, struct page **ppage)

+	nd->flags |= LOOKUP_KFREE_NEEDED;

Urgh. Don't do that on my behalf - we'll just switch to using 
page_follow_link, which to be honest I thought we'd already done -- there 
were definitely patches for it floating around.

--
dwmw2



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-16 22:05 Andries.Brouwer
@ 2002-06-16 23:57 ` Alexander Viro
  0 siblings, 0 replies; 44+ messages in thread
From: Alexander Viro @ 2002-06-16 23:57 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: linux-kernel



On Mon, 17 Jun 2002 Andries.Brouwer@cwi.nl wrote:

> >> The result of Step One is that the loop no longer touches all
> >> filesystems but lives entirely in namei.c. So, the second patch,
> >> that only changes namei.c can change the recursion into iteration.
> >> Maybe tomorrow or the day after.
> 
> > Obvious breakage: nd->flags can be clobbered by __vfs_follow_link(),
> > so your do_follow_link() and friends are broken.
> 
> Yes, I know. No doubt you are able to fix that by reading that bit
> before calling __vfs_follow_link(). It will be repaired fully
> automatically tomorrow or the day after when __vfs_follow_link()
> disappears altogether.
> 
> But that is the microscopic criticism. I was more interested in
> hearing comments on the global setup.

I don't see global setup here - just a (rather messy) change of API.
The really interesting question is how you'll handle namei.c code.
Bringing the call of __vfs_follow_link() into do_follow_link() is
not interesting in itself - you still have recursion to eliminate and
that's where the hell will begin.  Change of ->follow_link() prototype
per se doesn't buy anything and is as you admit kludgy.  If you need
it for really interesting stuff - please do show that stuff.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
@ 2002-06-16 22:05 Andries.Brouwer
  2002-06-16 23:57 ` Alexander Viro
  0 siblings, 1 reply; 44+ messages in thread
From: Andries.Brouwer @ 2002-06-16 22:05 UTC (permalink / raw)
  To: Andries.Brouwer, viro; +Cc: linux-kernel

>> The result of Step One is that the loop no longer touches all
>> filesystems but lives entirely in namei.c. So, the second patch,
>> that only changes namei.c can change the recursion into iteration.
>> Maybe tomorrow or the day after.

> Obvious breakage: nd->flags can be clobbered by __vfs_follow_link(),
> so your do_follow_link() and friends are broken.

Yes, I know. No doubt you are able to fix that by reading that bit
before calling __vfs_follow_link(). It will be repaired fully
automatically tomorrow or the day after when __vfs_follow_link()
disappears altogether.

But that is the microscopic criticism. I was more interested in
hearing comments on the global setup.

Andries

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-16 20:41 Andries.Brouwer
  2002-06-16 21:33 ` Andreas Dilger
@ 2002-06-16 21:34 ` Alexander Viro
  2002-06-17 10:09 ` David Woodhouse
  2 siblings, 0 replies; 44+ messages in thread
From: Alexander Viro @ 2002-06-16 21:34 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: linux-kernel



On Sun, 16 Jun 2002 Andries.Brouwer@cwi.nl wrote:

> > If you want to try - by all means, go ahead,
> > I'll be glad to see the current situation improved.
> 
> OK, let me attempt something in the style as sketched earlier today.
> There are two stages: Step One is changing follow_link in all
> filesystems to not call vfs_follow_link but to return immediately
> so that the caller can call vfs_follow_link and release resources.
> 
> Somewhat boring work. The patch is below.
> I do not doubt that you'll find all typos, so I did not try
> to compile and test.
> 
> Yes, so the question was how to communicate between filesystem
> and namei.c about what resources have to be freed.
> I considered (i) calling the filesystem with a preallocated page,
> (ii) requiring a page, (iii) requiring page or kmalloc,
> (iv) letting the filesystem supply a callback.
> 
> Since I am lazy and (iii) was easiest, I did (iii).
> That is also reasonable: in almost all cases it really is a page,
> and a flag can signal otherwise.
> 
> The communication between filesystems and namei.c uses
> char **link and page **page and two bits in nd->flags.
> The filesystem gets char **link and page **page.
> Its job is to fill *link with the string, but in case
> it did the complete follow_link itself (as happens under /proc)
> it sets the DONE flag.
> Now namei.c will release page when it is nonzero, and will free
> link when the filesystem tells that that is needed in the KFREE flag.
> 
> What is wrong? You will tell me, but what I disliked while doing
> this was the name prepare_follow_link. Too long. A second time
> I might pick get_link or so.
> 
> The result of Step One is that the loop no longer touches all
> filesystems but lives entirely in namei.c. So, the second patch,
> that only changes namei.c can change the recursion into iteration.
> Maybe tomorrow or the day after.

Obvious breakage: nd->flags can be clobbered by __vfs_follow_link(), so
your do_follow_link() and friends are broken.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-16 20:41 Andries.Brouwer
@ 2002-06-16 21:33 ` Andreas Dilger
  2002-06-16 21:34 ` Alexander Viro
  2002-06-17 10:09 ` David Woodhouse
  2 siblings, 0 replies; 44+ messages in thread
From: Andreas Dilger @ 2002-06-16 21:33 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: viro, linux-kernel

On Jun 16, 2002  22:41 +0200, Andries.Brouwer@cwi.nl wrote:
> ...what I disliked while doing this was the name prepare_follow_link.
> Too long. A second time I might pick get_link or so.

prepare_link()?

Cheers, Andreas
--
Andreas Dilger
http://www-mddsp.enel.ucalgary.ca/People/adilger/
http://sourceforge.net/projects/ext2resize/


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
@ 2002-06-16 20:41 Andries.Brouwer
  2002-06-16 21:33 ` Andreas Dilger
                   ` (2 more replies)
  0 siblings, 3 replies; 44+ messages in thread
From: Andries.Brouwer @ 2002-06-16 20:41 UTC (permalink / raw)
  To: viro; +Cc: linux-kernel

> If you want to try - by all means, go ahead,
> I'll be glad to see the current situation improved.

OK, let me attempt something in the style as sketched earlier today.
There are two stages: Step One is changing follow_link in all
filesystems to not call vfs_follow_link but to return immediately
so that the caller can call vfs_follow_link and release resources.

Somewhat boring work. The patch is below.
I do not doubt that you'll find all typos, so I did not try
to compile and test.

Yes, so the question was how to communicate between filesystem
and namei.c about what resources have to be freed.
I considered (i) calling the filesystem with a preallocated page,
(ii) requiring a page, (iii) requiring page or kmalloc,
(iv) letting the filesystem supply a callback.

Since I am lazy and (iii) was easiest, I did (iii).
That is also reasonable: in almost all cases it really is a page,
and a flag can signal otherwise.

The communication between filesystems and namei.c uses
char **link and page **page and two bits in nd->flags.
The filesystem gets char **link and page **page.
Its job is to fill *link with the string, but in case
it did the complete follow_link itself (as happens under /proc)
it sets the DONE flag.
Now namei.c will release page when it is nonzero, and will free
link when the filesystem tells that that is needed in the KFREE flag.

What is wrong? You will tell me, but what I disliked while doing
this was the name prepare_follow_link. Too long. A second time
I might pick get_link or so.

The result of Step One is that the loop no longer touches all
filesystems but lives entirely in namei.c. So, the second patch,
that only changes namei.c can change the recursion into iteration.
Maybe tomorrow or the day after.

Andries

diff -r -u linux-2.5.21x/linux/Documentation/filesystems/Locking linux-2.5.21w/linux/Documentation/filesystems/Locking
--- linux-2.5.21x/linux/Documentation/filesystems/Locking	Sun Jun  9 07:28:48 2002
+++ linux-2.5.21w/linux/Documentation/filesystems/Locking	Sun Jun 16 22:03:21 2002
@@ -39,7 +39,8 @@
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
 	int (*readlink) (struct dentry *, char *,int);
-	int (*follow_link) (struct dentry *, struct nameidata *);
+	int (*prepare_follow_link) (struct dentry *, struct nameidata *,
+			const char **, struct page **);
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
@@ -62,7 +63,7 @@
 rmdir:		no	yes (both)	(see below)
 rename:		no	yes (all)	(see below)
 readlink:	no	no
-follow_link:	no	no
+prepare_follow_link:	no	no
 truncate:	no	yes		(see below)
 setattr:	no	yes
 permission:	yes	no
diff -r -u linux-2.5.21x/linux/Documentation/filesystems/porting linux-2.5.21w/linux/Documentation/filesystems/porting
--- linux-2.5.21x/linux/Documentation/filesystems/porting	Sun Jun  9 07:28:29 2002
+++ linux-2.5.21w/linux/Documentation/filesystems/porting	Sun Jun 16 22:03:21 2002
@@ -196,7 +196,7 @@
 [mandatory]
 
 ->revalidate() is gone.  If your filesystem had it - provide ->getattr()
-and let it call whatever you had as ->revlidate() + (for symlinks that
+and let it call whatever you had as ->revalidate() + (for symlinks that
 had ->revalidate()) add calls in ->follow_link()/->readlink().
 
 ---
diff -r -u linux-2.5.21x/linux/fs/affs/symlink.c linux-2.5.21w/linux/fs/affs/symlink.c
--- linux-2.5.21x/linux/fs/affs/symlink.c	Sun Jun  9 07:27:47 2002
+++ linux-2.5.21w/linux/fs/affs/symlink.c	Sun Jun 16 22:03:21 2002
@@ -82,7 +82,7 @@
 };
 
 struct inode_operations affs_symlink_inode_operations = {
-	readlink:	page_readlink,
-	follow_link:	page_follow_link,
-	setattr:	affs_notify_change,
+	readlink:		page_readlink,
+	prepare_follow_link:	page_prepare_follow_link,
+	setattr:		affs_notify_change,
 };
diff -r -u linux-2.5.21x/linux/fs/autofs/symlink.c linux-2.5.21w/linux/fs/autofs/symlink.c
--- linux-2.5.21x/linux/fs/autofs/symlink.c	Sun Jun  9 07:28:14 2002
+++ linux-2.5.21w/linux/fs/autofs/symlink.c	Sun Jun 16 22:03:21 2002
@@ -18,13 +18,15 @@
 	return vfs_readlink(dentry, buffer, buflen, s);
 }
 
-static int autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static int
+autofs_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			   const char **llink, struct page **ppage)
 {
-	char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data;
-	return vfs_follow_link(nd, s);
+	*llink =((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data;
+	return 0;
 }
 
 struct inode_operations autofs_symlink_inode_operations = {
-	readlink:	autofs_readlink,
-	follow_link:	autofs_follow_link
+	readlink:		autofs_readlink,
+	prepare_follow_link:	autofs_prepare_follow_link
 };
diff -r -u linux-2.5.21x/linux/fs/autofs4/symlink.c linux-2.5.21w/linux/fs/autofs4/symlink.c
--- linux-2.5.21x/linux/fs/autofs4/symlink.c	Sun Jun  9 07:30:37 2002
+++ linux-2.5.21w/linux/fs/autofs4/symlink.c	Sun Jun 16 22:03:21 2002
@@ -19,14 +19,17 @@
 	return vfs_readlink(dentry, buffer, buflen, ino->u.symlink);
 }
 
-static int autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
+static int
+autofs4_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			    const char **llink, struct page **ppage)
 {
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 
-	return vfs_follow_link(nd, ino->u.symlink);
+	*llink = ino->u.symlink;
+	return 0;
 }
 
 struct inode_operations autofs4_symlink_inode_operations = {
-	readlink:	autofs4_readlink,
-	follow_link:	autofs4_follow_link
+	readlink:		autofs4_readlink,
+	prepare_follow_link:	autofs4_prepare_follow_link
 };
diff -r -u linux-2.5.21x/linux/fs/bad_inode.c linux-2.5.21w/linux/fs/bad_inode.c
--- linux-2.5.21x/linux/fs/bad_inode.c	Sun Jun  9 07:27:47 2002
+++ linux-2.5.21w/linux/fs/bad_inode.c	Sun Jun 16 22:03:21 2002
@@ -16,9 +16,12 @@
  * so that a bad root inode can at least be unmounted. To do this
  * we must dput() the base and return the dentry with a dget().
  */
-static int bad_follow_link(struct dentry *dent, struct nameidata *nd)
+static int
+bad_prepare_follow_link(struct dentry *dent, struct nameidata *nd,
+			const char **llink, struct page **ppage)
 {
-	return vfs_follow_link(nd, ERR_PTR(-EIO));
+	*llink = ERR_PTR(-EIO);
+	return 0;
 }
 
 static int return_EIO(void)
@@ -57,7 +60,7 @@
 	mknod:		EIO_ERROR,
 	rename:		EIO_ERROR,
 	readlink:	EIO_ERROR,
-	follow_link:	bad_follow_link,
+	prepare_follow_link:	bad_prepare_follow_link,
 	truncate:	EIO_ERROR,
 	permission:	EIO_ERROR,
 	getattr:	EIO_ERROR,
diff -r -u linux-2.5.21x/linux/fs/coda/cnode.c linux-2.5.21w/linux/fs/coda/cnode.c
--- linux-2.5.21x/linux/fs/coda/cnode.c	Sun Jun  9 07:28:02 2002
+++ linux-2.5.21w/linux/fs/coda/cnode.c	Sun Jun 16 22:03:21 2002
@@ -26,9 +26,9 @@
 }
 
 static struct inode_operations coda_symlink_inode_operations = {
-	readlink:	page_readlink,
-	follow_link:	page_follow_link,
-	setattr:	coda_setattr,
+	readlink:		page_readlink,
+	prepare_follow_link:	page_prepare_follow_link,
+	setattr:		coda_setattr,
 };
 
 /* cnode.c */
diff -r -u linux-2.5.21x/linux/fs/coda/symlink.c linux-2.5.21w/linux/fs/coda/symlink.c
--- linux-2.5.21x/linux/fs/coda/symlink.c	Sun Jun  9 07:28:04 2002
+++ linux-2.5.21w/linux/fs/coda/symlink.c	Sun Jun 16 22:03:21 2002
@@ -32,7 +32,7 @@
 
 	lock_kernel();
 	cii = ITOC(inode);
-	coda_vfs_stat.follow_link++;
+	coda_vfs_stat.prepare_follow_link++;
 
 	error = venus_readlink(inode->i_sb, &cii->c_fid, p, &len);
 	unlock_kernel();
diff -r -u linux-2.5.21x/linux/fs/devfs/base.c linux-2.5.21w/linux/fs/devfs/base.c
--- linux-2.5.21x/linux/fs/devfs/base.c	Sun Jun  9 07:27:42 2002
+++ linux-2.5.21w/linux/fs/devfs/base.c	Sun Jun 16 22:03:21 2002
@@ -3210,16 +3210,19 @@
     return err;
 }   /*  End Function devfs_readlink  */
 
-static int devfs_follow_link (struct dentry *dentry, struct nameidata *nd)
+static int
+devfs_prepare_follow_link (struct dentry *dentry, struct nameidata *nd,
+			   const char **llink, struct page **ppage)
 {
     int err;
     struct devfs_entry *de;
 
     de = get_devfs_entry_from_vfs_inode (dentry->d_inode);
-    if (!de) return -ENODEV;
-    err = vfs_follow_link (nd, de->u.symlink.linkname);
-    return err;
-}   /*  End Function devfs_follow_link  */
+    if (!de)
+	return -ENODEV;
+    *llink = de->u.symlink.linkname;
+    return 0;
+}   /*  End Function devfs_prepare_follow_link  */
 
 static struct inode_operations devfs_iops =
 {
@@ -3239,9 +3242,9 @@
 
 static struct inode_operations devfs_symlink_iops =
 {
-    readlink:       devfs_readlink,
-    follow_link:    devfs_follow_link,
-    setattr:        devfs_notify_change,
+    readlink:	            devfs_readlink,
+    prepare_follow_link:    devfs_prepare_follow_link,
+    setattr:                devfs_notify_change,
 };
 
 static int devfs_fill_super (struct super_block *sb, void *data, int silent)
diff -r -u linux-2.5.21x/linux/fs/ext2/symlink.c linux-2.5.21w/linux/fs/ext2/symlink.c
--- linux-2.5.21x/linux/fs/ext2/symlink.c	Sun Jun  9 07:28:06 2002
+++ linux-2.5.21w/linux/fs/ext2/symlink.c	Sun Jun 16 22:03:21 2002
@@ -25,13 +25,16 @@
 	return vfs_readlink(dentry, buffer, buflen, (char *)ei->i_data);
 }
 
-static int ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
+static int
+ext2_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			 const char **llink, struct page **ppage)
 {
 	struct ext2_inode_info *ei = EXT2_I(dentry->d_inode);
-	return vfs_follow_link(nd, (char *)ei->i_data);
+	*llink = (char *) ei->i_data;
+	return 0;
 }
 
 struct inode_operations ext2_fast_symlink_inode_operations = {
-	readlink:	ext2_readlink,
-	follow_link:	ext2_follow_link,
+	readlink:		ext2_readlink,
+	prepare_follow_link:	ext2_prepare_follow_link,
 };
diff -r -u linux-2.5.21x/linux/fs/ext3/symlink.c linux-2.5.21w/linux/fs/ext3/symlink.c
--- linux-2.5.21x/linux/fs/ext3/symlink.c	Sun Jun  9 07:30:22 2002
+++ linux-2.5.21w/linux/fs/ext3/symlink.c	Sun Jun 16 22:03:21 2002
@@ -27,13 +27,17 @@
 	return vfs_readlink(dentry, buffer, buflen, (char*)ei->i_data);
 }
 
-static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
+static int
+ext3_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			 const char **llink, struct page **ppage)
 {
 	struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
-	return vfs_follow_link(nd, (char*)ei->i_data);
+
+	*llink = (char*) ei->i_data;
+	return 0;
 }
 
 struct inode_operations ext3_fast_symlink_inode_operations = {
-	readlink:	ext3_readlink,		/* BKL not held.  Don't need */
-	follow_link:	ext3_follow_link,	/* BKL not held.  Don't need */
+	readlink:		ext3_readlink,		  /* BKL not held */
+	prepare_follow_link:	ext3_prepare_follow_link, /* BKL not held */
 };
diff -r -u linux-2.5.21x/linux/fs/freevxfs/vxfs_immed.c linux-2.5.21w/linux/fs/freevxfs/vxfs_immed.c
--- linux-2.5.21x/linux/fs/freevxfs/vxfs_immed.c	Sun Jun  9 07:27:22 2002
+++ linux-2.5.21w/linux/fs/freevxfs/vxfs_immed.c	Sun Jun 16 22:03:21 2002
@@ -40,19 +40,20 @@
 
 
 static int	vxfs_immed_readlink(struct dentry *, char *, int);
-static int	vxfs_immed_follow_link(struct dentry *, struct nameidata *);
-
+static int	vxfs_immed_prepare_follow_link(
+			struct dentry *, struct nameidata *,
+			const char **llink, struct page **ppage);
 static int	vxfs_immed_readpage(struct file *, struct page *);
 
 /*
  * Inode operations for immed symlinks.
  *
- * Unliked all other operations we do not go through the pagecache,
+ * Unlike all other operations we do not go through the pagecache,
  * but do all work directly on the inode.
  */
 struct inode_operations vxfs_immed_symlink_iops = {
-	.readlink =		vxfs_immed_readlink,
-	.follow_link =		vxfs_immed_follow_link,
+	.readlink =			vxfs_immed_readlink,
+	.prepare_follow_link =		vxfs_immed_prepare_follow_link,
 };
 
 /*
@@ -85,23 +86,27 @@
 }
 
 /**
- * vxfs_immed_follow_link - follow immed symlink
+ * vxfs_immed_prepare_follow_link - find link contents
  * @dp:		dentry for the link
  * @np:		pathname lookup data for the current path walk
+ * @llink:	store pointer to link contents here
+ * @ppage:	in case a temp page was used, store it here
  *
  * Description:
- *   vxfs_immed_follow_link restarts the pathname lookup with
- *   the data obtained from @dp.
+ *   vxfs_immed_prepare_follow_link retrieves the contents of a
+ *   symlink, for use by vfs_follow_link().
  *
  * Returns:
  *   Zero on success, else a negative error code.
  */
 static int
-vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np)
+vxfs_immed_prepare_follow_link(struct dentry *dp, struct nameidata *np,
+			       const char **llink, struct page **ppage)
 {
 	struct vxfs_inode_info		*vip = VXFS_INO(dp->d_inode);
 
-	return (vfs_follow_link(np, vip->vii_immed.vi_immed));
+	*llink = vip->vii_immed.vi_immed;
+	return 0;
 }
 
 /**
diff -r -u linux-2.5.21x/linux/fs/jffs2/symlink.c linux-2.5.21w/linux/fs/jffs2/symlink.c
--- linux-2.5.21x/linux/fs/jffs2/symlink.c	Sun Jun  9 07:26:25 2002
+++ linux-2.5.21w/linux/fs/jffs2/symlink.c	Sun Jun 16 22:03:21 2002
@@ -42,13 +42,14 @@
 #include "nodelist.h"
 
 int jffs2_readlink(struct dentry *dentry, char *buffer, int buflen);
-int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd);
+int jffs2_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			      const char **llink, struct page **ppage);
 
 struct inode_operations jffs2_symlink_inode_operations =
 {	
-	readlink:	jffs2_readlink,
-	follow_link:	jffs2_follow_link,
-	setattr:	jffs2_setattr
+	readlink:		jffs2_readlink,
+	prepare_follow_link:	jffs2_prepare_follow_link,
+	setattr:		jffs2_setattr
 };
 
 int jffs2_readlink(struct dentry *dentry, char *buffer, int buflen)
@@ -56,7 +57,8 @@
 	unsigned char *kbuf;
 	int ret;
 
-	kbuf = jffs2_getlink(JFFS2_SB_INFO(dentry->d_inode->i_sb), JFFS2_INODE_INFO(dentry->d_inode));
+	kbuf = jffs2_getlink(JFFS2_SB_INFO(dentry->d_inode->i_sb),
+			     JFFS2_INODE_INFO(dentry->d_inode));
 	if (IS_ERR(kbuf))
 		return PTR_ERR(kbuf);
 
@@ -65,17 +67,21 @@
 	return ret;
 }
 
-int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
+int jffs2_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			      const char **llink, struct page **ppage)
 {
 	unsigned char *buf;
 	int ret;
 
-	buf = jffs2_getlink(JFFS2_SB_INFO(dentry->d_inode->i_sb), JFFS2_INODE_INFO(dentry->d_inode));
+	buf = jffs2_getlink(JFFS2_SB_INFO(dentry->d_inode->i_sb),
+			    JFFS2_INODE_INFO(dentry->d_inode));
 
 	if (IS_ERR(buf))
 		return PTR_ERR(buf);
 
-	ret = vfs_follow_link(nd, buf);
-	kfree(buf);
-	return ret;
+	/* free buf when done */
+	nd->flags |= LOOKUP_KFREE_NEEDED;
+
+	*llink = buf;
+	return 0;
 }
diff -r -u linux-2.5.21x/linux/fs/jffs2/wbuf.c linux-2.5.21w/linux/fs/jffs2/wbuf.c
--- linux-2.5.21x/linux/fs/jffs2/wbuf.c	Sun Jun  9 07:28:14 2002
+++ linux-2.5.21w/linux/fs/jffs2/wbuf.c	Sun Jun 16 22:03:21 2002
@@ -367,7 +367,7 @@
 		 * We have the raw data without ECC correction in the buffer, maybe 
 		 * we are lucky and all data or parts are correct. We check the node.
 		 * If data are corrupted node check will sort it out.
-		 * We keep this block, it will fail on write or erase and the we
+		 * We keep this block, it will fail on write or erase and then we
 		 * mark it bad. Or should we do that now? But we should give him a chance.
 		 * Maybe we had a system crash or power loss before the ecc write or  
 		 * a erase was completed.
diff -r -u linux-2.5.21x/linux/fs/jfs/symlink.c linux-2.5.21w/linux/fs/jfs/symlink.c
--- linux-2.5.21x/linux/fs/jfs/symlink.c	Sun Jun  9 07:30:01 2002
+++ linux-2.5.21w/linux/fs/jfs/symlink.c	Sun Jun 16 22:03:21 2002
@@ -20,20 +20,22 @@
 #include "jfs_incore.h"
 
 static int jfs_readlink(struct dentry *, char *buffer, int buflen);
-static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd);
+static int jfs_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+				   const char **llink, struct page **ppage);
 
 /*
  * symlinks can't do much...
  */
 struct inode_operations jfs_symlink_inode_operations = {
-	readlink:	jfs_readlink,
-	follow_link:	jfs_follow_link,
+	readlink:		jfs_readlink,
+	prepare_follow_link:	jfs_prepare_follow_link,
 };
 
-static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static int jfs_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+				   const char **llink, struct page **ppage)
 {
-	char *s = JFS_IP(dentry->d_inode)->i_inline;
-	return vfs_follow_link(nd, s);
+	*llink = JFS_IP(dentry->d_inode)->i_inline;
+	return 0;
 }
 
 static int jfs_readlink(struct dentry *dentry, char *buffer, int buflen)
diff -r -u linux-2.5.21x/linux/fs/minix/inode.c linux-2.5.21w/linux/fs/minix/inode.c
--- linux-2.5.21x/linux/fs/minix/inode.c	Sun Jun  9 07:30:19 2002
+++ linux-2.5.21w/linux/fs/minix/inode.c	Sun Jun 16 22:03:21 2002
@@ -342,9 +342,9 @@
 };
 
 static struct inode_operations minix_symlink_inode_operations = {
-	readlink:	page_readlink,
-	follow_link:	page_follow_link,
-	getattr:	minix_getattr,
+	readlink:		page_readlink,
+	prepare_follow_link:	page_prepare_follow_link,
+	getattr:		minix_getattr,
 };
 
 void minix_set_inode(struct inode *inode, dev_t rdev)
diff -r -u linux-2.5.21x/linux/fs/namei.c linux-2.5.21w/linux/fs/namei.c
--- linux-2.5.21x/linux/fs/namei.c	Sun Jun  9 07:28:50 2002
+++ linux-2.5.21w/linux/fs/namei.c	Sun Jun 16 22:03:21 2002
@@ -384,6 +384,38 @@
 	return result;
 }
 
+static inline int
+__vfs_follow_link(struct nameidata *nd, const char *link);
+
+static inline int
+do_follow_link_nocount(struct dentry *dentry, struct nameidata *nd)
+{
+	const char *link = NULL;
+	struct page *page = NULL;
+	int err;
+
+	UPDATE_ATIME(dentry->d_inode);
+	err = dentry->d_inode->i_op->prepare_follow_link(dentry, nd,
+							 &link, &page);
+
+	/* do actual follow_link() unless the prepare already did all */
+	if (nd->flags & LOOKUP_FOLLOW_DONE)
+		nd->flags &= ~LOOKUP_FOLLOW_DONE;
+	else if (err == 0)
+		err = __vfs_follow_link(nd, link);
+
+	/* free resources used by the filesystem: link or page */
+	if (nd->flags & LOOKUP_KFREE_NEEDED) {
+		nd->flags &= ~LOOKUP_KFREE_NEEDED;
+		kfree (link);
+	} else if (page) {
+		kunmap(page);
+		page_cache_release(page);
+	}
+
+	return err;
+}
+
 /*
  * This limits recursive symlink follows to 8, while
  * limiting consecutive symlinks to 40.
@@ -394,6 +426,7 @@
 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	int err;
+
 	if (current->link_count >= 5)
 		goto loop;
 	if (current->total_link_count >= 40)
@@ -402,10 +435,12 @@
 		current->state = TASK_RUNNING;
 		schedule();
 	}
+
 	current->link_count++;
 	current->total_link_count++;
-	UPDATE_ATIME(dentry->d_inode);
-	err = dentry->d_inode->i_op->follow_link(dentry, nd);
+
+	err = do_follow_link_nocount(dentry, nd);
+
 	current->link_count--;
 	return err;
 loop:
@@ -648,7 +683,7 @@
 		if (!inode->i_op)
 			break;
 
-		if (inode->i_op->follow_link) {
+		if (inode->i_op->prepare_follow_link) {
 			mntget(next.mnt);
 			dget_locked(next.dentry);
 			unlock_nd(nd);
@@ -703,7 +738,8 @@
 		follow_mount(&next.mnt, &next.dentry);
 		inode = next.dentry->d_inode;
 		if ((lookup_flags & LOOKUP_FOLLOW)
-		    && inode && inode->i_op && inode->i_op->follow_link) {
+		    && inode && inode->i_op
+		    && inode->i_op->prepare_follow_link) {
 			mntget(next.mnt);
 			dget_locked(next.dentry);
 			unlock_nd(nd);
@@ -767,8 +803,8 @@
 	if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
 		struct nameidata nd_root;
 		/*
-		 * NAME was not found in alternate root or it's a directory.  Try to find
-		 * it in the normal root:
+		 * NAME was not found in alternate root or it's a directory.
+		 * Try to find it in the normal root:
 		 */
 		nd_root.last_type = LAST_ROOT;
 		nd_root.flags = nd->flags;
@@ -871,8 +907,7 @@
 		spin_lock(&dcache_lock);
 		nd->mnt = current->fs->rootmnt;
 		nd->dentry = current->fs->root;
-	}
-	else{
+	} else {
 		spin_lock(&dcache_lock);
 		nd->mnt = current->fs->pwdmnt;
 		nd->dentry = current->fs->pwd;
@@ -1303,7 +1338,8 @@
 	error = -ENOENT;
 	if (!dentry->d_inode)
 		goto exit_dput;
-	if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
+	if (dentry->d_inode->i_op &&
+	    dentry->d_inode->i_op->prepare_follow_link)
 		goto do_link;
 
 	dput(nd->dentry);
@@ -1337,8 +1373,8 @@
 	 * stored in nd->last.name and we will have to putname() it when we
 	 * are done. Procfs-like symlinks just set LAST_BIND.
 	 */
-	UPDATE_ATIME(dentry->d_inode);
-	error = dentry->d_inode->i_op->follow_link(dentry, nd);
+	error = do_follow_link_nocount(dentry, nd);
+
 	dput(dentry);
 	if (error)
 		return error;
@@ -2065,8 +2101,9 @@
 	}
 	lock_nd(nd);
 	res = link_path_walk(link, nd);
+
 out:
-	if (current->link_count || res || nd->last_type!=LAST_NORM)
+	if (current->link_count || res || nd->last_type != LAST_NORM)
 		return res;
 	/*
 	 * If it is an iterative symlinks resolution in open_namei() we
@@ -2084,18 +2121,13 @@
 	return PTR_ERR(link);
 }
 
-int vfs_follow_link(struct nameidata *nd, const char *link)
-{
-	return __vfs_follow_link(nd, link);
-}
-
 /* get the link contents into pagecache */
 static char *page_getlink(struct dentry * dentry, struct page **ppage)
 {
 	struct page * page;
 	struct address_space *mapping = dentry->d_inode->i_mapping;
-	page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
-				NULL);
+	page = read_cache_page(mapping, 0,
+			       (filler_t *) mapping->a_ops->readpage, NULL);
 	if (IS_ERR(page))
 		goto sync_fail;
 	wait_on_page_locked(page);
@@ -2124,16 +2156,11 @@
 	return res;
 }
 
-int page_follow_link(struct dentry *dentry, struct nameidata *nd)
+int page_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			     const char **llink, struct page **ppage)
 {
-	struct page *page = NULL;
-	char *s = page_getlink(dentry, &page);
-	int res = __vfs_follow_link(nd, s);
-	if (page) {
-		kunmap(page);
-		page_cache_release(page);
-	}
-	return res;
+	*llink = page_getlink(dentry, ppage);
+	return 0;
 }
 
 int page_symlink(struct inode *inode, const char *symname, int len)
@@ -2177,6 +2204,6 @@
 }
 
 struct inode_operations page_symlink_inode_operations = {
-	readlink:	page_readlink,
-	follow_link:	page_follow_link,
+	readlink:		page_readlink,
+	prepare_follow_link:	page_prepare_follow_link,
 };
diff -r -u linux-2.5.21x/linux/fs/namespace.c linux-2.5.21w/linux/fs/namespace.c
--- linux-2.5.21x/linux/fs/namespace.c	Sun Jun  9 07:30:41 2002
+++ linux-2.5.21w/linux/fs/namespace.c	Sun Jun 16 22:03:21 2002
@@ -363,7 +363,7 @@
 	struct nameidata nd;
 	int retval;
 
-	retval = __user_walk(name, LOOKUP_FOLLOW, &nd);
+	retval = user_path_walk(name, &nd);
 	if (retval)
 		goto out;
 	retval = -EINVAL;
@@ -908,14 +908,14 @@
 
 	lock_kernel();
 
-	error = __user_walk(new_root, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
+	error = user_path_walk_dir(new_root, &new_nd);
 	if (error)
 		goto out0;
 	error = -EINVAL;
 	if (!check_mnt(new_nd.mnt))
 		goto out1;
 
-	error = __user_walk(put_old, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
+	error = user_path_walk_dir(put_old, &old_nd);
 	if (error)
 		goto out1;
 
diff -r -u linux-2.5.21x/linux/fs/ncpfs/inode.c linux-2.5.21w/linux/fs/ncpfs/inode.c
--- linux-2.5.21x/linux/fs/ncpfs/inode.c	Sun Jun  9 07:30:03 2002
+++ linux-2.5.21w/linux/fs/ncpfs/inode.c	Sun Jun 16 22:03:21 2002
@@ -244,9 +244,9 @@
 }
 
 static struct inode_operations ncp_symlink_inode_operations = {
-	readlink:	page_readlink,
-	follow_link:	page_follow_link,
-	setattr:	ncp_notify_change,
+	readlink:		page_readlink,
+	prepare_follow_link:	page_prepare_follow_link,
+	setattr:		ncp_notify_change,
 };
 
 /*
diff -r -u linux-2.5.21x/linux/fs/nfs/symlink.c linux-2.5.21w/linux/fs/nfs/symlink.c
--- linux-2.5.21x/linux/fs/nfs/symlink.c	Sun Jun  9 07:26:23 2002
+++ linux-2.5.21w/linux/fs/nfs/symlink.c	Sun Jun 16 22:03:21 2002
@@ -59,7 +59,7 @@
 	if (page)
 		goto read_failed;
 	page = read_cache_page(&inode->i_data, 0,
-				(filler_t *)nfs_symlink_filler, inode);
+			       (filler_t *) nfs_symlink_filler, inode);
 	if (IS_ERR(page))
 		goto read_failed;
 	if (!PageUptodate(page))
@@ -87,24 +87,19 @@
 	return res;
 }
 
-static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static int nfs_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+				   const char **llink, struct page **ppage)
 {
-	struct inode *inode = dentry->d_inode;
-	struct page *page = NULL;
-	int res = vfs_follow_link(nd, nfs_getlink(inode,&page));
-	if (page) {
-		kunmap(page);
-		page_cache_release(page);
-	}
-	return res;
+	*llink = nfs_getlink(dentry->d_inode, ppage);
+	return 0;
 }
 
 /*
  * symlinks can't do much...
  */
 struct inode_operations nfs_symlink_inode_operations = {
-	readlink:	nfs_readlink,
-	follow_link:	nfs_follow_link,
-	getattr:	nfs_getattr,
-	setattr:	nfs_setattr,
+	readlink:		nfs_readlink,
+	prepare_follow_link:	nfs_prepare_follow_link,
+	getattr:		nfs_getattr,
+	setattr:		nfs_setattr,
 };
diff -r -u linux-2.5.21x/linux/fs/open.c linux-2.5.21w/linux/fs/open.c
--- linux-2.5.21x/linux/fs/open.c	Sun Jun  9 07:26:54 2002
+++ linux-2.5.21w/linux/fs/open.c	Sun Jun 16 22:03:21 2002
@@ -373,7 +373,7 @@
 	struct nameidata nd;
 	int error;
 
-	error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
+	error = user_path_walk_dir(filename, &nd);
 	if (error)
 		goto out;
 
diff -r -u linux-2.5.21x/linux/fs/proc/base.c linux-2.5.21w/linux/fs/proc/base.c
--- linux-2.5.21x/linux/fs/proc/base.c	Sun Jun  9 07:26:58 2002
+++ linux-2.5.21w/linux/fs/proc/base.c	Sun Jun 16 22:03:21 2002
@@ -525,7 +525,9 @@
 	permission:	proc_permission,
 };
 
-static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
+static int
+proc_pid_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			     const char **llink, struct page **ppage)
 {
 	struct inode *inode = dentry->d_inode;
 	int error = -EACCES;
@@ -542,6 +544,7 @@
 	error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
 	nd->last_type = LAST_BIND;
 out:
+	nd->flags |= LOOKUP_FOLLOW_DONE; /* no vfs_follow_link() required */
 	return error;
 }
 
@@ -594,8 +597,8 @@
 }
 
 static struct inode_operations proc_pid_link_inode_operations = {
-	readlink:	proc_pid_readlink,
-	follow_link:	proc_pid_follow_link
+	readlink:		proc_pid_readlink,
+	prepare_follow_link:	proc_pid_prepare_follow_link
 };
 
 #define NUMBUF 10
@@ -1057,16 +1060,25 @@
 	return vfs_readlink(dentry,buffer,buflen,tmp);
 }
 
-static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-	char tmp[30];
+static int
+proc_self_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			      const char **llink, struct page **ppage)
+{
+	char *tmp = kmalloc(30, GFP_USER);
+	if (tmp == NULL) {
+		path_release(nd);
+		return -ENOMEM;
+	}
+
+	nd->flags |= LOOKUP_KFREE_NEEDED;
 	sprintf(tmp, "%d", current->pid);
-	return vfs_follow_link(nd,tmp);
+	*llink = tmp;
+	return 0;
 }	
 
 static struct inode_operations proc_self_inode_operations = {
-	readlink:	proc_self_readlink,
-	follow_link:	proc_self_follow_link,
+	readlink:		proc_self_readlink,
+	prepare_follow_link:	proc_self_prepare_follow_link,
 };
 
 /* SMP-safe */
diff -r -u linux-2.5.21x/linux/fs/proc/generic.c linux-2.5.21w/linux/fs/proc/generic.c
--- linux-2.5.21x/linux/fs/proc/generic.c	Sun Jun  9 07:31:31 2002
+++ linux-2.5.21w/linux/fs/proc/generic.c	Sun Jun 16 22:03:21 2002
@@ -223,15 +223,17 @@
 	return vfs_readlink(dentry, buffer, buflen, s);
 }
 
-static int proc_follow_link(struct dentry *dentry, struct nameidata *nd)
+static int
+proc_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			 const char **llink, struct page **ppage)
 {
-	char *s=PDE(dentry->d_inode)->data;
-	return vfs_follow_link(nd, s);
+	*llink = PDE(dentry->d_inode)->data;
+	return 0;
 }
 
 static struct inode_operations proc_link_inode_operations = {
-	readlink:	proc_readlink,
-	follow_link:	proc_follow_link,
+	readlink:		proc_readlink,
+	prepare_follow_link:	proc_prepare_follow_link,
 };
 
 /*
diff -r -u linux-2.5.21x/linux/fs/sysv/inode.c linux-2.5.21w/linux/fs/sysv/inode.c
--- linux-2.5.21x/linux/fs/sysv/inode.c	Sun Jun  9 07:26:33 2002
+++ linux-2.5.21w/linux/fs/sysv/inode.c	Sun Jun 16 22:03:21 2002
@@ -131,9 +131,9 @@
 }
 
 static struct inode_operations sysv_symlink_inode_operations = {
-	readlink:	page_readlink,
-	follow_link:	page_follow_link,
-	getattr:	sysv_getattr,
+	readlink:		page_readlink,
+	prepare_follow_link:	page_prepare_follow_link,
+	getattr:		sysv_getattr,
 };
 
 void sysv_set_inode(struct inode *inode, dev_t rdev)
diff -r -u linux-2.5.21x/linux/fs/sysv/symlink.c linux-2.5.21w/linux/fs/sysv/symlink.c
--- linux-2.5.21x/linux/fs/sysv/symlink.c	Sun Jun  9 07:31:18 2002
+++ linux-2.5.21w/linux/fs/sysv/symlink.c	Sun Jun 16 22:03:21 2002
@@ -13,13 +13,15 @@
 	return vfs_readlink(dentry, buffer, buflen, s);
 }
 
-static int sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
+static int
+sysv_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			 const char **llink, struct page **ppage)
 {
-	char *s = (char *)SYSV_I(dentry->d_inode)->i_data;
-	return vfs_follow_link(nd, s);
+	*llink = (char *) SYSV_I(dentry->d_inode)->i_data;
+	return 0;
 }
 
 struct inode_operations sysv_fast_symlink_inode_operations = {
-	readlink:	sysv_readlink,
-	follow_link:	sysv_follow_link,
+	readlink:		sysv_readlink,
+	prepare_follow_link:	sysv_prepare_follow_link,
 };
diff -r -u linux-2.5.21x/linux/fs/ufs/symlink.c linux-2.5.21w/linux/fs/ufs/symlink.c
--- linux-2.5.21x/linux/fs/ufs/symlink.c	Sun Jun  9 07:28:03 2002
+++ linux-2.5.21w/linux/fs/ufs/symlink.c	Sun Jun 16 22:03:21 2002
@@ -34,13 +34,15 @@
 	return vfs_readlink(dentry, buffer, buflen, (char*)p->i_u1.i_symlink);
 }
 
-static int ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static int ufs_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+				   const char **llink, struct page **ppage)
 {
 	struct ufs_inode_info *p = UFS_I(dentry->d_inode);
-	return vfs_follow_link(nd, (char*)p->i_u1.i_symlink);
+	*llink = (char *) p->i_u1.i_symlink;
+	return 0;
 }
 
 struct inode_operations ufs_fast_symlink_inode_operations = {
-	readlink:	ufs_readlink,
-	follow_link:	ufs_follow_link,
+	readlink:		ufs_readlink,
+	prepare_follow_link:	ufs_prepare_follow_link,
 };
diff -r -u linux-2.5.21x/linux/fs/umsdos/inode.c linux-2.5.21w/linux/fs/umsdos/inode.c
--- linux-2.5.21x/linux/fs/umsdos/inode.c	Sun Jun  9 07:27:26 2002
+++ linux-2.5.21w/linux/fs/umsdos/inode.c	Sun Jun 16 22:03:21 2002
@@ -113,9 +113,9 @@
 };
 
 static struct inode_operations umsdos_symlink_inode_operations = {
-	readlink:	page_readlink,
-	follow_link:	page_follow_link,
-	setattr:	UMSDOS_notify_change,
+	readlink:		page_readlink,
+	prepare_follow_link:	page_prepare_follow_link,
+	setattr:		UMSDOS_notify_change,
 };
 
 /*
diff -r -u linux-2.5.21x/linux/include/linux/coda_proc.h linux-2.5.21w/linux/include/linux/coda_proc.h
--- linux-2.5.21x/linux/include/linux/coda_proc.h	Sun Jun  9 07:26:36 2002
+++ linux-2.5.21w/linux/include/linux/coda_proc.h	Sun Jun 16 22:03:21 2002
@@ -56,8 +56,8 @@
 	int rename;
 	int permission;
 
-	/* symlink operatoins*/
-	int follow_link;
+	/* symlink operations*/
+	int prepare_follow_link;
 	int readlink;
 };
 
diff -r -u linux-2.5.21x/linux/include/linux/fs.h linux-2.5.21w/linux/include/linux/fs.h
--- linux-2.5.21x/linux/include/linux/fs.h	Sun Jun  9 07:28:14 2002
+++ linux-2.5.21w/linux/include/linux/fs.h	Sun Jun 16 22:03:21 2002
@@ -771,7 +771,8 @@
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
 	int (*readlink) (struct dentry *, char *,int);
-	int (*follow_link) (struct dentry *, struct nameidata *);
+	int (*prepare_follow_link) (struct dentry *, struct nameidata *,
+				    const char **, struct page **);
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
@@ -1242,9 +1243,9 @@
 extern struct file_operations generic_ro_fops;
 
 extern int vfs_readlink(struct dentry *, char *, int, const char *);
-extern int vfs_follow_link(struct nameidata *, const char *);
 extern int page_readlink(struct dentry *, char *, int);
-extern int page_follow_link(struct dentry *, struct nameidata *);
+extern int page_prepare_follow_link(struct dentry *, struct nameidata *,
+				    const char **, struct page **);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern struct inode_operations page_symlink_inode_operations;
 extern void generic_fillattr(struct inode *, struct kstat *);
diff -r -u linux-2.5.21x/linux/include/linux/namei.h linux-2.5.21w/linux/include/linux/namei.h
--- linux-2.5.21x/linux/include/linux/namei.h	Sun Jun  9 07:28:48 2002
+++ linux-2.5.21w/linux/include/linux/namei.h	Sun Jun 16 22:03:21 2002
@@ -33,13 +33,16 @@
 #define LOOKUP_CONTINUE		 4
 #define LOOKUP_PARENT		16
 #define LOOKUP_NOALT		32
-
+#define LOOKUP_FOLLOW_DONE	64
+#define LOOKUP_KFREE_NEEDED	128
 
 extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
-#define user_path_walk(name,nd) \
-	__user_walk(name, LOOKUP_FOLLOW, nd)
 #define user_path_walk_link(name,nd) \
 	__user_walk(name, 0, nd)
+#define user_path_walk(name,nd) \
+	__user_walk(name, LOOKUP_FOLLOW, nd)
+#define user_path_walk_dir(name,nd) \
+	__user_walk(name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, nd)
 extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
 extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
 extern int FASTCALL(path_walk(const char *, struct nameidata *));
diff -r -u linux-2.5.21x/linux/kernel/ksyms.c linux-2.5.21w/linux/kernel/ksyms.c
--- linux-2.5.21x/linux/kernel/ksyms.c	Sun Jun  9 07:26:33 2002
+++ linux-2.5.21w/linux/kernel/ksyms.c	Sun Jun 16 22:03:21 2002
@@ -272,9 +272,8 @@
 EXPORT_SYMBOL(grab_cache_page_nowait);
 EXPORT_SYMBOL(read_cache_page);
 EXPORT_SYMBOL(vfs_readlink);
-EXPORT_SYMBOL(vfs_follow_link);
 EXPORT_SYMBOL(page_readlink);
-EXPORT_SYMBOL(page_follow_link);
+EXPORT_SYMBOL(page_prepare_follow_link);
 EXPORT_SYMBOL(page_symlink_inode_operations);
 EXPORT_SYMBOL(page_symlink);
 EXPORT_SYMBOL(vfs_readdir);
diff -r -u linux-2.5.21x/linux/mm/shmem.c linux-2.5.21w/linux/mm/shmem.c
--- linux-2.5.21x/linux/mm/shmem.c	Sun Jun  9 07:29:25 2002
+++ linux-2.5.21w/linux/mm/shmem.c	Sun Jun 16 22:03:21 2002
@@ -1205,9 +1205,12 @@
 	return vfs_readlink(dentry,buffer,buflen, (const char *)SHMEM_I(dentry->d_inode));
 }
 
-static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
+static int
+shmem_prepare_follow_link_inline(struct dentry *dentry, struct nameidata *nd,
+				 const char **llink, struct page **ppage)
 {
-	return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
+	*llink = (const char *) SHMEM_I(dentry->d_inode);
+	return 0;
 }
 
 static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
@@ -1224,28 +1227,26 @@
 	return res;
 }
 
-static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
+static int
+shmem_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
+			  const char **llink, struct page **ppage)
 {
-	struct page * page;
-	int res = shmem_getpage(dentry->d_inode, 0, &page);
+	int res = shmem_getpage(dentry->d_inode, 0, ppage);
 	if (res)
 		return res;
-
-	res = vfs_follow_link(nd, kmap(page));
-	kunmap(page);
-	page_cache_release(page);
-	return res;
+	*llink = kmap(*ppage);
+	return 0;
 }
 
 static struct inode_operations shmem_symlink_inline_operations = {
-	readlink:	shmem_readlink_inline,
-	follow_link:	shmem_follow_link_inline,
+	readlink:		shmem_readlink_inline,
+	prepare_follow_link:	shmem_prepare_follow_link_inline,
 };
 
 static struct inode_operations shmem_symlink_inode_operations = {
-	truncate:	shmem_truncate,
-	readlink:	shmem_readlink,
-	follow_link:	shmem_follow_link,
+	truncate:		shmem_truncate,
+	readlink:		shmem_readlink,
+	prepare_follow_link:	shmem_prepare_follow_link,
 };
 
 static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long * blocks, unsigned long *inodes)
diff -r -u linux-2.5.21x/linux/sound/core/info.c linux-2.5.21w/linux/sound/core/info.c
--- linux-2.5.21x/linux/sound/core/info.c	Sun Jun  9 07:29:22 2002
+++ linux-2.5.21w/linux/sound/core/info.c	Sun Jun 16 22:03:21 2002
@@ -560,53 +560,22 @@
 				  char *buffer, int buflen)
 {
         char *s = PDE(dentry->d_inode)->data;
-#ifndef LINUX_2_2
+
 	return vfs_readlink(dentry, buffer, buflen, s);
-#else
-	int len;
-	
-	if (s == NULL)
-		return -EIO;
-	len = strlen(s);
-	if (len > buflen)
-		len = buflen;
-	if (copy_to_user(buffer, s, len))
-		return -EFAULT;
-	return len;
-#endif
 }
 
-#ifndef LINUX_2_2
-static int snd_info_card_followlink(struct dentry *dentry,
-				    struct nameidata *nd)
-{
-        char *s = PDE(dentry->d_inode)->data;
-        return vfs_follow_link(nd, s);
-}
-#else
-static struct dentry *snd_info_card_followlink(struct dentry *dentry,
-					       struct dentry *base,
-					       unsigned int follow)
+static int
+snd_info_card_prepare_followlink(struct dentry *dentry, struct nameidata *nd,
+				 const char **llink, struct page **ppage)
 {
-	char *s = PDE(dentry->d_inode)->data;
-	return lookup_dentry(s, base, follow);
+        *llink = PDE(dentry->d_inode)->data;
+        return 0;
 }
-#endif
-
-#ifdef LINUX_2_2
-static struct file_operations snd_info_card_link_operations =
-{
-	NULL
-};
-#endif
 
 struct inode_operations snd_info_card_link_inode_operations =
 {
-#ifdef LINUX_2_2
-	default_file_ops:	&snd_info_card_link_operations,
-#endif
 	readlink:		snd_info_card_readlink,
-	follow_link:		snd_info_card_followlink,
+	prepare_follow_link:	snd_info_card_prepare_followlink,
 };
 
 struct proc_dir_entry *snd_create_proc_entry(const char *name, mode_t mode,


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-16 13:13 Andries.Brouwer
@ 2002-06-16 18:51 ` Alexander Viro
  0 siblings, 0 replies; 44+ messages in thread
From: Alexander Viro @ 2002-06-16 18:51 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: linux-kernel



On Sun, 16 Jun 2002 Andries.Brouwer@cwi.nl wrote:

>     That's simply not true.  Of the top of my head - /proc/self.
> 
> Ah, yes - that has the string on stack.
> That changes my inventory into: all callers except two use a page.
> One (jffs2) uses a kmalloced area. One (/proc/self) has its data
> on the stack.
> 
>     Look, it's getting ridiculous.
> 
> That is a counterproductive reply.
> You say "I tried to improve Linux three years ago and failed,
> so you can forget it - no improvement is possible".

No.  What I'm saying is "your handwaving is getting ridiculous".

> Let us try. The basic object is a struct link_work that has dentry,
> nameidata, link, page and flags and a pointer to the next such struct.
> This is a reverse linked list: the thing we work on is in front,
> the tail of the list is the thing we originally started to resolve.
> 
> The routine that does all this is
> 
> int do_link_work(struct link_work **lw) {
> 	int err = 0;
> 
> 	while((*lw)->link) {
> 		if (err == 0)
> 			err = do_link_item(lw);
> 		else
> 			do_bad_link_item(lw);
> 	}
> 	return err;
> }
>
> where do_link_item() has as duty to handle the next part
> of the pathname. That diminishes the work left, unless
> that next part was a symlink, in which case resolution
> of that is prepended to the list of work we have to do.
> If something is wrong, do_bad_link_item() only releases resources.
> 
> Looks like a nice and clean setup.

"The process is iterative, so there is a loop somewhere; let's put
whatever body it might have into a separate function - see, no ugliness
there". 

> Remains the question how to release the resources allocated
> by the filesystems in prepare_follow_link().

... and a couple of other questions, like, say it, how to deal with
handling of "alternative roots" (/usr/gnuemul/<foo>) or what will your
do_link_item() be.

> There are various possibilities. General ones: the filesystem
> provides a callback. Restricted ones: we ask the filesystem
> to have one page as the only resource. Kludgy ones: we allow
> some explicit short list, like page or kmalloc. Probably others.
>
> But maybe you are not interested in thinking about such things.
> You did it already.

Indeed I had.  Look, either you have a decent solution or not.  If you
want to try - by all means, go ahead, I'll be glad to see the current
situation improved.  Write a patch and post it for review - it's not
exactly the rocket science.  Obvious requirements:
	* if current code resolves a pathname, replacement should do the same.
	* if current code fails with something different from ELOOP - so
should the replacement (modulo things like ENOMEM, obviously)
	* replacement code should not penalize the lookups that do not
encounter nested symlinks at all.
	* resulting namei.c should not be uglier than it currently is
(and if some code migrates to new files - same for all these files combined).

Fair enough, I hope?  Keep in mind that pathname resolution is one of the
hotspots, so "should not penalize..." part is quite serious.  Write it
(starting at 2.5.<current> version) and post it for review.  Then there
will be something to talk about; right now you are making very vague
proposals of reorganization of code you hadn't read through and saying that
such reorganization can be done with decent results...


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
@ 2002-06-16 13:13 Andries.Brouwer
  2002-06-16 18:51 ` Alexander Viro
  0 siblings, 1 reply; 44+ messages in thread
From: Andries.Brouwer @ 2002-06-16 13:13 UTC (permalink / raw)
  To: Andries.Brouwer, viro; +Cc: linux-kernel

    >     First of all, _that_ is still recursive.  And it's not easy
    >     to deal with - you need to release the object holding the link body
    >     (BTW, that can be almost anything - page, inode, kmalloc'ed area,
    >     vmalloc'ed area, etc.) after __vfs_follow_link() is done.
    >     And that means (at the very least) a stack of such objects,
    >     along with the information about their nature.
    > 
    > Yes. But in the current tree only the cases page and kmalloc'ed area
    > occur, and it is easy to transform the single occurrence of kmalloc'ed
    > areas (jffs2) into a use of page.

    That's simply not true.  Of the top of my head - /proc/self.

Ah, yes - that has the string on stack.
That changes my inventory into: all callers except two use a page.
One (jffs2) uses a kmalloced area. One (/proc/self) has its data
on the stack.

    Look, it's getting ridiculous.

That is a counterproductive reply.
You say "I tried to improve Linux three years ago and failed,
so you can forget it - no improvement is possible".

Of course improvement is possible. It always is.

Now we find that it is possible to change recursive symlink handling
into iterative handling.  That is good, because the current limit of
5 is really a bit low. It bites every now and then.

Remains the question whether it can be done in a beautiful way.

Let us try. The basic object is a struct link_work that has dentry,
nameidata, link, page and flags and a pointer to the next such struct.
This is a reverse linked list: the thing we work on is in front,
the tail of the list is the thing we originally started to resolve.

The routine that does all this is

int do_link_work(struct link_work **lw) {
	int err = 0;

	while((*lw)->link) {
		if (err == 0)
			err = do_link_item(lw);
		else
			do_bad_link_item(lw);
	}
	return err;
}

where do_link_item() has as duty to handle the next part
of the pathname. That diminishes the work left, unless
that next part was a symlink, in which case resolution
of that is prepended to the list of work we have to do.
If something is wrong, do_bad_link_item() only releases resources.

Looks like a nice and clean setup.
Remains the question how to release the resources allocated
by the filesystems in prepare_follow_link().
There are various possibilities. General ones: the filesystem
provides a callback. Restricted ones: we ask the filesystem
to have one page as the only resource. Kludgy ones: we allow
some explicit short list, like page or kmalloc. Probably others.

But maybe you are not interested in thinking about such things.
You did it already.

Andries

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-16 10:56 Andries.Brouwer
@ 2002-06-16 11:38 ` Alexander Viro
  0 siblings, 0 replies; 44+ messages in thread
From: Alexander Viro @ 2002-06-16 11:38 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: linux-kernel



On Sun, 16 Jun 2002 Andries.Brouwer@cwi.nl wrote:

>     First of all, _that_ is still recursive.  And it's not easy to deal with -
>     you need to release the object holding the link body (BTW, that can
>     be almost anything - page, inode, kmalloc'ed area, vmalloc'ed area, etc.)
>     after __vfs_follow_link() is done.  And that means (at the very least)
>     a stack of such objects, along with the information about their nature.
> 
> Yes. But in the current tree only the cases page and kmalloc'ed area
> occur, and it is easy to transform the single occurrence of kmalloc'ed area
> (jffs2) into a use of page.

That's simply not true.  Of the top of my head - /proc/self.

Look, it's getting ridiculous - you've already got a method that sometimes
follows link, sometimes leaves a bunch of stuff for callers to handle
(BTW, we'll have to duplicate that fun in another caller of ->follow_link()
and that, thanks to usual POSIX elegance, is already butt-ugly).  You've
got (at the very least) 3 stacks of objects - dentry, link and page.
And that - saying "that happens to cover all current cases, except for one
but we can shoehorn it".  Aside of the fact that statement is false,
you are making a lot of assumptions about what's natural for code - both
out-of-tree and in-tree (see above).

And then you'll need to shove said stacks into struct nameidata.  Which
means either a static limit again or all sorts of fun with "allocate
externally if it gets bigger than...".

Face it, it _is_ ugly.  Been there, done that - if you want to repeat
the exercise, more power to you, but it won't be any prettier.

Sure thing, everything is equivalent to Turing Machine and with sufficient
amount of brute force and kludges everything that can be done at all can be
done in any given way.  In this particular case that amount will be
prohibitively painful.  You don't believe me - try it yourself and see how
gross it will get...


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
@ 2002-06-16 10:56 Andries.Brouwer
  2002-06-16 11:38 ` Alexander Viro
  0 siblings, 1 reply; 44+ messages in thread
From: Andries.Brouwer @ 2002-06-16 10:56 UTC (permalink / raw)
  To: Andries.Brouwer, viro; +Cc: linux-kernel

    >     > > Wrong.  That breaks for anything with ->follow_link() that
    >     > > can't be expressed as a single lookup on some path.
    >
    > in-tree systems give no problems; procfs is trivially handled:
    >
    >     err = dentry->d_inode->i_op->prepare_follow_link(dentry, nd, &link, &page);
    >     if (nd->flags & FOLLOW_DONE)
    >         nd->flags &= ~FOLLOW_DONE;
    >     else if (err = 0)
    >         err = __vfs_follow_link(nd, link);
    >     if (page) {
    >         kunmap(page);
    >         page_cache_release(page);
    >     }
    >     ...
    >     return err;
    > }
    > 
    > You must come with more serious objections before I believe your
    > claim that this doesn't work.

    First of all, _that_ is still recursive.  And it's not easy to deal with -
    you need to release the object holding the link body (BTW, that can
    be almost anything - page, inode, kmalloc'ed area, vmalloc'ed area, etc.)
    after __vfs_follow_link() is done.  And that means (at the very least)
    a stack of such objects, along with the information about their nature.

Yes. But in the current tree only the cases page and kmalloc'ed area
occur, and it is easy to transform the single occurrence of kmalloc'ed area
(jffs2) into a use of page.

    Which exposes a lot of information about the filesystem guts.

So, nothing about the filesystem guts is needed, the above code
says it all: when we are done, release page.

Anyway, you cleverly change the topic. First it was impossible and wrong.
Now it is ugly. I even maintain that the new code could be more
beautiful than the old code. And more compact.

The typical prepare_follow_link routine would be

int page_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
                             const char **llink, struct page **ppage)
{
        *llink = page_getlink(dentry, ppage);
        return 0;
}

Andries

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-16  9:59 Andries.Brouwer
@ 2002-06-16 10:33 ` Alexander Viro
  0 siblings, 0 replies; 44+ messages in thread
From: Alexander Viro @ 2002-06-16 10:33 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: linux-kernel



On Sun, 16 Jun 2002 Andries.Brouwer@cwi.nl wrote:

>     On Sun, 16 Jun 2002 Andries.Brouwer@cwi.nl wrote:
> 
>     > > Wrong.  That breaks for anything with ->follow_link() that
>     > > can't be expressed as a single lookup on some path.
>     > 
>     > I don't know what interesting out-of-tree filesystems you
>     > have in mind, but it looks like this would work for all
>     > systems in the current tree.
> 
>     Trivial counterexample: procfs.
> 
> That is trivially handled:
 
I've said "trivial counterexample" ;-)

> 	err = dentry->d_inode->i_op->prepare_follow_link(dentry, nd, &link, &page);
> 	if (nd->flags & FOLLOW_DONE)
> 		nd->flags &= ~FOLLOW_DONE;
> 	else if (err = 0)
> 		err = __vfs_follow_link(nd, link);
> 	if (page) {
> 		kunmap(page);
> 		page_cache_release(page);
> 	}
> 	...
> 	return err;
> }
> 
> You must come with more serious objections before I believe your
> claim that this doesn't work.

First of all, _that_ is still recursive.  And it's not easy to deal with -
you need to release the object holding the link body (BTW, that can
be almost anything - page, inode, kmalloc'ed area, vmalloc'ed area, etc.)
after __vfs_follow_link() is done.  And that means (at the very least)
a stack of such objects, along with the information about their nature.
Which exposes a lot of information about the filesystem guts - so much
that we are basically introducing a cookie created by your ->prepare_...()
and having a destructor of its own.

I'd implemented that variant about 3 years ago.  And it got vetoed by Linus -
for a very good reason.  It was too damn ugly, with more ugliness to come
as we add more filesystems.


>     Notice that your "permanent" data is not quite permanent - e.g.
>     information about partitioning can be lost (in all kernels since
>     2.0, if not earlier) at any moment when nobody has devices opened.
>     rmmod -a and there you go...
> 
>     IMO correct approach to such data is that it's generated on demand
>     (as it is right now - again, consider modular driver; then we don't
>     see _anything_ about devices until somebody attempts to open one
>     of them and triggers module loading)
> 
> Yes, when the disk is removed, or when the driver is removed
> it does not matter that information is lost.
> But it is very bad to generate a partition table on each open.
> Indeed, it is wrong.

D'oh.  Indeed it is wrong - not to mention anything else, if we keep it
over the lifetime of struct block_device, there is no possible reason
to recalculate it.  Sure thing, it's cached.  And stays cached until
we are either told that it needs to be reread (BLKRRPART or your ioctls)
or that device itself is gone or that we didn't have that device (or
any of its partitions) for a long time, all their page caches are evicted
from memory and memory pressure is so bad that we want to start dropping
ancient and long-unused struct block_device.

Normal case is that we read partition table once - on the first open().
Then it stays cached.
 
> User space can tell the kernel what the partitions are.

... And that's also cached.

> Opening a disk device must not destroy that information.

Sure - why would it?  If we already know where partition is - why would
we bother rereading that stuff?

> So some permanent info of the gendisk type is needed.

FSVO "permanent".  Basically, struct block_device of entire disk is
going to be a lot more long-living beast than it is right now...


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
@ 2002-06-16  9:59 Andries.Brouwer
  2002-06-16 10:33 ` Alexander Viro
  0 siblings, 1 reply; 44+ messages in thread
From: Andries.Brouwer @ 2002-06-16  9:59 UTC (permalink / raw)
  To: Andries.Brouwer, viro; +Cc: linux-kernel

    On Sun, 16 Jun 2002 Andries.Brouwer@cwi.nl wrote:

    > > Wrong.  That breaks for anything with ->follow_link() that
    > > can't be expressed as a single lookup on some path.
    > 
    > I don't know what interesting out-of-tree filesystems you
    > have in mind, but it looks like this would work for all
    > systems in the current tree.

    Trivial counterexample: procfs.

That is trivially handled:

static inline int
do_follow_link(struct dentry *dentry, struct nameidata *nd) {
	const char *link = NULL;
	struct page *page = NULL;
	int err;

	...
	err = dentry->d_inode->i_op->prepare_follow_link(dentry, nd, &link, &page);
	if (nd->flags & FOLLOW_DONE)
		nd->flags &= ~FOLLOW_DONE;
	else if (err = 0)
		err = __vfs_follow_link(nd, link);
	if (page) {
		kunmap(page);
		page_cache_release(page);
	}
	...
	return err;
}

You must come with more serious objections before I believe your
claim that this doesn't work.

----------     
    > You are replacing kdev_t by struct block device *, that I
    > consider as a refcounted reference to the device. Fine.
    > But kdev_t is created by the driver, at the moment the
    > device is detected, while struct block device * is created
    > at open() time. Thus, the question arises where you plan to
...
    > store permanent device data like size or sectorsize.

    Sector size is kept in the queue.  Disk size - in per-disk objects
    that are yet to be introduced in the tree.

Yes - it is these per-disk objects that I was referring to.
Recently I was polishing some SCSI code, and the next step
would have been unification with some IDE code that did precisely
the same things in an entirely different way. But in order to
remove this driver code I needed what I still think of as kdev_t
structs. So you will be introducing them after all. Good.

----------
    Notice that your "permanent" data is not quite permanent - e.g.
    information about partitioning can be lost (in all kernels since
    2.0, if not earlier) at any moment when nobody has devices opened.
    rmmod -a and there you go...

    IMO correct approach to such data is that it's generated on demand
    (as it is right now - again, consider modular driver; then we don't
    see _anything_ about devices until somebody attempts to open one
    of them and triggers module loading)

Yes, when the disk is removed, or when the driver is removed
it does not matter that information is lost.
But it is very bad to generate a partition table on each open.
Indeed, it is wrong.

User space can tell the kernel what the partitions are.
Opening a disk device must not destroy that information.
So some permanent info of the gendisk type is needed.

Andries

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-16  7:47 Andries.Brouwer
@ 2002-06-16  8:36 ` Alexander Viro
  0 siblings, 0 replies; 44+ messages in thread
From: Alexander Viro @ 2002-06-16  8:36 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: linux-kernel



On Sun, 16 Jun 2002 Andries.Brouwer@cwi.nl wrote:

> > Wrong.  That breaks for anything with ->follow_link() that
> > can't be expressed as a single lookup on some path.
> 
> I don't know what interesting out-of-tree filesystems you
> have in mind, but it looks like this would work for all
> systems in the current tree.

Trivial counterexample: procfs.
 
> You are replacing kdev_t by struct block device *, that I
> consider as a refcounted reference to the device. Fine.
> But kdev_t is created by the driver, at the moment the
> device is detected, while struct block device * is created
> at open() time. Thus, the question arises where you plan to

It's a bit trickier than that - on both sides (driver might
modular and then the only thing that can guarantee its presence
is opened device; OTOH, struct block_device can stay around longer
than it's opened)

> store permanent device data like size or sectorsize.

Sector size is kept in the queue.  Disk size - in per-disk objects
that are yet to be introduced in the tree.

Notice that your "permanent" data is not quite permanent - e.g.
information about partitioning can be lost (in all kernels since
2.0, if not earlier) at any moment when nobody has devices opened.
rmmod -a and there you go...

IMO correct approach to such data is that it's generated on demand
(as it is right now - again, consider modular driver; then we don't
see _anything_ about devices until somebody attempts to open one
of them and triggers module loading), cached at least until the things
are opened and forcibly invalidated by the driver when it goes away
or decide that it had lost the disk (e.g. rmmod of low-level SCSI
driver means that everything behind controllers of that type effectively
disappears).

IOW, we certainly should not forget everything upon the final close() -
instead we should leave invalidation to the driver or memory pressure.
And that includes the page cache of device - we need it to become
quiet on the final close so that no IO requests would be generated,
but we don't need it to disappear.

The next series of patches will make struct block_device keep the
information (->bd_op, page cache contents, etc.) past the final
close().  After that we can start moving the stuff in there without
breaking the warranties that exist in the current tree...


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
@ 2002-06-16  7:47 Andries.Brouwer
  2002-06-16  8:36 ` Alexander Viro
  0 siblings, 1 reply; 44+ messages in thread
From: Andries.Brouwer @ 2002-06-16  7:47 UTC (permalink / raw)
  To: Andries.Brouwer, viro; +Cc: linux-kernel

> Wrong.  That breaks for anything with ->follow_link() that
> can't be expressed as a single lookup on some path.

I don't know what interesting out-of-tree filesystems you
have in mind, but it looks like this would work for all
systems in the current tree.

> For fsck sake, give the folks here some credit

Funny to hear that from you.

Andries


P.S. Now that you exist again, let me repeat a question
from a few weeks ago. Many years ago I constructed kdev_t,
a pointer to a struct containing the things that lived in
the arrays and some useful functions like a function returning
the name. It was passed around as reference to the device.
You are replacing kdev_t by struct block device *, that I
consider as a refcounted reference to the device. Fine.
But kdev_t is created by the driver, at the moment the
device is detected, while struct block device * is created
at open() time. Thus, the question arises where you plan to
store permanent device data like size or sectorsize.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
  2002-06-16  0:48 Andries.Brouwer
@ 2002-06-16  1:08 ` Alexander Viro
  0 siblings, 0 replies; 44+ messages in thread
From: Alexander Viro @ 2002-06-16  1:08 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: linux-kernel



On Sun, 16 Jun 2002 Andries.Brouwer@cwi.nl wrote:

> As far as I can see, the cycle essentially is
> link_path_walk -> do_follow_link -> page_follow_link -> vfs_follow_link ->.

[snip the obvious strategy that doesn't work]
 
> Now symlink resolution can be entirely iterative.

Wrong.  That breaks for anything with ->follow_link() that can't be expressed
as a single lookup on some path.

For fsck sake, give the folks here some credit - strategy above is _not_
hard to think up and it had been discussed on l-k several times.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [CHECKER] 37 stack variables >= 1K in 2.4.17
@ 2002-06-16  0:48 Andries.Brouwer
  2002-06-16  1:08 ` Alexander Viro
  0 siblings, 1 reply; 44+ messages in thread
From: Andries.Brouwer @ 2002-06-16  0:48 UTC (permalink / raw)
  To: linux-kernel

> Not realistic - we have a recursion through the ->follow_link(), and
> a lot of stuff can be called from ->follow_link().  We _do_ have a
> limit on depth of recursion here, but it won't be fun to deal with.

It would not be impossibly difficult to remove the recursion altogether.

As far as I can see, the cycle essentially is
link_path_walk -> do_follow_link -> page_follow_link -> vfs_follow_link ->.

Now page_follow_link does very little after the vfs_follow_link call,
and the same holds for all other filesystem types.
Either foo_follow_link ends in
	return vfs_follow_link(..);
or it does
	err = vfs_follow_link(..);
	if (something_to_free)
		free_it;
	return err;

With a little bit of polishing we could cut off all foo_follow_link
routines just before the vfs_follow_link call and make foo_follow_link
return the arguments it was going to call vfs_follow_link with.

Then of course the something_to_free must be freed by the caller.
What is it?

page_follow_link and nfs_follow_link have

        if (page) {
                kunmap(page);
                page_cache_release(page);
        }

jffs2_follow_link has

	kfree(buf);

If that is done, there is no stack frame for foo_follow_link,
and in principle the three of link_path_walk, do_follow_link,
vfs_follow_link could be single routine. Today do_follow_link
is already inline.

Now symlink resolution can be entirely iterative.
The routine that does this works on a linked list of things
still to consider, and always works on the tail of the list.

Quite a lot of restructuring would be required to produce
a readable source in this style, but it doesn't look too difficult.
The result might even be more efficient.

Andries

^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2002-06-17 12:00 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-06-10  3:56 [CHECKER] 37 stack variables >= 1K in 2.4.17 Dawson Engler
2002-06-12  8:43 ` Pavel Machek
2002-06-12 19:11   ` Nikita Danilov
2002-06-12 21:51 ` Benjamin LaHaise
2002-06-12 22:26   ` Alexander Viro
2002-06-12 22:38     ` Benjamin LaHaise
2002-06-12 22:44       ` Robert Love
2002-06-12 22:55         ` procfs documentation Tom Bradley
2002-06-13 11:17           ` John Levon
2002-06-13  0:20       ` [CHECKER] 37 stack variables >= 1K in 2.4.17 Alexander Viro
2002-06-13  8:30         ` Helge Hafting
2002-06-13 13:24           ` Roger Larsson
2002-06-14 10:06             ` Helge Hafting
2002-06-13  6:38     ` Dawson Engler
2002-06-13  6:59       ` Alexander Viro
2002-06-13 17:41         ` Daniel Phillips
2002-06-13 17:53           ` Alexander Viro
2002-06-13 18:45             ` Daniel Phillips
2002-06-13 17:56         ` Andi Kleen
2002-06-13 18:26           ` Alexander Viro
2002-06-13 19:01             ` Andi Kleen
2002-06-14  0:05               ` William Lee Irwin III
2002-06-13 21:50             ` Dawson Engler
2002-06-13 22:43               ` Oliver Xymoron
2002-06-14  0:25               ` Alexander Viro
2002-06-13  6:36   ` Dawson Engler
2002-06-16  0:48 Andries.Brouwer
2002-06-16  1:08 ` Alexander Viro
2002-06-16  7:47 Andries.Brouwer
2002-06-16  8:36 ` Alexander Viro
2002-06-16  9:59 Andries.Brouwer
2002-06-16 10:33 ` Alexander Viro
2002-06-16 10:56 Andries.Brouwer
2002-06-16 11:38 ` Alexander Viro
2002-06-16 13:13 Andries.Brouwer
2002-06-16 18:51 ` Alexander Viro
2002-06-16 20:41 Andries.Brouwer
2002-06-16 21:33 ` Andreas Dilger
2002-06-16 21:34 ` Alexander Viro
2002-06-17 10:09 ` David Woodhouse
2002-06-16 22:05 Andries.Brouwer
2002-06-16 23:57 ` Alexander Viro
2002-06-17 11:07 Andries.Brouwer
2002-06-17 12:00 ` David Woodhouse

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.