linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH/RFC] New module refcounting for net_proto_family
@ 2002-12-26  8:11 Max Krasnyansky
  2003-01-02 11:43 ` Max Krasnyansky
  2003-01-07  9:21 ` David S. Miller
  0 siblings, 2 replies; 40+ messages in thread
From: Max Krasnyansky @ 2002-12-26  8:11 UTC (permalink / raw)
  To: davem; +Cc: linux-kernel

> Bunch of problems with this patch:
>
> 1) Module leak.  If try_module_get(npf->owner) works but sock_alloc()
>    fails, we never put the module.  It just branches to the "out"
>    label in that case, which unlocks the net_family table and returns
>    err.
Yeah, I missed that one. Fixed in the new patch.

> 2) Bigger issue, why not attach the owner to struct sock
>    instead of socket?  The sock can exist, and thus reference
>    the protocol family code, long after the socket (ie. the
>    user end) is killed off and closed.
>
>    For example, this could happen for just about any protocol family
>    due to stray device sk_buff references to the sock and thus the
>    protocol family.
Good point. Alghough generic socket code does not necessarily reference 
proto family module via struct sock. Only in case when family installed 
non default callbacks (sk->dataready, sk->destruct, etc). Some families
(af_ipx for example) don't. But I think it's a good idea to refcount 
struct sock anyway. 

Ok. Here is the new patch. 
We still need owner field in struct socket.

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	1.889   -> 1.890  
#	        net/socket.c	1.39    -> 1.40   
#	 include/linux/net.h	1.7     -> 1.8    
#	  include/net/sock.h	1.29    -> 1.30   
#	     net/core/sock.c	1.14    -> 1.15   
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/12/25	maxk@qualcomm.com	1.890
# Convert generic socket code to the new module refcounting API.
# --------------------------------------------
#
diff -Nru a/include/linux/net.h b/include/linux/net.h
--- a/include/linux/net.h	Wed Dec 25 23:29:28 2002
+++ b/include/linux/net.h	Wed Dec 25 23:29:28 2002
@@ -76,6 +76,8 @@
 
 	short			type;
 	unsigned char		passcred;
+
+	struct module           *owner;
 };
 
 struct scm_cookie;
@@ -124,6 +126,8 @@
 	short	authentication;
 	short	encryption;
 	short	encrypt_net;
+
+	struct  module *owner;
 };
 
 struct net_proto 
diff -Nru a/include/net/sock.h b/include/net/sock.h
--- a/include/net/sock.h	Wed Dec 25 23:29:28 2002
+++ b/include/net/sock.h	Wed Dec 25 23:29:28 2002
@@ -41,6 +41,7 @@
 #include <linux/config.h>
 #include <linux/timer.h>
 #include <linux/cache.h>
+#include <linux/module.h>
 
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>	/* struct sk_buff */
@@ -196,7 +197,9 @@
 
 	/* RPC layer private data */
 	void			*user_data;
-  
+
+	struct module		*owner;
+	
 	/* Callbacks */
 	void			(*state_change)(struct sock *sk);
 	void			(*data_ready)(struct sock *sk,int bytes);
@@ -577,6 +580,9 @@
 
 static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
+	try_module_get(parent->owner);
+	sk->owner = parent->owner;
+
 	write_lock_bh(&sk->callback_lock);
 	sk->sleep = &parent->wait;
 	parent->sk = sk;
diff -Nru a/net/core/sock.c b/net/core/sock.c
--- a/net/core/sock.c	Wed Dec 25 23:29:28 2002
+++ b/net/core/sock.c	Wed Dec 25 23:29:28 2002
@@ -601,6 +601,7 @@
 			sock_lock_init(sk);
 		}
 		sk->slab = slab;
+		sk->owner = NULL;
 	}
 
 	return sk;
@@ -626,6 +627,8 @@
 	if (atomic_read(&sk->omem_alloc))
 		printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n", atomic_read(&sk->omem_alloc));
 
+	module_put(sk->owner);
+	
 	kmem_cache_free(sk->slab, sk);
 }
 
@@ -1084,10 +1087,10 @@
 	sk->zapped	=	1;
 	sk->socket	=	sock;
 
-	if(sock)
-	{
+	if (sock) {
 		sk->type	=	sock->type;
 		sk->sleep	=	&sock->wait;
+		sk->owner       =       sock->owner;
 		sock->sk	=	sk;
 	} else
 		sk->sleep	=	NULL;
diff -Nru a/net/socket.c b/net/socket.c
--- a/net/socket.c	Wed Dec 25 23:29:28 2002
+++ b/net/socket.c	Wed Dec 25 23:29:28 2002
@@ -470,6 +470,8 @@
 
 	sock = SOCKET_I(inode);
 
+	sock->owner = NULL;
+	
 	inode->i_mode = S_IFSOCK|S_IRWXUGO;
 	inode->i_sock = 1;
 	inode->i_uid = current->fsuid;
@@ -964,8 +966,9 @@
 
 int sock_create(int family, int type, int protocol, struct socket **res)
 {
-	int i;
+	struct net_proto_family *npf;
 	struct socket *sock;
+	int err;
 
 	/*
 	 *	Check protocol is in range
@@ -990,14 +993,8 @@
 	}
 		
 #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
-	/* Attempt to load a protocol module if the find failed. 
-	 * 
-	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
-	 * requested real, full-featured networking support upon configuration.
-	 * Otherwise module support will break!
-	 */
-	if (net_families[family]==NULL)
-	{
+	/* Attempt to load a protocol module if the find failed. */
+	if (net_families[family]==NULL) {
 		char module_name[30];
 		sprintf(module_name,"net-pf-%d",family);
 		request_module(module_name);
@@ -1005,29 +1002,31 @@
 #endif
 
 	net_family_read_lock();
-	if (net_families[family] == NULL) {
-		i = -EAFNOSUPPORT;
-		goto out;
-	}
 
-/*
- *	Allocate the socket and allow the family to set things up. if
- *	the protocol is 0, the family is instructed to select an appropriate
- *	default.
- */
+	npf = net_families[family];
+	if (!npf || !try_module_get(npf->owner)) {
+		net_family_read_unlock();
+		return -EAFNOSUPPORT;
+	}
+	
+	/*
+	 * Allocate the socket and allow the family to set things up. if
+	 * the protocol is 0, the family is instructed to select an appropriate
+	 * default.
+ 	 */
 
-	if (!(sock = sock_alloc())) 
-	{
+	sock = sock_alloc();
+	if (!sock) {
 		printk(KERN_WARNING "socket: no more sockets\n");
-		i = -ENFILE;		/* Not exactly a match, but its the
+		err = -ENFILE;		/* Not exactly a match, but its the
 					   closest posix thing */
 		goto out;
 	}
 
 	sock->type  = type;
+	sock->owner = npf->owner;
 
-	if ((i = net_families[family]->create(sock, protocol)) < 0) 
-	{
+	if ((err = npf->create(sock, protocol)) < 0) {
 		sock_release(sock);
 		goto out;
 	}
@@ -1036,7 +1035,9 @@
 
 out:
 	net_family_read_unlock();
-	return i;
+	if (err)
+		module_put(npf->owner);
+	return err;
 }
 
 asmlinkage long sys_socket(int family, int type, int protocol)
@@ -1198,9 +1199,10 @@
 	if (!(newsock = sock_alloc())) 
 		goto out_put;
 
-	newsock->type = sock->type;
-	newsock->ops = sock->ops;
-
+	newsock->type  = sock->type;
+	newsock->ops   = sock->ops;
+	newsock->owner = sock->owner;
+	
 	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
 	if (err < 0)
 		goto out_release;

--

Max



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2002-12-26  8:11 [PATCH/RFC] New module refcounting for net_proto_family Max Krasnyansky
@ 2003-01-02 11:43 ` Max Krasnyansky
  2003-01-03  8:24   ` David S. Miller
  2003-01-20  3:22   ` Max Krasnyansky
  2003-01-07  9:21 ` David S. Miller
  1 sibling, 2 replies; 40+ messages in thread
From: Max Krasnyansky @ 2003-01-02 11:43 UTC (permalink / raw)
  To: davem; +Cc: linux-kernel

On Thu, 26 Dec 2002, Max Krasnyansky wrote:

Dave, 

Did you have a chance to look at the new patch ?

Max

> > Bunch of problems with this patch:
> >
> > 1) Module leak.  If try_module_get(npf->owner) works but sock_alloc()
> >    fails, we never put the module.  It just branches to the "out"
> >    label in that case, which unlocks the net_family table and returns
> >    err.
> Yeah, I missed that one. Fixed in the new patch.
> 
> > 2) Bigger issue, why not attach the owner to struct sock
> >    instead of socket?  The sock can exist, and thus reference
> >    the protocol family code, long after the socket (ie. the
> >    user end) is killed off and closed.
> >
> >    For example, this could happen for just about any protocol family
> >    due to stray device sk_buff references to the sock and thus the
> >    protocol family.
> Good point. Alghough generic socket code does not necessarily reference 
> proto family module via struct sock. Only in case when family installed 
> non default callbacks (sk->dataready, sk->destruct, etc). Some families
> (af_ipx for example) don't. But I think it's a good idea to refcount 
> struct sock anyway. 
> 
> Ok. Here is the new patch. 
> We still need owner field in struct socket.
> 
> # This is a BitKeeper generated patch for the following project:
> # Project Name: Linux kernel tree
> # This patch format is intended for GNU patch command version 2.5 or higher.
> # This patch includes the following deltas:
> #	           ChangeSet	1.889   -> 1.890  
> #	        net/socket.c	1.39    -> 1.40   
> #	 include/linux/net.h	1.7     -> 1.8    
> #	  include/net/sock.h	1.29    -> 1.30   
> #	     net/core/sock.c	1.14    -> 1.15   
> #
> # The following is the BitKeeper ChangeSet Log
> # --------------------------------------------
> # 02/12/25	maxk@qualcomm.com	1.890
> # Convert generic socket code to the new module refcounting API.
> # --------------------------------------------
> #
> diff -Nru a/include/linux/net.h b/include/linux/net.h
> --- a/include/linux/net.h	Wed Dec 25 23:29:28 2002
> +++ b/include/linux/net.h	Wed Dec 25 23:29:28 2002
> @@ -76,6 +76,8 @@
>  
>  	short			type;
>  	unsigned char		passcred;
> +
> +	struct module           *owner;
>  };
>  
>  struct scm_cookie;
> @@ -124,6 +126,8 @@
>  	short	authentication;
>  	short	encryption;
>  	short	encrypt_net;
> +
> +	struct  module *owner;
>  };
>  
>  struct net_proto 
> diff -Nru a/include/net/sock.h b/include/net/sock.h
> --- a/include/net/sock.h	Wed Dec 25 23:29:28 2002
> +++ b/include/net/sock.h	Wed Dec 25 23:29:28 2002
> @@ -41,6 +41,7 @@
>  #include <linux/config.h>
>  #include <linux/timer.h>
>  #include <linux/cache.h>
> +#include <linux/module.h>
>  
>  #include <linux/netdevice.h>
>  #include <linux/skbuff.h>	/* struct sk_buff */
> @@ -196,7 +197,9 @@
>  
>  	/* RPC layer private data */
>  	void			*user_data;
> -  
> +
> +	struct module		*owner;
> +	
>  	/* Callbacks */
>  	void			(*state_change)(struct sock *sk);
>  	void			(*data_ready)(struct sock *sk,int bytes);
> @@ -577,6 +580,9 @@
>  
>  static inline void sock_graft(struct sock *sk, struct socket *parent)
>  {
> +	try_module_get(parent->owner);
> +	sk->owner = parent->owner;
> +
>  	write_lock_bh(&sk->callback_lock);
>  	sk->sleep = &parent->wait;
>  	parent->sk = sk;
> diff -Nru a/net/core/sock.c b/net/core/sock.c
> --- a/net/core/sock.c	Wed Dec 25 23:29:28 2002
> +++ b/net/core/sock.c	Wed Dec 25 23:29:28 2002
> @@ -601,6 +601,7 @@
>  			sock_lock_init(sk);
>  		}
>  		sk->slab = slab;
> +		sk->owner = NULL;
>  	}
>  
>  	return sk;
> @@ -626,6 +627,8 @@
>  	if (atomic_read(&sk->omem_alloc))
>  		printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n", atomic_read(&sk->omem_alloc));
>  
> +	module_put(sk->owner);
> +	
>  	kmem_cache_free(sk->slab, sk);
>  }
>  
> @@ -1084,10 +1087,10 @@
>  	sk->zapped	=	1;
>  	sk->socket	=	sock;
>  
> -	if(sock)
> -	{
> +	if (sock) {
>  		sk->type	=	sock->type;
>  		sk->sleep	=	&sock->wait;
> +		sk->owner       =       sock->owner;
>  		sock->sk	=	sk;
>  	} else
>  		sk->sleep	=	NULL;
> diff -Nru a/net/socket.c b/net/socket.c
> --- a/net/socket.c	Wed Dec 25 23:29:28 2002
> +++ b/net/socket.c	Wed Dec 25 23:29:28 2002
> @@ -470,6 +470,8 @@
>  
>  	sock = SOCKET_I(inode);
>  
> +	sock->owner = NULL;
> +	
>  	inode->i_mode = S_IFSOCK|S_IRWXUGO;
>  	inode->i_sock = 1;
>  	inode->i_uid = current->fsuid;
> @@ -964,8 +966,9 @@
>  
>  int sock_create(int family, int type, int protocol, struct socket **res)
>  {
> -	int i;
> +	struct net_proto_family *npf;
>  	struct socket *sock;
> +	int err;
>  
>  	/*
>  	 *	Check protocol is in range
> @@ -990,14 +993,8 @@
>  	}
>  		
>  #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
> -	/* Attempt to load a protocol module if the find failed. 
> -	 * 
> -	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
> -	 * requested real, full-featured networking support upon configuration.
> -	 * Otherwise module support will break!
> -	 */
> -	if (net_families[family]==NULL)
> -	{
> +	/* Attempt to load a protocol module if the find failed. */
> +	if (net_families[family]==NULL) {
>  		char module_name[30];
>  		sprintf(module_name,"net-pf-%d",family);
>  		request_module(module_name);
> @@ -1005,29 +1002,31 @@
>  #endif
>  
>  	net_family_read_lock();
> -	if (net_families[family] == NULL) {
> -		i = -EAFNOSUPPORT;
> -		goto out;
> -	}
>  
> -/*
> - *	Allocate the socket and allow the family to set things up. if
> - *	the protocol is 0, the family is instructed to select an appropriate
> - *	default.
> - */
> +	npf = net_families[family];
> +	if (!npf || !try_module_get(npf->owner)) {
> +		net_family_read_unlock();
> +		return -EAFNOSUPPORT;
> +	}
> +	
> +	/*
> +	 * Allocate the socket and allow the family to set things up. if
> +	 * the protocol is 0, the family is instructed to select an appropriate
> +	 * default.
> + 	 */
>  
> -	if (!(sock = sock_alloc())) 
> -	{
> +	sock = sock_alloc();
> +	if (!sock) {
>  		printk(KERN_WARNING "socket: no more sockets\n");
> -		i = -ENFILE;		/* Not exactly a match, but its the
> +		err = -ENFILE;		/* Not exactly a match, but its the
>  					   closest posix thing */
>  		goto out;
>  	}
>  
>  	sock->type  = type;
> +	sock->owner = npf->owner;
>  
> -	if ((i = net_families[family]->create(sock, protocol)) < 0) 
> -	{
> +	if ((err = npf->create(sock, protocol)) < 0) {
>  		sock_release(sock);
>  		goto out;
>  	}
> @@ -1036,7 +1035,9 @@
>  
>  out:
>  	net_family_read_unlock();
> -	return i;
> +	if (err)
> +		module_put(npf->owner);
> +	return err;
>  }
>  
>  asmlinkage long sys_socket(int family, int type, int protocol)
> @@ -1198,9 +1199,10 @@
>  	if (!(newsock = sock_alloc())) 
>  		goto out_put;
>  
> -	newsock->type = sock->type;
> -	newsock->ops = sock->ops;
> -
> +	newsock->type  = sock->type;
> +	newsock->ops   = sock->ops;
> +	newsock->owner = sock->owner;
> +	
>  	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
>  	if (err < 0)
>  		goto out_release;
> 
> --
> 
> Max
> 
> 
> 


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-01-02 11:43 ` Max Krasnyansky
@ 2003-01-03  8:24   ` David S. Miller
  2003-01-20  3:22   ` Max Krasnyansky
  1 sibling, 0 replies; 40+ messages in thread
From: David S. Miller @ 2003-01-03  8:24 UTC (permalink / raw)
  To: Max Krasnyansky; +Cc: linux-kernel

On Thu, 2003-01-02 at 03:43, Max Krasnyansky wrote:
> Did you have a chance to look at the new patch ?

No, I'm still working on my backlog.



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2002-12-26  8:11 [PATCH/RFC] New module refcounting for net_proto_family Max Krasnyansky
  2003-01-02 11:43 ` Max Krasnyansky
@ 2003-01-07  9:21 ` David S. Miller
  2003-01-09 20:45   ` Max Krasnyansky
  1 sibling, 1 reply; 40+ messages in thread
From: David S. Miller @ 2003-01-07  9:21 UTC (permalink / raw)
  To: maxk; +Cc: linux-kernel


Change is buggy, there are many places that sk_alloc() but don't use
sock_init_data().  net/ipv4/tcp_minisocks.c is one of many such
places.

So your patch will get the refcounting wrong in this common case.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-01-07  9:21 ` David S. Miller
@ 2003-01-09 20:45   ` Max Krasnyansky
  2003-01-09 23:53     ` David S. Miller
  0 siblings, 1 reply; 40+ messages in thread
From: Max Krasnyansky @ 2003-01-09 20:45 UTC (permalink / raw)
  To: David S. Miller; +Cc: linux-kernel

At 01:21 AM 1/7/2003 -0800, David S. Miller wrote:

>Change is buggy, there are many places that sk_alloc() but don't use
>sock_init_data().  net/ipv4/tcp_minisocks.c is one of many such
>places.
Those guys will have to bump mod refcount themselves then.
sock_init_data() and sock_graft() have access to ->owner field but sk_alloc()
doesn't. So we either have to change sk_alloc() API or make call to
sock_init_data()/sock_graft() a must. Any other suggestions ?

Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-01-09 20:45   ` Max Krasnyansky
@ 2003-01-09 23:53     ` David S. Miller
  0 siblings, 0 replies; 40+ messages in thread
From: David S. Miller @ 2003-01-09 23:53 UTC (permalink / raw)
  To: maxk; +Cc: linux-kernel

   From: Max Krasnyansky <maxk@qualcomm.com>
   Date: Thu, 09 Jan 2003 12:45:37 -0800

   Those guys will have to bump mod refcount themselves then.
   sock_init_data() and sock_graft() have access to ->owner field but sk_alloc()
   doesn't. So we either have to change sk_alloc() API or make call to
   sock_init_data()/sock_graft() a must. Any other suggestions ?

This isn't rocket science, just make a new sock_foo() interface
that merely does the module owner setup.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-01-02 11:43 ` Max Krasnyansky
  2003-01-03  8:24   ` David S. Miller
@ 2003-01-20  3:22   ` Max Krasnyansky
  2003-01-21 11:03     ` David S. Miller
                       ` (2 more replies)
  1 sibling, 3 replies; 40+ messages in thread
From: Max Krasnyansky @ 2003-01-20  3:22 UTC (permalink / raw)
  To: davem; +Cc: linux-kernel


> This isn't rocket science, just make a new sock_foo() interface
> that merely does the module owner setup.

Ok. I really think that we should do module refcounting in the struct 
socket (sock) and struct sock (sk) separately. The only reason to do 
module refcounting in sk is if protocol changes default callbacks 
(i.e. sk->data_ready, etc). Many protocols don't and therefor there is 
no need to keep track of those modules.
So here is new patch.
	socket->owner protects socket->ops
	sk->owner     protects sk callbacks

Protocols that change default callbacks will have to do something like

	sk_set_owner(sk, THIS_MODULE);

	sk->data_read = xxx_data_ready;
	...

Module will be released when sk is destroyed.

Max

===== include/linux/net.h 1.7 vs edited =====
--- 1.7/include/linux/net.h	Tue Oct 15 16:08:01 2002
+++ edited/include/linux/net.h	Sat Jan 18 15:57:01 2003
@@ -76,6 +76,8 @@
 
 	short			type;
 	unsigned char		passcred;
+
+	struct module           *owner;
 };
 
 struct scm_cookie;
@@ -124,6 +126,8 @@
 	short	authentication;
 	short	encryption;
 	short	encrypt_net;
+
+	struct  module *owner;
 };
 
 struct net_proto 
===== include/net/sock.h 1.29 vs edited =====
--- 1.29/include/net/sock.h	Sun Nov 17 04:58:01 2002
+++ edited/include/net/sock.h	Sat Jan 18 18:09:09 2003
@@ -41,6 +41,7 @@
 #include <linux/config.h>
 #include <linux/timer.h>
 #include <linux/cache.h>
+#include <linux/module.h>
 
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>	/* struct sk_buff */
@@ -196,7 +197,9 @@
 
 	/* RPC layer private data */
 	void			*user_data;
-  
+
+	struct module		*owner;
+	
 	/* Callbacks */
 	void			(*state_change)(struct sock *sk);
 	void			(*data_ready)(struct sock *sk,int bytes);
@@ -685,6 +688,19 @@
 	return dst;
 }
 
+/*
+ * Set sk owner.
+ * Must be called by protocols that change sk callbacks.
+ * Owner module will be released when sk is destroyed.
+ */
+
+static inline int sk_set_owner(struct sock *sk, struct module *owner)
+{
+	if (!try_module_get(owner))
+		return -EINVAL;
+	sk->owner = owner;
+	return 0;
+}
 
 /*
  * 	Queue a received datagram if it will fit. Stream and sequenced
===== net/socket.c 1.41 vs edited =====
--- 1.41/net/socket.c	Tue Jan  7 02:17:34 2003
+++ edited/net/socket.c	Sat Jan 18 18:27:39 2003
@@ -466,6 +466,8 @@
 
 	sock = SOCKET_I(inode);
 
+	sock->owner = NULL;
+	
 	inode->i_mode = S_IFSOCK|S_IRWXUGO;
 	inode->i_sock = 1;
 	inode->i_uid = current->fsuid;
@@ -515,6 +517,8 @@
 		return;
 	}
 	sock->file=NULL;
+
+	module_put(sock->owner);
 }
 
 static int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, int size)
@@ -962,8 +966,9 @@
 
 int sock_create(int family, int type, int protocol, struct socket **res)
 {
-	int i;
+	struct net_proto_family *npf;
 	struct socket *sock;
+	int err;
 
 	/*
 	 *	Check protocol is in range
@@ -988,14 +993,8 @@
 	}
 		
 #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
-	/* Attempt to load a protocol module if the find failed. 
-	 * 
-	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
-	 * requested real, full-featured networking support upon configuration.
-	 * Otherwise module support will break!
-	 */
-	if (net_families[family]==NULL)
-	{
+	/* Attempt to load a protocol module if the find failed. */
+	if (net_families[family]==NULL) {
 		char module_name[30];
 		sprintf(module_name,"net-pf-%d",family);
 		request_module(module_name);
@@ -1003,29 +1002,32 @@
 #endif
 
 	net_family_read_lock();
-	if (net_families[family] == NULL) {
-		i = -EAFNOSUPPORT;
-		goto out;
-	}
 
-/*
- *	Allocate the socket and allow the family to set things up. if
- *	the protocol is 0, the family is instructed to select an appropriate
- *	default.
- */
+	npf = net_families[family];
+	if (!npf || !try_module_get(npf->owner)) {
+		net_family_read_unlock();
+		return -EAFNOSUPPORT;
+	}
+	
+	/*
+	 * Allocate the socket and allow the family to set things up. if
+	 * the protocol is 0, the family is instructed to select an appropriate
+	 * default.
+ 	 */
 
-	if (!(sock = sock_alloc())) 
-	{
+	sock = sock_alloc();
+	if (!sock) {
 		printk(KERN_WARNING "socket: no more sockets\n");
-		i = -ENFILE;		/* Not exactly a match, but its the
+		err = -ENFILE;		/* Not exactly a match, but its the
 					   closest posix thing */
+		module_put(npf->owner);
 		goto out;
 	}
 
 	sock->type  = type;
+	sock->owner = npf->owner;
 
-	if ((i = net_families[family]->create(sock, protocol)) < 0) 
-	{
+	if ((err = npf->create(sock, protocol)) < 0) {
 		sock_release(sock);
 		goto out;
 	}
@@ -1034,7 +1036,7 @@
 
 out:
 	net_family_read_unlock();
-	return i;
+	return err;
 }
 
 asmlinkage long sys_socket(int family, int type, int protocol)
@@ -1196,9 +1198,13 @@
 	if (!(newsock = sock_alloc())) 
 		goto out_put;
 
-	newsock->type = sock->type;
-	newsock->ops = sock->ops;
+	newsock->type  = sock->type;
+	newsock->ops   = sock->ops;
+	newsock->owner = sock->owner;
 
+	try_module_get(sock->owner);
+	newsock->owner = sock->owner;
+	
 	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
 	if (err < 0)
 		goto out_release;
===== net/core/sock.c 1.14 vs edited =====
--- 1.14/net/core/sock.c	Fri Oct 18 17:45:16 2002
+++ edited/net/core/sock.c	Sat Jan 18 17:48:42 2003
@@ -601,6 +601,7 @@
 			sock_lock_init(sk);
 		}
 		sk->slab = slab;
+		sk->owner = NULL;
 	}
 
 	return sk;
@@ -626,6 +627,8 @@
 	if (atomic_read(&sk->omem_alloc))
 		printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n", atomic_read(&sk->omem_alloc));
 
+	module_put(sk->owner);
+	
 	kmem_cache_free(sk->slab, sk);
 }
 
@@ -1084,8 +1087,7 @@
 	sk->zapped	=	1;
 	sk->socket	=	sock;
 
-	if(sock)
-	{
+	if (sock) {
 		sk->type	=	sock->type;
 		sk->sleep	=	&sock->wait;
 		sock->sk	=	sk;
-- 







^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-01-20  3:22   ` Max Krasnyansky
@ 2003-01-21 11:03     ` David S. Miller
  2003-01-21 19:42       ` Max Krasnyansky
  2003-02-07  9:48     ` David S. Miller
  2003-02-18  3:46     ` David S. Miller
  2 siblings, 1 reply; 40+ messages in thread
From: David S. Miller @ 2003-01-21 11:03 UTC (permalink / raw)
  To: maxk; +Cc: linux-kernel

   From: Max Krasnyansky <maxk@qualcomm.com>
   Date: Sun, 19 Jan 2003 19:22:44 -0800 (PST)
   
   The only reason to do module refcounting in sk is if protocol
   changes default callbacks  (i.e. sk->data_ready, etc).

What about the reference to skb->sk?  Are you sure there are
no cases where:

1) skb->sk will be non-NULL
2) default callbacks are used

?

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-01-21 19:42       ` Max Krasnyansky
@ 2003-01-21 19:36         ` David S. Miller
  0 siblings, 0 replies; 40+ messages in thread
From: David S. Miller @ 2003-01-21 19:36 UTC (permalink / raw)
  To: maxk; +Cc: linux-kernel

   From: Max Krasnyansky <maxk@qualcomm.com>
   Date: Tue, 21 Jan 2003 11:42:41 -0800

   Here is a little summary about families that I looked at:

Thanks for doing all the studying.  I'll review this some more as the
weekend approaches.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-01-21 11:03     ` David S. Miller
@ 2003-01-21 19:42       ` Max Krasnyansky
  2003-01-21 19:36         ` David S. Miller
  0 siblings, 1 reply; 40+ messages in thread
From: Max Krasnyansky @ 2003-01-21 19:42 UTC (permalink / raw)
  To: David S. Miller; +Cc: linux-kernel

At 03:03 AM 1/21/2003 -0800, David S. Miller wrote:
>   From: Max Krasnyansky <maxk@qualcomm.com>
>   Date: Sun, 19 Jan 2003 19:22:44 -0800 (PST)
>   
>   The only reason to do module refcounting in sk is if protocol
>   changes default callbacks  (i.e. sk->data_ready, etc).
>
>What about the reference to skb->sk?  Are you sure there are
>no cases where:
>
>1) skb->sk will be non-NULL
>2) default callbacks are used

In that case we don't care whether the module is still loaded or not.
Here is the list of sk callbacks
        void                    (*state_change)(struct sock *sk);
        void                    (*data_ready)(struct sock *sk, int bytes);
        void                    (*write_space)(struct sock *sk);
        void                    (*error_report)(struct sock *sk);
        int                     (*backlog_rcv) (struct sock *sk, struct sk_buff *skb);  
        void                    (*destruct)(struct sock *sk);
Net core doesn't make any other calls into the protocol module.

In any case if protocol module wants, for some reason, to be around until sk is destroyed
it can call sk_set_owner() right after sk_alloc(), even if it uses default callbacks.

While looking at the net families that we support I realized that there is one more reason
why protocol may have to call sk_set_owner(). Is if protocol uses private slab cache for its 
sks. Otherwise cache will be destroyed during module unloading but sk may still be alive.

Here is a little summary about families that I looked at:
        Netlink - Global sk cache, replaces sk->destruct() callback.
                  Must call sk_set_owner()

        Unix -  Private sk cache, replaces sk->destruct() and sk->write_space() callbacks
                Must call sk_set_owner()

        IPv4/6 - Private cache, non-default callbacks.
                Must call sk_set_owner() (cannot be a module, so we don't really care)

        IPX - Global cache, default callbacks.

        ATM - Global cache, default callbacks.
                
        IRDA - Global cache, default callbacks.

        Bluetooth - Private cache (don't really need it, will change to global), non-default callbacks
                Must call sk_set_owner()

        AX25 - Global cache, non-default sk->destroy()
                Must call sk_set_owner()

        Rose - Global cache, default callbacks. 
        
        DecNet - Private cache, non-default callbacks.
                Must call sk_set_owner()
        
        LLC - Global cache, default callbacks.

Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-01-20  3:22   ` Max Krasnyansky
  2003-01-21 11:03     ` David S. Miller
@ 2003-02-07  9:48     ` David S. Miller
  2003-02-07 23:34       ` Max Krasnyansky
  2003-02-18  3:46     ` David S. Miller
  2 siblings, 1 reply; 40+ messages in thread
From: David S. Miller @ 2003-02-07  9:48 UTC (permalink / raw)
  To: maxk; +Cc: linux-kernel

   From: Max Krasnyansky <maxk@qualcomm.com>
   Date: Sun, 19 Jan 2003 19:22:44 -0800 (PST)

   So here is new patch.

Ok, it generally looks fine, and try_module_get() is cheap enough
(basically the equivalent of a local-cpu statistic bump plus
a compare) that I'm not concerned about any added overhead.

And since it is fixing a bug... :-)

Just let me discuss some things with Alexey before I apply this.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-07  9:48     ` David S. Miller
@ 2003-02-07 23:34       ` Max Krasnyansky
  2003-02-08  8:44         ` David S. Miller
  0 siblings, 1 reply; 40+ messages in thread
From: Max Krasnyansky @ 2003-02-07 23:34 UTC (permalink / raw)
  To: David S. Miller; +Cc: linux-kernel

At 01:48 AM 2/7/2003 -0800, David S. Miller wrote:
>   From: Max Krasnyansky <maxk@qualcomm.com>
>   Date: Sun, 19 Jan 2003 19:22:44 -0800 (PST)
>
>   So here is new patch.
>
>Ok, it generally looks fine, and try_module_get() is cheap enough
>(basically the equivalent of a local-cpu statistic bump plus
>a compare) that I'm not concerned about any added overhead.
>
>And since it is fixing a bug... :-)
Good.

>Just let me discuss some things with Alexey before I apply this.
Sure.

Do you want me to push this stuff to BK where you can pull from ?

Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-07 23:34       ` Max Krasnyansky
@ 2003-02-08  8:44         ` David S. Miller
  0 siblings, 0 replies; 40+ messages in thread
From: David S. Miller @ 2003-02-08  8:44 UTC (permalink / raw)
  To: maxk; +Cc: linux-kernel

   From: Max Krasnyansky <maxk@qualcomm.com>
   Date: Fri, 07 Feb 2003 15:34:22 -0800
   
   Do you want me to push this stuff to BK where you can pull from ?

No need.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-01-20  3:22   ` Max Krasnyansky
  2003-01-21 11:03     ` David S. Miller
  2003-02-07  9:48     ` David S. Miller
@ 2003-02-18  3:46     ` David S. Miller
  2003-02-18 18:50       ` Max Krasnyansky
  2 siblings, 1 reply; 40+ messages in thread
From: David S. Miller @ 2003-02-18  3:46 UTC (permalink / raw)
  To: maxk; +Cc: linux-kernel


After talking to Alexey, I don't like this patch.

The new module subsystem was supposed to deal with things
like this cleanly, and this patch is merely a hack to cover
up for it's shortcomings.

To be honest, I'd rather just disallow module unloading or
let them stay buggy than put this hack into the tree.

Special hacks are for 2.4.x where things like full cleanups
are not allowed.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-18  3:46     ` David S. Miller
@ 2003-02-18 18:50       ` Max Krasnyansky
  2003-02-18 21:09         ` Jean Tourrilhes
  2003-02-19  3:54         ` Rusty Russell
  0 siblings, 2 replies; 40+ messages in thread
From: Max Krasnyansky @ 2003-02-18 18:50 UTC (permalink / raw)
  To: David S. Miller, Alexey Kuznetsov, Jean Tourrilhes, Rusty Russell
  Cc: linux-kernel

At 07:46 PM 2/17/2003, David S. Miller wrote:

>After talking to Alexey, I don't like this patch.
>
>The new module subsystem was supposed to deal with things
>like this cleanly, and this patch is merely a hack to cover
>up for it's shortcomings.
No it's not. Are you guys saying that module refcounting in net/core/dev.c 
is a hack too ? Patch that I sent implements exactly the same logic.
Grab module reference before creating net family socket and release
module when socket is gone. Where is the hack here ?

>To be honest, I'd rather just disallow module unloading or
>let them stay buggy than put this hack into the tree.
This comment makes no sense to me. Especially "let them stay buggy" part ;-).
(I wonder what Alex could have told you).

>Special hacks are for 2.4.x where things like full cleanups are not allowed.
It's not a special hack. If it has problems let's fix them.
I want to keep Bluetooth socket modules loadable and unloadable. And I'm sure
Jean and other folks want's the same thing for IrDA and other subsystems with sockets. 
So, you guys would have to come with a better argument than "ohh it's an ugly hack" ;-).

Alexey, could you please post what you told Dave here ?
Rusty, any comments from you ?

Here is the link to the patch that I sent
        http://marc.theaimsgroup.com/?l=linux-kernel&m=104308300808557&w=2

and here is some explanation about protocol families
        http://marc.theaimsgroup.com/?l=linux-kernel&m=104317832227499&w=2

Dave, Alex, seriously let's come up with the solution rather than ignoring
the problem. I think my patch is a good solution. If you don't like overhead 
or whatever else let's fix that.

Thanks
Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-18 18:50       ` Max Krasnyansky
@ 2003-02-18 21:09         ` Jean Tourrilhes
  2003-02-19  3:54         ` Rusty Russell
  1 sibling, 0 replies; 40+ messages in thread
From: Jean Tourrilhes @ 2003-02-18 21:09 UTC (permalink / raw)
  To: Max Krasnyansky
  Cc: David S. Miller, Alexey Kuznetsov, Rusty Russell, linux-kernel

On Tue, Feb 18, 2003 at 10:50:49AM -0800, Max Krasnyansky wrote:
> At 07:46 PM 2/17/2003, David S. Miller wrote:
> 
> >After talking to Alexey, I don't like this patch.
> >
> >The new module subsystem was supposed to deal with things
> >like this cleanly, and this patch is merely a hack to cover
> >up for it's shortcomings.
> 
> No it's not. Are you guys saying that module refcounting in net/core/dev.c 
> is a hack too ? Patch that I sent implements exactly the same logic.
> Grab module reference before creating net family socket and release
> module when socket is gone. Where is the hack here ?

	Few facts :
	1) You can't get away without refcounting the module users,
	   unless you implement weak pointers in the kernel
	2) You can do refcounting in the module itself (old way, 2.2, 2.4)
	3) You can do refcounting in the module users (new way)
	4) The module code itself can't do the refcounting, unless it puts
	   itself explicitely between the module and its users and
	   understand the usage semantic of the module APIs

> >To be honest, I'd rather just disallow module unloading or
> >let them stay buggy than put this hack into the tree.
> >Special hacks are for 2.4.x where things like full cleanups are not allowed.
>
> It's not a special hack. If it has problems let's fix them.

	Well, there is still some time before 2.6.X, but not that
much. We understand what the issues are, we now some of the solutions,
now we just need a schedule and implement those. I personally don't
care too much of the details as long s there is a solution.
	When are we supposed to get a preview of the "right" way to do
it ?

> I want to keep Bluetooth socket modules loadable and unloadable. And
> I'm sure Jean and other folks want's the same thing for IrDA and
> other subsystems with sockets.

	IrDA sockets will be unloadable, because IrDA wants to be
modular, and people need to unload it to do CIR. So, for me the issue
is more the probability of crashing the system.

> Thanks
> Max

	Have fun...

	Jean

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-18 18:50       ` Max Krasnyansky
  2003-02-18 21:09         ` Jean Tourrilhes
@ 2003-02-19  3:54         ` Rusty Russell
  2003-02-19  7:04           ` David S. Miller
  2003-02-19 17:45           ` Max Krasnyansky
  1 sibling, 2 replies; 40+ messages in thread
From: Rusty Russell @ 2003-02-19  3:54 UTC (permalink / raw)
  To: Max Krasnyansky
  Cc: David S. Miller, Alexey Kuznetsov, Jean Tourrilhes, linux-kernel

In message <5.1.0.14.2.20030218101309.048d4288@mail1.qualcomm.com> you write:
> At 07:46 PM 2/17/2003, David S. Miller wrote:
> 
> >After talking to Alexey, I don't like this patch.
> >
> >The new module subsystem was supposed to deal with things
> >like this cleanly, and this patch is merely a hack to cover
> >up for it's shortcomings.

I don't quite understand.  

There are some issue with this patch, however.

Firstly, the owner field should probably be in struct proto_ops not
struct socket, where the function pointers are.

The sk thing looks reasonable at first glance.  Getting a reference to
npf->owner, then holding it for the socket is a little confusing, but
an obvious optimization over a naive "get, use, drop, get".

In sys_accept:

> @@ -1196,9 +1198,13 @@
>  	if (!(newsock = sock_alloc())) 
>  		goto out_put;
>  
> -	newsock->type = sock->type;
> -	newsock->ops = sock->ops;
> +	newsock->type  = sock->type;
> +	newsock->ops   = sock->ops;
> +	newsock->owner = sock->owner;
>  
> +	try_module_get(sock->owner);
> +	newsock->owner = sock->owner;
> +	
>  	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
>  	if (err < 0)
>  		goto out_release;

You still need to check the result of try_module_get, and fail if it
fails.  The *only* time this will fail is when someone is doing an
"rmmod --wait" on the module, which presumably means they really do
not want you to increase the reference count furthur.

Hope that helps,
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-19  3:54         ` Rusty Russell
@ 2003-02-19  7:04           ` David S. Miller
  2003-02-19 18:03             ` Max Krasnyansky
  2003-02-19 20:31             ` Roman Zippel
  2003-02-19 17:45           ` Max Krasnyansky
  1 sibling, 2 replies; 40+ messages in thread
From: David S. Miller @ 2003-02-19  7:04 UTC (permalink / raw)
  To: rusty; +Cc: maxk, kuznet, jt, linux-kernel

   From: Rusty Russell <rusty@rustcorp.com.au>
   Date: Wed, 19 Feb 2003 14:54:21 +1100
   
   Firstly, the owner field should probably be in struct proto_ops not
   struct socket, where the function pointers are.

I think this is one of Alexey's main problems with the
patch.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-19  3:54         ` Rusty Russell
  2003-02-19  7:04           ` David S. Miller
@ 2003-02-19 17:45           ` Max Krasnyansky
  2003-02-20  1:21             ` Rusty Russell
  1 sibling, 1 reply; 40+ messages in thread
From: Max Krasnyansky @ 2003-02-19 17:45 UTC (permalink / raw)
  To: Rusty Russell
  Cc: David S. Miller, Alexey Kuznetsov, Jean Tourrilhes, linux-kernel

At 07:54 PM 2/18/2003, Rusty Russell wrote:
>In message <5.1.0.14.2.20030218101309.048d4288@mail1.qualcomm.com> you write:
>> At 07:46 PM 2/17/2003, David S. Miller wrote:
>> 
>> >After talking to Alexey, I don't like this patch.
>> >
>> >The new module subsystem was supposed to deal with things
>> >like this cleanly, and this patch is merely a hack to cover
>> >up for it's shortcomings.
>
>I don't quite understand.  
>
>There are some issue with this patch, however.
>
>Firstly, the owner field should probably be in struct proto_ops not
>struct socket, where the function pointers are.
struct proto_ops doesn't exists on its own without struct socket.
I think it make sense to simply keep track of the sockets but I don't 
see any problem with putting it in proto_ops.

struct sock is different though. callbacks are inside.

>The sk thing looks reasonable at first glance.  Getting a reference to
>npf->owner, then holding it for the socket is a little confusing, but
>an obvious optimization over a naive "get, use, drop, get".
That was an optimization indeed. There is no point in dropping reference
there.


>In sys_accept:
>
>> @@ -1196,9 +1198,13 @@
>>       if (!(newsock = sock_alloc())) 
>>               goto out_put;
>>  
>> -     newsock->type = sock->type;
>> -     newsock->ops = sock->ops;
>> +     newsock->type  = sock->type;
>> +     newsock->ops   = sock->ops;
>> +     newsock->owner = sock->owner;
>>  
>> +     try_module_get(sock->owner);
>> +     newsock->owner = sock->owner;
>> +     
>>       err = sock->ops->accept(sock, newsock, sock->file->f_flags);
>>       if (err < 0)
>>               goto out_release;
>
>You still need to check the result of try_module_get, and fail if it
>fails.  The *only* time this will fail is when someone is doing an
>"rmmod --wait" on the module, which presumably means they really do
>not want you to increase the reference count furthur.
Ohh, I see. My assumption here was that we know for sure 
that module is alive at this point since we already hold a reference to the 
first socket. Actually I was going to send another email and ask for unconditional 
module_get() specifically for the cases like that. 

Even after your explanation I still think we need unconditional module_get() there.
Because in this case 'rmmod --wait' will simply brake accept() logic. I mean it'll 
keep waiting until listening socket is destroyed (i.e. until socket app is killed) 
but accept() will mysteriously fail for no good reason.
Comments ?

Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-19  7:04           ` David S. Miller
@ 2003-02-19 18:03             ` Max Krasnyansky
  2003-02-19 20:31             ` Roman Zippel
  1 sibling, 0 replies; 40+ messages in thread
From: Max Krasnyansky @ 2003-02-19 18:03 UTC (permalink / raw)
  To: David S. Miller, rusty; +Cc: kuznet, jt, linux-kernel

At 11:04 PM 2/18/2003, David S. Miller wrote:
>   From: Rusty Russell <rusty@rustcorp.com.au>
>   Date: Wed, 19 Feb 2003 14:54:21 +1100
>   
>   Firstly, the owner field should probably be in struct proto_ops not
>   struct socket, where the function pointers are.
>
>I think this is one of Alexey's main problems with the patch.
This is a bit more informative than "oh it's an ugly hack" ;-)

Ok. I got at least three reasons why I think owner field should be in struct 
socket:
        - struct proto_ops doesn't exists without struct socket.
         It cannot be registered or otherwise used on it's own. 
        - struct sock might inherit (when needed see my explanation about different families)
        its owner from struct socket. In which case sk_set_owner(sk, socket->ops->owner) doesn't
        look right.
        - we might want to protect something else besides socket->ops.

None of those reasons are critical. If you guys still feel that ->owner must be in struct 
proto_ops be that way, I'm ok with it.

Any other comments ? 
Alexey, this is the time for you to speak up ;-). Please please. So far I got zero feedback 
from you. And you are the one who somehow made DaveM radically change his mind :).

Thanks
Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-19  7:04           ` David S. Miller
  2003-02-19 18:03             ` Max Krasnyansky
@ 2003-02-19 20:31             ` Roman Zippel
  1 sibling, 0 replies; 40+ messages in thread
From: Roman Zippel @ 2003-02-19 20:31 UTC (permalink / raw)
  To: David S. Miller; +Cc: rusty, maxk, kuznet, jt, linux-kernel

Hi,

On Tue, 18 Feb 2003, David S. Miller wrote:

>    Firstly, the owner field should probably be in struct proto_ops not
>    struct socket, where the function pointers are.
> 
> I think this is one of Alexey's main problems with the
> patch.

BTW the access to sockets_in_use is not preempt safe. Although the data 
has only statistic value, it might be worth to fix this. The easiest 
solution might be to move socket allocation and the create call outside 
the net_family lock. The read lock should at least get a preempt enable/ 
disable (brlock might be another possibilty) and within this lock we can 
safely modify sockets_in_use and do something like net_family_get()/ 
net_family_put() similiar to get_fs_type().

bye, Roman



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-19 17:45           ` Max Krasnyansky
@ 2003-02-20  1:21             ` Rusty Russell
  2003-02-20 17:38               ` Max Krasnyansky
  0 siblings, 1 reply; 40+ messages in thread
From: Rusty Russell @ 2003-02-20  1:21 UTC (permalink / raw)
  To: Max Krasnyansky
  Cc: David S. Miller, Alexey Kuznetsov, Jean Tourrilhes, linux-kernel

In message <5.1.0.14.2.20030219092611.0d0d00c8@mail1.qualcomm.com> you write:
> >Firstly, the owner field should probably be in struct proto_ops not
> >struct socket, where the function pointers are.
> struct proto_ops doesn't exists on its own without struct socket.
> I think it make sense to simply keep track of the sockets but I don't 
> see any problem with putting it in proto_ops.

Well, the purpose is to stop those methods from vanishing, so it makes
sense to have the owner field next to those pointers.  It's
nitpicking, really.

> struct sock is different though. callbacks are inside.

Yes.

> >> +     try_module_get(sock->owner);
> >> +     newsock->owner = sock->owner;
> >> +     
> >>       err = sock->ops->accept(sock, newsock, sock->file->f_flags);
> >>       if (err < 0)
> >>               goto out_release;
> >
> >You still need to check the result of try_module_get, and fail if it
> >fails.  The *only* time this will fail is when someone is doing an
> >"rmmod --wait" on the module, which presumably means they really do
> >not want you to increase the reference count furthur.

> Ohh, I see. My assumption here was that we know for sure that module
> is alive at this point since we already hold a reference to the
> first socket. Actually I was going to send another email and ask for
> unconditional module_get() specifically for the cases like that.

There has been talk of this, but OTOH, the admin has explicitly gone
out of their way to remove this module.  They really don't want anyone
new using it.  Presumably at this very moment they are killing off all
the processes they can find with such a socket.

My other reluctance is that people will use "module_get" the way they
used MOD_INC_USE_COUNT() (ie. when *not* already holding a reference),
and we'll have all those races back.

I think it can be argued both ways, honestly.

Clarifies?
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-20  1:21             ` Rusty Russell
@ 2003-02-20 17:38               ` Max Krasnyansky
  2003-02-21  0:30                 ` Rusty Russell
  0 siblings, 1 reply; 40+ messages in thread
From: Max Krasnyansky @ 2003-02-20 17:38 UTC (permalink / raw)
  To: Rusty Russell
  Cc: David S. Miller, Alexey Kuznetsov, Jean Tourrilhes, linux-kernel

At 05:21 PM 2/19/2003, Rusty Russell wrote:
>In message <5.1.0.14.2.20030219092611.0d0d00c8@mail1.qualcomm.com> you write:
>> >Firstly, the owner field should probably be in struct proto_ops not
>> >struct socket, where the function pointers are.
>> struct proto_ops doesn't exists on its own without struct socket.
>> I think it make sense to simply keep track of the sockets but I don't 
>> see any problem with putting it in proto_ops.
>
>Well, the purpose is to stop those methods from vanishing, so it makes
>sense to have the owner field next to those pointers.  It's
>nitpicking, really.
Actually I think I have strong reason to have it in struct socket. I'll reply to
Dave's email and explain it there.

>> >> +     try_module_get(sock->owner);
>> >> +     newsock->owner = sock->owner;
>> >> +     
>> >>       err = sock->ops->accept(sock, newsock, sock->file->f_flags);
>> >>       if (err < 0)
>> >>               goto out_release;
>> >
>> >You still need to check the result of try_module_get, and fail if it
>> >fails.  The *only* time this will fail is when someone is doing an
>> >"rmmod --wait" on the module, which presumably means they really do
>> >not want you to increase the reference count furthur.
>
>> Ohh, I see. My assumption here was that we know for sure that module
>> is alive at this point since we already hold a reference to the
>> first socket. Actually I was going to send another email and ask for
>> unconditional module_get() specifically for the cases like that.
>
>There has been talk of this, but OTOH, the admin has explicitly gone
>out of their way to remove this module.  They really don't want anyone
>new using it.  Presumably at this very moment they are killing off all
>the processes they can find with such a socket.
The thing is that once those processes are killed sockets will be 
destroyed and release the module anyway. i.e. There is no reason to
sort of artificially force accept() to fail. Everything will be cleaned 
up once the process is gone.

>My other reluctance is that people will use "module_get" the way they
>used MOD_INC_USE_COUNT() (ie. when *not* already holding a reference),
>and we'll have all those races back.
Yes that'd be wrong. However I think in general when module wants to create 
a copy of its own object (ie already holds reference) it should be able to 
copy the object and simply bump refcount with module_get(). That will help 
to avoid a lot of unnecessary error recovery paths (like in the accept() case).

>I think it can be argued both ways, honestly.
Yep. And I'd argue in for of module_get() :)

Thanks
Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-20 17:38               ` Max Krasnyansky
@ 2003-02-21  0:30                 ` Rusty Russell
  2003-02-21  1:17                   ` Max Krasnyansky
  0 siblings, 1 reply; 40+ messages in thread
From: Rusty Russell @ 2003-02-21  0:30 UTC (permalink / raw)
  To: Max Krasnyansky
  Cc: David S. Miller, Alexey Kuznetsov, Jean Tourrilhes, linux-kernel

In message <5.1.0.14.2.20030220092216.0d3fefd0@mail1.qualcomm.com> you write:
> >There has been talk of this, but OTOH, the admin has explicitly gone
> >out of their way to remove this module.  They really don't want anyone
> >new using it.  Presumably at this very moment they are killing off all
> >the processes they can find with such a socket.
> The thing is that once those processes are killed sockets will be 
> destroyed and release the module anyway. i.e. There is no reason to
> sort of artificially force accept() to fail. Everything will be cleaned 
> up once the process is gone.

Yes, but in practical terms it's probably going to fork a child with
that socket.

> >I think it can be argued both ways, honestly.
> Yep. And I'd argue in for of module_get() :)

My only real insistence in this is that such an interface be called
__try_module_get(), because the "__" warn people that it's a "you'd
better know *exactly* what you are doing", even though the "try" is a
bit of a misnomer.

"module_get" sounds like a "simpler" try_module_get()...
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-21  0:30                 ` Rusty Russell
@ 2003-02-21  1:17                   ` Max Krasnyansky
  2003-02-21  8:45                     ` Christoph Hellwig
  2003-02-24  1:01                     ` Rusty Russell
  0 siblings, 2 replies; 40+ messages in thread
From: Max Krasnyansky @ 2003-02-21  1:17 UTC (permalink / raw)
  To: Rusty Russell
  Cc: David S. Miller, Alexey Kuznetsov, Jean Tourrilhes, linux-kernel

At 04:30 PM 2/20/2003, Rusty Russell wrote:
>In message <5.1.0.14.2.20030220092216.0d3fefd0@mail1.qualcomm.com> you write:
>> >There has been talk of this, but OTOH, the admin has explicitly gone
>> >out of their way to remove this module.  They really don't want anyone
>> >new using it.  Presumably at this very moment they are killing off all
>> >the processes they can find with such a socket.
>> The thing is that once those processes are killed sockets will be 
>> destroyed and release the module anyway. i.e. There is no reason to
>> sort of artificially force accept() to fail. Everything will be cleaned 
>> up once the process is gone.
>
>Yes, but in practical terms it's probably going to fork a child with
>that socket.
But it will also be killed.

>> >I think it can be argued both ways, honestly.
>> Yep. And I'd argue in for of module_get() :)
>
>My only real insistence in this is that such an interface be called
>__try_module_get(), because the "__" warn people that it's a "you'd
>better know *exactly* what you are doing", even though the "try" is a
>bit of a misnomer.
Yeah, I think 'try' is definitely be a misnomer in this case.
How about something like this ?

static inline void __module_get(struct module *mod)
{
#ifdef CONFIG_MODULE_DETECT_API_VIOLATION
        if (!module_refcount(mod))
                __unsafe(mod);
#endif
        local_inc(&mod->ref[get_cpu()].count);
        put_cpu();
}

We will be able to compile the kernel with CONFIG_MODULE_DETECT_API_VIOLATION
and easily find all modules that call __module_get() without holding a reference.

Comments ?

Max






^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-21  1:17                   ` Max Krasnyansky
@ 2003-02-21  8:45                     ` Christoph Hellwig
  2003-02-21 17:44                       ` Max Krasnyansky
  2003-02-24  1:01                     ` Rusty Russell
  1 sibling, 1 reply; 40+ messages in thread
From: Christoph Hellwig @ 2003-02-21  8:45 UTC (permalink / raw)
  To: Max Krasnyansky
  Cc: Rusty Russell, David S. Miller, Alexey Kuznetsov,
	Jean Tourrilhes, linux-kernel

On Thu, Feb 20, 2003 at 05:17:52PM -0800, Max Krasnyansky wrote:
> Yeah, I think 'try' is definitely be a misnomer in this case.
> How about something like this ?
> 
> static inline void __module_get(struct module *mod)
> {
> #ifdef CONFIG_MODULE_DETECT_API_VIOLATION
>         if (!module_refcount(mod))
>                 __unsafe(mod);
> #endif
>         local_inc(&mod->ref[get_cpu()].count);
>         put_cpu();
> }
> 
> We will be able to compile the kernel with CONFIG_MODULE_DETECT_API_VIOLATION
> and easily find all modules that call __module_get() without holding a reference.

Drop the ifdef, add and add an unlikely instead?


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-21  8:45                     ` Christoph Hellwig
@ 2003-02-21 17:44                       ` Max Krasnyansky
  0 siblings, 0 replies; 40+ messages in thread
From: Max Krasnyansky @ 2003-02-21 17:44 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Rusty Russell, David S. Miller, Alexey Kuznetsov,
	Jean Tourrilhes, linux-kernel

At 12:45 AM 2/21/2003, Christoph Hellwig wrote:
>On Thu, Feb 20, 2003 at 05:17:52PM -0800, Max Krasnyansky wrote:
>> Yeah, I think 'try' is definitely be a misnomer in this case.
>> How about something like this ?
>> 
>> static inline void __module_get(struct module *mod)
>> {
>> #ifdef CONFIG_MODULE_DETECT_API_VIOLATION
>>         if (!module_refcount(mod))
>>                 __unsafe(mod);
>> #endif
>>         local_inc(&mod->ref[get_cpu()].count);
>>         put_cpu();
>> }
>> 
>> We will be able to compile the kernel with CONFIG_MODULE_DETECT_API_VIOLATION
>> and easily find all modules that call __module_get() without holding a reference.
>
>Drop the ifdef, add and add an unlikely instead?
Well, module_refcount() is fairly expensive (loop for NR_CPUS). So I'd keep the ifdef.

Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-21  1:17                   ` Max Krasnyansky
  2003-02-21  8:45                     ` Christoph Hellwig
@ 2003-02-24  1:01                     ` Rusty Russell
  2003-02-24 19:35                       ` Max Krasnyansky
  1 sibling, 1 reply; 40+ messages in thread
From: Rusty Russell @ 2003-02-24  1:01 UTC (permalink / raw)
  To: Max Krasnyansky
  Cc: David S. Miller, Alexey Kuznetsov, Jean Tourrilhes, linux-kernel,
	jgarzik

In message <5.1.0.14.2.20030220165016.0d47c688@mail1.qualcomm.com> you write:
> At 04:30 PM 2/20/2003, Rusty Russell wrote:
> >Yes, but in practical terms it's probably going to fork a child with
> >that socket.
> But it will also be killed.

Once they track it down 8).

> >> >I think it can be argued both ways, honestly.
> >> Yep. And I'd argue in for of module_get() :)
> >
> >My only real insistence in this is that such an interface be called
> >__try_module_get(), because the "__" warn people that it's a "you'd
> >better know *exactly* what you are doing", even though the "try" is a
> >bit of a misnomer.
> Yeah, I think 'try' is definitely be a misnomer in this case.
> How about something like this ?

No, I definitely want the name __try_module_get.  Sure, it's a
misnomer in one sense, which will hopefully scare off people looking
for an easy way out.  OTOH, it accurately reflects "you should be
using try_module_get but you have special circumstances" more
eloquently than any comment ever would.  Especially since there are
only a handful of places where it is appropriate.

I think a CONFIG option for checking is overkill: better is to grep
each kernel for __try_module_get() being added and make sure the damn
thing doesn't spread 8)

Cheers,
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

Name: __try_module_get
Author: Rusty Russell
Status: Trivial
Depends: 

D: Introduces __try_module_get for places where we know we already hold
D: a reference and ignoring the fact that the module is being "rmmod --wait"ed
D: is simpler.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .12219-linux-2.5.62-bk3/include/linux/module.h .12219-linux-2.5.62-bk3.updated/include/linux/module.h
--- .12219-linux-2.5.62-bk3/include/linux/module.h	2003-02-21 10:32:28.000000000 +1100
+++ .12219-linux-2.5.62-bk3.updated/include/linux/module.h	2003-02-24 11:46:13.000000000 +1100
@@ -292,17 +292,23 @@ void symbol_put_addr(void *addr);
 #define local_dec(x) atomic_dec(x)
 #endif
 
+/* Sometimes we know we already have a refcount, and it's easier not
+   to handle the error case (which only happens with rmmod --wait). */
+static inline void __try_module_get(struct module *module)
+{
+	local_inc(&module->ref[get_cpu()].count);
+	put_cpu();
+}
+
 static inline int try_module_get(struct module *module)
 {
 	int ret = 1;
 
 	if (module) {
-		unsigned int cpu = get_cpu();
 		if (likely(module_is_live(module)))
-			local_inc(&module->ref[cpu].count);
+			__try_module_get(module);
 		else
 			ret = 0;
-		put_cpu();
 	}
 	return ret;
 }
@@ -327,6 +332,9 @@ static inline int try_module_get(struct 
 static inline void module_put(struct module *module)
 {
 }
+static inline void __try_module_get(struct module *module)
+{
+}
 #define symbol_put(x) do { } while(0)
 #define symbol_put_addr(p) do { } while(0)
 
@@ -381,6 +389,10 @@ static inline int module_text_address(un
 #define symbol_put(x) do { } while(0)
 #define symbol_put_addr(x) do { } while(0)
 
+static inline void __try_module_get(struct module *module)
+{
+}
+
 static inline int try_module_get(struct module *module)
 {
 	return 1;

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-24  1:01                     ` Rusty Russell
@ 2003-02-24 19:35                       ` Max Krasnyansky
  2003-02-25  5:02                         ` Rusty Russell
  0 siblings, 1 reply; 40+ messages in thread
From: Max Krasnyansky @ 2003-02-24 19:35 UTC (permalink / raw)
  To: Rusty Russell
  Cc: David S. Miller, Alexey Kuznetsov, Jean Tourrilhes, linux-kernel,
	jgarzik

At 05:01 PM 2/23/2003, Rusty Russell wrote:
>> >better know *exactly* what you are doing", even though the "try" is a
>> >bit of a misnomer.
>> Yeah, I think 'try' is definitely a misnomer in this case.
>> How about something like this ?
>
>No, I definitely want the name __try_module_get.  Sure, it's a
>misnomer in one sense, which will hopefully scare off people looking
>for an easy way out.  OTOH, it accurately reflects "you should be
>using try_module_get but you have special circumstances" more
>eloquently than any comment ever would.  Especially since there are
>only a handful of places where it is appropriate.
>
>I think a CONFIG option for checking is overkill: better is to grep
>each kernel for __try_module_get() being added and make sure the damn
>thing doesn't spread 8)
Ok.

>+/* Sometimes we know we already have a refcount, and it's easier not
>+   to handle the error case (which only happens with rmmod --wait). */
>+static inline void __try_module_get(struct module *module)
>+{
>+       local_inc(&module->ref[get_cpu()].count);
>+       put_cpu();
>+}

I still think that __try is confusing and __module_get() would be more 
appropriate name for that function. But I can live with __try_module_get() :)
I'll make new patch for net/socket.c as soon as yours goes in.

Thanks
Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-24 19:35                       ` Max Krasnyansky
@ 2003-02-25  5:02                         ` Rusty Russell
  2003-02-26 20:21                           ` Max Krasnyansky
  0 siblings, 1 reply; 40+ messages in thread
From: Rusty Russell @ 2003-02-25  5:02 UTC (permalink / raw)
  To: Max Krasnyansky
  Cc: David S. Miller, Alexey Kuznetsov, Jean Tourrilhes, linux-kernel,
	jgarzik, torvalds

In message <5.1.0.14.2.20030224112723.05a5e640@mail1.qualcomm.com> you write:
> appropriate name for that function. But I can live with __try_module_get() :)
> I'll make new patch for net/socket.c as soon as yours goes in.

Linus, please apply.  This is the "module_dup" which Viro wanted, by
another name.

Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

Name: __try_module_get
Author: Rusty Russell
Status: Compile tested on 2.5.63

D: Introduces __try_module_get for places where we know we already hold
D: a reference and ignoring the fact that the module is being "rmmod --wait"ed
D: is simpler.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .22009-linux-2.5.63/include/linux/module.h .22009-linux-2.5.63.updated/include/linux/module.h
--- .22009-linux-2.5.63/include/linux/module.h	2003-02-25 10:11:08.000000000 +1100
+++ .22009-linux-2.5.63.updated/include/linux/module.h	2003-02-25 16:00:53.000000000 +1100
@@ -292,6 +292,16 @@ void symbol_put_addr(void *addr);
 #define local_dec(x) atomic_dec(x)
 #endif
 
+/* Sometimes we know we already have a refcount, and it's easier not
+   to handle the error case (which only happens with rmmod --wait). */
+static inline void __try_module_get(struct module *module)
+{
+	if (module) {
+		local_inc(&module->ref[get_cpu()].count);
+		put_cpu();
+	}
+}
+
 static inline int try_module_get(struct module *module)
 {
 	int ret = 1;
@@ -327,6 +337,9 @@ static inline int try_module_get(struct 
 static inline void module_put(struct module *module)
 {
 }
+static inline void __try_module_get(struct module *module)
+{
+}
 #define symbol_put(x) do { } while(0)
 #define symbol_put_addr(p) do { } while(0)
 
@@ -381,6 +394,10 @@ static inline int module_text_address(un
 #define symbol_put(x) do { } while(0)
 #define symbol_put_addr(x) do { } while(0)
 
+static inline void __try_module_get(struct module *module)
+{
+}
+
 static inline int try_module_get(struct module *module)
 {
 	return 1;

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2003-02-25  5:02                         ` Rusty Russell
@ 2003-02-26 20:21                           ` Max Krasnyansky
  0 siblings, 0 replies; 40+ messages in thread
From: Max Krasnyansky @ 2003-02-26 20:21 UTC (permalink / raw)
  To: Rusty Russell
  Cc: David S. Miller, Alexey Kuznetsov, Jean Tourrilhes, linux-kernel,
	jgarzik

At 09:02 PM 2/24/2003, Rusty Russell wrote:
>In message <5.1.0.14.2.20030224112723.05a5e640@mail1.qualcomm.com> you write:
>> appropriate name for that function. But I can live with __try_module_get() :)
>> I'll make new patch for net/socket.c as soon as yours goes in.
>
>Linus, please apply.  This is the "module_dup" which Viro wanted, by
>another name.

Rusty,
Looks like Linus ignored this thread. He applied your other patches but not this one.
Could please resend it with the new subject or something.

Thanks
Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
@ 2003-02-20 17:52 Max Krasnyansky
  0 siblings, 0 replies; 40+ messages in thread
From: Max Krasnyansky @ 2003-02-20 17:52 UTC (permalink / raw)
  To: David S. Miller, rusty; +Cc: kuznet, jt, linux-kernel

At 11:04 PM 2/18/2003, David S. Miller wrote:
>>   From: Rusty Russell <rusty@rustcorp.com.au>
>>   Date: Wed, 19 Feb 2003 14:54:21 +1100
>>   
>>   Firstly, the owner field should probably be in struct proto_ops not
>>   struct socket, where the function pointers are.
>>
>>I think this is one of Alexey's main problems with the patch.
>This is a bit more informative than "oh it's an ugly hack" ;-)
>
>Ok. I got at least three reasons why I think owner field should be in struct 
>socket:
>        - struct proto_ops doesn't exists without struct socket.
>        It cannot be registered or otherwise used on it's own. 
>        - struct sock might inherit (when needed see my explanation about different families)
>        its owner from struct socket. In which case sk_set_owner(sk, socket->ops->owner) doesn't
>        look right.
>        - we might want to protect something else besides socket->ops.
>
>None of those reasons are critical. If you guys still feel that ->owner must be in struct 
>proto_ops be that way, I'm ok with it.
Ok. I'll take that back :).
The thing is that socket->ops is set from the protocol itself not in the generic socket code.
Here is what sock_create() does

        if (!(sock = sock_alloc())) 
        {
                printk(KERN_WARNING "socket: no more sockets\n");
                i = -ENFILE;            /* Not exactly a match, but its the
                                           closest posix thing */
                goto out;
        }

        sock->type  = type;

        if ((i = net_families[family]->create(sock, protocol)) < 0) 
        {
                sock_release(sock);
                goto out;
        }

It simply calls net_family->create() which then sets its private struct proto_ops.

So I think owner field should be in the struct socket because it needs to be 
accessible from net/socket.c:sock_create()/sock_release().

Dave, Alexey, do you guys still strongly believe that it's a hack ? 
If yes what do I need to do to convince otherwise ? ;-)

Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2002-12-19 16:05 ` Max Krasnyansky
  2002-12-19 19:28   ` Alan Cox
@ 2002-12-21  6:54   ` David S. Miller
  1 sibling, 0 replies; 40+ messages in thread
From: David S. Miller @ 2002-12-21  6:54 UTC (permalink / raw)
  To: maxk; +Cc: linux-kernel


Bunch of problems with this patch:

1) Module leak.  If try_module_get(npf->owner) works but sock_alloc()
   fails, we never put the module.  It just branches to the "out"
   label in that case, which unlocks the net_family table and returns
   err.

2) Bigger issue, why not attach the owner to struct sock
   instead of socket?  The sock can exist, and thus reference
   the protocol family code, long after the socket (ie. the
   user end) is killed off and closed.

   For example, this could happen for just about any protocol family
   due to stray device sk_buff references to the sock and thus the
   protocol family.

Please address this stuff.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2002-12-19 23:08 Jean Tourrilhes
@ 2002-12-19 23:23 ` Max Krasnyansky
  0 siblings, 0 replies; 40+ messages in thread
From: Max Krasnyansky @ 2002-12-19 23:23 UTC (permalink / raw)
  To: jt, Linux kernel mailing list

At 03:08 PM 12/19/2002 -0800, Jean Tourrilhes wrote:
>Max Krasnyansky wrote :
>> Ok. Drop me a note and I'll push this stuff to BK were you can pull from. 
>> In the mean time I'll go bug other folks :). I want to do same kinda changes 
>> for the TTY ldisc code.
>> 
>> Max
>
>        Go for it, I've got the exact same problem with IrDA (both
>socket and tty ldisc). 
Sockets should work with my patch and I'll fix ldisc today or tomorrow.

>Maybe worth sending an entry for the FAQ of Rusty and include PPP maintainer 
>in the loop.
Ok.

>P.S. : Talking about it, I'm away for Chrismas & New-Year...
Happy holidays then.

Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
@ 2002-12-19 23:08 Jean Tourrilhes
  2002-12-19 23:23 ` Max Krasnyansky
  0 siblings, 1 reply; 40+ messages in thread
From: Jean Tourrilhes @ 2002-12-19 23:08 UTC (permalink / raw)
  To: Linux kernel mailing list, Max Krasnyansky

Max Krasnyansky wrote :
> Ok. Drop me a note and I'll push this stuff to BK were you can pull from. 
> In the mean time I'll go bug other folks :). I want to do same kinda changes 
> for the TTY ldisc code.
> 
> Max

	Go for it, I've got the exact same problem with IrDA (both
socket and tty ldisc). Maybe worth sending an entry for the FAQ of
Rusty and include PPP maintainer in the loop.
	Have fun...

	Jean

P.S. : Talking about it, I'm away for Chrismas & New-Year...

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2002-12-19 19:12     ` David S. Miller
@ 2002-12-19 22:17       ` Max Krasnyansky
  0 siblings, 0 replies; 40+ messages in thread
From: Max Krasnyansky @ 2002-12-19 22:17 UTC (permalink / raw)
  To: David S. Miller, alan; +Cc: linux-kernel

At 11:12 AM 12/19/2002 -0800, David S. Miller wrote:
>   From: Alan Cox <alan@lxorguk.ukuu.org.uk>
>   Date: 19 Dec 2002 19:28:35 +0000
>
>   On Thu, 2002-12-19 at 16:05, Max Krasnyansky wrote:
>   > Hmm, no replies. Nobody is interested in this or what ?
>   > I want to get this fixed otherwise I can't fix Bluetooth module
>   > refcounting. 
>   
>   Looks good to me, but its christmas so I wouldnt expect much to happen
>   till the new year
Ah, Christmas time :)

>I just got back from a long snowboarding trip, I'll look at this
>over the weekend before I disappear for another similar trip :)

Ok. Drop me a note and I'll push this stuff to BK were you can pull from. 
In the mean time I'll go bug other folks :). I want to do same kinda changes 
for the TTY ldisc code.

Max


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2002-12-19 16:05 ` Max Krasnyansky
@ 2002-12-19 19:28   ` Alan Cox
  2002-12-19 19:12     ` David S. Miller
  2002-12-21  6:54   ` David S. Miller
  1 sibling, 1 reply; 40+ messages in thread
From: Alan Cox @ 2002-12-19 19:28 UTC (permalink / raw)
  To: Max Krasnyansky; +Cc: Linux Kernel Mailing List, davem

On Thu, 2002-12-19 at 16:05, Max Krasnyansky wrote:
> Hmm, no replies. Nobody is interested in this or what ?
> I want to get this fixed otherwise I can't fix Bluetooth module
> refcounting. 

Looks good to me, but its christmas so I wouldnt expect much to happen
till the new year


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2002-12-19 19:28   ` Alan Cox
@ 2002-12-19 19:12     ` David S. Miller
  2002-12-19 22:17       ` Max Krasnyansky
  0 siblings, 1 reply; 40+ messages in thread
From: David S. Miller @ 2002-12-19 19:12 UTC (permalink / raw)
  To: alan; +Cc: maxk, linux-kernel

   From: Alan Cox <alan@lxorguk.ukuu.org.uk>
   Date: 19 Dec 2002 19:28:35 +0000

   On Thu, 2002-12-19 at 16:05, Max Krasnyansky wrote:
   > Hmm, no replies. Nobody is interested in this or what ?
   > I want to get this fixed otherwise I can't fix Bluetooth module
   > refcounting. 
   
   Looks good to me, but its christmas so I wouldnt expect much to happen
   till the new year
   
I just got back from a long snowboarding trip, I'll look at this
over the weekend before I disappear for another similar trip :)

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH/RFC] New module refcounting for net_proto_family
  2002-12-18 15:25 Max Krasnyansky
@ 2002-12-19 16:05 ` Max Krasnyansky
  2002-12-19 19:28   ` Alan Cox
  2002-12-21  6:54   ` David S. Miller
  0 siblings, 2 replies; 40+ messages in thread
From: Max Krasnyansky @ 2002-12-19 16:05 UTC (permalink / raw)
  To: linux-kernel; +Cc: davem

Hmm, no replies. Nobody is interested in this or what ?
I want to get this fixed otherwise I can't fix Bluetooth module
refcounting. 

Max 


On Wed, 2002-12-18 at 07:25, Max Krasnyansky wrote:
> Hi Folks,
> 
> It seems that new module code is going to stay with us at least for a
> while :).
> So it's probably time to start fixing old interfaces to use new
> refcounting scheme.
> 
> Here is a patch for sock_create() and stuff that uses net_proto_family
> interface. Tested with modified af_bluetooth.c and seems to work fine.
> Other families are unaffected for now because their owner field == NULL.
> 
> If people are ok with this aproach I will also fix af_unix and other
> families and push all fixes into my BK tree.
> 
> (URL in case if my mailer messed up spaces
> http://bluez.sourceforge.net/patches/npf_refcnt_patch-2.5.52.gz)
> 
> 
> # This is a BitKeeper generated patch for the following project:
> # Project Name: Linux kernel tree
> # This patch format is intended for GNU patch command version 2.5 or
> higher.
> # This patch includes the following deltas:
> #	           ChangeSet	1.888   -> 1.889  
> #	        net/socket.c	1.39    -> 1.40   
> #	 include/linux/net.h	1.7     -> 1.8    
> #
> # The following is the BitKeeper ChangeSet Log
> # --------------------------------------------
> # 02/12/17	maxk@qualcomm.com	1.889
> # Convert generic socket code to new module refcounting.
> # --------------------------------------------
> #
> diff -Nru a/include/linux/net.h b/include/linux/net.h
> --- a/include/linux/net.h	Tue Dec 17 20:02:04 2002
> +++ b/include/linux/net.h	Tue Dec 17 20:02:04 2002
> @@ -76,6 +76,8 @@
>  
>  	short			type;
>  	unsigned char		passcred;
> +
> +	struct module           *owner;
>  };
>  
>  struct scm_cookie;
> @@ -124,6 +126,8 @@
>  	short	authentication;
>  	short	encryption;
>  	short	encrypt_net;
> +
> +	struct  module *owner;
>  };
>  
>  struct net_proto 
> diff -Nru a/net/socket.c b/net/socket.c
> --- a/net/socket.c	Tue Dec 17 20:02:04 2002
> +++ b/net/socket.c	Tue Dec 17 20:02:04 2002
> @@ -470,6 +470,8 @@
>  
>  	sock = SOCKET_I(inode);
>  
> +	sock->owner = NULL;
> +	
>  	inode->i_mode = S_IFSOCK|S_IRWXUGO;
>  	inode->i_sock = 1;
>  	inode->i_uid = current->fsuid;
> @@ -517,6 +519,8 @@
>  		return;
>  	}
>  	sock->file=NULL;
> +
> +	module_put(sock->owner);
>  }
>  
>  static int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
> struct msghdr *msg, int size)
> @@ -964,8 +968,9 @@
>  
>  int sock_create(int family, int type, int protocol, struct socket
> **res)
>  {
> -	int i;
> +	struct net_proto_family *npf;
>  	struct socket *sock;
> +	int err;
>  
>  	/*
>  	 *	Check protocol is in range
> @@ -990,14 +995,8 @@
>  	}
>  		
>  #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
> -	/* Attempt to load a protocol module if the find failed. 
> -	 * 
> -	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
> -	 * requested real, full-featured networking support upon
> configuration.
> -	 * Otherwise module support will break!
> -	 */
> -	if (net_families[family]==NULL)
> -	{
> +	/* Attempt to load a protocol module if the find failed. */
> +	if (net_families[family]==NULL) {
>  		char module_name[30];
>  		sprintf(module_name,"net-pf-%d",family);
>  		request_module(module_name);
> @@ -1005,29 +1004,31 @@
>  #endif
>  
>  	net_family_read_lock();
> -	if (net_families[family] == NULL) {
> -		i = -EAFNOSUPPORT;
> +
> +	npf = net_families[family];
> +	if (!npf || !try_module_get(npf->owner)) {
> +		err = -EAFNOSUPPORT;
>  		goto out;
>  	}
> +	
> +	/*
> +	 * Allocate the socket and allow the family to set things up. if
> +	 * the protocol is 0, the family is instructed to select an
> appropriate
> +	 * default.
> + 	 */
>  
> -/*
> - *	Allocate the socket and allow the family to set things up. if
> - *	the protocol is 0, the family is instructed to select an appropriate
> - *	default.
> - */
> -
> -	if (!(sock = sock_alloc())) 
> -	{
> +	sock = sock_alloc();
> +	if (!sock) {
>  		printk(KERN_WARNING "socket: no more sockets\n");
> -		i = -ENFILE;		/* Not exactly a match, but its the
> +		err = -ENFILE;		/* Not exactly a match, but its the
>  					   closest posix thing */
>  		goto out;
>  	}
>  
>  	sock->type  = type;
> +	sock->owner = npf->owner;
>  
> -	if ((i = net_families[family]->create(sock, protocol)) < 0) 
> -	{
> +	if ((err = npf->create(sock, protocol)) < 0) {
>  		sock_release(sock);
>  		goto out;
>  	}
> @@ -1036,7 +1037,7 @@
>  
>  out:
>  	net_family_read_unlock();
> -	return i;
> +	return err;
>  }
>  
>  asmlinkage long sys_socket(int family, int type, int protocol)
> @@ -1201,6 +1202,9 @@
>  	newsock->type = sock->type;
>  	newsock->ops = sock->ops;
>  
> +	try_module_get(sock->owner);
> +	newsock->owner = sock->owner;
> +	
>  	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
>  	if (err < 0)
>  		goto out_release;
-- 
Max Krasnyansky <maxk@qualcomm.com>
Qualcomm


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH/RFC] New module refcounting for net_proto_family
@ 2002-12-18 15:25 Max Krasnyansky
  2002-12-19 16:05 ` Max Krasnyansky
  0 siblings, 1 reply; 40+ messages in thread
From: Max Krasnyansky @ 2002-12-18 15:25 UTC (permalink / raw)
  To: linux-kernel

Hi Folks,

It seems that new module code is going to stay with us at least for a
while :).
So it's probably time to start fixing old interfaces to use new
refcounting scheme.

Here is a patch for sock_create() and stuff that uses net_proto_family
interface. Tested with modified af_bluetooth.c and seems to work fine.
Other families are unaffected for now because their owner field == NULL.

If people are ok with this aproach I will also fix af_unix and other
families and push all fixes into my BK tree.

(URL in case if my mailer messed up spaces
http://bluez.sourceforge.net/patches/npf_refcnt_patch-2.5.52.gz)


# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or
higher.
# This patch includes the following deltas:
#	           ChangeSet	1.888   -> 1.889  
#	        net/socket.c	1.39    -> 1.40   
#	 include/linux/net.h	1.7     -> 1.8    
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/12/17	maxk@qualcomm.com	1.889
# Convert generic socket code to new module refcounting.
# --------------------------------------------
#
diff -Nru a/include/linux/net.h b/include/linux/net.h
--- a/include/linux/net.h	Tue Dec 17 20:02:04 2002
+++ b/include/linux/net.h	Tue Dec 17 20:02:04 2002
@@ -76,6 +76,8 @@
 
 	short			type;
 	unsigned char		passcred;
+
+	struct module           *owner;
 };
 
 struct scm_cookie;
@@ -124,6 +126,8 @@
 	short	authentication;
 	short	encryption;
 	short	encrypt_net;
+
+	struct  module *owner;
 };
 
 struct net_proto 
diff -Nru a/net/socket.c b/net/socket.c
--- a/net/socket.c	Tue Dec 17 20:02:04 2002
+++ b/net/socket.c	Tue Dec 17 20:02:04 2002
@@ -470,6 +470,8 @@
 
 	sock = SOCKET_I(inode);
 
+	sock->owner = NULL;
+	
 	inode->i_mode = S_IFSOCK|S_IRWXUGO;
 	inode->i_sock = 1;
 	inode->i_uid = current->fsuid;
@@ -517,6 +519,8 @@
 		return;
 	}
 	sock->file=NULL;
+
+	module_put(sock->owner);
 }
 
 static int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, int size)
@@ -964,8 +968,9 @@
 
 int sock_create(int family, int type, int protocol, struct socket
**res)
 {
-	int i;
+	struct net_proto_family *npf;
 	struct socket *sock;
+	int err;
 
 	/*
 	 *	Check protocol is in range
@@ -990,14 +995,8 @@
 	}
 		
 #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
-	/* Attempt to load a protocol module if the find failed. 
-	 * 
-	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
-	 * requested real, full-featured networking support upon
configuration.
-	 * Otherwise module support will break!
-	 */
-	if (net_families[family]==NULL)
-	{
+	/* Attempt to load a protocol module if the find failed. */
+	if (net_families[family]==NULL) {
 		char module_name[30];
 		sprintf(module_name,"net-pf-%d",family);
 		request_module(module_name);
@@ -1005,29 +1004,31 @@
 #endif
 
 	net_family_read_lock();
-	if (net_families[family] == NULL) {
-		i = -EAFNOSUPPORT;
+
+	npf = net_families[family];
+	if (!npf || !try_module_get(npf->owner)) {
+		err = -EAFNOSUPPORT;
 		goto out;
 	}
+	
+	/*
+	 * Allocate the socket and allow the family to set things up. if
+	 * the protocol is 0, the family is instructed to select an
appropriate
+	 * default.
+ 	 */
 
-/*
- *	Allocate the socket and allow the family to set things up. if
- *	the protocol is 0, the family is instructed to select an appropriate
- *	default.
- */
-
-	if (!(sock = sock_alloc())) 
-	{
+	sock = sock_alloc();
+	if (!sock) {
 		printk(KERN_WARNING "socket: no more sockets\n");
-		i = -ENFILE;		/* Not exactly a match, but its the
+		err = -ENFILE;		/* Not exactly a match, but its the
 					   closest posix thing */
 		goto out;
 	}
 
 	sock->type  = type;
+	sock->owner = npf->owner;
 
-	if ((i = net_families[family]->create(sock, protocol)) < 0) 
-	{
+	if ((err = npf->create(sock, protocol)) < 0) {
 		sock_release(sock);
 		goto out;
 	}
@@ -1036,7 +1037,7 @@
 
 out:
 	net_family_read_unlock();
-	return i;
+	return err;
 }
 
 asmlinkage long sys_socket(int family, int type, int protocol)
@@ -1201,6 +1202,9 @@
 	newsock->type = sock->type;
 	newsock->ops = sock->ops;
 
+	try_module_get(sock->owner);
+	newsock->owner = sock->owner;
+	
 	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
 	if (err < 0)
 		goto out_release;

-- 
Max Krasnyansky <maxk@qualcomm.com>
Qualcomm


^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2003-02-26 20:15 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-12-26  8:11 [PATCH/RFC] New module refcounting for net_proto_family Max Krasnyansky
2003-01-02 11:43 ` Max Krasnyansky
2003-01-03  8:24   ` David S. Miller
2003-01-20  3:22   ` Max Krasnyansky
2003-01-21 11:03     ` David S. Miller
2003-01-21 19:42       ` Max Krasnyansky
2003-01-21 19:36         ` David S. Miller
2003-02-07  9:48     ` David S. Miller
2003-02-07 23:34       ` Max Krasnyansky
2003-02-08  8:44         ` David S. Miller
2003-02-18  3:46     ` David S. Miller
2003-02-18 18:50       ` Max Krasnyansky
2003-02-18 21:09         ` Jean Tourrilhes
2003-02-19  3:54         ` Rusty Russell
2003-02-19  7:04           ` David S. Miller
2003-02-19 18:03             ` Max Krasnyansky
2003-02-19 20:31             ` Roman Zippel
2003-02-19 17:45           ` Max Krasnyansky
2003-02-20  1:21             ` Rusty Russell
2003-02-20 17:38               ` Max Krasnyansky
2003-02-21  0:30                 ` Rusty Russell
2003-02-21  1:17                   ` Max Krasnyansky
2003-02-21  8:45                     ` Christoph Hellwig
2003-02-21 17:44                       ` Max Krasnyansky
2003-02-24  1:01                     ` Rusty Russell
2003-02-24 19:35                       ` Max Krasnyansky
2003-02-25  5:02                         ` Rusty Russell
2003-02-26 20:21                           ` Max Krasnyansky
2003-01-07  9:21 ` David S. Miller
2003-01-09 20:45   ` Max Krasnyansky
2003-01-09 23:53     ` David S. Miller
  -- strict thread matches above, loose matches on Subject: below --
2003-02-20 17:52 Max Krasnyansky
2002-12-19 23:08 Jean Tourrilhes
2002-12-19 23:23 ` Max Krasnyansky
2002-12-18 15:25 Max Krasnyansky
2002-12-19 16:05 ` Max Krasnyansky
2002-12-19 19:28   ` Alan Cox
2002-12-19 19:12     ` David S. Miller
2002-12-19 22:17       ` Max Krasnyansky
2002-12-21  6:54   ` David S. Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).