All of lore.kernel.org
 help / color / mirror / Atom feed
* PPP-over-L2TP kernel support, new patch for review
@ 2004-09-20 21:11 James Chapman
  2004-09-20 21:17 ` David S. Miller
  0 siblings, 1 reply; 15+ messages in thread
From: James Chapman @ 2004-09-20 21:11 UTC (permalink / raw)
  To: netdev; +Cc: Martijn van Oosterhout, mostrows

[-- Attachment #1: Type: text/plain, Size: 1251 bytes --]

Attached is a revised version of the new PPP over L2TP support for
review. Thanks DaveM and Herbert for comments so far. The following
comments have been addressed in this new version:

- use Linux list macros for all lists
- split sockaddr_pppox into separate sockaddr_pppoe and
  sockaddr_pppol2tp structs

I've split the patch into 3 diffs:

1. sockaddr_pppoe.diff  - fix sockaddr_pppox issue
2. if_pppox.h_ws.diff	- fixup whitespace formatting
3. pppol2tp-2.diff	- add PPPoL2TP

Please also check the following FIXMEs in pppol2tp.c. If these aren't
issues, I'll remove the FIXMEs and submit a new patch.

- pppol2tp_data_ready()
  lock the socket when walking sk->sk_receive_queue?
- pppol2tp_build_l2tp_header()
  unaligned accesses?
- pppol2tp_xmit()
  handle skb fragments?
- pppol2tp_create()
  sk_set_owner() - what is the real problem here?
- pppol2tp_session_setsockopt()
  change ppp channel's hdrlen on the fly?

Also, since submitting the previous version, I've made a few internal
L2TP changes which are included in the pppol2tp patch. Most relevant
is the addition of a using_ipsec flag - I'm trying to return to
userspace a read-only indicator of whether the L2TP tunnel is
protected by IPSEC. Is this the right way to do it?

/james




[-- Attachment #2: sockaddr_pppoe.diff --]
[-- Type: application/octet-stream, Size: 4269 bytes --]

diff -Naur linux-2.6.8.1.orig/drivers/net/pppoe.c linux-2.6.8.1/drivers/net/pppoe.c
--- linux-2.6.8.1.orig/drivers/net/pppoe.c	2004-08-14 11:55:35.000000000 +0100
+++ linux-2.6.8.1/drivers/net/pppoe.c	2004-09-20 10:52:14.000000000 +0100
@@ -201,9 +201,9 @@
 	return po;
 }
 
-static inline struct pppox_opt *get_item_by_addr(struct sockaddr_pppox *sp)
+static inline struct pppox_opt *get_item_by_addr(struct sockaddr_pppoe *sp)
 {
-	return get_item(sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote);
+	return get_item(sp->pppoe.sid, sp->pppoe.remote);
 }
 
 static inline int set_item(struct pppox_opt *po)
@@ -572,7 +572,7 @@
 {
 	struct sock *sk = sock->sk;
 	struct net_device *dev = NULL;
-	struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr;
+	struct sockaddr_pppoe *sp = (struct sockaddr_pppoe *) uservaddr;
 	struct pppox_opt *po = pppox_sk(sk);
 	int error;
 
@@ -584,12 +584,12 @@
 
 	/* Check for already bound sockets */
 	error = -EBUSY;
-	if ((sk->sk_state & PPPOX_CONNECTED) && sp->sa_addr.pppoe.sid)
+	if ((sk->sk_state & PPPOX_CONNECTED) && sp->pppoe.sid)
 		goto end;
 
 	/* Check for already disconnected sockets, on attempts to disconnect */
 	error = -EALREADY;
-	if ((sk->sk_state & PPPOX_DEAD) && !sp->sa_addr.pppoe.sid )
+	if ((sk->sk_state & PPPOX_DEAD) && !sp->pppoe.sid )
 		goto end;
 
 	error = 0;
@@ -609,8 +609,8 @@
 	}
 
 	/* Don't re-bind if sid==0 */
-	if (sp->sa_addr.pppoe.sid != 0) {
-		dev = dev_get_by_name(sp->sa_addr.pppoe.dev);
+	if (sp->pppoe.sid != 0) {
+		dev = dev_get_by_name(sp->pppoe.dev);
 
 		error = -ENODEV;
 		if (!dev)
@@ -622,7 +622,7 @@
 			goto err_put;
 
 		memcpy(&po->pppoe_pa,
-		       &sp->sa_addr.pppoe,
+		       &sp->pppoe,
 		       sizeof(struct pppoe_addr));
 
 		error = set_item(po);
@@ -642,7 +642,7 @@
 		sk->sk_state = PPPOX_CONNECTED;
 	}
 
-	po->num = sp->sa_addr.pppoe.sid;
+	po->num = sp->pppoe.sid;
 
  end:
 	release_sock(sk);
@@ -659,12 +659,12 @@
 static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr,
 		  int *usockaddr_len, int peer)
 {
-	int len = sizeof(struct sockaddr_pppox);
-	struct sockaddr_pppox sp;
+	int len = sizeof(struct sockaddr_pppoe);
+	struct sockaddr_pppoe sp;
 
 	sp.sa_family	= AF_PPPOX;
 	sp.sa_protocol	= PX_PROTO_OE;
-	memcpy(&sp.sa_addr.pppoe, &pppox_sk(sock->sk)->pppoe_pa,
+	memcpy(&sp.pppoe, &pppox_sk(sock->sk)->pppoe_pa,
 	       sizeof(struct pppoe_addr));
 
 	memcpy(uaddr, &sp, len);
@@ -740,7 +740,7 @@
 		err = -EFAULT;
 		if (copy_from_user(&po->pppoe_relay,
 				   (void __user *)arg,
-				   sizeof(struct sockaddr_pppox)))
+				   sizeof(struct sockaddr_pppoe)))
 			break;
 
 		err = -EINVAL;
diff -Naur linux-2.6.8.1.orig/include/linux/if_pppox.h linux-2.6.8.1/include/linux/if_pppox.h
--- linux-2.6.8.1.orig/include/linux/if_pppox.h	2004-08-14 11:54:50.000000000 +0100
+++ linux-2.6.8.1/include/linux/if_pppox.h	2004-09-20 12:19:24.000000000 +0100
@@ -51,15 +51,26 @@
  */ 
 #define PX_PROTO_OE    0 /* Currently just PPPoE */
 #define PX_MAX_PROTO   1	
- 
+
+/* The use of a union isn't viable because the size of this struct
+ * must stay fixed over time -- applications use sizeof(struct
+ * sockaddr_pppox) to fill it. Use protocol specific sockaddr types
+ * instead.
+ */ 
 struct sockaddr_pppox { 
        sa_family_t     sa_family;            /* address family, AF_PPPOX */ 
        unsigned int    sa_protocol;          /* protocol identifier */ 
        union{ 
                struct pppoe_addr       pppoe; 
        }sa_addr; 
-}__attribute__ ((packed)); 
+}__attribute__ ((packed)) __deprecated; 
 
+/* Must be binary-compatible with sockaddr_pppox for backwards compatabilty */
+struct sockaddr_pppoe { 
+	sa_family_t     sa_family;	/* address family, AF_PPPOX */ 
+	unsigned int    sa_protocol;    /* protocol identifier */ 
+	struct pppoe_addr pppoe; 
+}__attribute__ ((packed)); 
 
 /*********************************************************************
  *
@@ -115,7 +126,7 @@
 struct pppoe_opt {
 	struct net_device      *dev;	  /* device associated with socket*/
 	struct pppoe_addr	pa;	  /* what this socket is bound to*/
-	struct sockaddr_pppox	relay;	  /* what socket data will be
+	struct sockaddr_pppoe	relay;	  /* what socket data will be
 					     relayed to (PPPoE relaying) */
 };
 

[-- Attachment #3: if_pppox.h_ws.diff --]
[-- Type: application/octet-stream, Size: 5141 bytes --]

diff -Naur linux-2.6.8.1.orig/include/linux/if_pppox.h linux-2.6.8.1.ws/include/linux/if_pppox.h
--- linux-2.6.8.1.orig/include/linux/if_pppox.h	2004-09-20 13:19:50.000000000 +0100
+++ linux-2.6.8.1.ws/include/linux/if_pppox.h	2004-09-20 13:18:20.000000000 +0100
@@ -1,6 +1,6 @@
 /***************************************************************************
  * Linux PPP over X - Generic PPP transport layer sockets
- * Linux PPP over Ethernet (PPPoE) Socket Implementation (RFC 2516) 
+ * Linux PPP over Ethernet (PPPoE) Socket Implementation (RFC 2516)
  *
  * This file supplies definitions required by the PPP over Ethernet driver
  * (pppox.c).  All version information wrt this file is located in pppox.c
@@ -20,7 +20,7 @@
 #include <asm/types.h>
 #include <asm/byteorder.h>
 
-#ifdef  __KERNEL__
+#ifdef	__KERNEL__
 #include <linux/if_ether.h>
 #include <linux/if.h>
 #include <linux/netdevice.h>
@@ -36,41 +36,41 @@
 #define PF_PPPOX	AF_PPPOX
 #endif /* !(AF_PPPOX) */
 
-/************************************************************************ 
- * PPPoE addressing definition 
- */ 
-typedef __u16 sid_t; 
-struct pppoe_addr{ 
-       sid_t           sid;                    /* Session identifier */ 
-       unsigned char   remote[ETH_ALEN];       /* Remote address */ 
-       char            dev[IFNAMSIZ];          /* Local device to use */ 
-}; 
- 
-/************************************************************************ 
- * Protocols supported by AF_PPPOX 
- */ 
-#define PX_PROTO_OE    0 /* Currently just PPPoE */
-#define PX_MAX_PROTO   1	
+/************************************************************************
+ * PPPoE addressing definition
+ */
+typedef __u16 sid_t;
+struct pppoe_addr {
+	sid_t		sid;			/* Session identifier */
+	unsigned char	remote[ETH_ALEN];	/* Remote address */
+	char		dev[IFNAMSIZ];		/* Local device to use */
+};
+
+/************************************************************************
+ * Protocols supported by AF_PPPOX
+ */
+#define PX_PROTO_OE	0 /* Currently just PPPoE */
+#define PX_MAX_PROTO	1
 
 /* The use of a union isn't viable because the size of this struct
  * must stay fixed over time -- applications use sizeof(struct
  * sockaddr_pppox) to fill it. Use protocol specific sockaddr types
  * instead.
- */ 
-struct sockaddr_pppox { 
-       sa_family_t     sa_family;            /* address family, AF_PPPOX */ 
-       unsigned int    sa_protocol;          /* protocol identifier */ 
-       union{ 
-               struct pppoe_addr       pppoe; 
-       }sa_addr; 
-}__attribute__ ((packed)) __deprecated; 
+ */
+struct sockaddr_pppox {
+	sa_family_t	sa_family;		/* address family, AF_PPPOX */
+	unsigned int	sa_protocol;		/* protocol identifier */
+	union {
+		struct pppoe_addr	pppoe;
+	} sa_addr;
+}__attribute__ ((packed)) __deprecated;
 
 /* Must be binary-compatible with sockaddr_pppox for backwards compatabilty */
-struct sockaddr_pppoe { 
-	sa_family_t     sa_family;	/* address family, AF_PPPOX */ 
-	unsigned int    sa_protocol;    /* protocol identifier */ 
-	struct pppoe_addr pppoe; 
-}__attribute__ ((packed)); 
+struct sockaddr_pppoe {
+	sa_family_t	sa_family;	/* address family, AF_PPPOX */
+	unsigned int	sa_protocol;	/* protocol identifier */
+	struct pppoe_addr pppoe;
+}__attribute__ ((packed));
 
 /*********************************************************************
  *
@@ -100,11 +100,11 @@
 #define PTT_AC_NAME	__constant_htons(0x0102)
 #define PTT_HOST_UNIQ	__constant_htons(0x0103)
 #define PTT_AC_COOKIE	__constant_htons(0x0104)
-#define PTT_VENDOR 	__constant_htons(0x0105)
+#define PTT_VENDOR	__constant_htons(0x0105)
 #define PTT_RELAY_SID	__constant_htons(0x0110)
-#define PTT_SRV_ERR     __constant_htons(0x0201)
-#define PTT_SYS_ERR  	__constant_htons(0x0202)
-#define PTT_GEN_ERR  	__constant_htons(0x0203)
+#define PTT_SRV_ERR	__constant_htons(0x0201)
+#define PTT_SYS_ERR	__constant_htons(0x0202)
+#define PTT_GEN_ERR	__constant_htons(0x0203)
 
 struct pppoe_hdr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
@@ -124,7 +124,7 @@
 
 #ifdef __KERNEL__
 struct pppoe_opt {
-	struct net_device      *dev;	  /* device associated with socket*/
+	struct net_device	*dev;	  /* device associated with socket*/
 	struct pppoe_addr	pa;	  /* what this socket is bound to*/
 	struct sockaddr_pppoe	relay;	  /* what socket data will be
 					     relayed to (PPPoE relaying) */
@@ -162,12 +162,12 @@
 
 /* PPPoX socket states */
 enum {
-    PPPOX_NONE		= 0,  /* initial state */
-    PPPOX_CONNECTED	= 1,  /* connection established ==TCP_ESTABLISHED */
-    PPPOX_BOUND		= 2,  /* bound to ppp device */
-    PPPOX_RELAY		= 4,  /* forwarding is enabled */
-    PPPOX_ZOMBIE	= 8,  /* dead, but still bound to ppp device */
-    PPPOX_DEAD		= 16  /* dead, useless, please clean me up!*/
+	PPPOX_NONE	= 0,  /* initial state */
+	PPPOX_CONNECTED	= 1,  /* connection established ==TCP_ESTABLISHED */
+	PPPOX_BOUND	= 2,  /* bound to ppp device */
+	PPPOX_RELAY	= 4,  /* forwarding is enabled */
+	PPPOX_ZOMBIE	= 8,  /* dead, but still bound to ppp device */
+	PPPOX_DEAD	= 16  /* dead, useless, please clean me up!*/
 };
 
 #endif /* __KERNEL__ */

[-- Attachment #4: pppol2tp-2.diff --]
[-- Type: application/octet-stream, Size: 70565 bytes --]

diff -Naur linux-2.6.8.1.orig/drivers/net/Kconfig linux-2.6.8.1/drivers/net/Kconfig
--- linux-2.6.8.1.orig/drivers/net/Kconfig	2004-08-14 11:56:00.000000000 +0100
+++ linux-2.6.8.1/drivers/net/Kconfig	2004-09-20 11:18:13.000000000 +0100
@@ -2481,6 +2481,19 @@
 	  which can lead to bad results if the ATM peer loses state and
 	  changes its encapsulation unilaterally.
 
+config PPPOL2TP
+	tristate "PPP over L2TP (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && PPP
+	help
+	  Support for PPP-over-L2TP socket family. L2TP is a protocol
+	  used by ISPs and enterprises to tunnel PPP traffic over UDP
+	  tunnels. L2TP is replacing PPTP for VPN uses.
+
+	  This kernel component handles only L2TP data packets: a
+	  userland daemon handles the L2TP control protocol (tunnel
+	  and session setup). One such daemon is OpenL2TP
+	  (http://openl2tp.sourceforge.net/).
+
 config SLIP
 	tristate "SLIP (serial line) support"
 	depends on NETDEVICES
diff -Naur linux-2.6.8.1.orig/drivers/net/Makefile linux-2.6.8.1/drivers/net/Makefile
--- linux-2.6.8.1.orig/drivers/net/Makefile	2004-08-14 11:55:09.000000000 +0100
+++ linux-2.6.8.1/drivers/net/Makefile	2004-09-20 11:17:47.000000000 +0100
@@ -101,6 +101,7 @@
 obj-$(CONFIG_PPP_DEFLATE) += ppp_deflate.o
 obj-$(CONFIG_PPP_BSDCOMP) += bsd_comp.o
 obj-$(CONFIG_PPPOE) += pppox.o pppoe.o
+obj-$(CONFIG_PPPOL2TP) += pppox.o pppol2tp.o
 
 obj-$(CONFIG_SLIP) += slip.o
 ifeq ($(CONFIG_SLIP_COMPRESSED),y)
diff -Naur linux-2.6.8.1.orig/drivers/net/pppol2tp.c linux-2.6.8.1/drivers/net/pppol2tp.c
--- linux-2.6.8.1.orig/drivers/net/pppol2tp.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.8.1/drivers/net/pppol2tp.c	2004-09-20 11:37:58.000000000 +0100
@@ -0,0 +1,2275 @@
+/** -*- linux-c -*- ***********************************************************
+ * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets
+ *
+ * PPPoX    --- Generic PPP encapsulation socket family
+ * PPPoL2TP --- PPP over L2TP (RFC 2661)
+ *
+ *
+ * Version:    0.3.0
+ *
+ * 251003 :	Copied from pppoe.c version 0.6.9.
+ *
+ * Author:	Martijn van Oosterhout <kleptog@svana.org>
+ * Contributors:
+ *		Michal Ostrowski <mostrows@speakeasy.net>
+ *		Arnaldo Carvalho de Melo <acme@xconectiva.com.br>
+ *		David S. Miller (davem@redhat.com)
+ *		James Chapman (jchapman@katalix.com)
+ *
+ * License:
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* This driver handles only L2TP data frames; control frames are handled by a
+ * userspace application.
+ *
+ * To send data in an L2TP session, userspace opens a PPPoL2TP socket and
+ * attaches it to a bound UDP socket with local tunnel_id / session_id and
+ * peer tunnel_id / session_id set. Data can then be sent or received using
+ * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket
+ * can be read or modified using ioctl() or [gs]etsockopt() calls.
+ *
+ * When a PPPoL2TP socket is connected with local and peer session_id values
+ * zero, the socket is treated as a special tunnel management socket.
+ *
+ * Here's example userspace code to create a socket for sending/receiving data
+ * over an L2TP session:-
+ *
+ *	struct sockaddr_pppol2tp sax;
+ *	int fd;
+ *	int session_fd;
+ *	
+ *	fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
+ *
+ *	sax.sa_family = AF_PPPOX;
+ *	sax.sa_protocol = PX_PROTO_OL2TP;
+ *	sax.pppol2tp.fd = tunnel_fd; // bound UDP socket
+ *	sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
+ *	sax.pppol2tp.addr.sin_port = addr->sin_port;
+ *	sax.pppol2tp.addr.sin_family = AF_INET;
+ *	sax.pppol2tp.s_tunnel  = tunnel_id;
+ *	sax.pppol2tp.s_session = session_id;
+ *	sax.pppol2tp.d_tunnel  = peer_tunnel_id;
+ *	sax.pppol2tp.d_session = peer_session_id;
+ *  
+ *	session_fd = connect(fd, (struct sockaddr *)&sax, sizeof(sax));
+ *
+ */
+
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/version.h>
+
+#include <asm/uaccess.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+
+#include <linux/netdevice.h>
+#include <linux/net.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/udp.h>
+#include <linux/if_pppox.h>
+#include <net/sock.h>
+#include <linux/ppp_channel.h>
+#include <linux/ppp_defs.h>
+#include <linux/if_ppp.h>
+#include <linux/file.h>
+#include <linux/hash.h>
+#include <linux/proc_fs.h>
+#include <net/dst.h>
+
+#include <asm/byteorder.h>
+#include <asm/atomic.h>
+
+#define PPPOL2TP_DRV_VERSION	"V0.5"
+
+/* Developer debug code. */
+#if 0
+#define DEBUG	/* Define to compile in very verbose developer debug */
+#endif
+
+/* Timeouts are specified in milliseconds to/from userspace */
+#define JIFFIES_TO_MS(t) ((t) * 1000 / HZ)
+#define MS_TO_JIFFIES(j) ((j * HZ) / 1000)
+
+/* L2TP header constants */
+#define L2TP_HDRFLAG_T	   0x8000
+#define L2TP_HDRFLAG_L	   0x4000
+#define L2TP_HDRFLAG_S	   0x0800
+#define L2TP_HDRFLAG_O	   0x0200
+#define L2TP_HDRFLAG_P	   0x0100
+
+#define L2TP_HDR_VER_MASK  0x000F
+#define L2TP_HDR_VER	   0x0002
+
+/* Space for UDP, L2TP and PPP headers, plus some slack */
+#define PPPOL2TP_HEADER_OVERHEAD	40
+
+/* Just some random numbers */
+#define L2TP_TUNNEL_MAGIC   0x42114DDA
+#define L2TP_SESSION_MAGIC  0x0C04EB7D
+
+#define PPPOL2TP_HASH_BITS 4
+#define PPPOL2TP_HASH_SIZE (1 << PPPOL2TP_HASH_BITS)
+
+/* Default trace flags */
+#ifdef DEBUG
+#define PPPOL2TP_DEFAULT_DEBUG_FLAGS	-1
+#else
+#define PPPOL2TP_DEFAULT_DEBUG_FLAGS	0
+#endif
+
+
+/* Debug kernel message control.
+ * Verbose debug messages (PPPOL2TP_MSG_DEBUG flag) are optionally compiled in.
+ */
+#ifdef DEBUG
+#define DPRINTK(_mask, _fmt, args...)					\
+	do {								\
+		if ((_mask) & PPPOL2TP_MSG_DEBUG)			\
+			printk(KERN_DEBUG "PPPOL2TP %s: " _fmt,		\
+			       __FUNCTION__, ##args);			\
+	} while(0)
+#else
+#define DPRINTK(_mask, srgs...) do { } while(0)
+#endif /* DEBUG */
+
+#define PRINTK(_mask, _type, _lvl, _fmt, args...)			\
+	do {								\
+		if ((_mask) & (_type))					\
+			printk(_lvl "PPPOL2TP: " _fmt, ##args);		\
+	} while(0)
+
+/* Extra driver debug. Should only be enabled by developers working on
+ * this driver. 
+ */
+#ifdef DEBUG
+#define ENTER_FUNCTION	 printk(KERN_DEBUG "PPPOL2TP: --> %s\n", __FUNCTION__)
+#define EXIT_FUNCTION	 printk(KERN_DEBUG "PPPOL2TP: <-- %s\n", __FUNCTION__)
+#else
+#define ENTER_FUNCTION	 do { } while(0)
+#define EXIT_FUNCTION	 do { } while(0)
+#endif
+
+struct pppol2tp_tunnel;
+
+/* Describes a session. It is the sk_user_data field in the PPPoL2TP
+ * socket. Contains information to identify incoming packets and transmit
+ * outgoing ones.
+ */
+struct pppol2tp_session
+{
+	int			magic;		/* should be 
+						 * L2TP_SESSION_MAGIC */
+	int			owner;		/* pid that opened the socket */
+	
+	struct sock		*sock;		/* Pointer to the session
+						 * PPPoX socket */
+	struct sock		*tunnel_sock;	/* Pointer to the tunnel UDP 
+						 * socket */
+	
+	struct pppol2tp_addr	tunnel_addr;	/* Description of tunnel */
+
+	struct pppol2tp_tunnel	*tunnel;	/* back pointer to tunnel 
+						 * context */
+	
+	char			name[20];	/* "sess xxxxx/yyyyy", where 
+						 * x=tunnel_id, y=session_id */
+	int			mtu;
+	int			mru;
+	int			flags;		/* accessed by PPPIOCGFLAGS. 
+						 * Unused. */
+	int			recv_seq:1;	/* expect receive packets with 
+						 * sequence numbers? */
+	int			send_seq:1;	/* send packets with sequence 
+						 * numbers? */
+	int			lns_mode:1;	/* behave as LNS? LAC enables 
+						 * sequence numbers under 
+						 * control of LNS. */
+	int			debug;		/* bitmask of debug message 
+						 * categories */
+	int			reorder_timeout; /* configured reorder timeout 
+						  * (in jiffies) */
+	u16			nr;		/* session NR state (receive) */
+	u16			ns;		/* session NS state (send) */
+	struct pppol2tp_ioc_stats stats;
+	struct hlist_node	hlist;		/* Hash list node */
+};
+
+/* The sk_user_data field of the tunnel's UDP socket. It contains info to track
+ * all the associated sessions so incoming packets can be sorted out
+ */
+struct pppol2tp_tunnel
+{
+	int			magic;		/* Should be L2TP_TUNNEL_MAGIC */
+	
+	struct proto		old_proto;	/* original proto */
+	struct proto		l2tp_proto;	/* L2TP proto */
+	rwlock_t		hlist_lock;	/* protect session_hlist */
+	struct hlist_head	session_hlist[PPPOL2TP_HASH_SIZE]; 
+						/* hashed list of sessions, 
+						 * hashed by id */
+	int			debug;		/* bitmask of debug message 
+						 * categories */
+	char			name[12];	/* "tunl xxxxx" */
+	struct pppol2tp_ioc_stats stats;
+	
+	void (*old_data_ready)(struct sock *, int);
+	void (*old_sk_destruct)(struct sock *);
+
+	struct sock		*sock;		/* Parent socket */	
+	struct list_head	list;		/* Keep a list of all open 
+						 * prepared sockets */
+
+	atomic_t		session_count;
+};
+
+/* Number of bytes to build transmit L2TP headers.
+ * Unfortunately the size is different depending on whether sequence numbers
+ * are enabled.
+ */
+#define PPPOL2TP_L2TP_HDR_SIZE_SEQ		10
+#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ		6
+
+
+static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
+
+static struct ppp_channel_ops pppol2tp_chan_ops = { pppol2tp_xmit , NULL };
+static struct proto_ops pppol2tp_ops;
+static LIST_HEAD(pppol2tp_tunnel_list);
+
+/* Macros to derive session/tunnel context pointers from a socket. */
+#define SOCK_2_SESSION(sock, session, err, errval, label, quiet) \
+	session = (struct pppol2tp_session *)((sock)->sk_user_data);  \
+	if (!session || session->magic != L2TP_SESSION_MAGIC) {	       \
+		if (!quiet) \
+			printk(KERN_ERR "%s: %s:%d: BAD SESSION MAGIC " \
+			       "(" #sock "=%p) session=%p magic=%x\n", \
+			       __FUNCTION__, __FILE__, __LINE__, sock, \
+			       session, session ? session->magic : 0); \
+		err = errval; \
+		goto label; \
+	}
+	
+#define SOCK_2_TUNNEL(sock, tunnel, err, errval, label, quiet) \
+	tunnel = (struct pppol2tp_tunnel *)((sock)->sk_user_data);	 \
+	if (!tunnel || tunnel->magic != L2TP_TUNNEL_MAGIC) {	     \
+		if (!quiet) \
+			printk(KERN_ERR "%s: %s:%d: BAD TUNNEL MAGIC " \
+			       "(" #sock "=%p) tunnel=%p magic=%x\n", \
+			       __FUNCTION__, __FILE__, __LINE__, sock, \
+			       tunnel, tunnel ? tunnel->magic : 0); \
+		err = errval; \
+		goto label; \
+	}
+
+/* Session hash list.
+ * The session_id SHOULD be random according to RFC2661, but several
+ * L2TP implementations (Cisco and Microsoft) use incrementing
+ * session_ids.  So we do a real hash on the session_id, rather than a
+ * simple bitmask.
+ */
+static inline struct hlist_head *
+pppol2tp_session_id_hash(struct pppol2tp_tunnel *tunnel, u16 session_id)
+{
+	unsigned long hash_val = (unsigned long) session_id;
+	return &tunnel->session_hlist[hash_long(hash_val, PPPOL2TP_HASH_BITS)];
+}
+
+/* Lookup a session by id
+ */
+static struct pppol2tp_session *
+pppol2tp_session_find(struct pppol2tp_tunnel *tunnel, u16 session_id)
+{
+	struct hlist_head *session_list = 
+		pppol2tp_session_id_hash(tunnel, session_id);
+	struct hlist_node *tmp;
+	struct hlist_node *walk;
+	struct pppol2tp_session *session;
+
+	hlist_for_each_safe(walk, tmp, session_list) {
+		session = hlist_entry(walk, struct pppol2tp_session, hlist);
+		if (session->tunnel_addr.s_session == session_id) {
+			return session;
+		}
+	}
+
+	return NULL;
+}
+
+/* Easy way to locate features not yet implemented 
+ */
+static void pppol2tp_warn_not_yet_implemented(int debug_mask, const char *what)
+{
+	PRINTK(debug_mask, PPPOL2TP_MSG_CONTROL, KERN_WARNING, 
+	       "feature %s not yet implemented\n", what);
+}
+
+/*****************************************************************************
+ * Receive data handling
+ *****************************************************************************/
+
+/* Internal receive frame. Do the real work of receiving an L2TP data frame
+ * here.
+ * Returns 0 if the packet was a data packet and was successfully passed on.
+ * Returns 1 if the packet was not a good data packet and could not be
+ * forwarded.  All such packets are passed up to userspace to deal with.
+ */
+static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb)
+{
+	struct pppox_opt *po;
+	struct pppol2tp_session *session = NULL;
+	int error = 0;
+	struct pppol2tp_tunnel *tunnel;
+	struct sock *session_sock = NULL;
+	unsigned char *ptr;
+	u16 hdrflags;
+	u16 tunnel_id, session_id;
+	int length;
+	int result;
+
+	ENTER_FUNCTION;
+
+	SOCK_2_TUNNEL(sock, tunnel, error, 1, end, 0);
+
+	/* Short packet? */
+	if (skb->len < sizeof(struct udphdr)) {
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO, 
+		       "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
+		goto end;
+	}
+
+	/* Point to L2TP header */
+	ptr = skb->data + sizeof(struct udphdr);
+
+	/* Get L2TP header flags */
+	hdrflags = ntohs(*(u16*)ptr);
+
+	/* Trace packet contents, if enabled */
+	if (tunnel->debug & PPPOL2TP_MSG_DATA) {
+		printk(KERN_DEBUG "%s: recv: " KERN_DEBUG, tunnel->name);
+
+		for (length = 0; length < 16; length++)
+			printk(" %02X", ptr[length]);
+		printk("\n");
+	}
+
+	/* Get length of L2TP packet */
+	length = ntohs(skb->h.uh->len) - sizeof(struct udphdr);
+	
+	/* Too short? */
+	if (length < 12) {
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO, 
+		       "%s: recv short L2TP packet (len=%d)\n", tunnel->name, length);
+		goto end;
+	}
+	
+	/* If type is control packet, it is handled by userspace. */
+	if (hdrflags & L2TP_HDRFLAG_T) { 
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, 
+		       "%s: recv control packet, len=%d\n", tunnel->name, length);
+		goto end;
+	}
+
+	/* Skip flags */
+	ptr += 2;
+	
+	/* If length is present, skip it */
+	if (hdrflags & L2TP_HDRFLAG_L)
+		ptr += 2;
+
+	/* Extract tunnel and session ID */
+	tunnel_id = ntohs(*(u16 *) ptr);
+	ptr += 2;
+	session_id = ntohs(*(u16 *) ptr);
+	ptr += 2;
+
+	/* Find the session context */
+	session = pppol2tp_session_find(tunnel, session_id);
+	if (!session) {
+		/* Not found? Pass to userspace to deal with */
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO, 
+		       "%s: no socket found (%hu/%hu). Passing up.\n", 
+		       tunnel->name, tunnel_id, session_id);
+		goto end;
+	}
+	sock_hold(session->sock);
+
+	DPRINTK(session->debug, "%s: socket rcvbuf alloc=%d\n", 
+		session->name, atomic_read(&sock->sk_rmem_alloc));
+
+	/* The ref count on the socket was increased by the above call since
+	 * we now hold a pointer to the session. Take care to do sock_put()
+	 * when exiting this function from now on...
+	 */
+
+	/* Handle the optional sequence numbers.  If we are the LAC,
+	 * enable/disable sequence numbers under the control of the LNS.  If
+	 * no sequence numbers present but we were expecting them, discard
+	 * frame.
+	 */
+	if (hdrflags & L2TP_HDRFLAG_S) {
+		u16 ns, nr;
+		ns = ntohs(*(u16 *) ptr);
+		ptr += 2;
+		nr = ntohs(*(u16 *) ptr);
+		ptr += 2;
+
+		/* Received a packet with sequence numbers. If we're the LAC,
+		 * check if we are sending sequence numbers and if not,
+		 * configure it so.
+		 */
+		if ((!session->lns_mode) && (!session->send_seq)) {
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO, 
+			       "%s: requested to enable seq numbers by LNS\n", 
+			       session->name);
+			session->send_seq = -1;
+		}
+
+		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, 
+		       "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n", 
+		       session->name, ns, nr, session->nr);
+
+		/* Discard out-of-sequence packets */
+		if (ns != session->nr) {
+			session->stats.rx_oos_packets++;
+			session->stats.rx_errors++;
+			goto discard;
+		}
+
+		/* Bump our Nr */
+		session->nr++;
+		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, 
+		       "%s: updated nr to %hu\n", session->name, session->nr);
+	} else {
+		/* No sequence numbers.
+		 * If user has configured mandatory sequence numbers, discard.
+		 */
+		if (session->recv_seq) {
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING, 
+			       "%s: recv data has no seq numbers when required. "
+			       "Discarding\n", session->name);
+			session->stats.rx_seq_discards++;
+			session->stats.rx_errors++;
+			goto discard;
+		}
+
+		/* If we're the LAC and we're sending sequence numbers, the
+		 * LNS has requested that we no longer send sequence numbers.
+		 * If we're the LNS and we're sending sequence numbers, the
+		 * LAC is broken. Discard the frame.
+		 */
+		if ((!session->lns_mode) && (session->send_seq)) {
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO, 
+			       "%s: requested to disable seq numbers by LNS\n", 
+			       session->name);
+			session->send_seq = 0;
+		} else if (session->send_seq) {
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING, 
+			       "%s: recv data has no seq numbers when required. "
+			       "Discarding\n", session->name);
+			session->stats.rx_seq_discards++;
+			session->stats.rx_errors++;
+			goto discard;
+		}
+	}
+		
+	/* If offset bit set, skip it. */
+	if (hdrflags & L2TP_HDRFLAG_O)
+		ptr += 2 + ntohs(*(u16 *) ptr);
+
+	skb_pull(skb, ptr - skb->data);
+
+	/* Skip PPP header, if present.	 In testing, Microsoft L2TP clients
+	 * don't send the PPP header (PPP header compression enabled), but
+	 * other clients can include the header. So we cope with both cases
+	 * here. The PPP header is always FF03 when using L2TP.
+	 *
+	 * Note that skb->data[] isn't dereferenced from a u16 ptr here since
+	 * the field may be unaligned.
+	 */
+	if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
+		skb_pull(skb, 2);
+
+	/* We're about to requeue the skb, so unlink it and return resources
+	 * to its current owner (a socket receive buffer). Also release the
+	 * dst to force a route lookup on the inner IP packet since skb->dst
+	 * currently points to the dst of the UDP tunnel.
+	 */
+	skb_unlink(skb);
+	skb_orphan(skb);
+	dst_release(skb->dst);
+	skb->dst = NULL;
+
+	tunnel->stats.rx_packets++;
+	tunnel->stats.rx_bytes += length;
+	session->stats.rx_packets++;
+	session->stats.rx_bytes += length;
+
+	/* If the socket is bound, send it in to PPP's input queue.  Otherwise
+	 * queue it on the socket.
+	 */
+	session_sock = session->sock;
+	if (session_sock->sk_state & PPPOX_BOUND) {
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, 
+		       "%s: recv %d byte data frame, passing to ppp\n", 
+		       session->name, length);
+		po = pppox_sk(session_sock);
+		ppp_input(&po->chan, skb);
+	} else {
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO, 
+		       "%s: socket not bound\n", session->name);
+		/* Not bound. Queue it now */
+		sock_queue_rcv_skb(session_sock, skb);
+	}
+
+	result = 0;
+
+out:
+	DPRINTK(session->debug, "calling sock_put; refcnt=%d\n", 
+		session->sock->sk_refcnt.counter);
+	sock_put(session->sock);
+	EXIT_FUNCTION;
+	return result;
+
+discard:
+	DPRINTK(session->debug, "discarding skb, len=%d\n", skb->len);
+	skb_unlink(skb);
+	kfree_skb(skb);
+	result = 0;
+	goto out;
+
+end:
+	EXIT_FUNCTION;
+	return 1;
+}
+
+/* The data_ready hook on the UDP socket. Scan the incoming packet list for
+ * packets to process
+ */
+static void pppol2tp_data_ready(struct sock *sk, int len)
+{
+	int err;
+	struct pppol2tp_tunnel *tunnel;
+	struct sk_buff *skb;
+	int processed = 0;
+	
+	ENTER_FUNCTION;
+	SOCK_2_TUNNEL(sk, tunnel, err, -EBADF, end, 0);
+	
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, 
+	       "%s: received %d bytes\n", tunnel->name, len);
+	
+	/* FIXME: Do we need to lock the socket here? */
+	skb_queue_walk(&sk->sk_receive_queue, skb) {
+		if (pppol2tp_recv_core(sk, skb)) {
+			/* skb was passed to userspace */
+			processed = 1;
+			PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, 
+			       "%s: packet passed to userspace\n", 
+			       tunnel->name);
+		} else {
+			PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, 
+			       "%s: data packet accepted\n", tunnel->name);
+			/* If the packet has been accepted it is the
+			 * responsibility of the receiver (either socket or
+			 * ppp device) to dispose of it.
+			 *
+			 * Also, since recv_core has requeued the packet
+			 * elsewhere, it's not safe to continue this loop, so
+			 * we break
+			 */
+			break;
+		}
+	}
+	if (processed) {
+		DPRINTK(tunnel->debug, "%s: calling old old_data_ready\n", 
+			tunnel->name);
+		tunnel->old_data_ready(sk, len);
+	}
+end:
+	EXIT_FUNCTION;
+	return;
+}
+
+/* Receive message. This is the recvmsg for the PPPoL2TP socket.
+ *
+ * @iocb:  unused here.
+ * @sock:  the PPPoL2TP session socket being read.
+ * @msg:   destination; msg_namelen is cleared and MSG_TRUNC is set in
+ *         msg_flags when the datagram does not fit.
+ * @len:   size of the caller's buffer.
+ * @flags: recvmsg flags; MSG_DONTWAIT selects a non-blocking dequeue.
+ *
+ * Returns the number of bytes copied, -EIO if the socket is bound to a
+ * PPP channel (data then flows through the channel, not through reads on
+ * the socket), or the error reported by skb_recv_datagram() /
+ * memcpy_toiovec().
+ */
+static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, 
+			    struct msghdr *msg, size_t len,
+			    int flags)
+{
+	int err = 0;
+	struct sk_buff *skb = NULL;
+	struct sock *sk = sock->sk;
+
+	ENTER_FUNCTION;
+
+	/* PPPOX_BOUND is a PPPoX state flag kept in sk->sk_state. The
+	 * socket layer's sock->state holds SS_* values and must not be
+	 * tested against it.
+	 */
+	err = -EIO;
+	if (sk->sk_state & PPPOX_BOUND)
+		goto error;
+
+	msg->msg_namelen = 0;
+
+	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+				flags & MSG_DONTWAIT, &err);
+	if (skb) {
+		/* Never copy more than the caller's buffer can hold; tell
+		 * the caller when the datagram had to be cut short.
+		 */
+		if (len > skb->len)
+			len = skb->len;
+		else if (len < skb->len)
+			msg->msg_flags |= MSG_TRUNC;
+
+		err = memcpy_toiovec(msg->msg_iov, (unsigned char *) skb->data,
+				     len);
+		if (err < 0)
+			goto do_skb_free;
+		err = len;
+	}
+do_skb_free:
+	if (skb)
+		kfree_skb(skb);
+error:
+	EXIT_FUNCTION;
+	return err;
+}
+
+/************************************************************************
+ * Transmit handling
+ ***********************************************************************/
+
+/* Internal UDP socket transmission.
+ *
+ * Passes a prepared msghdr to the tunnel socket's original sendmsg handler
+ * (tunnel->old_proto.sendmsg) and updates the tunnel and session transmit
+ * counters from the outcome.
+ *
+ * Ownership: msg and msg->msg_iov are always kfree()d here, whether or not
+ * the send worked, so the caller must not use them afterwards.
+ *
+ * Returns the handler's result (or that of wait_on_sync_kiocb() when the
+ * request was queued). A value >= 0 is accounted as bytes sent, a
+ * negative value as a transmit error.
+ */
+static int pppol2tp_udp_sock_send(struct kiocb *iocb,
+				  struct pppol2tp_session *session, 
+				  struct pppol2tp_tunnel *tunnel,
+				  struct msghdr *msg, int total_len)
+{
+	mm_segment_t saved_fs;
+	int rc;
+
+	ENTER_FUNCTION;
+
+	DPRINTK(session->debug, "%s: udp_sendmsg call...\n", session->name);
+
+	/* sendmsg() is written for buffers supplied by user space, but the
+	 * iovecs in msg reference kernel memory. Switch the address limit
+	 * to the value returned by get_ds() for the duration of the call
+	 * and restore the caller's limit afterwards.
+	 */
+	saved_fs = get_fs();
+	set_fs(get_ds());
+
+	rc = tunnel->old_proto.sendmsg(iocb, session->tunnel_sock, msg, total_len);
+	if (rc == -EIOCBQUEUED)
+		rc = wait_on_sync_kiocb(iocb);
+
+	set_fs(saved_fs);
+
+	if (rc < 0) {
+		tunnel->stats.tx_errors++;
+		session->stats.tx_errors++;
+	} else {
+		tunnel->stats.tx_packets++;
+		tunnel->stats.tx_bytes += rc;
+		session->stats.tx_packets++;
+		session->stats.tx_bytes += rc;
+	}
+
+	DPRINTK(session->debug, "%s: %s: returning result %d\n", __FUNCTION__, 
+		session->name, rc);
+
+	/* The message descriptor belongs to us: release it on every path */
+	kfree(msg->msg_iov);
+	kfree(msg);
+
+	EXIT_FUNCTION;
+	return rc;
+}
+
+/* Store a 16-bit value at p in network (big-endian) byte order, one byte at
+ * a time, and return the position just past it. Works for any alignment.
+ */
+static inline u8 *pppol2tp_put_be16(u8 *p, u16 val)
+{
+	*p++ = val >> 8;
+	*p++ = val & 0xff;
+	return p;
+}
+
+/* Build an L2TP header for the session into the buffer provided.
+ *
+ * @session: supplies the destination tunnel/session ids and, when
+ *           send_seq is set, the Ns sequence number (which is advanced).
+ * @buf:     output buffer; it must hold PPPOL2TP_L2TP_HDR_SIZE_SEQ bytes
+ *           when sequence numbers are enabled. No alignment is required.
+ *
+ * Returns the number of header bytes written.
+ *
+ * The callers pass a u8 array on the stack, which carries no 16-bit
+ * alignment guarantee, so the fields are written byte by byte instead of
+ * through a u16 pointer.
+ */
+static int pppol2tp_build_l2tp_header(struct pppol2tp_session *session, 
+				      void *buf)
+{
+	u8 *bufp = buf;
+	u16 flags = L2TP_HDR_VER;
+
+	if (session->send_seq) {
+		flags |= L2TP_HDRFLAG_S;
+	}
+
+	/* Setup L2TP header: flags/version, tunnel id, session id */
+	bufp = pppol2tp_put_be16(bufp, flags);
+	bufp = pppol2tp_put_be16(bufp, session->tunnel_addr.d_tunnel);
+	bufp = pppol2tp_put_be16(bufp, session->tunnel_addr.d_session);
+	if (session->send_seq) {
+		/* Ns, followed by a zero field */
+		bufp = pppol2tp_put_be16(bufp, session->ns);
+		bufp = pppol2tp_put_be16(bufp, 0);
+		session->ns++;
+		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, 
+		       "%s: updated ns to %hu\n", session->name, session->ns);
+	}
+
+	return bufp - (u8 *) buf;
+}
+
+/* This is the sendmsg for the PPPoL2TP pppol2tp_session socket.  We come here
+ * when a user application does a sendmsg() on the session socket. L2TP and
+ * PPP headers must be inserted into the user's data.
+ *
+ * @iocb:      passed through to the tunnel socket's sendmsg.
+ * @sock:      the PPPoL2TP session socket.
+ * @m:         the caller's message; only its iovecs, control data and
+ *             flags are used, the destination is always the tunnel peer.
+ * @total_len: number of payload bytes described by m->msg_iov.
+ *
+ * Returns the result of pppol2tp_udp_sock_send() on success, or
+ * -ENOTCONN, -EBADF (from the context lookups), -EMSGSIZE, -ENOBUFS or
+ * -EFAULT.
+ *
+ * The payload is copied into a kernel buffer before it is handed on.
+ * pppol2tp_udp_sock_send() performs the send with the address limit
+ * raised by set_fs(get_ds()), so iovecs still pointing at user memory
+ * would be dereferenced without the usual access checks.
+ */
+static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+			    size_t total_len)
+{
+	static unsigned char ppph[2] = { 0xff, 0x03 };
+	struct sock *sk = sock->sk;
+	int error = 0;
+	u8 hdr[PPPOL2TP_L2TP_HDR_SIZE_SEQ];
+	int hdr_len;
+	struct msghdr *msg;
+	unsigned char *payload = NULL;
+	struct pppol2tp_session *session;
+	struct pppol2tp_tunnel *tunnel;
+
+	ENTER_FUNCTION;
+
+	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) {
+		error = -ENOTCONN;
+		goto end;
+	}
+
+	/* Get session and tunnel contexts */
+	SOCK_2_SESSION(sk, session, error, -EBADF, end, 0);
+	SOCK_2_TUNNEL(session->tunnel_sock, tunnel, error, -EBADF, end, 0);
+
+	/* Headers plus payload have to fit into a single datagram; this
+	 * also bounds the kernel allocation made below.
+	 */
+	if (total_len > 0xffff - sizeof(hdr) - sizeof(ppph)) {
+		error = -EMSGSIZE;
+		goto tx_error;
+	}
+
+	/* Pull the user's data into kernel memory while the normal user
+	 * access checks are still in force.
+	 */
+	if (total_len != 0) {
+		payload = kmalloc(total_len, GFP_KERNEL);
+		if (payload == NULL) {
+			error = -ENOBUFS;
+			goto tx_error;
+		}
+		if (memcpy_fromiovec(payload, m->msg_iov, total_len)) {
+			error = -EFAULT;
+			goto tx_error;
+		}
+	}
+
+	/* Setup L2TP header */	
+	hdr_len = pppol2tp_build_l2tp_header(session, &hdr);
+
+	if (session->send_seq)
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, 
+		       "%s: send %d bytes, ns=%hu\n", session->name, 
+		       (int) total_len, session->ns - 1);
+	else
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, 
+		       "%s: send %d bytes\n", session->name, (int) total_len);
+
+	/* Unfortunately, there is no direct way for us to pass an skb to the
+	 * UDP layer, we have to pretend to be sending ordinary data and use
+	 * sendmsg.
+	 *
+	 * We add the L2TP and PPP headers here. To do so, we create a new
+	 * struct msghdr with the headers as the first two iovecs and the
+	 * kernel copy of the payload as the third.
+	 */
+	msg = kmalloc(sizeof(struct msghdr), GFP_ATOMIC);
+	if (msg == NULL) {
+		error = -ENOBUFS;
+		goto tx_error;
+	}
+
+	msg->msg_iov = kmalloc(3 * sizeof(struct iovec), GFP_ATOMIC);
+	if (msg->msg_iov == NULL) {
+		error = -ENOBUFS;
+		kfree(msg);
+		goto tx_error;
+	}
+
+	msg->msg_iov[0].iov_base = &hdr;
+	msg->msg_iov[0].iov_len	 = hdr_len;
+	msg->msg_iov[1].iov_base = &ppph;
+	msg->msg_iov[1].iov_len	 = sizeof(ppph);
+	msg->msg_iov[2].iov_base = payload;
+	msg->msg_iov[2].iov_len	 = total_len;
+	/* Leave the payload iovec out entirely for an empty message */
+	msg->msg_iovlen = (total_len != 0) ? 3 : 2;
+
+	/* If the user calls sendto() that's just too bad */
+	msg->msg_name	 = &session->tunnel_addr.addr;
+	msg->msg_namelen = sizeof(session->tunnel_addr.addr);
+
+	msg->msg_control    = m->msg_control;
+	msg->msg_controllen = m->msg_controllen;
+	msg->msg_flags	    = m->msg_flags;
+
+	/* Do the real work. This always frees msg, regardless of whether
+	 * there was an error, and accounts the result in the stats itself.
+	 */
+	error = pppol2tp_udp_sock_send(iocb, session, tunnel, msg, 
+				       total_len + hdr_len + sizeof(ppph));
+	goto free_payload;
+
+tx_error:
+	tunnel->stats.tx_errors++;
+	session->stats.tx_errors++;
+free_payload:
+	/* pppol2tp_udp_sock_send() has returned, so the copy is no longer
+	 * referenced. kfree(NULL) is a no-op.
+	 */
+	kfree(payload);
+end:
+	EXIT_FUNCTION;
+	return error;
+}
+
+
+/* Transmit function called by generic PPP driver.  Sends PPP frame over
+ * PPPoL2TP socket.
+ *
+ * This is almost the same as pppol2tp_sendmsg(), but rather than being called
+ * with a msghdr from userspace, it is called with a skb from the kernel.
+ *
+ * @chan: the PPP channel; chan->private is the session socket.
+ * @skb:  the frame to send. It is always consumed (freed) here.
+ *
+ * Always returns 1. The PPP channel contract treats any non-zero return
+ * as "skb accepted" and will not touch the skb again, so every path -
+ * including the early failures - must free it; returning a negative
+ * errno without freeing would leak the buffer.
+ *
+ * NOTE(review): the send goes through the socket sendmsg path - confirm
+ * that this is permitted in the context the PPP core calls us from.
+ */
+static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+{
+	struct sock *sk = (struct sock *) chan->private;
+	int error = 0;
+	u8 hdr[PPPOL2TP_L2TP_HDR_SIZE_SEQ];
+	int hdr_len;
+	struct msghdr *msg;
+	struct pppol2tp_session *session;
+	struct pppol2tp_tunnel *tunnel;
+	struct kiocb iocb;
+	struct sock_iocb siocb;
+
+	ENTER_FUNCTION;
+
+	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) {
+		DPRINTK(-1, "dead=%d state=%x\n", sock_flag(sk, SOCK_DEAD), sk->sk_state);
+		error = -ENOTCONN;
+		goto drop;
+	}
+
+	/* Get session and tunnel contexts from the socket */
+	SOCK_2_SESSION(sk, session, error, -EBADF, drop, 0);
+	SOCK_2_TUNNEL(session->tunnel_sock, tunnel, error, -EBADF, drop, 0);
+
+	/* Setup L2TP header */	
+	hdr_len = pppol2tp_build_l2tp_header(session, &hdr);
+
+	if (session->send_seq)
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, 
+		       "%s: send %d bytes, ns=%hu\n", 
+		       session->name, skb->len, session->ns - 1);
+	else
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG, 
+		       "%s: send %d bytes\n", session->name, skb->len);
+
+	/* Unfortunately there doesn't appear to be a way for us to pass an
+	 * skb to the UDP layer, we have to pretend to be sending ordinary
+	 * data and use sendmsg
+	 */
+	msg = kmalloc(sizeof(struct msghdr), GFP_ATOMIC);
+	if (msg == NULL) {
+		error = -ENOBUFS;
+		tunnel->stats.tx_errors++;
+		session->stats.tx_errors++;
+		goto drop;
+	}
+
+	msg->msg_iov = kmalloc(2 * sizeof(struct iovec), GFP_ATOMIC);
+	if (msg->msg_iov == NULL) {
+		error = -ENOBUFS;
+		tunnel->stats.tx_errors++;
+		session->stats.tx_errors++;
+		kfree(msg);
+		goto drop;
+	}
+	msg->msg_iov[0].iov_base = &hdr;
+	msg->msg_iov[0].iov_len	 = hdr_len;
+	/* FIXME: do we need to handle skb fragments here?
+	 * NOTE(review): skb->data/skb->len describe the whole frame only if
+	 * the skb is linear - confirm the PPP core never passes paged skbs.
+	 */
+	msg->msg_iov[1].iov_base = skb->data;
+	msg->msg_iov[1].iov_len	 = skb->len;
+	msg->msg_iovlen = 2;
+
+	/* If the user calls sendto() that's just too bad */
+	msg->msg_name	 = &session->tunnel_addr.addr;
+	msg->msg_namelen = sizeof(session->tunnel_addr.addr);
+
+	msg->msg_control    = NULL;
+	msg->msg_controllen = 0;
+	msg->msg_flags	    = MSG_DONTWAIT;	/* Need this to prevent blocking */
+
+	/* Do the real work. This always frees msg, regardless of whether
+	 * there was an error, and records the outcome in the stats.
+	 */
+	init_sync_kiocb(&iocb, NULL);
+	iocb.private = &siocb;
+	error = pppol2tp_udp_sock_send(&iocb, session, tunnel, msg, 
+				       skb->len + hdr_len);
+
+drop:
+	/* Sent or not, the frame is ours to dispose of */
+	kfree_skb(skb);
+
+	EXIT_FUNCTION;
+	return 1;
+}
+
+/*****************************************************************************
+ * Session (and tunnel control) socket create/destroy.
+ *****************************************************************************/
+
+/* When the tunnel UDP socket is closed, all the attached sockets need to go
+ * too. This handles that.
+ *
+ * @tunnel: tunnel whose session hash table is emptied; must not be NULL
+ *          (BUG() otherwise).
+ *
+ * For every session found in tunnel->session_hlist[] the session is
+ * removed from the hash, its socket is unbound and marked PPPOX_DEAD if it
+ * was connected or bound, and its receive and write queues are purged.
+ * The session and tunnel contexts themselves are not freed here.
+ */
+static void pppol2tp_tunnel_closeall(struct pppol2tp_tunnel *tunnel)
+{
+	int hash;
+	struct hlist_node *walk;
+	struct hlist_node *tmp;
+	struct pppol2tp_session *session;
+	struct sock *sk;
+
+	ENTER_FUNCTION;
+	
+	if (tunnel == NULL)
+		BUG();
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+	       "%s: closing all sessions...\n", tunnel->name);
+
+	for (hash = 0; hash < PPPOL2TP_HASH_SIZE; hash++) {
+		/* The _safe variant is needed because the current entry is
+		 * deleted from the list inside the loop body.
+		 * NOTE(review): the walk itself runs without hlist_lock;
+		 * only the deletion below takes it - confirm no concurrent
+		 * writer can exist at this point.
+		 */
+		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
+			session = hlist_entry(walk, struct pppol2tp_session, hlist);
+
+			sk = session->sock;
+
+			PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+			       "%s: closing session\n", session->name);
+
+			write_lock_bh(&tunnel->hlist_lock);
+			hlist_del_init(&session->hlist);
+			write_unlock_bh(&tunnel->hlist_lock);
+
+			/* Keep the socket alive while it is torn down */
+			sock_hold(sk);
+
+			lock_sock(sk);
+
+			if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
+				pppox_unbind_sock(sk);
+				sk->sk_state = PPPOX_DEAD;
+				sk->sk_state_change(sk);
+			}
+
+			/* Purge any queued data */
+			skb_queue_purge(&sk->sk_receive_queue);
+			skb_queue_purge(&sk->sk_write_queue);
+
+			release_sock(sk);
+
+			DPRINTK(session->debug, "calling sock_put; refcnt=%d\n",
+				sk->sk_refcnt.counter);
+			/* Drops the reference taken by sock_hold() above */
+			sock_put(sk);
+		}
+	}
+
+	EXIT_FUNCTION;
+}
+
+/* Really kill the tunnel.
+ * Come here only when all sessions have been cleared from the tunnel.
+ *
+ * @tunnel: tunnel context to unlink from the list it is on and free. The
+ *          pointer is invalid once this returns.
+ *
+ * NOTE(review): list_del() is done without any lock here - confirm how
+ * the tunnel list is protected against concurrent access.
+ */
+static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel)
+{
+	ENTER_FUNCTION;
+
+	/* Remove from socket list */
+	list_del(&tunnel->list);
+
+	/* Only a trace message; no module use count is changed here */
+	DPRINTK(tunnel->debug, "%s: MOD_DEC_USE_COUNT\n", tunnel->name);
+	kfree(tunnel);
+
+	EXIT_FUNCTION;
+}
+
+/* Tunnel UDP socket destruct hook.
+ * The tunnel context is deleted only when all session sockets have been
+ * closed.
+ *
+ * @sk: the tunnel's UDP socket, which is being destroyed.
+ *
+ * Closes every session attached to the tunnel and then runs the
+ * destructor that was installed on the socket before
+ * pppol2tp_prepare_tunnel_socket() replaced it. Without that call the
+ * socket's own destructor would never run.
+ */
+static void pppol2tp_tunnel_destruct(struct sock *sk)
+{
+	struct pppol2tp_tunnel *tunnel;
+	void (*old_sk_destruct)(struct sock *);
+	int error = 0;
+	ENTER_FUNCTION;
+
+	SOCK_2_TUNNEL(sk, tunnel, error, -EBADF, end, 0);
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+	       "%s: closing...\n", tunnel->name); 
+
+	/* Fetch the saved hook first: dropping the session socket
+	 * references in closeall may end in pppol2tp_tunnel_free(), after
+	 * which the tunnel context must not be read any more.
+	 */
+	old_sk_destruct = tunnel->old_sk_destruct;
+
+	pppol2tp_tunnel_closeall(tunnel);
+
+	/* Chain to the socket's original destructor */
+	if (old_sk_destruct != NULL)
+		(*old_sk_destruct)(sk);
+
+end:
+	EXIT_FUNCTION;
+	return;
+}
+
+/* Really kill the socket. (Called from sock_put if refcnt == 0.)
+ *
+ * @sk: the PPPoL2TP session socket being destroyed.
+ *
+ * Drops the session's count on its tunnel, freeing the tunnel context
+ * when this was the last session, then frees the pppox_opt and the
+ * session context. If the session or tunnel magic check fails nothing is
+ * freed.
+ */
+static void pppol2tp_session_destruct(struct sock *sk)
+{
+	struct pppox_opt *po = pppox_sk(sk);
+	struct pppol2tp_session *session = NULL;
+	int error = 0;
+
+	ENTER_FUNCTION;
+
+	if (sk->sk_user_data != NULL) {
+		struct pppol2tp_tunnel *tunnel;
+
+		SOCK_2_SESSION(sk, session, error, -EBADF, out, 0);
+
+		/* Don't use SOCK_2_TUNNEL() here to get the tunnel context
+		 * because the tunnel socket might have already been closed
+		 * (its sk->sk_user_data will be NULL) so use the session's
+		 * private tunnel ptr instead.
+		 */
+		tunnel = session->tunnel;
+		if (tunnel != NULL) {
+			if (tunnel->magic != L2TP_TUNNEL_MAGIC) {
+				printk(KERN_ERR "%s: %s:%d: BAD TUNNEL MAGIC "
+				       "( tunnel=%p magic=%x )\n",
+				       __FUNCTION__, __FILE__, __LINE__, 
+				       tunnel, tunnel->magic);
+				goto out;
+			}
+
+			/* Delete tunnel context if this was the last session
+			 * on the tunnel.  This was allocated when the first
+			 * session was created on the tunnel. See
+			 * pppol2tp_prepare_tunnel_socket().
+			 *
+			 * This stays inside the NULL check: the tunnel
+			 * pointer is dereferenced here.
+			 */
+			DPRINTK(tunnel->debug, "%s: session_count=%d\n", 
+				tunnel->name, 
+				atomic_read(&tunnel->session_count));
+			if (atomic_dec_and_test(&tunnel->session_count)) {
+				pppol2tp_tunnel_free(tunnel);
+			}
+		}
+	}
+
+	/* kfree(NULL) is a no-op, so no guards are needed */
+	kfree(po);
+	kfree(session);
+
+out:
+	EXIT_FUNCTION;
+}
+
+/* Called when the PPPoX socket (session) is closed.
+ *
+ * @sock: the socket being released; sock->sk is cleared on the normal
+ *        path.
+ *
+ * Returns 0 when there is no sock attached, -EBADF when the sock is
+ * already flagged SOCK_DEAD, otherwise the value of 'error' (0 unless
+ * the session lookup macro sets it).
+ *
+ * On the normal path the session is removed from its tunnel's hash, the
+ * socket is unbound if needed, marked PPPOX_DEAD and orphaned, its
+ * queues are purged and one reference is dropped with sock_put().
+ */
+static int pppol2tp_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct pppol2tp_session *session = NULL;
+	struct pppol2tp_tunnel *tunnel;
+	int error = 0;
+	ENTER_FUNCTION;
+
+	if (!sk)
+		return 0;
+
+	if (sock_flag(sk, SOCK_DEAD) != 0)
+		return -EBADF;
+
+	if (sk->sk_user_data) {	    /* Was this socket actually connected? */
+		SOCK_2_SESSION(sk, session, error, -EBADF, end, 0);
+
+		/* Don't use SOCK_2_TUNNEL() here to get the tunnel context
+		 * because the tunnel socket might have already been closed
+		 * (its sk->sk_user_data will be NULL) so use the session's
+		 * private tunnel ptr instead.
+		 */
+		tunnel = session->tunnel;
+		if (tunnel != NULL) {
+			if (tunnel->magic == L2TP_TUNNEL_MAGIC) {
+				/* Delete the session socket from the hash */
+				write_lock_bh(&tunnel->hlist_lock);
+				hlist_del_init(&session->hlist);
+				write_unlock_bh(&tunnel->hlist_lock);
+			} else {
+				printk(KERN_ERR "%s: %s:%d: BAD TUNNEL MAGIC "
+				       "( tunnel=%p magic=%x )\n",
+				       __FUNCTION__, __FILE__, __LINE__, 
+				       tunnel, tunnel->magic);
+				/* NOTE(review): this path returns without
+				 * tearing the socket down or dropping its
+				 * reference - confirm that is intended.
+				 */
+				goto end;
+			}
+		}
+	}
+
+	lock_sock(sk);
+
+	if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND))
+		pppox_unbind_sock(sk);
+
+	/* Signal the death of the socket. */
+	sk->sk_state = PPPOX_DEAD;
+	sock_orphan(sk);
+	sock->sk = NULL;
+
+	/* Purge any queued data */
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+
+	release_sock(sk);
+
+	if (session != NULL)
+		DPRINTK(session->debug, "calling sock_put; refcnt=%d\n", 
+			session->sock->sk_refcnt.counter);
+	/* Per the comment on pppol2tp_session_destruct(), the sock is
+	 * destroyed there once its refcnt reaches zero.
+	 */
+	sock_put(sk);
+
+end:
+	EXIT_FUNCTION;
+	return error;
+}
+
+/* Internal function to prepare a tunnel (UDP) socket to have PPPoX sockets
+ * attached to it
+ *
+ * @fd:        file descriptor of the tunnel socket in the calling process.
+ * @tunnel_id: local tunnel id; used for the tunnel's name and stats.
+ * @error:     out parameter. Set to 0 on every successful return and to a
+ *             negative errno on failure.
+ *
+ * Returns the tunnel's struct sock, or NULL on failure. If the socket
+ * already carries a tunnel context that context is reused; otherwise a
+ * new one is allocated, the socket's proto, data_ready and destruct
+ * hooks are replaced (the originals are saved in the tunnel) and the
+ * tunnel is added to pppol2tp_tunnel_list.
+ *
+ * NOTE(review): the socket reference obtained from sockfd_lookup() is
+ * dropped before returning, so the returned sock is not pinned by this
+ * function - confirm what keeps it alive for the caller.
+ */
+static struct sock *pppol2tp_prepare_tunnel_socket(int fd, u16 tunnel_id, 
+						   int *error)
+{
+	int err;
+	struct socket *sock = NULL;
+	struct sock *sk;
+	struct pppol2tp_tunnel *tunnel;
+	struct sock *ret = NULL;
+
+	ENTER_FUNCTION;
+
+	/* Get the socket from the fd */
+	err = -EBADF;
+	sock = sockfd_lookup(fd, &err);
+	if (!sock) {
+		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR, 
+		       "tunl %hu: sockfd_lookup(fd=%d) returned %d\n", 
+		       tunnel_id, fd, err);
+		goto err;
+	}
+
+	/* Quick sanity checks */
+	err = -ESOCKTNOSUPPORT;
+	if (sock->type != SOCK_DGRAM) {
+		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR, 
+		       "tunl %hu: fd %d wrong type, got %d, expected %d\n", 
+		       tunnel_id, fd, sock->type, SOCK_DGRAM);
+		goto err;
+	}
+	err = -EAFNOSUPPORT;
+	if (sock->ops->family!=AF_INET) {
+		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR, 
+		       "tunl %hu: fd %d wrong family, got %d, expected %d\n", 
+		       tunnel_id, fd, sock->ops->family, AF_INET);
+		goto err;
+	}
+
+	err = -ENOTCONN;
+	sk = sock->sk;
+
+	/* Check if this socket has already been prepped */
+	tunnel = (struct pppol2tp_tunnel *)sk->sk_user_data;
+	if (tunnel != NULL) {
+		/* User-data field already set */
+		err = -EBUSY;
+		if (tunnel->magic != L2TP_TUNNEL_MAGIC) {
+			printk(KERN_ERR "%s: %s:%d: BAD TUNNEL MAGIC "
+			       "( tunnel=%p magic=%x )\n",
+			       __FUNCTION__, __FILE__, __LINE__, 
+			       tunnel, tunnel->magic);
+			goto err;
+		}
+
+		/* This socket has already been prepped. This is a success
+		 * return too, so report it as such through *error instead
+		 * of leaving the caller's previous value in place.
+		 */
+		ret = tunnel->sock;
+		*error = 0;
+		goto out;
+	}
+
+	/* This socket is available and needs prepping. Create a new tunnel
+	 * context and init it.
+	 */
+	sk->sk_user_data = tunnel = kmalloc(sizeof(struct pppol2tp_tunnel), GFP_KERNEL);
+	if (sk->sk_user_data == NULL) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	memset(tunnel, 0, sizeof(struct pppol2tp_tunnel));
+
+	tunnel->magic = L2TP_TUNNEL_MAGIC;
+	sprintf(&tunnel->name[0], "tunl %hu", tunnel_id);
+
+	tunnel->stats.tunnel_id = tunnel_id;
+
+	tunnel->debug = PPPOL2TP_DEFAULT_DEBUG_FLAGS;
+
+	DPRINTK(tunnel->debug, "tunl %hu: allocated tunnel=%p, sk=%p, sock=%p\n", 
+		tunnel_id, tunnel, sk, sock);
+
+	/* Setup the new protocol stuff: keep one copy of the original
+	 * proto for calling into it (old_proto) and point the socket at a
+	 * second, private copy (l2tp_proto).
+	 */
+	tunnel->old_proto  = *sk->sk_prot;
+	tunnel->l2tp_proto = *sk->sk_prot;
+
+	sk->sk_prot = &tunnel->l2tp_proto;
+
+	/* Hook data_ready and destruct, remembering the originals */
+	tunnel->old_data_ready = sk->sk_data_ready;
+	sk->sk_data_ready	       = &pppol2tp_data_ready;
+
+	tunnel->old_sk_destruct = sk->sk_destruct;
+	sk->sk_destruct		= &pppol2tp_tunnel_destruct;
+
+	tunnel->sock   = sk;
+	sk->sk_allocation = GFP_ATOMIC;
+
+	rwlock_init(&tunnel->hlist_lock);
+
+	/* Add tunnel to our list */
+	INIT_LIST_HEAD(&tunnel->list);
+	list_add(&tunnel->list, &pppol2tp_tunnel_list);
+
+	ret = tunnel->sock;
+
+	*error = 0;
+out:
+	if (sock)
+		sockfd_put(sock);
+	EXIT_FUNCTION;
+
+	return ret;
+
+err:
+	*error = err;
+	goto out;
+}
+
+/* socket() handler. Initialize a new struct sock.
+ *
+ * @sock: the socket being created.
+ *
+ * Returns 0 on success or -ENOMEM.
+ *
+ * Both allocations are made before the new sock is attached to @sock.
+ * sock_init_data() and the ops assignment publish the sock to the socket
+ * layer; failing after that point would leave @sock referring to memory
+ * that has been freed. A failure here therefore leaves @sock untouched.
+ */
+static int pppol2tp_create(struct socket *sock)
+{
+	int error = -ENOMEM;
+	struct sock *sk;
+	struct pppox_opt *po;
+
+	ENTER_FUNCTION;
+	DPRINTK(-1, "sock=%p\n", sock);
+
+	sk = sk_alloc(PF_PPPOX, GFP_KERNEL, 1, NULL);
+	if (!sk)
+		goto out;
+
+	po = kmalloc(sizeof(struct pppox_opt), GFP_KERNEL);
+	if (!po)
+		goto free_sk;
+
+	memset((void *) po, 0, sizeof(*po));
+
+	/* Nothing can fail from here on */
+	sock_init_data(sock, sk);
+
+	/* FIXME: Not sure why the module use counter is zero when we
+	 * get here.  Similar socket code doesn't seem to bump its
+	 * module use count before calling sk_set_owner(), so why
+	 * pppol2tp?
+	 */
+	if (try_module_get(THIS_MODULE)) {
+		sk_set_owner(sk, THIS_MODULE);
+		module_put(THIS_MODULE);
+	}
+
+	sock->state  = SS_UNCONNECTED;
+	sock->ops    = &pppol2tp_ops;
+
+	sk->sk_backlog_rcv = pppol2tp_recv_core;
+	sk->sk_protocol    = PX_PROTO_OL2TP;
+	sk->sk_family      = PF_PPPOX;
+	sk->sk_state       = PPPOX_NONE;
+	sk->sk_type        = SOCK_STREAM;
+	sk->sk_destruct    = pppol2tp_session_destruct;
+
+	sk->sk_protinfo = po;
+	po->sk = sk;
+
+	sock->sk = sk;
+
+	error = 0;
+	goto out;
+
+free_sk:
+	/* sk has not been attached to anything yet */
+	sk_free(sk);
+out:
+	EXIT_FUNCTION;
+	return error;
+}
+
+/* connect() handler.  Attach a PPPoX socket to a tunnel UDP socket
+ *
+ * @sock:         the PPPoL2TP socket to attach.
+ * @uservaddr:    a struct sockaddr_pppol2tp naming the tunnel socket fd
+ *                and the tunnel/session ids.
+ * @sockaddr_len: length of @uservaddr.
+ * @flags:        only logged.
+ *
+ * Returns 0 on success; -EINVAL for a short address, a wrong protocol or
+ * a zero s_tunnel; -EBUSY if the socket is already connected or
+ * attached; -ENOMEM; -EBADF from the tunnel context lookup; or the error
+ * from pppol2tp_prepare_tunnel_socket() / ppp_register_channel().
+ *
+ * A session with source and destination session id 0 is a tunnel
+ * management socket: it gets a session context but is neither hashed nor
+ * registered as a PPP channel.
+ *
+ * On failure every step taken for the new session is undone, so the
+ * socket is left unattached and connect() may be tried again.
+ */
+int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
+		     int sockaddr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
+	struct pppox_opt *po = pppox_sk(sk);
+	struct sock *tunnel_sock = NULL;
+	struct pppol2tp_session *session = NULL;
+	struct pppol2tp_tunnel *tunnel;
+	struct dst_entry *dst;
+	int error = 0;
+
+	ENTER_FUNCTION;
+
+	DPRINTK(-1, "sock=%p, uservaddr=%p, sockaddr_len=%d, flags=%d\n", 
+		sock, uservaddr, sockaddr_len, flags);
+	lock_sock(sk);
+
+	/* The address must cover every field that is read below */
+	error = -EINVAL;
+	if (sockaddr_len < (int) sizeof(struct sockaddr_pppol2tp))
+		goto end;
+
+	if (sp->sa_protocol != PX_PROTO_OL2TP)
+		goto end;
+
+	/* Check for already bound sockets */
+	error = -EBUSY;
+	if (sk->sk_state & PPPOX_CONNECTED)
+		goto end;
+
+	/* We don't support rebinding anyway */
+	if (sk->sk_user_data)
+		goto end; /* socket is already attached */
+
+	/* Don't bind if s_tunnel is 0 */
+	error = -EINVAL;
+	if (sp->pppol2tp.s_tunnel == 0)
+		goto end;
+
+	/* This looks up the tunnel socket and configures it if necessary */
+	tunnel_sock = 
+		pppol2tp_prepare_tunnel_socket(sp->pppol2tp.fd, 
+					       sp->pppol2tp.s_tunnel, 
+					       &error);
+	if (tunnel_sock == NULL)
+		goto end;
+	tunnel = tunnel_sock->sk_user_data;
+
+	/* Allocate and initialize a new session context.
+	 */
+	session = kmalloc(sizeof(struct pppol2tp_session), GFP_KERNEL);
+	if (session == NULL) {
+		error = -ENOMEM;
+		goto end;
+	}
+
+	memset(session, 0, sizeof(struct pppol2tp_session));
+
+	session->magic	     = L2TP_SESSION_MAGIC;
+	session->owner	     = current->pid;
+	session->sock	     = sk;
+	session->tunnel	     = tunnel;
+	session->tunnel_sock = tunnel_sock;
+	session->tunnel_addr = sp->pppol2tp;
+	sprintf(&session->name[0], "sess %hu/%hu", 
+		session->tunnel_addr.s_tunnel, 
+		session->tunnel_addr.s_session);
+
+	session->stats.tunnel_id  = session->tunnel_addr.s_tunnel;
+	session->stats.session_id = session->tunnel_addr.s_session;
+
+	INIT_HLIST_NODE(&session->hlist);
+
+	session->debug = PPPOL2TP_DEFAULT_DEBUG_FLAGS;
+
+	/* Default MTU must allow space for UDP/L2TP/PPP
+	 * headers. Leave some slack. 
+	 */
+	session->mtu = session->mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+
+	/* If PMTU discovery was enabled, use the MTU that was discovered.
+	 * Use the dst we hold a reference on rather than fetching the
+	 * socket's dst a second time.
+	 * NOTE(review): this queries the PPPoX socket's dst, not the
+	 * tunnel socket's - confirm which one is intended.
+	 */
+	dst = sk_dst_get(sk);
+	if (dst != NULL) {
+		u32 pmtu = dst_pmtu(dst);
+		if (pmtu != 0) {
+			session->mtu = session->mru = pmtu - 
+				PPPOL2TP_HEADER_OVERHEAD;
+			DPRINTK(session->debug, 
+				"%s: MTU set by Path MTU discovery: mtu=%d\n",
+				session->name, session->mtu);
+		}
+		dst_release(dst);
+	}
+
+	/* Special case: if source & dest session_id == 0x0000, this socket is
+	 * being created to manage the tunnel. Don't add the session to the
+	 * session hash list, just set up the internal context for use by
+	 * ioctl() and sockopt() handlers.
+	 */
+	if ((session->tunnel_addr.s_session == 0) &&
+	    (session->tunnel_addr.d_session == 0)) {
+		error = 0;
+		DPRINTK(session->debug, 
+			"tunl %hu: socket created for tunnel mgmt ops\n", 
+			session->tunnel_addr.s_tunnel);
+		sk->sk_user_data = session;
+		goto out_no_ppp;
+	}
+
+	DPRINTK(session->debug, "%s: allocated session=%p, sock=%p, owner=%d\n", 
+		session->name, session, sk, session->owner);
+
+	/* Add session to the tunnel's hash list */
+	SOCK_2_TUNNEL(tunnel_sock, tunnel, error, -EBADF, free_session, 0);
+	write_lock_bh(&tunnel->hlist_lock);
+	hlist_add_head(&session->hlist, 
+		       pppol2tp_session_id_hash(tunnel, 
+						session->tunnel_addr.s_session));
+	write_unlock_bh(&tunnel->hlist_lock);
+
+	/* This is how we get the session context from the socket. */
+	sk->sk_user_data = session;
+
+	/* We don't store any more options in the pppox_opt, everything is in
+	 * user_data (struct pppol2tp_session)
+	 */
+	po->sk = sk;
+
+	/* Right now, because we don't have a way to push the incoming skb's
+	 * straight through the UDP layer, the only header we need to worry
+	 * about is the L2TP header. This size is different depending on
+	 * whether sequence numbers are enabled for the data channel.
+	 */
+	po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+
+	po->chan.private = sk;
+	po->chan.ops	 = &pppol2tp_chan_ops;
+
+	error = ppp_register_channel(&po->chan);
+	if (error)
+		goto unhash_session;
+
+out_no_ppp:
+	atomic_inc(&tunnel->session_count);
+	sk->sk_state = PPPOX_CONNECTED;
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+	       "%s: created\n", session->name);
+	goto end;
+
+unhash_session:
+	/* The session never became active: detach it again. It was not
+	 * counted in tunnel->session_count, so the socket's destructor
+	 * must not find it and decrement that count.
+	 */
+	write_lock_bh(&tunnel->hlist_lock);
+	hlist_del_init(&session->hlist);
+	write_unlock_bh(&tunnel->hlist_lock);
+	sk->sk_user_data = NULL;
+free_session:
+	/* Only here is there a session context to name in the log */
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_WARNING, 
+	       "%s: connect failed: %d\n", session->name, error);
+	kfree(session);
+end:
+	release_sock(sk);
+
+	EXIT_FUNCTION;
+
+	return error;
+}
+
+/* getname() support.
+ *
+ * @sock:          a connected PPPoL2TP socket.
+ * @uaddr:         buffer that receives a struct sockaddr_pppol2tp.
+ * @usockaddr_len: receives sizeof(struct sockaddr_pppol2tp).
+ * @peer:          unused; the same address is returned either way.
+ *
+ * Returns 0, -ENOTCONN if the socket is not connected, or -EBADF from
+ * the session lookup.
+ */
+static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
+			    int *usockaddr_len, int peer)
+{
+	int len = sizeof(struct sockaddr_pppol2tp);
+	struct sockaddr_pppol2tp sp;
+	int error = 0;
+	struct pppol2tp_session *session;
+	struct sock *sk = sock->sk;
+
+	ENTER_FUNCTION;
+
+	/* sk_state is a set of flags: test the CONNECTED bit so that a
+	 * socket which is additionally BOUND still counts as connected.
+	 */
+	error = -ENOTCONN;
+	if (!(sk->sk_state & PPPOX_CONNECTED))
+		goto end;
+
+	SOCK_2_SESSION(sk, session, error, -EBADF, end, 0);
+
+	/* Clear the whole structure first so that no uninitialised stack
+	 * bytes (padding, unset fields) are copied out to the caller.
+	 */
+	memset(&sp, 0, len);
+	sp.sa_family	= AF_PPPOX;
+	sp.sa_protocol	= PX_PROTO_OL2TP;
+	memcpy(&sp.pppol2tp, &session->tunnel_addr,
+	       sizeof(struct pppol2tp_addr));
+
+	memcpy(uaddr, &sp, len);
+
+	*usockaddr_len = len;
+
+	error = 0;
+end:
+	EXIT_FUNCTION;
+	return error;
+}
+
+/****************************************************************************
+ * ioctl() handlers.
+ *
+ * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
+ * sockets. However, in order to control kernel tunnel features, we allow
+ * userspace to create a special "tunnel" PPPoX socket which is used for
+ * control only.  Tunnel PPPoX sockets have session_id == 0 and simply allow
+ * the user application to issue L2TP setsockopt(), getsockopt() and ioctl()
+ * calls.
+ ****************************************************************************/
+
+/* Session ioctl helper.
+ *
+ * @session: session context the ioctl applies to.
+ * @cmd:     one of SIOCGIFMTU, SIOCSIFMTU, PPPIOCGMRU, PPPIOCSMRU,
+ *           PPPIOCGFLAGS, PPPIOCSFLAGS, PPPIOCGL2TPSTATS.
+ * @arg:     user-space pointer whose type depends on @cmd (struct ifreq
+ *           for the MTU commands, int for MRU/flags, the stats structure
+ *           for PPPIOCGL2TPSTATS).
+ *
+ * Returns 0 on success, -ENXIO if the command needs a connected socket
+ * and it is not connected, -EFAULT if user memory cannot be accessed,
+ * -ENOSYS for an unknown command.
+ *
+ * A reference on the session socket is held for the duration of the call.
+ */
+static int pppol2tp_session_ioctl(struct pppol2tp_session *session, 
+				  unsigned int cmd, unsigned long arg)
+{
+	struct ifreq ifr;
+	int err = 0;
+	struct sock *sk = session->sock;
+	int val = (int) arg;
+
+	sock_hold(sk);
+
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG, 
+	       "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n", 
+	       session->name, cmd, arg);
+
+	switch (cmd) {
+	case SIOCGIFMTU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		/* Read-modify-write: only ifr_mtu is replaced */
+		err = -EFAULT;
+		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
+			break;
+		ifr.ifr_mtu = session->mtu;
+		if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+		       "%s: get mtu=%d\n", session->name, session->mtu);
+		err = 0;
+		break;
+
+	case SIOCSIFMTU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
+			break;
+
+		session->mtu = ifr.ifr_mtu;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+		       "%s: set mtu=%d\n", session->name, session->mtu);
+		err = 0;
+		break;
+
+	case PPPIOCGMRU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (put_user(session->mru, (int __user *) arg))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+		       "%s: get mru=%d\n", session->name, session->mru);
+		err = 0;
+		break;
+
+	case PPPIOCSMRU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (get_user(val,(int __user *) arg))
+			break;
+
+		session->mru = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+		       "%s: set mru=%d\n", session->name, session->mru);
+		err = 0;
+		break;
+
+	case PPPIOCGFLAGS:
+		err = -EFAULT;
+		if (put_user(session->flags, (int __user *) arg))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+		       "%s: get flags=%d\n", session->name, session->flags);
+		err = 0;
+		break;
+
+	case PPPIOCSFLAGS:
+		err = -EFAULT;
+		if (get_user(val, (int __user *) arg))
+			break;
+		session->flags = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+		       "%s: set flags=%d\n", session->name, session->flags);
+		err = 0;
+		break;
+
+	case PPPIOCGL2TPSTATS:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		/* A failed copy is a bad user pointer, not a missing
+		 * device: report -EFAULT like the other commands do.
+		 */
+		err = -EFAULT;
+		if (copy_to_user((void __user *) arg, &session->stats, 
+				 sizeof(session->stats)))
+			break;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+		       "%s: get L2TP stats\n", session->name);
+		err = 0;
+		break;
+
+	default:
+		err = -ENOSYS;
+		break;
+	}
+
+	sock_put(sk);
+
+	return err;
+}
+
+/* Tunnel ioctl helper.
+ *
+ * Only PPPIOCGL2TPSTATS is handled; every other command yields -ENOSYS.
+ *
+ * The request structure is read from user space first. A non-zero
+ * session_id in it redirects the request to pppol2tp_session_ioctl() for
+ * that session of this tunnel (-EBADR if no such session exists), which
+ * lets an application fetch session stats through a tunnel socket.
+ * Otherwise the tunnel's own stats are copied out.
+ *
+ * Returns 0 on success, -ENXIO if the state check on the tunnel socket
+ * fails, -EFAULT on a user copy failure, -EBADR or -ENOSYS as above.
+ */
+static int pppol2tp_tunnel_ioctl(struct pppol2tp_tunnel *tunnel, 
+				 unsigned int cmd, unsigned long arg)
+{
+	struct pppol2tp_ioc_stats stats_req;
+	struct pppol2tp_session *session;
+	struct sock *sk = tunnel->sock;
+	int err;
+
+	/* Pin the tunnel socket while we work on it */
+	sock_hold(sk);
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG, 
+	       "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n", tunnel->name, 
+	       cmd, arg);
+
+	if (cmd != PPPIOCGL2TPSTATS) {
+		err = -ENOSYS;
+		goto done;
+	}
+
+	err = -ENXIO;
+	if (!(sk->sk_state & PPPOX_CONNECTED))
+		goto done;
+
+	if (copy_from_user(&stats_req, (void __user *) arg, 
+			   sizeof(stats_req))) {
+		err = -EFAULT;
+		goto done;
+	}
+
+	if (stats_req.session_id != 0) {
+		/* Request is for one session: hand it to the session
+		 * ioctl handler.
+		 */
+		session = pppol2tp_session_find(tunnel, stats_req.session_id);
+		if (session == NULL)
+			err = -EBADR;
+		else
+			err = pppol2tp_session_ioctl(session, cmd, arg);
+		goto done;
+	}
+
+#ifdef CONFIG_XFRM
+	/* Report whether either policy slot of the socket is in use */
+	tunnel->stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0;
+#endif
+	if (copy_to_user((void __user *) arg, &tunnel->stats, 
+			 sizeof(tunnel->stats))) {
+		err = -EFAULT;
+		goto done;
+	}
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+	       "%s: get L2TP stats\n", tunnel->name);
+	err = 0;
+
+done:
+	sock_put(sk);
+
+	return err;
+}
+
+/* Main ioctl() handler.
+ *
+ * Looks up the session (and its tunnel) behind the socket and forwards
+ * the request: a session whose source and destination session ids are
+ * both zero is a tunnel management socket and goes to
+ * pppol2tp_tunnel_ioctl(); anything else goes to
+ * pppol2tp_session_ioctl().
+ *
+ * Returns 0 if the socket has no sock attached, -EBADF for a dead sock
+ * or a failed context lookup, -ENOTCONN if the socket is not attached to
+ * a session, otherwise the helper's result.
+ */
+static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	struct pppol2tp_session *session;
+	struct pppol2tp_tunnel *tunnel;
+	int err = 0;
+
+	ENTER_FUNCTION;
+
+	if (sk == NULL)
+		return 0;
+
+	if (sock_flag(sk, SOCK_DEAD))
+		return -EBADF;
+
+	/* Needs a session context and a connected or bound state */
+	if (sk->sk_user_data == NULL ||
+	    (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) == 0) {
+		DPRINTK(-1, "ioctl: socket %p not connected.\n", sk);
+		err = -ENOTCONN;
+		goto end;
+	}
+
+	SOCK_2_SESSION(sk, session, err, -EBADF, end, 0);
+	SOCK_2_TUNNEL(session->tunnel_sock, tunnel, err, -EBADF, end, 1);
+
+	/* Session ids 0/0 mark the special tunnel control socket */
+	if (session->tunnel_addr.s_session != 0 ||
+	    session->tunnel_addr.d_session != 0)
+		err = pppol2tp_session_ioctl(session, cmd, arg);
+	else
+		err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
+
+end:
+	EXIT_FUNCTION;
+	return err;
+}
+
+/*****************************************************************************
+ * setsockopt() / getsockopt() support.
+ *
+ * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
+ * sockets. In order to control kernel tunnel features, we allow userspace to
+ * create a special "tunnel" PPPoX socket which is used for control only.
+ * Tunnel PPPoX sockets have session_id == 0 and simply allow the user
+ * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls.
+ *****************************************************************************/
+
+/* Tunnel setsockopt() helper.
+ *
+ * The only option understood is PPPOL2TP_SO_DEBUG, which stores @val as
+ * the tunnel's debug mask. Returns 0 for it and -ENOPROTOOPT for any
+ * other option. @sk is not used.
+ */
+static int pppol2tp_tunnel_setsockopt(struct sock *sk,
+				      struct pppol2tp_tunnel *tunnel, 
+				      int optname, int val)
+{
+	if (optname != PPPOL2TP_SO_DEBUG)
+		return -ENOPROTOOPT;
+
+	tunnel->debug = val;
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+	       "%s: set debug=%x\n", tunnel->name, tunnel->debug);
+
+	return 0;
+}
+
+/* Session setsockopt helper.
+ *
+ * Options:
+ *   PPPOL2TP_SO_RECVSEQ   - @val 0 or 1; sets session->recv_seq.
+ *   PPPOL2TP_SO_SENDSEQ   - @val 0 or 1; sets session->send_seq and the
+ *                           PPP channel's hdrlen to the matching L2TP
+ *                           header size.
+ *   PPPOL2TP_SO_LNSMODE   - @val 0 or 1; sets session->lns_mode.
+ *   PPPOL2TP_SO_DEBUG     - @val becomes the session debug mask.
+ *   PPPOL2TP_SO_REORDERTO - @val converted with MS_TO_JIFFIES() becomes
+ *                           the reorder timeout; a non-zero result also
+ *                           triggers the "not yet implemented" warning.
+ *
+ * Returns 0, -EINVAL when a 0/1 option gets another value, or
+ * -ENOPROTOOPT for an unknown option. The @sk argument is not used; the
+ * socket is taken from session->sock where it is needed.
+ */
+static int pppol2tp_session_setsockopt(struct sock *sk,
+				       struct pppol2tp_session *session, 
+				       int optname, int val)
+{
+	/* Three of the options accept nothing but 0 or 1 */
+	const int is_bool = (val == 0) || (val == 1);
+	struct pppox_opt *chan_po;
+	int err = 0;
+
+	switch (optname) {
+	case PPPOL2TP_SO_RECVSEQ:
+		if (!is_bool) {
+			err = -EINVAL;
+			break;
+		}
+		session->recv_seq = val ? -1 : 0;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+		       "%s: set recv_seq=%d\n", session->name, 
+		       session->recv_seq);
+		break;
+
+	case PPPOL2TP_SO_SENDSEQ:
+		if (!is_bool) {
+			err = -EINVAL;
+			break;
+		}
+		session->send_seq = val ? -1 : 0;
+
+		/* FIXME: is it safe to change the ppp channel's
+		 * hdrlen on the fly?
+		 */
+		chan_po = pppox_sk(session->sock);
+		if (val)
+			chan_po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_SEQ;
+		else
+			chan_po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+		       "%s: set send_seq=%d\n", session->name, session->send_seq);
+		break;
+
+	case PPPOL2TP_SO_LNSMODE:
+		if (!is_bool) {
+			err = -EINVAL;
+			break;
+		}
+		session->lns_mode = val ? -1 : 0;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+		       "%s: set lns_mode=%d\n", session->name, 
+		       session->lns_mode);
+		break;
+
+	case PPPOL2TP_SO_DEBUG:
+		session->debug = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+		       "%s: set debug=%x\n", session->name, session->debug);
+		break;
+
+	case PPPOL2TP_SO_REORDERTO:
+		session->reorder_timeout = MS_TO_JIFFIES(val);
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, 
+		       "%s: set reorder_timeout=%d\n", session->name, 
+		       session->reorder_timeout);
+		if (session->reorder_timeout != 0)
+			pppol2tp_warn_not_yet_implemented(session->debug, "SO_REORDERTO");
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	return err;
+}
+
+/* Main setsockopt() entry point.
+ * Does API checks, then calls either the tunnel or session setsockopt
+ * handler, according to whether the PPPoL2TP socket is for a regular
+ * session or the special tunnel type.
+ *
+ * Returns 0 on success or a negative errno: -ESOCKTNOSUPPORT if level is
+ * not SOL_PPPOL2TP, -EINVAL if optlen is smaller than an int, -EFAULT if
+ * the option value cannot be read from userspace, -ENOTCONN if the socket
+ * has no sk_user_data, otherwise the result of the selected helper.
+ * -EBADF is the error code handed to the SOCK_2_SESSION/SOCK_2_TUNNEL
+ * context lookups.
+ */
+static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, 
+			       char *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct pppol2tp_session *session = sk->sk_user_data;
+	struct pppol2tp_tunnel *tunnel;
+	int val;
+	int err = 0;
+
+	if (level != SOL_PPPOL2TP)
+		return -ESOCKTNOSUPPORT;
+
+	if (optlen<sizeof(int))
+		return -EINVAL;
+
+	/* Every option handled here takes a single int argument */
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	if (sk->sk_user_data == NULL) {
+		err = -ENOTCONN;
+		DPRINTK(-1, "setsockopt: socket %p not connected.\n", sk);
+		goto end;
+	}
+
+	/* NOTE(review): these macros are defined outside this chunk; they
+	 * presumably fill in session/tunnel and jump to 'end' with err set
+	 * on failure -- confirm against their definitions.
+	 */
+	SOCK_2_SESSION(sk, session, err, -EBADF, end, 0);
+	SOCK_2_TUNNEL(session->tunnel_sock, tunnel, err, -EBADF, end, 1);
+
+	/* The helpers run with the socket locked */
+	lock_sock(sk);
+
+	/* Special case: if session_id == 0x0000, treat as operation on tunnel
+	 */
+	if ((session->tunnel_addr.s_session == 0) &&
+	    (session->tunnel_addr.d_session == 0))
+		err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
+	else
+		err = pppol2tp_session_setsockopt(sk, session, optname, val);
+	
+	release_sock(sk);
+end:
+	return err;
+}
+
+/* Read a socket option from the tunnel context of a "tunnel" PPPoX socket.
+ * PPPOL2TP_SO_DEBUG is the only option handled here; its value is stored
+ * in *val. Every other optname is rejected with -ENOPROTOOPT and *val is
+ * left untouched. Returns 0 on success.
+ */
+static int pppol2tp_tunnel_getsockopt(struct sock *sk,
+				      struct pppol2tp_tunnel *tunnel,
+				      int optname, int *val)
+{
+	if (optname != PPPOL2TP_SO_DEBUG)
+		return -ENOPROTOOPT;
+
+	*val = tunnel->debug;
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: get debug=%x\n", tunnel->name, tunnel->debug);
+
+	return 0;
+}
+
+/* Read a socket option from a session context into *val.
+ * Handles RECVSEQ, SENDSEQ, LNSMODE, DEBUG and REORDERTO (the latter
+ * converted from jiffies back to milliseconds). Unknown options yield
+ * -ENOPROTOOPT and leave *val untouched. Returns 0 on success.
+ */
+static int pppol2tp_session_getsockopt(struct sock *sk,
+				       struct pppol2tp_session *session,
+				       int optname, int *val)
+{
+	switch (optname) {
+	case PPPOL2TP_SO_RECVSEQ:
+		*val = session->recv_seq;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get recv_seq=%d\n", session->name, *val);
+		return 0;
+
+	case PPPOL2TP_SO_SENDSEQ:
+		*val = session->send_seq;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get send_seq=%d\n", session->name, *val);
+		return 0;
+
+	case PPPOL2TP_SO_LNSMODE:
+		*val = session->lns_mode;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get lns_mode=%d\n", session->name, *val);
+		return 0;
+
+	case PPPOL2TP_SO_DEBUG:
+		*val = session->debug;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get debug=%d\n", session->name, *val);
+		return 0;
+
+	case PPPOL2TP_SO_REORDERTO:
+		*val = JIFFIES_TO_MS(session->reorder_timeout);
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get reorder_timeout=%d\n", session->name, *val);
+		return 0;
+
+	default:
+		break;
+	}
+
+	return -ENOPROTOOPT;
+}
+
+/* Main getsockopt() entry point.
+ * Does API checks, then calls either the tunnel or session getsockopt
+ * handler, according to whether the PPPoX socket is for a regular session
+ * or the special tunnel type.
+ *
+ * On success the option value (at most sizeof(int) bytes) is copied to
+ * optval and the number of bytes copied is written back to *optlen.
+ * Nothing is written to userspace when the lookup or the helper fails.
+ *
+ * Returns 0 on success or a negative errno: -ESOCKTNOSUPPORT if level is
+ * not SOL_PPPOL2TP, -EFAULT if a userspace access fails, -EINVAL if the
+ * supplied length is negative, -ENOTCONN if the socket has no
+ * sk_user_data, otherwise the result of the selected helper.
+ */
+static int pppol2tp_getsockopt(struct socket *sock, int level, 
+			       int optname, char *optval, int *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct pppol2tp_session *session = sk->sk_user_data;
+	struct pppol2tp_tunnel *tunnel;
+	int val = 0;	/* never hand uninitialised stack to userspace */
+	int len;
+	int err = 0;
+
+	if (level != SOL_PPPOL2TP)
+		return -ESOCKTNOSUPPORT;
+
+	if (get_user(len, (int __user *) optlen))
+		return -EFAULT;
+
+	/* Test the sign before clamping: min_t(unsigned int, ...) would
+	 * silently turn a negative length into sizeof(int).
+	 */
+	if (len < 0)
+		return -EINVAL;
+
+	len = min_t(unsigned int, len, sizeof(int));
+
+	if (sk->sk_user_data == NULL) {
+		err = -ENOTCONN;
+		DPRINTK(-1, "getsockopt: socket %p not connected.\n", sk);
+		goto end;
+	}
+
+	/* Get the session and tunnel contexts */
+	SOCK_2_SESSION(sk, session, err, -EBADF, end, 0);
+	SOCK_2_TUNNEL(session->tunnel_sock, tunnel, err, -EBADF, end, 1);
+
+	/* Special case: if session_id == 0x0000, treat as operation on tunnel */
+	if ((session->tunnel_addr.s_session == 0) &&
+	    (session->tunnel_addr.d_session == 0))
+		err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
+	else
+		err = pppol2tp_session_getsockopt(sk, session, optname, &val);
+
+	/* The helper did not produce a value; report the error only */
+	if (err)
+		goto end;
+
+	if (put_user(len, (int __user *) optlen))
+		return -EFAULT;
+
+	if (copy_to_user((void __user *) optval, &val, len))
+		return -EFAULT;
+
+end:
+	return err;
+}
+
+/*****************************************************************************
+ * /proc filesystem for debug
+ *****************************************************************************/
+
+#ifdef CONFIG_PROC_FS
+
+#include <linux/seq_file.h>
+
+static int pppol2tp_proc_open(struct inode *inode, struct file *file);
+static void *pppol2tp_proc_start(struct seq_file *m, loff_t *_pos);
+static void *pppol2tp_proc_next(struct seq_file *p, void *v, loff_t *pos);
+static void pppol2tp_proc_stop(struct seq_file *p, void *v);
+static int pppol2tp_proc_show(struct seq_file *m, void *v);
+
+static struct proc_dir_entry *pppol2tp_proc;
+
+/* seq_file iterator callbacks; installed by pppol2tp_proc_open() via
+ * seq_open().
+ */
+static struct seq_operations pppol2tp_proc_ops = {
+	.start		= pppol2tp_proc_start,
+	.next		= pppol2tp_proc_next,
+	.stop		= pppol2tp_proc_stop,
+	.show		= pppol2tp_proc_show,
+};
+
+/* File operations for the "pppol2tp" proc entry: only open is driver
+ * specific, read/llseek/release are the generic seq_file helpers.
+ */
+static struct file_operations pppol2tp_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= pppol2tp_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+/* open() handler for the proc entry: attach the seq_file iterator and
+ * copy the proc entry's data pointer into the seq_file's private field.
+ * Returns the seq_open() result.
+ */
+static int pppol2tp_proc_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ENTER_FUNCTION;
+
+	ret = seq_open(file, &pppol2tp_proc_ops);
+	if (ret >= 0) {
+		/* file->private_data is read back as the seq_file */
+		struct seq_file *seq = file->private_data;
+
+		seq->private = PDE(inode)->data;
+	}
+
+	EXIT_FUNCTION;
+	return ret;
+}
+
+/* seq_file start(): map the position *_pos to an iterator value.
+ * Position 0 is the header line and is represented by the sentinel
+ * (void *)1. Position n > 0 is the n'th entry of pppol2tp_tunnel_list;
+ * a reference is taken on that tunnel's socket before it is returned.
+ * Returns NULL when the position is past the end of the list.
+ */
+static void *pppol2tp_proc_start(struct seq_file *m, loff_t *_pos)
+{
+	struct pppol2tp_tunnel *entry;
+	void *found = NULL;
+	loff_t skip = *_pos;
+
+	ENTER_FUNCTION;
+
+	if (skip == 0) {
+		/* allow for the header line */
+		found = (void *)1;
+	} else {
+		skip--;
+
+		/* find the n'th element in the list */
+		list_for_each_entry(entry, &pppol2tp_tunnel_list, list) {
+			if (skip-- == 0) {
+				sock_hold(entry->sock);
+				found = entry;
+				break;
+			}
+		}
+	}
+
+	EXIT_FUNCTION;
+
+	return found;
+}
+
+/* seq_file next(): advance *pos and return the tunnel that follows v in
+ * pppol2tp_tunnel_list, or NULL at the end of the list. v is either the
+ * header sentinel (void *)1 or a tunnel previously returned by start()
+ * or next().
+ *
+ * The socket reference travels with the iterator: a reference is taken
+ * on the tunnel being returned and the one held on the tunnel being left
+ * is dropped, so the sock_put() in stop() always pairs with a
+ * sock_hold() on the same tunnel.
+ */
+static void *pppol2tp_proc_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct pppol2tp_tunnel *tunnel = v;
+	struct pppol2tp_tunnel *next = NULL;
+	struct list_head *list;
+
+	ENTER_FUNCTION;
+
+	(*pos)++;
+
+	if (v == (void *)1)
+		list = &pppol2tp_tunnel_list;
+	else
+		list = &tunnel->list;
+
+	if (list->next != &pppol2tp_tunnel_list) {
+		next = list_entry(list->next, struct pppol2tp_tunnel, list);
+		sock_hold(next->sock);
+	}
+
+	/* Release the previous tunnel only after its list link was read.
+	 * The header sentinel never had a reference.
+	 */
+	if (v != (void *)1)
+		sock_put(tunnel->sock);
+
+	EXIT_FUNCTION;
+
+	return next;
+}
+
+/* seq_file stop(): drop the socket reference held for the iterator value
+ * v. v may be NULL (end of list) or the header sentinel (void *)1, which
+ * is not a tunnel and must not be dereferenced.
+ */
+static void pppol2tp_proc_stop(struct seq_file *p, void *v)
+{
+	struct pppol2tp_tunnel *tunnel = v;
+
+	ENTER_FUNCTION;
+
+	if ((tunnel != NULL) && (v != (void *)1))
+		sock_put(tunnel->sock);
+
+	EXIT_FUNCTION;
+}
+
+/* seq_file show(): emit one record.
+ * For the header sentinel (void *)1 this prints the driver banner and a
+ * legend describing the record layout. Otherwise v is a tunnel: two
+ * lines of tunnel state are printed, followed by three lines for every
+ * session found in the tunnel's session hash table. Output for a record
+ * stops early if a tunnel or session magic number does not match.
+ * Every record is terminated by an empty line. Always returns 0.
+ */
+static int pppol2tp_proc_show(struct seq_file *m, void *v)
+{
+	struct pppol2tp_tunnel *tunnel = v;
+	struct pppol2tp_session *session;
+	struct hlist_node *walk;
+	struct hlist_node *tmp;
+	int i;
+
+	ENTER_FUNCTION;
+
+	/* display header on line 1 */
+	if (v == (void *)1) {
+		seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n");
+		seq_puts(m, "TUNNEL name, user-data-ok "
+			 "session-count magic-ok\n");
+		seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+		seq_puts(m, "  SESSION name, addr/port src-tid/sid "
+			 "dest-tid/sid state user-data-ok magic-ok\n");
+		seq_puts(m, "   mtu/mru/rcvseq/sendseq/lns debug reorderto\n");
+		seq_puts(m, "   nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+		goto out;
+	}
+
+	/* 'Y' when the tunnel socket's sk_user_data points back at this
+	 * tunnel
+	 */
+	seq_printf(m, "TUNNEL '%s', %c %d MAGIC %s\n", 
+		   tunnel->name,
+		   (tunnel == tunnel->sock->sk_user_data) ? 'Y':'N',
+		   atomic_read(&tunnel->session_count),
+		   (tunnel->magic == L2TP_TUNNEL_MAGIC) ? "OK" : "BAD");
+	seq_printf(m, " %08x %u/%u/%u %u/%u/%u\n",
+		   tunnel->debug,
+		   tunnel->stats.tx_packets, tunnel->stats.tx_bytes, 
+		   tunnel->stats.tx_errors,
+		   tunnel->stats.rx_packets, tunnel->stats.rx_bytes, 
+		   tunnel->stats.rx_errors);
+
+	/* Do not walk the session table of a tunnel whose magic is wrong */
+	if (tunnel->magic != L2TP_TUNNEL_MAGIC) {
+		seq_puts(m, "*** Aborting ***\n");
+		goto out;
+	}
+
+	/* Visit every bucket of the per-tunnel session hash table */
+	for (i = 0; i < PPPOL2TP_HASH_SIZE; i++) {
+		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[i]) {
+			session = hlist_entry(walk, struct pppol2tp_session, hlist);
+			/* NOTE(review): htonl()/htons() are applied to the
+			 * sockaddr_in fields, which are presumably already in
+			 * network byte order; ntohl()/ntohs() would state the
+			 * intent -- confirm.
+			 */
+			seq_printf(m, "  SESSION '%s' %08X/%d %04X/%04X -> "
+				   "%04X/%04X %d %c MAGIC %s\n",
+				   session->name,
+				   htonl(session->tunnel_addr.addr.sin_addr.s_addr),
+				   htons(session->tunnel_addr.addr.sin_port),
+				   session->tunnel_addr.s_tunnel, 
+				   session->tunnel_addr.s_session,
+				   session->tunnel_addr.d_tunnel, 
+				   session->tunnel_addr.d_session,
+				   session->sock->sk_state,
+				   (session == session->sock->sk_user_data) ? 
+				   'Y' : 'N',
+				   (session->magic == L2TP_SESSION_MAGIC) ? 
+				   "OK" : "BAD");
+
+			seq_printf(m, "   %d/%d/%c/%c/%s %08x %d\n",
+				   session->mtu, session->mru, 
+				   session->recv_seq ? 'R' : '-',
+				   session->send_seq ? 'S' : '-',
+				   session->lns_mode ? "LNS" : "LAC",
+				   session->debug,
+				   JIFFIES_TO_MS(session->reorder_timeout));
+			seq_printf(m, "   %hu/%hu %u/%u/%u %u/%u/%u\n",
+				   session->nr, session->ns,
+				   session->stats.tx_packets, 
+				   session->stats.tx_bytes, 
+				   session->stats.tx_errors,
+				   session->stats.rx_packets, 
+				   session->stats.rx_bytes, 
+				   session->stats.rx_errors);
+
+			/* Stop at the first session whose magic is wrong */
+			if (session->magic != L2TP_SESSION_MAGIC) {
+				seq_puts(m, "*** Aborting ***\n");
+				goto out;
+			}	
+		}
+	}
+out:
+	/* Blank line after every record, header included */
+	seq_puts(m, "\n");
+
+	EXIT_FUNCTION;
+
+	return 0;
+}
+
+#endif /* CONFIG_PROC_FS */
+
+/*****************************************************************************
+ * Init and cleanup
+ *****************************************************************************/
+
+/* Socket operations for PPPoL2TP sockets (family AF_PPPOX).
+ * bind, socketpair, accept, listen, shutdown and mmap are wired to the
+ * generic sock_no_*() stubs; poll uses datagram_poll.
+ */
+static struct proto_ops pppol2tp_ops = {
+	.family		= AF_PPPOX,
+	.owner		= THIS_MODULE,
+	.release	= pppol2tp_release,
+	.bind		= sock_no_bind,
+	.connect	= pppol2tp_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.getname	= pppol2tp_getname,
+	.poll		= datagram_poll,
+	.listen		= sock_no_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= pppol2tp_setsockopt,
+	.getsockopt	= pppol2tp_getsockopt,
+	.sendmsg	= pppol2tp_sendmsg,
+	.recvmsg	= pppol2tp_recvmsg,
+	.mmap		= sock_no_mmap
+};
+
+/* Handlers registered with the PPPoX layer for PX_PROTO_OL2TP by
+ * pppol2tp_init().
+ */
+struct pppox_proto pppol2tp_proto = {
+	.create		= pppol2tp_create,
+	.ioctl		= pppol2tp_ioctl
+};
+
+/* Module init: register the PPPoL2TP protocol with the PPPoX layer and,
+ * when CONFIG_PROC_FS is set, create the "pppol2tp" entry under proc_net.
+ *
+ * Returns 0 on success, the register_pppox_proto() error if registration
+ * fails, or -ENOMEM if the proc entry cannot be created. In the latter
+ * case the protocol registration is undone so that a failed load leaves
+ * nothing registered.
+ */
+int __init pppol2tp_init(void)
+{
+	int err = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto);
+
+	if (err != 0)
+		return err;
+
+#ifdef CONFIG_PROC_FS
+	pppol2tp_proc = create_proc_entry("pppol2tp", 0, proc_net);
+	if (!pppol2tp_proc) {
+		/* Roll back: the module is not going to load */
+		unregister_pppox_proto(PX_PROTO_OL2TP);
+		return -ENOMEM;
+	}
+	pppol2tp_proc->proc_fops = &pppol2tp_proc_fops;
+#endif /* CONFIG_PROC_FS */
+	printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
+	       PPPOL2TP_DRV_VERSION);
+
+	return 0;
+}
+
+/* Module exit: unregister the PPPoL2TP protocol from the PPPoX layer,
+ * then remove the "pppol2tp" proc entry when CONFIG_PROC_FS is set.
+ */
+void __exit pppol2tp_exit(void)
+{
+	unregister_pppox_proto(PX_PROTO_OL2TP);
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("pppol2tp", proc_net);
+#endif
+}
+
+module_init(pppol2tp_init);
+module_exit(pppol2tp_exit);
+
+MODULE_AUTHOR("Martijn van Oosterhout <kleptog@svana.org>");
+MODULE_DESCRIPTION("PPP over L2TP over UDP, " PPPOL2TP_DRV_VERSION);
+MODULE_LICENSE("GPL");
diff -Naur linux-2.6.8.1.orig/include/linux/if_ppp.h linux-2.6.8.1/include/linux/if_ppp.h
--- linux-2.6.8.1.orig/include/linux/if_ppp.h	2004-08-14 11:56:22.000000000 +0100
+++ linux-2.6.8.1/include/linux/if_ppp.h	2004-09-20 12:12:28.000000000 +0100
@@ -107,6 +107,21 @@
 	struct ppp_comp_stats stats;
 };
 
+/* For PPPIOCGL2TPSTATS */
+struct pppol2tp_ioc_stats {
+	__u16	tunnel_id;	/* redundant */
+	__u16	session_id;	/* if zero, get tunnel stats */
+	__u32	tx_packets;
+	__u32	tx_bytes;
+	__u32	tx_errors;
+	__u32	rx_packets;
+	__u32	rx_bytes;
+	__u32	rx_seq_discards;
+	__u32	rx_oos_packets;
+	__u32	rx_errors;
+	int	using_ipsec;	/* valid only for session_id == 0 */
+};
+
 #define ifr__name       b.ifr_ifrn.ifrn_name
 #define stats_ptr       b.ifr_ifru.ifru_data
 
@@ -143,6 +158,7 @@
 #define PPPIOCDISCONN	_IO('t', 57)		/* disconnect channel */
 #define PPPIOCATTCHAN	_IOW('t', 56, int)	/* attach to ppp channel */
 #define PPPIOCGCHAN	_IOR('t', 55, int)	/* get ppp channel number */
+#define	PPPIOCGL2TPSTATS _IOR('t', 54, struct pppol2tp_ioc_stats)
 
 #define SIOCGPPPSTATS   (SIOCDEVPRIVATE + 0)
 #define SIOCGPPPVER     (SIOCDEVPRIVATE + 1)	/* NEVER change this!! */
diff -Naur linux-2.6.8.1.orig/include/linux/if_pppol2tp.h linux-2.6.8.1/include/linux/if_pppol2tp.h
--- linux-2.6.8.1.orig/include/linux/if_pppol2tp.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.8.1/include/linux/if_pppol2tp.h	2004-09-20 12:07:59.000000000 +0100
@@ -0,0 +1,65 @@
+/***************************************************************************
+ * Linux PPP over L2TP (PPPoL2TP) Socket Implementation (RFC 2661) 
+ *
+ * This file supplies definitions required by the PPP over L2TP driver
+ * (pppol2tp.c).  All version information wrt this file is located in pppol2tp.c
+ *
+ * License:
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifndef __LINUX_IF_PPPOL2TP_H
+#define __LINUX_IF_PPPOL2TP_H
+
+#include <asm/types.h>
+
+#ifdef __KERNEL__
+#include <linux/in.h>
+#endif
+
+/* Structure used to connect() the socket to a particular UDP socket & tunnel */
+struct pppol2tp_addr
+{
+	int	fd;		 /* FD of UDP socket to use */
+	
+	struct sockaddr_in addr; /* IP address and port to send to */
+	
+	__u16 s_tunnel, s_session;    /* For matching incoming packets */
+	__u16 d_tunnel, d_session;    /* For sending outgoing packets */
+};
+
+/* Socket options:
+ * DEBUG	- bitmask of debug message categories
+ * SENDSEQ	- 0 => don't send packets with sequence numbers
+ *		  1 => send packets with sequence numbers
+ * RECVSEQ	- 0 => receive packet sequence numbers are optional
+ *		  1 => drop receive packets without sequence numbers
+ * LNSMODE	- 0 => act as LAC.
+ *		  1 => act as LNS.
+ * REORDERTO	- reorder timeout (in millisecs). If 0, don't try to reorder.
+ */
+enum {
+	PPPOL2TP_SO_DEBUG	= 1,
+	PPPOL2TP_SO_RECVSEQ	= 2,
+	PPPOL2TP_SO_SENDSEQ	= 3,
+	PPPOL2TP_SO_LNSMODE	= 4,
+	PPPOL2TP_SO_REORDERTO	= 5,
+};
+
+/* Debug message categories for the DEBUG socket option */
+enum {
+	PPPOL2TP_MSG_DEBUG	= (1 << 0),	/* verbose debug (if
+						 * compiled in) */ 
+	PPPOL2TP_MSG_CONTROL	= (1 << 1),	/* userspace - kernel
+						 * interface */
+	PPPOL2TP_MSG_SEQ	= (1 << 2),	/* sequence numbers */
+	PPPOL2TP_MSG_DATA	= (1 << 3),	/* data packets */
+};
+
+
+
+#endif
diff -Naur linux-2.6.8.1.orig/include/linux/if_pppox.h linux-2.6.8.1/include/linux/if_pppox.h
--- linux-2.6.8.1.orig/include/linux/if_pppox.h	2004-09-20 12:45:20.000000000 +0100
+++ linux-2.6.8.1/include/linux/if_pppox.h	2004-09-20 12:46:46.000000000 +0100
@@ -27,6 +27,7 @@
 #include <asm/semaphore.h>
 #include <linux/ppp_channel.h>
 #endif /* __KERNEL__ */
+#include <linux/if_pppol2tp.h>
 
 /* For user-space programs to pick up these definitions
  * which they wouldn't get otherwise without defining __KERNEL__
@@ -50,7 +51,8 @@
  * Protocols supported by AF_PPPOX
  */
 #define PX_PROTO_OE	0 /* Currently just PPPoE */
-#define PX_MAX_PROTO	1
+#define PX_PROTO_OL2TP	1 /* Now L2TP also */
+#define PX_MAX_PROTO	2
 
 /* The use of a union isn't viable because the size of this struct
  * must stay fixed over time -- applications use sizeof(struct
@@ -72,6 +74,12 @@
 	struct pppoe_addr pppoe;
 }__attribute__ ((packed));
 
+struct sockaddr_pppol2tp {
+	sa_family_t	sa_family;	/* address family, AF_PPPOX */
+	unsigned int	sa_protocol;	/* protocol identifier */
+	struct pppol2tp_addr pppol2tp;
+}__attribute__ ((packed));
+
 /*********************************************************************
  *
  * ioctl interface for defining forwarding of connections
diff -Naur linux-2.6.8.1.orig/include/linux/socket.h linux-2.6.8.1/include/linux/socket.h
--- linux-2.6.8.1.orig/include/linux/socket.h	2004-08-14 11:55:59.000000000 +0100
+++ linux-2.6.8.1/include/linux/socket.h	2004-09-01 17:37:55.000000000 +0100
@@ -268,6 +268,7 @@
 #define SOL_IRDA        266
 #define SOL_NETBEUI	267
 #define SOL_LLC		268
+#define SOL_PPPOL2TP	269
 
 /* IPX options */
 #define IPX_TYPE	1
diff -Naur linux-2.6.8.1.orig/MAINTAINERS linux-2.6.8.1/MAINTAINERS
--- linux-2.6.8.1.orig/MAINTAINERS	2004-08-14 11:55:34.000000000 +0100
+++ linux-2.6.8.1/MAINTAINERS	2004-09-20 11:23:42.000000000 +0100
@@ -1694,6 +1694,11 @@
 M:	mostrows@styx.uwaterloo.ca
 S:	Maintained
 
+PPP OVER L2TP
+P:	Martijn van Oosterhout
+M:	kleptog@svana.org
+S:	Maintained
+
 PREEMPTIBLE KERNEL
 P:	Robert Love
 M:	rml@tech9.net

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-20 21:11 PPP-over-L2TP kernel support, new patch for review James Chapman
@ 2004-09-20 21:17 ` David S. Miller
  2004-09-21  9:55   ` James Chapman
  2004-09-21 21:11   ` Benjamin LaHaise
  0 siblings, 2 replies; 15+ messages in thread
From: David S. Miller @ 2004-09-20 21:17 UTC (permalink / raw)
  To: James Chapman, bcrl; +Cc: netdev, kleptog, mostrows

On Mon, 20 Sep 2004 22:11:44 +0100
James Chapman <jchapman@katalix.com> wrote:

> Attached is a revised version of the new PPP over L2TP support for
> review. Thanks DaveM and Herbert for comments so far. The following
> comments have been addressed in this new version:

What relation does your work have to the L2TP implementation
being worked on by Ben LaHaise?  See:

http://marc.theaimsgroup.com/?l=linux-netdev&m=109375044707414&w=2

Do we have two people working on this thing. :-/

Ben didn't post any pointers to his work so I couldn't do the
comparison myself.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-20 21:17 ` David S. Miller
@ 2004-09-21  9:55   ` James Chapman
  2004-09-21 21:04     ` Benjamin LaHaise
  2004-09-21 21:11   ` Benjamin LaHaise
  1 sibling, 1 reply; 15+ messages in thread
From: James Chapman @ 2004-09-21  9:55 UTC (permalink / raw)
  To: David S. Miller, bcrl; +Cc: netdev, kleptog, mostrows

Quoting "David S. Miller" <davem@davemloft.net>:

> On Mon, 20 Sep 2004 22:11:44 +0100
> James Chapman <jchapman@katalix.com> wrote:
>
> > Attached is a revised version of the new PPP over L2TP support for
> > review. Thanks DaveM and Herbert for comments so far. The following
> > comments have been addressed in this new version:
>
> What relation does your work have to the L2TP implementation
> being worked on by Ben LaHaise?  See:
>
> http://marc.theaimsgroup.com/?l=linux-netdev&m=109375044707414&w=2
>
> Do we have two people working on this thing. :-/
>
> Ben didn't post any pointers to his work so I couldn't do the
> comparison myself.
>

Ben and I are working on separate projects. I was unaware of his work
until I saw his netdev post a few weeks ago and mailed him privately to find
out more. He's using the old Babylon (Spellcaster) proprietary PPP
stack that has now been GPL'd.

Unfortunately I haven't seen Ben's code yet either so I can't give a
direct comparison. Ben? I did have a look at the Babylon stuff
(1.6-pre3), although I've no idea how much of it Ben has
changed. Here's a summary, fyi.

Babylon:-

- Architecture seems to be using char devices for communication with
  the kernel and all the PPP datapath is handled by custom virtual
  net_devices; the generic PPP kernel code isn't used as far as I can
  tell. Unfortunately it is very old (linux-2.0 era I think) but Ben
  has probably updated it.

- Some form of L2TP support is there but it is very basic. Userspace
  sends data through char devices (read()/write()), which the kernel
  char driver converts to skbs and passes on. Nasty.

- PPP stack supports multiple PPP sessions in one daemon (unlike pppd).

- Unlikely to integrate with the new native IPSEC stuff.

OpenL2TP:-

- Communication with kernel is through a new PPPoL2TP socket family.
  There's one socket per L2TP session so MAX_FILES limits max
  sessions. Works with the new native IPSEC kernel code.

- Comprehensive userspace L2TP protocol implementation written from
  scratch, targeted specifically for enterprise VPN and embedded
  networking products. Efficient kernel datapath was deemed essential
  for this environment.

- Plugin architecture allows different PPP implementations to be used.
  Only pppd supported so far (limits max sessions still further due
  to process overhead) but I'm working on a daemon to support multiple
  sessions -- still early stages, evaluating alternatives. Babylon or
  hacking pppd or start again...

rp-l2tp:-

- No kernel datapath (all data copied into userspace through ptys).
  However, it could be modified to use the socket based kernel driver.

I think for general Linux L2TP support, a socket architecture makes
more sense. But maybe I'm biased... :)

If you want to find out more about OpenL2TP, check out the
online man pages at http://openl2tp.sourceforge.net/.

BTW, I asked on linux-ppp if anyone was working on a single daemon PPP
to handle multiple sessions but got zero response.  Anyone on this
list know of any work in this area?

I hope this was useful.

/james

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-21  9:55   ` James Chapman
@ 2004-09-21 21:04     ` Benjamin LaHaise
  2004-09-21 23:07       ` Herbert Xu
  2004-09-22  9:58       ` James Chapman
  0 siblings, 2 replies; 15+ messages in thread
From: Benjamin LaHaise @ 2004-09-21 21:04 UTC (permalink / raw)
  To: James Chapman; +Cc: David S. Miller, netdev, kleptog, mostrows

On Tue, Sep 21, 2004 at 10:55:25AM +0100, James Chapman wrote:
> Unfortunately I haven't seen Ben's code yet either so I can't give a
> direct comparison. Ben? I did have a look at the Babylon stuff
> (1.6-pre3), although I've no idea how much of it Ben has
> changed. Here's a summary, fyi.
> 
> Babylon:-
> 
> - Architecture seems to be using char devices for communication with
>   the kernel and all the PPP datapath is handled by custom virtual
>   net_devices; the generic PPP kernel code isn't used as far as I can
>   tell. Unfortunately it is very old (linux-2.0 era I think) but Ben
>   has probably updated it.

I've updated it.  The current build is at http://www.kvack.org/~bcrl/babylon/ 
and is 1.6-pre3-bcrl8.  The L2TP support is approaching beta, with the only 
real todo items being proper retransmit with congestion control, plus the 
kernel side of multihop support, and some locking fixes for smp and preempt.  
Plus the api needs to be made into something more palatable (it abuses 
a mix of ioctl, bind & connect to create l2tp sessions).  No flames please, 
but patches that keep the scaling characteristics and make the interface 
more palatable gladly accepted.

> - Some form of L2TP support is there but it is very basic. Userspace
>   sends data through char devices (read()/write()), which the kernel
>   char driver converts to skbs and passes on. Nasty.

That old code was completely tossed.

> - PPP stack supports multiple PPP sessions in one daemon (unlike pppd).
> 
> - Unlikely to integrate with the new native IPSEC stuff.

L2TP over IPSEC?  Are you insane?  You'd not be able to terminate more than 
a couple of dozen connections over it. =-)

> I think for general Linux L2TP support, a socket architecture makes
> more sense. But maybe I'm biased... :)

The current babylon code is using sockets.  The l2tp socket passes all 
packets for a given tunnel through a single file descriptor.  This seems 
like the best tradeoff for being able to scale to decent numbers of 
sessions.

		-ben
-- 
"Time is what keeps everything from happening all at once." -- John Wheeler

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-20 21:17 ` David S. Miller
  2004-09-21  9:55   ` James Chapman
@ 2004-09-21 21:11   ` Benjamin LaHaise
  1 sibling, 0 replies; 15+ messages in thread
From: Benjamin LaHaise @ 2004-09-21 21:11 UTC (permalink / raw)
  To: David S. Miller; +Cc: James Chapman, netdev, kleptog, mostrows

On Mon, Sep 20, 2004 at 02:17:04PM -0700, David S. Miller wrote:
> Ben didn't post any pointers to his work so I couldn't do the
> comparison myself.

It's still alpha and ugly, but... See src/mkifaces.cc for the microbench 
that shows the scaling issues with lots of interfaces.  I'd hoped to have 
time to test it on the weekend, but other stuff got in the way.  The quick 
guide to setup is:

	./configure --with-l2tp
	make
	insmod kernel/kern.o
	insmod drivers/l2tp/l2tp_k.o
	make -C src mkifaces
	src/mkifaces <num>

Although I'm not sure if this works on 2.6 in this build as most of my 
development was done on 2.4 over the summer.  Cheers,

		-ben
-- 
"Time is what keeps everything from happening all at once." -- John Wheeler

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-21 21:04     ` Benjamin LaHaise
@ 2004-09-21 23:07       ` Herbert Xu
  2004-09-22  0:00         ` Michael Richardson
  2004-09-22  1:14         ` Benjamin LaHaise
  2004-09-22  9:58       ` James Chapman
  1 sibling, 2 replies; 15+ messages in thread
From: Herbert Xu @ 2004-09-21 23:07 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: jchapman, davem, netdev, kleptog, mostrows

Benjamin LaHaise <bcrl@kvack.org> wrote:
>
>> - Unlikely to integrate with the new native IPSEC stuff.
> 
> L2TP over IPSEC?  Are you insane?  You'd not be able to terminate more than 
> a couple of dozen connections over it. =-)

Why not? L2TP over IPsec is the only reason I'm looking at L2TP at all.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-21 23:07       ` Herbert Xu
@ 2004-09-22  0:00         ` Michael Richardson
  2004-09-22  1:14         ` Benjamin LaHaise
  1 sibling, 0 replies; 15+ messages in thread
From: Michael Richardson @ 2004-09-22  0:00 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Benjamin LaHaise, jchapman, davem, netdev, kleptog, mostrows

-----BEGIN PGP SIGNED MESSAGE-----


>>>>> "Herbert" == Herbert Xu <herbert@gondor.apana.org.au> writes:
    >>> - Unlikely to integrate with the new native IPSEC stuff.
    >> L2TP over IPSEC?  Are you insane?  You'd not be able to terminate
    >> more than a couple of dozen connections over it. =-)

    Herbert> Why not? L2TP over IPsec is the only reason I'm looking at
    Herbert> L2TP at all.

  Stupidly, it is the only way for a Microsoft XP native stack to be
"auto-configured". Sad. sad stupid crap that I wish could be thrown out.

- --
]     "Elmo went to the wrong fundraiser" - The Simpson         |  firewalls  [
]   Michael Richardson,    Xelerance Corporation, Ottawa, ON    |net architect[
] mcr@xelerance.com      http://www.sandelman.ottawa.on.ca/mcr/ |device driver[
] panic("Just another Debian GNU/Linux using, kernel hacking, security guy"); [


-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.2 (GNU/Linux)
Comment: Finger me for keys

iQCVAwUBQVDAg4qHRg3pndX9AQElfQP/aSfT8/pNvtwaYQyAgin9vI9eAEQmI1uk
VlhrzJB8SajEiYg9oQxuiBTqhxjUhG1/9Cp8m3NEofeW9D1YLZwv6TEhN/M+TLrI
cIG8H4qvu+//L64Zxa8P+X7ZsM3+5tFkpu+QFE374VuoGaiEMUCMl4WLTTST1YnJ
VEpnvhotGSs=
=8O9W
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-21 23:07       ` Herbert Xu
  2004-09-22  0:00         ` Michael Richardson
@ 2004-09-22  1:14         ` Benjamin LaHaise
  2004-09-22  2:42           ` David S. Miller
  2004-09-22  3:03           ` jamal
  1 sibling, 2 replies; 15+ messages in thread
From: Benjamin LaHaise @ 2004-09-22  1:14 UTC (permalink / raw)
  To: Herbert Xu; +Cc: jchapman, davem, netdev, kleptog, mostrows

On Wed, Sep 22, 2004 at 09:07:06AM +1000, Herbert Xu wrote:
> Benjamin LaHaise <bcrl@kvack.org> wrote:
> >
> >> - Unlikely to integrate with the new native IPSEC stuff.
> > 
> > L2TP over IPSEC?  Are you insane?  You'd not be able to terminate more than 
> > a couple of dozen connections over it. =-)
> 
> Why not? L2TP over IPsec is the only reason I'm looking at L2TP at all.

CPU load.  The main reason I was forced to revisit L2TP (imo, it's a 
horrible protocol that suffers from too many bad decisions) was in its 
role for terminating DSL.  In this case one expects to be able to have 
tens of thousands of connections terminated by a single box, which 
means pushing hundreds of megabits of traffic.  The overhead of crypto 
operations in such a scenario makes it a far too costly choice.

		-ben

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-22  1:14         ` Benjamin LaHaise
@ 2004-09-22  2:42           ` David S. Miller
  2004-09-22  3:03           ` jamal
  1 sibling, 0 replies; 15+ messages in thread
From: David S. Miller @ 2004-09-22  2:42 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: herbert, jchapman, netdev, kleptog, mostrows

On Tue, 21 Sep 2004 21:14:21 -0400
Benjamin LaHaise <bcrl@kvack.org> wrote:

> On Wed, Sep 22, 2004 at 09:07:06AM +1000, Herbert Xu wrote:
> > Benjamin LaHaise <bcrl@kvack.org> wrote:
> > >
> > >> - Unlikely to integrate with the new native IPSEC stuff.
> > > 
> > > L2TP over IPSEC?  Are you insane?  You'd not be able to terminate more than 
> > > a couple of dozen connections over it. =-)
> > 
> > Why not? L2TP over IPsec is the only reason I'm looking at L2TP at all.
> 
> CPU load.  The main reason I was forced to revisit L2TP (imo, it's a 
> horrible protocol that suffers from too many bad decisions) was in its 
> role for terminating DSL.  In this case one expects to be able to have 
> tens of thousands of connections terminated by a single box, which 
> means pushing hundreds of megabits of traffic.  The overhead of crypto 
> operations in such a scenario makes it a far too costly choice.

I've heard of usage of both types described by Herbert and yourself,
and both are valid.

Therefore it's great that your scheme scales so well Ben, but it
has to support IPSEC properly as well.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-22  1:14         ` Benjamin LaHaise
  2004-09-22  2:42           ` David S. Miller
@ 2004-09-22  3:03           ` jamal
  1 sibling, 0 replies; 15+ messages in thread
From: jamal @ 2004-09-22  3:03 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: Herbert Xu, jchapman, davem, netdev, kleptog, mostrows

On Tue, 2004-09-21 at 21:14, Benjamin LaHaise wrote:
> On Wed, Sep 22, 2004 at 09:07:06AM +1000, Herbert Xu wrote:

> CPU load.  
> The main reason I was forced to revisit L2TP (imo, it's a 
> horrible protocol that suffers from too many bad decisions) was in its 
> role for terminating DSL.  In this case one expects to be able to have 
> tens of thousands of connections terminated by a single box, which 
> means pushing hundreds of megabits of traffic.  The overhead of crypto 
> operations in such a scenario makes it a far too costly choice.

Bad excuse ;->
So use a crypto chip or do fewer connections and scale by distributing
etc. I have a feeling there's nothing inherent in your code that stops
you from integrating into ipsec.

cheers,
jamal

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-21 21:04     ` Benjamin LaHaise
  2004-09-21 23:07       ` Herbert Xu
@ 2004-09-22  9:58       ` James Chapman
  2004-09-22 10:53         ` Herbert Xu
  1 sibling, 1 reply; 15+ messages in thread
From: James Chapman @ 2004-09-22  9:58 UTC (permalink / raw)
  To: Benjamin LaHaise, David S. Miller; +Cc: netdev, kleptog, mostrows

Although Ben and I are both working on L2TP projects, I think we're
targeting two totally different usage scenarios.

Time for some ASCII art...

1. ISP scenario

               +----+
            ---+ D  |
            ---| S  |      +--------+              +-------+
            ---| L  |======|  LAC   |==============| LNS   |
            ---| A  |      |        |              |       |
            ---| M  |      +--------+              +-------+
            ---|    |
            ---|    |
            ---|    |
               +----+
     thousands       PPPoA           each PPP session
     of PPP          or              tunneled thru
     clients         PPPoE           L2TP session to LNS


2. VPN scenario

           +---+                                  +------+
           |LAC+----------------------------------|      |
           +---+                                  |      |
           +---+                                  |      |
           |LAC+----------------------------------| LNS  |
           +---+                                  |      |
           +---+                                  |      |
           |LAC+----------------------------------|      |
           +---+                                  |      |
                                                  +------+
        PPP client                               Terminates
        in each LAC.                             multiple tunnels
        Typically 1
        session per tunnel

Ben is addressing the ISP case, where client PPP sessions are
terminated only temporarily at the LAC while it authenticates the PPP
users to decide how to tunnel them. Each PPP session is then tunneled
straight through the LAC via an L2TP session (inside an L2TP tunnel)
to the LNS. Scalability is important here, which Ben has addressed.

I'm addressing the VPN case, where multiple clients (M$ L2TP/IPSEC
or rp-l2tp/openl2tp clients) connect to an enterprise VPN server.
Current solutions involve userspace pty hacks in the datapath,
shunting data in and out of pppd ttys. The proposed driver moves the
datapath into the kernel where it should be. The driver integrates
with existing PPP and IPSEC kernel subsystems and will handle
hundreds of sessions today.

The biggest difference in our approaches is that Martijn and I use a
PPPoL2TP socket per session bound through a plain AF_INET UDP tunnel
socket while Ben uses a new AF_L2TP tunnel socket and no separate
socket per session. Both have their merits.

If I can put the case for our driver, Linux really needs a good VPN
solution. I know of two companies that run their offices with Linux
servers yet they both retain a Microsoft server _only_ to provide VPN
access to off-site workers. Including this driver in the kernel will
enable us to build binary packages for OpenL2TP and perhaps other L2TP
solutions like rp-l2tp would be updated to use it too. Being able to
handle hundreds (not thousands) of sessions is more than adequate
in this case.

Where do we go from here then? Perhaps both approaches are equally
valid and could even co-exist in the Linux kernel distribution?

/james

Quoting Benjamin LaHaise <bcrl@kvack.org>:

> > - Architecture seems to be using char devices for communication with
> >   the kernel and all the PPP datapath is handled by custom virtual
> >   net_devices; the generic PPP kernel code isn't used as far as I can
> >   tell. Unfortunately it is very old (linux-2.0 era I think) but Ben
> >   has probably updated it.
>
> I've updated it.  The current build is at http://www.kvack.org/~bcrl/babylon/
> and is 1.6-pre3-bcrl8.

[snip]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-22  9:58       ` James Chapman
@ 2004-09-22 10:53         ` Herbert Xu
  2004-09-22 21:28           ` James Chapman
  0 siblings, 1 reply; 15+ messages in thread
From: Herbert Xu @ 2004-09-22 10:53 UTC (permalink / raw)
  To: James Chapman; +Cc: bcrl, davem, netdev, kleptog, mostrows

James Chapman <jchapman@katalix.com> wrote:
> 
> The biggest difference in our approaches is that Martijn and I use a
> PPPoL2TP socket per session bound through a plain AF_INET UDP tunnel
> socket while Ben uses a new AF_L2TP tunnel socket and no separate
> socket per session. Both have their merits.

Can you elaborate on the merits of having a socket? It would seem to me
that not having a socket is a lot more scalable.  After all IPsec doesn't
carry a socket around per session.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-22 10:53         ` Herbert Xu
@ 2004-09-22 21:28           ` James Chapman
  2004-09-22 22:05             ` Herbert Xu
  2004-09-23 18:34             ` Martijn van Oosterhout
  0 siblings, 2 replies; 15+ messages in thread
From: James Chapman @ 2004-09-22 21:28 UTC (permalink / raw)
  To: Herbert Xu; +Cc: bcrl, davem, netdev, kleptog, mostrows

Hi Herbert,

Quoting Herbert Xu <herbert@gondor.apana.org.au>:

> James Chapman <jchapman@katalix.com> wrote:
> >
> > The biggest difference in our approaches is that Martijn and I use a
> > PPPoL2TP socket per session bound through a plain AF_INET UDP tunnel
> > socket while Ben uses a new AF_L2TP tunnel socket and no separate
> > socket per session. Both have their merits.
>
> Can you elaborate on the merits of having a socket? It would seem to me
> that not having a socket is a lot more scalable.  After all IPsec doesn't
> carry a socket around per session.

What I meant by "both have their merits" is that both general
approaches have their merits. It's a shame Martijn isn't available
right now (he's moving home to a new country) as he came up with the
initial kernel driver concept. Anyway, I'm sure he'll chime in later.

We could have implemented this thing by hooking specific UDP ports out
in udp.c or adding to the UDP encap framework that you use for IPsec.
Neither seems ideal for this case -- modifying core code in udp.c to
add support for a new UDP protocol just seemed wrong.  IPsec is
special -- it is a core technology that the IP stack should know
about. L2TP isn't special at all.

There are several reasons why I think the approach taken is a
reasonable compromise:-

- No changes to core kernel code needed. The new L2TP support can be
  loaded as a module into current 2.4 and 2.6 Linux distributions.

- It fits well into the existing PPPoX framework. This is, after all,
  PPP over L2TP.

- Implementing a new socket address family for the tunnel socket
  instead of using a plain UDP socket meant cloning a lot of UDP
  code. We thought it might break IPsec assumptions and having code
  that did almost the same thing as other components of the kernel
  was not ideal. It also seemed to set the wrong precedent for adding
  a new UDP protocol.

- Having PPP create a socket and bind it to the L2TP tunnel UDP socket
  gave us all the packet hooks we needed to implement the L2TP
  datapath. And using a per-session socket made it trivial to add
  control and status APIs for userspace. Also, for handling L2TP
  (optional) data packet reordering, the socket buffer would help
  when holding packets for their predecessors, preventing one bad
  session from breaking others in the tunnel.

- For VPN usage scenarios where 50 or so L2TP sessions would be
  unusually large, the extra socket overhead didn't seem to matter
  that much when weighed up against all its advantages. Since in the
  VPN case, PPP sessions are locally terminated, there's also a
  net_device per session. Are we going to get rid of that too?

We keep coming back to the scalability issue. Sure this driver
wouldn't work when handling thousands of sessions, but I just don't
think it's an issue in the VPN case. People will be nailing up their
VPNs directly with hardware-assisted IPsec or even MPLS well before we
hear users complaining that they want Linux's L2TP to support
thousands of sessions for VPN use.

Please don't get me wrong, I'm not saying that scalability doesn't
matter. But I do think we're getting hung up on the need to support
thousands of L2TP sessions carrying non-terminated PPP sessions as a
typical Linux use case. Linux systems, especially those built from the
core kernel source distribution, are much more likely to be using L2TP
for VPNs, terminating PPP locally.

There are many problems to solve first in order to handle thousands of
sessions before our proposed driver's limitations become an issue, i.e.
single daemon pppd and how to handle each PPP interface without a
net_device. Ben's Babylon-based solution does this but it won't solve
the VPN problem.

I really want to see an end to those userspace pty pppd hacks being
used by current L2TP, PPTP and PPPoE solutions. I think the proposed
driver solves that problem well for L2TP and because it can be
installed in current Linux installations with little effort (as a
module), it stands a reasonable chance.

I'll shut up now, I've gone on long enough. Thanks for reading this
far! :)

/james

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-22 21:28           ` James Chapman
@ 2004-09-22 22:05             ` Herbert Xu
  2004-09-23 18:34             ` Martijn van Oosterhout
  1 sibling, 0 replies; 15+ messages in thread
From: Herbert Xu @ 2004-09-22 22:05 UTC (permalink / raw)
  To: James Chapman; +Cc: herbert, bcrl, davem, netdev, kleptog, mostrows

James Chapman <jchapman@katalix.com> wrote:
> 
> We keep coming back to the scalability issue. Sure this driver
> wouldn't work when handling thousands of sessions, but I just don't
> think it's an issue in the VPN case. People will be nailing up their

That's where I disagree.  We don't want to end up with two L2TP
implementations in the kernel, one geared towards large installations
and one geared towards IPsec setups.

So I think both Ben and James need to work out how they can best
absorb the advantages of each other's stack.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: PPP-over-L2TP kernel support, new patch for review
  2004-09-22 21:28           ` James Chapman
  2004-09-22 22:05             ` Herbert Xu
@ 2004-09-23 18:34             ` Martijn van Oosterhout
  1 sibling, 0 replies; 15+ messages in thread
From: Martijn van Oosterhout @ 2004-09-23 18:34 UTC (permalink / raw)
  To: James Chapman; +Cc: Herbert Xu, bcrl, davem, netdev, mostrows

[-- Attachment #1: Type: text/plain, Size: 2964 bytes --]

On Wed, Sep 22, 2004 at 10:28:53PM +0100, James Chapman wrote:
> Hi Herbert,
> 
> Quoting Herbert Xu <herbert@gondor.apana.org.au>:
> 
> > James Chapman <jchapman@katalix.com> wrote:
> > >
> > > The biggest difference in our approaches is that Martijn and I use a
> > > PPPoL2TP socket per session bound through a plain AF_INET UDP tunnel
> > > socket while Ben uses a new AF_L2TP tunnel socket and no separate
> > > socket per session. Both have their merits.
> >
> > Can you elaborate on the merits of having a socket? It would seem to me
> > that not having a socket is a lot more scalable.  After all IPsec doesn't
> > carry a socket around per session.
> 
> What I meant by "both have their merits" is that both general
> approaches have their merits. It's a shame Martijn isn't available
> right now (he's moving home to a new country) as he came up with the
> initial kernel driver concept. Anyway, I'm sure he'll chime in later.

Ok, I've just cut off the power connector of my laptop and whacked a
new one on, so I'm just beginning to catch up. I'd just like to comment
that the socket-per-connection is part of the kernel generic-PPP
support. The PPP packets not handled by the kernel need to be
transported somewhere and I guess the decision was made to pass it
through a PPPoX socket.

If you want to get away from the one socket per session model, you
can't use PPPoX sockets. You need something in the kernel to hold the
ppp generic data structure. I imagine Ben's uses an array in the kernel
and passes stuff to userspace in a way so the user-space daemon can
identify the session it belongs to.

I don't see why this PPPoX solution won't scale to thousands of
sessions. Sure, you get one socket per session plus one socket per
tunnel, but IRC servers run with thousands of sockets and the costs
here aren't much more. Sure, someone needs to write a PPP daemon that
can handle multiple simultaneous connections, but that's orthogonal to
the issue at hand.

If you want to remove the one socket per session requirement, someone
needs to redo the PPPoX support. In fact, the whole PPPoX idea seems to
have been a bit of a dud since even with it there it seems to be better
to just invent your own character device/protocol family/etc than use
it. When I started I just used the PPPoX stuff since I figured that was
going to be the "supported" way to use the in-kernel PPP stuff.

Also, if Ben's stuff is handling the case of taking separate L2TP
sessions and merging them through to another LNS server, then it is a
completely orthogonal system, since that doesn't require full PPP
support anyway...

Have a nice day,
-- 
Martijn van Oosterhout   <kleptog@svana.org>   http://svana.org/kleptog/
> Patent. n. Genius is 5% inspiration and 95% perspiration. A patent is a
> tool for doing 5% of the work and then sitting around waiting for someone
> else to do the other 95% so you can sue them.

[-- Attachment #2: Type: application/pgp-signature, Size: 232 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2004-09-23 18:34 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-09-20 21:11 PPP-over-L2TP kernel support, new patch for review James Chapman
2004-09-20 21:17 ` David S. Miller
2004-09-21  9:55   ` James Chapman
2004-09-21 21:04     ` Benjamin LaHaise
2004-09-21 23:07       ` Herbert Xu
2004-09-22  0:00         ` Michael Richardson
2004-09-22  1:14         ` Benjamin LaHaise
2004-09-22  2:42           ` David S. Miller
2004-09-22  3:03           ` jamal
2004-09-22  9:58       ` James Chapman
2004-09-22 10:53         ` Herbert Xu
2004-09-22 21:28           ` James Chapman
2004-09-22 22:05             ` Herbert Xu
2004-09-23 18:34             ` Martijn van Oosterhout
2004-09-21 21:11   ` Benjamin LaHaise

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.