All of lore.kernel.org
 help / color / mirror / Atom feed
* A modular approach to handling netdev address c/r
@ 2010-04-08 17:48 Dan Smith
       [not found] ` <1270748932-26745-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Dan Smith @ 2010-04-08 17:48 UTC (permalink / raw)
  To: containers-qjLDD68F18O7TbgM5vRIOg

Based on Serge's obsession with avoiding build breaks, I've changed
around the ipv6 patch to make the whole process more modular.  This
set now:

1. Introduces a registration system for interface address families to
   declare support for checkpoint and provide handlers to do it
2. Moves the inet4 code into net/ipv4/devinet.c in much the same form
   as it was generalized in the previous set of ipv6 patches
3. Adds the ipv6 code to net/ipv6/addrconf.c
4. Compiles for every combination of checkpoint, netns, ipv4, and ipv6
   support that I can think of

(not necessarily in the above order).

I think it makes it all much nicer.

Comments?

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 1/4] Modularize the handling of netdev address c/r
       [not found] ` <1270748932-26745-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-04-08 17:48   ` Dan Smith
       [not found]     ` <1270748932-26745-2-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2010-04-08 17:48   ` [PATCH 2/4] Fail checkpoint if IPv4 multicast addresses are configured Dan Smith
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 23+ messages in thread
From: Dan Smith @ 2010-04-08 17:48 UTC (permalink / raw)
  To: containers-qjLDD68F18O7TbgM5vRIOg

This moves the INET4 address checkpoint and restart routines into
net/ipv4/devinet.c and introduces a registration method to present
the checkpoint code with the handler functions.

This makes it easier to add additional address types, and also
makes the cases where inet4 is absent, inet6 is a module, etc much
easier.  It also elminates the need for a couple of helper functions.

Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 include/linux/checkpoint.h     |   18 +++++-
 include/linux/checkpoint_hdr.h |    1 +
 net/checkpoint_dev.c           |  152 ++++++++++++++++++++++++----------------
 net/ipv4/devinet.c             |   75 ++++++++++++++++++++
 4 files changed, 184 insertions(+), 62 deletions(-)

diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 96693e2..5fdbd01 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -132,7 +132,8 @@ extern void *restore_netdev(struct ckpt_ctx *ctx);
 
 extern int ckpt_netdev_in_init_netns(struct ckpt_ctx *ctx,
 				     struct net_device *dev);
-extern int ckpt_netdev_inet_addrs(struct in_device *indev,
+extern int ckpt_netdev_inet_addrs(struct ckpt_ctx *ctx,
+				  struct net_device *dev,
 				  struct ckpt_netdev_addr *list[]);
 extern int ckpt_netdev_hwaddr(struct net_device *dev,
 			      struct ckpt_hdr_netdev *h);
@@ -513,6 +514,21 @@ extern void _ckpt_msg_complete(struct ckpt_ctx *ctx);
 	_do_ckpt_msg(ctx, err, "[E @ %s:%d]" fmt, __func__, __LINE__, ##args); \
 } while (0)
 
+struct ckpt_netdev_addr_handler {
+	int type;
+	struct module *owner;
+	int (*checkpoint_addr)(struct ckpt_ctx *ctx,
+			       struct net_device *dev,
+			       int index, int max,
+			       struct ckpt_netdev_addr *addrs);
+	int (*restore_addr)(struct ckpt_ctx *ctx,
+			    struct net_device *dev,
+			    struct net *net,
+			    struct ckpt_netdev_addr *addr);
+};
+extern int ckpt_netdev_addr_register(struct ckpt_netdev_addr_handler *);
+extern int ckpt_netdev_addr_unregister(struct ckpt_netdev_addr_handler *);
+
 #endif /* CONFIG_CHECKPOINT */
 #endif /* __KERNEL__ */
 
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 36386ad..13bf62c 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -804,6 +804,7 @@ struct ckpt_hdr_netdev {
 
 enum ckpt_netdev_addr_types {
 	CKPT_NETDEV_ADDR_IPV4,
+	CKPT_NETDEV_ADDR_MAX
 };
 
 struct ckpt_netdev_addr {
diff --git a/net/checkpoint_dev.c b/net/checkpoint_dev.c
index 5a4a95b..4ef06e3 100644
--- a/net/checkpoint_dev.c
+++ b/net/checkpoint_dev.c
@@ -12,11 +12,11 @@
 #include <linux/sched.h>
 #include <linux/if.h>
 #include <linux/if_arp.h>
-#include <linux/inetdevice.h>
 #include <linux/veth.h>
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
 #include <linux/deferqueue.h>
+#include <linux/module.h>
 
 #include <net/net_namespace.h>
 #include <net/sch_generic.h>
@@ -34,17 +34,64 @@ struct mvl_newlink {
 
 typedef int (*new_link_fn)(struct sk_buff *, void *);
 
-static int __kern_devinet_ioctl(struct net *net, unsigned int cmd, void *arg)
+static struct ckpt_netdev_addr_handler *addr_handlers[CKPT_NETDEV_ADDR_MAX];
+
+static char *addr_modules[] = {
+	"ipv4",		/* CKPT_NETDEV_ADDR_IPV4 */
+	"ipv6",		/* CKPT_NETDEV_ADDR_IPV6 */
+};
+
+int ckpt_netdev_addr_register(struct ckpt_netdev_addr_handler *h)
 {
-	mm_segment_t fs;
-	int ret;
+	if (h->type >= CKPT_NETDEV_ADDR_MAX)
+		return -EINVAL;
 
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = devinet_ioctl(net, cmd, arg);
-	set_fs(fs);
+	if (addr_handlers[h->type] != NULL)
+		return -EEXIST;
 
-	return ret;
+	ckpt_debug("Registered addr type %s\n", addr_modules[h->type]);
+	addr_handlers[h->type] = h;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ckpt_netdev_addr_register);
+
+int ckpt_netdev_addr_unregister(struct ckpt_netdev_addr_handler *h)
+{
+	if (h->type >= CKPT_NETDEV_ADDR_MAX)
+		return -EINVAL;
+
+	if (addr_handlers[h->type] == NULL)
+		return -ESRCH;
+
+	ckpt_debug("Unregistered addr type %s\n", addr_modules[h->type]);
+	addr_handlers[h->type] = NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL(ckpt_netdev_addr_unregister);
+
+static struct ckpt_netdev_addr_handler *get_addr_handler(int type)
+{
+	struct ckpt_netdev_addr_handler *h;
+
+	if (type >= CKPT_NETDEV_ADDR_MAX)
+		return ERR_PTR(-EINVAL);
+
+	h = addr_handlers[type];
+
+	if (h == NULL)
+		return NULL;
+
+	if (try_module_get(h->owner))
+		return h;
+	else
+		return ERR_PTR(-EBUSY);
+}
+
+static void put_addr_handler(struct ckpt_netdev_addr_handler *h)
+{
+	module_put(h->owner);
 }
 
 static int __kern_dev_ioctl(struct net *net, unsigned int cmd, void *arg)
@@ -151,13 +198,14 @@ int ckpt_netdev_hwaddr(struct net_device *dev, struct ckpt_hdr_netdev *h)
 	return 0;
 }
 
-int ckpt_netdev_inet_addrs(struct in_device *indev,
+int ckpt_netdev_inet_addrs(struct ckpt_ctx *ctx,
+			   struct net_device *dev,
 			   struct ckpt_netdev_addr *_abuf[])
 {
 	struct ckpt_netdev_addr *abuf = NULL;
-	struct in_ifaddr *addr = indev->ifa_list;
 	int addrs = 0;
 	int max = 32;
+	int i;
 
  retry:
 	*_abuf = krealloc(abuf, max * sizeof(*abuf), GFP_KERNEL);
@@ -169,19 +217,29 @@ int ckpt_netdev_inet_addrs(struct in_device *indev,
 
 	read_lock(&dev_base_lock);
 
-	while (addr) {
-		abuf[addrs].type = CKPT_NETDEV_ADDR_IPV4; /* Only IPv4 now */
-		abuf[addrs].inet4_local = htonl(addr->ifa_local);
-		abuf[addrs].inet4_address = htonl(addr->ifa_address);
-		abuf[addrs].inet4_mask = htonl(addr->ifa_mask);
-		abuf[addrs].inet4_broadcast = htonl(addr->ifa_broadcast);
+	addrs = 0;
 
-		addr = addr->ifa_next;
-		if (++addrs >= max) {
+	for (i = 0; i < CKPT_NETDEV_ADDR_MAX; i++) {
+		struct ckpt_netdev_addr_handler *h;
+
+		h = get_addr_handler(i);
+		if (!h)
+			continue;
+		else if (IS_ERR(h)) {
+			addrs = PTR_ERR(h);
+			ckpt_err(ctx, addrs,
+				 "Unable to handle netdev addr type %s\n",
+				 addr_modules[i]);
+			break;
+		}
+
+		addrs = h->checkpoint_addr(ctx, dev, addrs, max, abuf);
+		put_addr_handler(h);
+		if (addrs == -E2BIG) {
 			read_unlock(&dev_base_lock);
-			max *= 2;
 			goto retry;
-		}
+		} else if (addrs < 0)
+			break;
 	}
 
 	read_unlock(&dev_base_lock);
@@ -210,7 +268,7 @@ struct ckpt_hdr_netdev *ckpt_netdev_base(struct ckpt_ctx *ctx,
 		goto out;
 
 	*addrs = NULL;
-	ret = h->inet_addrs = ckpt_netdev_inet_addrs(dev->ip_ptr, addrs);
+	ret = h->inet_addrs = ckpt_netdev_inet_addrs(ctx, dev, addrs);
 	if (ret < 0)
 		goto out;
 
@@ -307,49 +365,21 @@ static int restore_in_addrs(struct ckpt_ctx *ctx,
 
 	for (i = 0; i < naddrs; i++) {
 		struct ckpt_netdev_addr *addr = &addrs[i];
-		struct ifreq req;
-		struct sockaddr_in *inaddr;
-
-		if (addr->type != CKPT_NETDEV_ADDR_IPV4) {
-			ret = -EINVAL;
-			ckpt_err(ctx, ret, "Unsupported netdev addr type %i\n",
-				 addr->type);
-			break;
-		}
-
-		ckpt_debug("restoring %s: %x/%x/%x\n", dev->name,
-			   addr->inet4_address,
-			   addr->inet4_mask,
-			   addr->inet4_broadcast);
-
-		memcpy(req.ifr_name, dev->name, IFNAMSIZ);
-
-		inaddr = (struct sockaddr_in *)&req.ifr_addr;
-		inaddr->sin_addr.s_addr = ntohl(addr->inet4_address);
-		inaddr->sin_family = AF_INET;
-		ret = __kern_devinet_ioctl(net, SIOCSIFADDR, &req);
-		if (ret < 0) {
-			ckpt_err(ctx, ret, "Failed to set address\n");
-			break;
-		}
-
-		inaddr = (struct sockaddr_in *)&req.ifr_addr;
-		inaddr->sin_addr.s_addr = ntohl(addr->inet4_mask);
-		inaddr->sin_family = AF_INET;
-		ret = __kern_devinet_ioctl(net, SIOCSIFNETMASK, &req);
-		if (ret < 0) {
-			ckpt_err(ctx, ret, "Failed to set netmask\n");
+		struct ckpt_netdev_addr_handler *h;
+
+		h = get_addr_handler(addr->type);
+		if (!h || IS_ERR(h)) {
+			ret = h ? PTR_ERR(h) : -EINVAL;
+			ckpt_err(ctx, ret,
+				 "Netdev addr type %s not supported\n",
+				 addr_modules[addr->type]);
 			break;
 		}
 
-		inaddr = (struct sockaddr_in *)&req.ifr_addr;
-		inaddr->sin_addr.s_addr = ntohl(addr->inet4_broadcast);
-		inaddr->sin_family = AF_INET;
-		ret = __kern_devinet_ioctl(net, SIOCSIFBRDADDR, &req);
-		if (ret < 0) {
-			ckpt_err(ctx, ret, "Failed to set broadcast\n");
+		ret = h->restore_addr(ctx, dev, net, addr);
+		put_addr_handler(h);
+		if (ret < 0)
 			break;
-		}
 	}
 
  out:
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 26dec2b..fa7799c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -54,6 +54,7 @@
 #include <linux/sysctl.h>
 #endif
 #include <linux/kmod.h>
+#include <linux/checkpoint.h>
 
 #include <net/arp.h>
 #include <net/ip.h>
@@ -1616,6 +1617,76 @@ static __net_initdata struct pernet_operations devinet_ops = {
 	.exit = devinet_exit_net,
 };
 
+#ifdef CONFIG_NETNS_CHECKPOINT
+static int devinet_checkpoint(struct ckpt_ctx *ctx, struct net_device *dev,
+			      int index, int max,
+			      struct ckpt_netdev_addr *addrs)
+{
+	struct in_device *indev = dev->ip_ptr;
+	struct in_ifaddr *addr = indev->ifa_list;
+
+	for (addr = indev->ifa_list; addr ; addr = addr->ifa_next) {
+		addrs[index].type = CKPT_NETDEV_ADDR_IPV4;
+		addrs[index].inet4_local = htonl(addr->ifa_local);
+		addrs[index].inet4_address = htonl(addr->ifa_address);
+		addrs[index].inet4_mask = htonl(addr->ifa_mask);
+		addrs[index].inet4_broadcast = htonl(addr->ifa_broadcast);
+
+		if (++index >= max)
+			return -E2BIG;
+	}
+
+	return index;
+}
+
+static int devinet_restore(struct ckpt_ctx *ctx, struct net_device *dev,
+			   struct net *net, struct ckpt_netdev_addr *addr)
+{
+	struct ifreq req;
+	struct sockaddr_in *inaddr;
+	int ret;
+	mm_segment_t fs = get_fs();
+
+	set_fs(KERNEL_DS);
+
+	inaddr = (struct sockaddr_in *)&req.ifr_addr;
+	inaddr->sin_family = AF_INET;
+	memcpy(req.ifr_name, dev->name, IFNAMSIZ);
+
+	inaddr->sin_addr.s_addr = ntohl(addr->inet4_address);
+	ret = devinet_ioctl(net, SIOCSIFADDR, &req);
+	if (ret < 0) {
+		ckpt_err(ctx, ret, "Failed to set address\n");
+		goto out;
+	}
+
+	inaddr->sin_addr.s_addr = ntohl(addr->inet4_mask);
+	ret = devinet_ioctl(net, SIOCSIFNETMASK, &req);
+	if (ret < 0) {
+		ckpt_err(ctx, ret, "Failed to set netmask\n");
+		goto out;
+	}
+
+	inaddr->sin_addr.s_addr = ntohl(addr->inet4_broadcast);
+	ret = devinet_ioctl(net, SIOCSIFBRDADDR, &req);
+	if (ret < 0) {
+		ckpt_err(ctx, ret, "Failed to set broadcast\n");
+		goto out;
+	}
+ out:
+	set_fs(fs);
+
+	return ret;
+}
+
+static struct ckpt_netdev_addr_handler devinet_ckpt = {
+	.type			= CKPT_NETDEV_ADDR_IPV4,
+	.owner			= THIS_MODULE,
+	.checkpoint_addr	= devinet_checkpoint,
+	.restore_addr		= devinet_restore,
+};
+#endif /* CONFIG_NETNS_CHECKPOINT */
+
 void __init devinet_init(void)
 {
 	register_pernet_subsys(&devinet_ops);
@@ -1626,5 +1697,9 @@ void __init devinet_init(void)
 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
+
+#ifdef CONFIG_NETNS_CHECKPOINT
+	ckpt_netdev_addr_register(&devinet_ckpt);
+#endif
 }
 
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 2/4] Fail checkpoint if IPv4 multicast addresses are configured
       [not found] ` <1270748932-26745-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2010-04-08 17:48   ` [PATCH 1/4] Modularize the handling of " Dan Smith
@ 2010-04-08 17:48   ` Dan Smith
       [not found]     ` <1270748932-26745-3-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2010-04-08 17:48   ` [PATCH 3/4] Add IPv6 address checkpoint handler Dan Smith
  2010-04-08 17:48   ` [PATCH 4/4] C/R: Fix storing IPv6 addresses and handle the "ipv6only" socket flag (v2) Dan Smith
  3 siblings, 1 reply; 23+ messages in thread
From: Dan Smith @ 2010-04-08 17:48 UTC (permalink / raw)
  To: containers-qjLDD68F18O7TbgM5vRIOg

This causes checkpoint to fail if an interface actually has a
multicast membership.

Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 net/ipv4/devinet.c |   15 +++++++++++++++
 1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index fa7799c..83411fc 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1624,6 +1624,7 @@ static int devinet_checkpoint(struct ckpt_ctx *ctx, struct net_device *dev,
 {
 	struct in_device *indev = dev->ip_ptr;
 	struct in_ifaddr *addr = indev->ifa_list;
+	struct ip_mc_list *mcaddr;
 
 	for (addr = indev->ifa_list; addr ; addr = addr->ifa_next) {
 		addrs[index].type = CKPT_NETDEV_ADDR_IPV4;
@@ -1636,6 +1637,20 @@ static int devinet_checkpoint(struct ckpt_ctx *ctx, struct net_device *dev,
 			return -E2BIG;
 	}
 
+	for (mcaddr = indev->mc_list; mcaddr; mcaddr = mcaddr->next) {
+		if ((mcaddr->multiaddr & IGMP_LOCAL_GROUP_MASK) ==
+		    IGMP_LOCAL_GROUP)
+			continue;
+
+		/* TODO */
+
+		/* Multicast addresses are not supported, so do not
+		 * allow checkpoint to continue if one is assigned
+		 */
+		ckpt_debug("ipv4 multicast addresses are not supported\n");
+		return -EINVAL;
+	}
+
 	return index;
 }
 
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 3/4] Add IPv6 address checkpoint handler
       [not found] ` <1270748932-26745-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2010-04-08 17:48   ` [PATCH 1/4] Modularize the handling of " Dan Smith
  2010-04-08 17:48   ` [PATCH 2/4] Fail checkpoint if IPv4 multicast addresses are configured Dan Smith
@ 2010-04-08 17:48   ` Dan Smith
       [not found]     ` <1270748932-26745-4-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2010-04-08 17:48   ` [PATCH 4/4] C/R: Fix storing IPv6 addresses and handle the "ipv6only" socket flag (v2) Dan Smith
  3 siblings, 1 reply; 23+ messages in thread
From: Dan Smith @ 2010-04-08 17:48 UTC (permalink / raw)
  To: containers-qjLDD68F18O7TbgM5vRIOg

Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 include/linux/checkpoint_hdr.h |    8 +++
 net/ipv6/addrconf.c            |   95 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+), 0 deletions(-)

diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 13bf62c..98aa79c 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -804,6 +804,7 @@ struct ckpt_hdr_netdev {
 
 enum ckpt_netdev_addr_types {
 	CKPT_NETDEV_ADDR_IPV4,
+	CKPT_NETDEV_ADDR_IPV6,
 	CKPT_NETDEV_ADDR_MAX
 };
 
@@ -816,6 +817,13 @@ struct ckpt_netdev_addr {
 			__be32 inet4_mask;
 			__be32 inet4_broadcast;
 		};
+		struct {
+			struct in6_addr inet6_addr;
+			__u32  inet6_prefix_len;
+			__u32  inet6_valid_lft;
+			__u32  inet6_prefered_lft;
+			__u16  inet6_scope;
+		};
 	} __attribute__((aligned(8)));
 } __attribute__((aligned(8)));
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 143791d..75cc3b8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -87,6 +87,8 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
+#include <linux/checkpoint.h>
+
 /* Set to 3 to get tracing... */
 #define ACONF_DEBUG 2
 
@@ -4514,6 +4516,91 @@ int unregister_inet6addr_notifier(struct notifier_block *nb)
 
 EXPORT_SYMBOL(unregister_inet6addr_notifier);
 
+#ifdef CONFIG_NETNS_CHECKPOING
+static int inet6_checkpoint(struct ckpt_ctx *ctx, struct net_device *dev,
+			    int index, int max,
+			    struct ckpt_netdev_addr *addrs)
+{
+	struct inet6_dev *indev = dev->ip6_ptr;
+	struct inet6_ifaddr *addr;
+	struct ifmcaddr6 *mcaddr;
+	struct ifacaddr6 *acaddr;
+
+	for (addr = indev->addr_list; addr; addr = addr->if_next) {
+		if (ipv6_addr_scope(&addr->addr))
+			continue; /* Ignore non-global scope addresses */
+
+		addrs[index].type = CKPT_NETDEV_ADDR_IPV6;
+
+		ipv6_addr_copy(&addrs[index].inet6_addr, &addr->addr);
+
+		ckpt_debug("Checkpointed inet6: %pI6\n", &addr->addr);
+
+		addrs[index].inet6_prefix_len = addr->prefix_len;
+		addrs[index].inet6_valid_lft = addr->valid_lft;
+		addrs[index].inet6_prefered_lft = addr->prefered_lft;
+		addrs[index].inet6_scope = addr->scope;
+
+		if (++index >= max)
+			return -E2BIG;
+	}
+
+	for (mcaddr = indev->mc_list; mcaddr; mcaddr = mcaddr->next) {
+		if (ipv6_addr_scope(&mcaddr->mca_addr))
+			continue; /* Ignore non-global scope addresses */
+
+		/* TODO */
+
+		/* Multicast addresses are not supported, so do not
+		 * allow checkpoint to continue if one is assigned
+		 */
+		ckpt_debug("ipv6 multicast addresses are not supported\n");
+		return -EINVAL;
+	}
+
+	for (acaddr = indev->ac_list; acaddr; acaddr = acaddr->aca_next) {
+		if (ipv6_addr_scope(&acaddr->aca_addr))
+			continue; /* Ignore non-global scope addresses */
+
+		/* TODO */
+
+		/* Anycast addresses are not supported, so do not
+		 * allow checkpoint to continue if one is assigned
+		 */
+		ckpt_debug("ipv6 anycast addresses are not supported\n");
+		return -EINVAL;
+	}
+
+	return index;
+}
+
+static int inet6_restore(struct ckpt_ctx *ctx,
+			 struct net_device *dev,
+			 struct net *net,
+			 struct ckpt_netdev_addr *addr)
+{
+	int ret;
+
+	rtnl_lock();
+	ret = inet6_addr_add(net, dev->ifindex, &addr->inet6_addr,
+			     addr->inet6_prefix_len, IFA_F_PERMANENT,
+			     INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+	rtnl_unlock();
+	if (ret < 0)
+		ckpt_err(ctx, ret, "Failed to set address %pI6\n",
+			 &addr->inet6_addr);
+
+	return ret;
+}
+
+struct ckpt_netdev_addr_handler inet6_addr_handler = {
+	.type =			CKPT_NETDEV_ADDR_IPV6,
+	.owner =		THIS_MODULE,
+	.checkpoint_addr =	inet6_checkpoint,
+	.restore_addr =		inet6_restore,
+};
+#endif /* CONFIG_NETNS_CHECKPOINT */
+
 /*
  *	Init / cleanup code
  */
@@ -4572,6 +4659,10 @@ int __init addrconf_init(void)
 
 	ipv6_addr_label_rtnl_register();
 
+#ifdef CONFIG_NETNS_CHECKPOINT
+	ckpt_netdev_addr_register(&inet6_addr_handler);
+#endif
+
 	return 0;
 errout:
 	unregister_netdevice_notifier(&ipv6_dev_notf);
@@ -4590,6 +4681,10 @@ void addrconf_cleanup(void)
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 	unregister_pernet_subsys(&addrconf_ops);
 
+#ifdef CONFIG_NETNS_CHECKPOINT
+	ckpt_netdev_addr_unregister(&inet6_addr_handler);
+#endif
+
 	rtnl_lock();
 
 	/* clean dev list */
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 4/4] C/R: Fix storing IPv6 addresses and handle the "ipv6only" socket flag (v2)
       [not found] ` <1270748932-26745-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
                     ` (2 preceding siblings ...)
  2010-04-08 17:48   ` [PATCH 3/4] Add IPv6 address checkpoint handler Dan Smith
@ 2010-04-08 17:48   ` Dan Smith
       [not found]     ` <1270748932-26745-5-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  3 siblings, 1 reply; 23+ messages in thread
From: Dan Smith @ 2010-04-08 17:48 UTC (permalink / raw)
  To: containers-qjLDD68F18O7TbgM5vRIOg

The first item is a result of sockaddr_in6 being larger than the base
sockaddr structure, thus not being long enough to reserve enough space in
the checkpoint header.

The second comes into play when things (like sshd) bind to INADDR6_ANY,
set the "ipv6only" socket flag and then bind an IPv4 socket to the same
port.

Changes in v2:
 - Export the inet_* symbols used by inet6 so that CONFIG_IPV6=m
   will compile

Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 include/linux/checkpoint_hdr.h |   13 +++++++++++--
 net/ipv4/checkpoint.c          |   37 +++++++++++++++++++++++--------------
 net/ipv6/af_inet6.c            |    2 ++
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 98aa79c..7fde6b5 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -758,12 +758,21 @@ struct ckpt_hdr_socket_inet {
 		struct in6_addr saddr;
 		struct in6_addr rcv_saddr;
 		struct in6_addr daddr;
+		__u8 ipv6only;
 	} inet6 __attribute__ ((aligned(8)));
 
 	__u32 laddr_len;
 	__u32 raddr_len;
-	struct sockaddr_in laddr;
-	struct sockaddr_in raddr;
+	union {
+		struct sockaddr laddr;
+		struct sockaddr_in laddr4;
+		struct sockaddr_in6 laddr6;
+	};
+	union {
+		struct sockaddr raddr;
+		struct sockaddr_in raddr4;
+		struct sockaddr_in6 raddr6;
+	};
 } __attribute__((aligned(8)));
 
 struct ckpt_hdr_file_socket {
diff --git a/net/ipv4/checkpoint.c b/net/ipv4/checkpoint.c
index b4024e7..1c43570 100644
--- a/net/ipv4/checkpoint.c
+++ b/net/ipv4/checkpoint.c
@@ -190,12 +190,14 @@ static int sock_inet_restore_connection(struct sock *sk,
 	struct inet_sock *inet = inet_sk(sk);
 	int tcp_gso = sk->sk_family == AF_INET ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
 
-	inet->inet_daddr = hh->raddr.sin_addr.s_addr;
-	inet->inet_saddr = hh->laddr.sin_addr.s_addr;
-	inet->inet_rcv_saddr = inet->inet_saddr;
+	if (sk->sk_family == AF_INET) {
+		inet->inet_daddr = hh->raddr4.sin_addr.s_addr;
+		inet->inet_saddr = hh->laddr4.sin_addr.s_addr;
+		inet->inet_rcv_saddr = inet->inet_saddr;
 
-	inet->inet_dport = hh->raddr.sin_port;
-	inet->inet_sport = hh->laddr.sin_port;
+		inet->inet_dport = hh->raddr4.sin_port;
+		inet->inet_sport = hh->laddr4.sin_port;
+	}
 
 	if (sk->sk_protocol == IPPROTO_TCP)
 		sk->sk_gso_type = tcp_gso;
@@ -266,6 +268,7 @@ static int sock_inet_cptrst(struct ckpt_ctx *ctx,
 			ipv6_addr_copy(&inet6->rcv_saddr, &hh->inet6.rcv_saddr);
 			ipv6_addr_copy(&inet6->daddr, &hh->inet6.daddr);
 		}
+		CKPT_COPY(op, hh->inet6.ipv6only, inet6->ipv6only);
 	}
 
 	return ret;
@@ -281,8 +284,8 @@ int inet_checkpoint(struct ckpt_ctx *ctx, struct socket *sock)
 		return -EINVAL;
 
 	ret = ckpt_sock_getnames(ctx, sock,
-				(struct sockaddr *)&in->laddr, &in->laddr_len,
-				(struct sockaddr *)&in->raddr, &in->raddr_len);
+				&in->laddr, &in->laddr_len,
+				&in->raddr, &in->raddr_len);
 	if (ret)
 		goto out;
 
@@ -296,11 +299,13 @@ int inet_checkpoint(struct ckpt_ctx *ctx, struct socket *sock)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(inet_checkpoint);
 
 int inet_collect(struct ckpt_ctx *ctx, struct socket *sock)
 {
 	return ckpt_obj_collect(ctx, sock->sk, CKPT_OBJ_SOCK);
 }
+EXPORT_SYMBOL_GPL(inet_collect);
 
 static int inet_read_buffer(struct ckpt_ctx *ctx,
 			    struct sk_buff_head *queue,
@@ -387,19 +392,19 @@ static int inet_precheck(struct socket *sock, struct ckpt_hdr_socket_inet *in)
 	__u8 nonagle_mask = TCP_NAGLE_OFF | TCP_NAGLE_CORK | TCP_NAGLE_PUSH;
 	__u8 ecn_mask = TCP_ECN_OK | TCP_ECN_QUEUE_CWR | TCP_ECN_DEMAND_CWR;
 
-	if ((htons(in->laddr.sin_port) < PROT_SOCK) &&
+	if ((htons(in->laddr4.sin_port) < PROT_SOCK) &&
 	    !capable(CAP_NET_BIND_SERVICE)) {
 		ckpt_debug("unable to bind to port %hu\n",
-			   htons(in->laddr.sin_port));
+			   htons(in->laddr4.sin_port));
 		return -EINVAL;
 	}
 
-	if (in->laddr_len > sizeof(struct sockaddr_in)) {
+	if (in->laddr_len > sizeof(in->laddr6)) {
 		ckpt_debug("laddr_len is too big\n");
 		return -EINVAL;
 	}
 
-	if (in->raddr_len > sizeof(struct sockaddr_in)) {
+	if (in->raddr_len > sizeof(in->laddr6)) {
 		ckpt_debug("raddr_len is too big\n");
 		return -EINVAL;
 	}
@@ -496,11 +501,14 @@ int inet_restore(struct ckpt_ctx *ctx,
 	 */
 	if ((h->sock.state == TCP_LISTEN) ||
 	    ((h->sock.state == TCP_CLOSE) && (in->laddr_len > 0))) {
+		if (in->inet6.ipv6only) {
+			struct ipv6_pinfo *np = inet6_sk(sock->sk);
+			np->ipv6only = 1;
+		}
+
 		sock->sk->sk_reuse = 2;
 		inet_sk(sock->sk)->freebind = 1;
-		ret = sock->ops->bind(sock,
-				      (struct sockaddr *)&in->laddr,
-				      in->laddr_len);
+		ret = sock->ops->bind(sock, &in->laddr, in->laddr_len);
 		ckpt_debug("inet bind: %i\n", ret);
 		if (ret < 0)
 			goto out;
@@ -547,3 +555,4 @@ int inet_restore(struct ckpt_ctx *ctx,
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(inet_restore);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 12e69d3..9acb55a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -523,6 +523,8 @@ const struct proto_ops inet6_stream_ops = {
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = tcp_sendpage,
 	.splice_read	   = tcp_splice_read,
+	.checkpoint	   = inet_checkpoint,
+	.restore	   = inet_restore,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] Modularize the handling of netdev address c/r
       [not found]     ` <1270748932-26745-2-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-04-12 16:08       ` Serge E. Hallyn
  2010-04-25 21:04       ` Oren Laadan
  1 sibling, 0 replies; 23+ messages in thread
From: Serge E. Hallyn @ 2010-04-12 16:08 UTC (permalink / raw)
  To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

Quoting Dan Smith (danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org):
> This moves the INET4 address checkpoint and restart routines into
> net/ipv4/devinet.c and introduces a registration method to present
> the checkpoint code with the handler functions.
> 
> This makes it easier to add additional address types, and also
> makes the cases where inet4 is absent, inet6 is a module, etc much
> easier.  It also elminates the need for a couple of helper functions.
> 
> Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
>  include/linux/checkpoint.h     |   18 +++++-
>  include/linux/checkpoint_hdr.h |    1 +
>  net/checkpoint_dev.c           |  152 ++++++++++++++++++++++++----------------
>  net/ipv4/devinet.c             |   75 ++++++++++++++++++++
>  4 files changed, 184 insertions(+), 62 deletions(-)
> 
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index 96693e2..5fdbd01 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -132,7 +132,8 @@ extern void *restore_netdev(struct ckpt_ctx *ctx);
> 
>  extern int ckpt_netdev_in_init_netns(struct ckpt_ctx *ctx,
>  				     struct net_device *dev);
> -extern int ckpt_netdev_inet_addrs(struct in_device *indev,
> +extern int ckpt_netdev_inet_addrs(struct ckpt_ctx *ctx,
> +				  struct net_device *dev,
>  				  struct ckpt_netdev_addr *list[]);
>  extern int ckpt_netdev_hwaddr(struct net_device *dev,
>  			      struct ckpt_hdr_netdev *h);
> @@ -513,6 +514,21 @@ extern void _ckpt_msg_complete(struct ckpt_ctx *ctx);
>  	_do_ckpt_msg(ctx, err, "[E @ %s:%d]" fmt, __func__, __LINE__, ##args); \
>  } while (0)
> 
> +struct ckpt_netdev_addr_handler {
> +	int type;
> +	struct module *owner;
> +	int (*checkpoint_addr)(struct ckpt_ctx *ctx,
> +			       struct net_device *dev,
> +			       int index, int max,
> +			       struct ckpt_netdev_addr *addrs);
> +	int (*restore_addr)(struct ckpt_ctx *ctx,
> +			    struct net_device *dev,
> +			    struct net *net,
> +			    struct ckpt_netdev_addr *addr);
> +};
> +extern int ckpt_netdev_addr_register(struct ckpt_netdev_addr_handler *);
> +extern int ckpt_netdev_addr_unregister(struct ckpt_netdev_addr_handler *);
> +
>  #endif /* CONFIG_CHECKPOINT */
>  #endif /* __KERNEL__ */
> 
> diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
> index 36386ad..13bf62c 100644
> --- a/include/linux/checkpoint_hdr.h
> +++ b/include/linux/checkpoint_hdr.h
> @@ -804,6 +804,7 @@ struct ckpt_hdr_netdev {
> 
>  enum ckpt_netdev_addr_types {
>  	CKPT_NETDEV_ADDR_IPV4,
> +	CKPT_NETDEV_ADDR_MAX
>  };
> 
>  struct ckpt_netdev_addr {
> diff --git a/net/checkpoint_dev.c b/net/checkpoint_dev.c
> index 5a4a95b..4ef06e3 100644
> --- a/net/checkpoint_dev.c
> +++ b/net/checkpoint_dev.c
> @@ -12,11 +12,11 @@
>  #include <linux/sched.h>
>  #include <linux/if.h>
>  #include <linux/if_arp.h>
> -#include <linux/inetdevice.h>
>  #include <linux/veth.h>
>  #include <linux/checkpoint.h>
>  #include <linux/checkpoint_hdr.h>
>  #include <linux/deferqueue.h>
> +#include <linux/module.h>
> 
>  #include <net/net_namespace.h>
>  #include <net/sch_generic.h>
> @@ -34,17 +34,64 @@ struct mvl_newlink {
> 
>  typedef int (*new_link_fn)(struct sk_buff *, void *);
> 
> -static int __kern_devinet_ioctl(struct net *net, unsigned int cmd, void *arg)
> +static struct ckpt_netdev_addr_handler *addr_handlers[CKPT_NETDEV_ADDR_MAX];
> +
> +static char *addr_modules[] = {
> +	"ipv4",		/* CKPT_NETDEV_ADDR_IPV4 */
> +	"ipv6",		/* CKPT_NETDEV_ADDR_IPV6 */
> +};

Looks good to me, thanks Dan.

Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

Just one comment:

Should ckpt_netdev_addr_types/CKPT_NETDEV_ADDR_MAX somehow be
sanity-checked against the size of array?


-serge

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/4] Fail checkpoint if IPv4 multicast addresses are configured
       [not found]     ` <1270748932-26745-3-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-04-12 16:09       ` Serge E. Hallyn
  0 siblings, 0 replies; 23+ messages in thread
From: Serge E. Hallyn @ 2010-04-12 16:09 UTC (permalink / raw)
  To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

Quoting Dan Smith (danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org):
> This causes checkpoint to fail if an interface actually has a
> multicast membership.
> 
> Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
>  net/ipv4/devinet.c |   15 +++++++++++++++
>  1 files changed, 15 insertions(+), 0 deletions(-)
> 
> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
> index fa7799c..83411fc 100644
> --- a/net/ipv4/devinet.c
> +++ b/net/ipv4/devinet.c
> @@ -1624,6 +1624,7 @@ static int devinet_checkpoint(struct ckpt_ctx *ctx, struct net_device *dev,
>  {
>  	struct in_device *indev = dev->ip_ptr;
>  	struct in_ifaddr *addr = indev->ifa_list;
> +	struct ip_mc_list *mcaddr;
> 
>  	for (addr = indev->ifa_list; addr ; addr = addr->ifa_next) {
>  		addrs[index].type = CKPT_NETDEV_ADDR_IPV4;
> @@ -1636,6 +1637,20 @@ static int devinet_checkpoint(struct ckpt_ctx *ctx, struct net_device *dev,
>  			return -E2BIG;
>  	}
> 
> +	for (mcaddr = indev->mc_list; mcaddr; mcaddr = mcaddr->next) {
> +		if ((mcaddr->multiaddr & IGMP_LOCAL_GROUP_MASK) ==
> +		    IGMP_LOCAL_GROUP)
> +			continue;
> +
> +		/* TODO */
> +
> +		/* Multicast addresses are not supported, so do not
> +		 * allow checkpoint to continue if one is assigned
> +		 */
> +		ckpt_debug("ipv4 multicast addresses are not supported\n");

Could you use a ckpt_err() here?  This ultimately will be the most informative
reason for the failed checkpoint...

> +		return -EINVAL;
> +	}
> +
>  	return index;
>  }
> 
> -- 
> 1.6.2.5

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] Add IPv6 address checkpoint handler
       [not found]     ` <1270748932-26745-4-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-04-12 16:11       ` Serge E. Hallyn
       [not found]         ` <20100412161148.GC23380-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2010-04-15 19:35       ` Brian Haley
  1 sibling, 1 reply; 23+ messages in thread
From: Serge E. Hallyn @ 2010-04-12 16:11 UTC (permalink / raw)
  To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

Quoting Dan Smith (danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org):
> Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
>  include/linux/checkpoint_hdr.h |    8 +++
>  net/ipv6/addrconf.c            |   95 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 103 insertions(+), 0 deletions(-)
> 
> diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
> index 13bf62c..98aa79c 100644
> --- a/include/linux/checkpoint_hdr.h
> +++ b/include/linux/checkpoint_hdr.h
> @@ -804,6 +804,7 @@ struct ckpt_hdr_netdev {
> 
>  enum ckpt_netdev_addr_types {
>  	CKPT_NETDEV_ADDR_IPV4,
> +	CKPT_NETDEV_ADDR_IPV6,
>  	CKPT_NETDEV_ADDR_MAX
>  };
> 
> @@ -816,6 +817,13 @@ struct ckpt_netdev_addr {
>  			__be32 inet4_mask;
>  			__be32 inet4_broadcast;
>  		};
> +		struct {
> +			struct in6_addr inet6_addr;
> +			__u32  inet6_prefix_len;
> +			__u32  inet6_valid_lft;
> +			__u32  inet6_prefered_lft;
> +			__u16  inet6_scope;
> +		};
>  	} __attribute__((aligned(8)));
>  } __attribute__((aligned(8)));
> 
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index 143791d..75cc3b8 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -87,6 +87,8 @@
>  #include <linux/proc_fs.h>
>  #include <linux/seq_file.h>
> 
> +#include <linux/checkpoint.h>
> +
>  /* Set to 3 to get tracing... */
>  #define ACONF_DEBUG 2
> 
> @@ -4514,6 +4516,91 @@ int unregister_inet6addr_notifier(struct notifier_block *nb)
> 
>  EXPORT_SYMBOL(unregister_inet6addr_notifier);
> 
> +#ifdef CONFIG_NETNS_CHECKPOING

How could that be defined?  :)

> +static int inet6_checkpoint(struct ckpt_ctx *ctx, struct net_device *dev,
> +			    int index, int max,
> +			    struct ckpt_netdev_addr *addrs)
> +{
> +	struct inet6_dev *indev = dev->ip6_ptr;
> +	struct inet6_ifaddr *addr;
> +	struct ifmcaddr6 *mcaddr;
> +	struct ifacaddr6 *acaddr;
> +
> +	for (addr = indev->addr_list; addr; addr = addr->if_next) {
> +		if (ipv6_addr_scope(&addr->addr))
> +			continue; /* Ignore non-global scope addresses */
> +
> +		addrs[index].type = CKPT_NETDEV_ADDR_IPV6;
> +
> +		ipv6_addr_copy(&addrs[index].inet6_addr, &addr->addr);
> +
> +		ckpt_debug("Checkpointed inet6: %pI6\n", &addr->addr);
> +
> +		addrs[index].inet6_prefix_len = addr->prefix_len;
> +		addrs[index].inet6_valid_lft = addr->valid_lft;
> +		addrs[index].inet6_prefered_lft = addr->prefered_lft;
> +		addrs[index].inet6_scope = addr->scope;
> +
> +		if (++index >= max)
> +			return -E2BIG;
> +	}
> +
> +	for (mcaddr = indev->mc_list; mcaddr; mcaddr = mcaddr->next) {
> +		if (ipv6_addr_scope(&mcaddr->mca_addr))
> +			continue; /* Ignore non-global scope addresses */
> +
> +		/* TODO */
> +
> +		/* Multicast addresses are not supported, so do not
> +		 * allow checkpoint to continue if one is assigned
> +		 */
> +		ckpt_debug("ipv6 multicast addresses are not supported\n");

Again, I'd prefer ckpt_err here.

Note that in my last email that really was a q - if you're under
spinlock here, then you can't use ckpt_err().

> +		return -EINVAL;
> +	}
> +
> +	for (acaddr = indev->ac_list; acaddr; acaddr = acaddr->aca_next) {
> +		if (ipv6_addr_scope(&acaddr->aca_addr))
> +			continue; /* Ignore non-global scope addresses */
> +
> +		/* TODO */
> +
> +		/* Anycast addresses are not supported, so do not
> +		 * allow checkpoint to continue if one is assigned
> +		 */
> +		ckpt_debug("ipv6 anycast addresses are not supported\n");

and here

> +		return -EINVAL;
> +	}
> +
> +	return index;
> +}
> +
> +static int inet6_restore(struct ckpt_ctx *ctx,
> +			 struct net_device *dev,
> +			 struct net *net,
> +			 struct ckpt_netdev_addr *addr)
> +{
> +	int ret;
> +
> +	rtnl_lock();
> +	ret = inet6_addr_add(net, dev->ifindex, &addr->inet6_addr,
> +			     addr->inet6_prefix_len, IFA_F_PERMANENT,
> +			     INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
> +	rtnl_unlock();
> +	if (ret < 0)
> +		ckpt_err(ctx, ret, "Failed to set address %pI6\n",
> +			 &addr->inet6_addr);
> +
> +	return ret;
> +}
> +
> +struct ckpt_netdev_addr_handler inet6_addr_handler = {
> +	.type =			CKPT_NETDEV_ADDR_IPV6,
> +	.owner =		THIS_MODULE,
> +	.checkpoint_addr =	inet6_checkpoint,
> +	.restore_addr =		inet6_restore,
> +};
> +#endif /* CONFIG_NETNS_CHECKPOINT */
> +
>  /*
>   *	Init / cleanup code
>   */
> @@ -4572,6 +4659,10 @@ int __init addrconf_init(void)
> 
>  	ipv6_addr_label_rtnl_register();
> 
> +#ifdef CONFIG_NETNS_CHECKPOINT
> +	ckpt_netdev_addr_register(&inet6_addr_handler);
> +#endif
> +
>  	return 0;
>  errout:
>  	unregister_netdevice_notifier(&ipv6_dev_notf);
> @@ -4590,6 +4681,10 @@ void addrconf_cleanup(void)
>  	unregister_netdevice_notifier(&ipv6_dev_notf);
>  	unregister_pernet_subsys(&addrconf_ops);
> 
> +#ifdef CONFIG_NETNS_CHECKPOINT
> +	ckpt_netdev_addr_unregister(&inet6_addr_handler);
> +#endif
> +
>  	rtnl_lock();
> 
>  	/* clean dev list */
> -- 
> 1.6.2.5

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] Add IPv6 address checkpoint handler
       [not found]         ` <20100412161148.GC23380-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-04-12 17:39           ` Dan Smith
       [not found]             ` <871vekbmen.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Dan Smith @ 2010-04-12 17:39 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

>> +#ifdef CONFIG_NETNS_CHECKPOING

SH> How could that be defined?  :)

Hmm, I must have typo'd that after I did all my config testing because
otherwise I wouldn't have been able to test checkpointing ipv6 stuff.

SH> Again, I'd prefer ckpt_err here.

SH> Note that in my last email that really was a q - if you're under
SH> spinlock here, then you can't use ckpt_err().

Right, the point of this loop was to iterate the list quickly while
holding the device lock, so we could write out the results after we
release it.

I think these two cases (and the ipv4 case) are pretty unlikely to be
a problem as they would only be triggered if you actually have
active multicast or anycast sessions configured.  This will not
trigger for the default addresses.

I don't think that dropping the lock to do ckpt_err() would be very
pretty, nor would introducing a result string for an error message.
This is plumbed a couple levels deep.

Is there some way you see this being handled better?

-- 
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] Add IPv6 address checkpoint handler
       [not found]             ` <871vekbmen.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
@ 2010-04-12 17:47               ` Serge E. Hallyn
  2010-04-12 17:58                 ` Dan Smith
       [not found]                 ` <20100412174756.GA15269-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 2 replies; 23+ messages in thread
From: Serge E. Hallyn @ 2010-04-12 17:47 UTC (permalink / raw)
  To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

Quoting Dan Smith (danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org):
> >> +#ifdef CONFIG_NETNS_CHECKPOING
> 
> SH> How could that be defined?  :)
> 
> Hmm, I must have typo'd that after I did all my config testing because
> otherwise I wouldn't have been able to test checkpointing ipv6 stuff.
> 
> SH> Again, I'd prefer ckpt_err here.
> 
> SH> Note that in my last email that really was a q - if you're under
> SH> spinlock here, then you can't use ckpt_err().
> 
> Right, the point of this loop was to iterate the list quickly while
> holding the device lock, so we could write out the results after we
> release it.
> 
> I think these two cases (and the ipv4 case) are pretty unlikely to be
> a problem as they would only be triggered if you actually have
> active multicast or anycast sessions configured.  This will not
> trigger for the default addresses.
> 
> I don't think that dropping the lock to do ckpt_err() would be very
> pretty, nor would introducing a result string for an error message.

Agreed.

> This is plumbed a couple levels deep.
> 
> Is there some way you see this being handled better?

Not really...  looks like we're doing what we can at the moment
then, good enough :)

thanks,
-serge

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] Add IPv6 address checkpoint handler
  2010-04-12 17:47               ` Serge E. Hallyn
@ 2010-04-12 17:58                 ` Dan Smith
       [not found]                 ` <20100412174756.GA15269-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  1 sibling, 0 replies; 23+ messages in thread
From: Dan Smith @ 2010-04-12 17:58 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

SH> Not really...  looks like we're doing what we can at the moment
SH> then, good enough :)

Okay, well, I've got the ifdef issue fixed locally.  I'll wait to
resend until Brian and Oren have a little more time to review the set.

Thanks!

-- 
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] Add IPv6 address checkpoint handler
       [not found]     ` <1270748932-26745-4-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2010-04-12 16:11       ` Serge E. Hallyn
@ 2010-04-15 19:35       ` Brian Haley
       [not found]         ` <4BC76A65.7060909-VXdhtT5mjnY@public.gmane.org>
  1 sibling, 1 reply; 23+ messages in thread
From: Brian Haley @ 2010-04-15 19:35 UTC (permalink / raw)
  To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

Dan Smith wrote:
> +static int inet6_restore(struct ckpt_ctx *ctx,
> +			 struct net_device *dev,
> +			 struct net *net,
> +			 struct ckpt_netdev_addr *addr)
> +{
> +	int ret;
> +
> +	rtnl_lock();
> +	ret = inet6_addr_add(net, dev->ifindex, &addr->inet6_addr,
> +			     addr->inet6_prefix_len, IFA_F_PERMANENT,
> +			     INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
> +	rtnl_unlock();

Is using IFA_F_PERMANENT correct here?  Should you save the flags from
the address when checkpointing?  Permanent means it was added by the
user, not by the kernel, so you could be changing things slightly.

-Brian

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] Add IPv6 address checkpoint handler
       [not found]         ` <4BC76A65.7060909-VXdhtT5mjnY@public.gmane.org>
@ 2010-04-15 19:46           ` Dan Smith
       [not found]             ` <87vdbs1oty.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Dan Smith @ 2010-04-15 19:46 UTC (permalink / raw)
  To: Brian Haley; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

BH> Is using IFA_F_PERMANENT correct here?  Should you save the flags
BH> from the address when checkpointing?  Permanent means it was added
BH> by the user, not by the kernel, so you could be changing things
BH> slightly.

Does the kernel create global scope addresses?  Maybe it does in some
more advanced IPv6 environments, but it seemed like excluding global
scope addresses in checkpoint meant that we only saved (and thus
restore) the permanent ones anyway.

I guess it's a better idea to just save the flags anyhow now that I
have a way to restore them.

-- 
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] Add IPv6 address checkpoint handler
       [not found]             ` <87vdbs1oty.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
@ 2010-04-15 20:32               ` Brian Haley
       [not found]                 ` <4BC777C8.6050102-VXdhtT5mjnY@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Brian Haley @ 2010-04-15 20:32 UTC (permalink / raw)
  To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

Dan Smith wrote:
> BH> Is using IFA_F_PERMANENT correct here?  Should you save the flags
> BH> from the address when checkpointing?  Permanent means it was added
> BH> by the user, not by the kernel, so you could be changing things
> BH> slightly.
> 
> Does the kernel create global scope addresses?  Maybe it does in some
> more advanced IPv6 environments, but it seemed like excluding global
> scope addresses in checkpoint meant that we only saved (and thus
> restore) the permanent ones anyway.

It adds global-scope addresses through the auto-configuration process,
i.e. by receiving a prefix in a router advertisement.

> I guess it's a better idea to just save the flags anyhow now that I
> have a way to restore them.

Yes, and I just realized something else.  This code:

+	ret = inet6_addr_add(net, dev->ifindex, &addr->inet6_addr,
+			     addr->inet6_prefix_len, IFA_F_PERMANENT,
+			     INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);

isn't using the saved lifetimes either, so it won't ever go away.

And calling inet6_addr_add() isn't correct in all cases - you can
use it for manually-configured addresses (marked permanent), but
not for those added through address-autoconfiguration - for those
you'll want to use ipv6_add_addr().  But if you do that you'll
need to duplicate what's done after the add succeeds:

	ifp = ipv6_add_addr(idev, pfx, plen, addr_scope, addr_flags);

        if (!IS_ERR(ifp)) {
                spin_lock_bh(&ifp->lock);
                ifp->valid_lft = valid_lft;
                ifp->prefered_lft = prefered_lft;
                ifp->tstamp = jiffies;
                spin_unlock_bh(&ifp->lock);

                addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
                                      expires, flags);
                /*
                 * Note that section 3.1 of RFC 4429 indicates
                 * that the Optimistic flag should not be set for
                 * manually configured addresses
                 */
                addrconf_dad_start(ifp, 0);
                in6_ifa_put(ifp);
                addrconf_verify(0);
                return 0;
        }

That's just an example, not exactly correct.

-Brian

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] Add IPv6 address checkpoint handler
       [not found]                 ` <4BC777C8.6050102-VXdhtT5mjnY@public.gmane.org>
@ 2010-04-16 15:02                   ` Dan Smith
  0 siblings, 0 replies; 23+ messages in thread
From: Dan Smith @ 2010-04-16 15:02 UTC (permalink / raw)
  To: Brian Haley; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

BH> Yes, and I just realized something else.  This code:

BH> +	ret = inet6_addr_add(net, dev->ifindex, &addr->inet6_addr,
BH> +			     addr->inet6_prefix_len, IFA_F_PERMANENT,
BH> +			     INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);

BH> isn't using the saved lifetimes either, so it won't ever go away.

I again have to plead ignorance here.  Do permanent addresses ever
have anything other than INFINITY_LIFE_TIME?  Assuming not, I think
it's reasonable (for now, at least) to stick to (correctly) saving and
restoring the permanent addresses.  What do you think?

-- 
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/4] Add IPv6 address checkpoint handler
       [not found]                 ` <20100412174756.GA15269-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-04-25 20:49                   ` Oren Laadan
  0 siblings, 0 replies; 23+ messages in thread
From: Oren Laadan @ 2010-04-25 20:49 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: containers-qjLDD68F18O7TbgM5vRIOg, Dan Smith



Serge E. Hallyn wrote:
> Quoting Dan Smith (danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org):
>>>> +#ifdef CONFIG_NETNS_CHECKPOING
>> SH> How could that be defined?  :)
>>
>> Hmm, I must have typo'd that after I did all my config testing because
>> otherwise I wouldn't have been able to test checkpointing ipv6 stuff.
>>
>> SH> Again, I'd prefer ckpt_err here.
>>
>> SH> Note that in my last email that really was a q - if you're under
>> SH> spinlock here, then you can't use ckpt_err().
>>
>> Right, the point of this loop was to iterate the list quickly while
>> holding the device lock, so we could write out the results after we
>> release it.
>>
>> I think these two cases (and the ipv4 case) are pretty unlikely to be
>> a problem as they would only be triggered if you actually have
>> active multicast or anycast sessions configured.  This will not
>> trigger for the default addresses.
>>
>> I don't think that dropping the lock to do ckpt_err() would be very
>> pretty, nor would introducing a result string for an error message.
> 
> Agreed.
> 
>> This is plumbed a couple levels deep.
>>
>> Is there some way you see this being handled better?
> 
> Not really...  looks like we're doing what we can at the moment
> then, good enough :)

Actually, you can use _ckpt_err() which can be called under
spinlock, and then the caller should call _ckpt_msg_complete()
after it drops the lock, to flush the message out.

Oren.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] Modularize the handling of netdev address c/r
       [not found]     ` <1270748932-26745-2-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2010-04-12 16:08       ` Serge E. Hallyn
@ 2010-04-25 21:04       ` Oren Laadan
       [not found]         ` <4BD4AE6D.2070507-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
  1 sibling, 1 reply; 23+ messages in thread
From: Oren Laadan @ 2010-04-25 21:04 UTC (permalink / raw)
  To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg


Looks good, thanks. Please see comments below:

Dan Smith wrote:
> This moves the INET4 address checkpoint and restart routines into
> net/ipv4/devinet.c and introduces a registration method to present
> the checkpoint code with the handler functions.
> 
> This makes it easier to add additional address types, and also
> makes the cases where inet4 is absent, inet6 is a module, etc much
> easier.  It also elminates the need for a couple of helper functions.
> 
> Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
>  include/linux/checkpoint.h     |   18 +++++-
>  include/linux/checkpoint_hdr.h |    1 +
>  net/checkpoint_dev.c           |  152 ++++++++++++++++++++++++----------------
>  net/ipv4/devinet.c             |   75 ++++++++++++++++++++
>  4 files changed, 184 insertions(+), 62 deletions(-)
> 
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index 96693e2..5fdbd01 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -132,7 +132,8 @@ extern void *restore_netdev(struct ckpt_ctx *ctx);
>  
>  extern int ckpt_netdev_in_init_netns(struct ckpt_ctx *ctx,
>  				     struct net_device *dev);
> -extern int ckpt_netdev_inet_addrs(struct in_device *indev,
> +extern int ckpt_netdev_inet_addrs(struct ckpt_ctx *ctx,
> +				  struct net_device *dev,
>  				  struct ckpt_netdev_addr *list[]);
>  extern int ckpt_netdev_hwaddr(struct net_device *dev,
>  			      struct ckpt_hdr_netdev *h);
> @@ -513,6 +514,21 @@ extern void _ckpt_msg_complete(struct ckpt_ctx *ctx);
>  	_do_ckpt_msg(ctx, err, "[E @ %s:%d]" fmt, __func__, __LINE__, ##args); \
>  } while (0)
>  
> +struct ckpt_netdev_addr_handler {
> +	int type;
> +	struct module *owner;
> +	int (*checkpoint_addr)(struct ckpt_ctx *ctx,
> +			       struct net_device *dev,
> +			       int index, int max,
> +			       struct ckpt_netdev_addr *addrs);
> +	int (*restore_addr)(struct ckpt_ctx *ctx,
> +			    struct net_device *dev,
> +			    struct net *net,
> +			    struct ckpt_netdev_addr *addr);
> +};
> +extern int ckpt_netdev_addr_register(struct ckpt_netdev_addr_handler *);
> +extern int ckpt_netdev_addr_unregister(struct ckpt_netdev_addr_handler *);

In another context, Matt pointed out that the naming convention
should start with "register", so something like:

   register_checkpoint_netdev_addr()
   unregister_checkpoint_netdev_addr()

> +
>  #endif /* CONFIG_CHECKPOINT */
>  #endif /* __KERNEL__ */
>  
> diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
> index 36386ad..13bf62c 100644
> --- a/include/linux/checkpoint_hdr.h
> +++ b/include/linux/checkpoint_hdr.h
> @@ -804,6 +804,7 @@ struct ckpt_hdr_netdev {
>  
>  enum ckpt_netdev_addr_types {
>  	CKPT_NETDEV_ADDR_IPV4,
> +	CKPT_NETDEV_ADDR_MAX
>  };
>  
>  struct ckpt_netdev_addr {
> diff --git a/net/checkpoint_dev.c b/net/checkpoint_dev.c
> index 5a4a95b..4ef06e3 100644
> --- a/net/checkpoint_dev.c
> +++ b/net/checkpoint_dev.c
> @@ -12,11 +12,11 @@
>  #include <linux/sched.h>
>  #include <linux/if.h>
>  #include <linux/if_arp.h>
> -#include <linux/inetdevice.h>
>  #include <linux/veth.h>
>  #include <linux/checkpoint.h>
>  #include <linux/checkpoint_hdr.h>
>  #include <linux/deferqueue.h>
> +#include <linux/module.h>
>  
>  #include <net/net_namespace.h>
>  #include <net/sch_generic.h>
> @@ -34,17 +34,64 @@ struct mvl_newlink {
>  
>  typedef int (*new_link_fn)(struct sk_buff *, void *);
>  
> -static int __kern_devinet_ioctl(struct net *net, unsigned int cmd, void *arg)
> +static struct ckpt_netdev_addr_handler *addr_handlers[CKPT_NETDEV_ADDR_MAX];
> +
> +static char *addr_modules[] = {
> +	"ipv4",		/* CKPT_NETDEV_ADDR_IPV4 */
> +	"ipv6",		/* CKPT_NETDEV_ADDR_IPV6 */
> +};
> +
> +int ckpt_netdev_addr_register(struct ckpt_netdev_addr_handler *h)
>  {
> -	mm_segment_t fs;
> -	int ret;
> +	if (h->type >= CKPT_NETDEV_ADDR_MAX)
> +		return -EINVAL;

h->type is 'int' and can be negative :(
Either test for it, or better - change to unsigned int (above).

>  
> -	fs = get_fs();
> -	set_fs(KERNEL_DS);
> -	ret = devinet_ioctl(net, cmd, arg);
> -	set_fs(fs);
> +	if (addr_handlers[h->type] != NULL)
> +		return -EEXIST;

Does this test-and-set need locking ?

>  
> -	return ret;
> +	ckpt_debug("Registered addr type %s\n", addr_modules[h->type]);
> +	addr_handlers[h->type] = h;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(ckpt_netdev_addr_register);
> +
> +int ckpt_netdev_addr_unregister(struct ckpt_netdev_addr_handler *h)
> +{
> +	if (h->type >= CKPT_NETDEV_ADDR_MAX)
> +		return -EINVAL;
> +
> +	if (addr_handlers[h->type] == NULL)
> +		return -ESRCH;
> +
> +	ckpt_debug("Unregistered addr type %s\n", addr_modules[h->type]);
> +	addr_handlers[h->type] = NULL;

Ditto ?

> +
> +	return 0;
> +}
> +EXPORT_SYMBOL(ckpt_netdev_addr_unregister);
> +
> +static struct ckpt_netdev_addr_handler *get_addr_handler(int type)
> +{
> +	struct ckpt_netdev_addr_handler *h;
> +
> +	if (type >= CKPT_NETDEV_ADDR_MAX)
> +		return ERR_PTR(-EINVAL);

type is 'int' - same comment as above.

> +
> +	h = addr_handlers[type];

Ditto for locking ?  (try_module_get below should be inside the
lock, of course).

> +
> +	if (h == NULL)
> +		return NULL;
> +
> +	if (try_module_get(h->owner))
> +		return h;
> +	else
> +		return ERR_PTR(-EBUSY);

Maybe some ckpt_err() here ?  I can feel the frustration of trying
to figure out where _this_ came from !

> +}
> +
> +static void put_addr_handler(struct ckpt_netdev_addr_handler *h)
> +{
> +	module_put(h->owner);
>  }

[...]

The rest looks ok :)

Oren.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/4] C/R: Fix storing IPv6 addresses and handle the "ipv6only" socket flag (v2)
       [not found]     ` <1270748932-26745-5-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-04-25 21:08       ` Oren Laadan
       [not found]         ` <4BD4AF37.10504-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Oren Laadan @ 2010-04-25 21:08 UTC (permalink / raw)
  To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

Hi Dan,

Dan Smith wrote:
> The first item is a result of sockaddr_in6 being larger than the base
> sockaddr structure, thus not being long enough to reserve enough space in
> the checkpoint header.
> 
> The second comes into play when things (like sshd) bind to INADDR6_ANY,
> set the "ipv6only" socket flag and then bind an IPv4 socket to the same
> port.
> 
> Changes in v2:
>  - Export the inet_* symbols used by inet6 so that CONFIG_IPV6=m
>    will compile
> 
> Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
>  include/linux/checkpoint_hdr.h |   13 +++++++++++--
>  net/ipv4/checkpoint.c          |   37 +++++++++++++++++++++++--------------
>  net/ipv6/af_inet6.c            |    2 ++
>  3 files changed, 36 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
> index 98aa79c..7fde6b5 100644
> --- a/include/linux/checkpoint_hdr.h
> +++ b/include/linux/checkpoint_hdr.h
> @@ -758,12 +758,21 @@ struct ckpt_hdr_socket_inet {
>  		struct in6_addr saddr;
>  		struct in6_addr rcv_saddr;
>  		struct in6_addr daddr;
> +		__u8 ipv6only;
>  	} inet6 __attribute__ ((aligned(8)));
>  
>  	__u32 laddr_len;
>  	__u32 raddr_len;
> -	struct sockaddr_in laddr;
> -	struct sockaddr_in raddr;
> +	union {
> +		struct sockaddr laddr;
> +		struct sockaddr_in laddr4;
> +		struct sockaddr_in6 laddr6;
> +	};
> +	union {
> +		struct sockaddr raddr;
> +		struct sockaddr_in raddr4;
> +		struct sockaddr_in6 raddr6;
> +	};
>  } __attribute__((aligned(8)));
>  
>  struct ckpt_hdr_file_socket {
> diff --git a/net/ipv4/checkpoint.c b/net/ipv4/checkpoint.c
> index b4024e7..1c43570 100644
> --- a/net/ipv4/checkpoint.c
> +++ b/net/ipv4/checkpoint.c
> @@ -190,12 +190,14 @@ static int sock_inet_restore_connection(struct sock *sk,
>  	struct inet_sock *inet = inet_sk(sk);
>  	int tcp_gso = sk->sk_family == AF_INET ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
>  
> -	inet->inet_daddr = hh->raddr.sin_addr.s_addr;
> -	inet->inet_saddr = hh->laddr.sin_addr.s_addr;
> -	inet->inet_rcv_saddr = inet->inet_saddr;
> +	if (sk->sk_family == AF_INET) {
> +		inet->inet_daddr = hh->raddr4.sin_addr.s_addr;
> +		inet->inet_saddr = hh->laddr4.sin_addr.s_addr;
> +		inet->inet_rcv_saddr = inet->inet_saddr;
>  
> -	inet->inet_dport = hh->raddr.sin_port;
> -	inet->inet_sport = hh->laddr.sin_port;
> +		inet->inet_dport = hh->raddr4.sin_port;
> +		inet->inet_sport = hh->laddr4.sin_port;
> +	}
>  
>  	if (sk->sk_protocol == IPPROTO_TCP)
>  		sk->sk_gso_type = tcp_gso;
> @@ -266,6 +268,7 @@ static int sock_inet_cptrst(struct ckpt_ctx *ctx,
>  			ipv6_addr_copy(&inet6->rcv_saddr, &hh->inet6.rcv_saddr);
>  			ipv6_addr_copy(&inet6->daddr, &hh->inet6.daddr);
>  		}
> +		CKPT_COPY(op, hh->inet6.ipv6only, inet6->ipv6only);
>  	}
>  
>  	return ret;
> @@ -281,8 +284,8 @@ int inet_checkpoint(struct ckpt_ctx *ctx, struct socket *sock)
>  		return -EINVAL;
>  
>  	ret = ckpt_sock_getnames(ctx, sock,
> -				(struct sockaddr *)&in->laddr, &in->laddr_len,
> -				(struct sockaddr *)&in->raddr, &in->raddr_len);
> +				&in->laddr, &in->laddr_len,
> +				&in->raddr, &in->raddr_len);
>  	if (ret)
>  		goto out;
>  
> @@ -296,11 +299,13 @@ int inet_checkpoint(struct ckpt_ctx *ctx, struct socket *sock)
>  
>  	return ret;
>  }
> +EXPORT_SYMBOL_GPL(inet_checkpoint);

This belongs to a separate patch ?  Especially for as long as we
need to keep our patchset clean, until it makes it to -mm

>  
>  int inet_collect(struct ckpt_ctx *ctx, struct socket *sock)
>  {
>  	return ckpt_obj_collect(ctx, sock->sk, CKPT_OBJ_SOCK);
>  }
> +EXPORT_SYMBOL_GPL(inet_collect);

Ditto.

>  
>  static int inet_read_buffer(struct ckpt_ctx *ctx,
>  			    struct sk_buff_head *queue,
> @@ -387,19 +392,19 @@ static int inet_precheck(struct socket *sock, struct ckpt_hdr_socket_inet *in)
>  	__u8 nonagle_mask = TCP_NAGLE_OFF | TCP_NAGLE_CORK | TCP_NAGLE_PUSH;
>  	__u8 ecn_mask = TCP_ECN_OK | TCP_ECN_QUEUE_CWR | TCP_ECN_DEMAND_CWR;
>  
> -	if ((htons(in->laddr.sin_port) < PROT_SOCK) &&
> +	if ((htons(in->laddr4.sin_port) < PROT_SOCK) &&
>  	    !capable(CAP_NET_BIND_SERVICE)) {
>  		ckpt_debug("unable to bind to port %hu\n",
> -			   htons(in->laddr.sin_port));
> +			   htons(in->laddr4.sin_port));
>  		return -EINVAL;
>  	}
>  
> -	if (in->laddr_len > sizeof(struct sockaddr_in)) {
> +	if (in->laddr_len > sizeof(in->laddr6)) {
>  		ckpt_debug("laddr_len is too big\n");
>  		return -EINVAL;
>  	}
>  
> -	if (in->raddr_len > sizeof(struct sockaddr_in)) {
> +	if (in->raddr_len > sizeof(in->laddr6)) {
>  		ckpt_debug("raddr_len is too big\n");
>  		return -EINVAL;
>  	}
> @@ -496,11 +501,14 @@ int inet_restore(struct ckpt_ctx *ctx,
>  	 */
>  	if ((h->sock.state == TCP_LISTEN) ||
>  	    ((h->sock.state == TCP_CLOSE) && (in->laddr_len > 0))) {
> +		if (in->inet6.ipv6only) {
> +			struct ipv6_pinfo *np = inet6_sk(sock->sk);
> +			np->ipv6only = 1;
> +		}
> +
>  		sock->sk->sk_reuse = 2;
>  		inet_sk(sock->sk)->freebind = 1;
> -		ret = sock->ops->bind(sock,
> -				      (struct sockaddr *)&in->laddr,
> -				      in->laddr_len);
> +		ret = sock->ops->bind(sock, &in->laddr, in->laddr_len);
>  		ckpt_debug("inet bind: %i\n", ret);
>  		if (ret < 0)
>  			goto out;
> @@ -547,3 +555,4 @@ int inet_restore(struct ckpt_ctx *ctx,
>  
>  	return ret;
>  }
> +EXPORT_SYMBOL_GPL(inet_restore);

Ditto.

> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> index 12e69d3..9acb55a 100644
> --- a/net/ipv6/af_inet6.c
> +++ b/net/ipv6/af_inet6.c
> @@ -523,6 +523,8 @@ const struct proto_ops inet6_stream_ops = {
>  	.mmap		   = sock_no_mmap,
>  	.sendpage	   = tcp_sendpage,
>  	.splice_read	   = tcp_splice_read,
> +	.checkpoint	   = inet_checkpoint,
> +	.restore	   = inet_restore,
>  #ifdef CONFIG_COMPAT
>  	.compat_setsockopt = compat_sock_common_setsockopt,
>  	.compat_getsockopt = compat_sock_common_getsockopt,

Oren.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/4] C/R: Fix storing IPv6 addresses and handle the "ipv6only" socket flag (v2)
       [not found]         ` <4BD4AF37.10504-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
@ 2010-04-26 13:41           ` Dan Smith
       [not found]             ` <87d3xmfhzx.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Dan Smith @ 2010-04-26 13:41 UTC (permalink / raw)
  To: Oren Laadan; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

>> +EXPORT_SYMBOL_GPL(inet_checkpoint);

OL> This belongs to a separate patch ?  Especially for as long as we
OL> need to keep our patchset clean, until it makes it to -mm

Does it?  It's just exporting a symbol that is already in the tree so
we can use it.  Are you saying you just want the export to be done in
its own patch?

-- 
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/4] C/R: Fix storing IPv6 addresses and handle the "ipv6only" socket flag (v2)
       [not found]             ` <87d3xmfhzx.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
@ 2010-04-26 14:35               ` Oren Laadan
       [not found]                 ` <4BD5A49B.9030605-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Oren Laadan @ 2010-04-26 14:35 UTC (permalink / raw)
  To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg


Dan Smith wrote:
>>> +EXPORT_SYMBOL_GPL(inet_checkpoint);
> 
> OL> This belongs to a separate patch ?  Especially for as long as we
> OL> need to keep our patchset clean, until it makes it to -mm
> 
> Does it?  It's just exporting a symbol that is already in the tree so
> we can use it.  Are you saying you just want the export to be done in
> its own patch?
> 

I guess I was too quick on the trigger. If you have other future
users for inet_checkpoint and co., then yes, a separate patch -
since I would fold it into the original inet patches (assuming
we are not yet in -mm).

Oren.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/4] C/R: Fix storing IPv6 addresses and handle the "ipv6only" socket flag (v2)
       [not found]                 ` <4BD5A49B.9030605-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
@ 2010-04-26 14:43                   ` Dan Smith
  0 siblings, 0 replies; 23+ messages in thread
From: Dan Smith @ 2010-04-26 14:43 UTC (permalink / raw)
  To: Oren Laadan; +Cc: containers-qjLDD68F18O7TbgM5vRIOg

OL> I guess I was too quick on the trigger. If you have other future
OL> users for inet_checkpoint and co., then yes, a separate patch -
OL> since I would fold it into the original inet patches (assuming we
OL> are not yet in -mm).

I don't think that there will be any additional users besides inet4
and inet6, so I'm still not sure what the issue is.  Regardless, I'll
break that out into a separate patch for you when I re-post that set.

Thanks!

-- 
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] Modularize the handling of netdev address c/r
       [not found]         ` <4BD4AE6D.2070507-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
@ 2010-04-26 15:11           ` Dan Smith
       [not found]             ` <874oiyfdv1.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Dan Smith @ 2010-04-26 15:11 UTC (permalink / raw)
  To: Oren Laadan; +Cc: containers-qjLDD68F18O7TbgM5vRIOg


OL> Does this test-and-set need locking ?

No, I was just planning on getting lucky each time.  Er, okay, yes :)

>> +	if (try_module_get(h->owner))
>> +		return h;
>> +	else
>> +		return ERR_PTR(-EBUSY);

OL> Maybe some ckpt_err() here ?  I can feel the frustration of trying
OL> to figure out where _this_ came from !

I'd prefer not to do that this deep.  I think the varying depth at
which we call ckpt_err() starts to get confusing.  Regardless, the
call of this function is checked and reported, which I think will make
tracking down an error result rather easy:

  h = get_addr_handler(i);
  if (!h)
  	continue;
  else if (IS_ERR(h)) {
  	addrs = PTR_ERR(h);
  	ckpt_err(ctx, addrs,
  		 "Unable to handle netdev addr type %s\n",
  		 addr_modules[i]);
  	break;
  }

no?

-- 
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] Modularize the handling of netdev address c/r
       [not found]             ` <874oiyfdv1.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
@ 2010-04-26 16:05               ` Oren Laadan
  0 siblings, 0 replies; 23+ messages in thread
From: Oren Laadan @ 2010-04-26 16:05 UTC (permalink / raw)
  To: Dan Smith; +Cc: containers-qjLDD68F18O7TbgM5vRIOg



Dan Smith wrote:
> OL> Does this test-and-set need locking ?
> 
> No, I was just planning on getting lucky each time.  Er, okay, yes :)
> 
>>> +	if (try_module_get(h->owner))
>>> +		return h;
>>> +	else
>>> +		return ERR_PTR(-EBUSY);
> 
> OL> Maybe some ckpt_err() here ?  I can feel the frustration of trying
> OL> to figure out where _this_ came from !
> 
> I'd prefer not to do that this deep.  I think the varying depth at
> which we call ckpt_err() starts to get confusing.  Regardless, the

True.

I was just thinking about a user seeing EBUSY and a message about
the netdev addr, and how he/she would reason that this is because
a module is missing ...

On the other hand, we could keep the scheme you have now, and then
document this possibility in a FAQ, readme, manual etc.

> call of this function is checked and reported, which I think will make
> tracking down an error result rather easy:
> 
>   h = get_addr_handler(i);
>   if (!h)
>   	continue;
>   else if (IS_ERR(h)) {
>   	addrs = PTR_ERR(h);
>   	ckpt_err(ctx, addrs,
>   		 "Unable to handle netdev addr type %s\n",
>   		 addr_modules[i]);
>   	break;
>   }
> 
> no?

Sure.

Oren.

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2010-04-26 16:05 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-08 17:48 A modular approach to handling netdev address c/r Dan Smith
     [not found] ` <1270748932-26745-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-04-08 17:48   ` [PATCH 1/4] Modularize the handling of " Dan Smith
     [not found]     ` <1270748932-26745-2-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-04-12 16:08       ` Serge E. Hallyn
2010-04-25 21:04       ` Oren Laadan
     [not found]         ` <4BD4AE6D.2070507-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-04-26 15:11           ` Dan Smith
     [not found]             ` <874oiyfdv1.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2010-04-26 16:05               ` Oren Laadan
2010-04-08 17:48   ` [PATCH 2/4] Fail checkpoint if IPv4 multicast addresses are configured Dan Smith
     [not found]     ` <1270748932-26745-3-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-04-12 16:09       ` Serge E. Hallyn
2010-04-08 17:48   ` [PATCH 3/4] Add IPv6 address checkpoint handler Dan Smith
     [not found]     ` <1270748932-26745-4-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-04-12 16:11       ` Serge E. Hallyn
     [not found]         ` <20100412161148.GC23380-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-04-12 17:39           ` Dan Smith
     [not found]             ` <871vekbmen.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2010-04-12 17:47               ` Serge E. Hallyn
2010-04-12 17:58                 ` Dan Smith
     [not found]                 ` <20100412174756.GA15269-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-04-25 20:49                   ` Oren Laadan
2010-04-15 19:35       ` Brian Haley
     [not found]         ` <4BC76A65.7060909-VXdhtT5mjnY@public.gmane.org>
2010-04-15 19:46           ` Dan Smith
     [not found]             ` <87vdbs1oty.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2010-04-15 20:32               ` Brian Haley
     [not found]                 ` <4BC777C8.6050102-VXdhtT5mjnY@public.gmane.org>
2010-04-16 15:02                   ` Dan Smith
2010-04-08 17:48   ` [PATCH 4/4] C/R: Fix storing IPv6 addresses and handle the "ipv6only" socket flag (v2) Dan Smith
     [not found]     ` <1270748932-26745-5-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-04-25 21:08       ` Oren Laadan
     [not found]         ` <4BD4AF37.10504-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-04-26 13:41           ` Dan Smith
     [not found]             ` <87d3xmfhzx.fsf-FLMGYpZoEPULwtHQx/6qkW3U47Q5hpJU@public.gmane.org>
2010-04-26 14:35               ` Oren Laadan
     [not found]                 ` <4BD5A49B.9030605-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-04-26 14:43                   ` Dan Smith

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.