All of lore.kernel.org
 help / color / mirror / Atom feed
* ipctl - new tool for efficient read/write of net related sysctl
@ 2012-05-05 15:13 Oskar Berggren
  2012-05-06  1:29 ` Stephen Hemminger
  0 siblings, 1 reply; 5+ messages in thread
From: Oskar Berggren @ 2012-05-05 15:13 UTC (permalink / raw)
  To: netdev

Hi,

In a project of mine I need to read (and possibly set) many of the properties
found under /proc/sys/net/ipv4/conf/. This is simple enough, except that
when you have hundreds of interfaces, it is really slow. In my tests it takes
about 4 seconds to read a single variable for 700 interfaces. For a while I
worked around this using the binary sysctl() interface, but this is deprecated.

In an experiment to get around this limitation I have created "ipctl", a kernel
module and accompanying user space library/tool. Communication between
kernel and user space is based on generic netlink. What used to take
seconds now happens in a few milliseconds.

So far I have only implemented support for the proxy_arp setting. Do you
think it's worthwhile to pursue this to create something more complete? Are
there other ideas on how one might get fast read/write of the IP-related
settings in procfs?

The full source code is available at:
https://github.com/oskarb/ipctl

Kernel module enclosed below. Haven't done much kernel programming
before, so comments are most welcome!

/Oskar


#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <net/netlink.h>
#include <net/genetlink.h>
#include "../../include/libipctl/ipctl-nl.h"

/* Module metadata handed to the MODULE_* macros. */
#define MOD_AUTHOR "Oskar Berggren <oskar.berggren@gmail.com>"
/*
 * The description is written as two adjacent string literals, which the
 * compiler concatenates. A raw line break inside a single literal leaves
 * it unterminated and does not compile.
 */
#define MOD_DESC "A module to offer efficient mass control of the IP " \
	"sysctl family traditionally controlled through /proc."
#define MOD_VER "0.1"


static int ipctl_get_proxyarp_by_ifindex(int ifIndex, int *on)
{
	struct net *net = &init_net;
	struct net_device *dev;
	struct in_device *in_dev;

	dev = dev_get_by_index(net, ifIndex);

	if (dev)
	{
		if (__in_dev_get_rtnl(dev))
		{
			in_dev = __in_dev_get_rtnl(dev);
			*on = IN_DEV_CONF_GET(in_dev, PROXY_ARP);
		}

		dev_put(dev);  // Release reference.
	}

	return 0;
}


static int ipctl_set_proxyarp_by_ifindex(int ifIndex, int on)
{
	struct net *net = &init_net;
	struct net_device *dev;
	struct in_device *in_dev;

	dev = dev_get_by_index(net, ifIndex);

	if (dev)
	{
		if (__in_dev_get_rtnl(dev))
		{
			in_dev = __in_dev_get_rtnl(dev);
			IN_DEV_CONF_SET(in_dev, PROXY_ARP, on);
		}

		dev_put(dev);  // Release reference.
	}

	return 0;
}


/*
 * Generic netlink family descriptor for ipctl: name and version come from
 * the shared ipctl-nl.h header, there is no family-specific header
 * (hdrsize 0), and the highest accepted attribute is IPCTL_ATTR_MAX.
 */
static struct genl_family ipctl_gnl_family = {
	.name    = IPCTL_GENL_NAME,
	.version = IPCTL_GENL_VERSION,
	.id      = GENL_ID_GENERATE,
	.hdrsize = 0,
	.maxattr = IPCTL_ATTR_MAX,
};


/*
 * ipctl_reply - build and send a reply message to the requester.
 * @skb:      the request buffer (not used here)
 * @info:     generic netlink request info; supplies the sequence number and
 *            the destination for genlmsg_reply()
 * @property: value for IPCTL_ATTR_PROPERTY
 * @ifIndex:  value for IPCTL_ATTR_IFINDEX
 * @value:    value for IPCTL_ATTR_VALUE (sent as a u8)
 *
 * The reply is always tagged with IPCTL_CMD_GET.
 *
 * Returns 0 on success or a negative errno. Every failure is logged, and a
 * reply buffer that was never handed to genlmsg_reply() is freed here.
 */
static int ipctl_reply(struct sk_buff *skb, struct genl_info *info,
		       int property, int ifIndex, int value)
{
	struct sk_buff *skb_reply;
	void *msg_head;
	int rc;

	pr_debug("ipctl: reply start\n");

	skb_reply = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb_reply == NULL) {
		/* rc must be set before jumping to the logging path. */
		rc = -ENOMEM;
		goto out;
	}

	msg_head = genlmsg_put(skb_reply, 0, info->snd_seq,
			       &ipctl_gnl_family, 0, IPCTL_CMD_GET);
	if (msg_head == NULL) {
		rc = -ENOMEM;
		goto out_free;
	}

	rc = nla_put_u32(skb_reply, IPCTL_ATTR_PROPERTY, property);
	if (rc != 0)
		goto out_free;

	rc = nla_put_u32(skb_reply, IPCTL_ATTR_IFINDEX, ifIndex);
	if (rc != 0)
		goto out_free;

	rc = nla_put_u8(skb_reply, IPCTL_ATTR_VALUE, value);
	if (rc != 0)
		goto out_free;

	/* finalize the message */
	genlmsg_end(skb_reply, msg_head);

	/*
	 * From here on the buffer belongs to the netlink core: it is consumed
	 * by genlmsg_reply() on success and on failure alike, so it must not
	 * be freed again below.
	 */
	rc = genlmsg_reply(skb_reply, info);
	if (rc != 0)
		goto out;

	return 0;

out_free:
	nlmsg_free(skb_reply);
out:
	pr_warning("ipctl: Error occured in reply: %d\n", rc);

	return rc;
}


/* handler for SET messages via NETLINK */
int ipctl_set(struct sk_buff *skb, struct genl_info *info)
{
	/* message handling code goes here; return 0 on success, negative
	 * values on failure */

	int property = nla_get_u32(info->attrs[IPCTL_ATTR_PROPERTY]);
	int ifIndex = nla_get_u32(info->attrs[IPCTL_ATTR_IFINDEX]);
	int value = nla_get_u8(info->attrs[IPCTL_ATTR_VALUE]);

	pr_debug("ipctl: set p=%d i=%d v=%d\n", property, ifIndex, value);

	if (property == IPCTL_PROPERTY_PROXYARP)
		return ipctl_set_proxyarp_by_ifindex(ifIndex, value);

	return 0;
}


/* handler for GET messages via NETLINK */
int ipctl_get(struct sk_buff *skb, struct genl_info *info)
{
	/* message handling code goes here; return 0 on success, negative
	 * values on failure */

	int property = nla_get_u32(info->attrs[IPCTL_ATTR_PROPERTY]);
	int ifIndex = nla_get_u32(info->attrs[IPCTL_ATTR_IFINDEX]);
	int value = 0;
	int retval = 0;

	pr_debug("ipctl: get p=%d i=%d\n", property, ifIndex);

	if (property == IPCTL_PROPERTY_PROXYARP)
		retval = ipctl_get_proxyarp_by_ifindex(ifIndex, &value);

	if (retval)
		return retval;

	return ipctl_reply(skb, info, property, ifIndex, value);
}


/*
 * NETLINK operation: IPCTL_CMD_SET is served by ipctl_set(), validated
 * against ipctl_genl_policy and flagged GENL_ADMIN_PERM. No dump handler.
 */
struct genl_ops ipctl_gnl_ops_set = {
	.cmd    = IPCTL_CMD_SET,
	.doit   = ipctl_set,
	.dumpit = NULL,
	.policy = ipctl_genl_policy,
	.flags  = GENL_ADMIN_PERM,
};

/*
 * NETLINK operation: IPCTL_CMD_GET is served by ipctl_get(), validated
 * against ipctl_genl_policy, with no flags set. No dump handler.
 */
struct genl_ops ipctl_gnl_ops_get = {
	.cmd    = IPCTL_CMD_GET,
	.doit   = ipctl_get,
	.dumpit = NULL,
	.policy = ipctl_genl_policy,
	.flags  = 0,
};


/*
 * ipctl_init - module entry point.
 *
 * Registers the ipctl generic netlink family and then its SET and GET
 * operations. If any step fails the error is logged, whatever was already
 * registered is torn down again, and the error is returned so that the
 * module load fails instead of leaving a half-registered family behind.
 *
 * Returns 0 on success or the negative errno of the failing registration.
 */
static int __init ipctl_init(void)
{
	int rc;

	printk(KERN_INFO "ipctl: %s.\n", MOD_VER);

	rc = genl_register_family(&ipctl_gnl_family);
	if (rc) {
		printk(KERN_ERR "ipctl: genl_register_family: %d.\n", rc);
		return rc;
	}

	rc = genl_register_ops(&ipctl_gnl_family, &ipctl_gnl_ops_set);
	if (rc) {
		printk(KERN_ERR "ipctl: genl_register_ops: %d.\n", rc);
		goto err_unregister;
	}

	rc = genl_register_ops(&ipctl_gnl_family, &ipctl_gnl_ops_get);
	if (rc) {
		printk(KERN_ERR "ipctl: genl_register_ops: %d.\n", rc);
		goto err_unregister;
	}

	return 0;

err_unregister:
	/* A non 0 return means init_module failed; module can't be loaded. */
	genl_unregister_family(&ipctl_gnl_family);
	return rc;
}


/*
 * ipctl_exit - module exit point.
 *
 * Unregisters the ipctl generic netlink family. The return value of
 * genl_unregister_family() is not checked.
 */
static void __exit ipctl_exit(void)
{
	genl_unregister_family(&ipctl_gnl_family);
}


/* Hook the load/unload entry points defined above into the module loader. */
module_init(ipctl_init);
module_exit(ipctl_exit);

/* Module metadata; the strings come from the MOD_* macros near the top. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR(MOD_AUTHOR);
MODULE_DESCRIPTION(MOD_DESC);
MODULE_VERSION(MOD_VER);

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: ipctl - new tool for efficient read/write of net related sysctl
  2012-05-05 15:13 ipctl - new tool for efficient read/write of net related sysctl Oskar Berggren
@ 2012-05-06  1:29 ` Stephen Hemminger
  2012-05-06 12:46   ` Oskar Berggren
  0 siblings, 1 reply; 5+ messages in thread
From: Stephen Hemminger @ 2012-05-06  1:29 UTC (permalink / raw)
  To: Oskar Berggren; +Cc: netdev


> 
> In a project of mine I need to read (and possibly set) many of the
> properties
> found under /proc/sys/net/ipv4/conf/. This is simple enough, except
> that
> when you have hundreds of interfaces, it is really slow. In my tests
> it takes
> about 4 seconds to read a single variable for 700 interfaces. For a
> while I
> worked around this using the binary sysctl() interface, but this is
> deprecated.
> 

What about exposing these as NETLINK attributes? That would be faster
and you could do bulk updates.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: ipctl - new tool for efficient read/write of net related sysctl
  2012-05-06  1:29 ` Stephen Hemminger
@ 2012-05-06 12:46   ` Oskar Berggren
  2012-05-07  6:14     ` Thomas Graf
  0 siblings, 1 reply; 5+ messages in thread
From: Oskar Berggren @ 2012-05-06 12:46 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev

2012/5/6 Stephen Hemminger <stephen.hemminger@vyatta.com>:
>
>>
>> In a project of mine I need to read (and possibly set) many of the
>> properties
>> found under /proc/sys/net/ipv4/conf/. This is simple enough, except
>> that
>> when you have hundreds of interfaces, it is really slow. In my tests
>> it takes
>> about 4 seconds to read a single variable for 700 interfaces. For a
>> while I
>> worked around this using the binary sysctl() interface, but this is
>> deprecated.
>>
>
> What about exposing these as NETLINK attributes? That would be faster
> and you could do bulk updates.


This is my first attempt at using NETLINK, so could you please elaborate?
Below is the generic netlink interface I implemented so far. Any pointers
on how I should do this differently?


#ifndef IPCTL_NL_H_INCLUDED
#define IPCTL_NL_H_INCLUDED

/* Copyright (C) 2012 Oskar Berggren <oskar.berggren@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */


/*
 * NETLINK attributes carried in ipctl messages. The numbering starts at 0
 * with the UNSPEC placeholder; __IPCTL_ATTR_MAX is a sentinel that is only
 * used to derive IPCTL_ATTR_MAX, the highest real attribute number.
 */
enum {
	IPCTL_ATTR_UNSPEC = 0,
	IPCTL_ATTR_PROPERTY,	/* Use IPCTL_PROPERTY_*  */
	IPCTL_ATTR_IFINDEX,
	IPCTL_ATTR_VALUE,
	__IPCTL_ATTR_MAX,	/* sentinel, keep last */
};
#define IPCTL_ATTR_MAX (__IPCTL_ATTR_MAX - 1)

/*
 * Attribute policy: the expected payload type of each ipctl attribute,
 * indexed by attribute number. Entries not listed stay zero-initialised.
 */
static struct nla_policy ipctl_genl_policy[IPCTL_ATTR_MAX + 1] = {
	[IPCTL_ATTR_VALUE]    = { .type = NLA_U8 },
	[IPCTL_ATTR_IFINDEX]  = { .type = NLA_U32 },
	[IPCTL_ATTR_PROPERTY] = { .type = NLA_U32 },
};


/*
 * NETLINK commands understood by ipctl. The numbering starts at 0 with the
 * UNSPEC placeholder; __IPCTL_CMD_MAX is a sentinel that is only used to
 * derive IPCTL_CMD_MAX, the highest real command number.
 */
enum {
	IPCTL_CMD_UNSPEC = 0,
	IPCTL_CMD_SET,
	IPCTL_CMD_GET,
	__IPCTL_CMD_MAX,	/* sentinel, keep last */
};
#define IPCTL_CMD_MAX (__IPCTL_CMD_MAX - 1)


/* Property selectors: the values that IPCTL_ATTR_PROPERTY may carry. */
enum {
	IPCTL_PROPERTY_PROXYARP = 0,
};


/* ipctl uses the generic netlink facility; family name and version below. */
#define IPCTL_GENL_NAME    "ipctl"
#define IPCTL_GENL_VERSION 1

#endif

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: ipctl - new tool for efficient read/write of net related sysctl
  2012-05-06 12:46   ` Oskar Berggren
@ 2012-05-07  6:14     ` Thomas Graf
  2012-05-09  5:22       ` Oskar Berggren
  0 siblings, 1 reply; 5+ messages in thread
From: Thomas Graf @ 2012-05-07  6:14 UTC (permalink / raw)
  To: Oskar Berggren; +Cc: Stephen Hemminger, netdev

On Sun, May 06, 2012 at 02:46:01PM +0200, Oskar Berggren wrote:
> 2012/5/6 Stephen Hemminger <stephen.hemminger@vyatta.com>:
> >
> >>
> >> In a project of mine I need to read (and possibly set) many of the
> >> properties
> >> found under /proc/sys/net/ipv4/conf/. This is simple enough, except
> >> that
> >> when you have hundreds of interfaces, it is really slow. In my tests
> >> it takes
> >> about 4 seconds to read a single variable for 700 interfaces. For a
> >> while I
> >> worked around this using the binary sysctl() interface, but this is
> >> deprecated.
> >>
> >
> > What about exposing these as NETLINK attributes? That would be faster
> > and you could do bulk updates.
> 
> 
> This is my first attempt at using NETLINK, so could you please elaborate?
> Below is the generic netlink interface I implemented so far. Any pointers
> on how I should do this differently?

What Stephen means is to use the existing message types RTM_SETLINK
and RTM_GETLINK in the NETLINK_ROUTE family.

This is already partially implemented. See the IFLA_AF_SPEC attribute
carrying IPV4_DEVCONF_ and DEVCONF_ (IPv6). Grep for rtnl_af_register()
and you will find the corresponding implementations.

Feel free to complete these existing interfaces, such as adding write
support to IPv6 or adding support to iproute2 which is currently
lacking.

src/nl-link-list.c in the libnl sources allows you to display the
configurations:

$ src/nl-link-list --details --name virbr0-nic
virbr0-nic ether 52:54:00:cb:da:db master virbr0 <broadcast,multicast> 
    mtu 1500 txqlen 500 weight 0 qdisc noop index 7 
    brd ff:ff:ff:ff:ff:ff state down mode default
    ipv4 devconf:
      forwarding            1  mc_forwarding         0  proxy_arp             0
      accept_redirects      1  secure_redirects      1  send_redirects        1
      shared_media          1  rp_filter             1  accept_source_route   0
      bootp_relay           0  log_martians          0  tag                   0
      arpfilter             0  medium_id             0  noxfrm                0
      nopolicy              0  force_igmp_version    0  arp_announce          0
      arp_ignore            0  promote_secondaries   0  arp_accept            0
      arp_notify            0  accept_local          0  src_vmark             0
      proxy_arp_pvlan       0  
    ipv6 max-reasm-len 64KiB <>
      create-stamp 13.35s reachable-time 40s 898msec retrans-time 1s
      devconf:
      forwarding            1  hoplimit             64  mtu6               1500
      accept_ra             1  accept_redirects      1  autoconf              1
      dad_transmits         1  rtr_solicits          3  rtr_solicit_interval 4s
      rtr_solicit_delay    1s  use_tempaddr          0  temp_valid_lft       7d
      temp_prefered_lft    1d  regen_max_retry       3  max_desync_factor   600
      max_addresses        16  force_mld_version     0  accept_ra_defrtr      1
      accept_ra_pinfo       1  accept_ra_rtr_pref    1  rtr_probe_interval   1m
      accept_ra_rt_info     0  proxy_ndp             0  optimistic_dad        0
      accept_source_route   0  mc_forwarding         0  disable_ipv6          0
      accept_dad            1  force_tllao           0  

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: ipctl - new tool for efficient read/write of net related sysctl
  2012-05-07  6:14     ` Thomas Graf
@ 2012-05-09  5:22       ` Oskar Berggren
  0 siblings, 0 replies; 5+ messages in thread
From: Oskar Berggren @ 2012-05-09  5:22 UTC (permalink / raw)
  To: Oskar Berggren, Stephen Hemminger, netdev

2012/5/7 Thomas Graf <tgraf@infradead.org>:
> On Sun, May 06, 2012 at 02:46:01PM +0200, Oskar Berggren wrote:
>> 2012/5/6 Stephen Hemminger <stephen.hemminger@vyatta.com>:
>> >
>> >>
>> >> In a project of mine I need to read (and possibly set) many of the
>> >> properties
>> >> found under /proc/sys/net/ipv4/conf/. This is simple enough, except
>> >> that
>> >> when you have hundreds of interfaces, it is really slow. In my tests
>> >> it takes
>> >> about 4 seconds to read a single variable for 700 interfaces. For a
>> >> while I
>> >> worked around this using the binary sysctl() interface, but this is
>> >> deprecated.
>> >>
>> >
>> > What about exposing these as NETLINK attributes? That would be faster
>> > and you could do bulk updates.
>>
>>
>> This is my first attempt at using NETLINK, so could you please elaborate?
>> Below is the generic netlink interface I implemented so far. Any pointers
>> on how I should do this differently?
>
> What Stephen means is to use the existing message types RTM_SETLINK
> and RTM_GETLINK in the NETLINK_ROUTE family.
>
> This is already partially implemented. See the IFLA_AF_SPEC attribute
> carrying IPV4_DEVCONF_ and DEVCONF_ (IPv6). Grep for rtnl_af_register()
> and you will find the corresponding implementations.
>
> Feel free to complete these existing interfaces, such as adding write
> support to IPv6 or adding support to iproute2 which is currently
> lacking.


Cool, this seems to be exactly what I need. I'll experiment with it
when I get the time.

/Oskar

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-05-09  5:23 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-05 15:13 ipctl - new tool for efficient read/write of net related sysctl Oskar Berggren
2012-05-06  1:29 ` Stephen Hemminger
2012-05-06 12:46   ` Oskar Berggren
2012-05-07  6:14     ` Thomas Graf
2012-05-09  5:22       ` Oskar Berggren

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.