All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/14] Facebook's netbooting patches
@ 2016-02-10 21:20 Josef Bacik
  2016-02-10 21:20 ` [PATCH 01/14] Added net_bootp6 command Josef Bacik
                   ` (13 more replies)
  0 siblings, 14 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:20 UTC (permalink / raw)
  To: grub-devel, kernel-team

I've recently fixed up a bunch of new issues and rebased onto today's master, so I
figured I'd send our current backlog.  First, the new patches that you guys
haven't seen yet:

[PATCH 10/14] dns: poll card between each dns request
[PATCH 11/14] dns: reset data->naddresses for every packet we receive
[PATCH 12/14] icmp6: use default interface as the route interface
[PATCH 13/14] bootp: don't add multiple interfaces for the same
[PATCH 14/14] net: add interfaces when we open a card

Basically DNS would not wait long enough for AAAA records to come back and would
corrupt memory if it got multiple records in different packets.  I've also made
it so we try to prefer our dhcp interface when possible and to make sure we're
not adding duplicate interfaces if we run net_bootp/6 multiple times.

The rest of the patches have already been posted before and have been updated to
reflect the reviews that were given.

The first 3 patches

[PATCH 01/14] Added net_bootp6 command
[PATCH 02/14] UEFI IPv6 PXE support
[PATCH 03/14] Use UEFI MAC device as default configured by net_bootp6

Are not mine, they are Michael Chang's and I've kept the authorship and
everything set properly to reflect that.

With these patches we've been provisioning tens of thousands of boxes in ipv4
only, mixed ipv4/ipv6, and ipv6 only environments with very small failure rates
(around 2%).  They are solid and being used in production on a wide variety of
hardware, both EFI and legacy BIOS.  If you like you can pull them from my
github

https://github.com/josefbacik/grub2.git

Any comments or review would be greatly appreciated.  We are keen to get these
patches upstream so we can start doing continual testing with the grub2 master
branch.  Thanks,

Josef


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 01/14] Added net_bootp6 command
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
@ 2016-02-10 21:20 ` Josef Bacik
  2016-02-10 21:20 ` [PATCH 02/14] UEFI IPv6 PXE support Josef Bacik
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:20 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Michael Chang

From: Michael Chang <mchang@suse.com>

The net_bootp6 command is used to configure the IPv6 network interface through
the DHCPv6 protocol's Solicit/Advertise/Request/Reply exchange.
---
 grub-core/net/bootp.c  | 885 ++++++++++++++++++++++++++++++++++++++++++++++++-
 grub-core/net/ip.c     |  35 ++
 include/grub/efi/api.h |  56 +++-
 include/grub/net.h     |  19 ++
 4 files changed, 993 insertions(+), 2 deletions(-)

diff --git a/grub-core/net/bootp.c b/grub-core/net/bootp.c
index 4fdeac3..25ab70c 100644
--- a/grub-core/net/bootp.c
+++ b/grub-core/net/bootp.c
@@ -24,6 +24,7 @@
 #include <grub/net/netbuff.h>
 #include <grub/net/udp.h>
 #include <grub/datetime.h>
+#include <grub/time.h>
 
 static void
 parse_dhcp_vendor (const char *name, const void *vend, int limit, int *mask)
@@ -263,6 +264,653 @@ grub_net_configure_by_dhcp_ack (const char *name,
   return inter;
 }
 
+struct grub_dhcpv6_option {	/* Header that precedes every DHCPv6 option in a packet. */
+  grub_uint16_t code;	/* Option code; callers convert it with grub_be_to_cpu16 (). */
+  grub_uint16_t len;	/* Byte length of data; also converted with grub_be_to_cpu16 (). */
+  grub_uint8_t data[0];	/* Option payload, len bytes long. */
+} GRUB_PACKED;
+
+
+struct grub_dhcpv6_iana_option {	/* Payload of an OPTION_IA_NA option. */
+  grub_uint32_t iaid;	/* Identity association id; big-endian in the packet. */
+  grub_uint32_t t1;	/* Sent as 0 by net_bootp6. */
+  grub_uint32_t t2;	/* Sent as 0 by net_bootp6. */
+  grub_uint8_t data[0];	/* Nested sub-options (searched for OPTION_IAADDR). */
+} GRUB_PACKED;
+
+struct grub_dhcpv6_iaaddr_option {	/* Payload of an OPTION_IAADDR sub-option. */
+  grub_uint8_t addr[16];	/* The IPv6 address, as raw packet bytes. */
+  grub_uint32_t preferred_lifetime;	/* Not read by this file. */
+  grub_uint32_t valid_lifetime;	/* Not read by this file. */
+  grub_uint8_t data[0];	/* Any further nested options; not read by this file. */
+} GRUB_PACKED;
+
+struct grub_DUID_LL	/* Client identifier placed in the OPTION_CLIENTID payload. */
+{
+  grub_uint16_t type;	/* Written as big-endian 3 by net_bootp6. */
+  grub_uint16_t hw_type;	/* Written as big-endian 1 by net_bootp6. */
+  grub_uint8_t hwaddr[6];	/* Copied from the interface's hwaddress.mac. */
+} GRUB_PACKED;
+
+struct grub_dhcpv6_dns_servers {	/* Payload of an OPTION_DNS_SERVERS option. */
+  grub_uint8_t addr[16];	/* First server address. */
+  grub_uint8_t next_addr[0];	/* Start of the second address (offset 16), not a per-entry cursor. */
+} GRUB_PACKED;
+
+#define DHCPv6_REPLY 7	/* message_type value handled by grub_net_configure_by_dhcpv6_reply. */
+#define DHCPv6_ADVERTISE 2	/* message_type value handled by grub_net_configure_by_dhcpv6_adv. */
+#define DHCPv6_REQUEST 3	/* message_type value sent in answer to an ADVERTISE. */
+#define OPTION_BOOTFILE_URL 59	/* Option code: boot file URL. */
+#define OPTION_DNS_SERVERS 23	/* Option code: list of 16-byte DNS server addresses. */
+#define OPTION_IA_NA 3	/* Option code: identity association carrying IAADDR sub-options. */
+#define OPTION_IAADDR 5	/* Sub-option code: assigned address inside IA_NA. */
+#define OPTION_CLIENTID 1	/* Option code: client DUID. */
+#define OPTION_SERVERID 2	/* Option code: server DUID. */
+#define OPTION_ORO 6	/* Option code: list of option codes the client requests. */
+#define OPTION_ELAPSED_TIME 8	/* Option code: elapsed time in hundredths of a second. */
+
+struct grub_dhcpv6_session	/* One pending DHCPv6 exchange started by net_bootp6. */
+{
+  struct grub_dhcpv6_session *next;	/* Next node in grub_dhcpv6_sessions. */
+  struct grub_dhcpv6_session **prev;	/* Address of the pointer that points at this node. */
+  grub_uint32_t iaid;	/* IAID put into the IA_NA option; kept in host byte order. */
+  grub_uint32_t transaction_id:24;	/* Compared with the transaction_id of incoming packets. */
+  grub_uint64_t start_time;	/* grub_get_time_ms () value taken when the session was added. */
+  struct grub_net_network_level_interface *ifaces;	/* Interface the session sends from. */
+};
+
+static struct grub_dhcpv6_session *grub_dhcpv6_sessions = NULL;	/* Head of the list of pending sessions. */
+#define FOR_DHCPV6_SESSIONS(var) \
+    for (var = grub_dhcpv6_sessions ; var; var = var->next)	/* Reads var->next after the body, so the body must not free var. */
+
+/* Stamp SESSION with a transaction id derived from the current date (0 when
+   the clock cannot be read) and with the current millisecond time, then link
+   it in at the head of grub_dhcpv6_sessions.  */
+static void
+grub_dhcpv6_session_add (struct grub_dhcpv6_session *session)
+{
+  struct grub_dhcpv6_session *old_head = grub_dhcpv6_sessions;
+  struct grub_datetime now;
+  grub_int32_t seed = 0;
+
+  if (grub_get_datetime (&now) != GRUB_ERR_NONE
+      || !grub_datetime2unixtime (&now, &seed))
+    {
+      /* A missing clock is not an error for the caller.  */
+      grub_errno = GRUB_ERR_NONE;
+      seed = 0;
+    }
+
+  /* The bit-field keeps only the low 24 bits of the seed.  */
+  session->transaction_id = seed;
+  session->start_time = grub_get_time_ms ();
+
+  session->next = old_head;
+  session->prev = &grub_dhcpv6_sessions;
+  if (old_head)
+    old_head->prev = &session->next;
+
+  grub_dhcpv6_sessions = session;
+}
+
+/* Unlink SESSION from the list it is on and clear its link fields.  The
+   session itself is not freed.  */
+static void
+grub_dhcpv6_session_remove (struct grub_dhcpv6_session *session)
+{
+  struct grub_dhcpv6_session **link = session->prev;
+  struct grub_dhcpv6_session *succ = session->next;
+
+  *link = succ;
+  if (succ)
+    succ->prev = link;
+
+  session->prev = NULL;
+  session->next = NULL;
+}
+
+/* Return the first option of PACKET whose code equals OPTION, or NULL when an
+   option with code 0 is reached first.
+   NOTE(review): the walk has no packet length to stop at, so it relies on the
+   buffer containing either the wanted option or a zero code.  */
+static const struct grub_dhcpv6_option*
+find_dhcpv6_option (const struct grub_net_dhcpv6_packet *packet,
+                    grub_uint16_t option)
+{
+  const grub_uint8_t *cursor = packet->dhcp_options;
+
+  for (;;)
+    {
+      const struct grub_dhcpv6_option *cur
+        = (const struct grub_dhcpv6_option *) cursor;
+      grub_uint16_t cur_code = grub_be_to_cpu16 (cur->code);
+
+      if (cur_code == option)
+        return cur;
+      if (cur_code == 0)
+        return NULL;
+
+      /* Step over this option's header and payload.  */
+      cursor += sizeof (*cur) + grub_be_to_cpu16 (cur->len);
+    }
+}
+
+/* Return a pointer to the 16 address bytes of the first IAADDR sub-option
+   inside PACKET's IA_NA option.  The pointer aliases PACKET.  On failure
+   NULL is returned and grub_errno is set (GRUB_ERR_IO).  */
+static const grub_uint8_t*
+find_dhcpv6_address (const struct grub_net_dhcpv6_packet *packet)
+{
+  const struct grub_dhcpv6_option* popt = find_dhcpv6_option (packet, OPTION_IA_NA);
+  const struct grub_dhcpv6_iana_option *ia_na;
+  const struct grub_dhcpv6_iaaddr_option *iaaddr;
+  grub_size_t popt_len, data_len, offset;
+
+  /* find_dhcpv6_option returns NULL when the packet has no IA_NA at all.  */
+  if (!popt || grub_be_to_cpu16 (popt->code) != OPTION_IA_NA)
+    {
+      grub_error (GRUB_ERR_IO, N_("not an IA_NA DHCPv6 option"));
+      return NULL;
+    }
+
+  ia_na = (const struct grub_dhcpv6_iana_option *) popt->data;
+  popt_len = grub_be_to_cpu16 (popt->len);
+
+  if (popt_len <= sizeof (*ia_na))
+    {
+      grub_error (GRUB_ERR_IO, N_("invalid size for IAADDR"));
+      return NULL;
+    }
+
+  data_len = popt_len - sizeof (*ia_na);
+
+  /* Walk the nested sub-options.  A header is only read once it is known to
+     lie completely inside the IA_NA payload.  */
+  for (offset = 0; offset + sizeof (*popt) <= data_len; )
+    {
+      const struct grub_dhcpv6_option *sub
+        = (const struct grub_dhcpv6_option *) (ia_na->data + offset);
+      grub_size_t sub_len = grub_be_to_cpu16 (sub->len);
+
+      if (grub_be_to_cpu16 (sub->code) == OPTION_IAADDR)
+        {
+          /* The payload must hold a full IAADDR body and must not run past
+             the end of the IA_NA option.  */
+          if (sub_len < sizeof (*iaaddr)
+              || offset + sizeof (*sub) + sub_len > data_len)
+            {
+              grub_error (GRUB_ERR_IO, N_("IAADDR size check failed"));
+              return NULL;
+            }
+
+          iaaddr = (const struct grub_dhcpv6_iaaddr_option *) sub->data;
+          return iaaddr->addr;
+        }
+
+      offset += sizeof (*sub) + sub_len;
+    }
+
+  grub_error (GRUB_ERR_IO, N_("IAADDR not found"));
+  return NULL;
+}
+
+/* Collect the server addresses of PACKET's OPTION_DNS_SERVERS option.
+   On success *ADDR receives a newly allocated array (the caller frees it
+   with grub_free) and *NADDR its element count.  When the option is absent,
+   empty, malformed, or memory runs out, *ADDR is NULL and *NADDR is 0; the
+   malformed and out-of-memory cases also leave grub_errno set.  */
+static void
+get_dhcpv6_dns_address (const struct grub_net_dhcpv6_packet *packet,
+        grub_net_network_level_address_t **addr, grub_uint16_t *naddr)
+{
+  const struct grub_dhcpv6_option* popt;
+  const struct grub_dhcpv6_dns_servers *dns;
+  const grub_uint8_t *pa;
+  grub_uint16_t len, count, i;
+  grub_net_network_level_address_t *la;
+
+  if (addr)
+    *addr = NULL;
+
+  if (naddr)
+    *naddr = 0;
+
+  /* Both out-parameters are needed to hand the list over.  */
+  if (!addr || !naddr)
+    return;
+
+  popt = find_dhcpv6_option (packet, OPTION_DNS_SERVERS);
+  if (!popt)
+    return;
+
+  len = grub_be_to_cpu16 (popt->len);
+  if ((len % 16) != 0)
+    {
+      grub_error (GRUB_ERR_IO, N_("invalid dns address length"));
+      return;
+    }
+
+  count = len / 16;
+  if (count == 0)
+    return;
+
+  la = grub_zalloc (sizeof (*la) * count);
+  if (!la)
+    return;
+
+  dns = (const struct grub_dhcpv6_dns_servers *) popt->data;
+
+  /* Advance by one 16-byte address per entry so that every server in the
+     option is read, not just the first two.  */
+  for (i = 0, pa = dns->addr; i < count; i++, pa += sizeof (dns->addr))
+    {
+      la[i].type = GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6;
+      la[i].ipv6[0] = grub_get_unaligned64 (pa);
+      la[i].ipv6[1] = grub_get_unaligned64 (pa + 8);
+      la[i].option = DNS_OPTION_PREFER_IPV6;
+    }
+
+  *addr = la;
+  *naddr = count;
+}
+
+/* Split PACKET's OPTION_BOOTFILE_URL value, which must look like
+   "tftp://[ipv6-address]path" or "http://[ipv6-address]path", into its parts.
+   Each non-NULL out-parameter receives a newly allocated string that the
+   caller frees with grub_free: *PROTO the scheme without "://", *SERVER_IP
+   the text between the brackets, *BOOT_FILE everything after ']'.
+   On any failure all out-parameters are left NULL and grub_errno is set.  */
+static void
+find_dhcpv6_bootfile_url (const struct grub_net_dhcpv6_packet *packet,
+        char **proto, char **server_ip, char **boot_file)
+{
+  char *bootfile_url;
+  const struct grub_dhcpv6_option* opt_url;
+  char *ip_start, *ip_end;
+  grub_size_t ip_len;
+  grub_uint16_t len;
+  const char *protos[] = {"tftp://", "http://", NULL};
+  const char *pr;
+  int i;
+  int ok = 0;
+
+  if (proto)
+    *proto = NULL;
+
+  if (server_ip)
+    *server_ip = NULL;
+
+  if (boot_file)
+    *boot_file = NULL;
+
+  opt_url = find_dhcpv6_option (packet, OPTION_BOOTFILE_URL);
+
+  if (!opt_url)
+    {
+      grub_error (GRUB_ERR_IO, N_("no bootfile-url in DHCPv6 option"));
+      return;
+    }
+
+  len = grub_be_to_cpu16 (opt_url->len);
+
+  /* Work on a NUL-terminated private copy of the option payload.  */
+  bootfile_url = grub_malloc (len + 1);
+
+  if (!bootfile_url)
+    return;
+
+  grub_memcpy (bootfile_url, opt_url->data, len);
+  bootfile_url[len] = '\0';
+
+  for (i = 0; (pr = protos[i]); ++i)
+    if (grub_strncmp (bootfile_url, pr, grub_strlen (pr)) == 0)
+      break;
+
+  if (!pr)
+    {
+      grub_error (GRUB_ERR_IO,
+                  N_("unsupported protocol, only tftp and http are supported"));
+      goto cleanup;
+    }
+
+  ip_start = bootfile_url + grub_strlen (pr);
+  ip_end = NULL;
+
+  if (*ip_start == '[')
+    ip_end = grub_strchr (++ip_start, ']');
+
+  if (!ip_end)
+    {
+      grub_error (GRUB_ERR_IO, N_("IPv6-address not in square brackets"));
+      goto cleanup;
+    }
+
+  ip_len = ip_end - ip_start;
+
+  if (proto)
+    {
+      /* Drop the trailing "://" of the matched prefix.  */
+      grub_size_t proto_len = grub_strlen (pr) - 3;
+
+      *proto = grub_malloc (proto_len + 1);
+      if (!*proto)
+        goto cleanup;
+
+      grub_memcpy (*proto, pr, proto_len);
+      (*proto)[proto_len] = '\0';
+    }
+
+  if (server_ip)
+    {
+      *server_ip = grub_malloc (ip_len + 1);
+      if (!*server_ip)
+        goto cleanup;
+
+      grub_memcpy (*server_ip, ip_start, ip_len);
+      (*server_ip)[ip_len] = '\0';
+    }
+
+  if (boot_file)
+    {
+      *boot_file = grub_strdup (ip_end + 1);
+      if (!*boot_file)
+        goto cleanup;
+    }
+
+  ok = 1;
+
+cleanup:
+
+  grub_free (bootfile_url);
+
+  if (!ok)
+    {
+      /* Release the strings themselves, not the caller's pointer variables,
+         so the caller never sees a half-filled result.  */
+      if (proto)
+        {
+          grub_free (*proto);
+          *proto = NULL;
+        }
+
+      if (server_ip)
+        {
+          grub_free (*server_ip);
+          *server_ip = NULL;
+        }
+
+      if (boot_file)
+        {
+          grub_free (*boot_file);
+          *boot_file = NULL;
+        }
+    }
+}
+
+
+/* Prepend a verbatim copy of OPT (its header followed by the payload whose
+   length the header announces) to the data already held in NB.  */
+static grub_err_t
+dhcpv6_push_option_copy (struct grub_net_buff *nb,
+                         const struct grub_dhcpv6_option *opt)
+{
+  grub_size_t total = sizeof (*opt) + grub_be_to_cpu16 (opt->len);
+  grub_err_t err;
+
+  err = grub_netbuff_push (nb, total);
+  if (err)
+    return err;
+
+  grub_memcpy (nb->data, opt, total);
+  return GRUB_ERR_NONE;
+}
+
+/* Answer the ADVERTISE packet V6_ADV with a REQUEST sent from
+   SESSION->ifaces to the multicast address ff02::1:2.  The request echoes
+   the advertised client id, server id and IA_NA options, asks for the
+   bootfile URL and DNS servers, and reuses the advertisement's transaction
+   id.  Returns GRUB_ERR_NONE on success; otherwise the error of the step
+   that failed.  */
+static grub_err_t
+grub_net_configure_by_dhcpv6_adv (const struct grub_net_dhcpv6_packet *v6_adv,
+        struct grub_dhcpv6_session *session)
+{
+  struct grub_net_buff *nb;
+  const struct grub_dhcpv6_option *opt_client, *opt_server, *opt_iana;
+  struct grub_dhcpv6_option *popt;
+  struct grub_net_dhcpv6_packet *v6;
+  struct udphdr *udph;
+  grub_net_network_level_address_t multicast;
+  grub_net_link_level_address_t ll_multicast;
+  struct grub_net_network_level_interface *inf;
+  grub_err_t err;
+  grub_uint64_t elapsed;
+  char err_msg[64];
+
+  if (v6_adv->message_type != DHCPv6_ADVERTISE)
+    {
+      grub_error (GRUB_ERR_IO, N_("DHCPv6 info not found"));
+      return grub_errno;
+    }
+
+  opt_client = find_dhcpv6_option (v6_adv, OPTION_CLIENTID);
+  opt_server = find_dhcpv6_option (v6_adv, OPTION_SERVERID);
+  opt_iana = find_dhcpv6_option (v6_adv, OPTION_IA_NA);
+
+  /* Build "<a>, <b> missing" out of the options that were not found.  */
+  err_msg[0] = '\0';
+  if (!opt_client)
+    grub_strcpy (err_msg, "client id");
+  if (!opt_server)
+    grub_strcpy (err_msg + grub_strlen (err_msg),
+                 err_msg[0] ? ", server id" : "server id");
+  if (!opt_iana)
+    grub_strcpy (err_msg + grub_strlen (err_msg),
+                 err_msg[0] ? ", iana" : "iana");
+
+  if (err_msg[0])
+    {
+      grub_strcpy (err_msg + grub_strlen (err_msg), " missing");
+      /* The text is assembled at run time, so pass it as an argument and
+         never as the format string.  */
+      return grub_error (GRUB_ERR_IO, "%s", err_msg);
+    }
+
+  inf = session->ifaces;
+
+  multicast.type = GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6;
+  multicast.ipv6[0] = grub_cpu_to_be64_compile_time (0xff02ULL << 48);
+  multicast.ipv6[1] = grub_cpu_to_be64_compile_time (0x10002ULL);
+
+  err = grub_net_link_layer_resolve (inf, &multicast, &ll_multicast);
+  if (err)
+    return err;
+
+  nb = grub_netbuff_alloc (512);
+  if (!nb)
+    return grub_errno;
+
+  err = grub_netbuff_reserve (nb, 512);
+  if (err)
+    goto out;
+
+  /* Every push prepends, so the packet is assembled back to front.  */
+  err = dhcpv6_push_option_copy (nb, opt_client);
+  if (err)
+    goto out;
+
+  err = dhcpv6_push_option_copy (nb, opt_server);
+  if (err)
+    goto out;
+
+  err = dhcpv6_push_option_copy (nb, opt_iana);
+  if (err)
+    goto out;
+
+  /* Option request: 4-byte header plus two 16-bit option codes.  */
+  err = grub_netbuff_push (nb, 8);
+  if (err)
+    goto out;
+
+  popt = (struct grub_dhcpv6_option*) nb->data;
+  popt->code = grub_cpu_to_be16_compile_time (OPTION_ORO);
+  popt->len = grub_cpu_to_be16_compile_time (4);
+  grub_set_unaligned16 (popt->data, grub_cpu_to_be16_compile_time (OPTION_BOOTFILE_URL));
+  grub_set_unaligned16 (popt->data + 2, grub_cpu_to_be16_compile_time (OPTION_DNS_SERVERS));
+
+  /* Elapsed time: 4-byte header plus one 16-bit value.  */
+  err = grub_netbuff_push (nb, 6);
+  if (err)
+    goto out;
+
+  popt = (struct grub_dhcpv6_option*) nb->data;
+  popt->code = grub_cpu_to_be16_compile_time (OPTION_ELAPSED_TIME);
+  popt->len = grub_cpu_to_be16_compile_time (2);
+
+  /* The time is expressed in hundredths of a second and saturates.  */
+  elapsed = grub_divmod64 (grub_get_time_ms () - session->start_time, 10, 0);
+  if (elapsed > 0xffff)
+    elapsed = 0xffff;
+
+  grub_set_unaligned16 (popt->data, grub_cpu_to_be16 ((grub_uint16_t) elapsed));
+
+  err = grub_netbuff_push (nb, sizeof (*v6));
+  if (err)
+    goto out;
+
+  v6 = (struct grub_net_dhcpv6_packet *) nb->data;
+  v6->message_type = DHCPv6_REQUEST;
+  v6->transaction_id = v6_adv->transaction_id;
+
+  err = grub_netbuff_push (nb, sizeof (*udph));
+  if (err)
+    goto out;
+
+  udph = (struct udphdr *) nb->data;
+  udph->src = grub_cpu_to_be16_compile_time (546);
+  udph->dst = grub_cpu_to_be16_compile_time (547);
+  udph->chksum = 0;
+  udph->len = grub_cpu_to_be16 (nb->tail - nb->data);
+
+  udph->chksum = grub_net_ip_transport_checksum (nb, GRUB_NET_IP_UDP,
+                                                 &inf->address,
+                                                 &multicast);
+  err = grub_net_send_ip_packet (inf, &multicast, &ll_multicast, nb,
+                                 GRUB_NET_IP_UDP);
+
+out:
+  grub_netbuff_free (nb);
+  return err;
+}
+
+
+/* Configure an interface called NAME on CARD from the DHCPv6 REPLY packet V6.
+   The address found in IA_NA/IAADDR is added with a /64 route, any DNS
+   servers in the reply are registered, and the "boot_file" net property is
+   set when the reply carries a usable bootfile URL.  When IS_DEF is non-zero
+   and a server address was found, it becomes grub_net_default_server and
+   NAME becomes net_default_interface.
+   *DEVICE (if DEVICE is non-NULL) receives a newly allocated
+   "<proto>,<server>" string, and *PATH (if PATH is non-NULL) the directory
+   part of the boot file; the caller frees both.
+   Returns the new interface, or NULL with grub_errno set on failure.  */
+struct grub_net_network_level_interface *
+grub_net_configure_by_dhcpv6_reply (const char *name,
+        struct grub_net_card *card,
+        grub_net_interface_flags_t flags,
+        const struct grub_net_dhcpv6_packet *v6,
+        grub_size_t size __attribute__ ((unused)),
+        int is_def,
+        char **device, char **path)
+{
+  grub_net_network_level_address_t addr;
+  grub_net_network_level_netaddress_t netaddr;
+  struct grub_net_network_level_interface *inf;
+  struct grub_net_network_level_interface *ret = NULL;
+  const grub_uint8_t *your_ip;
+  char *proto = NULL;
+  char *server_ip = NULL;
+  char *boot_file = NULL;
+  grub_net_network_level_address_t *dns;
+  grub_uint16_t num_dns;
+
+  if (device)
+    *device = NULL;
+
+  if (path)
+    *path = NULL;
+
+  if (v6->message_type != DHCPv6_REPLY)
+    {
+      grub_error (GRUB_ERR_IO, N_("DHCPv6 info not found"));
+      return NULL;
+    }
+
+  your_ip = find_dhcpv6_address (v6);
+
+  if (!your_ip)
+    {
+      grub_error (GRUB_ERR_IO, N_("DHCPv6 address not found"));
+      return NULL;
+    }
+
+  get_dhcpv6_dns_address (v6, &dns, &num_dns);
+
+  if (dns && num_dns)
+    {
+      grub_uint16_t i;
+
+      for (i = 0; i < num_dns; ++i)
+        grub_net_add_dns_server (dns + i);
+
+      grub_free (dns);
+    }
+  else
+    {
+      if (grub_errno)
+        grub_print_error ();
+    }
+
+  /* A missing or unparsable URL is reported but is not fatal; the three
+     strings simply stay NULL.  */
+  find_dhcpv6_bootfile_url (v6, &proto, &server_ip, &boot_file);
+
+  if (grub_errno)
+    grub_print_error ();
+
+  addr.type = GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6;
+  addr.ipv6[0] = grub_get_unaligned64 (your_ip);
+  addr.ipv6[1] = grub_get_unaligned64 (your_ip + 8);
+  inf = grub_net_add_addr (name, card, &addr, &card->default_address, flags);
+  if (!inf)
+    goto out;
+
+  netaddr.type = GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6;
+  netaddr.ipv6.base[0] = grub_get_unaligned64 (your_ip);
+  netaddr.ipv6.base[1] = 0;
+  netaddr.ipv6.masksize = 64;
+  grub_net_add_route (name, netaddr, inf);
+
+  /* boot_file is NULL when the reply had no usable URL.  */
+  if (boot_file)
+    grub_env_set_net_property (name, "boot_file", boot_file,
+                               grub_strlen (boot_file));
+
+  if (is_def && server_ip)
+    {
+      grub_net_default_server = grub_strdup (server_ip);
+      grub_env_set ("net_default_interface", name);
+      grub_env_export ("net_default_interface");
+    }
+
+  if (device && server_ip && proto)
+    {
+      *device = grub_xasprintf ("%s,%s", proto, server_ip);
+      if (!*device)
+        goto out;
+    }
+
+  if (path && boot_file)
+    {
+      char *slash;
+
+      *path = grub_strdup (boot_file);
+      if (!*path)
+        goto out;
+
+      /* Keep only the directory part of the boot file.  */
+      slash = grub_strrchr (*path, '/');
+      if (slash)
+        *slash = 0;
+      else
+        **path = 0;
+    }
+
+  ret = inf;
+
+out:
+  /* Everything handed to the caller or to the environment was copied, so
+     the parsed URL parts are released on every exit.  */
+  grub_free (proto);
+  grub_free (server_ip);
+  grub_free (boot_file);
+
+  return ret;
+}
+
 void
 grub_net_process_dhcp (struct grub_net_buff *nb,
 		       struct grub_net_card *card)
@@ -295,6 +943,67 @@ grub_net_process_dhcp (struct grub_net_buff *nb,
     }
 }
 
+/* Handle the DHCPv6 message held in NB (UDP header already removed).
+   The packet is matched to a pending session by transaction id and IAID.
+   An ADVERTISE is answered with a REQUEST; a REPLY configures the
+   "<card>:dhcp" interface and, on success, retires the session.  Packets
+   that are too short, carry no IA_NA, or match no session are ignored.
+   NB is not freed here.  */
+void
+grub_net_process_dhcp6 (struct grub_net_buff *nb,
+        struct grub_net_card *card __attribute__ ((unused)))
+{
+  const struct grub_net_dhcpv6_packet *v6;
+  struct grub_dhcpv6_session *session;
+  const struct grub_dhcpv6_option *opt_iana;
+  const struct grub_dhcpv6_iana_option *ia_na;
+
+  /* The message header and at least one option header must be present
+     before anything is read from the buffer.  */
+  if ((grub_size_t) (nb->tail - nb->data) < sizeof (*v6) + sizeof (*opt_iana))
+    return;
+
+  v6 = (const struct grub_net_dhcpv6_packet *) nb->data;
+
+  opt_iana = find_dhcpv6_option (v6, OPTION_IA_NA);
+  if (!opt_iana)
+    return;
+
+  /* The IAID is only there if the option is long enough to hold it.  */
+  if (grub_be_to_cpu16 (opt_iana->len) < sizeof (*ia_na))
+    return;
+
+  ia_na = (const struct grub_dhcpv6_iana_option *) opt_iana->data;
+  FOR_DHCPV6_SESSIONS (session)
+    {
+      if (session->transaction_id == v6->transaction_id
+          && session->iaid == grub_be_to_cpu32 (ia_na->iaid))
+        break;
+    }
+
+  if (!session)
+    return;
+
+  if (v6->message_type == DHCPv6_ADVERTISE)
+    {
+      grub_net_configure_by_dhcpv6_adv (v6, session);
+    }
+  else if (v6->message_type == DHCPv6_REPLY)
+    {
+      char *name;
+      struct grub_net_network_level_interface *inf;
+
+      inf = session->ifaces;
+      name = grub_xasprintf ("%s:dhcp", inf->card->name);
+      if (!name)
+        return;
+
+      grub_net_configure_by_dhcpv6_reply (name, inf->card,
+                                          0, v6,
+                                          (nb->tail - nb->data), 0, 0, 0);
+
+      /* Keep the session on failure so that net_bootp6 retries it.  */
+      if (!grub_errno)
+        {
+          grub_dhcpv6_session_remove (session);
+          grub_free (session);
+        }
+
+      grub_free (name);
+    }
+
+  if (grub_errno)
+    grub_print_error ();
+}
+
 static char
 hexdigit (grub_uint8_t val)
 {
@@ -571,7 +1280,177 @@ grub_cmd_bootp (struct grub_command *cmd __attribute__ ((unused)),
   return err;
 }
 
-static grub_command_t cmd_getdhcp, cmd_bootp;
+/* Build one DHCPv6 SOLICIT for SESSION and send it from SESSION->ifaces to
+   the multicast address ff02::1:2.  The message carries an elapsed-time
+   option of 0, a DUID-LL client id made from the interface MAC, and an IA_NA
+   with the session's IAID.  Returns GRUB_ERR_NONE or the error of the step
+   that failed.  */
+static grub_err_t
+grub_dhcpv6_send_solicit (struct grub_dhcpv6_session *session)
+{
+  struct grub_net_buff *nb;
+  struct grub_dhcpv6_option *opt;
+  struct grub_net_dhcpv6_packet *v6;
+  struct grub_DUID_LL *duid;
+  struct grub_dhcpv6_iana_option *ia_na;
+  grub_net_network_level_address_t multicast;
+  grub_net_link_level_address_t ll_multicast;
+  struct udphdr *udph;
+  grub_err_t err;
+
+  multicast.type = GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6;
+  multicast.ipv6[0] = grub_cpu_to_be64_compile_time (0xff02ULL << 48);
+  multicast.ipv6[1] = grub_cpu_to_be64_compile_time (0x10002ULL);
+
+  err = grub_net_link_layer_resolve (session->ifaces,
+                                     &multicast, &ll_multicast);
+  if (err)
+    return err;
+
+  nb = grub_netbuff_alloc (512);
+  if (!nb)
+    return grub_errno;
+
+  err = grub_netbuff_reserve (nb, 512);
+  if (err)
+    goto out;
+
+  /* Every push prepends, so the packet is assembled back to front.  */
+  err = grub_netbuff_push (nb, 6);
+  if (err)
+    goto out;
+
+  opt = (struct grub_dhcpv6_option *) nb->data;
+  opt->code = grub_cpu_to_be16_compile_time (OPTION_ELAPSED_TIME);
+  opt->len = grub_cpu_to_be16_compile_time (2);
+  grub_set_unaligned16 (opt->data, 0);
+
+  err = grub_netbuff_push (nb, sizeof (*duid) + 4);
+  if (err)
+    goto out;
+
+  opt = (struct grub_dhcpv6_option *) nb->data;
+  opt->code = grub_cpu_to_be16_compile_time (OPTION_CLIENTID);
+  opt->len = grub_cpu_to_be16 (sizeof (*duid));
+
+  duid = (struct grub_DUID_LL *) opt->data;
+  duid->type = grub_cpu_to_be16_compile_time (3);
+  duid->hw_type = grub_cpu_to_be16_compile_time (1);
+  grub_memcpy (&duid->hwaddr, &session->ifaces->hwaddress.mac,
+               sizeof (session->ifaces->hwaddress.mac));
+
+  err = grub_netbuff_push (nb, sizeof (*ia_na) + 4);
+  if (err)
+    goto out;
+
+  opt = (struct grub_dhcpv6_option *) nb->data;
+  opt->code = grub_cpu_to_be16_compile_time (OPTION_IA_NA);
+  opt->len = grub_cpu_to_be16 (sizeof (*ia_na));
+  ia_na = (struct grub_dhcpv6_iana_option *) opt->data;
+  ia_na->iaid = grub_cpu_to_be32 (session->iaid);
+  ia_na->t1 = 0;
+  ia_na->t2 = 0;
+
+  err = grub_netbuff_push (nb, 4);
+  if (err)
+    goto out;
+
+  v6 = (struct grub_net_dhcpv6_packet *) nb->data;
+  v6->message_type = 1;	/* SOLICIT */
+  v6->transaction_id = session->transaction_id;
+
+  err = grub_netbuff_push (nb, sizeof (*udph));
+  if (err)
+    goto out;
+
+  udph = (struct udphdr *) nb->data;
+  udph->src = grub_cpu_to_be16_compile_time (546);
+  udph->dst = grub_cpu_to_be16_compile_time (547);
+  udph->chksum = 0;
+  udph->len = grub_cpu_to_be16 (nb->tail - nb->data);
+
+  udph->chksum = grub_net_ip_transport_checksum (nb, GRUB_NET_IP_UDP,
+                    &session->ifaces->address, &multicast);
+
+  err = grub_net_send_ip_packet (session->ifaces, &multicast,
+                                 &ll_multicast, nb, GRUB_NET_IP_UDP);
+
+out:
+  grub_netbuff_free (nb);
+  return err;
+}
+
+/* net_bootp6 [CARD]: start a DHCPv6 session on CARD, or on every card when
+   no argument is given, and keep sending SOLICIT with a doubling poll
+   interval (200 ms up to, but not including, 10000 ms) while sessions are
+   pending.  Sessions are retired by grub_net_process_dhcp6 when a REPLY
+   configures them.  Returns GRUB_ERR_NONE if none is left pending, a
+   "couldn't autoconfigure" error for a session that timed out, or the error
+   of a setup/send step that failed.  No session is left on the list when
+   the command returns.  */
+static grub_err_t
+grub_cmd_bootp6 (struct grub_command *cmd __attribute__ ((unused)),
+        int argc, char **args)
+{
+  struct grub_net_card *card;
+  unsigned j = 0;
+  int interval;
+  grub_err_t err = GRUB_ERR_NONE;
+  struct grub_dhcpv6_session *session;
+
+  FOR_NET_CARDS (card)
+  {
+    struct grub_net_network_level_interface *ifaces;
+
+    if (argc > 0 && grub_strcmp (card->name, args[0]) != 0)
+      continue;
+
+    ifaces = grub_net_ipv6_get_link_local (card, &card->default_address);
+    if (!ifaces)
+      {
+        err = grub_errno;
+        goto fail;
+      }
+
+    session = grub_zalloc (sizeof (*session));
+    if (!session)
+      {
+        err = grub_errno;
+        goto fail;
+      }
+
+    session->ifaces = ifaces;
+    session->iaid = j++;
+    grub_dhcpv6_session_add (session);
+  }
+
+  /* Stop early once every session has been retired by a REPLY.  */
+  for (interval = 200; interval < 10000 && grub_dhcpv6_sessions;
+       interval *= 2)
+    {
+      FOR_DHCPV6_SESSIONS (session)
+        {
+          err = grub_dhcpv6_send_solicit (session);
+          if (err)
+            goto fail;
+        }
+
+      grub_net_poll_cards (interval, 0);
+    }
+
+  /* Whatever is still queued never got a usable REPLY.  Always take the
+     head, because the node is freed inside the loop.  */
+  while ((session = grub_dhcpv6_sessions))
+    {
+      err = grub_error (GRUB_ERR_FILE_NOT_FOUND,
+                        N_("couldn't autoconfigure %s"),
+                        session->ifaces->card->name);
+      grub_dhcpv6_session_remove (session);
+      grub_free (session);
+    }
+
+  return err;
+
+fail:
+  /* Do not leave half-started sessions behind for later packets to match.  */
+  while ((session = grub_dhcpv6_sessions))
+    {
+      grub_dhcpv6_session_remove (session);
+      grub_free (session);
+    }
+
+  return err;
+}
+
+static grub_command_t cmd_getdhcp, cmd_bootp, cmd_bootp6;	/* Handles from grub_register_command, released in grub_bootp_fini. */
 
 void
 grub_bootp_init (void)
@@ -582,6 +1461,9 @@ grub_bootp_init (void)
   cmd_getdhcp = grub_register_command ("net_get_dhcp_option", grub_cmd_dhcpopt,
 				       N_("VAR INTERFACE NUMBER DESCRIPTION"),
 				       N_("retrieve DHCP option and save it into VAR. If VAR is - then print the value."));
+  cmd_bootp6 = grub_register_command ("net_bootp6", grub_cmd_bootp6,
+				     N_("[CARD]"),
+				     N_("perform a dhcpv6 autoconfiguration"));
 }
 
 void
@@ -589,4 +1471,5 @@ grub_bootp_fini (void)
 {
   grub_unregister_command (cmd_getdhcp);
   grub_unregister_command (cmd_bootp);
+  grub_unregister_command (cmd_bootp6);
 }
diff --git a/grub-core/net/ip.c b/grub-core/net/ip.c
index 8c56baa..8bb56be 100644
--- a/grub-core/net/ip.c
+++ b/grub-core/net/ip.c
@@ -238,6 +238,41 @@ handle_dgram (struct grub_net_buff *nb,
   {
     struct udphdr *udph;
     udph = (struct udphdr *) nb->data;
+
+    if (proto == GRUB_NET_IP_UDP && grub_be_to_cpu16 (udph->dst) == 546)
+      {
+	if (udph->chksum)
+	  {
+	    grub_uint16_t chk, expected;
+	    chk = udph->chksum;
+	    udph->chksum = 0;
+	    expected = grub_net_ip_transport_checksum (nb,
+						       GRUB_NET_IP_UDP,
+						       source,
+						       dest);
+	    if (expected != chk)
+	      {
+		grub_dprintf ("net", "Invalid UDP checksum. "
+			      "Expected %x, got %x\n",
+			      grub_be_to_cpu16 (expected),
+			      grub_be_to_cpu16 (chk));
+		grub_netbuff_free (nb);
+		return GRUB_ERR_NONE;
+	      }
+	    udph->chksum = chk;
+	  }
+
+	err = grub_netbuff_pull (nb, sizeof (*udph));
+	if (err)
+	  {
+	    grub_netbuff_free (nb);
+	    return err;
+	  }
+	grub_net_process_dhcp6 (nb, card);
+	grub_netbuff_free (nb);
+	return GRUB_ERR_NONE;
+      }
+
     if (proto == GRUB_NET_IP_UDP && grub_be_to_cpu16 (udph->dst) == 68)
       {
 	const struct grub_net_bootp_packet *bootp;
diff --git a/include/grub/efi/api.h b/include/grub/efi/api.h
index c7c9f0e..fa0a0ce 100644
--- a/include/grub/efi/api.h
+++ b/include/grub/efi/api.h
@@ -1452,14 +1452,68 @@ typedef struct grub_efi_simple_text_output_interface grub_efi_simple_text_output
 
 typedef grub_uint8_t grub_efi_pxe_packet_t[1472];
 
+typedef struct {	/* IPv4 address as 4 raw bytes. */
+  grub_uint8_t addr[4];
+} grub_efi_pxe_ipv4_address_t;
+
+typedef struct {	/* IPv6 address as 16 raw bytes. */
+  grub_uint8_t addr[16];
+} grub_efi_pxe_ipv6_address_t;
+
+typedef struct {	/* Hardware address stored in a fixed 32-byte field. */
+  grub_uint8_t addr[32];
+} grub_efi_pxe_mac_address_t;
+
+typedef union {	/* 16-byte slot that holds either an IPv4 or an IPv6 address. */
+    grub_uint32_t addr[4];	/* Fixes the union size at 16 bytes. */
+    grub_efi_pxe_ipv4_address_t v4;
+    grub_efi_pxe_ipv6_address_t v6;
+} grub_efi_pxe_ip_address_t;
+
+#define EFI_PXE_BASE_CODE_MAX_IPCNT             8	/* Capacity of ip_list below. */
+typedef struct {	/* Receive filter block embedded in grub_efi_pxe_mode. */
+    grub_uint8_t filters;
+    grub_uint8_t ip_cnt;	/* NOTE(review): presumably the number of valid ip_list entries; confirm against the UEFI spec. */
+    grub_uint16_t reserved;
+    grub_efi_pxe_ip_address_t ip_list[EFI_PXE_BASE_CODE_MAX_IPCNT];
+} grub_efi_pxe_ip_filter_t;
+
+typedef struct {	/* One arp_cache entry of grub_efi_pxe_mode: IP address and its hardware address. */
+    grub_efi_pxe_ip_address_t ip_addr;
+    grub_efi_pxe_mac_address_t mac_addr;
+} grub_efi_pxe_arp_entry_t;
+
+typedef struct {	/* One route_table entry of grub_efi_pxe_mode. */
+    grub_efi_pxe_ip_address_t ip_addr;
+    grub_efi_pxe_ip_address_t subnet_mask;
+    grub_efi_pxe_ip_address_t gw_addr;
+} grub_efi_pxe_route_entry_t;
+
+
+#define EFI_PXE_BASE_CODE_MAX_ARP_ENTRIES 8	/* Capacity of grub_efi_pxe_mode.arp_cache. */
+#define EFI_PXE_BASE_CODE_MAX_ROUTE_ENTRIES 8	/* Capacity of grub_efi_pxe_mode.route_table. */
+
 typedef struct grub_efi_pxe_mode
 {
-  grub_uint8_t unused[52];
+  grub_uint8_t started;
+  grub_uint8_t ipv6_available;
+  grub_uint8_t ipv6_supported;
+  grub_uint8_t using_ipv6;	/* Non-zero selects the DHCPv6 path in efinet.c. */
+  /* The 4 flag bytes above plus unused[16], station_ip (16) and subnet_mask (16) occupy the 52 bytes the former unused[52] covered. */
+  grub_uint8_t unused[16];
+  grub_efi_pxe_ip_address_t station_ip;
+  grub_efi_pxe_ip_address_t subnet_mask;
   grub_efi_pxe_packet_t dhcp_discover;
   grub_efi_pxe_packet_t dhcp_ack;
   grub_efi_pxe_packet_t proxy_offer;
   grub_efi_pxe_packet_t pxe_discover;
   grub_efi_pxe_packet_t pxe_reply;
+  grub_efi_pxe_packet_t pxe_bis_reply;
+  grub_efi_pxe_ip_filter_t ip_filter;
+  grub_uint32_t arp_cache_entries;
+  grub_efi_pxe_arp_entry_t arp_cache[EFI_PXE_BASE_CODE_MAX_ARP_ENTRIES];
+  grub_uint32_t route_table_entries;
+  grub_efi_pxe_route_entry_t route_table[EFI_PXE_BASE_CODE_MAX_ROUTE_ENTRIES];
 } grub_efi_pxe_mode_t;
 
 typedef struct grub_efi_pxe
diff --git a/include/grub/net.h b/include/grub/net.h
index 538baa3..71dc243 100644
--- a/include/grub/net.h
+++ b/include/grub/net.h
@@ -418,6 +418,13 @@ struct grub_net_bootp_packet
   grub_uint8_t vendor[0];
 } GRUB_PACKED;
 
+struct grub_net_dhcpv6_packet	/* Fixed 4-byte DHCPv6 message header followed by the options. */
+{
+  grub_uint32_t message_type:8;	/* Compared with DHCPv6_ADVERTISE / DHCPv6_REPLY in bootp.c. */
+  grub_uint32_t transaction_id:24;	/* Copied and compared as-is, without byte swapping. */
+  grub_uint8_t dhcp_options[0];	/* First option header starts here. */
+} GRUB_PACKED;
+
 #define	GRUB_NET_BOOTP_RFC1048_MAGIC_0	0x63
 #define	GRUB_NET_BOOTP_RFC1048_MAGIC_1	0x82
 #define	GRUB_NET_BOOTP_RFC1048_MAGIC_2	0x53
@@ -444,6 +451,14 @@ grub_net_configure_by_dhcp_ack (const char *name,
 				grub_size_t size,
 				int is_def, char **device, char **path);
 
+struct grub_net_network_level_interface *
+grub_net_configure_by_dhcpv6_reply (const char *name,
+				    struct grub_net_card *card,
+				    grub_net_interface_flags_t flags,
+				    const struct grub_net_dhcpv6_packet *v6,
+				    grub_size_t size,
+				    int is_def, char **device, char **path);
+
 grub_err_t
 grub_net_add_ipv4_local (struct grub_net_network_level_interface *inf,
 			 int mask);
@@ -452,6 +467,10 @@ void
 grub_net_process_dhcp (struct grub_net_buff *nb,
 		       struct grub_net_card *card);
 
+void
+grub_net_process_dhcp6 (struct grub_net_buff *nb,
+			struct grub_net_card *card);
+
 int
 grub_net_hwaddr_cmp (const grub_net_link_level_address_t *a,
 		     const grub_net_link_level_address_t *b);
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 02/14] UEFI IPv6 PXE support
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
  2016-02-10 21:20 ` [PATCH 01/14] Added net_bootp6 command Josef Bacik
@ 2016-02-10 21:20 ` Josef Bacik
  2016-02-10 21:20 ` [PATCH 03/14] Use UEFI MAC device as default configured by net_bootp6 Josef Bacik
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:20 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Michael Chang

From: Michael Chang <mchang@suse.com>

When the system is booted from UEFI IPv6 PXE, the network interface can be
configured directly from the DHCPv6 packet cached in firmware, without doing it
all over again by contacting the DHCP server.
---
 grub-core/net/drivers/efi/efinet.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/grub-core/net/drivers/efi/efinet.c b/grub-core/net/drivers/efi/efinet.c
index 5388f95..2b9a0e7 100644
--- a/grub-core/net/drivers/efi/efinet.c
+++ b/grub-core/net/drivers/efi/efinet.c
@@ -378,11 +378,25 @@ grub_efi_net_config_real (grub_efi_handle_t hnd, char **device,
     if (! pxe)
       continue;
     pxe_mode = pxe->mode;
-    grub_net_configure_by_dhcp_ack (card->name, card, 0,
-				    (struct grub_net_bootp_packet *)
-				    &pxe_mode->dhcp_ack,
-				    sizeof (pxe_mode->dhcp_ack),
-				    1, device, path);
+
+    if (pxe_mode->using_ipv6)
+      {
+	grub_net_configure_by_dhcpv6_reply (card->name, card, 0,
+					    (struct grub_net_dhcpv6_packet *)
+					    &pxe_mode->dhcp_ack,
+					    sizeof (pxe_mode->dhcp_ack),
+					    1, device, path);
+	if (grub_errno)
+	  grub_print_error ();
+      }
+    else
+      {
+	grub_net_configure_by_dhcp_ack (card->name, card, 0,
+					(struct grub_net_bootp_packet *)
+					&pxe_mode->dhcp_ack,
+					sizeof (pxe_mode->dhcp_ack),
+					1, device, path);
+      }
     return;
   }
 }
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 03/14] Use UEFI MAC device as default configured by net_bootp6
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
  2016-02-10 21:20 ` [PATCH 01/14] Added net_bootp6 command Josef Bacik
  2016-02-10 21:20 ` [PATCH 02/14] UEFI IPv6 PXE support Josef Bacik
@ 2016-02-10 21:20 ` Josef Bacik
  2016-02-10 21:20 ` [PATCH 04/14] tcp: add window scaling and RTTM support Josef Bacik
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:20 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Michael Chang

From: Michael Chang <mchang@suse.com>

grub_efinet_findcards registers cards by checking whether each handle supports
the EFI Simple Network Protocol, which creates more than one device for a
physical NIC device.

If no device is specified for net_bootp6 to configure, it should pick one of
them, not all. In my case three firmware devices are listed: IPv4, IPv6 and the
MAC device. Both IPv4 and IPv6 are derived from the MAC device to provide the
PXE Base Code Protocol. I think we should use the MAC device instead of those
two to avoid collisions, because the net_bootp6 command does not depend on PXE
Base Code, only on the Simple Network Protocol.
---
 grub-core/net/bootp.c              |  8 ++++++++
 grub-core/net/drivers/efi/efinet.c | 40 ++++++++++++++++++++++++++++++++++++++
 include/grub/net.h                 |  1 +
 3 files changed, 49 insertions(+)

diff --git a/grub-core/net/bootp.c b/grub-core/net/bootp.c
index 25ab70c..37d1cfa 100644
--- a/grub-core/net/bootp.c
+++ b/grub-core/net/bootp.c
@@ -1297,6 +1297,10 @@ grub_cmd_bootp6 (struct grub_command *cmd __attribute__ ((unused)),
   {
     if (argc > 0 && grub_strcmp (card->name, args[0]) != 0)
       continue;
+#ifdef GRUB_MACHINE_EFI
+    else if (!card->is_efi_mac_device (card))
+      continue;
+#endif
     ncards++;
   }
 
@@ -1306,6 +1310,10 @@ grub_cmd_bootp6 (struct grub_command *cmd __attribute__ ((unused)),
 
     if (argc > 0 && grub_strcmp (card->name, args[0]) != 0)
       continue;
+#ifdef GRUB_MACHINE_EFI
+    else if (!card->is_efi_mac_device (card))
+      continue;
+#endif
 
     ifaces = grub_net_ipv6_get_link_local (card, &card->default_address);
     if (!ifaces)
diff --git a/grub-core/net/drivers/efi/efinet.c b/grub-core/net/drivers/efi/efinet.c
index 2b9a0e7..692d5ad 100644
--- a/grub-core/net/drivers/efi/efinet.c
+++ b/grub-core/net/drivers/efi/efinet.c
@@ -229,6 +229,45 @@ grub_efinet_get_device_handle (struct grub_net_card *card)
   return card->efi_handle;
 }
 
+static int
+grub_efinet_is_mac_device (struct grub_net_card *card)
+{
+  grub_efi_handle_t efi_handle;
+  grub_efi_device_path_t *dp;
+  grub_efi_device_path_t *next, *p;
+  grub_efi_uint8_t type;
+  grub_efi_uint8_t subtype;
+
+  efi_handle = grub_efinet_get_device_handle (card);
+
+  if (!efi_handle)
+    return 0;
+
+  dp = grub_efi_get_device_path (efi_handle);
+
+  if (GRUB_EFI_END_ENTIRE_DEVICE_PATH (dp))
+    return 0;
+
+  for (p = (grub_efi_device_path_t *) dp, next = GRUB_EFI_NEXT_DEVICE_PATH (p);
+       ! GRUB_EFI_END_ENTIRE_DEVICE_PATH (next);
+       p = next, next = GRUB_EFI_NEXT_DEVICE_PATH (next))
+    ;
+
+  if (p)
+    {
+      type = GRUB_EFI_DEVICE_PATH_TYPE (p);
+      subtype = GRUB_EFI_DEVICE_PATH_SUBTYPE (p);
+
+      if (type == GRUB_EFI_MESSAGING_DEVICE_PATH_TYPE
+	  && subtype == GRUB_EFI_MAC_ADDRESS_DEVICE_PATH_SUBTYPE)
+	{
+	  return 1;
+	}
+    }
+
+  return 0;
+}
+
 static void
 grub_efinet_findcards (void)
 {
@@ -318,6 +357,7 @@ grub_efinet_findcards (void)
 		   sizeof (card->default_address.mac));
       card->efi_net = net;
       card->efi_handle = *handle;
+      card->is_efi_mac_device = grub_efinet_is_mac_device;
 
       grub_net_card_register (card);
     }
diff --git a/include/grub/net.h b/include/grub/net.h
index 71dc243..4571b72 100644
--- a/include/grub/net.h
+++ b/include/grub/net.h
@@ -140,6 +140,7 @@ struct grub_net_card
       struct grub_efi_simple_network *efi_net;
       grub_efi_handle_t efi_handle;
       grub_size_t last_pkt_size;
+      int (*is_efi_mac_device) (struct grub_net_card* card);
     };
 #endif
     void *data;
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 04/14] tcp: add window scaling and RTTM support
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
                   ` (2 preceding siblings ...)
  2016-02-10 21:20 ` [PATCH 03/14] Use UEFI MAC device as default configured by net_bootp6 Josef Bacik
@ 2016-02-10 21:20 ` Josef Bacik
  2016-02-10 21:20 ` [PATCH 05/14] net: don't free uninitialized sockets in dns Josef Bacik
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:20 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Josef Bacik

Sometimes we have to provision boxes across regions, such as California to
Sweden.  The http server has a 10 minute timeout, so if we can't get our 250mb
image transferred fast enough our provisioning fails, which is not ideal.  So
add tcp window scaling on open connections and set the window size to 1mb.  With
this change we're able to get higher sustained transfers between regions and can
transfer our image in well below 10 minutes.  Without this patch we'd time out
every time halfway through the transfer.

RTTM is needed in order to make window scaling work well under heavy congestion
or packet loss.  In most cases grub could recover with just window scaling
enabled, but on some machines the congestion would be so high that it would
never recover and would timeout.

I've made the window size configurable with the grub env variable
"net_tcp_window_size".  By default this is set to 1mb but can be configured to
whatever a user wants, and we will calculate the appropriate window size and
scale settings.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 grub-core/net/net.c |   1 +
 grub-core/net/tcp.c | 186 +++++++++++++++++++++++++++++++++++++++++++++++++---
 include/grub/net.h  |   2 +
 3 files changed, 181 insertions(+), 8 deletions(-)

diff --git a/grub-core/net/net.c b/grub-core/net/net.c
index d07029b..16bc4e7 100644
--- a/grub-core/net/net.c
+++ b/grub-core/net/net.c
@@ -1784,6 +1784,7 @@ GRUB_MOD_INIT(net)
 				       "", N_("list network cards"));
   cmd_lsaddr = grub_register_command ("net_ls_addr", grub_cmd_listaddrs,
 				       "", N_("list network addresses"));
+  grub_net_tcp_init ();
   grub_bootp_init ();
   grub_dns_init ();
 
diff --git a/grub-core/net/tcp.c b/grub-core/net/tcp.c
index e8ad34b..5b5b857 100644
--- a/grub-core/net/tcp.c
+++ b/grub-core/net/tcp.c
@@ -22,6 +22,7 @@
 #include <grub/net/netbuff.h>
 #include <grub/time.h>
 #include <grub/priority_queue.h>
+#include <grub/env.h>
 
 #define TCP_SYN_RETRANSMISSION_TIMEOUT GRUB_NET_INTERVAL
 #define TCP_SYN_RETRANSMISSION_COUNT GRUB_NET_TRIES
@@ -61,10 +62,12 @@ struct grub_net_tcp_socket
   int they_reseted;
   int i_reseted;
   int i_stall;
+  int timestamp_supported;
   grub_uint32_t my_start_seq;
   grub_uint32_t my_cur_seq;
   grub_uint32_t their_start_seq;
   grub_uint32_t their_cur_seq;
+  grub_uint32_t cur_tsecr;
   grub_uint16_t my_window;
   struct unacked *unack_first;
   struct unacked *unack_last;
@@ -106,6 +109,31 @@ struct tcphdr
   grub_uint16_t urgent;
 } GRUB_PACKED;
 
+enum
+  {
+    TCP_SCALE_OPT = 3,
+    TCP_TIMESTAMP_OPT = 8,
+  };
+
+struct tcp_opt_hdr
+{
+  grub_uint8_t kind;
+  grub_uint8_t length;
+} GRUB_PACKED;
+
+struct tcp_scale_opt
+{
+  struct tcp_opt_hdr opt;
+  grub_uint8_t scale;
+} GRUB_PACKED;
+
+struct tcp_timestamp_opt
+{
+  struct tcp_opt_hdr opt;
+  grub_uint32_t tsval;
+  grub_uint32_t tsecr;
+} GRUB_PACKED;
+
 struct tcp_pseudohdr
 {
   grub_uint32_t src;
@@ -126,6 +154,9 @@ struct tcp6_pseudohdr
 
 static struct grub_net_tcp_socket *tcp_sockets;
 static struct grub_net_tcp_listen *tcp_listens;
+static char *grub_net_tcp_window_size;
+static grub_uint32_t tcp_window_size;
+static grub_uint8_t tcp_window_scale;
 
 #define FOR_TCP_SOCKETS(var) FOR_LIST_ELEMENTS (var, tcp_sockets)
 #define FOR_TCP_LISTENS(var) FOR_LIST_ELEMENTS (var, tcp_listens)
@@ -299,9 +330,16 @@ ack_real (grub_net_tcp_socket_t sock, int res)
 {
   struct grub_net_buff *nb_ack;
   struct tcphdr *tcph_ack;
+  grub_size_t headersize;
   grub_err_t err;
 
-  nb_ack = grub_netbuff_alloc (sizeof (*tcph_ack) + 128);
+  if (sock->timestamp_supported)
+    headersize = ALIGN_UP (sizeof (*tcph_ack) +
+			   sizeof (struct tcp_timestamp_opt), 4);
+  else
+    headersize = ALIGN_UP (sizeof (*tcph_ack), 4);
+
+  nb_ack = grub_netbuff_alloc (headersize + 128);
   if (!nb_ack)
     return;
   err = grub_netbuff_reserve (nb_ack, 128);
@@ -313,7 +351,7 @@ ack_real (grub_net_tcp_socket_t sock, int res)
       return;
     }
 
-  err = grub_netbuff_put (nb_ack, sizeof (*tcph_ack));
+  err = grub_netbuff_put (nb_ack, headersize);
   if (err)
     {
       grub_netbuff_free (nb_ack);
@@ -322,22 +360,35 @@ ack_real (grub_net_tcp_socket_t sock, int res)
       return;
     }
   tcph_ack = (void *) nb_ack->data;
+  grub_memset (tcph_ack, 0, headersize);
   if (res)
     {
       tcph_ack->ack = grub_cpu_to_be32_compile_time (0);
-      tcph_ack->flags = grub_cpu_to_be16_compile_time ((5 << 12) | TCP_RST);
+      tcph_ack->flags = grub_cpu_to_be16 ((headersize << 10) | TCP_RST);
       tcph_ack->window = grub_cpu_to_be16_compile_time (0);
     }
   else
     {
       tcph_ack->ack = grub_cpu_to_be32 (sock->their_cur_seq);
-      tcph_ack->flags = grub_cpu_to_be16_compile_time ((5 << 12) | TCP_ACK);
+      /* See comment in grub_net_tcp_open for how this magic works. */
+      tcph_ack->flags = grub_cpu_to_be16 ((headersize << 10) | TCP_ACK);
       tcph_ack->window = !sock->i_stall ? grub_cpu_to_be16 (sock->my_window)
 	: 0;
     }
   tcph_ack->urgent = 0;
   tcph_ack->src = grub_cpu_to_be16 (sock->in_port);
   tcph_ack->dst = grub_cpu_to_be16 (sock->out_port);
+  if (sock->timestamp_supported)
+    {
+      struct tcp_timestamp_opt *timestamp;
+
+      timestamp = (struct tcp_timestamp_opt *)(tcph_ack + 1);
+      timestamp->opt.kind = TCP_TIMESTAMP_OPT;
+      timestamp->opt.length = sizeof (struct tcp_timestamp_opt);
+      timestamp->tsval = grub_cpu_to_be32 (grub_get_time_ms ());
+      timestamp->tsecr = grub_cpu_to_be32 (sock->cur_tsecr);
+    }
+
   err = tcp_send (nb_ack, sock);
   if (err)
     {
@@ -567,9 +618,12 @@ grub_net_tcp_open (char *server,
   static grub_uint16_t in_port = 21550;
   struct grub_net_buff *nb;
   struct tcphdr *tcph;
+  struct tcp_scale_opt *scale;
+  struct tcp_timestamp_opt *timestamp;
   int i;
   grub_uint8_t *nbd;
   grub_net_link_level_address_t ll_target_addr;
+  grub_size_t headersize;
 
   err = grub_net_resolve_address (server, &addr);
   if (err)
@@ -604,7 +658,9 @@ grub_net_tcp_open (char *server,
   socket->fin_hook = fin_hook;
   socket->hook_data = hook_data;
 
-  nb = grub_netbuff_alloc (sizeof (*tcph) + 128);
+  headersize = ALIGN_UP (sizeof (*tcph) + sizeof (*scale) +
+			 sizeof (*timestamp), 4);
+  nb = grub_netbuff_alloc (headersize + 128);
   if (!nb)
     {
       grub_free (socket);
@@ -619,7 +675,7 @@ grub_net_tcp_open (char *server,
       return NULL;
     }
 
-  err = grub_netbuff_put (nb, sizeof (*tcph));
+  err = grub_netbuff_put (nb, headersize);
   if (err)
     {
       grub_free (socket);
@@ -635,17 +691,33 @@ grub_net_tcp_open (char *server,
     }
 
   tcph = (void *) nb->data;
+  grub_memset(tcph, 0, headersize);
   socket->my_start_seq = grub_get_time_ms ();
   socket->my_cur_seq = socket->my_start_seq + 1;
-  socket->my_window = 8192;
+  socket->my_window = tcp_window_size;
   tcph->seqnr = grub_cpu_to_be32 (socket->my_start_seq);
   tcph->ack = grub_cpu_to_be32_compile_time (0);
-  tcph->flags = grub_cpu_to_be16_compile_time ((5 << 12) | TCP_SYN);
+  /* The top 4 bits of flags indicate how many words long the header is, and
+     since headersize is in bytes we can just shif up 10 to get the right number
+     of words in headersize, it's equivalent to ((headersize >> 2) << 12). */
+  tcph->flags = grub_cpu_to_be16 ((headersize << 10) | TCP_SYN);
   tcph->window = grub_cpu_to_be16 (socket->my_window);
   tcph->urgent = 0;
   tcph->src = grub_cpu_to_be16 (socket->in_port);
   tcph->dst = grub_cpu_to_be16 (socket->out_port);
   tcph->checksum = 0;
+
+  scale = (struct tcp_scale_opt *)(tcph + 1);
+  scale->opt.kind = TCP_SCALE_OPT;
+  scale->opt.length = sizeof (struct tcp_scale_opt);
+  scale->scale = tcp_window_scale;
+
+  timestamp = (struct tcp_timestamp_opt *)(scale + 1);
+  timestamp->opt.kind = TCP_TIMESTAMP_OPT;
+  timestamp->opt.length = sizeof (struct tcp_timestamp_opt);
+  timestamp->tsval = grub_cpu_to_be32 (grub_get_time_ms ());
+  timestamp->tsecr = 0;
+
   tcph->checksum = grub_net_ip_transport_checksum (nb, GRUB_NET_IP_TCP,
 						   &socket->inf->address,
 						   &socket->out_nla);
@@ -763,6 +835,7 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
 {
   struct tcphdr *tcph;
   grub_net_tcp_socket_t sock;
+  grub_uint32_t tsecr = 0;
   grub_err_t err;
 
   /* Ignore broadcast.  */
@@ -789,6 +862,38 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
       return GRUB_ERR_NONE;
     }
 
+  /* If the packet is large enough to have the timestamp opt then lets look for
+     the tsecr value. */
+  if ((grub_be_to_cpu16 (tcph->flags >> 12) * sizeof (grub_uint32_t)) >=
+      ALIGN_UP (sizeof (struct tcphdr) + sizeof (struct tcp_timestamp_opt), 4))
+    {
+      struct tcp_opt_hdr *opt;
+      grub_size_t remaining = nb->tail - nb->data;
+
+      opt = (struct tcp_opt_hdr *)(tcph + 1);
+      while (remaining > 0)
+	{
+	  grub_uint8_t len = 1;
+	  if (opt->kind == 8 || opt->kind == 0)
+	    break;
+	  if (opt->kind > 1)
+	    len = opt->length;
+	  if (len > remaining)
+	    len = remaining;
+	  remaining -= len;
+	  opt = (struct tcp_opt_hdr *)((grub_uint8_t *)opt + len);
+	}
+
+      /* Ok we definitely have the timestamp option. */
+      if (opt->kind == 8)
+	{
+	  struct tcp_timestamp_opt *timestamp;
+
+	  timestamp = (struct tcp_timestamp_opt *)opt;
+	  tsecr = grub_be_to_cpu32 (timestamp->tsval);
+	}
+    }
+
   FOR_TCP_SOCKETS (sock)
   {
     if (!(grub_be_to_cpu16 (tcph->dst) == sock->in_port
@@ -823,6 +928,9 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
 	sock->their_start_seq = grub_be_to_cpu32 (tcph->seqnr);
 	sock->their_cur_seq = sock->their_start_seq + 1;
 	sock->established = 1;
+	sock->timestamp_supported = 0;
+	if (tsecr)
+	  sock->timestamp_supported = 1;
       }
 
     if (grub_be_to_cpu16 (tcph->flags) & TCP_RST)
@@ -924,6 +1032,8 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
 	      return err;
 	    }
 
+	  /* We only update the tsecr when we advance the window. */
+	  sock->cur_tsecr = tsecr;
 	  sock->their_cur_seq += (nb_top->tail - nb_top->data);
 	  if (grub_be_to_cpu16 (tcph->flags) & TCP_FIN)
 	    {
@@ -1018,3 +1128,63 @@ grub_net_tcp_unstall (grub_net_tcp_socket_t sock)
   sock->i_stall = 0;
   ack (sock);
 }
+
+static const char *
+window_get_env (struct grub_env_var *var __attribute__ ((unused)),
+		const char *val __attribute__ ((unused)))
+{
+  return grub_net_tcp_window_size;
+}
+
+static char *
+window_set_env (struct grub_env_var *var __attribute__ ((unused)),
+		const char *val)
+{
+  grub_uint32_t ret;
+
+  if (val == NULL)
+    return NULL;
+
+  grub_error_push ();
+  ret = (grub_uint32_t) grub_strtoul (val, 0, 0);
+  if (grub_errno != GRUB_ERR_NONE)
+    {
+      grub_printf ("Invalid number for window size '%s'.\n", val);
+      grub_errno = GRUB_ERR_NONE;
+      grub_error_pop ();
+      return NULL;
+    }
+  grub_error_pop ();
+
+  /* A window size greater than 1gib is invalid. */
+  if (ret > 1024 * 1024 * 1024)
+    {
+      grub_printf ("TCP window size must be <= 1gib.\n");
+      return NULL;
+    }
+  grub_net_tcp_window_size = grub_strdup (val);
+  tcp_window_size = ret;
+  tcp_window_scale = 0;
+
+  /* The window size is only 16 bits long, so we have to scale it down to fit in
+     the header and calculate the scale along the way. */
+  while (tcp_window_size > 65535)
+    {
+      tcp_window_size >>= 1;
+      tcp_window_scale += 1;
+    }
+
+  return grub_net_tcp_window_size;
+}
+
+/* We set the default window size to 1mib. */
+#define DEFAULT_TCP_WINDOW_SIZE "1048576"
+
+void
+grub_net_tcp_init (void)
+{
+  grub_register_variable_hook ("net_tcp_window_size", window_get_env,
+			       window_set_env);
+  grub_env_export ("net_tcp_window_size");
+  grub_env_set ("net_tcp_window_size", DEFAULT_TCP_WINDOW_SIZE);
+}
diff --git a/include/grub/net.h b/include/grub/net.h
index 4571b72..fa3d286 100644
--- a/include/grub/net.h
+++ b/include/grub/net.h
@@ -551,6 +551,8 @@ grub_net_add_dns_server (const struct grub_net_network_level_address *s);
 void
 grub_net_remove_dns_server (const struct grub_net_network_level_address *s);
 
+void
+grub_net_tcp_init (void);
 
 extern char *grub_net_default_server;
 
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 05/14] net: don't free uninitialized sockets in dns
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
                   ` (3 preceding siblings ...)
  2016-02-10 21:20 ` [PATCH 04/14] tcp: add window scaling and RTTM support Josef Bacik
@ 2016-02-10 21:20 ` Josef Bacik
  2016-02-13 17:59   ` Andrei Borzenkov
  2016-02-10 21:21 ` [PATCH 06/14] net: fix ipv6 routing Josef Bacik
                   ` (8 subsequent siblings)
  13 siblings, 1 reply; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:20 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Josef Bacik

If we cannot open a connection to our dns server we will have NULL sockets in
our array, so don't do the cleanup on any sockets that didn't get created.

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 grub-core/net/dns.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/grub-core/net/dns.c b/grub-core/net/dns.c
index 89741dd..82a3307 100644
--- a/grub-core/net/dns.c
+++ b/grub-core/net/dns.c
@@ -598,7 +598,10 @@ grub_net_dns_lookup (const char *name,
   grub_free (data.name);
   grub_netbuff_free (nb);
   for (j = 0; j < send_servers; j++)
-    grub_net_udp_close (sockets[j]);
+    {
+      if (sockets[j])
+	grub_net_udp_close (sockets[j]);
+    }
   
   grub_free (sockets);
 
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 06/14] net: fix ipv6 routing
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
                   ` (4 preceding siblings ...)
  2016-02-10 21:20 ` [PATCH 05/14] net: don't free uninitialized sockets in dns Josef Bacik
@ 2016-02-10 21:21 ` Josef Bacik
  2016-02-25 19:39   ` Andrei Borzenkov
  2016-02-10 21:21 ` [PATCH 07/14] efinet: retransmit if our device is busy Josef Bacik
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:21 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Josef Bacik

ipv6 routing in grub2 is broken: we cannot talk to anything outside our local
network or anything that doesn't route in our global namespace.  This patch
fixes this by doing a couple of things:

1) Read the router information off of the router advertisement.  If we have a
router lifetime we need to take the source address and create a route from it.

2) Change the routing code slightly to allow you to specify a gateway _and_ an
interface.  Since the router advertisements come in on the link local address we
need to associate it with the global address on the card.  So when we are
processing the router advertisement, either use the SLAAC interface we create
and add the route to that interface, or loop through the global addresses we
currently have on our interface and associate it with one of those addresses.
We need to have a special case here for the default route so that it gets used;
we do this by setting the masksize to 0 to mean it encompasses all networks.
The routing code will automatically select the best route so if there is a
closer match we will use that.

With this patch I can now talk to ipv6 addresses outside of my local network.
Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 grub-core/net/bootp.c                  |  2 +-
 grub-core/net/drivers/ieee1275/ofnet.c |  4 +--
 grub-core/net/icmp6.c                  | 63 +++++++++++++++++++++++++++++++++-
 grub-core/net/net.c                    | 40 ++++++++-------------
 include/grub/net.h                     | 25 +++++++++++++-
 5 files changed, 103 insertions(+), 31 deletions(-)

diff --git a/grub-core/net/bootp.c b/grub-core/net/bootp.c
index 37d1cfa..9fc47bd 100644
--- a/grub-core/net/bootp.c
+++ b/grub-core/net/bootp.c
@@ -83,7 +83,7 @@ parse_dhcp_vendor (const char *name, const void *vend, int limit, int *mask)
 	      grub_memcpy (&gw.ipv4, ptr, sizeof (gw.ipv4));
 	      rname = grub_xasprintf ("%s:default", name);
 	      if (rname)
-		grub_net_add_route_gw (rname, target, gw);
+		grub_net_add_route_gw (rname, target, gw, NULL);
 	      grub_free (rname);
 	    }
 	  break;
diff --git a/grub-core/net/drivers/ieee1275/ofnet.c b/grub-core/net/drivers/ieee1275/ofnet.c
index eff9085..6bd3b92 100644
--- a/grub-core/net/drivers/ieee1275/ofnet.c
+++ b/grub-core/net/drivers/ieee1275/ofnet.c
@@ -151,7 +151,7 @@ grub_ieee1275_parse_bootpath (const char *devpath, char *bootpath,
   grub_net_network_level_address_t client_addr, gateway_addr, subnet_mask;
   grub_net_link_level_address_t hw_addr;
   grub_net_interface_flags_t flags = 0;
-  struct grub_net_network_level_interface *inter;
+  struct grub_net_network_level_interface *inter = NULL;
 
   hw_addr.type = GRUB_NET_LINK_LEVEL_PROTOCOL_ETHERNET;
 
@@ -221,7 +221,7 @@ grub_ieee1275_parse_bootpath (const char *devpath, char *bootpath,
       target.ipv4.masksize = 0;
       rname = grub_xasprintf ("%s:default", ((*card)->name));
       if (rname)
-        grub_net_add_route_gw (rname, target, gateway_addr);
+        grub_net_add_route_gw (rname, target, gateway_addr, inter);
       else
         return grub_errno;
     }
diff --git a/grub-core/net/icmp6.c b/grub-core/net/icmp6.c
index 7953e68..2cbd95d 100644
--- a/grub-core/net/icmp6.c
+++ b/grub-core/net/icmp6.c
@@ -115,6 +115,7 @@ grub_net_recv_icmp6_packet (struct grub_net_buff *nb,
 			    grub_uint8_t ttl)
 {
   struct icmp_header *icmph;
+  struct grub_net_network_level_interface *orig_inf = inf;
   grub_err_t err;
   grub_uint16_t checksum;
 
@@ -345,14 +346,31 @@ grub_net_recv_icmp6_packet (struct grub_net_buff *nb,
       {
 	grub_uint8_t *ptr;
 	struct option_header *ohdr;
+	struct router_adv *radv;
+	struct grub_net_network_level_interface *route_inf = NULL;
+	int default_route = 0;
 	if (icmph->code)
 	  break;
+	radv = (struct router_adv *)nb->data;
 	err = grub_netbuff_pull (nb, sizeof (struct router_adv));
 	if (err)
 	  {
 	    grub_netbuff_free (nb);
 	    return err;
 	  }
+	if (grub_be_to_cpu16 (radv->router_lifetime) > 0)
+	  {
+	    struct grub_net_route *route;
+
+	    FOR_NET_ROUTES (route)
+	    {
+	      if (!grub_memcmp (&route->gw, source, sizeof (route->gw)))
+		break;
+	    }
+	    if (route == NULL)
+	      default_route = 1;
+	  }
+
 	for (ptr = (grub_uint8_t *) nb->data; ptr < nb->tail;
 	     ptr += ohdr->len * 8)
 	  {
@@ -413,7 +431,11 @@ grub_net_recv_icmp6_packet (struct grub_net_buff *nb,
 		    /* Update lease time if needed here once we have
 		       lease times.  */
 		    if (inf)
-		      continue;
+		      {
+			if (!route_inf)
+			  route_inf = inf;
+			continue;
+		      }
 
 		    grub_dprintf ("net", "creating slaac\n");
 
@@ -429,12 +451,51 @@ grub_net_recv_icmp6_packet (struct grub_net_buff *nb,
 		      inf = grub_net_add_addr (name, 
 					       card, &addr,
 					       &slaac->address, 0);
+		      if (!route_inf)
+			route_inf = inf;
 		      grub_net_add_route (name, netaddr, inf);
 		      grub_free (name);
 		    }
 		  }
 	      }
 	  }
+	if (default_route)
+	  {
+	    char *name;
+	    grub_net_network_level_netaddress_t netaddr;
+	    name = grub_xasprintf ("%s:ra:default6", card->name);
+	    if (!name)
+	      {
+		grub_errno = GRUB_ERR_NONE;
+		goto next;
+	      }
+	    /* Default routes take alll of the traffic, so make the mask huge */
+	    netaddr.type = GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6;
+	    netaddr.ipv6.masksize = 0;
+	    netaddr.ipv6.base[0] = 0;
+	    netaddr.ipv6.base[1] = 0;
+
+	    /* May not have gotten slaac info, find a global address on this
+	      card.  */
+	    if (route_inf == NULL)
+	      {
+		FOR_NET_NETWORK_LEVEL_INTERFACES (inf)
+		{
+		  if (inf->card == card && inf != orig_inf
+		      && inf->address.type == GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6
+		      && grub_net_hwaddr_cmp(&inf->hwaddress,
+					     &orig_inf->hwaddress) == 0)
+		    {
+		      route_inf = inf;
+		      break;
+		    }
+		}
+	      }
+	    if (route_inf != NULL)
+	      grub_net_add_route_gw (name, netaddr, *source, route_inf);
+	    grub_free (name);
+	  }
+next:
 	if (ptr != nb->tail)
 	  break;
       }
diff --git a/grub-core/net/net.c b/grub-core/net/net.c
index 16bc4e7..f49631f 100644
--- a/grub-core/net/net.c
+++ b/grub-core/net/net.c
@@ -37,21 +37,6 @@ GRUB_MOD_LICENSE ("GPLv3+");
 
 char *grub_net_default_server;
 
-struct grub_net_route
-{
-  struct grub_net_route *next;
-  struct grub_net_route **prev;
-  grub_net_network_level_netaddress_t target;
-  char *name;
-  struct grub_net_network_level_protocol *prot;
-  int is_gateway;
-  union
-  {
-    struct grub_net_network_level_interface *interface;
-    grub_net_network_level_address_t gw;
-  };
-};
-
 struct grub_net_route *grub_net_routes = NULL;
 struct grub_net_network_level_interface *grub_net_network_level_interfaces = NULL;
 struct grub_net_card *grub_net_cards = NULL;
@@ -410,14 +395,6 @@ grub_cmd_ipv6_autoconf (struct grub_command *cmd __attribute__ ((unused)),
   return err;
 }
 
-static inline void
-grub_net_route_register (struct grub_net_route *route)
-{
-  grub_list_push (GRUB_AS_LIST_P (&grub_net_routes),
-		  GRUB_AS_LIST (route));
-}
-
-#define FOR_NET_ROUTES(var) for (var = grub_net_routes; var; var = var->next)
 
 static int
 parse_ip (const char *val, grub_uint32_t *ip, const char **rest)
@@ -524,6 +501,8 @@ match_net (const grub_net_network_level_netaddress_t *net,
     case GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6:
       {
 	grub_uint64_t mask[2];
+	if (net->ipv6.masksize == 0)
+	  return 1;
 	if (net->ipv6.masksize <= 64)
 	  {
 	    mask[0] = 0xffffffffffffffffULL << (64 - net->ipv6.masksize);
@@ -687,7 +666,14 @@ grub_net_route_address (grub_net_network_level_address_t addr,
 	  return GRUB_ERR_NONE;
 	}
       if (depth == 0)
-	*gateway = bestroute->gw;
+	{
+	  *gateway = bestroute->gw;
+	  if (bestroute->interface != NULL)
+	    {
+	      *interf = bestroute->interface;
+	      return GRUB_ERR_NONE;
+	    }
+	}
       curtarget = bestroute->gw;
     }
 
@@ -1109,7 +1095,8 @@ grub_net_add_route (const char *name,
 grub_err_t
 grub_net_add_route_gw (const char *name,
 		       grub_net_network_level_netaddress_t target,
-		       grub_net_network_level_address_t gw)
+		       grub_net_network_level_address_t gw,
+		       struct grub_net_network_level_interface *inter)
 {
   struct grub_net_route *route;
 
@@ -1127,6 +1114,7 @@ grub_net_add_route_gw (const char *name,
   route->target = target;
   route->is_gateway = 1;
   route->gw = gw;
+  route->interface = inter;
 
   grub_net_route_register (route);
 
@@ -1152,7 +1140,7 @@ grub_cmd_addroute (struct grub_command *cmd __attribute__ ((unused)),
       err = grub_net_resolve_address (args[3], &gw);
       if (err)
 	return err;
-      return grub_net_add_route_gw (args[0], target, gw);
+      return grub_net_add_route_gw (args[0], target, gw, NULL);
     }
   else
     {
diff --git a/include/grub/net.h b/include/grub/net.h
index fa3d286..b5d4546 100644
--- a/include/grub/net.h
+++ b/include/grub/net.h
@@ -192,6 +192,18 @@ typedef struct grub_net_network_level_netaddress
   };
 } grub_net_network_level_netaddress_t;
 
+struct grub_net_route
+{
+  struct grub_net_route *next;
+  struct grub_net_route **prev;
+  grub_net_network_level_netaddress_t target;
+  char *name;
+  struct grub_net_network_level_protocol *prot;
+  int is_gateway;
+  struct grub_net_network_level_interface *interface;
+  grub_net_network_level_address_t gw;
+};
+
 #define FOR_PACKETS(cont,var) for (var = (cont).first; var; var = var->next)
 
 static inline grub_err_t
@@ -368,6 +380,16 @@ grub_net_card_unregister (struct grub_net_card *card);
 #define FOR_NET_CARDS_SAFE(var, next) for (var = grub_net_cards, next = (var ? var->next : 0); var; var = next, next = (var ? var->next : 0))
 
 
+extern struct grub_net_route *grub_net_routes;
+
+static inline void
+grub_net_route_register (struct grub_net_route *route)
+{
+  grub_list_push (GRUB_AS_LIST_P (&grub_net_routes),
+		  GRUB_AS_LIST (route));
+}
+
+#define FOR_NET_ROUTES(var) for (var = grub_net_routes; var; var = var->next)
 struct grub_net_session *
 grub_net_open_tcp (char *address, grub_uint16_t port);
 
@@ -393,7 +415,8 @@ grub_net_add_route (const char *name,
 grub_err_t
 grub_net_add_route_gw (const char *name,
 		       grub_net_network_level_netaddress_t target,
-		       grub_net_network_level_address_t gw);
+		       grub_net_network_level_address_t gw,
+		       struct grub_net_network_level_interface *inter);
 
 
 #define GRUB_NET_BOOTP_MAC_ADDR_LEN	16
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 07/14] efinet: retransmit if our device is busy
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
                   ` (5 preceding siblings ...)
  2016-02-10 21:21 ` [PATCH 06/14] net: fix ipv6 routing Josef Bacik
@ 2016-02-10 21:21 ` Josef Bacik
  2016-02-10 21:21 ` [PATCH 08/14] efinet: filter multicast traffic based on addresses Josef Bacik
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:21 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Josef Bacik

When I fixed the txbuf handling I ripped out the retransmission code since it
was flooding our network when we had the buggy behavior.  Turns out this was too
heavy handed as we can still have transient tx timeouts.  So instead make sure
we retry our transmission once per timeout.  This way we can deal with transient
transmission problems without flooding the box.  This fixes an issue we were
seeing in production.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 grub-core/net/drivers/efi/efinet.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/grub-core/net/drivers/efi/efinet.c b/grub-core/net/drivers/efi/efinet.c
index 692d5ad..c8f80a1 100644
--- a/grub-core/net/drivers/efi/efinet.c
+++ b/grub-core/net/drivers/efi/efinet.c
@@ -38,6 +38,7 @@ send_card_buffer (struct grub_net_card *dev,
   grub_efi_simple_network_t *net = dev->efi_net;
   grub_uint64_t limit_time = grub_get_time_ms () + 4000;
   void *txbuf;
+  int retry = 0;
 
   if (dev->txbusy)
     while (1)
@@ -60,6 +61,15 @@ send_card_buffer (struct grub_net_card *dev,
 	    dev->txbusy = 0;
 	    break;
 	  }
+	if (!retry)
+	  {
+	    st = efi_call_7 (net->transmit, net, 0, dev->last_pkt_size,
+			     dev->txbuf, NULL, NULL, NULL);
+	    if (st != GRUB_EFI_SUCCESS)
+	      return grub_error (GRUB_ERR_IO,
+				 N_("couldn't send network packet"));
+	    retry = 1;
+	  }
 	if (limit_time < grub_get_time_ms ())
 	  return grub_error (GRUB_ERR_TIMEOUT,
 			     N_("couldn't send network packet"));
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 08/14] efinet: filter multicast traffic based on addresses
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
                   ` (6 preceding siblings ...)
  2016-02-10 21:21 ` [PATCH 07/14] efinet: retransmit if our device is busy Josef Bacik
@ 2016-02-10 21:21 ` Josef Bacik
  2016-02-10 21:21 ` [PATCH 09/14] efinet: clear the txbuffer before modifying the receive filters Josef Bacik
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:21 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Josef Bacik

We have some hardware that claims to support PROMISCUOUS_MULTICAST but doesn't
actually work.  Instead utilize the multicast filters and specifically enable
the multicast traffic we care about.  In reality we only care about ipv6
multicast traffic but enable ipv4 multicast as well just in case.  Whenever we
add a new address to the card we calculate the solicited node multicast address
and add it to the multicast filter.  With this patch my broken hardware is still
broken but functional.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 grub-core/net/drivers/efi/efinet.c | 84 ++++++++++++++++++++++++++++++++++----
 grub-core/net/net.c                |  2 +
 include/grub/net.h                 | 54 ++++++++++++------------
 3 files changed, 105 insertions(+), 35 deletions(-)

diff --git a/grub-core/net/drivers/efi/efinet.c b/grub-core/net/drivers/efi/efinet.c
index c8f80a1..bbbadd2 100644
--- a/grub-core/net/drivers/efi/efinet.c
+++ b/grub-core/net/drivers/efi/efinet.c
@@ -23,6 +23,7 @@
 #include <grub/efi/api.h>
 #include <grub/efi/efi.h>
 #include <grub/i18n.h>
+#include <grub/net/ip.h>
 
 GRUB_MOD_LICENSE ("GPLv3+");
 
@@ -183,8 +184,9 @@ open_card (struct grub_net_card *dev)
 	 We need unicast and broadcast and additionaly all nodes and
 	 solicited multicast for IPv6. Solicited multicast is per-IPv6
 	 address and we currently do not have API to do it so simply
-	 try to enable receive of all multicast packets or evertyhing in
-	 the worst case (i386 PXE driver always enables promiscuous too).
+	 enable the all node addresses and the link local address.  We do this
+	 because some firmware has been found to not do promiscuous multicast
+	 mode properly.
 
 	 This does trust firmware to do what it claims to do.
        */
@@ -192,14 +194,25 @@ open_card (struct grub_net_card *dev)
 	{
 	  grub_uint32_t filters = GRUB_EFI_SIMPLE_NETWORK_RECEIVE_UNICAST   |
 				  GRUB_EFI_SIMPLE_NETWORK_RECEIVE_BROADCAST |
-				  GRUB_EFI_SIMPLE_NETWORK_RECEIVE_PROMISCUOUS_MULTICAST;
+				  GRUB_EFI_SIMPLE_NETWORK_RECEIVE_MULTICAST;
+	  grub_efi_status_t st;
+	  grub_efi_mac_address_t mac_filter[2] = {
+		  { 0x1, 0, 0x5e, 0, 0, 1, },
+		  { 0x33, 0x33, 0, 0, 0, 1, },};
 
 	  filters &= net->mode->receive_filter_mask;
-	  if (!(filters & GRUB_EFI_SIMPLE_NETWORK_RECEIVE_PROMISCUOUS_MULTICAST))
-	    filters |= (net->mode->receive_filter_mask &
-			GRUB_EFI_SIMPLE_NETWORK_RECEIVE_PROMISCUOUS);
-
-	  efi_call_6 (net->receive_filters, net, filters, 0, 0, 0, NULL);
+	  if (net->mode->max_mcast_filter_count < 2)
+	    filters &= ~GRUB_EFI_SIMPLE_NETWORK_RECEIVE_MULTICAST;
+
+	  if (filters & GRUB_EFI_SIMPLE_NETWORK_RECEIVE_MULTICAST)
+	    st = efi_call_6 (net->receive_filters, net, filters, 0, 0, 2,
+			     mac_filter);
+	  else
+	    st = efi_call_6 (net->receive_filters, net, filters, 0, 0, 0,
+			     NULL);
+	  if (st != GRUB_EFI_SUCCESS)
+	    grub_dprintf("efinet", "failed to set receive filters %u, %u\n",
+			 (unsigned)filters, (unsigned)st);
 	}
 
       efi_call_4 (grub_efi_system_table->boot_services->close_protocol,
@@ -212,6 +225,58 @@ open_card (struct grub_net_card *dev)
   return GRUB_ERR_NONE;
 }
 
+/* We only need the lower 24 bits of the address, so just take the bottom part
+   of the address and convert it over.
+ */
+static void
+solicited_node_mcast_addr_to_mac (grub_uint64_t addr,
+				  grub_efi_mac_address_t mac)
+{
+  grub_uint64_t cpu_addr = grub_be_to_cpu64(addr);
+  int i, c = 0;
+
+  /* The format is 33:33:xx:xx:xx:xx, where xx is the last 32 bits of the
+     multicast address.
+
+     The solicited node mcast addr is in the format ff02:0:0:0:0:1:ffxx:xxxx,
+     where xx is the last 24 bits of the ipv6 address.
+   */
+  mac[0] = 0x33;
+  mac[1] = 0x33;
+  mac[2] = 0xff;
+  for (i = 3; i < 6; i++, c++)
+    mac[i] = (grub_uint8_t)((cpu_addr >> (16 - 8 * c)) & 0xff);
+}
+
+static void
+add_addr (struct grub_net_card *dev,
+	  const grub_net_network_level_address_t *address)
+{
+  grub_efi_simple_network_t *net = dev->efi_net;
+  grub_efi_mac_address_t mac_filters[16];
+  grub_efi_status_t st;
+  unsigned slot = net->mode->mcast_filter_count;
+
+  /* We don't need to add anything for ipv4 addresses. */
+  if (address->type != GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6)
+    return;
+
+  if ((slot >= net->mode->max_mcast_filter_count)
+      || !(GRUB_EFI_SIMPLE_NETWORK_RECEIVE_MULTICAST &
+	   net->mode->receive_filter_mask))
+    return;
+
+  grub_memcpy(mac_filters, net->mode->mcast_filter,
+	      sizeof (grub_efi_mac_address_t) * slot);
+  solicited_node_mcast_addr_to_mac (address->ipv6[1], mac_filters[slot++]);
+  st = efi_call_6 (net->receive_filters, net,
+		   GRUB_EFI_SIMPLE_NETWORK_RECEIVE_MULTICAST, 0, 0, slot,
+		   mac_filters);
+  if (st != GRUB_EFI_SUCCESS)
+    grub_dprintf("efinet", "failed to add new receive filter %u\n",
+		 (unsigned)st);
+}
+
 static void
 close_card (struct grub_net_card *dev)
 {
@@ -228,7 +293,8 @@ static struct grub_net_card_driver efidriver =
     .open = open_card,
     .close = close_card,
     .send = send_card_buffer,
-    .recv = get_card_packet
+    .recv = get_card_packet,
+    .add_addr = add_addr,
   };
 
 grub_efi_handle_t
diff --git a/grub-core/net/net.c b/grub-core/net/net.c
index f49631f..e04a35b 100644
--- a/grub-core/net/net.c
+++ b/grub-core/net/net.c
@@ -252,6 +252,8 @@ grub_net_add_addr_real (char *name,
   inter->dhcp_ack = NULL;
   inter->dhcp_acklen = 0;
 
+  if (card->driver->add_addr)
+    card->driver->add_addr(card, addr);
   grub_net_network_level_interface_register (inter);
 
   return inter;
diff --git a/include/grub/net.h b/include/grub/net.h
index b5d4546..080a2d9 100644
--- a/include/grub/net.h
+++ b/include/grub/net.h
@@ -67,6 +67,32 @@ typedef enum grub_net_card_flags
     GRUB_NET_CARD_NO_MANUAL_INTERFACES = 2
   } grub_net_card_flags_t;
 
+typedef enum grub_network_level_protocol_id 
+{
+  GRUB_NET_NETWORK_LEVEL_PROTOCOL_DHCP_RECV,
+  GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV4,
+  GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6
+} grub_network_level_protocol_id_t;
+
+typedef enum
+{
+  DNS_OPTION_IPV4,
+  DNS_OPTION_IPV6,
+  DNS_OPTION_PREFER_IPV4,
+  DNS_OPTION_PREFER_IPV6
+} grub_dns_option_t;
+
+typedef struct grub_net_network_level_address
+{
+  grub_network_level_protocol_id_t type;
+  union
+  {
+    grub_uint32_t ipv4;
+    grub_uint64_t ipv6[2];
+  };
+  grub_dns_option_t option;
+} grub_net_network_level_address_t;
+
 struct grub_net_card;
 
 struct grub_net_card_driver
@@ -79,6 +105,8 @@ struct grub_net_card_driver
   grub_err_t (*send) (struct grub_net_card *dev,
 		      struct grub_net_buff *buf);
   struct grub_net_buff * (*recv) (struct grub_net_card *dev);
+  void (*add_addr) (struct grub_net_card *dev,
+		    const grub_net_network_level_address_t *address);
 };
 
 typedef struct grub_net_packet
@@ -150,32 +178,6 @@ struct grub_net_card
 
 struct grub_net_network_level_interface;
 
-typedef enum grub_network_level_protocol_id 
-{
-  GRUB_NET_NETWORK_LEVEL_PROTOCOL_DHCP_RECV,
-  GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV4,
-  GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6
-} grub_network_level_protocol_id_t;
-
-typedef enum
-{
-  DNS_OPTION_IPV4,
-  DNS_OPTION_IPV6,
-  DNS_OPTION_PREFER_IPV4,
-  DNS_OPTION_PREFER_IPV6
-} grub_dns_option_t;
-
-typedef struct grub_net_network_level_address
-{
-  grub_network_level_protocol_id_t type;
-  union
-  {
-    grub_uint32_t ipv4;
-    grub_uint64_t ipv6[2];
-  };
-  grub_dns_option_t option;
-} grub_net_network_level_address_t;
-
 typedef struct grub_net_network_level_netaddress
 {
   grub_network_level_protocol_id_t type;
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 09/14] efinet: clear the txbuffer before modifying the receive filters
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
                   ` (7 preceding siblings ...)
  2016-02-10 21:21 ` [PATCH 08/14] efinet: filter multicast traffic based on addresses Josef Bacik
@ 2016-02-10 21:21 ` Josef Bacik
  2016-02-10 21:21 ` [PATCH 10/14] dns: poll card between each dns request Josef Bacik
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:21 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Josef Bacik

We had some hardware that would hang when trying to modify the receive filters
if there was anything in the tx queue.  So move the common logic out of
send_card_buffer into a new function called clear_txbuffer and then call that
from send_card_buffer and add_addr.  With this patch the buggy firmware now
properly updates the multicast receive filters.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 grub-core/net/drivers/efi/efinet.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/grub-core/net/drivers/efi/efinet.c b/grub-core/net/drivers/efi/efinet.c
index bbbadd2..c1ee18a 100644
--- a/grub-core/net/drivers/efi/efinet.c
+++ b/grub-core/net/drivers/efi/efinet.c
@@ -32,8 +32,7 @@ static grub_efi_guid_t net_io_guid = GRUB_EFI_SIMPLE_NETWORK_GUID;
 static grub_efi_guid_t pxe_io_guid = GRUB_EFI_PXE_GUID;
 
 static grub_err_t
-send_card_buffer (struct grub_net_card *dev,
-		  struct grub_net_buff *pack)
+clear_txbuffer (struct grub_net_card *dev)
 {
   grub_efi_status_t st;
   grub_efi_simple_network_t *net = dev->efi_net;
@@ -75,6 +74,21 @@ send_card_buffer (struct grub_net_card *dev,
 	  return grub_error (GRUB_ERR_TIMEOUT,
 			     N_("couldn't send network packet"));
       }
+  return GRUB_ERR_NONE;
+}
+
+static grub_err_t
+send_card_buffer (struct grub_net_card *dev,
+		  struct grub_net_buff *pack)
+{
+  grub_efi_status_t st;
+  grub_efi_simple_network_t *net = dev->efi_net;
+  grub_err_t ret;
+  void *txbuf;
+
+  ret = clear_txbuffer (dev);
+  if (ret != GRUB_ERR_NONE)
+    return ret;
 
   dev->last_pkt_size = (pack->tail - pack->data);
   if (dev->last_pkt_size > dev->mtu)
@@ -254,6 +268,7 @@ add_addr (struct grub_net_card *dev,
 {
   grub_efi_simple_network_t *net = dev->efi_net;
   grub_efi_mac_address_t mac_filters[16];
+  grub_uint32_t current_settings = net->mode->receive_filter_setting;
   grub_efi_status_t st;
   unsigned slot = net->mode->mcast_filter_count;
 
@@ -266,11 +281,18 @@ add_addr (struct grub_net_card *dev,
 	   net->mode->receive_filter_mask))
     return;
 
+  /* Copy the existing filters and add the new filter. */
   grub_memcpy(mac_filters, net->mode->mcast_filter,
 	      sizeof (grub_efi_mac_address_t) * slot);
   solicited_node_mcast_addr_to_mac (address->ipv6[1], mac_filters[slot++]);
-  st = efi_call_6 (net->receive_filters, net,
-		   GRUB_EFI_SIMPLE_NETWORK_RECEIVE_MULTICAST, 0, 0, slot,
+
+  /* Some firmware will hang if we try to modify the receive filters while the
+     tx buffer still has something in the queue, so clear it before resetting
+     the filters. */
+  if (clear_txbuffer (dev) != GRUB_ERR_NONE)
+    grub_dprintf("efinet", "couldn't clear the txbuffer.\n");
+
+  st = efi_call_6 (net->receive_filters, net, current_settings, 0, 0, slot,
 		   mac_filters);
   if (st != GRUB_EFI_SUCCESS)
     grub_dprintf("efinet", "failed to add new receive filter %u\n",
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 10/14] dns: poll card between each dns request
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
                   ` (8 preceding siblings ...)
  2016-02-10 21:21 ` [PATCH 09/14] efinet: clear the txbuffer before modifying the receive filters Josef Bacik
@ 2016-02-10 21:21 ` Josef Bacik
  2016-02-15  6:45   ` Andrei Borzenkov
  2016-02-10 21:21 ` [PATCH 11/14] dns: reset data->naddresses for every packet we receive Josef Bacik
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:21 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Josef Bacik

If we have dns servers that we prefer to get AAAA records from we'll send a
packet and immediately check data.naddresses to see if we got a response.  If we
didn't we'll then send a request for an A record, and _then_ we'll poll the
card.  So if the DNS server doesn't respond between us sending the packet and
checking data.naddresses we'll send a request for the A record and then poll the
card.  Instead we need to make sure we poll after we issue each request to make
sure we give the server enough time to respond to our initial request.

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 grub-core/net/dns.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/grub-core/net/dns.c b/grub-core/net/dns.c
index 82a3307..86e609b 100644
--- a/grub-core/net/dns.c
+++ b/grub-core/net/dns.c
@@ -587,12 +587,12 @@ grub_net_dns_lookup (const char *name,
                   grub_errno = GRUB_ERR_NONE;
                   err = err2;
                 }
+	      grub_net_poll_cards (200, &data.stop);
               if (*data.naddresses)
                 goto out;
             }
           while (t == 1);
 	}
-      grub_net_poll_cards (200, &data.stop);
     }
  out:
   grub_free (data.name);
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 11/14] dns: reset data->naddresses for every packet we receive
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
                   ` (9 preceding siblings ...)
  2016-02-10 21:21 ` [PATCH 10/14] dns: poll card between each dns request Josef Bacik
@ 2016-02-10 21:21 ` Josef Bacik
  2016-02-13 16:05   ` Andrei Borzenkov
  2016-02-10 21:21 ` [PATCH 12/14] icmp6: use default interface as the route interface Josef Bacik
                   ` (2 subsequent siblings)
  13 siblings, 1 reply; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:21 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Josef Bacik

I noticed when debugging a problem that we'd corrupt memory if our dns server
didn't respond fast enough and we ended up asking for both an AAAA and A record
for a server.  The problem is we alloc data->addresses based on the number of
addresses in the packet, but we populate it based on data->naddresses.  So we
get the AAAA record with one address, and we add that, then we get the A record
with one address.  At that point data->naddresses is already 1 and the ancount
is 1, so we allocate data->addresses to hold one address but write the new
address outside the array.  We also leak the old addresses memory.  So fix this
by noticing if we already have an address, freeing the old memory, and resetting
naddresses so we don't overflow our new array.

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 grub-core/net/dns.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/grub-core/net/dns.c b/grub-core/net/dns.c
index 86e609b..7a6c4b4 100644
--- a/grub-core/net/dns.c
+++ b/grub-core/net/dns.c
@@ -276,6 +276,9 @@ recv_hook (grub_net_udp_socket_t sock __attribute__ ((unused)),
       ptr++;
       ptr += 4;
     }
+  if (*data->naddresses)
+    grub_free (*data->addresses);
+  *data->naddresses = 0;
   *data->addresses = grub_malloc (sizeof ((*data->addresses)[0])
 				 * grub_be_to_cpu16 (head->ancount));
   if (!*data->addresses)
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 12/14] icmp6: use default interface as the route interface
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
                   ` (10 preceding siblings ...)
  2016-02-10 21:21 ` [PATCH 11/14] dns: reset data->naddresses for every packet we receive Josef Bacik
@ 2016-02-10 21:21 ` Josef Bacik
  2016-02-10 21:21 ` [PATCH 13/14] bootp: don't add multiple interfaces for the same address Josef Bacik
  2016-02-10 21:21 ` [PATCH 14/14] net: add interfaces when we open a card Josef Bacik
  13 siblings, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:21 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Josef Bacik

As it stands now if we do dhcp and slaac we'll use the slaac interface for any
routes advertised on the local network as those will be the first interfaces we
find.  However, if we have a default interface, i.e. the one set up from the pxe
dhcp packet, we would prefer to use this one.  So when getting RAs, check to
see if our default interface is a v6 interface and matches the card we got the
RA on, and if so use that as the route interface.  This allows us to get traffic
from the dhcp interface instead of through the slaac interface.

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 grub-core/net/icmp6.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/grub-core/net/icmp6.c b/grub-core/net/icmp6.c
index 2cbd95d..f02e9cd 100644
--- a/grub-core/net/icmp6.c
+++ b/grub-core/net/icmp6.c
@@ -19,6 +19,7 @@
 #include <grub/net.h>
 #include <grub/net/ip.h>
 #include <grub/net/netbuff.h>
+#include <grub/env.h>
 
 struct icmp_header
 {
@@ -361,6 +362,7 @@ grub_net_recv_icmp6_packet (struct grub_net_buff *nb,
 	if (grub_be_to_cpu16 (radv->router_lifetime) > 0)
 	  {
 	    struct grub_net_route *route;
+	    const char *default_inf = grub_env_get ("net_default_interface");
 
 	    FOR_NET_ROUTES (route)
 	    {
@@ -369,6 +371,24 @@ grub_net_recv_icmp6_packet (struct grub_net_buff *nb,
 	    }
 	    if (route == NULL)
 	      default_route = 1;
+
+	    /* If we have a default interface and it's on the same card as we're
+	       getting this advertisement on then we want to make sure we use
+	       that interface as the route interface. */
+	    if (default_inf)
+	      {
+		FOR_NET_NETWORK_LEVEL_INTERFACES (inf)
+		{
+		  if (grub_strcmp(default_inf, inf->name))
+		    continue;
+		  if (inf->card == card && inf != orig_inf
+		      && inf->address.type == GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6
+		      && grub_net_hwaddr_cmp(&inf->hwaddress,
+					     &orig_inf->hwaddress) == 0)
+		    route_inf = inf;
+		  break;
+		}
+	      }
 	  }
 
 	for (ptr = (grub_uint8_t *) nb->data; ptr < nb->tail;
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 13/14] bootp: don't add multiple interfaces for the same address
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
                   ` (11 preceding siblings ...)
  2016-02-10 21:21 ` [PATCH 12/14] icmp6: use default interface as the route interface Josef Bacik
@ 2016-02-10 21:21 ` Josef Bacik
  2016-02-10 21:21 ` [PATCH 14/14] net: add interfaces when we open a card Josef Bacik
  13 siblings, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:21 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Josef Bacik

Since we can sometimes configure an interface from the dhcp packet that is still
in the pxe configuration any subsequent dhcp requests could add duplicate
interfaces for the same address.  Fix this by checking to see if we already have
an interface configured for the address we got from the server and returning
early if so.  Also make these functions void as nobody uses the return value.
Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 grub-core/net/bootp.c | 29 ++++++++++++++++++-----------
 include/grub/net.h    |  4 ++--
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/grub-core/net/bootp.c b/grub-core/net/bootp.c
index 9fc47bd..345ad7a 100644
--- a/grub-core/net/bootp.c
+++ b/grub-core/net/bootp.c
@@ -131,7 +131,7 @@ parse_dhcp_vendor (const char *name, const void *vend, int limit, int *mask)
 
 #define OFFSET_OF(x, y) ((grub_size_t)((grub_uint8_t *)((y)->x) - (grub_uint8_t *)(y)))
 
-struct grub_net_network_level_interface *
+void
 grub_net_configure_by_dhcp_ack (const char *name,
 				struct grub_net_card *card,
 				grub_net_interface_flags_t flags,
@@ -157,6 +157,13 @@ grub_net_configure_by_dhcp_ack (const char *name,
 	       : sizeof (hwaddr.mac));
   hwaddr.type = GRUB_NET_LINK_LEVEL_PROTOCOL_ETHERNET;
 
+  FOR_NET_NETWORK_LEVEL_INTERFACES (inter)
+  {
+    if (inter->card == card &&
+	grub_net_addr_cmp (&inter->address, &addr) == 0)
+      return;
+  }
+
   inter = grub_net_add_addr (name, card, &addr, &hwaddr, flags);
 #if 0
   /* This is likely based on misunderstanding. gateway_ip refers to
@@ -260,8 +267,6 @@ grub_net_configure_by_dhcp_ack (const char *name,
     }
   else
     grub_errno = GRUB_ERR_NONE;
-
-  return inter;
 }
 
 struct grub_dhcpv6_option {
@@ -803,7 +808,7 @@ grub_net_configure_by_dhcpv6_adv (const struct grub_net_dhcpv6_packet *v6_adv,
 }
 
 
-struct grub_net_network_level_interface *
+void
 grub_net_configure_by_dhcpv6_reply (const char *name,
 	struct grub_net_card *card,
 	grub_net_interface_flags_t flags,
@@ -831,7 +836,7 @@ grub_net_configure_by_dhcpv6_reply (const char *name,
   if (v6->message_type != DHCPv6_REPLY)
     {
       grub_error (GRUB_ERR_IO, N_("DHCPv6 info not found"));
-      return NULL;
+      return;
     }
 
   your_ip = find_dhcpv6_address(v6);
@@ -839,7 +844,7 @@ grub_net_configure_by_dhcpv6_reply (const char *name,
   if (!your_ip)
     {
       grub_error (GRUB_ERR_IO, N_("DHCPv6 address not found"));
-      return NULL;
+      return;
     }
 
   get_dhcpv6_dns_address (v6, &dns, &num_dns);
@@ -867,6 +872,12 @@ grub_net_configure_by_dhcpv6_reply (const char *name,
   addr.type = GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6;
   addr.ipv6[0] = grub_get_unaligned64 (your_ip);
   addr.ipv6[1] = grub_get_unaligned64 (your_ip + 8);
+  FOR_NET_NETWORK_LEVEL_INTERFACES (inf)
+  {
+    if (inf->card == card
+	&& grub_net_addr_cmp (&inf->address, &addr) == 0)
+      return;
+  }
   inf = grub_net_add_addr (name, card, &addr, &card->default_address, flags);
 
   netaddr.type = GRUB_NET_NETWORK_LEVEL_PROTOCOL_IPV6;
@@ -889,7 +900,7 @@ grub_net_configure_by_dhcpv6_reply (const char *name,
     {
       *device = grub_xasprintf ("%s,%s", proto, server_ip);
       if (!*device)
-	return NULL;
+	return;
     }
 
   if (path && boot_file)
@@ -904,11 +915,7 @@ grub_net_configure_by_dhcpv6_reply (const char *name,
 	  else
 	    **path = 0;
 	}
-      else
-	return NULL;
     }
-
-  return inf;
 }
 
 void
diff --git a/include/grub/net.h b/include/grub/net.h
index 080a2d9..393ad3c 100644
--- a/include/grub/net.h
+++ b/include/grub/net.h
@@ -469,7 +469,7 @@ enum
     GRUB_NET_BOOTP_END = 0xff
   };
 
-struct grub_net_network_level_interface *
+void
 grub_net_configure_by_dhcp_ack (const char *name,
 				struct grub_net_card *card,
 				grub_net_interface_flags_t flags,
@@ -477,7 +477,7 @@ grub_net_configure_by_dhcp_ack (const char *name,
 				grub_size_t size,
 				int is_def, char **device, char **path);
 
-struct grub_net_network_level_interface *
+void
 grub_net_configure_by_dhcpv6_reply (const char *name,
 				    struct grub_net_card *card,
 				    grub_net_interface_flags_t flags,
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 14/14] net: add interfaces when we open a card
  2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
                   ` (12 preceding siblings ...)
  2016-02-10 21:21 ` [PATCH 13/14] bootp: don't add multiple interfaces for the same address Josef Bacik
@ 2016-02-10 21:21 ` Josef Bacik
  13 siblings, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-10 21:21 UTC (permalink / raw)
  To: grub-devel, kernel-team; +Cc: Josef Bacik

Since we've started adding addresses to the multicast filter in efi we need to
make sure that any addresses that are added before we call ->open are added as
well.  This can happen when we configure interfaces from the dhcp packet left in
the pxe config.  Do this in a helper function and have the two callers of
->open use the helper function instead.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 grub-core/net/ethernet.c | 12 +++---------
 grub-core/net/net.c      | 43 +++++++++++++++++++++++++++++++------------
 include/grub/net.h       |  3 +++
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/grub-core/net/ethernet.c b/grub-core/net/ethernet.c
index c397b1b..443ac7b 100644
--- a/grub-core/net/ethernet.c
+++ b/grub-core/net/ethernet.c
@@ -67,15 +67,9 @@ send_ethernet_packet (struct grub_net_network_level_interface *inf,
   grub_memcpy (eth->src, inf->hwaddress.mac, 6);
 
   eth->type = grub_cpu_to_be16 (ethertype);
-  if (!inf->card->opened)
-    {
-      err = GRUB_ERR_NONE;
-      if (inf->card->driver->open)
-	err = inf->card->driver->open (inf->card);
-      if (err)
-	return err;
-      inf->card->opened = 1;
-    }
+  err = net_open_card (inf->card);
+  if (err)
+    return err;
   return inf->card->driver->send (inf->card, nb);
 }
 
diff --git a/grub-core/net/net.c b/grub-core/net/net.c
index e04a35b..599a311 100644
--- a/grub-core/net/net.c
+++ b/grub-core/net/net.c
@@ -1460,24 +1460,43 @@ grub_net_fs_close (grub_file_t file)
   return GRUB_ERR_NONE;
 }
 
+grub_err_t
+net_open_card (struct grub_net_card *card)
+{
+  struct grub_net_network_level_interface *inf;
+  grub_err_t err = GRUB_ERR_NONE;
+
+  if (card->opened)
+    return err;
+
+  if (card->driver->open)
+    err = card->driver->open (card);
+  if (err)
+    return err;
+  card->opened = 1;
+
+  if (!card->driver->add_addr)
+    return err;
+
+  FOR_NET_NETWORK_LEVEL_INTERFACES (inf)
+  {
+    if (inf->card == card)
+      card->driver->add_addr(card, &inf->address);
+  }
+  return err;
+}
+
 static void
 receive_packets (struct grub_net_card *card, int *stop_condition)
 {
   int received = 0;
+  grub_err_t err;
+
   if (card->num_ifaces == 0)
     return;
-  if (!card->opened)
-    {
-      grub_err_t err = GRUB_ERR_NONE;
-      if (card->driver->open)
-	err = card->driver->open (card);
-      if (err)
-	{
-	  grub_errno = GRUB_ERR_NONE;
-	  return;
-	}
-      card->opened = 1;
-    }
+  err = net_open_card (card);
+  if (err)
+    return;
   while (received < 100)
     {
       /* Maybe should be better have a fixed number of packets for each card
diff --git a/include/grub/net.h b/include/grub/net.h
index 393ad3c..7e54f55 100644
--- a/include/grub/net.h
+++ b/include/grub/net.h
@@ -341,6 +341,9 @@ grub_net_add_addr (const char *name,
 		   const grub_net_link_level_address_t *hwaddress,
 		   grub_net_interface_flags_t flags);
 
+grub_err_t
+net_open_card (struct grub_net_card *card);
+
 extern struct grub_net_network_level_interface *grub_net_network_level_interfaces;
 #define FOR_NET_NETWORK_LEVEL_INTERFACES(var) for (var = grub_net_network_level_interfaces; var; var = var->next)
 #define FOR_NET_NETWORK_LEVEL_INTERFACES_SAFE(var,next) for (var = grub_net_network_level_interfaces, next = (var ? var->next : 0); var; var = next, next = (var ? var->next : 0))
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 11/14] dns: reset data->naddresses for every packet we receive
  2016-02-10 21:21 ` [PATCH 11/14] dns: reset data->naddresses for every packet we receive Josef Bacik
@ 2016-02-13 16:05   ` Andrei Borzenkov
  2016-02-16 16:18     ` Josef Bacik
  0 siblings, 1 reply; 24+ messages in thread
From: Andrei Borzenkov @ 2016-02-13 16:05 UTC (permalink / raw)
  To: The development of GNU GRUB, kernel-team; +Cc: Josef Bacik

11.02.2016 00:21, Josef Bacik пишет:
> I noticed when debugging a problem that we'd corrupt memory if our dns server
> didn't respond fast enough and we ended up asking for both an AAAA and A record
> for a server.  The problem is we alloc data->addresses based on the number of
> addresses in the packet, but we populate it based on data->naddresses.  So we
> get the AAAA record with one address, and we add that, then we get the A record
> with one address and now data->naddresses == 1 but the ancount is 1, so we
> allocate data->addresses to hold one address but write the new address outside
> the array.  We also leak the old addresses memory.  So fix this by noticing if
> we already have an address and free the old memory and reset naddresses so we
> don't overflow our new array.
> 
> Signed-off-by: Josef Bacik <jbacik@fb.com>
> ---
>  grub-core/net/dns.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/grub-core/net/dns.c b/grub-core/net/dns.c
> index 86e609b..7a6c4b4 100644
> --- a/grub-core/net/dns.c
> +++ b/grub-core/net/dns.c
> @@ -276,6 +276,9 @@ recv_hook (grub_net_udp_socket_t sock __attribute__ ((unused)),
>        ptr++;
>        ptr += 4;
>      }
> +  if (*data->naddresses)
> +    grub_free (*data->addresses);
> +  *data->naddresses = 0;
>    *data->addresses = grub_malloc (sizeof ((*data->addresses)[0])
>  				 * grub_be_to_cpu16 (head->ancount));

Hmm ... cannot we resize it?

*data->addresses = grub_realloc (*data->addresses,
sizeof ((*data->addresses)[0]) * (*data->naddresses += grub_be_to_cpu16
(head->ancount)))

as adjusted to not leak old pointer.

This way answers we got before would not be lost.

>    if (!*data->addresses)
> 



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 05/14] net: don't free uninitialized sockets in dns
  2016-02-10 21:20 ` [PATCH 05/14] net: don't free uninitialized sockets in dns Josef Bacik
@ 2016-02-13 17:59   ` Andrei Borzenkov
  2016-02-16 16:13     ` Josef Bacik
  0 siblings, 1 reply; 24+ messages in thread
From: Andrei Borzenkov @ 2016-02-13 17:59 UTC (permalink / raw)
  To: The development of GNU GRUB, kernel-team; +Cc: Josef Bacik

11.02.2016 00:20, Josef Bacik пишет:
> If we cannot open a connection to our dns server we will have NULL sockets in
> our array, so don't do the cleanup on any sockets that didn't get created.
> 

Was not it already fixed by

commit a01ab69848257a3df705e524c4acafe795e05cc9
Author: Andrei Borzenkov <arvidjaar@gmail.com>
Date:   Mon Oct 12 23:16:23 2015 +0300

    net: avoid closing NULL socket in DNS lookup

    Refactor code so that we do not store NULL pointers in array
    of in-flight DNS servers.

    Reported-By: Josef Bacik <jbacik@fb.com>


> Signed-off-by: Josef Bacik <jbacik@fb.com>
> ---
>  grub-core/net/dns.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/grub-core/net/dns.c b/grub-core/net/dns.c
> index 89741dd..82a3307 100644
> --- a/grub-core/net/dns.c
> +++ b/grub-core/net/dns.c
> @@ -598,7 +598,10 @@ grub_net_dns_lookup (const char *name,
>    grub_free (data.name);
>    grub_netbuff_free (nb);
>    for (j = 0; j < send_servers; j++)
> -    grub_net_udp_close (sockets[j]);
> +    {
> +      if (sockets[j])
> +	grub_net_udp_close (sockets[j]);
> +    }
>    
>    grub_free (sockets);
>  
> 



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 10/14] dns: poll card between each dns request
  2016-02-10 21:21 ` [PATCH 10/14] dns: poll card between each dns request Josef Bacik
@ 2016-02-15  6:45   ` Andrei Borzenkov
  2016-02-16 16:16     ` Josef Bacik
  2016-02-23 22:02     ` Josef Bacik
  0 siblings, 2 replies; 24+ messages in thread
From: Andrei Borzenkov @ 2016-02-15  6:45 UTC (permalink / raw)
  To: The development of GNU GRUB; +Cc: Josef Bacik, Kernel Team

On Thu, Feb 11, 2016 at 12:21 AM, Josef Bacik <jbacik@fb.com> wrote:
> If we have dns servers that we prefer to get AAAA records from we'll send a
> packet and immediately check data.naddresses to see if we got a response.  If we
> didn't we'll then send a request for an A record, and _then_ we'll poll the
> card.  So if the DNS server doesn't respond between us sending the packet and
> checking data.naddresses we'll send a request for the A record and then poll the
> card.  Instead we need to make sure we poll after we issue each request to make
> sure we give the server enough time to respond to our initial request.
>
> Signed-off-by: Josef Bacik <jbacik@fb.com>
> ---
>  grub-core/net/dns.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/grub-core/net/dns.c b/grub-core/net/dns.c
> index 82a3307..86e609b 100644
> --- a/grub-core/net/dns.c
> +++ b/grub-core/net/dns.c
> @@ -587,12 +587,12 @@ grub_net_dns_lookup (const char *name,
>                    grub_errno = GRUB_ERR_NONE;
>                    err = err2;
>                  }
> +             grub_net_poll_cards (200, &data.stop);

One consideration is that it will increase timeouts in case of
non-responsive servers, as now they are processed sequentially.

But more importantly, this is still hit and miss - we rely on delivery
order which is non-deterministic. We really need to ask for all and
filter on receiving side. Two possible implementations are

1. Keep track of IPv4 and IPv6 answers separately; if a non-preferred
answer is received, continue to wait for the preferred one until timeout.

2. Query for both A and AAAA in the same packet and filter the answers.

Option 2 looks better. It avoids extra timeouts (at least if we assume
that all DNS servers are equally authoritative) because as soon as we
get any response we can stop polling.

This will also indirectly fix another reported issue as we now can
ignore any duplicate packet.

Would you consider implementing it?

>                if (*data.naddresses)
>                  goto out;
>              }
>            while (t == 1);
>         }
> -      grub_net_poll_cards (200, &data.stop);
>      }
>   out:
>    grub_free (data.name);
> --
> 1.8.1
>
>
> _______________________________________________
> Grub-devel mailing list
> Grub-devel@gnu.org
> https://lists.gnu.org/mailman/listinfo/grub-devel


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 05/14] net: don't free uninitialized sockets in dns
  2016-02-13 17:59   ` Andrei Borzenkov
@ 2016-02-16 16:13     ` Josef Bacik
  0 siblings, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-16 16:13 UTC (permalink / raw)
  To: Andrei Borzenkov, The development of GNU GRUB, kernel-team

On 02/13/2016 12:59 PM, Andrei Borzenkov wrote:
> 11.02.2016 00:20, Josef Bacik пишет:
>> If we cannot open a connection to our dns server we will have NULL sockets in
>> our array, so don't do the cleanup on any sockets that didn't get created.
>>
>
> Was not it already fixed by
>

Yes it was, sorry about that, I rebased without thinking.  I'll drop 
this one.

Josef



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 10/14] dns: poll card between each dns request
  2016-02-15  6:45   ` Andrei Borzenkov
@ 2016-02-16 16:16     ` Josef Bacik
  2016-02-23 22:02     ` Josef Bacik
  1 sibling, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-16 16:16 UTC (permalink / raw)
  To: Andrei Borzenkov, The development of GNU GRUB; +Cc: Kernel Team

On 02/15/2016 01:45 AM, Andrei Borzenkov wrote:
> On Thu, Feb 11, 2016 at 12:21 AM, Josef Bacik <jbacik@fb.com> wrote:
>> If we have dns servers that we prefer to get AAAA records from we'll send a
>> packet and immediately check data.naddresses to see if we got a response.  If we
>> didn't we'll then send a request for an A record, and _then_ we'll poll the
>> card.  So if the DNS server doesn't respond between us sending the packet and
>> checking data.naddresses we'll send a request for the A record and then poll the
>> card.  Instead we need to make sure we poll after we issue each request to make
>> sure we give the server enough time to respond to our initial request.
>>
>> Signed-off-by: Josef Bacik <jbacik@fb.com>
>> ---
>>   grub-core/net/dns.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/grub-core/net/dns.c b/grub-core/net/dns.c
>> index 82a3307..86e609b 100644
>> --- a/grub-core/net/dns.c
>> +++ b/grub-core/net/dns.c
>> @@ -587,12 +587,12 @@ grub_net_dns_lookup (const char *name,
>>                     grub_errno = GRUB_ERR_NONE;
>>                     err = err2;
>>                   }
>> +             grub_net_poll_cards (200, &data.stop);
>
> One consideration is that it will increase timeouts in case of
> non-responsive servers, as now they are processed sequentially.
>
> But more importantly, this is still hit and miss - we rely on delivery
> order which is non-deterministic. We really need to ask for all and
> filter on receiving side. Two possible implementations are
>
> 1. Keep track of IPv4 and IPv6 answers separately; if non-preferred
> answer is received, continue to wait for preferred one until timeout.
>
> 2. Queries for both A and AAAA in the same packet and filter out answers.
>
> The 2 looks better. It avoids extra timeouts (at least if we assume
> that all DNS servers are equally authoritative) because as soon as we
> get any response we can stop polling.
>
> This will also indirectly fix another reported issue as we now can
> ignore any duplicate packet.
>
> Would you consider implementing it?
>

Yeah that sounds reasonable, I'll take a crack at it in a few days.  Thanks,

Josef



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 11/14] dns: reset data->naddresses for every packet we receive
  2016-02-13 16:05   ` Andrei Borzenkov
@ 2016-02-16 16:18     ` Josef Bacik
  0 siblings, 0 replies; 24+ messages in thread
From: Josef Bacik @ 2016-02-16 16:18 UTC (permalink / raw)
  To: Andrei Borzenkov, The development of GNU GRUB, kernel-team

On 02/13/2016 11:05 AM, Andrei Borzenkov wrote:
> 11.02.2016 00:21, Josef Bacik пишет:
>> I noticed when debugging a problem that we'd corrupt memory if our dns server
>> didn't respond fast enough and we ended up asking for both an AAAA and A record
>> for a server.  The problem is we alloc data->addresses based on the number of
>> addresses in the packet, but we populate it based on data->naddresses.  So we
>> get the AAAA record with one address, and we add that, then we get the A record
>> with one address and now data->naddresses == 1 but the ancount is 1, so we
>> allocate data->addresses to hold one address but write the new address outside
>> the array.  We also leak the old addresses memory.  So fix this by noticing if
>> we already have an address and free the old memory and reset naddresses so we
>> don't overflow our new array.
>>
>> Signed-off-by: Josef Bacik <jbacik@fb.com>
>> ---
>>   grub-core/net/dns.c | 3 +++
>>   1 file changed, 3 insertions(+)
>>
>> diff --git a/grub-core/net/dns.c b/grub-core/net/dns.c
>> index 86e609b..7a6c4b4 100644
>> --- a/grub-core/net/dns.c
>> +++ b/grub-core/net/dns.c
>> @@ -276,6 +276,9 @@ recv_hook (grub_net_udp_socket_t sock __attribute__ ((unused)),
>>         ptr++;
>>         ptr += 4;
>>       }
>> +  if (*data->naddresses)
>> +    grub_free (*data->addresses);
>> +  *data->naddresses = 0;
>>     *data->addresses = grub_malloc (sizeof ((*data->addresses)[0])
>>   				 * grub_be_to_cpu16 (head->ancount));
>
> Hmm ... cannot we resize it?
>
> *data->addresses = grub_realloc (*data->addresses,
> sizeof ((*data->addresses)[0]) * (*data->naddresses += grub_be_to_cpu16
> (head->ancount)))
>
> as adjusted to not leak old pointer.
>
> This way answers we got before would not be lost.
>

So I did it this way because we copy the whole array into the dns cache 
at the bottom and I felt like keeping track of where we currently were 
in the array was overly complicated when in the end we're ending up with 
two entries in the cache anyway.  But if I do the multiple request thing 
then this patch will be rendered useless anyway so we can just ignore it 
for now.  Thanks,

Josef



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 10/14] dns: poll card between each dns request
  2016-02-15  6:45   ` Andrei Borzenkov
  2016-02-16 16:16     ` Josef Bacik
@ 2016-02-23 22:02     ` Josef Bacik
  2016-02-24  3:25       ` Andrei Borzenkov
  1 sibling, 1 reply; 24+ messages in thread
From: Josef Bacik @ 2016-02-23 22:02 UTC (permalink / raw)
  To: Andrei Borzenkov, The development of GNU GRUB; +Cc: Kernel Team

On 02/15/2016 01:45 AM, Andrei Borzenkov wrote:
> On Thu, Feb 11, 2016 at 12:21 AM, Josef Bacik <jbacik@fb.com> wrote:
>> If we have dns servers that we prefer to get AAAA records from we'll send a
>> packet and immediately check data.naddresses to see if we got a response.  If we
>> didn't we'll then send a request for an A record, and _then_ we'll poll the
>> card.  So if the DNS server doesn't respond between us sending the packet and
>> checking data.naddresses we'll send a request for the A record and then poll the
>> card.  Instead we need to make sure we poll after we issue each request to make
>> sure we give the server enough time to respond to our initial request.
>>
>> Signed-off-by: Josef Bacik <jbacik@fb.com>
>> ---
>>   grub-core/net/dns.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/grub-core/net/dns.c b/grub-core/net/dns.c
>> index 82a3307..86e609b 100644
>> --- a/grub-core/net/dns.c
>> +++ b/grub-core/net/dns.c
>> @@ -587,12 +587,12 @@ grub_net_dns_lookup (const char *name,
>>                     grub_errno = GRUB_ERR_NONE;
>>                     err = err2;
>>                   }
>> +             grub_net_poll_cards (200, &data.stop);
>
> One consideration is that it will increase timeouts in case of
> non-responsive servers, as now they are processed sequentially.
>
> But more importantly, this is still hit and miss - we rely on delivery
> order which is non-deterministic. We really need to ask for all and
> filter on receiving side. Two possible implementations are
>
> 1. Keep track of IPv4 and IPv6 answers separately; if non-preferred
> answer is received, continue to wait for preferred one until timeout.
>
> 2. Queries for both A and AAAA in the same packet and filter out answers.
>
> The 2 looks better. It avoids extra timeouts (at least if we assume
> that all DNS servers are equally authoritative) because as soon as we
> get any response we can stop polling.
>
> This will also indirectly fix another reported issue as we now can
> ignore any duplicate packet.
>
> Would you consider implementing it?

I got this all implemented and started testing it, but it just wasn't
working. It turns out our DNS server (also BIND) doesn't support more than
one question per packet.  So instead I'm going to keep track of which
record types my DNS servers support and only ask those questions.  Updating
the cache is kind of a pain in the ass, so I'll probably make it so we just
update the existing cache in place with the new answers.  Thanks,

Josef



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 10/14] dns: poll card between each dns request
  2016-02-23 22:02     ` Josef Bacik
@ 2016-02-24  3:25       ` Andrei Borzenkov
  0 siblings, 0 replies; 24+ messages in thread
From: Andrei Borzenkov @ 2016-02-24  3:25 UTC (permalink / raw)
  To: Josef Bacik, The development of GNU GRUB; +Cc: Kernel Team

24.02.2016 01:02, Josef Bacik пишет:
> On 02/15/2016 01:45 AM, Andrei Borzenkov wrote:
>> On Thu, Feb 11, 2016 at 12:21 AM, Josef Bacik <jbacik@fb.com> wrote:
>>> If we have dns servers that we prefer to get AAAA records from we'll
>>> send a
>>> packet and immediately check data.naddresses to see if we got a
>>> response.  If we
>>> didn't we'll then send a request for an A record, and _then_ we'll
>>> poll the
>>> card.  So if the DNS server doesn't respond between us sending the
>>> packet and
>>> checking data.naddresses we'll send a request for the A record and
>>> then poll the
>>> card.  Instead we need to make sure we poll after we issue each
>>> request to make
>>> sure we give the server enough time to respond to our initial request.
>>>
>>> Signed-off-by: Josef Bacik <jbacik@fb.com>
>>> ---
>>>   grub-core/net/dns.c | 2 +-
>>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/grub-core/net/dns.c b/grub-core/net/dns.c
>>> index 82a3307..86e609b 100644
>>> --- a/grub-core/net/dns.c
>>> +++ b/grub-core/net/dns.c
>>> @@ -587,12 +587,12 @@ grub_net_dns_lookup (const char *name,
>>>                     grub_errno = GRUB_ERR_NONE;
>>>                     err = err2;
>>>                   }
>>> +             grub_net_poll_cards (200, &data.stop);
>>
>> One consideration is that it will increase timeouts in case of
>> non-responsive servers, as now they are processed sequentially.
>>
>> But more importantly, this is still hit and miss - we rely on delivery
>> order which is non-deterministic. We really need to ask for all and
>> filter on receiving side. Two possible implementations are
>>
>> 1. Keep track of IPv4 and IPv6 answers separately; if non-preferred
>> answer is received, continue to wait for preferred one until timeout.
>>
>> 2. Queries for both A and AAAA in the same packet and filter out answers.
>>
>> The 2 looks better. It avoids extra timeouts (at least if we assume
>> that all DNS servers are equally authoritative) because as soon as we
>> get any response we can stop polling.
>>
>> This will also indirectly fix another reported issue as we now can
>> ignore any duplicate packet.
>>
>> Would you consider implementing it?
> 
> Got this all implemented, started testing it and it just wasn't working,
> come to find out our dns server (also bind) doesn't support more than
> one question per packet. 

Oh! :( Sorry, I really did not expect that, given that multi-query has been
in the DNS RFC from the very beginning.

> So instead I'm going to keep track of which
> type my DNS servers support and only do those questions.  Updating the
> cache is kind of a pain in the ass, I'll probably make it so we just
> update in place the existing cache with the new answers.  Thanks,
> 
> Josef
> 



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 06/14] net: fix ipv6 routing
  2016-02-10 21:21 ` [PATCH 06/14] net: fix ipv6 routing Josef Bacik
@ 2016-02-25 19:39   ` Andrei Borzenkov
  0 siblings, 0 replies; 24+ messages in thread
From: Andrei Borzenkov @ 2016-02-25 19:39 UTC (permalink / raw)
  To: The development of GNU GRUB, kernel-team; +Cc: Josef Bacik

11.02.2016 00:21, Josef Bacik пишет:
> ipv6 routing in grub2 is broken, we cannot talk to anything outside our local
> network or anything that doesn't route in our global namespace.  This patch
> fixes this by doing a couple of things
> 
> 1) Read the router information off of the router advertisement.  If we have a
> router lifetime we need to take the source address and create a route from it.
> 
> 2) Changes the routing stuff slightly to allow you to specify a gateway _and_ an
> interface.  Since the router advertisements come in on the link local address we
> need to associate it with the global address on the card.  So when we are
> processing the router advertisement, either use the SLAAC interface we create
> and add the route to that interface, or loop through the global addresses we
> currently have on our interface and associate it with one of those addresses.
> We need to have a special case here for the default route so that it gets used,
> we do this by setting the masksize to 0 to mean it encompasses all networks.
> The routing code will automatically select the best route so if there is a
> closer match we will use that.
> 
> With this patch I can now talk to ipv6 addresses outside of my local network.
> Thanks,
> 

Committed. Thanks!


^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2016-02-25 19:39 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-10 21:20 [PATCH 00/14] Facebook's netbooting patches Josef Bacik
2016-02-10 21:20 ` [PATCH 01/14] Added net_bootp6 command Josef Bacik
2016-02-10 21:20 ` [PATCH 02/14] UEFI IPv6 PXE support Josef Bacik
2016-02-10 21:20 ` [PATCH 03/14] Use UEFI MAC device as default configured by net_bootp6 Josef Bacik
2016-02-10 21:20 ` [PATCH 04/14] tcp: add window scaling and RTTM support Josef Bacik
2016-02-10 21:20 ` [PATCH 05/14] net: don't free uninitialized sockets in dns Josef Bacik
2016-02-13 17:59   ` Andrei Borzenkov
2016-02-16 16:13     ` Josef Bacik
2016-02-10 21:21 ` [PATCH 06/14] net: fix ipv6 routing Josef Bacik
2016-02-25 19:39   ` Andrei Borzenkov
2016-02-10 21:21 ` [PATCH 07/14] efinet: retransmit if our device is busy Josef Bacik
2016-02-10 21:21 ` [PATCH 08/14] efinet: filter multicast traffic based on addresses Josef Bacik
2016-02-10 21:21 ` [PATCH 09/14] efinet: clear the txbuffer before modifying the receive filters Josef Bacik
2016-02-10 21:21 ` [PATCH 10/14] dns: poll card between each dns request Josef Bacik
2016-02-15  6:45   ` Andrei Borzenkov
2016-02-16 16:16     ` Josef Bacik
2016-02-23 22:02     ` Josef Bacik
2016-02-24  3:25       ` Andrei Borzenkov
2016-02-10 21:21 ` [PATCH 11/14] dns: reset data->naddresses for every packet we receive Josef Bacik
2016-02-13 16:05   ` Andrei Borzenkov
2016-02-16 16:18     ` Josef Bacik
2016-02-10 21:21 ` [PATCH 12/14] icmp6: use default interface as the route interface Josef Bacik
2016-02-10 21:21 ` [PATCH 13/14] bootp: don't add multiple interfaces for the same address Josef Bacik
2016-02-10 21:21 ` [PATCH 14/14] net: add interfaces when we open a card Josef Bacik

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.