* [PATCH] numa: select nodes by cpu affinity
@ 2010-08-04 12:04 Andrew Jones
  2010-08-04 12:38 ` Keir Fraser
  0 siblings, 1 reply; 5+ messages in thread
From: Andrew Jones @ 2010-08-04 12:04 UTC (permalink / raw)
  To: xen-devel; +Cc: dulloor

[-- Attachment #2: nodemask.patch --]
[-- Type: text/x-patch, Size: 4305 bytes --]

numa: select nodes by cpu affinity

Along the same theme as changeset 21719, but expanded to cover all
nodes whose processors the domain is using. Rather than being as
strict as exact_node_request, simply fall back to the current
behavior on failure. This should help performance when cpu affinity
has been set by NUMA-aware tools.

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -292,17 +292,47 @@
     return needed;
 }
 
+static void get_nodemask_by_cpu_affinity(
+                const struct domain *d, nodemask_t *nodemask)
+{
+    cpumask_t cpumask = CPU_MASK_NONE;
+    struct vcpu *v;
+    unsigned int node;
+
+    nodes_clear(*nodemask);
+
+    if ( d == NULL || num_online_nodes() == 1 )
+        goto all_online_nodes;
+
+    for_each_vcpu(d, v)
+        cpus_or(cpumask, cpumask, v->cpu_affinity);
+
+    if ( cpus_subset(cpu_online_map, cpumask) )
+        goto all_online_nodes;
+
+    for_each_online_node(node)
+        if ( cpus_intersects(node_to_cpumask(node), cpumask) )
+            node_set(node, *nodemask);
+    return;
+
+all_online_nodes:
+    nodes_or(*nodemask, *nodemask, node_online_map);
+    return;
+}
+
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi,
-    unsigned int node, unsigned int order, unsigned int memflags)
+    unsigned int node, unsigned int order, unsigned int memflags,
+    nodemask_t nodemask)
 {
     unsigned int i, j, zone = 0;
-    unsigned int num_nodes = num_online_nodes();
+    unsigned int num_nodes;
     unsigned long request = 1UL << order;
     bool_t exact_node_request = !!(memflags & MEMF_exact_node);
     cpumask_t extra_cpus_mask, mask;
     struct page_info *pg;
+    int nodemask_retry = 1;
 
     if ( node == NUMA_NO_NODE )
     {
@@ -335,6 +365,15 @@
      * zone before failing, only calc new node value if we fail to find memory 
      * in target node, this avoids needless computation on fast-path.
      */
+    if ( exact_node_request )
+        num_nodes = 1;
+    else
+    {
+        nodes_and(nodemask, nodemask, node_online_map);
+        num_nodes = nodes_weight(nodemask);
+    }
+
+try_nodemask:
     for ( i = 0; i < num_nodes; i++ )
     {
         zone = zone_hi;
@@ -353,9 +392,17 @@
             goto not_found;
 
         /* Pick next node, wrapping around if needed. */
-        node = next_node(node, node_online_map);
+        node = next_node(node, nodemask);
         if (node == MAX_NUMNODES)
-            node = first_node(node_online_map);
+            node = first_node(nodemask);
+    }
+
+    if ( nodemask_retry-- && !nodes_equal(nodemask, node_online_map) )
+    {
+        nodes_andnot(nodemask, node_online_map, nodemask);
+        num_nodes = nodes_weight(nodemask);
+        node = first_node(nodemask);
+        goto try_nodemask;
     }
 
  try_tmem:
@@ -1010,7 +1057,7 @@
     ASSERT(!in_irq());
 
     pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN,
-        cpu_to_node(smp_processor_id()), order, memflags);
+        cpu_to_node(smp_processor_id()), order, memflags, node_online_map);
     if ( unlikely(pg == NULL) )
         return NULL;
 
@@ -1154,6 +1201,7 @@
     struct page_info *pg = NULL;
     unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;
     unsigned int node = (uint8_t)((memflags >> _MEMF_node) - 1), dma_zone;
+    nodemask_t nodemask;
 
     ASSERT(!in_irq());
 
@@ -1164,13 +1212,16 @@
     if ( (zone_hi = min_t(unsigned int, bits_to_zone(bits), zone_hi)) == 0 )
         return NULL;
 
+    get_nodemask_by_cpu_affinity(d, &nodemask);
+
     if ( dma_bitsize && ((dma_zone = bits_to_zone(dma_bitsize)) < zone_hi) )
-        pg = alloc_heap_pages(dma_zone + 1, zone_hi, node, order, memflags);
+        pg = alloc_heap_pages(dma_zone + 1, zone_hi, node, order, memflags,
+                              nodemask);
 
     if ( (pg == NULL) &&
          ((memflags & MEMF_no_dma) ||
           ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
-                                  node, order, memflags)) == NULL)) )
+                                  node, order, memflags, nodemask)) == NULL)) )
          return NULL;
 
     if ( (d != NULL) && assign_pages(d, pg, order, memflags) )



* Re: [PATCH] numa: select nodes by cpu affinity
  2010-08-04 12:04 [PATCH] numa: select nodes by cpu affinity Andrew Jones
@ 2010-08-04 12:38 ` Keir Fraser
  2010-08-04 14:38   ` Keir Fraser
  0 siblings, 1 reply; 5+ messages in thread
From: Keir Fraser @ 2010-08-04 12:38 UTC (permalink / raw)
  To: Andrew Jones, xen-devel; +Cc: dulloor

Good idea. I will take this and rework it a bit and check it in.

 Thanks,
 Keir

On 04/08/2010 13:04, "Andrew Jones" <drjones@redhat.com> wrote:

> 


* Re: [PATCH] numa: select nodes by cpu affinity
  2010-08-04 12:38 ` Keir Fraser
@ 2010-08-04 14:38   ` Keir Fraser
  2010-08-04 16:01     ` Andrew Jones
  0 siblings, 1 reply; 5+ messages in thread
From: Keir Fraser @ 2010-08-04 14:38 UTC (permalink / raw)
  To: Andrew Jones, xen-devel; +Cc: dulloor

Changeset 21913 in http://xenbits.xen.org/staging/xen-unstable.hg

 -- Keir

On 04/08/2010 13:38, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:

> Good idea. I will take this and rework it a bit and check it in.
> 
>  Thanks,
>  Keir
> 
> On 04/08/2010 13:04, "Andrew Jones" <drjones@redhat.com> wrote:
> 
>> 
> 


* Re: [PATCH] numa: select nodes by cpu affinity
  2010-08-04 14:38   ` Keir Fraser
@ 2010-08-04 16:01     ` Andrew Jones
  2010-08-04 16:15       ` Keir Fraser
  0 siblings, 1 reply; 5+ messages in thread
From: Andrew Jones @ 2010-08-04 16:01 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel, dulloor

[-- Attachment #1: Type: text/plain, Size: 1070 bytes --]

On 08/04/2010 04:38 PM, Keir Fraser wrote:
> Changeset 21913 in http://xenbits.xen.org/staging/xen-unstable.hg
> 
>  -- Keir
> 
> On 04/08/2010 13:38, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:
> 
>> Good idea. I will take this and rework it a bit and check it in.
>>
>>  Thanks,
>>  Keir
>>
>> On 04/08/2010 13:04, "Andrew Jones" <drjones@redhat.com> wrote:
>

I also considered managing the nodemask as new domain state, as you do,
since it may come in useful elsewhere, but my principle-of-least-patch
instincts kept me from doing it...

I'm not sure about keeping track of the last_alloc_node and then always
avoiding it (at least when there's more than one node) by checking it
last. I liked the way it worked before, favoring the node of the
currently running processor, but I don't have any perf numbers to know
which would be better.

I've attached a patch with a couple of minor tweaks. It removes the
unnecessary node clearing from a freshly initialized (already empty)
nodemask, and also moves a couple of domain_update_node_affinity()
calls outside for_each_vcpu loops.

Andrew

[-- Attachment #2: nodemask-tweak.diff --]
[-- Type: text/plain, Size: 1496 bytes --]

diff --git a/xen/common/domain.c b/xen/common/domain.c
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -358,12 +358,8 @@
         cpus_or(cpumask, cpumask, v->cpu_affinity);
 
     for_each_online_node ( node )
-    {
         if ( cpus_intersects(node_to_cpumask(node), cpumask) )
             node_set(node, nodemask);
-        else
-            node_clear(node, nodemask);
-    }
 
     d->node_affinity = nodemask;
     spin_unlock(&d->node_affinity_lock);
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -270,13 +270,13 @@
         SCHED_OP(VCPU2OP(v), destroy_vcpu, v);
 
         cpus_setall(v->cpu_affinity);
-        domain_update_node_affinity(d);
         v->processor = new_p;
         v->sched_priv = vcpu_priv[v->vcpu_id];
         evtchn_move_pirqs(v);
 
         new_p = cycle_cpu(new_p, c->cpu_valid);
     }
+    domain_update_node_affinity(d);
 
     d->cpupool = c;
     SCHED_OP(DOM2OP(d), free_domdata, d->sched_priv);
@@ -478,7 +478,6 @@
                 printk("Breaking vcpu affinity for domain %d vcpu %d\n",
                         v->domain->domain_id, v->vcpu_id);
                 cpus_setall(v->cpu_affinity);
-                domain_update_node_affinity(d);
             }
 
             if ( v->processor == cpu )
@@ -501,6 +500,7 @@
             if ( v->processor == cpu )
                 ret = -EAGAIN;
         }
+        domain_update_node_affinity(d);
     }
     return ret;
 }



* Re: [PATCH] numa: select nodes by cpu affinity
  2010-08-04 16:01     ` Andrew Jones
@ 2010-08-04 16:15       ` Keir Fraser
  0 siblings, 0 replies; 5+ messages in thread
From: Keir Fraser @ 2010-08-04 16:15 UTC (permalink / raw)
  To: Andrew Jones; +Cc: xen-devel, dulloor

On 04/08/2010 17:01, "Andrew Jones" <drjones@redhat.com> wrote:

> I also considered managing the nodemask as new domain state, as you do,
> as it may come in useful elsewhere, but my principle of least patch
> instincts kept me from doing it...

Yeah, I don't fancy iterating over all vcpus for every little bitty
allocation. So it's a perf thing mainly for me.
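
Concretely, something like the following is all the allocator then
needs per allocation (a sketch only; the helper name is mine, but the
field and lock names follow the domain.c hunk in your tweak patch):

/* Sketch, not the applied changeset: snapshot the per-domain node
 * affinity maintained by domain_update_node_affinity(), rather than
 * walking every vcpu's cpu_affinity on each allocation. */
static nodemask_t domain_alloc_nodemask(struct domain *d)
{
    nodemask_t mask;

    if ( d == NULL )
        return node_online_map;

    spin_lock(&d->node_affinity_lock);
    mask = d->node_affinity;
    spin_unlock(&d->node_affinity_lock);

    return mask;
}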

> I'm not sure about keeping track of the last_alloc_node and then always
> avoiding it (at least when there's more than 1 node) by checking it
> last. I liked the way it worked before, favoring the node of the
> currently running processor, but I don't have any perf numbers to know
> what would be better.

Well, you can expect vcpus to move around within their affinity masks over
moderate timescales (say, seconds or minutes). And in fact the original
credit scheduler *loves* to migrate vcpus around the place over much less
reasonable timescales than that (sub-second). It is nice to balance our
allocations rather than hitting one node 'unfairly' hard.

> I've attached a patch with a couple minor tweaks. It removes the
> unnecessary node clearing from an empty initialized nodemask, and also
> moves a couple of domain_update_node_affinity() calls outside
> for_each_vcpu loops.

Thanks, I tweaked your tweaks (just one tiny optimisation) and applied it,
so it should show up in the staging tree rsn.

 -- Keir

> Andrew

