Hi all, Today's linux-next merge of the tip tree got a conflict in mm/mempolicy.c between commit 63f74ca21f1f ("mempolicy: fix refcount leak in mpol_set_shared_policy()") from Linus' tree and commit 4d58c795f691 ("mm/mpol: Check for misplaced page") from the tip tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc mm/mempolicy.c index 0b78fb9,3360a8d..0000000 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@@ -2170,12 -2168,116 +2203,122 @@@ mpol_shared_policy_lookup(struct shared return pol; } +static void sp_free(struct sp_node *n) +{ + mpol_put(n->policy); + kmem_cache_free(sn_cache, n); +} + + /** + * mpol_misplaced - check whether current page node is valid in policy + * + * @page: page to be checked + * @vma: vm area where page mapped + * @addr: virtual address where page mapped + * @multi: use multi-stage node binding + * + * Lookup current policy node id for vma,addr and "compare to" page's + * node id. + * + * Returns: + * -1 - not misplaced, page is in the right node + * node - node id where the page should be + * + * Policy determination "mimics" alloc_page_vma(). + * Called from fault path where we know the vma and faulting address. 
+ */ + int mpol_misplaced(struct page *page, struct vm_area_struct *vma, + unsigned long addr, int multi) + { + struct mempolicy *pol; + struct zone *zone; + int curnid = page_to_nid(page); + unsigned long pgoff; + int polnid = -1; + int ret = -1; + + BUG_ON(!vma); + + pol = get_vma_policy(current, vma, addr); + if (!(pol->flags & MPOL_F_MOF)) + goto out; + + switch (pol->mode) { + case MPOL_INTERLEAVE: + BUG_ON(addr >= vma->vm_end); + BUG_ON(addr < vma->vm_start); + + pgoff = vma->vm_pgoff; + pgoff += (addr - vma->vm_start) >> PAGE_SHIFT; + polnid = offset_il_node(pol, vma, pgoff); + break; + + case MPOL_PREFERRED: + if (pol->flags & MPOL_F_LOCAL) + polnid = numa_node_id(); + else + polnid = pol->v.preferred_node; + break; + + case MPOL_BIND: + /* + * allows binding to multiple nodes. + * use current page if in policy nodemask, + * else select nearest allowed node, if any. + * If no allowed nodes, use current [!misplaced]. NOTE(review): zone->node is read unconditionally below; confirm zone cannot be left NULL. + */ + if (node_isset(curnid, pol->v.nodes)) + goto out; + (void)first_zones_zonelist( + node_zonelist(numa_node_id(), GFP_HIGHUSER), + gfp_zone(GFP_HIGHUSER), + &pol->v.nodes, &zone); + polnid = zone->node; + break; + + default: + BUG(); + } + + /* + * Multi-stage node selection is used in conjunction with a periodic + * migration fault to build a temporal task<->page relation. By + * using a two-stage filter we remove short/unlikely relations. + * + * Using P(p) ~ n_p / n_t as per frequentist probability, we can + * equate a task's usage of a particular page (n_p) per total usage + * of this page (n_t) (in a given time-span) to a probability. + * + * Our periodic faults will then sample this probability and getting + * the same result twice in a row, given these samples are fully + * independent, is then given by P(p)^2, provided our sample period + * is sufficiently short compared to the usage pattern. + * + * This quadratic squishes small probabilities, making it less likely + * we act on an unlikely task<->page relation. 
+ * + * NOTE: effectively we're using task-home-node<->page-node relations + * since those are the only things we can affect. + * + * NOTE: we're using task-home-node as opposed to the current node + * the task might be running on, since the task-home-node is the + * long-term node of this task, further reducing noise. Also see + * task_tick_numa(). + */ + if (multi && (pol->flags & MPOL_F_HOME)) { + int last_nid = page_xchg_last_nid(page, polnid); + if (last_nid != polnid) + goto out; + } + + if (curnid != polnid) + ret = polnid; + out: + mpol_cond_put(pol); + + return ret; + } + static void sp_delete(struct shared_policy *sp, struct sp_node *n) { pr_debug("deleting %lx-l%lx\n", n->start, n->end);