Hi Mike,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.18 next-20180810]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Mike-Kravetz/mm-migration-fix-migration-of-huge-PMD-shared-pages/20180813-114549
config: i386-randconfig-x003-201832 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386

All errors (new ones prefixed by >>):

   mm/rmap.c: In function 'try_to_unmap_one':
>> mm/rmap.c:1425:7: error: implicit declaration of function 'huge_pmd_unshare'; did you mean '__NR_unshare'? [-Werror=implicit-function-declaration]
        huge_pmd_unshare(mm, &address, pvmw.pte)) {
        ^~~~~~~~~~~~~~~~
        __NR_unshare
   cc1: some warnings being treated as errors
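
Note: i386-randconfig-x003-201832 most likely has CONFIG_HUGETLB_PAGE disabled, and huge_pmd_unshare() appears to be declared only when that option is set, while the new call site in mm/rmap.c is built regardless of hugetlbfs. As a minimal sketch (assuming the declaration really does live under CONFIG_HUGETLB_PAGE in include/linux/hugetlb.h, and not necessarily the fix you intend), such configs could be kept building with a no-op fallback in the !CONFIG_HUGETLB_PAGE branch of that header:

/*
 * Sketch only: with CONFIG_HUGETLB_PAGE=n there are no hugetlb pages, so
 * PageHuge() is always false and the new call in try_to_unmap_one() can
 * never be reached; the stub just reports "nothing unshared" so that
 * always-built callers such as mm/rmap.c keep compiling.
 */
static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
				   pte_t *ptep)
{
	return 0;
}
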
vim +1425 mm/rmap.c

  1382	
  1383			/*
  1384			 * If the page is mlock()d, we cannot swap it out.
  1385			 * If it's recently referenced (perhaps page_referenced
  1386			 * skipped over this mm) then we should reactivate it.
  1387			 */
  1388			if (!(flags & TTU_IGNORE_MLOCK)) {
  1389				if (vma->vm_flags & VM_LOCKED) {
  1390					/* PTE-mapped THP are never mlocked */
  1391					if (!PageTransCompound(page)) {
  1392						/*
  1393						 * Holding pte lock, we do *not* need
  1394						 * mmap_sem here
  1395						 */
  1396						mlock_vma_page(page);
  1397					}
  1398					ret = false;
  1399					page_vma_mapped_walk_done(&pvmw);
  1400					break;
  1401				}
  1402				if (flags & TTU_MUNLOCK)
  1403					continue;
  1404			}
  1405	
  1406			/* Unexpected PMD-mapped THP? */
  1407			VM_BUG_ON_PAGE(!pvmw.pte, page);
  1408	
  1409			subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
  1410			address = pvmw.address;
  1411	
  1412			/*
  1413			 * PMDs for hugetlbfs pages could be shared. In this case,
  1414			 * pages with shared PMDs will have a mapcount of 1 no matter
  1415			 * how many times it is actually mapped. Map counting for
  1416			 * PMD sharing is mostly done via the reference count on the
  1417			 * PMD page itself. If the page we are trying to unmap is a
  1418			 * hugetlbfs page, attempt to 'unshare' at the PMD level.
  1419			 * huge_pmd_unshare takes care of clearing the PUD and
  1420			 * reference counting on the PMD page which effectively unmaps
  1421			 * the page. Take care of flushing cache and TLB for page in
  1422			 * this specific mapping here.
  1423			 */
  1424			if (PageHuge(page) &&
> 1425			    huge_pmd_unshare(mm, &address, pvmw.pte)) {
  1426				unsigned long end_add = address + vma_mmu_pagesize(vma);
  1427	
  1428				flush_cache_range(vma, address, end_add);
  1429				flush_tlb_range(vma, address, end_add);
  1430				mmu_notifier_invalidate_range(mm, address, end_add);
  1431				continue;
  1432			}
  1433	
  1434			if (IS_ENABLED(CONFIG_MIGRATION) &&
  1435			    (flags & TTU_MIGRATION) &&
  1436			    is_zone_device_page(page)) {
  1437				swp_entry_t entry;
  1438				pte_t swp_pte;
  1439	
  1440				pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
  1441	
  1442				/*
  1443				 * Store the pfn of the page in a special migration
  1444				 * pte. do_swap_page() will wait until the migration
  1445				 * pte is removed and then restart fault handling.
  1446				 */
  1447				entry = make_migration_entry(page, 0);
  1448				swp_pte = swp_entry_to_pte(entry);
  1449				if (pte_soft_dirty(pteval))
  1450					swp_pte = pte_swp_mksoft_dirty(swp_pte);
  1451				set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
  1452				/*
  1453				 * No need to invalidate here it will synchronize on
  1454				 * against the special swap migration pte.
  1455				 */
  1456				goto discard;
  1457			}
  1458	
  1459			if (!(flags & TTU_IGNORE_ACCESS)) {
  1460				if (ptep_clear_flush_young_notify(vma, address,
  1461							pvmw.pte)) {
  1462					ret = false;
  1463					page_vma_mapped_walk_done(&pvmw);
  1464					break;
  1465				}
  1466			}
  1467	
  1468			/* Nuke the page table entry. */
  1469			flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
  1470			if (should_defer_flush(mm, flags)) {
  1471				/*
  1472				 * We clear the PTE but do not flush so potentially
  1473				 * a remote CPU could still be writing to the page.
  1474				 * If the entry was previously clean then the
  1475				 * architecture must guarantee that a clear->dirty
  1476				 * transition on a cached TLB entry is written through
  1477				 * and traps if the PTE is unmapped.
  1478				 */
  1479				pteval = ptep_get_and_clear(mm, address, pvmw.pte);
  1480	
  1481				set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
  1482			} else {
  1483				pteval = ptep_clear_flush(vma, address, pvmw.pte);
  1484			}
  1485	
  1486			/* Move the dirty bit to the page. Now the pte is gone. */
  1487			if (pte_dirty(pteval))
  1488				set_page_dirty(page);
  1489	
  1490			/* Update high watermark before we lower rss */
  1491			update_hiwater_rss(mm);
  1492	
  1493			if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
  1494				pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
  1495				if (PageHuge(page)) {
  1496					int nr = 1 << compound_order(page);
  1497					hugetlb_count_sub(nr, mm);
  1498					set_huge_swap_pte_at(mm, address,
  1499							     pvmw.pte, pteval,
  1500							     vma_mmu_pagesize(vma));
  1501				} else {
  1502					dec_mm_counter(mm, mm_counter(page));
  1503					set_pte_at(mm, address, pvmw.pte, pteval);
  1504				}
  1505	
  1506			} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
  1507				/*
  1508				 * The guest indicated that the page content is of no
  1509				 * interest anymore. Simply discard the pte, vmscan
  1510				 * will take care of the rest.
  1511				 * A future reference will then fault in a new zero
  1512				 * page. When userfaultfd is active, we must not drop
  1513				 * this page though, as its main user (postcopy
  1514				 * migration) will not expect userfaults on already
  1515				 * copied pages.
  1516				 */
  1517				dec_mm_counter(mm, mm_counter(page));
  1518				/* We have to invalidate as we cleared the pte */
  1519				mmu_notifier_invalidate_range(mm, address,
  1520							      address + PAGE_SIZE);
  1521			} else if (IS_ENABLED(CONFIG_MIGRATION) &&
  1522					(flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
  1523				swp_entry_t entry;
  1524				pte_t swp_pte;
  1525	
  1526				if (arch_unmap_one(mm, vma, address, pteval) < 0) {
  1527					set_pte_at(mm, address, pvmw.pte, pteval);
  1528					ret = false;
  1529					page_vma_mapped_walk_done(&pvmw);
  1530					break;
  1531				}
  1532	
  1533				/*
  1534				 * Store the pfn of the page in a special migration
  1535				 * pte. do_swap_page() will wait until the migration
  1536				 * pte is removed and then restart fault handling.
  1537				 */
  1538				entry = make_migration_entry(subpage,
  1539						pte_write(pteval));
  1540				swp_pte = swp_entry_to_pte(entry);
  1541				if (pte_soft_dirty(pteval))
  1542					swp_pte = pte_swp_mksoft_dirty(swp_pte);
  1543				set_pte_at(mm, address, pvmw.pte, swp_pte);
  1544				/*
  1545				 * No need to invalidate here it will synchronize on
  1546				 * against the special swap migration pte.
  1547				 */
  1548			} else if (PageAnon(page)) {
  1549				swp_entry_t entry = { .val = page_private(subpage) };
  1550				pte_t swp_pte;
  1551				/*
  1552				 * Store the swap location in the pte.
  1553				 * See handle_pte_fault() ...
  1554				 */
  1555				if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
  1556					WARN_ON_ONCE(1);
  1557					ret = false;
  1558					/* We have to invalidate as we cleared the pte */
  1559					mmu_notifier_invalidate_range(mm, address,
  1560								address + PAGE_SIZE);
  1561					page_vma_mapped_walk_done(&pvmw);
  1562					break;
  1563				}
  1564	
  1565				/* MADV_FREE page check */
  1566				if (!PageSwapBacked(page)) {
  1567					if (!PageDirty(page)) {
  1568						/* Invalidate as we cleared the pte */
  1569						mmu_notifier_invalidate_range(mm,
  1570							address, address + PAGE_SIZE);
  1571						dec_mm_counter(mm, MM_ANONPAGES);
  1572						goto discard;
  1573					}
  1574	
  1575					/*
  1576					 * If the page was redirtied, it cannot be
  1577					 * discarded. Remap the page to page table.
  1578					 */
  1579					set_pte_at(mm, address, pvmw.pte, pteval);
  1580					SetPageSwapBacked(page);
  1581					ret = false;
  1582					page_vma_mapped_walk_done(&pvmw);
  1583					break;
  1584				}
  1585	
  1586				if (swap_duplicate(entry) < 0) {
  1587					set_pte_at(mm, address, pvmw.pte, pteval);
  1588					ret = false;
  1589					page_vma_mapped_walk_done(&pvmw);
  1590					break;
  1591				}
  1592				if (arch_unmap_one(mm, vma, address, pteval) < 0) {
  1593					set_pte_at(mm, address, pvmw.pte, pteval);
  1594					ret = false;
  1595					page_vma_mapped_walk_done(&pvmw);
  1596					break;
  1597				}
  1598				if (list_empty(&mm->mmlist)) {
  1599					spin_lock(&mmlist_lock);
  1600					if (list_empty(&mm->mmlist))
  1601						list_add(&mm->mmlist, &init_mm.mmlist);
  1602					spin_unlock(&mmlist_lock);
  1603				}
  1604				dec_mm_counter(mm, MM_ANONPAGES);
  1605				inc_mm_counter(mm, MM_SWAPENTS);
  1606				swp_pte = swp_entry_to_pte(entry);
  1607				if (pte_soft_dirty(pteval))
  1608					swp_pte = pte_swp_mksoft_dirty(swp_pte);
  1609				set_pte_at(mm, address, pvmw.pte, swp_pte);
  1610				/* Invalidate as we cleared the pte */
  1611				mmu_notifier_invalidate_range(mm, address,
  1612							      address + PAGE_SIZE);
  1613			} else {
  1614				/*
  1615				 * We should not need to notify here as we reach this
  1616				 * case only from freeze_page() itself only call from
  1617				 * split_huge_page_to_list() so everything below must
  1618				 * be true:
  1619				 * - page is not anonymous
  1620				 * - page is locked
  1621				 *
  1622				 * So as it is a locked file back page thus it can not
  1623				 * be remove from the page cache and replace by a new
  1624				 * page before mmu_notifier_invalidate_range_end so no
  1625				 * concurrent thread might update its page table to
  1626				 * point at new page while a device still is using this
  1627				 * page.
  1628				 *
  1629				 * See Documentation/vm/mmu_notifier.rst
  1630				 */
  1631				dec_mm_counter(mm, mm_counter_file(page));
  1632			}
  1633	discard:
  1634			/*
  1635			 * No need to call mmu_notifier_invalidate_range() it has be
  1636			 * done above for all cases requiring it to happen under page
  1637			 * table lock before mmu_notifier_invalidate_range_end()
  1638			 *
  1639			 * See Documentation/vm/mmu_notifier.rst
  1640			 */
  1641			page_remove_rmap(subpage, PageHuge(page));
  1642			put_page(page);
  1643		}
  1644	
  1645		mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
  1646	
  1647		return ret;
  1648	}
  1649	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation