All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sourav Panda <souravpanda@google.com>
To: Mike Rapoport <rppt@kernel.org>
Cc: corbet@lwn.net, gregkh@linuxfoundation.org, rafael@kernel.org,
	 akpm@linux-foundation.org, mike.kravetz@oracle.com,
	muchun.song@linux.dev,  david@redhat.com, rdunlap@infradead.org,
	chenlinxuan@uniontech.com,  yang.yang29@zte.com.cn,
	tomas.mudrunka@gmail.com, bhelgaas@google.com,
	 ivan@cloudflare.com, pasha.tatashin@soleen.com,
	yosryahmed@google.com,  hannes@cmpxchg.org, shakeelb@google.com,
	kirill.shutemov@linux.intel.com,  wangkefeng.wang@huawei.com,
	adobriyan@gmail.com, vbabka@suse.cz,  Liam.Howlett@oracle.com,
	surenb@google.com, linux-kernel@vger.kernel.org,
	 linux-fsdevel@vger.kernel.org, linux-doc@vger.kernel.org,
	linux-mm@kvack.org
Subject: Re: [PATCH v1 1/1] mm: report per-page metadata information
Date: Thu, 14 Sep 2023 15:41:00 -0700	[thread overview]
Message-ID: <CANruzcSCFHRgB7oCoZRPOerR=FH3PqvoW0ea+v-p=y+sb-c=kA@mail.gmail.com> (raw)
In-Reply-To: <20230913205125.GA3303@kernel.org>

[-- Attachment #1: Type: text/plain, Size: 9140 bytes --]

Thank you Mike Rapoport for reviewing this patch series. Please find my
responses below.


>
> >  #endif /* _LINUX_VMSTAT_H */
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index ba6d39b71cb1..ca36751be50e 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -1758,6 +1758,10 @@ static void
> __update_and_free_hugetlb_folio(struct hstate *h,
> >               destroy_compound_gigantic_folio(folio, huge_page_order(h));
> >               free_gigantic_folio(folio, huge_page_order(h));
> >       } else {
> > +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> > +             __mod_node_page_state(NODE_DATA(page_to_nid(&folio->page)),
> > +                                   NR_PAGE_METADATA,
> -huge_page_order(h));
>
> I don't think memory map will change here with classic SPARSEMEM
>

Thank you. Yes, I agree with your comment.


>
> > +#endif
> >               __free_pages(&folio->page, huge_page_order(h));
> >       }
> >  }
> > @@ -2143,7 +2147,9 @@ static struct folio
> *alloc_buddy_hugetlb_folio(struct hstate *h,
> >               __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
> >               return NULL;
> >       }
> > -
> > +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> > +     __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> huge_page_order(h));
> > +#endif
> >       __count_vm_event(HTLB_BUDDY_PGALLOC);
> >       return page_folio(page);
> >  }
> > diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> > index 4b9734777f69..7f920bfa8e79 100644
> > --- a/mm/hugetlb_vmemmap.c
> > +++ b/mm/hugetlb_vmemmap.c
> > @@ -214,6 +214,8 @@ static inline void free_vmemmap_page(struct page
> *page)
> >               free_bootmem_page(page);
> >       else
> >               __free_page(page);
> > +     __mod_node_page_state(NODE_DATA(page_to_nid(page)),
> > +                           NR_PAGE_METADATA, -1);
> >  }
> >
> >  /* Free a list of the vmemmap pages */
> > @@ -336,6 +338,7 @@ static int vmemmap_remap_free(unsigned long start,
> unsigned long end,
> >                         (void *)walk.reuse_addr);
> >               list_add(&walk.reuse_page->lru, &vmemmap_pages);
> >       }
> > +     __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA, 1);
> >
> >       /*
> >        * In order to make remapping routine most efficient for the huge
> pages,
> > @@ -387,8 +390,12 @@ static int alloc_vmemmap_page_list(unsigned long
> start, unsigned long end,
> >
> >       while (nr_pages--) {
> >               page = alloc_pages_node(nid, gfp_mask, 0);
> > -             if (!page)
> > +             if (!page) {
> >                       goto out;
> > +             } else {
> > +                     __mod_node_page_state(NODE_DATA(page_to_nid(page)),
> > +                                           NR_PAGE_METADATA, 1);
>
> We can update this once for nr_pages outside the loop, cannot we?
>

Thank you for the comment. I agree with you and shall incorporate it.


>
> > +             }
> >               list_add_tail(&page->lru, list);
> >       }
> >
> > diff --git a/mm/mm_init.c b/mm/mm_init.c
> > index 50f2f34745af..e02dce7e2e9a 100644
> > --- a/mm/mm_init.c
> > +++ b/mm/mm_init.c
> > @@ -26,6 +26,7 @@
> >  #include <linux/pgtable.h>
> >  #include <linux/swap.h>
> >  #include <linux/cma.h>
> > +#include <linux/vmstat.h>
> >  #include "internal.h"
> >  #include "slab.h"
> >  #include "shuffle.h"
> > @@ -1656,6 +1657,8 @@ static void __init alloc_node_mem_map(struct
> pglist_data *pgdat)
> >                       panic("Failed to allocate %ld bytes for node %d
> memory map\n",
> >                             size, pgdat->node_id);
> >               pgdat->node_mem_map = map + offset;
> > +             mod_node_early_perpage_metadata(pgdat->node_id,
> > +                                             PAGE_ALIGN(size) >>
> PAGE_SHIFT);
> >       }
> >       pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
> >                               __func__, pgdat->node_id, (unsigned
> long)pgdat,
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 0c5be12f9336..4e295d5087f4 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -5443,6 +5443,7 @@ void __init setup_per_cpu_pageset(void)
> >       for_each_online_pgdat(pgdat)
> >               pgdat->per_cpu_nodestats =
> >                       alloc_percpu(struct per_cpu_nodestat);
> > +     writeout_early_perpage_metadata();
>
> Why it's called here?
> You can copy early stats to actual node stats as soon as the nodes and page
> allocator are initialized.
>

Thank you for mentioning this. I agree with you and shall move it there.


>
> >  }
> >
> >  __meminit void zone_pcp_init(struct zone *zone)
> > diff --git a/mm/page_ext.c b/mm/page_ext.c
> > index 4548fcc66d74..b5b9d3079e20 100644
> > --- a/mm/page_ext.c
> > +++ b/mm/page_ext.c
> > @@ -201,6 +201,8 @@ static int __init alloc_node_page_ext(int nid)
> >               return -ENOMEM;
> >       NODE_DATA(nid)->node_page_ext = base;
> >       total_usage += table_size;
> > +     __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> > +                           PAGE_ALIGN(table_size) >> PAGE_SHIFT);
> >       return 0;
> >  }
> >
> > @@ -255,12 +257,15 @@ static void *__meminit alloc_page_ext(size_t size,
> int nid)
> >       void *addr = NULL;
> >
> >       addr = alloc_pages_exact_nid(nid, size, flags);
> > -     if (addr) {
> > +     if (addr)
> >               kmemleak_alloc(addr, size, 1, flags);
> > -             return addr;
> > -     }
> > +     else
> > +             addr = vzalloc_node(size, nid);
> >
> > -     addr = vzalloc_node(size, nid);
> > +     if (addr) {
> > +             __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> > +                                   PAGE_ALIGN(size) >> PAGE_SHIFT);
> > +     }
> >
> >       return addr;
> >  }
> > @@ -314,6 +319,10 @@ static void free_page_ext(void *addr)
> >               BUG_ON(PageReserved(page));
> >               kmemleak_free(addr);
> >               free_pages_exact(addr, table_size);
> > +
> > +             __mod_node_page_state(NODE_DATA(page_to_nid(page)),
> NR_PAGE_METADATA,
> > +                                   (long)-1 * (PAGE_ALIGN(table_size)
> >> PAGE_SHIFT));
> > +
>
> what happens with vmalloc()ed page_ext?
>

Thank you for pointing this out. I shall also make this change for
vmalloc()ed page_ext.


>
> >       }
> >  }
> >
> > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> > index a2cbe44c48e1..e33f302db7c6 100644
> > --- a/mm/sparse-vmemmap.c
> > +++ b/mm/sparse-vmemmap.c
> > @@ -469,5 +469,8 @@ struct page * __meminit
> __populate_section_memmap(unsigned long pfn,
> >       if (r < 0)
> >               return NULL;
> >
> > +     __mod_node_page_state(NODE_DATA(nid), NR_PAGE_METADATA,
> > +                           PAGE_ALIGN(end - start) >> PAGE_SHIFT);
> > +
> >       return pfn_to_page(pfn);
> >  }
> > diff --git a/mm/sparse.c b/mm/sparse.c
> > index 77d91e565045..db78233a85ef 100644
> > --- a/mm/sparse.c
> > +++ b/mm/sparse.c
> > @@ -14,7 +14,7 @@
> >  #include <linux/swap.h>
> >  #include <linux/swapops.h>
> >  #include <linux/bootmem_info.h>
> > -
> > +#include <linux/vmstat.h>
> >  #include "internal.h"
> >  #include <asm/dma.h>
> >
> > @@ -465,6 +465,9 @@ static void __init sparse_buffer_init(unsigned long
> size, int nid)
> >        */
> >       sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid,
> true);
> >       sparsemap_buf_end = sparsemap_buf + size;
> > +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> > +     mod_node_early_perpage_metadata(nid, PAGE_ALIGN(size) >>
> PAGE_SHIFT);
>
> All early struct pages are allocated in memmap_alloc(). It'd make sense to
> update
> the counter there.
>

Thanks for the comment. The reason why we did not do it in memmap_alloc()
is because the struct pages can decrease as well.


>
> > +#endif
> >  }
> >
> >  static void __init sparse_buffer_fini(void)
> > @@ -641,6 +644,8 @@ static void depopulate_section_memmap(unsigned long
> pfn, unsigned long nr_pages,
> >       unsigned long start = (unsigned long) pfn_to_page(pfn);
> >       unsigned long end = start + nr_pages * sizeof(struct page);
> >
> > +     __mod_node_page_state(NODE_DATA(page_to_nid(pfn_to_page(pfn))),
> NR_PAGE_METADATA,
> > +                           (long)-1 * (PAGE_ALIGN(end - start) >>
> PAGE_SHIFT));
> >       vmemmap_free(start, end, altmap);
> >  }
> >  static void free_map_bootmem(struct page *memmap)
> > diff --git a/mm/vmstat.c b/mm/vmstat.c
> > index 00e81e99c6ee..731eb5264b49 100644
> > --- a/mm/vmstat.c
> > +++ b/mm/vmstat.c
> > @@ -1245,6 +1245,7 @@ const char * const vmstat_text[] = {
> >       "pgpromote_success",
> >       "pgpromote_candidate",
> >  #endif
> > +     "nr_page_metadata",
> >
> >       /* enum writeback_stat_item counters */
> >       "nr_dirty_threshold",
> > @@ -2274,4 +2275,24 @@ static int __init extfrag_debug_init(void)
> >  }
> >
> >  module_init(extfrag_debug_init);
> > +
> > +// Page metadata size (struct page and page_ext) in pages
> > +unsigned long early_perpage_metadata[MAX_NUMNODES] __initdata;
>
> static?
>

Thanks for pointing this out. I shall make  __initdata static in the next
version of the patch.

[-- Attachment #2: Type: text/html, Size: 12433 bytes --]

  parent reply	other threads:[~2023-09-14 22:41 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-13 17:29 [PATCH v1 0/1] Report perpage metadata information Sourav Panda
2023-09-13 17:30 ` [PATCH v1 1/1] mm: report per-page " Sourav Panda
2023-09-13 17:56   ` Matthew Wilcox
2023-09-14 21:04     ` Sourav Panda
2023-09-13 19:34   ` kernel test robot
2023-09-13 20:51   ` Mike Rapoport
2023-09-14 12:47     ` Matthew Wilcox
2023-09-14 22:45       ` Sourav Panda
2023-09-14 22:41     ` Sourav Panda [this message]
2023-09-13 21:53   ` kernel test robot
2023-09-14 13:00   ` David Hildenbrand
2023-09-14 22:47     ` Sourav Panda
2023-09-18  8:14   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CANruzcSCFHRgB7oCoZRPOerR=FH3PqvoW0ea+v-p=y+sb-c=kA@mail.gmail.com' \
    --to=souravpanda@google.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=adobriyan@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=bhelgaas@google.com \
    --cc=chenlinxuan@uniontech.com \
    --cc=corbet@lwn.net \
    --cc=david@redhat.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=ivan@cloudflare.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mike.kravetz@oracle.com \
    --cc=muchun.song@linux.dev \
    --cc=pasha.tatashin@soleen.com \
    --cc=rafael@kernel.org \
    --cc=rdunlap@infradead.org \
    --cc=rppt@kernel.org \
    --cc=shakeelb@google.com \
    --cc=surenb@google.com \
    --cc=tomas.mudrunka@gmail.com \
    --cc=vbabka@suse.cz \
    --cc=wangkefeng.wang@huawei.com \
    --cc=yang.yang29@zte.com.cn \
    --cc=yosryahmed@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.