Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in arch/x86/include/asm/unistd_64.h between commit fce8dc06423d ("x86-64: Wire up getcpu syscall") from Linus' tree and commit ("Add x86_64 specific wire up") from the akpm tree. I have fixed it up (see below) and can carry the fix as necessary. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index d92641c..f9fff6a 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -683,6 +683,10 @@ __SYSCALL(__NR_sendmmsg, sys_sendmmsg) __SYSCALL(__NR_setns, sys_setns) #define __NR_getcpu 309 __SYSCALL(__NR_getcpu, sys_getcpu) +#define __NR_process_vm_readv 310 +__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv) +#define __NR_process_vm_writev 311 +__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR
Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in mm/memcontrol.c between commit 185efc0f9a1f ("memcg: Revert "memcg: add memory.vmscan_stat"") from Linus' tree and commit 970f23b7f013 ("The memcg code sometimes uses "struct mem_cgroup *mem" and sometimes uses") from the akpm tree. I fixed it up (I think - see below) and can carry the fix as necessary. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc mm/memcontrol.c index 1364b5e,e47a504..0000000 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@@ -202,9 -202,53 +202,9 @@@ struct mem_cgroup_eventfd_list struct eventfd_ctx *eventfd; }; - static void mem_cgroup_threshold(struct mem_cgroup *mem); - static void mem_cgroup_oom_notify(struct mem_cgroup *mem); + static void mem_cgroup_threshold(struct mem_cgroup *memcg); + static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); -enum { - SCAN_BY_LIMIT, - SCAN_BY_SYSTEM, - NR_SCAN_CONTEXT, - SCAN_BY_SHRINK, /* not recorded now */ -}; - -enum { - SCAN, - SCAN_ANON, - SCAN_FILE, - ROTATE, - ROTATE_ANON, - ROTATE_FILE, - FREED, - FREED_ANON, - FREED_FILE, - ELAPSED, - NR_SCANSTATS, -}; - -struct scanstat { - spinlock_t lock; - unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS]; - unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS]; -}; - -const char *scanstat_string[NR_SCANSTATS] = { - "scanned_pages", - "scanned_anon_pages", - "scanned_file_pages", - "rotated_pages", - "rotated_anon_pages", - "rotated_file_pages", - "freed_pages", - "freed_anon_pages", - "freed_file_pages", - "elapsed_ns", -}; -#define SCANSTAT_WORD_LIMIT "_by_limit" -#define SCANSTAT_WORD_SYSTEM "_by_system" -#define SCANSTAT_WORD_HIERARCHY "_under_hierarchy" - - /* * The memory controller data structure. The memory controller controls both * page cache and RSS per cgroup. We would eventually like to provide @@@ -1659,18 -1745,28 +1662,18 @@@ static int mem_cgroup_hierarchical_recl bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; - struct memcg_scanrecord rec; unsigned long excess; - unsigned long scanned; + unsigned long nr_scanned; - excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; + excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT; /* If memsw_is_minimum==1, swap-out is of-no-use. */ - if (!check_soft && !shrink && root_mem->memsw_is_minimum) + if (!check_soft && !shrink && root_memcg->memsw_is_minimum) noswap = true; - if (shrink) - rec.context = SCAN_BY_SHRINK; - else if (check_soft) - rec.context = SCAN_BY_SYSTEM; - else - rec.context = SCAN_BY_LIMIT; - - rec.root = root_memcg; - while (1) { - victim = mem_cgroup_select_victim(root_mem); - if (victim == root_mem) { + victim = mem_cgroup_select_victim(root_memcg); + if (victim == root_memcg) { loop++; /* * We are not draining per cpu cached charges during @@@ -3753,15 -3858,19 +3756,15 @@@ try_to_free lru_add_drain_all(); /* try to free all pages in this cgroup */ shrink = 1; - while (nr_retries && mem->res.usage > 0) { + while (nr_retries && memcg->res.usage > 0) { - struct memcg_scanrecord rec; int progress; if (signal_pending(current)) { ret = -EINTR; goto out; } - progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, - false); - rec.context = SCAN_BY_SHRINK; - rec.mem = memcg; - rec.root = memcg; + progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, + false, &rec); if (!progress) { nr_retries--; /* maybe some writeback is necessary */ @@@ -4926,21 -5091,22 +4929,21 @@@ mem_cgroup_create(struct cgroup_subsys */ mem_cgroup_get(parent); } else { - res_counter_init(&mem->res, NULL); - res_counter_init(&mem->memsw, NULL); + res_counter_init(&memcg->res, NULL); + res_counter_init(&memcg->memsw, NULL); } - mem->last_scanned_child = 0; - mem->last_scanned_node = MAX_NUMNODES; - INIT_LIST_HEAD(&mem->oom_notify); + memcg->last_scanned_child = 0; + memcg->last_scanned_node = MAX_NUMNODES; + INIT_LIST_HEAD(&memcg->oom_notify); if (parent) - mem->swappiness = mem_cgroup_swappiness(parent); - atomic_set(&mem->refcnt, 1); - mem->move_charge_at_immigrate = 0; - mutex_init(&mem->thresholds_lock); - return &mem->css; + memcg->swappiness = mem_cgroup_swappiness(parent); + atomic_set(&memcg->refcnt, 1); + memcg->move_charge_at_immigrate = 0; + mutex_init(&memcg->thresholds_lock); - spin_lock_init(&memcg->scanstat.lock); + return &memcg->css; free_out: - __mem_cgroup_free(mem); + __mem_cgroup_free(memcg); root_mem_cgroup = NULL; return ERR_PTR(error); }
[-- Attachment #1: Type: text/plain, Size: 2163 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/direct-io.c between commit eb28be2b4c0a ("direct-io: separate fields only used in the submission path from struct dio") from Linus' tree and commit "fs/direct-io.c: salcuate fs_count correctly in get_more_blocks()" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/direct-io.c index d740ab6,b05f24e..0000000 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@@ -575,14 -564,13 +575,13 @@@ static inline int dio_bio_reap(struct d * buffer_mapped(). However the direct-io code will only process holes one * block at a time - it will repeatedly call get_block() as it walks the hole. */ -static int get_more_blocks(struct dio *dio) +static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, + struct buffer_head *map_bh) { int ret; - struct buffer_head *map_bh = &dio->map_bh; sector_t fs_startblk; /* Into file, in filesystem-sized blocks */ + sector_t fs_endblk; /* Into file, in filesystem-sized blocks */ unsigned long fs_count; /* Number of filesystem-sized blocks */ - unsigned long dio_count;/* Number of dio_block-sized blocks */ - unsigned long blkmask; int create; /* @@@ -591,13 -579,10 +590,10 @@@ */ ret = dio->page_errors; if (ret == 0) { - BUG_ON(dio->block_in_file >= dio->final_block_in_request); - fs_startblk = dio->block_in_file >> dio->blkfactor; - fs_endblk = (dio->final_block_in_request - 1) >> dio->blkfactor; + BUG_ON(sdio->block_in_file >= sdio->final_block_in_request); + fs_startblk = sdio->block_in_file >> sdio->blkfactor; - dio_count = sdio->final_block_in_request - sdio->block_in_file; - fs_count = dio_count >> sdio->blkfactor; - blkmask = (1 << sdio->blkfactor) - 1; - if (dio_count & blkmask) - fs_count++; ++ fs_endblk = (sdio->final_block_in_request - 1) >> sdio->blkfactor; + fs_count = fs_endblk - fs_startblk + 1; map_bh->b_state = 0; map_bh->b_size = fs_count << dio->inode->i_blkbits; [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
Hi Stephen,
Thanks for the merge.
On 11/01/2011 04:16 PM, Stephen Rothwell wrote:
> Hi Andrew,
>
> Today's linux-next merge of the akpm tree got a conflict in
> fs/direct-io.c between commit eb28be2b4c0a ("direct-io: separate fields
> only used in the submission path from struct dio") from Linus' tree and
> commit "fs/direct-io.c: salcuate fs_count correctly in get_more_blocks()"
> from the akpm tree.
Actually I have another patch which is rebased to current linus' tree.
So Andrew, would you mind replace the old one with the patch named
"[PATCH for 3.2] fs/direct-io.c: Calculate fs_count correctly in
get_more_blocks."
Thanks
Tao
[-- Attachment #1: Type: text/plain, Size: 2274 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in drivers/idle/intel_idle.c between commit 46bcfad7a819 ("cpuidle: Single/Global registration of idle states") from Linus' tree and commit "intel_idle: fix API misuse" from the akpm tree. The former moved the code modified by the latter. I fixed it up (see below0 and can carry the fix as necessary. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc drivers/idle/intel_idle.c index 5d2f8e1,0a53882..0000000 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@@ -424,60 -398,6 +424,60 @@@ static void intel_idle_cpuidle_devices_ return; } /* + * intel_idle_cpuidle_driver_init() + * allocate, initialize cpuidle_states + */ +static int intel_idle_cpuidle_driver_init(void) +{ + int cstate; + struct cpuidle_driver *drv = &intel_idle_driver; + + drv->state_count = 1; + + for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { + int num_substates; + + if (cstate > max_cstate) { + printk(PREFIX "max_cstate %d reached\n", + max_cstate); + break; + } + + /* does the state exist in CPUID.MWAIT? */ + num_substates = (mwait_substates >> ((cstate) * 4)) + & MWAIT_SUBSTATE_MASK; + if (num_substates == 0) + continue; + /* is the state not enabled? */ + if (cpuidle_state_table[cstate].enter == NULL) { + /* does the driver not know about the state? */ + if (*cpuidle_state_table[cstate].name == '\0') + pr_debug(PREFIX "unaware of model 0x%x" + " MWAIT %d please" + " contact lenb@kernel.org", + boot_cpu_data.x86_model, cstate); + continue; + } + + if ((cstate > 2) && + !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halts in idle" + " states deeper than C2"); + + drv->states[drv->state_count] = /* structure copy */ + cpuidle_state_table[cstate]; + + drv->state_count += 1; + } + + if (auto_demotion_disable_flags) - smp_call_function(auto_demotion_disable, NULL, 1); ++ on_each_cpu(auto_demotion_disable, NULL, 1); + + return 0; +} + + +/* * intel_idle_cpuidle_devices_init() * allocate, initialize, register cpuidle_devices */ [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 674 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in include/linux/lglock.h between commit e30e2fdfe562 ("VFS: Fix race between CPU hotplug and lglocks") from Linus' tree and commit "brlocks/lglocks: clean up code" from the akpm tree. Given the extent of the conflict and that this patch also conflicts in several other files (fs/dcache.c, fs/file_table.c, fs/namei.c, fs/namespace.c - presumably against the vfs tree), I have dropped this patch (and the following "brlocks-lglocks-clean-up-code-checkpatch-fixes") from the akpm tree. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au http://www.canb.auug.org.au/~sfr/ [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 465 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in arch/x86/mm/hugetlbpage.c between commit 097d59106a8e ("vm: avoid using find_vma_prev() unnecessarily") from Linus' tree and commit "hugetlb: drop prev_vma in hugetlb_get_unmapped_area_topdown()" from the akpm tree. I think the latter is a superset of the former, so I just dropped the changes from the former. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
On Thu, 8 Mar 2012 17:53:07 +1100 Stephen Rothwell <sfr@canb.auug.org.au> wrote: > Hi Andrew, > > Today's linux-next merge of the akpm tree got a conflict in > arch/x86/mm/hugetlbpage.c between commit 097d59106a8e ("vm: avoid using > find_vma_prev() unnecessarily") from Linus' tree and commit "hugetlb: > drop prev_vma in hugetlb_get_unmapped_area_topdown()" from the akpm tree. > > I think the latter is a superset of the former, so I just dropped the > changes from the former. Actually they're different. I reworked the earlier patch as below. From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Subject: hugetlb: drop prev_vma in hugetlb_get_unmapped_area_topdown() After the call find_vma_prev(mm, addr, &prev_vma), the following condition is always true: !prev_vma || (addr >= prev_vma->vm_end) it can happily drop prev_vma and use find_vma() instead of find_vma_prev(). Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Michal Hocko <mhocko@suse.cz> Cc: Hillf Danton <dhillf@gmail.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> --- arch/x86/mm/hugetlbpage.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff -puN arch/x86/mm/hugetlbpage.c~hugetlb-drop-prev_vma-in-hugetlb_get_unmapped_area_topdown arch/x86/mm/hugetlbpage.c --- a/arch/x86/mm/hugetlbpage.c~hugetlb-drop-prev_vma-in-hugetlb_get_unmapped_area_topdown +++ a/arch/x86/mm/hugetlbpage.c @@ -308,7 +308,7 @@ static unsigned long hugetlb_get_unmappe { struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *prev_vma; + struct vm_area_struct *vma; unsigned long base = mm->mmap_base, addr = addr0; unsigned long largest_hole = mm->cached_hole_size; int first_time = 1; @@ -334,25 +334,16 @@ try_again: * i.e. return with success: */ vma = find_vma(mm, addr); - if (!vma) - return addr; - - /* - * new region fits between prev_vma->vm_end and - * vma->vm_start, use it: - */ - prev_vma = vma->vm_prev; - if (addr + len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { + if (vma) + prev_vma = vma->vm_prev; + if (!vma || addr + len <= vma->vm_start) { /* remember the address as a hint for next time */ mm->cached_hole_size = largest_hole; return (mm->free_area_cache = addr); - } else { + } else if (mm->free_area_cache == vma->vm_end) { /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } + mm->free_area_cache = vma->vm_start; + mm->cached_hole_size = largest_hole; } /* remember the largest hole we saw so far */ _
[-- Attachment #1: Type: text/plain, Size: 2073 bytes --] Hi Andrew, On Wed, 7 Mar 2012 23:32:20 -0800 Andrew Morton <akpm@linux-foundation.org> wrote: > > Actually they're different. I reworked the earlier patch as below. OK. But some comments. > diff -puN arch/x86/mm/hugetlbpage.c~hugetlb-drop-prev_vma-in-hugetlb_get_unmapped_area_topdown arch/x86/mm/hugetlbpage.c > --- a/arch/x86/mm/hugetlbpage.c~hugetlb-drop-prev_vma-in-hugetlb_get_unmapped_area_topdown > +++ a/arch/x86/mm/hugetlbpage.c > @@ -308,7 +308,7 @@ static unsigned long hugetlb_get_unmappe > { > struct hstate *h = hstate_file(file); > struct mm_struct *mm = current->mm; > - struct vm_area_struct *vma, *prev_vma; > + struct vm_area_struct *vma; You still remove prev_vma ... > unsigned long base = mm->mmap_base, addr = addr0; > unsigned long largest_hole = mm->cached_hole_size; > int first_time = 1; > @@ -334,25 +334,16 @@ try_again: > * i.e. return with success: > */ > vma = find_vma(mm, addr); > - if (!vma) > - return addr; > - > - /* > - * new region fits between prev_vma->vm_end and > - * vma->vm_start, use it: > - */ > - prev_vma = vma->vm_prev; > - if (addr + len <= vma->vm_start && > - (!prev_vma || (addr >= prev_vma->vm_end))) { > + if (vma) > + prev_vma = vma->vm_prev; But then assign to it ... > + if (!vma || addr + len <= vma->vm_start) { > /* remember the address as a hint for next time */ > mm->cached_hole_size = largest_hole; > return (mm->free_area_cache = addr); > - } else { > + } else if (mm->free_area_cache == vma->vm_end) { > /* pull free_area_cache down to the first hole */ > - if (mm->free_area_cache == vma->vm_end) { > - mm->free_area_cache = vma->vm_start; > - mm->cached_hole_size = largest_hole; > - } > + mm->free_area_cache = vma->vm_start; > + mm->cached_hole_size = largest_hole; > } > > /* remember the largest hole we saw so far */ But it never gets used. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
On Thu, 8 Mar 2012 18:41:05 +1100 Stephen Rothwell <sfr@canb.auug.org.au> wrote:
> Hi Andrew,
>
> On Wed, 7 Mar 2012 23:32:20 -0800 Andrew Morton <akpm@linux-foundation.org> wrote:
> >
> > Actually they're different. I reworked the earlier patch as below.
>
> OK. But some comments.
>
> > diff -puN arch/x86/mm/hugetlbpage.c~hugetlb-drop-prev_vma-in-hugetlb_get_unmapped_area_topdown arch/x86/mm/hugetlbpage.c
> > --- a/arch/x86/mm/hugetlbpage.c~hugetlb-drop-prev_vma-in-hugetlb_get_unmapped_area_topdown
> > +++ a/arch/x86/mm/hugetlbpage.c
> > @@ -308,7 +308,7 @@ static unsigned long hugetlb_get_unmappe
> > {
> > struct hstate *h = hstate_file(file);
> > struct mm_struct *mm = current->mm;
> > - struct vm_area_struct *vma, *prev_vma;
> > + struct vm_area_struct *vma;
>
> You still remove prev_vma ...
>
> > unsigned long base = mm->mmap_base, addr = addr0;
> > unsigned long largest_hole = mm->cached_hole_size;
> > int first_time = 1;
> > @@ -334,25 +334,16 @@ try_again:
> > * i.e. return with success:
> > */
> > vma = find_vma(mm, addr);
> > - if (!vma)
> > - return addr;
> > -
> > - /*
> > - * new region fits between prev_vma->vm_end and
> > - * vma->vm_start, use it:
> > - */
> > - prev_vma = vma->vm_prev;
> > - if (addr + len <= vma->vm_start &&
> > - (!prev_vma || (addr >= prev_vma->vm_end))) {
> > + if (vma)
> > + prev_vma = vma->vm_prev;
>
> But then assign to it ...
>
> > + if (!vma || addr + len <= vma->vm_start) {
> > /* remember the address as a hint for next time */
> > mm->cached_hole_size = largest_hole;
> > return (mm->free_area_cache = addr);
> > - } else {
> > + } else if (mm->free_area_cache == vma->vm_end) {
> > /* pull free_area_cache down to the first hole */
> > - if (mm->free_area_cache == vma->vm_end) {
> > - mm->free_area_cache = vma->vm_start;
> > - mm->cached_hole_size = largest_hole;
> > - }
> > + mm->free_area_cache = vma->vm_start;
> > + mm->cached_hole_size = largest_hole;
> > }
> >
> > /* remember the largest hole we saw so far */
>
> But it never gets used.
Doh. The author reviewed that for me too!
I'll drop it - please rework, retest and resend?
On 03/08/2012 03:50 PM, Andrew Morton wrote:
> On Thu, 8 Mar 2012 18:41:05 +1100 Stephen Rothwell <sfr@canb.auug.org.au> wrote:
>
>> Hi Andrew,
>>
>> On Wed, 7 Mar 2012 23:32:20 -0800 Andrew Morton <akpm@linux-foundation.org> wrote:
>>>
>>> Actually they're different. I reworked the earlier patch as below.
>>
>> OK. But some comments.
>>
>>> diff -puN arch/x86/mm/hugetlbpage.c~hugetlb-drop-prev_vma-in-hugetlb_get_unmapped_area_topdown arch/x86/mm/hugetlbpage.c
>>> --- a/arch/x86/mm/hugetlbpage.c~hugetlb-drop-prev_vma-in-hugetlb_get_unmapped_area_topdown
>>> +++ a/arch/x86/mm/hugetlbpage.c
>>> @@ -308,7 +308,7 @@ static unsigned long hugetlb_get_unmappe
>>> {
>>> struct hstate *h = hstate_file(file);
>>> struct mm_struct *mm = current->mm;
>>> - struct vm_area_struct *vma, *prev_vma;
>>> + struct vm_area_struct *vma;
>>
>> You still remove prev_vma ...
>>
>>> unsigned long base = mm->mmap_base, addr = addr0;
>>> unsigned long largest_hole = mm->cached_hole_size;
>>> int first_time = 1;
>>> @@ -334,25 +334,16 @@ try_again:
>>> * i.e. return with success:
>>> */
>>> vma = find_vma(mm, addr);
>>> - if (!vma)
>>> - return addr;
>>> -
>>> - /*
>>> - * new region fits between prev_vma->vm_end and
>>> - * vma->vm_start, use it:
>>> - */
>>> - prev_vma = vma->vm_prev;
>>> - if (addr + len <= vma->vm_start &&
>>> - (!prev_vma || (addr >= prev_vma->vm_end))) {
>>> + if (vma)
>>> + prev_vma = vma->vm_prev;
>>
>> But then assign to it ...
>>
>>> + if (!vma || addr + len <= vma->vm_start) {
>>> /* remember the address as a hint for next time */
>>> mm->cached_hole_size = largest_hole;
>>> return (mm->free_area_cache = addr);
>>> - } else {
>>> + } else if (mm->free_area_cache == vma->vm_end) {
>>> /* pull free_area_cache down to the first hole */
>>> - if (mm->free_area_cache == vma->vm_end) {
>>> - mm->free_area_cache = vma->vm_start;
>>> - mm->cached_hole_size = largest_hole;
>>> - }
>>> + mm->free_area_cache = vma->vm_start;
>>> + mm->cached_hole_size = largest_hole;
>>> }
>>>
>>> /* remember the largest hole we saw so far */
>>
>> But it never gets used.
>
> Doh. The author reviewed that for me too!
>
> I'll drop it - please rework, retest and resend?
I am sorry, i will repost it!
On 03/08/2012 03:50 PM, Andrew Morton wrote: > Doh. The author reviewed that for me too! > > I'll drop it - please rework, retest and resend? > Andrew, the patch has been reworked. Subject: [PATCH] hugetlb: drop prev_vma in hugetlb_get_unmapped_area_topdown() After the call find_vma_prev(mm, addr, &prev_vma), the following condition is always true: !prev_vma || (addr >= prev_vma->vm_end) it can happily drop prev_vma and use find_vma() instead of find_vma_prev(). Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> --- arch/x86/mm/hugetlbpage.c | 18 +++++------------- 1 files changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 8ecbb4b..0c1701b 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -308,7 +308,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, { struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *prev_vma; + struct vm_area_struct *vma; unsigned long base = mm->mmap_base, addr = addr0; unsigned long largest_hole = mm->cached_hole_size; int first_time = 1; @@ -337,22 +337,14 @@ try_again: if (!vma) return addr; - /* - * new region fits between prev_vma->vm_end and - * vma->vm_start, use it: - */ - prev_vma = vma->vm_prev; - if (addr + len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { + if (addr + len <= vma->vm_start) { /* remember the address as a hint for next time */ mm->cached_hole_size = largest_hole; return (mm->free_area_cache = addr); - } else { + } else if (mm->free_area_cache == vma->vm_end) { /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } + mm->free_area_cache = vma->vm_start; + mm->cached_hole_size = largest_hole; } /* remember the largest hole we saw so far */ -- 1.7.7.6
On Thu, 08 Mar 2012 17:59:02 +0800 Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> wrote: > Andrew, the patch has been reworked. > > Subject: [PATCH] hugetlb: drop prev_vma in hugetlb_get_unmapped_area_topdown() > > After the call find_vma_prev(mm, addr, &prev_vma), the following condition > is always true: > > !prev_vma || (addr >= prev_vma->vm_end) > > it can happily drop prev_vma and use find_vma() instead of find_vma_prev(). > Well OK, but the changelog is wrong - we don't call find_vma_prev() here any more. How does this look? From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Subject: hugetlb: remove prev_vma from hugetlb_get_unmapped_area_topdown() After looking up the vma which covers or follows the cached search address, the following condition is always true: !prev_vma || (addr >= prev_vma->vm_end) so we can stop checking the previous VMA altogether. Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> --- arch/x86/mm/hugetlbpage.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff -puN arch/x86/mm/hugetlbpage.c~hugetlb-drop-prev_vma-in-hugetlb_get_unmapped_area_topdown arch/x86/mm/hugetlbpage.c --- a/arch/x86/mm/hugetlbpage.c~hugetlb-drop-prev_vma-in-hugetlb_get_unmapped_area_topdown +++ a/arch/x86/mm/hugetlbpage.c @@ -308,7 +308,7 @@ static unsigned long hugetlb_get_unmappe { struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *prev_vma; + struct vm_area_struct *vma; unsigned long base = mm->mmap_base; unsigned long addr = addr0; unsigned long largest_hole = mm->cached_hole_size; @@ -340,22 +340,14 @@ try_again: if (!vma) return addr; - /* - * new region fits between prev_vma->vm_end and - * vma->vm_start, use it: - */ - prev_vma = vma->vm_prev; - if (addr + len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { + if (addr + len <= vma->vm_start) { /* remember the address as a hint for next time */ mm->cached_hole_size = largest_hole; return (mm->free_area_cache = addr); - } else { + } else if (mm->free_area_cache == vma->vm_end) { /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } + mm->free_area_cache = vma->vm_start; + mm->cached_hole_size = largest_hole; } /* remember the largest hole we saw so far */ _
On Thu, Mar 8, 2012 at 1:24 PM, Andrew Morton <akpm@linux-foundation.org> wrote:
>
> How does this look?
Looks good to me.
Linus
[-- Attachment #1: Type: text/plain, Size: 435 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/proc/base.c between commit eb94cd96e05d ("fs, proc: fix ABBA deadlock in case of execution attempt of map_files/ entries") from Linus' tree and commit "proc: unify ptrace_may_access() locking code" from the akpm tree. I used the version of the conflicting code from Linus' tree. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
On Mon, May 21, 2012 at 06:13:47PM +1000, Stephen Rothwell wrote:
> Hi Andrew,
>
> Today's linux-next merge of the akpm tree got a conflict in
> fs/proc/base.c between commit eb94cd96e05d ("fs, proc: fix ABBA deadlock
> in case of execution attempt of map_files/ entries") from Linus' tree and
> commit "proc: unify ptrace_may_access() locking code" from the akpm tree.
>
> I used the version of the conflicting code from Linus' tree.
Hi Stephen, gimme some time to take a look on...
Cyrill
[-- Attachment #1: Type: text/plain, Size: 1004 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/proc/base.c between commit 30a08bf2d31d ("proc: move fd symlink i_mode calculations into tid_fd_revalidate()") from the tree and commit "proc: pass "fd" by value in /proc/*/{fd,fdinfo} code" from the akpm tree. Just context changes. I fixed it up (see below) and can carry the fix as necessary. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/proc/base.c index 2308157,c560fc8..0000000 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@@ -1847,7 -1835,9 +1847,7 @@@ static const struct dentry_operations t static struct dentry *proc_fd_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - unsigned fd = *(const unsigned *)ptr; + unsigned fd = (unsigned long)ptr; - struct file *file; - struct files_struct *files; struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-ENOENT); [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 392 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in mm/memcontrol.c between commit 78ccf5b5ab83 ("mm: memcg: print statistics directly to seq_file") and others from Linus' tree and commit "memcg: add mlock statistic in memory.stat" from the akpm tree. I just dropped the akpm patch for now. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 877 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in mm/swap.c between commit fa9add641b1b ("mm/memcg: apply add/del_page to lruvec") from Linus' tree and commit "mm/huge_memory.c: use lockdep_assert_held()" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc mm/swap.c index 4e7e2ec,8ff73d8..0000000 --- a/mm/swap.c +++ b/mm/swap.c @@@ -685,8 -653,7 +685,7 @@@ void lru_add_page_tail(struct page *pag VM_BUG_ON(!PageHead(page)); VM_BUG_ON(PageCompound(page_tail)); VM_BUG_ON(PageLRU(page_tail)); - VM_BUG_ON(NR_CPUS != 1 && - !spin_is_locked(&lruvec_zone(lruvec)->lru_lock)); - lockdep_assert_held(&zone->lru_lock); ++ lockdep_assert_held(&lruvec_zone(lruvec)->lru_lock); SetPageLRU(page_tail); [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
On Thu, May 31, 2012 at 02:13:29PM +1000, Stephen Rothwell wrote:
> Hi Andrew,
>
> Today's linux-next merge of the akpm tree got a conflict in
> mm/memcontrol.c between commit 78ccf5b5ab83 ("mm: memcg: print statistics
> directly to seq_file") and others from Linus' tree and commit "memcg: add
> mlock statistic in memory.stat" from the akpm tree.
>
> I just dropped the akpm patch for now.
Thanks, Stephen.
However, the patch is known to be broken and, according to mm-commits,
was removed from -mm on May 16 already. How'd it reappear?
[-- Attachment #1: Type: text/plain, Size: 999 bytes --] Hi Johannes, On Thu, 31 May 2012 09:25:05 +0200 Johannes Weiner <hannes@cmpxchg.org> wrote: > > On Thu, May 31, 2012 at 02:13:29PM +1000, Stephen Rothwell wrote: > > > > Today's linux-next merge of the akpm tree got a conflict in > > mm/memcontrol.c between commit 78ccf5b5ab83 ("mm: memcg: print statistics > > directly to seq_file") and others from Linus' tree and commit "memcg: add > > mlock statistic in memory.stat" from the akpm tree. > > > > I just dropped the akpm patch for now. > > Thanks, Stephen. > > However, the patch is known to be broken and, according to mm-commits, > was removed from -mm on May 16 already. How'd it reappear? Andrew only send me a new set of patches at irregular intervals, in this case the last set arrived on May 10/11. I just keep rebasing the patches until Andrew sends me a new set (Andrew: big hint :-)). I will remove that patch tomorrow in any case. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 706 bytes --] On Thu, 31 May 2012 18:24:51 +1000 Stephen Rothwell <sfr@canb.auug.org.au> wrote: > > > On Thu, May 31, 2012 at 02:13:29PM +1000, Stephen Rothwell wrote: > > > > > > Today's linux-next merge of the akpm tree got a conflict in > > > mm/memcontrol.c between commit 78ccf5b5ab83 ("mm: memcg: print statistics > > > directly to seq_file") and others from Linus' tree and commit "memcg: add > > > mlock statistic in memory.stat" from the akpm tree. > > > > > > I just dropped the akpm patch for now. ... > I will remove that patch tomorrow in any case. I should actually read my email - I already did remove it. :-) -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1127 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in kernel/fork.c between commit touching paths: ("fork: call complete_vfork_done() after clearing child_tid and flushing rss-counters") from Linus' tree and commit "mm: correctly synchronize rss-counters at exit/exec" from the akpm tree. Just context changes. I fixed it up (see below) and can carry the fix as necessary. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc kernel/fork.c index 095a951,30e9ee8..0000000 --- a/kernel/fork.c +++ b/kernel/fork.c @@@ -808,11 -811,12 +808,19 @@@ void mm_release(struct task_struct *tsk } /* + * Final rss-counter synchronization. After this point there must be + * no pagefaults into this mm from the current context. Otherwise + * mm->rss_stat will be inconsistent. + */ + if (mm) + sync_mm_rss(mm); ++ ++ /* + * All done, finally we can wake up parent and return this mm to him. + * Also kthread_stop() uses this completion for synchronization. + */ + if (tsk->vfork_done) + complete_vfork_done(tsk); } /* [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 954 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in kernel/printk.c between commit 084681d14e42 ("printk: flush continuation lines immediately to console") from Linus' tree and commit "printk: add generic functions to find KERN_<LEVEL> headers" from the akpm tree. Just context changes. I fixed it up (see below) and can carry the fix as necessary. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc kernel/printk.c index a6f19ef,f591849..0000000 --- a/kernel/printk.c +++ b/kernel/printk.c @@@ -1399,6 -1301,11 +1399,7 @@@ asmlinkage int vprintk_emit(int facilit const char *fmt, va_list args) { static int recursion_bug; - static char cont_buf[LOG_LINE_MAX]; - static size_t cont_len; - static int cont_level; + int kern_level; - static struct task_struct *cont_task; static char textbuf[LOG_LINE_MAX]; char *text = textbuf; size_t text_len; [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1055 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/btrfs/relocation.c between commit 23291a044c31 ("Btrfs: fix error handling in __add_reloc_root()") from Linus' tree and commit "btrfs: use printk_get_level and printk_skip_level, add __printf, fix fallout" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/btrfs/relocation.c index c5dbd91,790f492..0000000 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@@ -1239,11 -1239,10 +1239,11 @@@ static int __must_check __add_reloc_roo node->bytenr, &node->rb_node); spin_unlock(&rc->reloc_root_tree.lock); if (rb_node) { - kfree(node); btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " "for start=%llu while inserting into relocation " - "tree\n"); + "tree\n", node->bytenr); + kfree(node); + return -EEXIST; } list_add_tail(&root->root_list, &rc->reloc_roots); [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1259 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in mm/page_alloc.c between commit c67fe3752abe ("mm: compaction: Abort async compaction if locks are contended or taking too long") from Linus' tree and commit "mm: remove __GFP_NO_KSWAPD" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc mm/page_alloc.c index 5b3cc33,cefac39..0000000 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@@ -2436,11 -2429,10 +2435,10 @@@ rebalance /* * If compaction is deferred for high-order allocations, it is because * sync compaction recently failed. In this is the case and the caller - * has requested the system not be heavily disrupted, fail the - * allocation now instead of entering direct reclaim + * requested a movable allocation that does not heavily disrupt the + * system then fail the allocation instead of entering direct reclaim. */ - if ((deferred_compaction || contended_compaction) && - (gfp_mask & __GFP_NO_KSWAPD)) - if (deferred_compaction) ++ if (deferred_compaction || contended_compaction) goto nopage; /* Try direct reclaim and then allocating */ [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
On Wed, Aug 22, 2012 at 03:59:41PM +1000, Stephen Rothwell wrote: > Hi Andrew, > > Today's linux-next merge of the akpm tree got a conflict in > mm/page_alloc.c between commit c67fe3752abe ("mm: compaction: Abort async > compaction if locks are contended or taking too long") from Linus' tree > and commit "mm: remove __GFP_NO_KSWAPD" from the akpm tree. > > I fixed it up (see below) and can carry the fix as necessary. Hi Stephen, I think this will be a temporary problem. In the patch series that contained "mm: compaction: Abort async compaction if locks are contended or taking too long" there is a replacement patch for "mm: remove __GFP_NO_KSWAPD" that handles the conflict. According to the mm-commits mailing list the replacement patch has been picked up by Andrew but I see the old versions in linux-next/akpm. It should get resolved the next time Andrews tree gets pushed out. > diff --cc mm/page_alloc.c > index 5b3cc33,cefac39..0000000 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@@ -2436,11 -2429,10 +2435,10 @@@ rebalance > /* > * If compaction is deferred for high-order allocations, it is because > * sync compaction recently failed. In this is the case and the caller > - * has requested the system not be heavily disrupted, fail the > - * allocation now instead of entering direct reclaim > + * requested a movable allocation that does not heavily disrupt the > + * system then fail the allocation instead of entering direct reclaim. > */ > - if ((deferred_compaction || contended_compaction) && > - (gfp_mask & __GFP_NO_KSWAPD)) > - if (deferred_compaction) > ++ if (deferred_compaction || contended_compaction) > goto nopage; > > /* Try direct reclaim and then allocating */ It is not obvious at all but the correct resolution is actually the following. Andrew should already have the right version. @@ -2437,7 +2436,7 @@ rebalance: * system then fail the allocation instead of entering direct reclaim. */ if ((deferred_compaction || contended_compaction) && - (gfp_mask & __GFP_NO_KSWAPD)) + (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE) goto nopage; /* Try direct reclaim and then allocating */ -- Mel Gorman SUSE Labs
[-- Attachment #1: Type: text/plain, Size: 1057 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in drivers/mtd/mtdchar.c between commit 9c603e53d380 ("mtdchar: fix offset overflow detection") from Linus' tree and commit "mm: kill vma flag VM_RESERVED and mm->reserved_vm counter" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc drivers/mtd/mtdchar.c index a6e7451,c4e01c5..0000000 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@@ -1177,12 -1145,8 +1177,12 @@@ static int mtdchar_mmap(struct file *fi return -EINVAL; off += start; - vma->vm_pgoff = off >> PAGE_SHIFT; + /* Did that overflow? */ + if (off < start) + return -EINVAL; + if (set_vm_offset(vma, off) < 0) + return -EINVAL; - vma->vm_flags |= VM_IO | VM_RESERVED; + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; #ifdef pgprot_noncached if (file->f_flags & O_DSYNC || off >= __pa(high_memory)) [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 918 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in arch/arm64/include/asm/unistd32.h between commit f3d447a97f24 ("arm64: Do not include asm/unistd32.h in asm/unistd.h") from Linus' tree and commit "compat: generic compat_sys_sched_rr_get_interval implementation" from the akpm tree. I fixed it up (I think - see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 63f853f..0f13ca8 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -25,5 +25,6 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_COMPAT_SYS_SENDFILE +#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL #endif #include <uapi/asm/unistd.h> [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
Hi Stephen,
On Mon, Oct 15, 2012 at 03:07:28AM +0100, Stephen Rothwell wrote:
> Today's linux-next merge of the akpm tree got a conflict in
> arch/arm64/include/asm/unistd32.h between commit f3d447a97f24 ("arm64: Do
> not include asm/unistd32.h in asm/unistd.h") from Linus' tree and commit
> "compat: generic compat_sys_sched_rr_get_interval implementation" from
> the akpm tree.
>
> I fixed it up (I think - see below) and can carry the fix as necessary
> (no action is required).
>
> --
> Cheers,
> Stephen Rothwell sfr@canb.auug.org.au
>
> diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
> index 63f853f..0f13ca8 100644
> --- a/arch/arm64/include/asm/unistd.h
> +++ b/arch/arm64/include/asm/unistd.h
> @@ -25,5 +25,6 @@
> #define __ARCH_WANT_SYS_SIGPROCMASK
> #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
> #define __ARCH_WANT_COMPAT_SYS_SENDFILE
> +#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
> #endif
> #include <uapi/asm/unistd.h>
The fix is correct (I moved the __ARCH_WANT_* from unistd32.h to
unistd.h) but I'll post another version of the patch tomorrow for Andrew
to merge.
Thanks.
--
Catalin
[-- Attachment #1: Type: text/plain, Size: 435 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in mm/page_alloc.c between commit 5576646f3c1a ("revert "mm: fix-up zone present pages"") from Linus' tree and commit "mm: fix a regression with HIGHMEM" from the akpm tree. I fixed it up (by dropping the akpm tree patch) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 421 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in drivers/net/ethernet/jme.c between commit 71c6c837a0fe ("drivers/net: fix tasklet misuse issue") from Linus' tree and commit "tasklet: ignore disabled tasklet in tasklet_action()" from the akpm tree. I am not sure what to do here, so I have dropped the akpm patch. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 952 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in mm/highmem.c between commit 498c22802123 ("mm: highmem: don't treat PKMAP_ADDR (LAST_PKMAP) as a highmem address") from Linus' tree and commit "mm, highmem: use PKMAP_NR() to calculate an index of pkmap" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc mm/highmem.c index 09fc744,017cccc..0000000 --- a/mm/highmem.c +++ b/mm/highmem.c @@@ -98,8 -98,8 +98,8 @@@ struct page *kmap_to_page(void *vaddr { unsigned long addr = (unsigned long)vaddr; - if (addr >= PKMAP_ADDR(0) && addr <= PKMAP_ADDR(LAST_PKMAP)) { + if (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) { - int i = (addr - PKMAP_ADDR(0)) >> PAGE_SHIFT; + int i = PKMAP_NR(addr); return pte_page(pkmap_page_table[i]); } [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
On Mon, Nov 26, 2012 at 8:48 PM, Stephen Rothwell <sfr@canb.auug.org.au> wrote: > Hi Andrew, > > Today's linux-next merge of the akpm tree got a conflict in > drivers/net/ethernet/jme.c between commit 71c6c837a0fe ("drivers/net: fix > tasklet misuse issue") from Linus' tree and commit "tasklet: ignore > disabled tasklet in tasklet_action()" from the akpm tree. > You can simply remove the following part of the patch @@ -1862,8 +1862,8 @@ jme_open(struct net_device *netdev) tasklet_enable(&jme->linkch_task); tasklet_enable(&jme->txclean_task); - tasklet_hi_enable(&jme->rxclean_task); - tasklet_hi_enable(&jme->rxempty_task); + tasklet_enable(&jme->rxclean_task); + tasklet_enable(&jme->rxempty_task); rc = jme_request_irq(jme); if (rc) Do you want me to re-generate a patch for you? > I am not sure what to do here, so I have dropped the akpm patch. > > -- > Cheers, > Stephen Rothwell sfr@canb.auug.org.au
[-- Attachment #1: Type: text/plain, Size: 505 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in include/linux/percpu-rwsem.h between commit 4b05a1c74d1c ("percpu-rwsem: use synchronize_sched_expedited") from Linus' tree and commit "percpu_rw_semaphore: reimplement to not block the readers unnecessarily" from the akpm tree. I fixed it up (using the version from the akpm tree) and can carry the fix as necessary (more action may be required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 536 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in mm/vmscan.c between commit c702418f8a2f ("mm: vmscan: do not keep kswapd looping forever due to individual uncompactable zones") from Linus' tree and commit "mm: use IS_ENABLED(CONFIG_COMPACTION) instead of COMPACTION_BUILD" from the akpm tree. The former removed part of the code modified by the latter, so I did that and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 2229 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in include/linux/gfp.h between commit caf491916b1c ("Revert "revert "Revert "mm: remove __GFP_NO_KSWAPD""" and associated damage") from Linus' tree and commit "mm: add a __GFP_KMEMCG flag" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc include/linux/gfp.h index 31e8041,5520344..0000000 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@@ -30,10 -30,10 +30,11 @@@ struct vm_area_struct #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u #define ___GFP_RECLAIMABLE 0x80000u -#define ___GFP_NOTRACK 0x100000u -#define ___GFP_OTHER_NODE 0x200000u -#define ___GFP_WRITE 0x400000u -#define ___GFP_KMEMCG 0x800000u +#define ___GFP_NOTRACK 0x200000u +#define ___GFP_NO_KSWAPD 0x400000u +#define ___GFP_OTHER_NODE 0x800000u +#define ___GFP_WRITE 0x1000000u ++#define ___GFP_KMEMCG 0x2000000u /* * GFP bitmasks.. @@@ -86,17 -86,16 +87,17 @@@ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ +#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ - + #define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */ /* * This may seem redundant, but it's a way of annotating false positives vs. * allocations that simply cannot be supported (e.g. page tables). */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) - #define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT 24 /* Room for N __GFP_FOO bits */ ++#define __GFP_BITS_SHIFT 26 /* Room for N __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* This equals 0, but use constants in case they ever change */ [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1189 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in include/linux/gfp.h between commit caf491916b1c ("Revert "revert "Revert "mm: remove __GFP_NO_KSWAPD""" and associated damage") from Linus' tree and commit "mm: add a reminder comment for __GFP_BITS_SHIFT" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc include/linux/gfp.h index 976a8e3,c0fb4d8..0000000 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@@ -30,11 -30,11 +30,12 @@@ struct vm_area_struct #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u #define ___GFP_RECLAIMABLE 0x80000u -#define ___GFP_NOTRACK 0x100000u -#define ___GFP_OTHER_NODE 0x200000u -#define ___GFP_WRITE 0x400000u -#define ___GFP_KMEMCG 0x800000u +#define ___GFP_NOTRACK 0x200000u +#define ___GFP_NO_KSWAPD 0x400000u +#define ___GFP_OTHER_NODE 0x800000u +#define ___GFP_WRITE 0x1000000u +#define ___GFP_KMEMCG 0x2000000u + /* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* * GFP bitmasks.. [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
On 12/11/2012 09:22 AM, Stephen Rothwell wrote:
> Hi Andrew,
>
> Today's linux-next merge of the akpm tree got a conflict in
> include/linux/gfp.h between commit caf491916b1c ("Revert "revert "Revert
> "mm: remove __GFP_NO_KSWAPD""" and associated damage") from Linus' tree
> and commit "mm: add a __GFP_KMEMCG flag" from the akpm tree.
>
> I fixed it up (see below) and can carry the fix as necessary (no action
> is required).
>
Fix is fine, thanks.
[-- Attachment #1: Type: text/plain, Size: 488 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got conflicts in include/linux/mempolicy.h and mm/mempolicy.c between commit 42288fe366c4 ("mm: mempolicy: Convert shared_policy mutex to spinlock") from Linus' tree and commit "mm, mempolicy: introduce spinlock to read shared policy tree" from the akpm tree. These two commits seem to be fixing the same problem, so I dropped the akpm tree commit. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 2367 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in mm/shmem.c between commit 26567cdbbf1a ("fix nommu breakage in shmem.c") from Linus' tree and commit "shmem-fix-build-regression-fix" from the akpm tree. I fixed it up (I used the latter version - see the full new version below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 5bf5500..69e37c2 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -6,7 +6,13 @@ struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir, extern struct dentry *ramfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); -#ifndef CONFIG_MMU +#ifdef CONFIG_MMU +static inline int +ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) +{ + return 0; +} +#else extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize); extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file, unsigned long addr, diff --git a/mm/shmem.c b/mm/shmem.c index b863bf3..a8d9da0 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -25,6 +25,7 @@ #include <linux/init.h> #include <linux/vfs.h> #include <linux/mount.h> +#include <linux/ramfs.h> #include <linux/pagemap.h> #include <linux/file.h> #include <linux/mm.h> @@ -2830,8 +2831,6 @@ out4: * effectively equivalent, but much lighter weight. */ -#include <linux/ramfs.h> - static struct file_system_type shmem_fs_type = { .name = "tmpfs", .mount = ramfs_mount, @@ -2897,7 +2896,6 @@ static struct dentry_operations anon_ops = { */ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) { - int error; struct file *res; struct inode *inode; struct path path; @@ -2932,11 +2930,10 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags d_instantiate(path.dentry, inode); inode->i_size = size; clear_nlink(inode); /* It is unlinked */ -#ifndef CONFIG_MMU + res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size)); if (IS_ERR(res)) goto put_dentry; -#endif res = alloc_file(&path, FMODE_WRITE | FMODE_READ, &shmem_file_operations); [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1311 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/proc/inode.c between commit ("vfs,proc: guarantee unique inodes in /proc") from Linus' tree and commit "procfs-improve-scaling-in-proc-v5" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/proc/inode.c index 38f0775,f53660a..0000000 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@@ -463,10 -464,10 +464,11 @@@ static const struct file_operations pro struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) { - struct inode *inode = iget_locked(sb, de->low_ino); + struct inode *inode = new_inode_pseudo(sb); + const struct file_operations *fops; - if (inode && (inode->i_state & I_NEW)) { + if (inode) { + inode->i_ino = de->low_ino; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; PROC_I(inode)->pde = de; @@@ -491,9 -494,11 +495,10 @@@ #endif inode->i_fop = &proc_reg_file_ops; } else { - inode->i_fop = de->proc_fops; + inode->i_fop = fops; } } + rcu_read_unlock(); - unlock_new_inode(inode); } else pde_put(de); return inode; [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1074 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in ipc/msg.c between commit 2dc958fa2fe6 ("ipc: set msg back to -EAGAIN if copy wasn't performed") from Linus' tree and commit "ipc: remove msg handling from queue scan" from the akpm tree. I fixed it up (I think - see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc ipc/msg.c index 4eaf3fd,628c6ea..0000000 --- a/ipc/msg.c +++ b/ipc/msg.c @@@ -860,17 -860,8 +860,9 @@@ long do_msgrcv(int msqid, void __user * walk_msg->m_type != 1) { msgtyp = walk_msg->m_type - 1; } else if (msgflg & MSG_COPY) { - if (copy_number == msg_counter) { - /* - * Found requested message. - * Copy it. - */ - msg = copy_msg(msg, copy); - if (IS_ERR(msg)) - goto out_unlock; + if (copy_number == msg_counter) break; - } + msg = ERR_PTR(-EAGAIN); } else break; msg_counter++; [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 2727 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in kernel/kthread.c between commit f2530dc71cf0 ("kthread: Prevent unpark race which puts threads on the wrong cpu") from Linus' tree and commit "kthread: kill task_get_live_kthread()" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc kernel/kthread.c index e820aa6,b9db231..0000000 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@@ -324,28 -324,6 +324,22 @@@ struct task_struct *kthread_create_on_c return p; } - static struct kthread *task_get_live_kthread(struct task_struct *k) - { - get_task_struct(k); - return to_live_kthread(k); - } - +static void __kthread_unpark(struct task_struct *k, struct kthread *kthread) +{ + clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + /* + * We clear the IS_PARKED bit here as we don't wait + * until the task has left the park code. So if we'd + * park before that happens we'd see the IS_PARKED bit + * which might be about to be cleared. + */ + if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { + if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) + __kthread_bind(k, kthread->cpu, TASK_PARKED); + wake_up_state(k, TASK_PARKED); + } +} + /** * kthread_unpark - unpark a thread created by kthread_create(). * @k: thread created by kthread_create(). @@@ -356,11 -334,22 +350,10 @@@ */ void kthread_unpark(struct task_struct *k) { - struct kthread *kthread = task_get_live_kthread(k); + struct kthread *kthread = to_live_kthread(k); - if (kthread) { - clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); - /* - * We clear the IS_PARKED bit here as we don't wait - * until the task has left the park code. So if we'd - * park before that happens we'd see the IS_PARKED bit - * which might be about to be cleared. - */ - if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { - if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) - __kthread_bind(k, kthread->cpu); - wake_up_process(k); - } - } + if (kthread) + __kthread_unpark(k, kthread); - put_task_struct(k); } /** @@@ -415,9 -403,12 +407,12 @@@ int kthread_stop(struct task_struct *k int ret; trace_sched_kthread_stop(k); + + get_task_struct(k); + kthread = to_live_kthread(k); if (kthread) { set_bit(KTHREAD_SHOULD_STOP, &kthread->flags); - clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + __kthread_unpark(k, kthread); wake_up_process(k); wait_for_completion(&kthread->exited); } [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 2293 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/bio.c between commit 0a82a8d132b2 ("Revert "block: add missing block_bio_complete() tracepoint"") from Linus' tree and commit "block, aio: batch completion for bios/kiocbs" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/bio.c index e082907,a8081ae..0000000 --- a/fs/bio.c +++ b/fs/bio.c @@@ -1688,31 -1689,42 +1689,40 @@@ void bio_flush_dcache_pages(struct bio EXPORT_SYMBOL(bio_flush_dcache_pages); #endif - /** - * bio_endio - end I/O on a bio - * @bio: bio - * @error: error, if any - * - * Description: - * bio_endio() will end I/O on the whole bio. bio_endio() is the - * preferred way to end I/O on a bio, it takes care of clearing - * BIO_UPTODATE on error. @error is 0 on success, and and one of the - * established -Exxxx (-EIO, for instance) error values in case - * something went wrong. No one should call bi_end_io() directly on a - * bio unless they own it and thus know that it has an end_io - * function. - **/ - void bio_endio(struct bio *bio, int error) + static inline void __bio_endio(struct bio *bio, struct batch_complete *batch) { - if (error) + if (bio->bi_error) clear_bit(BIO_UPTODATE, &bio->bi_flags); else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - error = -EIO; + bio->bi_error = -EIO; if (bio->bi_end_io) - bio->bi_end_io(bio, error, NULL); + bio->bi_end_io(bio, bio->bi_error, batch); + } + + void bio_endio_batch(struct bio *bio, int error, struct batch_complete *batch) + { + if (error) + bio->bi_error = error; + - trace_block_bio_complete(bio, error); - + if (batch) + bio_list_add(&batch->bio, bio); + else + __bio_endio(bio, batch); + + } + EXPORT_SYMBOL(bio_endio_batch); + + void batch_complete(struct batch_complete *batch) + { + struct bio *bio; + + while ((bio = bio_list_pop(&batch->bio))) + __bio_endio(bio, batch); + + batch_complete_aio(batch); } - EXPORT_SYMBOL(bio_endio); + EXPORT_SYMBOL(batch_complete); void bio_pair_release(struct bio_pair *bp) { [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 881 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/aio.c between commit 91d80a84bbc8 ("aio: fix possible invalid memory access when DEBUG is enabled") from Linus' tree and commit "aio: dprintk() -> pr_debug()" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/aio.c index 670cb8b,1574cb2..0000000 --- a/fs/aio.c +++ b/fs/aio.c @@@ -802,9 -794,8 +794,8 @@@ static int aio_read_evt(struct kioctx * spin_unlock(&info->ring_lock); out: - dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret, - (unsigned long)ring->head, (unsigned long)ring->tail); - kunmap_atomic(ring); + pr_debug("%d h%u t%u\n", ret, ring->head, ring->tail); + kunmap_atomic(ring); return ret; } [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1758 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in ipc/msg.c between commit 8ac6ed5857c8 ("ipc: implement MSG_COPY as a new receive mode") from Linus' tree and commit "revert "ipc: don't allocate a copy larger than max"" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). [ The whole patch looks like this, now: diff --git a/ipc/msg.c b/ipc/msg.c index d0c6d96..2978721 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -827,16 +827,15 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, struct ipc_namespace *ns; struct msg_msg *copy = NULL; - ns = current->nsproxy->ipc_ns; - if (msqid < 0 || (long) bufsz < 0) return -EINVAL; if (msgflg & MSG_COPY) { - copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); + copy = prepare_copy(buf, bufsz); if (IS_ERR(copy)) return PTR_ERR(copy); } mode = convert_mode(&msgtyp, msgflg); + ns = current->nsproxy->ipc_ns; msq = msg_lock_check(ns, msqid); if (IS_ERR(msq)) { ] -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc ipc/msg.c index d0c6d96,4eaf3fd..0000000 --- a/ipc/msg.c +++ b/ipc/msg.c @@@ -826,13 -818,12 +826,11 @@@ long do_msgrcv(int msqid, void __user * int mode; struct ipc_namespace *ns; struct msg_msg *copy = NULL; - unsigned long copy_number = 0; if (msqid < 0 || (long) bufsz < 0) return -EINVAL; if (msgflg & MSG_COPY) { - copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); - copy = prepare_copy(buf, bufsz, msgflg, &msgtyp, ©_number); ++ copy = prepare_copy(buf, bufsz); if (IS_ERR(copy)) return PTR_ERR(copy); } [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 2031 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in kernel/auditsc.c between commit b24a30a73054 ("audit: fix event coverage of AUDIT_ANOM_LINK") from Linus' tree and commit "audit: fix mq_open and mq_unlink to add the MQ root as a hidden parent audit_names record" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). BTW, commit b24a30a73054 from Linus' tree has Eric Paris as Author and Committer, but is only Signed-off-by Kees Cook. It is part of a long series that did not go anywhere near linus-next. I do have an audit tree in linux-next (git://git.kernel.org/pub/scm/linux/kernel/git/viro/audit.git#for-next) but that hasn't seen any recent activity. There is also another commit in that series that doesn't even have a Signed-off-by line at all (4d3fb709b285 "helper for some session id stuff"). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc kernel/auditsc.c index 3c8a601,f9eaa16..0000000 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@@ -1399,8 -1695,11 +1399,11 @@@ static void audit_log_exit(struct audit } i = 0; - list_for_each_entry(n, &context->names_list, list) + list_for_each_entry(n, &context->names_list, list) { + if (n->hidden) + continue; - audit_log_name(context, n, i++, &call_panic); + audit_log_name(context, n, NULL, i++, &call_panic); + } /* Send end of event record to help user space know we are finished */ ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); diff --git a/kernel/audit.h b/kernel/audit.h index 1c95131..52dfbfc 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -97,6 +97,7 @@ struct audit_names { struct audit_cap_data fcap; unsigned int fcap_ver; unsigned char type; /* record type */ + bool hidden; /* don't log this record */ /* * This was an allocated audit_names and not from the array of * names allocated in the task audit context. Thus this name [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
On Mon, 2013-05-13 at 12:07 +1000, Stephen Rothwell wrote: > Hi Andrew, > > Today's linux-next merge of the akpm tree got a conflict in > kernel/auditsc.c between commit b24a30a73054 ("audit: fix event coverage > of AUDIT_ANOM_LINK") from Linus' tree and commit "audit: fix mq_open and > mq_unlink to add the MQ root as a hidden parent audit_names record" from > the akpm tree. Actually, I've already picked the patch up for 3.11. So Andrew, you can drop it. > I fixed it up (see below) and can carry the fix as necessary (no action > is required). > > BTW, commit b24a30a73054 from Linus' tree has Eric Paris as Author and > Committer, but is only Signed-off-by Kees Cook. It is part of a long > series that did not go anywhere near linus-next. I do have an audit > tree in linux-next > (git://git.kernel.org/pub/scm/linux/kernel/git/viro/audit.git#for-next) > but that hasn't seen any recent activity. I thought I sent you a note asking for audit to get pulled into -next quite a while back. I'll resend...
[-- Attachment #1: Type: text/plain, Size: 1037 bytes --] Hi Eric, On Sun, 12 May 2013 22:11:10 -0400 Eric Paris <eparis@redhat.com> wrote: > > I thought I sent you a note asking for audit to get pulled into -next > quite a while back. I'll resend... You sent an email on Jan 4: > I know that Al hates audit so I created a new audit tree and decided to > start trying to allow him to avoid the problem whenever possible. So > unless Al objects to me going around him except when I touch the VFS can > you pull my new tree? > > git://git.infradead.org/users/eparis/audit.git To which I replied: > I really don't like getting new tree requests just as Linus is about to > open the merge window. Al says he will look at this stuff and put it > into his audit tree > (git://git.kernel.org/pub/scm/linux/kernel/git/viro/audit-current.git#for-next) > by tomorrow, so I will wait for that. > > If you and Al come to some other arrangement, let me know. I guess it fell through the cracks ... -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
On Sun, May 12, 2013 at 7:11 PM, Eric Paris <eparis@redhat.com> wrote:
> On Mon, 2013-05-13 at 12:07 +1000, Stephen Rothwell wrote:
>> Hi Andrew,
>>
>> Today's linux-next merge of the akpm tree got a conflict in
>> kernel/auditsc.c between commit b24a30a73054 ("audit: fix event coverage
>> of AUDIT_ANOM_LINK") from Linus' tree and commit "audit: fix mq_open and
>> mq_unlink to add the MQ root as a hidden parent audit_names record" from
>> the akpm tree.
>
> Actually, I've already picked the patch up for 3.11. So Andrew, you can
> drop it.
>
>> I fixed it up (see below) and can carry the fix as necessary (no action
>> is required).
>>
>> BTW, commit b24a30a73054 from Linus' tree has Eric Paris as Author and
>> Committer, but is only Signed-off-by Kees Cook. It is part of a long
>> series that did not go anywhere near linus-next. I do have an audit
>> tree in linux-next
>> (git://git.kernel.org/pub/scm/linux/kernel/git/viro/audit.git#for-next)
>> but that hasn't seen any recent activity.
>
> I thought I sent you a note asking for audit to get pulled into -next
> quite a while back. I'll resend...
>
Hrm, how did the Author get mangled?
-Kees
--
Kees Cook
Chrome OS Security
[-- Attachment #1: Type: text/plain, Size: 1688 bytes --] -----Original Message----- From: Kees Cook [keescook@chromium.org] Received: Monday, 13 May 2013, 12:49am To: Eric Paris [eparis@redhat.com] CC: Stephen Rothwell [sfr@canb.auug.org.au]; Andrew Morton [akpm@linux-foundation.org]; Linus [torvalds@linux-foundation.org]; Linux-Next [linux-next@vger.kernel.org]; LKML [linux-kernel@vger.kernel.org]; Jeff Layton [jlayton@redhat.com]; Al Viro [viro@zeniv.linux.org.uk] Subject: Re: linux-next: manual merge of the akpm tree with Linus' tree On Sun, May 12, 2013 at 7:11 PM, Eric Paris <eparis@redhat.com> wrote: > On Mon, 2013-05-13 at 12:07 +1000, Stephen Rothwell wrote: >> Hi Andrew, >> >> Today's linux-next merge of the akpm tree got a conflict in >> kernel/auditsc.c between commit b24a30a73054 ("audit: fix event coverage >> of AUDIT_ANOM_LINK") from Linus' tree and commit "audit: fix mq_open and >> mq_unlink to add the MQ root as a hidden parent audit_names record" from >> the akpm tree. > > Actually, I've already picked the patch up for 3.11. So Andrew, you can > drop it. > >> I fixed it up (see below) and can carry the fix as necessary (no action >> is required). >> >> BTW, commit b24a30a73054 from Linus' tree has Eric Paris as Author and >> Committer, but is only Signed-off-by Kees Cook. It is part of a long >> series that did not go anywhere near linus-next. I do have an audit >> tree in linux-next >> (git://git.kernel.org/pub/scm/linux/kernel/git/viro/audit.git#for-next) >> but that hasn't seen any recent activity. > > I thought I sent you a note asking for audit to get pulled into -next > quite a while back. I'll resend... > Hrm, how did the Author get mangled? -Kees -- Kees Cook Chrome OS Security
[-- Attachment #1: Type: text/plain, Size: 2097 bytes --] -----Original Message----- From: Kees Cook [keescook@chromium.org] Received: Monday, 13 May 2013, 12:49am To: Eric Paris [eparis@redhat.com] CC: Stephen Rothwell [sfr@canb.auug.org.au]; Andrew Morton [akpm@linux-foundation.org]; Linus [torvalds@linux-foundation.org]; Linux-Next [linux-next@vger.kernel.org]; LKML [linux-kernel@vger.kernel.org]; Jeff Layton [jlayton@redhat.com]; Al Viro [viro@zeniv.linux.org.uk] Subject: Re: linux-next: manual merge of the akpm tree with Linus' tree On Sun, May 12, 2013 at 7:11 PM, Eric Paris <eparis@redhat.com> wrote: > On Mon, 2013-05-13 at 12:07 +1000, Stephen Rothwell wrote: >> Hi Andrew, >> >> Today's linux-next merge of the akpm tree got a conflict in >> kernel/auditsc.c between commit b24a30a73054 ("audit: fix event coverage >> of AUDIT_ANOM_LINK") from Linus' tree and commit "audit: fix mq_open and >> mq_unlink to add the MQ root as a hidden parent audit_names record" from >> the akpm tree. > > Actually, I've already picked the patch up for 3.11. So Andrew, you can > drop it. > >> I fixed it up (see below) and can carry the fix as necessary (no action >> is required). >> >> BTW, commit b24a30a73054 from Linus' tree has Eric Paris as Author and >> Committer, but is only Signed-off-by Kees Cook. It is part of a long >> series that did not go anywhere near linus-next. I do have an audit >> tree in linux-next >> (git://git.kernel.org/pub/scm/linux/kernel/git/viro/audit.git#for-next) >> but that hasn't seen any recent activity. > > I thought I sent you a note asking for audit to get pulled into -next > quite a while back. I'll resend... > Hrm, how did the Author get mangled? I remember it having a conflict when I tried to merge it (someone else had changed the same area of the header file). So I used patch -p1 and fixed up the reject by hand. I wonder if I screwed up and used git commit -a instead of git am --resolved? That is 2 things I should have caught on final review I missed. :-( Now to wait for everything else I screwed up...
[-- Attachment #1: Type: text/plain, Size: 3747 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got conflicts in fs/btrfs/inode.c and fs/btrfs/volumes.c between commit 9be3395bcd4a ("Btrfs: use a btrfs bioset instead of abusing bio internals") from Linus' tree and commit "block: prep work for batch completion" from the akpm tree. I fixed it up (I think - see below) and can carry the fix as necessary (no action is required). I also noticed that a single conversion of bio_endio to bio_endio_batch is done in the akpm patch but bio_endio_batch is not introduced until a later patch ... :-( -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/btrfs/inode.c index af978f7,551c8bd..0000000 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@@ -6932,14 -6928,11 +6932,15 @@@ struct btrfs_dio_private /* IO errors */ int errors; + /* orig_bio is our btrfs_io_bio */ struct bio *orig_bio; + + /* dio_bio came from fs/direct-io.c */ + struct bio *dio_bio; }; - static void btrfs_endio_direct_read(struct bio *bio, int err) + static void btrfs_endio_direct_read(struct bio *bio, int err, + struct batch_complete *batch) { struct btrfs_dio_private *dip = bio->bi_private; struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; @@@ -6992,12 -6984,12 +6993,13 @@@ failed /* If we had a csum failure make sure to clear the uptodate flag */ if (err) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - dio_end_io(bio, err, batch); + clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); - dio_end_io(dio_bio, err); ++ dio_end_io(dio_bio, err, batch); + bio_put(bio); } - static void btrfs_endio_direct_write(struct bio *bio, int err) + static void btrfs_endio_direct_write(struct bio *bio, int err, + struct batch_complete *batch) { struct btrfs_dio_private *dip = bio->bi_private; struct inode *inode = dip->inode; @@@ -7039,9 -7030,8 +7041,9 @@@ out_done /* If we had an error make sure to clear the uptodate flag */ if (err) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - dio_end_io(bio, err, batch); + clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); - dio_end_io(dio_bio, err); ++ dio_end_io(dio_bio, err, batch); + bio_put(bio); } static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, diff --cc fs/btrfs/volumes.c index 8bffb91,7299b55..0000000 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@@ -5018,9 -5019,35 +5018,10 @@@ int btrfs_rmap_block(struct btrfs_mappi return 0; } - static void btrfs_end_bio(struct bio *bio, int err) -static void *merge_stripe_index_into_bio_private(void *bi_private, - unsigned int stripe_index) -{ - /* - * with single, dup, RAID0, RAID1 and RAID10, stripe_index is - * at most 1. - * The alternative solution (instead of stealing bits from the - * pointer) would be to allocate an intermediate structure - * that contains the old private pointer plus the stripe_index. - */ - BUG_ON((((uintptr_t)bi_private) & 3) != 0); - BUG_ON(stripe_index > 3); - return (void *)(((uintptr_t)bi_private) | stripe_index); -} - -static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private) -{ - return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3)); -} - -static unsigned int extract_stripe_index_from_bio_private(void *bi_private) -{ - return (unsigned int)((uintptr_t)bi_private) & 3; -} - + static void btrfs_end_bio(struct bio *bio, int err, + struct batch_complete *batch) { - struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); + struct btrfs_bio *bbio = bio->bi_private; int is_orig_bio = 0; if (err) { [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
Quoting Stephen Rothwell (2013-05-20 00:04:49) > Hi Andrew, > > Today's linux-next merge of the akpm tree got conflicts in > fs/btrfs/inode.c and fs/btrfs/volumes.c between commit 9be3395bcd4a > ("Btrfs: use a btrfs bioset instead of abusing bio internals") from > Linus' tree and commit "block: prep work for batch completion" from the > akpm tree. > > I fixed it up (I think - see below) and can carry the fix as necessary > (no action is required). > > I also noticed that a single conversion of bio_endio to bio_endio_batch > is done in the akpm patch but bio_endio_batch is not introduced until a > later patch ... :-( Thanks, this looks right and I've linux-next through an aio/dio test on btrfs. Kent, reviewing the merge I see a missing bio_endio_batch conversion. I think this was missing from the original: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index faf20f5..a47bc10 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7084,7 +7084,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err, bio_io_error(dip->orig_bio); } else { set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags); - bio_endio(dip->orig_bio, 0); + bio_endio_batch(dip->orig_bio, 0, batch); } out: bio_put(bio);
[-- Attachment #1: Type: text/plain, Size: 1138 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/aio.c between commit 03e04f048d27 ("aio: fix kioctx not being freed after cancellation at exit time") from Linus' tree and commit "aio: reqs_active -> reqs_available" from the akpm tree. I fixed it up (see below - taken from a similar patch later in the akpm tree) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/aio.c index 7fe5bde,009a7b5..0000000 --- a/fs/aio.c +++ b/fs/aio.c @@@ -306,10 -312,8 +312,10 @@@ static void free_ioctx(struct kioctx *c head = ring->head; kunmap_atomic(ring); - while (atomic_read(&ctx->reqs_active) > 0) { + while (atomic_read(&ctx->reqs_available) < ctx->nr_events - 1) { - wait_event(ctx->wait, head != ctx->tail); + wait_event(ctx->wait, - head != ctx->tail || - atomic_read(&ctx->reqs_active) <= 0); ++ (head != ctx->tail) || ++ (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1)); avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head; [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 2687 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/dcache.c between commit 98474236f72e ("vfs: make the dentry cache use the lockref infrastructure") from Linus' tree and commit "dcache: remove dentries from LRU before putting on dispose list" from the akpm tree. I fixed it up (I think - please check, see below and also check the final result when I publish) and can carry the fix as necessary (no action is required). I also added this fix patch: From: Stephen Rothwell <sfr@canb.auug.org.au> Date: Fri, 30 Aug 2013 18:38:42 +1000 Subject: [PATCH] dcache: fix up for lockref changes Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index 58c5e4b..39939f1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -837,7 +837,7 @@ dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) * counts, just remove them from the LRU. Otherwise give them * another pass through the LRU. */ - if (dentry->d_count) { + if (dentry->d_lockref.count) { list_del_init(&dentry->d_lru); spin_unlock(&dentry->d_lock); return LRU_REMOVED; -- 1.8.4.rc3 -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/dcache.c index 0fe810f,f5f8924..0000000 --- a/fs/dcache.c +++ b/fs/dcache.c @@@ -780,10 -801,15 +794,11 @@@ static struct dentry * try_prune_one_de /* Prune ancestors. */ dentry = parent; while (dentry) { - spin_lock(&dentry->d_lock); - if (dentry->d_count > 1) { - dentry->d_count--; - spin_unlock(&dentry->d_lock); + if (lockref_put_or_lock(&dentry->d_lockref)) - return; - dentry = dentry_kill(dentry, 1); + return NULL; - } + dentry = dentry_kill(dentry, 1, 1); } + return NULL; } static void shrink_dentry_list(struct list_head *list) @@@ -802,12 -828,18 +817,18 @@@ } /* + * The dispose list is isolated and dentries are not accounted + * to the LRU here, so we can simply remove it from the list + * here regardless of whether it is referenced or not. + */ + list_del_init(&dentry->d_lru); + dentry->d_flags &= ~DCACHE_SHRINK_LIST; + + /* * We found an inuse dentry which was not removed from - * the LRU because of laziness during lookup. Do not free - * it - just keep it off the LRU list. + * the LRU because of laziness during lookup. Do not free it. */ - if (dentry->d_count) { + if (dentry->d_lockref.count) { - dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; } [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 867 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/internal.h between commit eed810076685 ("vfs: check unlinked ancestors before mount") from Linus' tree and commit "shrinker: convert superblock shrinkers to new API" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/internal.h index 62f05e9,c909014..0000000 --- a/fs/internal.h +++ b/fs/internal.h @@@ -126,7 -127,7 +127,8 @@@ extern int invalidate_inodes(struct sup * dcache.c */ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); +extern int d_set_mounted(struct dentry *dentry); + extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan); /* * read_write.c [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 459 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/dcache.c between commit db14fc3abcd5 ("vfs: add d_walk()") from Linus' tree and commit "shrinker: convert superblock shrinkers to new API" from the akpm tree. I fixed it up (I just used the version of shrink_dcache_parent from Linus' tree) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 4363 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/dcache.c between commit db14fc3abcd5 ("vfs: add d_walk()") from Linus' tree and commit "dcache: convert to use new lru list infrastructure" from the akpm tree. I fixed it up (hopefully - see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/dcache.c index fc05994,e4df9de..0000000 --- a/fs/dcache.c +++ b/fs/dcache.c @@@ -1313,46 -1197,94 +1297,47 @@@ out * drop the lock and return early due to latency * constraints. */ -static int select_parent(struct dentry *parent, struct list_head *dispose) -{ - struct dentry *this_parent; - struct list_head *next; - unsigned seq; - int found = 0; - int locked = 0; - seq = read_seqbegin(&rename_lock); -again: - this_parent = parent; - spin_lock(&this_parent->d_lock); -repeat: - next = this_parent->d_subdirs.next; -resume: - while (next != &this_parent->d_subdirs) { - struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); - next = tmp->next; - - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); +struct select_data { + struct dentry *start; + struct list_head dispose; + int found; +}; - /* - * move only zero ref count dentries to the dispose list. - * - * Those which are presently on the shrink list, being processed - * by shrink_dentry_list(), shouldn't be moved. Otherwise the - * loop in shrink_dcache_parent() might not make any progress - * and loop forever. - */ - if (dentry->d_lockref.count) { - dentry_lru_del(dentry); - } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { - dentry_lru_del(dentry); - list_add_tail(&dentry->d_lru, dispose); - dentry->d_flags |= DCACHE_SHRINK_LIST; - found++; - } - /* - * We can return to the caller if we have found some (this - * ensures forward progress). We'll be coming back to find - * the rest. - */ - if (found && need_resched()) { - spin_unlock(&dentry->d_lock); - goto out; - } +static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) +{ + struct select_data *data = _data; + enum d_walk_ret ret = D_WALK_CONTINUE; - /* - * Descend a level if the d_subdirs list is non-empty. - */ - if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&this_parent->d_lock); - spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); - this_parent = dentry; - spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); - goto repeat; - } + if (data->start == dentry) + goto out; - spin_unlock(&dentry->d_lock); - } /* - * All done at this level ... ascend and resume the search. + * move only zero ref count dentries to the dispose list. + * + * Those which are presently on the shrink list, being processed + * by shrink_dentry_list(), shouldn't be moved. Otherwise the + * loop in shrink_dcache_parent() might not make any progress + * and loop forever. */ - if (this_parent != parent) { - struct dentry *child = this_parent; - this_parent = try_to_ascend(this_parent, locked, seq); - if (!this_parent) - goto rename_retry; - next = child->d_u.d_child.next; - goto resume; + if (dentry->d_lockref.count) { + dentry_lru_del(dentry); + } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { - dentry_lru_move_list(dentry, &data->dispose); ++ dentry_lru_del(dentry); ++ list_add_tail(&dentry->d_lru, &data->dispose); + dentry->d_flags |= DCACHE_SHRINK_LIST; + data->found++; + ret = D_WALK_NORETRY; } + /* + * We can return to the caller if we have found some (this + * ensures forward progress). We'll be coming back to find + * the rest. + */ + if (data->found && need_resched()) + ret = D_WALK_QUIT; out: - spin_unlock(&this_parent->d_lock); - if (!locked && read_seqretry(&rename_lock, seq)) - goto rename_retry; - if (locked) - write_sequnlock(&rename_lock); - return found; - -rename_retry: - if (found) - return found; - if (locked) - goto again; - locked = 1; - write_seqlock(&rename_lock); - goto again; + return ret; } /** [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1450 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/dcache.c between commit 8aab6a27332b ("vfs: reorganize dput() memory accesses") from Linus' tree and commit "dentry: move to per-sb LRU locks" from the akpm tree. I fixed it up (I think - see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/dcache.c index 664554e,6e212bd..0000000 --- a/fs/dcache.c +++ b/fs/dcache.c @@@ -362,9 -332,8 +361,9 @@@ static void dentry_unlink_inode(struct */ static void dentry_lru_add(struct dentry *dentry) { - if (list_empty(&dentry->d_lru)) { + if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { - spin_lock(&dcache_lru_lock); + spin_lock(&dentry->d_sb->s_dentry_lru_lock); + dentry->d_flags |= DCACHE_LRU_LIST; list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; this_cpu_inc(nr_dentry_unused); @@@ -394,9 -363,8 +393,9 @@@ static void dentry_lru_del(struct dentr static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) { - spin_lock(&dcache_lru_lock); + spin_lock(&dentry->d_sb->s_dentry_lru_lock); if (list_empty(&dentry->d_lru)) { + dentry->d_flags |= DCACHE_LRU_LIST; list_add_tail(&dentry->d_lru, list); dentry->d_sb->s_nr_dentry_unused++; this_cpu_inc(nr_dentry_unused); [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1738 bytes --] [ Just adding Dave Chinner to the cc list] On Tue, 10 Sep 2013 14:09:23 +1000 Stephen Rothwell <sfr@canb.auug.org.au> wrote: > > Hi Andrew, > > Today's linux-next merge of the akpm tree got a conflict in fs/dcache.c > between commit 8aab6a27332b ("vfs: reorganize dput() memory accesses") > from Linus' tree and commit "dentry: move to per-sb LRU locks" from the > akpm tree. > > I fixed it up (I think - see below) and can carry the fix as necessary > (no action is required). > > -- > Cheers, > Stephen Rothwell sfr@canb.auug.org.au > > diff --cc fs/dcache.c > index 664554e,6e212bd..0000000 > --- a/fs/dcache.c > +++ b/fs/dcache.c > @@@ -362,9 -332,8 +361,9 @@@ static void dentry_unlink_inode(struct > */ > static void dentry_lru_add(struct dentry *dentry) > { > - if (list_empty(&dentry->d_lru)) { > + if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { > - spin_lock(&dcache_lru_lock); > + spin_lock(&dentry->d_sb->s_dentry_lru_lock); > + dentry->d_flags |= DCACHE_LRU_LIST; > list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); > dentry->d_sb->s_nr_dentry_unused++; > this_cpu_inc(nr_dentry_unused); > @@@ -394,9 -363,8 +393,9 @@@ static void dentry_lru_del(struct dentr > > static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) > { > - spin_lock(&dcache_lru_lock); > + spin_lock(&dentry->d_sb->s_dentry_lru_lock); > if (list_empty(&dentry->d_lru)) { > + dentry->d_flags |= DCACHE_LRU_LIST; > list_add_tail(&dentry->d_lru, list); > dentry->d_sb->s_nr_dentry_unused++; > this_cpu_inc(nr_dentry_unused); -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 3056 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/dcache.c between commits 8aab6a27332b ("vfs: reorganize dput() memory accesses") and 0d98439ea3c6 ("vfs: use lockred "dead" flag to mark unrecoverably dead dentries") from Linus' tree and commit "dcache: remove dentries from LRU before putting on dispose list" from the akpm tree. I fixed it up (hopefully - see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc fs/dcache.c index a4cc2eb,43a1c0e..0000000 --- a/fs/dcache.c +++ b/fs/dcache.c @@@ -374,7 -344,6 +374,7 @@@ static void dentry_lru_add(struct dentr static void __dentry_lru_del(struct dentry *dentry) { list_del_init(&dentry->d_lru); - dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); ++ dentry->d_flags &= ~DCACHE_LRU_LIST; dentry->d_sb->s_nr_dentry_unused--; this_cpu_dec(nr_dentry_unused); } @@@ -393,14 -372,15 +403,16 @@@ static void dentry_lru_del(struct dentr static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) { + BUG_ON(dentry->d_flags & DCACHE_SHRINK_LIST); + spin_lock(&dentry->d_sb->s_dentry_lru_lock); if (list_empty(&dentry->d_lru)) { + dentry->d_flags |= DCACHE_LRU_LIST; list_add_tail(&dentry->d_lru, list); - dentry->d_sb->s_nr_dentry_unused++; - this_cpu_inc(nr_dentry_unused); } else { list_move_tail(&dentry->d_lru, list); + dentry->d_sb->s_nr_dentry_unused--; + this_cpu_dec(nr_dentry_unused); } spin_unlock(&dentry->d_sb->s_dentry_lru_lock); } @@@ -498,7 -478,8 +510,8 @@@ EXPORT_SYMBOL(d_drop) * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. */ - static inline struct dentry *dentry_kill(struct dentry *dentry) + static inline struct dentry * -dentry_kill(struct dentry *dentry, int ref, int unlock_on_failure) ++dentry_kill(struct dentry *dentry, int unlock_on_failure) __releases(dentry->d_lock) { struct inode *inode; @@@ -591,7 -573,7 +606,7 @@@ repeat return; kill_it: - dentry = dentry_kill(dentry); - dentry = dentry_kill(dentry, 1, 1); ++ dentry = dentry_kill(dentry, 1); if (dentry) goto repeat; } @@@ -816,7 -798,7 +831,7 @@@ static struct dentry * try_prune_one_de { struct dentry *parent; - parent = dentry_kill(dentry); - parent = dentry_kill(dentry, 0, 0); ++ parent = dentry_kill(dentry, 0); /* * If dentry_kill returns NULL, we have nothing more to do. * if it returns the same dentry, trylocks failed. In either @@@ -836,9 -818,10 +851,10 @@@ dentry = parent; while (dentry) { if (lockref_put_or_lock(&dentry->d_lockref)) - return; - dentry = dentry_kill(dentry); + return NULL; - dentry = dentry_kill(dentry, 1, 1); ++ dentry = dentry_kill(dentry, 1); } + return NULL; } static void shrink_dentry_list(struct list_head *list) [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
[-- Attachment #1: Type: text/plain, Size: 645 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in fs/dcache.c between commit 8aab6a27332b ("vfs: reorganize dput() memory accesses") from Linus' tree and commit "dcache: convert to use new lru list infrastructure" from the akpm tree. /me mutters about development happening during the merge window - especially when Andrew is absent. I have no idea if this will be correct, but I just used the version from the akpm tree (effectively reverting parts of commit 8aab6a27332b) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
On Tue, 10 Sep 2013 14:38:07 +1000 Stephen Rothwell <sfr@canb.auug.org.au> wrote: > Hi Andrew, > > Today's linux-next merge of the akpm tree got a conflict in fs/dcache.c > between commit 8aab6a27332b ("vfs: reorganize dput() memory accesses") > from Linus' tree and commit "dcache: convert to use new lru list > infrastructure" from the akpm tree. > > /me mutters about development happening during the merge window - > especially when Andrew is absent. > > I have no idea if this will be correct, but I just used the version from > the akpm tree (effectively reverting parts of commit 8aab6a27332b) and > can carry the fix as necessary (no action is required). This is rather a fiasco. "vfs: reorganize dput() memory accesses" made rather a mess of a 46 patch series which has been under development and test for two cycles so far. I reverted it so I could get it all to apply and build with some confidence that I didn't break anything. Then I went to re-apply "vfs: reorganize dput() memory accesses" on top but I'm unsure that it's the right thing to do. ->d_lru is now reused for s_dentry_lru and for the shrink list and for the dispose list, so simply testing its list_emptiness doesn't work any more. And it's unobvious that the problem which "vfs: reorganize dput() memory accesses" addresses still exists, or whether it was worsened or whatever. And given that "vfs: reorganize dput() memory accesses" was driven by careful profiling work, there's not a lot of point in going any further until the new code is profiled and comparisons are performed. Am moderately frustrated by all of this. It would be nice if the vfs maintainer could have, you know, paid some attention to a massive great vfs patchset instead of blithely wrecking it :( Dave, can you please eyeball the below and have a think about its applicability under the new regime? Thanks. Right now I'm not very confident in all of this. I think what I'll do is send the patches out for re-re-re-review right now and I'll ask Al and Linus to please find some time to think them over. commit 8aab6a27332bbf2abfcb35224738394e784d940b Author: Linus Torvalds <torvalds@linux-foundation.org> AuthorDate: Sun Sep 8 13:26:18 2013 -0700 Commit: Linus Torvalds <torvalds@linux-foundation.org> CommitDate: Sun Sep 8 13:26:18 2013 -0700 vfs: reorganize dput() memory accesses This is me being a bit OCD after all the dentry optimization work this merge window: profiles end up showing 'dput()' as a rather expensive operation, and there were two unrelated bad reasons for that. The first reason was reading d_lockref.count for debugging purposes, which touches the lockref cacheline (for reads) before really need to. More importantly, the debugging test in question is _wrong_, and has hidden bugs. It's true that we can only sleep when the count goes down to zero, but the test as-is hides the much more subtle bug that happens if we race with somebody else deleting the file. Anyway we _will_ touch that cacheline, but let's do it for a write and in the right routine (ie in "lockref_put_or_lock()") which annotates the costs better. So remove the misleading debug code. The other was an unnecessary access to the cacheline that contains the d_lru list, just to check whether we already were on the LRU list or not. This is exactly what we have d_flags for, so that we can avoid touching extra cache lines for the common case. So just add another bit for "is this dentry on the LRU". Finally, mark the tests properly likely/unlikely, so that the common fast-paths are dense in the instruction stream. This makes the profiles look much saner. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> diff --git a/fs/dcache.c b/fs/dcache.c index 761e31b..bf3c4f9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -308,8 +308,9 @@ static void dentry_unlink_inode(struct dentry * dentry) */ static void dentry_lru_add(struct dentry *dentry) { - if (list_empty(&dentry->d_lru)) { + if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { spin_lock(&dcache_lru_lock); + dentry->d_flags |= DCACHE_LRU_LIST; list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; dentry_stat.nr_unused++; @@ -320,7 +321,7 @@ static void dentry_lru_add(struct dentry *dentry) static void __dentry_lru_del(struct dentry *dentry) { list_del_init(&dentry->d_lru); - dentry->d_flags &= ~DCACHE_SHRINK_LIST; + dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); dentry->d_sb->s_nr_dentry_unused--; dentry_stat.nr_unused--; } @@ -341,6 +342,7 @@ static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) { spin_lock(&dcache_lru_lock); if (list_empty(&dentry->d_lru)) { + dentry->d_flags |= DCACHE_LRU_LIST; list_add_tail(&dentry->d_lru, list); dentry->d_sb->s_nr_dentry_unused++; dentry_stat.nr_unused++; @@ -509,24 +511,22 @@ relock: */ void dput(struct dentry *dentry) { - if (!dentry) + if (unlikely(!dentry)) return; repeat: - if (dentry->d_lockref.count == 1) - might_sleep(); if (lockref_put_or_lock(&dentry->d_lockref)) return; - if (dentry->d_flags & DCACHE_OP_DELETE) { + /* Unreachable? Get rid of it */ + if (unlikely(d_unhashed(dentry))) + goto kill_it; + + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { if (dentry->d_op->d_delete(dentry)) goto kill_it; } - /* Unreachable? Get rid of it */ - if (d_unhashed(dentry)) - goto kill_it; - dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index fe50f3d..feaa8d8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -208,6 +208,7 @@ struct dentry_operations { #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) +#define DCACHE_LRU_LIST 0x80000 #define DCACHE_DENTRY_KILLED 0x100000 extern seqlock_t rename_lock;
On Tue, Sep 10, 2013 at 03:27:53PM -0700, Andrew Morton wrote:
> This is rather a fiasco. "vfs: reorganize dput() memory accesses" made
> rather a mess of a 46 patch series which has been under development and
> test for two cycles so far.
Check vfs.git#for-next...
On Tue, Sep 10, 2013 at 3:27 PM, Andrew Morton
<akpm@linux-foundation.org> wrote:
>
> This is rather a fiasco. "vfs: reorganize dput() memory accesses" made
> rather a mess of a 46 patch series which has been under development and
> test for two cycles so far.
Andrew, *please* don't do the insane rebasing you keep on doing.
Nobody else does that, and it adds more work for you, and makes your
patch bombs be untimely.
And I'll be honest, I don't care about the "more work for you" part,
since I don't really see it, but I do care about the "untimely" part.
That's the one that affects me. For example, I'm traveling starting
Friday, so I'll close the merge window on the road (linuxCon US next
week, a weekend of diving before that). It would be much nicer if I
have almost nothing pending before I leave.
And quite frankly, there is absolutely nothing that touches
fs/dcache.c that should be in your tree anyway, as far as I can tell.
But seriously - don't do the constant rebasing. I tell the git people
that, because I hate how it actually dilutes the value of any testing
they did. The fact that you rebase to the day you send the patches is
actually taking *away* value from what you do, and it adds a lot of
work for you.
So I'd (once again) suggest you base your pile of patches on the
previous stable kernel, and that linux-next take it *first* rather
than take it last.
Linus
On Tue, 10 Sep 2013 23:29:24 +0100 Al Viro <viro@ZenIV.linux.org.uk> wrote:
> On Tue, Sep 10, 2013 at 03:27:53PM -0700, Andrew Morton wrote:
>
> > This is rather a fiasco. "vfs: reorganize dput() memory accesses" made
> > rather a mess of a 46 patch series which has been under development and
> > test for two cycles so far.
>
> Check vfs.git#for-next...
Why? What is in it?
On Tue, Sep 10, 2013 at 03:35:20PM -0700, Andrew Morton wrote:
> On Tue, 10 Sep 2013 23:29:24 +0100 Al Viro <viro@ZenIV.linux.org.uk> wrote:
>
> > On Tue, Sep 10, 2013 at 03:27:53PM -0700, Andrew Morton wrote:
> >
> > > This is rather a fiasco. "vfs: reorganize dput() memory accesses" made
> > > rather a mess of a 46 patch series which has been under development and
> > > test for two cycles so far.
> >
> > Check vfs.git#for-next...
>
> Why? What is in it?
Initial attempt to port that very thing on top of current mainline.
On Tue, Sep 10, 2013 at 11:36:24PM +0100, Al Viro wrote:
> On Tue, Sep 10, 2013 at 03:35:20PM -0700, Andrew Morton wrote:
> > On Tue, 10 Sep 2013 23:29:24 +0100 Al Viro <viro@ZenIV.linux.org.uk> wrote:
> >
> > > On Tue, Sep 10, 2013 at 03:27:53PM -0700, Andrew Morton wrote:
> > >
> > > > This is rather a fiasco. "vfs: reorganize dput() memory accesses" made
> > > > rather a mess of a 46 patch series which has been under development and
> > > > test for two cycles so far.
> > >
> > > Check vfs.git#for-next...
> >
> > Why? What is in it?
>
> Initial attempt to port that very thing on top of current mainline.
... with considerable breakage around "dcache: convert to use new lru list
infrastructure", indeed. I'll see what I can do there...
On Tue, 10 Sep 2013 23:36:24 +0100 Al Viro <viro@ZenIV.linux.org.uk> wrote:
> On Tue, Sep 10, 2013 at 03:35:20PM -0700, Andrew Morton wrote:
> > On Tue, 10 Sep 2013 23:29:24 +0100 Al Viro <viro@ZenIV.linux.org.uk> wrote:
> >
> > > On Tue, Sep 10, 2013 at 03:27:53PM -0700, Andrew Morton wrote:
> > >
> > > > This is rather a fiasco. "vfs: reorganize dput() memory accesses" made
> > > > rather a mess of a 46 patch series which has been under development and
> > > > test for two cycles so far.
> > >
> > > Check vfs.git#for-next...
> >
> > Why? What is in it?
>
> Initial attempt to port that very thing on top of current mainline.
Obtained from where? There are a whole pile of fixes resulting from
review and linux-next testing. Are they included?
On Tue, 10 Sep 2013 15:35:04 -0700 Linus Torvalds <torvalds@linux-foundation.org> wrote:
> So I'd (once again) suggest you base your pile of patches on the
> previous stable kernel, and that linux-next take it *first* rather
> than take it last.
That's what we're now doing. But this particular patchset was
different because it's changing multiple subsystems, several of which
are concurrently being changed in an uncoordinated fashion.
On Tue, Sep 10, 2013 at 03:41:16PM -0700, Andrew Morton wrote:
> Obtained from where? There are a whole pile of fixes resulting from
> review and linux-next testing. Are they included?
-next and yes. The trivial ones - folded into the commits they are fixing
(I mean, ones directly following the commit being fixed). The rest included
as individual commits.
I'm looking through the DCACHE_LRU_LIST mess right now...
On Tue, Sep 10, 2013 at 11:48:23PM +0100, Al Viro wrote:
> On Tue, Sep 10, 2013 at 03:41:16PM -0700, Andrew Morton wrote:
>
> > Obtained from where? There are a whole pile of fixes resulting from
> > review and linux-next testing. Are they included?
>
> -next and yes. The trivial ones - folded into the commits they are fixing
> (I mean, ones directly following the commit being fixed). The rest included
> as individual commits.
>
> I'm looking through the DCACHE_LRU_LIST mess right now...
It's not that bad, actually; I think the variant I've pushed right now
(vfs.git#for-next, head at f5e1dd34561e0fb06400b378d595198918833021) should
be doing the right thing. It ought to cover everything in your branch
in -next from "fs: bump inode and dentry counters to long" on to the
end of queue.
On Tue, 10 Sep 2013 23:59:34 +0100 Al Viro <viro@ZenIV.linux.org.uk> wrote:
> On Tue, Sep 10, 2013 at 11:48:23PM +0100, Al Viro wrote:
> > On Tue, Sep 10, 2013 at 03:41:16PM -0700, Andrew Morton wrote:
> >
> > > Obtained from where? There are a whole pile of fixes resulting from
> > > review and linux-next testing. Are they included?
> >
> > -next and yes. The trivial ones - folded into the commits they are fixing
> > (I mean, ones directly following the commit being fixed). The rest included
> > as individual commits.
> >
> > I'm looking through the DCACHE_LRU_LIST mess right now...
>
> It's not that bad, actually; I think the variant I've pushed right now
> (vfs.git#for-next, head at f5e1dd34561e0fb06400b378d595198918833021) should
> be doing the right thing. It ought to cover everything in your branch
> in -next from "fs: bump inode and dentry counters to long" on to the
> end of queue.
That's the correct starting point. The end point should be
"staging/lustre/libcfs: cleanup linux-mem.h".
On Tue, Sep 10, 2013 at 3:59 PM, Al Viro <viro@zeniv.linux.org.uk> wrote: > > It's not that bad, actually; I think the variant I've pushed right now > (vfs.git#for-next, head at f5e1dd34561e0fb06400b378d595198918833021) should > be doing the right thing. It ought to cover everything in your branch > in -next from "fs: bump inode and dentry counters to long" on to the > end of queue. >From a quick look, this looks pretty broken: if (list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)) this_cpu_inc(nr_dentry_unused); dentry->d_flags |= DCACHE_LRU_LIST; because if that list_lru_add() can fail, then we shouldn't set the DCACHE_LRU_LIST bit either. That said, I don't see how it can fail. We only do this with the dentry locked, and when it's not already on the LRU list. So I think the "if()" is just misleading and unnecessary - but the code works. Linus
On Tue, Sep 10, 2013 at 04:37:19PM -0700, Linus Torvalds wrote:
> On Tue, Sep 10, 2013 at 3:59 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > It's not that bad, actually; I think the variant I've pushed right now
> > (vfs.git#for-next, head at f5e1dd34561e0fb06400b378d595198918833021) should
> > be doing the right thing. It ought to cover everything in your branch
> > in -next from "fs: bump inode and dentry counters to long" on to the
> > end of queue.
>
> >From a quick look, this looks pretty broken:
>
> if (list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru))
> this_cpu_inc(nr_dentry_unused);
> dentry->d_flags |= DCACHE_LRU_LIST;
>
> because if that list_lru_add() can fail, then we shouldn't set the
> DCACHE_LRU_LIST bit either.
list_lru_add() can fail if it's already on the list; leaving the counter
alone should've been conditional on that, setting the flag - no. Said
that, it probably should be WARN_ON(!...); this_cpu_inc(); ... |= ...;
On Tue, Sep 10, 2013 at 04:13:02PM -0700, Andrew Morton wrote:
> > in -next from "fs: bump inode and dentry counters to long" on to the
> > end of queue.
>
> That's the correct starting point. The end point should be
> "staging/lustre/libcfs: cleanup linux-mem.h".
... which is the end of queue (well, the last one I grabbed, anyway).
And it obviously needed to go before the removal of ->shrinker(),
not after it, so it got moved up.
On Tue, Sep 10, 2013 at 4:53 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> list_lru_add() can fail if it's already on the list; leaving the counter
> alone should've been conditional on that, setting the flag - no. Said
> that, it probably should be WARN_ON(!...); this_cpu_inc(); ... |= ...;
That WARN_ON_(!..) might indeed be better (maybe just WARN_ON_ONCE())..
That DCACHE_LRU_LIST bit needs to be coherent with "the dentry->d_lru
entry is on _some_ list" (whether it's the dentry one or the shrinker
one), so if that list_lru_add() ever fails, that would be a sign of
badness.
And that whole function is very performance-critical, to the point
where we not only don't want to call down to list_lry_add(), we don't
even want to touch the d_lru list entry itself to even _look_ if it's
empty or not, because that will take a cache miss. Which was obviously
the whole reason for that DCACHE_LRU_LIST bit existing...
Linus
[-- Attachment #1: Type: text/plain, Size: 6496 bytes --] Hi Linus, On Tue, 10 Sep 2013 15:44:00 -0700 Andrew Morton <akpm@linux-foundation.org> wrote: > > On Tue, 10 Sep 2013 15:35:04 -0700 Linus Torvalds <torvalds@linux-foundation.org> wrote: > > > So I'd (once again) suggest you base your pile of patches on the > > previous stable kernel, and that linux-next take it *first* rather > > than take it last. > > That's what we're now doing. But this particular patchset was > different because it's changing multiple subsystems, several of which > are concurrently being changed in an uncoordinated fashion. Yeah, Andrew and I came to an arrangement this round so that almost all of his series is based only on your tree. Currently I have 375 patches based on v3.11-rc7-14-gfa8218d in a branch that I just merge each day. The remaining bit (which contains the series that caused this thread) is now only 38 patches (after removing the stuff that Al has taken) is still rebased on top of linux-next each day due to dependencies on other trees in linux-next. (Yes, I know this is not ideal, but it is a work in progress.) So, Andrew, you should be able to just about send those 375 patches to Linus (I know that there may be some fix folding to do before that) and have him apply them on top of v3.11-rc7-14-gfa8218d in a separate branch and then merge that branch. I have included the "git diff-tree --cc" output from my merge of that into linux-next yesterday. Some of it is not applicable yet (since there are still some other outstanding trees), but it gives you some idea of how little the merge is a problem. I hope this is helpful. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc arch/s390/kernel/kprobes.c index adbbe7f,cb7ac9e..0ce9fb2 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@@ -100,17 -120,13 +120,16 @@@ static int __kprobes get_fixup_type(kpr fixup |= FIXUP_RETURN_REGISTER; break; case 0xc0: - if ((insn[0] & 0x0f) == 0x00 || /* larl */ - (insn[0] & 0x0f) == 0x05) /* brasl */ - fixup |= FIXUP_RETURN_REGISTER; + if ((insn[0] & 0x0f) == 0x05) /* brasl */ + fixup |= FIXUP_RETURN_REGISTER; break; case 0xeb: - if ((insn[2] & 0xff) == 0x44 || /* bxhg */ - (insn[2] & 0xff) == 0x45) /* bxleg */ + switch (insn[2] & 0xff) { + case 0x44: /* bxhg */ + case 0x45: /* bxleg */ fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } break; case 0xe3: /* bctg */ if ((insn[2] & 0xff) == 0x46) diff --cc arch/x86/lib/csum-wrappers_64.c index 7609e0e,aaba241..2528500 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@@ -4,9 -4,9 +4,10 @@@ * * Wrappers of assembly checksum functions for x86-64. */ + #include <linux/sched.h> #include <asm/checksum.h> #include <linux/module.h> +#include <asm/smap.h> /** * csum_partial_copy_from_user - Copy and checksum from user space. diff --cc drivers/staging/lustre/lustre/llite/remote_perm.c index dedd56a,ceac936..50de0f0 --- a/drivers/staging/lustre/lustre/llite/remote_perm.c +++ b/drivers/staging/lustre/lustre/llite/remote_perm.c @@@ -44,7 -44,9 +44,8 @@@ #define DEBUG_SUBSYSTEM S_LLITE #include <linux/module.h> + #include <linux/sched.h> #include <linux/types.h> -#include <linux/version.h> #include <lustre_lite.h> #include <lustre_ha.h> diff --cc fs/fat/file.c index 33711ff,00b5810..26c8e32 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@@ -148,6 -151,22 +151,22 @@@ static long fat_generic_compat_ioctl(st static int fat_file_release(struct inode *inode, struct file *filp) { + + struct super_block *sb = inode->i_sb; + loff_t mmu_private_ideal; + + /* + * Release unwritten fallocated blocks on file release. + * Do this only when the last open file descriptor is closed. + */ + mutex_lock(&inode->i_mutex); + mmu_private_ideal = round_up(inode->i_size, sb->s_blocksize); + + if (mmu_private_ideal < MSDOS_I(inode)->mmu_private && - filp->f_dentry->d_count == 1) ++ d_count(filp->f_dentry) == 1) + fat_truncate_blocks(inode, inode->i_size); + mutex_unlock(&inode->i_mutex); + if ((filp->f_mode & FMODE_WRITE) && MSDOS_SB(inode->i_sb)->options.flush) { fat_flush_inodes(inode->i_sb, inode, NULL); diff --cc fs/xfs/xfs_mount.c index 5dcc680,eb9ba15..65dbf17 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@@ -15,9 -15,10 +15,10 @@@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + #include <linux/sched.h> #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_format.h" #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" diff --cc kernel/fork.c index fb4406b,04a8c2a..5ede60b --- a/kernel/fork.c +++ b/kernel/fork.c @@@ -1173,13 -1171,15 +1171,16 @@@ static struct task_struct *copy_process return ERR_PTR(-EINVAL); /* - * If the new process will be in a different pid namespace - * don't allow the creation of threads. + * If the new process will be in a different pid or user namespace + * do not allow it to share a thread group or signal handlers or + * parent with the forking task. */ - if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) && - (task_active_pid_ns(current) != - current->nsproxy->pid_ns_for_children)) - return ERR_PTR(-EINVAL); + if (clone_flags & (CLONE_SIGHAND | CLONE_PARENT)) { + if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || - (task_active_pid_ns(current) != current->nsproxy->pid_ns)) ++ (task_active_pid_ns(current) != ++ current->nsproxy->pid_ns_for_children)) + return ERR_PTR(-EINVAL); + } retval = security_task_create(clone_flags); if (retval) diff --cc kernel/watchdog.c index 51c4f34,410d5bb..373d3e1 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@@ -553,6 -607,14 +607,6 @@@ void __init lockup_detector_init(void { set_sample_period(); -#ifdef CONFIG_NO_HZ_FULL - if (watchdog_user_enabled) { - watchdog_user_enabled = 0; - pr_warning("Disabled lockup detectors by default for full dynticks\n"); - pr_warning("You can reactivate it with 'sysctl -w kernel.watchdog=1'\n"); - } -#endif - if (watchdog_user_enabled) - watchdog_enable_all_cpus(); + watchdog_enable_all_cpus(false); } [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
On Tue, Sep 10, 2013 at 05:01:23PM -0700, Linus Torvalds wrote:
> On Tue, Sep 10, 2013 at 4:53 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > list_lru_add() can fail if it's already on the list; leaving the counter
> > alone should've been conditional on that, setting the flag - no. Said
> > that, it probably should be WARN_ON(!...); this_cpu_inc(); ... |= ...;
>
> That WARN_ON_(!..) might indeed be better (maybe just WARN_ON_ONCE())..
>
> That DCACHE_LRU_LIST bit needs to be coherent with "the dentry->d_lru
> entry is on _some_ list" (whether it's the dentry one or the shrinker
> one), so if that list_lru_add() ever fails, that would be a sign of
> badness.
>
> And that whole function is very performance-critical, to the point
> where we not only don't want to call down to list_lry_add(), we don't
> even want to touch the d_lru list entry itself to even _look_ if it's
> empty or not, because that will take a cache miss. Which was obviously
> the whole reason for that DCACHE_LRU_LIST bit existing...
Guys, I'm about to be out of the office for 4-5 days, so this is
real bad timing for me. When I get back I'll put some effort into
validating that everything still works properly and performs as
expected.
Cheers,
Dave.
--
Dave Chinner
dchinner@redhat.com
On Tue, Sep 10, 2013 at 5:30 PM, Stephen Rothwell <sfr@canb.auug.org.au> wrote: > > So, Andrew, you should be able to just about send those 375 patches to > Linus (I know that there may be some fix folding to do before that) and > have him apply them on top of v3.11-rc7-14-gfa8218d in a separate branch > and then merge that branch. That's how I have been doing Andrew's patch-bombs anyway, since I started asking him what they were based on (they *usually* apply on top of my current tip, but during the merge window so much happens that it wasn't all that unusual that one or two patches didn't quite apply). So these days I just always do a "git checkout -b akpm <base-andrew-gives-me>" and apply the patches that way. It's just that normally the base is within about six hours of my tip anyway. This would be the first time it's way back. But I much prefer having more of a merge of well-tested code over having an easier merge of something that was rebased. That's especially true if the rebasing ends up delaying me getting the patches in the first place. I detest having merge windows where the last few days are busy, I'd really much rather have a couple of days with very little going on before doing the -rc1 (in the hope that rc1 actually ends up being pretty good). > I have included the "git diff-tree --cc" > output from my merge of that into linux-next yesterday. Some of it is > not applicable yet (since there are still some other outstanding trees), > but it gives you some idea of how little the merge is a problem. Yes. We actually seldom have real merge problems. The only really annoying merges tend to be when somebody did something that isn't really code-related, like sorting a big list of entries (Makefiles, Kconfig files, device tree cleanups) and then both sides do a fair amount of changes on their respective - and very different - sides. The "multiple people worked on the same code" part is fairly unusual, and particular if I know the code and can even compile it, it's all fine. Linus
[-- Attachment #1: Type: text/plain, Size: 807 bytes --] Hi Andrew, On Tue, 10 Sep 2013 16:13:02 -0700 Andrew Morton <akpm@linux-foundation.org> wrote: > > On Tue, 10 Sep 2013 23:59:34 +0100 Al Viro <viro@ZenIV.linux.org.uk> wrote: > > > > It's not that bad, actually; I think the variant I've pushed right now > > (vfs.git#for-next, head at f5e1dd34561e0fb06400b378d595198918833021) should > > be doing the right thing. It ought to cover everything in your branch > > in -next from "fs: bump inode and dentry counters to long" on to the > > end of queue. > > That's the correct starting point. The end point should be > "staging/lustre/libcfs: cleanup linux-mem.h". OK, so I have removed that set of patches from the copy of the -mm tree that is in linux-next today. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
On Tue, Sep 10, 2013 at 4:37 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> From a quick look, this looks pretty broken:
>
> if (list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru))
> this_cpu_inc(nr_dentry_unused);
> dentry->d_flags |= DCACHE_LRU_LIST;
>
> because if that list_lru_add() can fail, then we shouldn't set the
> DCACHE_LRU_LIST bit either.
>
> That said, I don't see how it can fail. We only do this with the
> dentry locked, and when it's not already on the LRU list. So I think
> the "if()" is just misleading and unnecessary - but the code works.
So I thought you'd clean this up. Looking again, it still seems really
confused, and I'm finding actual bugs.
You don't clear the DCACHE_LRU_LIST when you remove dentries from the
d_lru list. In other cases (like shrink_dentry_list), you clear just
the DCACHE_SHRINK_LIST.
As a result, the "if ()" isn't necessarily unnecessary, but there are
actual bugs. It looks like the dentry can be removed from the d_lru
lists without the bit ever getting cleared, and if that happens, it
will never be moved back.
The rule for DCACHE_LRU_LIST was - and should be - that the bit is set
IFF the d_lru list is not empty. So it gets set when a dentry is moved
to the LRU lists, but it _stays_ set if the dentry is moved to the
shrink_list. It then gets cleared when the dentry is removed from any
d_lru list (ie "list_del_init()").
I'll walk through the code, it looked suspicious. Maybe there's
something subtle that makes it work, but I don't see it.
Linus
On Thu, Sep 12, 2013 at 5:56 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> I'll walk through the code, it looked suspicious. Maybe there's
> something subtle that makes it work, but I don't see it.
Btw, it's not just the DCACHE_LRU_LIST bit. The games with
"nr_dentry_unused" look totally broken too. It's decremented in
dentry_lru_isolate_shrink() for each dentry we remove, and then it is
decremented *again* in shrink_dcache_sb() by the number of dentries we
removed.
Maybe I'm confused, but the code sure looks more confused than I feel.
I would suggest keeping the same semantics for 'nr_dentry_unused'.
Dentries are unused whether they are on the "real" LRU list or have
been tagged with DCACHE_SHRINK_LIST. So moving from one list to the
other does nothing. It's the "list_del_init()" that should trigger
both 'nr_dentry_unused' and DCACHE_LRU_LIST bit-clearing.
In fact, maybe a helper function for _actually_ removing the thing
from all lists, and adding them back. Right now there are
"list_del_init()" and "list_add[_tail]()" calls sprinkled around in
random places, mixed up with the new "list_lru_add()".
Damn, the code is too confused. I have to go to a highschool parent
back-to-school meeting, so I won't get to this until maybe on a plane
tomorrow. Al, can you please give this a look?
Linus
On Thu, Sep 12, 2013 at 06:12:24PM -0700, Linus Torvalds wrote:
> On Thu, Sep 12, 2013 at 5:56 PM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > I'll walk through the code, it looked suspicious. Maybe there's
> > something subtle that makes it work, but I don't see it.
>
> Btw, it's not just the DCACHE_LRU_LIST bit. The games with
> "nr_dentry_unused" look totally broken too. It's decremented in
> dentry_lru_isolate_shrink() for each dentry we remove, and then it is
> decremented *again* in shrink_dcache_sb() by the number of dentries we
> removed.
>
> Maybe I'm confused, but the code sure looks more confused than I feel.
>
> I would suggest keeping the same semantics for 'nr_dentry_unused'.
> Dentries are unused whether they are on the "real" LRU list or have
> been tagged with DCACHE_SHRINK_LIST. So moving from one list to the
> other does nothing. It's the "list_del_init()" that should trigger
> both 'nr_dentry_unused' and DCACHE_LRU_LIST bit-clearing.
>
> In fact, maybe a helper function for _actually_ removing the thing
> from all lists, and adding them back. Right now there are
> "list_del_init()" and "list_add[_tail]()" calls sprinkled around in
> random places, mixed up with the new "list_lru_add()".
>
> Damn, the code is too confused. I have to go to a highschool parent
> back-to-school meeting, so I won't get to this until maybe on a plane
> tomorrow. Al, can you please give this a look?
Will do...
[-- Attachment #1: Type: text/plain, Size: 1573 bytes --] On Thu, Sep 12, 2013 at 6:12 PM, Linus Torvalds <torvalds@linux-foundation.org> wrote: > > Damn, the code is too confused. I have to go to a highschool parent > back-to-school meeting, so I won't get to this until maybe on a plane > tomorrow. Al, can you please give this a look? I'm on a plane. I have gogo. Here's my current TOTALLY UNTESTED patch. It tries to consolidate the dentry LRU stuff into a few helper functions that right now have anal checking of the flags. Maybe I overdid it, but the code was really confusing, and I think we got the free dentry counts wrong, and the bits wrong too, so I tried to be extra careful. There are several cases: - d_lru_add/del: fairly obvious - d_lru_isolate: this is when the LRU callbacks ask us to remove the entry from the list. This is different from d_lru_del() only in that it uses the raw list removal, not the lru list helper function. I'm not sure that's right, but that's what the code used to do. - d_lru_shrink_move: move from the "global" lru list to a private shrinker list - d_shrink_add/del: fairly obvious. And then "denty_lru_add/del" that actually take the current state into account and do the right thing. Those we had before, I'm just explaining the difference from the low-level operations that have fixed "from this state to that" semantics Hmm? Does it work? Who knows.. But *if* it works, I think it has a higher chance of getting all the rules for bits and free object counting right. Somebody not in a plane please double-check my low-oxygen-pressure thinking.. Linus [-- Attachment #2: patch.diff --] [-- Type: application/octet-stream, Size: 6399 bytes --] fs/dcache.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 27 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 1bd4614ce93b..6443b07f8a3c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -357,15 +357,74 @@ static void dentry_unlink_inode(struct dentry * dentry) } /* + * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry + * is in use - which includes both the "real" per-superblock + * LRU list _and_ the DCACHE_SHRINK_LIST use. + * + * The DCACHE_SHRINK_LIST bit is set whenever the dentry is + * on the shrink list (ie not on the superblock LRU list). + * + * The per-cpu "nd_dentry_unused" counters are updated with + * the DCACHE_LRU_LIST bit. + * + * These helper functions make sure we always follow the + * rules. d_lock must be held by the caller. + */ +#define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x)) +static void d_lru_add(struct dentry *dentry) +{ + D_FLAG_VERIFY(dentry, 0); + dentry->d_flags |= DCACHE_LRU_LIST; + this_cpu_inc(nr_dentry_unused); + WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); +} + +static void d_lru_del(struct dentry *dentry) +{ + D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); + dentry->d_flags &= ~DCACHE_LRU_LIST; + this_cpu_dec(nr_dentry_unused); + WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); +} + +static void d_lru_isolate(struct dentry *dentry) +{ + D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); + dentry->d_flags &= ~DCACHE_LRU_LIST; + this_cpu_dec(nr_dentry_unused); + list_del_init(&dentry->d_lru); +} + +static void d_shrink_del(struct dentry *dentry) +{ + D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); + list_del_init(&dentry->d_lru); + dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); + this_cpu_dec(nr_dentry_unused); +} + +static void d_shrink_add(struct dentry *dentry, struct list_head *list) +{ + D_FLAG_VERIFY(dentry, 0); + list_add(&dentry->d_lru, list); + dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST; + this_cpu_inc(nr_dentry_unused); +} + +static void d_lru_shrink_move(struct dentry *dentry, struct list_head *list) +{ + D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); + dentry->d_flags |= DCACHE_SHRINK_LIST; + list_move_tail(&dentry->d_lru, list); +} + +/* * dentry_lru_(add|del)_list) must be called with d_lock held. */ static void dentry_lru_add(struct dentry *dentry) { - if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { - if (list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)) - this_cpu_inc(nr_dentry_unused); - dentry->d_flags |= DCACHE_LRU_LIST; - } + if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) + d_lru_add(dentry); } /* @@ -377,15 +436,11 @@ static void dentry_lru_add(struct dentry *dentry) */ static void dentry_lru_del(struct dentry *dentry) { - if (dentry->d_flags & DCACHE_SHRINK_LIST) { - list_del_init(&dentry->d_lru); - dentry->d_flags &= ~DCACHE_SHRINK_LIST; - return; + if (dentry->d_flags & DCACHE_LRU_LIST) { + if (dentry->d_flags & DCACHE_SHRINK_LIST) + return d_shrink_del(dentry); + d_lru_del(dentry); } - - if (list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)) - this_cpu_dec(nr_dentry_unused); - dentry->d_flags &= ~DCACHE_LRU_LIST; } /** @@ -837,6 +892,13 @@ static void shrink_dentry_list(struct list_head *list) dentry = list_entry_rcu(list->prev, struct dentry, d_lru); if (&dentry->d_lru == list) break; /* empty */ + + + /* + * Get the dentry lock, and re-verify that the dentry is + * this on the shrinking list. If it is, we know that + * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set. + */ spin_lock(&dentry->d_lock); if (dentry != list_entry(list->prev, struct dentry, d_lru)) { spin_unlock(&dentry->d_lock); @@ -848,8 +910,7 @@ static void shrink_dentry_list(struct list_head *list) * to the LRU here, so we can simply remove it from the list * here regardless of whether it is referenced or not. */ - list_del_init(&dentry->d_lru); - dentry->d_flags &= ~DCACHE_SHRINK_LIST; + d_shrink_del(dentry); /* * We found an inuse dentry which was not removed from @@ -861,12 +922,20 @@ static void shrink_dentry_list(struct list_head *list) } rcu_read_unlock(); + /* + * If 'try_to_prune()' returns a dentry, it will + * be the same one we passed in, and d_lock will + * have been held the whole time, so it will not + * have been added to any other lists. We failed + * to get the inode lock. + * + * We just add it back to the shrink list. + */ dentry = try_prune_one_dentry(dentry); rcu_read_lock(); if (dentry) { - dentry->d_flags |= DCACHE_SHRINK_LIST; - list_add(&dentry->d_lru, list); + d_shrink_add(dentry, list); spin_unlock(&dentry->d_lock); } } @@ -894,7 +963,7 @@ dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) * another pass through the LRU. */ if (dentry->d_lockref.count) { - list_del_init(&dentry->d_lru); + d_lru_isolate(dentry); spin_unlock(&dentry->d_lock); return LRU_REMOVED; } @@ -925,9 +994,7 @@ dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) return LRU_ROTATE; } - dentry->d_flags |= DCACHE_SHRINK_LIST; - list_move_tail(&dentry->d_lru, freeable); - this_cpu_dec(nr_dentry_unused); + d_lru_shrink_move(dentry, freeable); spin_unlock(&dentry->d_lock); return LRU_REMOVED; @@ -972,9 +1039,7 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, if (!spin_trylock(&dentry->d_lock)) return LRU_SKIP; - dentry->d_flags |= DCACHE_SHRINK_LIST; - list_move_tail(&dentry->d_lru, freeable); - this_cpu_dec(nr_dentry_unused); + d_lru_shrink_move(dentry, freeable); spin_unlock(&dentry->d_lock); return LRU_REMOVED; @@ -1362,9 +1427,12 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) if (dentry->d_lockref.count) { dentry_lru_del(dentry); } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { - dentry_lru_del(dentry); - list_add_tail(&dentry->d_lru, &data->dispose); - dentry->d_flags |= DCACHE_SHRINK_LIST; + /* + * If we get here, lockref.count is zero, and + * the dentry should be on the LRU list. So we + * can just use the "move" helper. + */ + d_lru_shrink_move(dentry, &data->dispose); data->found++; ret = D_WALK_NORETRY; }
On Fri, Sep 13, 2013 at 12:12 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> Does it work? Who knows.. But *if* it works, I think it has a higher
> chance of getting all the rules for bits and free object counting
> right.
>
> Somebody not in a plane please double-check my low-oxygen-pressure thinking..
Ok, it seems to work for me, even when memory pressure causes the
dentries to be shrunk.
I still would like people (ie Al, in particular), to double-check everything.
Linus
On Fri, Sep 13, 2013 at 12:12 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> - d_lru_isolate: this is when the LRU callbacks ask us to remove the
> entry from the list. This is different from d_lru_del() only in that
> it uses the raw list removal, not the lru list helper function. I'm
> not sure that's right, but that's what the code used to do.
Yes, verified. The LRU isolation functions are called with the lru
list lock held, so d_lru_isolate() basically just does the
list)_del_init() and our own state management.
Anyway, I fixed a typo in a comment (nd_dentry_unused ->
nr_dentry_unused) but the patch still looks good to me.
Linus
On Fri, Sep 13, 2013 at 12:12:20PM -0700, Linus Torvalds wrote: > It tries to consolidate the dentry LRU stuff into a few helper > functions that right now have anal checking of the flags. Maybe I > overdid it, but the code was really confusing, and I think we got the > free dentry counts wrong, and the bits wrong too, so I tried to be > extra careful. > > There are several cases: > - d_lru_add/del: fairly obvious > - d_lru_isolate: this is when the LRU callbacks ask us to remove the > entry from the list. This is different from d_lru_del() only in that > it uses the raw list removal, not the lru list helper function. I'm > not sure that's right, but that's what the code used to do. It is right - for one thing, we are holding the lock on that LRU list, so list_lru_del() would deadlock right there. For another, the same list_lru_walk (OK, list_lru_walk_node()) will do ->nr_items decrement when we return LRU_REMOVED to it, so we don't want to do it twice. Plain list_del_init() is correct here. > - d_lru_shrink_move: move from the "global" lru list to a private shrinker list > - d_shrink_add/del: fairly obvious. > > And then "denty_lru_add/del" that actually take the current state into > account and do the right thing. Those we had before, I'm just > explaining the difference from the low-level operations that have > fixed "from this state to that" semantics Looks sane; FWIW, the variant I'm playing with uses two independent flags for "shrinker" and "per-sb", but AFAICS that doesn't yield better code. Feel free to slap my acked-by on it.
On Fri, Sep 13, 2013 at 09:00:00PM +0100, Al Viro wrote:
> > - d_lru_shrink_move: move from the "global" lru list to a private shrinker list
> > - d_shrink_add/del: fairly obvious.
> >
> > And then "denty_lru_add/del" that actually take the current state into
> > account and do the right thing. Those we had before, I'm just
> > explaining the difference from the low-level operations that have
> > fixed "from this state to that" semantics
>
> Looks sane; FWIW, the variant I'm playing with uses two independent
> flags for "shrinker" and "per-sb", but AFAICS that doesn't yield better
> code.
Actually, it does yield slightly better code... Look - if you take your
patch and replace LRU_LIST | SHRINK_LIST combination with bare SHRINK_LIST
(which can't occur right now). Then all transitions turn into flipping
a single bit, check in dentry_lru_add() becomes if (!(flags & (SHRINK | LRU))
and dentry_lru_del() -- if (... & SHRINK) return ...; if (... & LRU) return ...
It can be done as a followup, anyway - better not mix that with fixes.
On Fri, Sep 13, 2013 at 09:18:03PM +0100, Al Viro wrote: > On Fri, Sep 13, 2013 at 09:00:00PM +0100, Al Viro wrote: > > > - d_lru_shrink_move: move from the "global" lru list to a private shrinker list > > > - d_shrink_add/del: fairly obvious. > > > > > > And then "denty_lru_add/del" that actually take the current state into > > > account and do the right thing. Those we had before, I'm just > > > explaining the difference from the low-level operations that have > > > fixed "from this state to that" semantics > > > > Looks sane; FWIW, the variant I'm playing with uses two independent > > flags for "shrinker" and "per-sb", but AFAICS that doesn't yield better > > code. > > Actually, it does yield slightly better code... Look - if you take your > patch and replace LRU_LIST | SHRINK_LIST combination with bare SHRINK_LIST > (which can't occur right now). Then all transitions turn into flipping ^^^ > a single bit, Obviously not *and* lru->shrink is common ;-/ Anyway, any benefit from microoptimizing that will drown in noise, so... nevermind that idea.
[-- Attachment #1: Type: text/plain, Size: 853 bytes --] On Fri, Sep 13, 2013 at 4:00 PM, Al Viro <viro@zeniv.linux.org.uk> wrote: \> > It is right - for one thing, we are holding the lock on that LRU list, > so list_lru_del() would deadlock right there. For another, the same > list_lru_walk (OK, list_lru_walk_node()) will do ->nr_items decrement > when we return LRU_REMOVED to it, so we don't want to do it twice. > Plain list_del_init() is correct here. Yes. And I found the opposite bug in one place: when we are collecting dentries by walking the parents etc, we do *not* hold the global RCU lock, so we cannot use the "d_lru_shrink_list()" thing after all. It's correct as far as the internal logic of fs/dcache.c goes, but it violates the global LRU list rules. So I replaced that with a dentry_lru_del() followed by a d_shrink_add() instead. Updated patch attached. Linus [-- Attachment #2: patch.diff --] [-- Type: application/octet-stream, Size: 6636 bytes --] fs/dcache.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 102 insertions(+), 27 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 1bd4614ce93b..435b97560674 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -357,15 +357,80 @@ static void dentry_unlink_inode(struct dentry * dentry) } /* + * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry + * is in use - which includes both the "real" per-superblock + * LRU list _and_ the DCACHE_SHRINK_LIST use. + * + * The DCACHE_SHRINK_LIST bit is set whenever the dentry is + * on the shrink list (ie not on the superblock LRU list). + * + * The per-cpu "nr_dentry_unused" counters are updated with + * the DCACHE_LRU_LIST bit. + * + * These helper functions make sure we always follow the + * rules. d_lock must be held by the caller. + */ +#define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x)) +static void d_lru_add(struct dentry *dentry) +{ + D_FLAG_VERIFY(dentry, 0); + dentry->d_flags |= DCACHE_LRU_LIST; + this_cpu_inc(nr_dentry_unused); + WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); +} + +static void d_lru_del(struct dentry *dentry) +{ + D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); + dentry->d_flags &= ~DCACHE_LRU_LIST; + this_cpu_dec(nr_dentry_unused); + WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); +} + +static void d_shrink_del(struct dentry *dentry) +{ + D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); + list_del_init(&dentry->d_lru); + dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); + this_cpu_dec(nr_dentry_unused); +} + +static void d_shrink_add(struct dentry *dentry, struct list_head *list) +{ + D_FLAG_VERIFY(dentry, 0); + list_add(&dentry->d_lru, list); + dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST; + this_cpu_inc(nr_dentry_unused); +} + +/* + * These can only be called under the global LRU lock, ie during the + * callback for freeing the LRU list. "isolate" removes it from the + * LRU lists entirely, while shrink_move moves it to the indicated + * private list. + */ +static void d_lru_isolate(struct dentry *dentry) +{ + D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); + dentry->d_flags &= ~DCACHE_LRU_LIST; + this_cpu_dec(nr_dentry_unused); + list_del_init(&dentry->d_lru); +} + +static void d_lru_shrink_move(struct dentry *dentry, struct list_head *list) +{ + D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); + dentry->d_flags |= DCACHE_SHRINK_LIST; + list_move_tail(&dentry->d_lru, list); +} + +/* * dentry_lru_(add|del)_list) must be called with d_lock held. */ static void dentry_lru_add(struct dentry *dentry) { - if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { - if (list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)) - this_cpu_inc(nr_dentry_unused); - dentry->d_flags |= DCACHE_LRU_LIST; - } + if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) + d_lru_add(dentry); } /* @@ -377,15 +442,11 @@ static void dentry_lru_add(struct dentry *dentry) */ static void dentry_lru_del(struct dentry *dentry) { - if (dentry->d_flags & DCACHE_SHRINK_LIST) { - list_del_init(&dentry->d_lru); - dentry->d_flags &= ~DCACHE_SHRINK_LIST; - return; + if (dentry->d_flags & DCACHE_LRU_LIST) { + if (dentry->d_flags & DCACHE_SHRINK_LIST) + return d_shrink_del(dentry); + d_lru_del(dentry); } - - if (list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)) - this_cpu_dec(nr_dentry_unused); - dentry->d_flags &= ~DCACHE_LRU_LIST; } /** @@ -837,6 +898,13 @@ static void shrink_dentry_list(struct list_head *list) dentry = list_entry_rcu(list->prev, struct dentry, d_lru); if (&dentry->d_lru == list) break; /* empty */ + + + /* + * Get the dentry lock, and re-verify that the dentry is + * this on the shrinking list. If it is, we know that + * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set. + */ spin_lock(&dentry->d_lock); if (dentry != list_entry(list->prev, struct dentry, d_lru)) { spin_unlock(&dentry->d_lock); @@ -848,8 +916,7 @@ static void shrink_dentry_list(struct list_head *list) * to the LRU here, so we can simply remove it from the list * here regardless of whether it is referenced or not. */ - list_del_init(&dentry->d_lru); - dentry->d_flags &= ~DCACHE_SHRINK_LIST; + d_shrink_del(dentry); /* * We found an inuse dentry which was not removed from @@ -861,12 +928,20 @@ static void shrink_dentry_list(struct list_head *list) } rcu_read_unlock(); + /* + * If 'try_to_prune()' returns a dentry, it will + * be the same one we passed in, and d_lock will + * have been held the whole time, so it will not + * have been added to any other lists. We failed + * to get the inode lock. + * + * We just add it back to the shrink list. + */ dentry = try_prune_one_dentry(dentry); rcu_read_lock(); if (dentry) { - dentry->d_flags |= DCACHE_SHRINK_LIST; - list_add(&dentry->d_lru, list); + d_shrink_add(dentry, list); spin_unlock(&dentry->d_lock); } } @@ -894,7 +969,7 @@ dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) * another pass through the LRU. */ if (dentry->d_lockref.count) { - list_del_init(&dentry->d_lru); + d_lru_isolate(dentry); spin_unlock(&dentry->d_lock); return LRU_REMOVED; } @@ -925,9 +1000,7 @@ dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) return LRU_ROTATE; } - dentry->d_flags |= DCACHE_SHRINK_LIST; - list_move_tail(&dentry->d_lru, freeable); - this_cpu_dec(nr_dentry_unused); + d_lru_shrink_move(dentry, freeable); spin_unlock(&dentry->d_lock); return LRU_REMOVED; @@ -972,9 +1045,7 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, if (!spin_trylock(&dentry->d_lock)) return LRU_SKIP; - dentry->d_flags |= DCACHE_SHRINK_LIST; - list_move_tail(&dentry->d_lru, freeable); - this_cpu_dec(nr_dentry_unused); + d_lru_shrink_move(dentry, freeable); spin_unlock(&dentry->d_lock); return LRU_REMOVED; @@ -1362,9 +1433,13 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) if (dentry->d_lockref.count) { dentry_lru_del(dentry); } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { - dentry_lru_del(dentry); - list_add_tail(&dentry->d_lru, &data->dispose); - dentry->d_flags |= DCACHE_SHRINK_LIST; + /* + * We can't use d_lru_shrink_move() because we + * need to get the global LRU lock and do the + * RLU accounting. + */ + d_lru_del(dentry); + d_shrink_add(dentry, &data->dispose); data->found++; ret = D_WALK_NORETRY; }
On Fri, Sep 13, 2013 at 4:25 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> Yes. And I found the opposite bug in one place: when we are collecting
> dentries by walking the parents etc, we do *not* hold the global RCU
> lock, so we cannot use the "d_lru_shrink_list()" thing after all. It's
> correct as far as the internal logic of fs/dcache.c goes, but it
> violates the global LRU list rules. So I replaced that with a
> dentry_lru_del() followed by a d_shrink_add() instead.
Actually, I replaced it with d_lru_del()+d_shrink_add(), because afaik
we should be guaranteed that the dentry in question is on the global
RCU list (we checked that refcount is 0, and it's not on a local
list).
Agreed?
The patch I sent out already had that version, I just "documented" my first one.
Linus
On Fri, Sep 13, 2013 at 04:25:48PM -0400, Linus Torvalds wrote: > On Fri, Sep 13, 2013 at 4:00 PM, Al Viro <viro@zeniv.linux.org.uk> wrote: > \> > > It is right - for one thing, we are holding the lock on that LRU list, > > so list_lru_del() would deadlock right there. For another, the same > > list_lru_walk (OK, list_lru_walk_node()) will do ->nr_items decrement > > when we return LRU_REMOVED to it, so we don't want to do it twice. > > Plain list_del_init() is correct here. > > Yes. And I found the opposite bug in one place: when we are collecting > dentries by walking the parents etc, we do *not* hold the global RCU > lock, ??? LRU list lock, presumably? so we cannot use the "d_lru_shrink_list()" thing after all. It's > correct as far as the internal logic of fs/dcache.c goes, but it > violates the global LRU list rules. So I replaced that with a > dentry_lru_del() followed by a d_shrink_add() instead.
On Fri, Sep 13, 2013 at 4:31 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> On Fri, Sep 13, 2013 at 04:25:48PM -0400, Linus Torvalds wrote:
>>
>> Yes. And I found the opposite bug in one place: when we are collecting
>> dentries by walking the parents etc, we do *not* hold the global RCU
>> lock,
>
> ??? LRU list lock, presumably?
Heh, yes. You wouldn't believe how many times I mis-typed "lru" as
"rcu" while doing that patch.
Much of the other dentry work this merge window was all about rcu, so
my fingers end up being confused..
Linus
Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in: arch/x86/mm/mpx.c between commit: a89652769470 ("x86/mpx: Do not set ->vm_ops on MPX VMAs") from Linus' tree and patch: "mm, mpx: add "vm_flags_t vm_flags" arg to do_mmap_pgoff()" from the akpm tree. I fixed it up (I used the akpm tree version of mpx_map()) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au
Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in: include/net/tcp_memcontrol.h between commit: cdb00777ffad ("tcp_memcontrol: Forward declare cgroup_subsys and mem_cgroup stucts") from Linus' tree and commit: "mm: memcontrol: drop unused @css argument in memcg_init_kmem" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au diff --cc include/net/tcp_memcontrol.h index 01ff7c6efada,dc2da2f8c8b2..000000000000 --- a/include/net/tcp_memcontrol.h +++ b/include/net/tcp_memcontrol.h @@@ -1,9 -1,7 +1,9 @@@ #ifndef _TCP_MEMCG_H #define _TCP_MEMCG_H - struct cgroup_subsys; +struct mem_cgroup; + - int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss); + int tcp_init_cgroup(struct mem_cgroup *memcg); void tcp_destroy_cgroup(struct mem_cgroup *memcg); + #endif /* _TCP_MEMCG_H */
Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in: include/net/tcp_memcontrol.h between commit: cdb00777ffad ("tcp_memcontrol: Forward declare cgroup_subsys and mem_cgroup stucts") from Linus' tree and commit: "net: drop tcp_memcontrol.c" from the akpm tree. I fixed it up (the latter removed the file, so I did that) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwell sfr@canb.auug.org.au
Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in: lib/radix-tree.c between commit: 2b41226b39b6 ("Revert "radix tree test suite: fix compilation"") from Linus' tree and patch: "reimplement IDR and IDA using the radix tree" from the akpm tree. I fixed it up (I added back the include of notifier.h) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell
Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in: include/linux/radix-tree.h between commit: ea07b862ac8e ("mm: workingset: fix use-after-free in shadow node shrinker") from Linus' tree and patch: "Reimplement IDR and IDA using the radix tree" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc include/linux/radix-tree.h index 52bda854593b,50aaac14a4f1..000000000000 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@@ -306,9 -309,9 +309,11 @@@ void radix_tree_iter_replace(struct rad void radix_tree_replace_slot(struct radix_tree_root *root, void **slot, void *item); void __radix_tree_delete_node(struct radix_tree_root *root, - struct radix_tree_node *node); + struct radix_tree_node *node, + radix_tree_update_node_t update_node, + void *private); + void radix_tree_iter_delete(struct radix_tree_root *, + const struct radix_tree_iter *iter); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); void radix_tree_clear_tags(struct radix_tree_root *root,
[-- Attachment #1: Type: text/plain, Size: 746 bytes --] Hi Andrew, Today's linux-next merge of the akpm tree got a conflict in: ipc/mqueue.c between commit: cfb2f6f6e0ba ("Revert "mqueue: switch to on-demand creation of internal mount"") from Linus' tree and patch: "ipc/mqueue: add missing error code in init_mqueue_fs()" from the akpm tree. I fixed it up (I dropped the akpm tree patch) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 488 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1420 bytes --] Hi all, Today's linux-next merge of the akpm tree got a conflict in: arch/arm64/kernel/setup.c between commit: d91680e687f4 ("arm64: Fix /proc/iomem for reserved but not memory regions") from Linus' tree and patch: "memblock: replace alloc_bootmem_low with memblock_alloc_low (2)" from the akpm tree. I fixed it up (the new patch hunk for this file is below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index d0f62dd24c90..c8ba593cc3ac 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -217,8 +217,8 @@ static void __init request_standard_resources(void) kernel_data.end = __pa_symbol(_end - 1); num_standard_resources = memblock.memory.cnt; - standard_resources = alloc_bootmem_low(num_standard_resources * - sizeof(*standard_resources)); + standard_resources = memblock_alloc_low(num_standard_resources * + sizeof(*standard_resources), 0); for_each_memblock(memory, region) { res = &standard_resources[i++]; [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 488 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1532 bytes --] Hi all, Today's linux-next merge of the akpm tree got a conflict in: arch/arm64/kernel/setup.c between commit: d91680e687f4 ("arm64: Fix /proc/iomem for reserved but not memory regions") from Linus' tree and patch: "memblock: stop using implicit alignment to SMP_CACHE_BYTES" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc arch/arm64/kernel/setup.c index 811e3267412a,9084fec753f5..000000000000 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@@ -215,12 -211,8 +215,13 @@@ static void __init request_standard_res kernel_data.start = __pa_symbol(_sdata); kernel_data.end = __pa_symbol(_end - 1); + num_standard_resources = memblock.memory.cnt; + standard_resources = memblock_alloc_low(num_standard_resources * - sizeof(*standard_resources), 0); ++ sizeof(*standard_resources), ++ SMP_CACHE_BYTES); + for_each_memblock(memory, region) { - res = memblock_alloc_low(sizeof(*res), SMP_CACHE_BYTES); + res = &standard_resources[i++]; if (memblock_is_nomap(region)) { res->name = "reserved"; res->flags = IORESOURCE_MEM; [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 488 bytes --]
[-- Attachment #1: Type: text/plain, Size: 1855 bytes --] Hi all, Today's linux-next merge of the akpm tree got a conflict in: include/linux/pagewalk.h between commit: ecaad8aca204 ("mm: Add a walk_page_mapping() function to the pagewalk code") from Linus' tree and patch: "mm: pagewalk: add p4d_entry() and pgd_entry()" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc include/linux/pagewalk.h index 6ec82e92c87f,12004b097eae..000000000000 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@@ -24,11 -24,15 +24,18 @@@ struct mm_walk * "do page table walk over the current vma", returning * a negative value means "abort current page table walk * right now" and returning 1 means "skip the current vma" + * @pre_vma: if set, called before starting walk on a non-null vma. + * @post_vma: if set, called after a walk on a non-null vma, provided + * that @pre_vma and the vma walk succeeded. + * + * p?d_entry callbacks are called even if those levels are folded on a + * particular architecture/configuration. */ struct mm_walk_ops { + int (*pgd_entry)(pgd_t *pgd, unsigned long addr, + unsigned long next, struct mm_walk *walk); + int (*p4d_entry)(p4d_t *p4d, unsigned long addr, + unsigned long next, struct mm_walk *walk); int (*pud_entry)(pud_t *pud, unsigned long addr, unsigned long next, struct mm_walk *walk); int (*pmd_entry)(pmd_t *pmd, unsigned long addr, [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 488 bytes --]
[-- Attachment #1: Type: text/plain, Size: 2389 bytes --] Hi all, Today's linux-next merge of the akpm tree got a conflict in: include/linux/pagewalk.h between commit: 5b932abfc562 ("mm: pagewalk: add test_p?d callbacks") from Linus' tree and patch: "mm: pagewalk: add test_p?d callbacks" from the akpm tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc include/linux/pagewalk.h index 2c9725bdcf1f,fe61448c5900..000000000000 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@@ -24,9 -24,11 +24,14 @@@ struct mm_walk * "do page table walk over the current vma", returning * a negative value means "abort current page table walk * right now" and returning 1 means "skip the current vma" + * @test_pmd: similar to test_walk(), but called for every pmd. + * @test_pud: similar to test_walk(), but called for every pud. + * @test_p4d: similar to test_walk(), but called for every p4d. + * Returning 0 means walk this part of the page tables, + * returning 1 means to skip this range. + * @pre_vma: if set, called before starting walk on a non-null vma. + * @post_vma: if set, called after a walk on a non-null vma, provided + * that @pre_vma and the vma walk succeeded. * * p?d_entry callbacks are called even if those levels are folded on a * particular architecture/configuration. @@@ -49,9 -51,12 +54,15 @@@ struct mm_walk_ops struct mm_walk *walk); int (*test_walk)(unsigned long addr, unsigned long next, struct mm_walk *walk); + int (*test_pmd)(unsigned long addr, unsigned long next, + pmd_t *pmd_start, struct mm_walk *walk); + int (*test_pud)(unsigned long addr, unsigned long next, + pud_t *pud_start, struct mm_walk *walk); + int (*test_p4d)(unsigned long addr, unsigned long next, + p4d_t *p4d_start, struct mm_walk *walk); + int (*pre_vma)(unsigned long start, unsigned long end, + struct mm_walk *walk); + void (*post_vma)(struct mm_walk *walk); }; /** [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 488 bytes --]