From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932321AbdGKG6k (ORCPT ); Tue, 11 Jul 2017 02:58:40 -0400 Received: from mx2.suse.de ([195.135.220.15]:60461 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S932114AbdGKG6j (ORCPT ); Tue, 11 Jul 2017 02:58:39 -0400 Date: Tue, 11 Jul 2017 08:58:34 +0200 From: Michal Hocko To: David Rientjes Cc: linux-mm@kvack.org, Tetsuo Handa , Oleg Nesterov , Andrea Arcangeli , Andrew Morton , LKML Subject: Re: [RFC PATCH] mm, oom: allow oom reaper to race with exit_mmap Message-ID: <20170711065834.GF24852@dhcp22.suse.cz> References: <20170626130346.26314-1-mhocko@kernel.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: User-Agent: Mutt/1.5.23 (2014-03-12) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Mon 10-07-17 16:55:22, David Rientjes wrote: > On Mon, 26 Jun 2017, Michal Hocko wrote: > > > diff --git a/mm/mmap.c b/mm/mmap.c > > index 3bd5ecd20d4d..253808e716dc 100644 > > --- a/mm/mmap.c > > +++ b/mm/mmap.c > > @@ -2962,6 +2962,11 @@ void exit_mmap(struct mm_struct *mm) > > /* Use -1 here to ensure all VMAs in the mm are unmapped */ > > unmap_vmas(&tlb, vma, 0, -1); > > > > + /* > > + * oom reaper might race with exit_mmap so make sure we won't free > > + * page tables or unmap VMAs under its feet > > + */ > > + down_write(&mm->mmap_sem); > > free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); > > tlb_finish_mmu(&tlb, 0, -1); > > > > @@ -2974,7 +2979,9 @@ void exit_mmap(struct mm_struct *mm) > > nr_accounted += vma_pages(vma); > > vma = remove_vma(vma); > > } > > + mm->mmap = NULL; > > vm_unacct_memory(nr_accounted); > > + up_write(&mm->mmap_sem); > > } > > > > /* Insert vm structure into process list sorted by address > > diff --git a/mm/oom_kill.c b/mm/oom_kill.c > > index 
0e2c925e7826..5dc0ff22d567 100644 > > --- a/mm/oom_kill.c > > +++ b/mm/oom_kill.c > > @@ -472,36 +472,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) > > struct vm_area_struct *vma; > > bool ret = true; > > > > - /* > > - * We have to make sure to not race with the victim exit path > > - * and cause premature new oom victim selection: > > - * __oom_reap_task_mm exit_mm > > - * mmget_not_zero > > - * mmput > > - * atomic_dec_and_test > > - * exit_oom_victim > > - * [...] > > - * out_of_memory > > - * select_bad_process > > - * # no TIF_MEMDIE task selects new victim > > - * unmap_page_range # frees some memory > > - */ > > - mutex_lock(&oom_lock); > > - > > - if (!down_read_trylock(&mm->mmap_sem)) { > > - ret = false; > > - goto unlock_oom; > > - } > > - > > - /* > > - * increase mm_users only after we know we will reap something so > > - * that the mmput_async is called only when we have reaped something > > - * and delayed __mmput doesn't matter that much > > - */ > > - if (!mmget_not_zero(mm)) { > > - up_read(&mm->mmap_sem); > > - goto unlock_oom; > > - } > > + if (!down_read_trylock(&mm->mmap_sem)) > > + return false; > > I think this should return true if mm->mmap == NULL here. This? --- diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 5dc0ff22d567..e155d1d8064f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -470,11 +470,14 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) { struct mmu_gather tlb; struct vm_area_struct *vma; - bool ret = true; if (!down_read_trylock(&mm->mmap_sem)) return false; + /* There is nothing to reap so bail out without signs in the log */ + if (!mm->mmap) + goto unlock; + /* * Tell all users of get_user/copy_from_user etc... that the content * is no longer stable. 
No barriers really needed because unmapping @@ -508,9 +511,10 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) K(get_mm_counter(mm, MM_ANONPAGES)), K(get_mm_counter(mm, MM_FILEPAGES)), K(get_mm_counter(mm, MM_SHMEMPAGES))); +unlock: up_read(&mm->mmap_sem); - return ret; + return true; } #define MAX_OOM_REAP_RETRIES 10 -- Michal Hocko SUSE Labs From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-wr0-f200.google.com (mail-wr0-f200.google.com [209.85.128.200]) by kanga.kvack.org (Postfix) with ESMTP id 405C16B04C9 for ; Tue, 11 Jul 2017 02:58:39 -0400 (EDT) Received: by mail-wr0-f200.google.com with SMTP id 4so29450233wrc.15 for ; Mon, 10 Jul 2017 23:58:39 -0700 (PDT) Received: from mx1.suse.de (mx2.suse.de. [195.135.220.15]) by mx.google.com with ESMTPS id j126si8841886wmg.7.2017.07.10.23.58.37 for (version=TLS1 cipher=AES128-SHA bits=128/128); Mon, 10 Jul 2017 23:58:38 -0700 (PDT) Date: Tue, 11 Jul 2017 08:58:34 +0200 From: Michal Hocko Subject: Re: [RFC PATCH] mm, oom: allow oom reaper to race with exit_mmap Message-ID: <20170711065834.GF24852@dhcp22.suse.cz> References: <20170626130346.26314-1-mhocko@kernel.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: Sender: owner-linux-mm@kvack.org List-ID: To: David Rientjes Cc: linux-mm@kvack.org, Tetsuo Handa , Oleg Nesterov , Andrea Arcangeli , Andrew Morton , LKML On Mon 10-07-17 16:55:22, David Rientjes wrote: > On Mon, 26 Jun 2017, Michal Hocko wrote: > > > diff --git a/mm/mmap.c b/mm/mmap.c > > index 3bd5ecd20d4d..253808e716dc 100644 > > --- a/mm/mmap.c > > +++ b/mm/mmap.c > > @@ -2962,6 +2962,11 @@ void exit_mmap(struct mm_struct *mm) > > /* Use -1 here to ensure all VMAs in the mm are unmapped */ > > unmap_vmas(&tlb, vma, 0, -1); > > > > + /* > > + * oom reaper might race with exit_mmap so make sure we won't free > > + * page tables or unmap VMAs under its feet > > + */ > > + down_write(&mm->mmap_sem); > > 
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); > > tlb_finish_mmu(&tlb, 0, -1); > > > > @@ -2974,7 +2979,9 @@ void exit_mmap(struct mm_struct *mm) > > nr_accounted += vma_pages(vma); > > vma = remove_vma(vma); > > } > > + mm->mmap = NULL; > > vm_unacct_memory(nr_accounted); > > + up_write(&mm->mmap_sem); > > } > > > > /* Insert vm structure into process list sorted by address > > diff --git a/mm/oom_kill.c b/mm/oom_kill.c > > index 0e2c925e7826..5dc0ff22d567 100644 > > --- a/mm/oom_kill.c > > +++ b/mm/oom_kill.c > > @@ -472,36 +472,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) > > struct vm_area_struct *vma; > > bool ret = true; > > > > - /* > > - * We have to make sure to not race with the victim exit path > > - * and cause premature new oom victim selection: > > - * __oom_reap_task_mm exit_mm > > - * mmget_not_zero > > - * mmput > > - * atomic_dec_and_test > > - * exit_oom_victim > > - * [...] > > - * out_of_memory > > - * select_bad_process > > - * # no TIF_MEMDIE task selects new victim > > - * unmap_page_range # frees some memory > > - */ > > - mutex_lock(&oom_lock); > > - > > - if (!down_read_trylock(&mm->mmap_sem)) { > > - ret = false; > > - goto unlock_oom; > > - } > > - > > - /* > > - * increase mm_users only after we know we will reap something so > > - * that the mmput_async is called only when we have reaped something > > - * and delayed __mmput doesn't matter that much > > - */ > > - if (!mmget_not_zero(mm)) { > > - up_read(&mm->mmap_sem); > > - goto unlock_oom; > > - } > > + if (!down_read_trylock(&mm->mmap_sem)) > > + return false; > > I think this should return true if mm->mmap == NULL here. This? 
--- diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 5dc0ff22d567..e155d1d8064f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -470,11 +470,14 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) { struct mmu_gather tlb; struct vm_area_struct *vma; - bool ret = true; if (!down_read_trylock(&mm->mmap_sem)) return false; + /* There is nothing to reap so bail out without signs in the log */ + if (!mm->mmap) + goto unlock; + /* * Tell all users of get_user/copy_from_user etc... that the content * is no longer stable. No barriers really needed because unmapping @@ -508,9 +511,10 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) K(get_mm_counter(mm, MM_ANONPAGES)), K(get_mm_counter(mm, MM_FILEPAGES)), K(get_mm_counter(mm, MM_SHMEMPAGES))); +unlock: up_read(&mm->mmap_sem); - return ret; + return true; } #define MAX_OOM_REAP_RETRIES 10 -- Michal Hocko SUSE Labs -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org