From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751286AbcGMUbn (ORCPT ); Wed, 13 Jul 2016 16:31:43 -0400 Received: from mail-io0-f181.google.com ([209.85.223.181]:33031 "EHLO mail-io0-f181.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750904AbcGMUbf (ORCPT ); Wed, 13 Jul 2016 16:31:35 -0400 MIME-Version: 1.0 In-Reply-To: <20160713183347.GK4065@mtj.duckdns.org> References: <20160713182102.GJ4065@mtj.duckdns.org> <20160713183347.GK4065@mtj.duckdns.org> From: Dmitry Shmidt Date: Wed, 13 Jul 2016 13:31:27 -0700 Message-ID: Subject: Re: Severe performance regression w/ 4.4+ on Android due to cgroup locking changes To: Tejun Heo Cc: John Stultz , Ingo Molnar , Peter Zijlstra , lkml , Rom Lemarchand , Colin Cross , Todd Kjos , Oleg Nesterov Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Wed, Jul 13, 2016 at 11:33 AM, Tejun Heo wrote: > On Wed, Jul 13, 2016 at 02:21:02PM -0400, Tejun Heo wrote: >> One interesting thing to try would be replacing it with a regular >> non-percpu rwsem and see how it behaves. That should easily tell us >> whether this is from actual contention or artifacts from percpu_rwsem >> implementation. > > So, something like the following. Can you please see whether this > makes any difference? It is making difference. We need to conduct more tests, but it looks much better. I see only one 18.5 ms delay. Most of time it is <200 us delay. > Thanks. > > diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h > index 5b17de6..bc1e4d8 100644 > --- a/include/linux/cgroup-defs.h > +++ b/include/linux/cgroup-defs.h > @@ -14,7 +14,7 @@ > #include > #include > #include > -#include > +#include > #include > > #ifdef CONFIG_CGROUPS > @@ -518,7 +518,7 @@ struct cgroup_subsys { > unsigned int depends_on; > }; > > -extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; > +extern struct rw_semaphore cgroup_threadgroup_rwsem; > > /** > * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups > @@ -529,7 +529,7 @@ extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; > */ > static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) > { > - percpu_down_read(&cgroup_threadgroup_rwsem); > + down_read(&cgroup_threadgroup_rwsem); > } > > /** > @@ -541,7 +541,7 @@ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) > */ > static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) > { > - percpu_up_read(&cgroup_threadgroup_rwsem); > + up_read(&cgroup_threadgroup_rwsem); > } > > #else /* CONFIG_CGROUPS */ > diff --git a/kernel/cgroup.c b/kernel/cgroup.c > index 86cb5c6..ed9c142 100644 > --- a/kernel/cgroup.c > +++ b/kernel/cgroup.c > @@ -113,7 +113,7 @@ static DEFINE_SPINLOCK(cgroup_file_kn_lock); > */ > static DEFINE_SPINLOCK(release_agent_path_lock); > > -struct percpu_rw_semaphore cgroup_threadgroup_rwsem; > +DECLARE_RWSEM(cgroup_threadgroup_rwsem); > > #define cgroup_assert_mutex_or_rcu_locked() \ > RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ > @@ -2899,7 +2899,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, > if (!cgrp) > return -ENODEV; > > - percpu_down_write(&cgroup_threadgroup_rwsem); > + down_write(&cgroup_threadgroup_rwsem); > rcu_read_lock(); > if (pid) { > tsk = find_task_by_vpid(pid); > @@ -2937,7 +2937,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, > out_unlock_rcu: > rcu_read_unlock(); > out_unlock_threadgroup: > - percpu_up_write(&cgroup_threadgroup_rwsem); > + up_write(&cgroup_threadgroup_rwsem); > for_each_subsys(ss, ssid) > if (ss->post_attach) > ss->post_attach(); > @@ -3077,7 +3077,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) > > lockdep_assert_held(&cgroup_mutex); > > - percpu_down_write(&cgroup_threadgroup_rwsem); > + down_write(&cgroup_threadgroup_rwsem); > > /* look up all csses currently attached to @cgrp's subtree */ > spin_lock_bh(&css_set_lock); > @@ -3112,7 +3112,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) > ret = cgroup_taskset_migrate(&tset, cgrp->root); > out_finish: > cgroup_migrate_finish(&preloaded_csets); > - percpu_up_write(&cgroup_threadgroup_rwsem); > + up_write(&cgroup_threadgroup_rwsem); > return ret; > } > > @@ -5601,7 +5601,7 @@ int __init cgroup_init(void) > int ssid; > > BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); > - BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); > + //BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); > BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); > BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); >