All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Handle PCI unbind
@ 2020-10-20 10:08 Tvrtko Ursulin
  2020-10-20 10:08 ` [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs Tvrtko Ursulin
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2020-10-20 10:08 UTC (permalink / raw)
  To: Intel-gfx; +Cc: Chris Wilson

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Mark the device as closed and keep references to driver data alive to
allow for safe driver unbind with active PMU clients. Perf core does not
otherwise handle this case so we have to do it manually like this.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_pmu.c | 39 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_pmu.h |  4 ++++
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 69c0fa20eba1..51ed7d0efcdc 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -445,6 +445,8 @@ static void i915_pmu_event_destroy(struct perf_event *event)
 		container_of(event->pmu, typeof(*i915), pmu.base);
 
 	drm_WARN_ON(&i915->drm, event->parent);
+
+	drm_dev_put(&i915->drm);
 }
 
 static int
@@ -510,8 +512,12 @@ static int i915_pmu_event_init(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = &i915->pmu;
 	int ret;
 
+	if (pmu->closed)
+		return -ENODEV;
+
 	if (event->attr.type != event->pmu->type)
 		return -ENOENT;
 
@@ -536,8 +542,10 @@ static int i915_pmu_event_init(struct perf_event *event)
 	if (ret)
 		return ret;
 
-	if (!event->parent)
+	if (!event->parent) {
+		drm_dev_get(&i915->drm);
 		event->destroy = i915_pmu_event_destroy;
+	}
 
 	return 0;
 }
@@ -594,9 +602,16 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 
 static void i915_pmu_event_read(struct perf_event *event)
 {
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
 	struct hw_perf_event *hwc = &event->hw;
+	struct i915_pmu *pmu = &i915->pmu;
 	u64 prev, new;
 
+	if (pmu->closed) {
+		event->hw.state = PERF_HES_STOPPED;
+		return;
+	}
 again:
 	prev = local64_read(&hwc->prev_count);
 	new = __i915_pmu_event_read(event);
@@ -724,6 +739,13 @@ static void i915_pmu_disable(struct perf_event *event)
 
 static void i915_pmu_event_start(struct perf_event *event, int flags)
 {
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (pmu->closed)
+		return;
+
 	i915_pmu_enable(event);
 	event->hw.state = 0;
 }
@@ -738,6 +760,13 @@ static void i915_pmu_event_stop(struct perf_event *event, int flags)
 
 static int i915_pmu_event_add(struct perf_event *event, int flags)
 {
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct i915_pmu *pmu = &i915->pmu;
+
+	if (pmu->closed)
+		return -ENODEV;
+
 	if (flags & PERF_EF_START)
 		i915_pmu_event_start(event, flags);
 
@@ -1167,7 +1196,13 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 	if (!pmu->base.event_init)
 		return;
 
-	drm_WARN_ON(&i915->drm, pmu->enable);
+	/*
+	 * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu
+	 * ensures all currently executing ones will have exited before we
+	 * proceed with unregistration.
+	 */
+	pmu->closed = true;
+	synchronize_rcu();
 
 	hrtimer_cancel(&pmu->timer);
 
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 941f0c14037c..59a0d19afb67 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -49,6 +49,10 @@ struct i915_pmu {
 	 * @base: PMU base.
 	 */
 	struct pmu base;
+	/**
+	 * @closed: i915 is unregistering.
+	 */
+	bool closed;
 	/**
 	 * @name: Name as registered with perf core.
 	 */
-- 
2.25.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs
  2020-10-20 10:08 [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Handle PCI unbind Tvrtko Ursulin
@ 2020-10-20 10:08 ` Tvrtko Ursulin
  2020-10-20 11:59   ` Chris Wilson
  2020-10-20 16:11   ` [Intel-gfx] [PATCH v2 " Tvrtko Ursulin
  2020-10-20 12:25 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/pmu: Handle PCI unbind Patchwork
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2020-10-20 10:08 UTC (permalink / raw)
  To: Intel-gfx; +Cc: Daniel Vetter, Chris Wilson

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Since we keep a driver-global mask of online CPUs and use it to decide
whether a PMU needs to be migrated, we need to make sure the migration
is done for all registered PMUs (that is, for every GPU).

To do this we need to track the current CPU for each PMU and base the
decision on whether to migrate on a comparison between global and local
state.

At the same time, since dynamic CPU hotplug notification slots are a
scarce resource and given that we already register a multi-instance type
state, we can and should add multiple instances of the i915 PMU to this
same state and not allocate a new one for every GPU.

v2:
 * Use pr_notice. (Chris)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Daniel Vetter <daniel.vetter@intel.com> # dynamic slot optimisation
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_pci.c |  7 ++++-
 drivers/gpu/drm/i915/i915_pmu.c | 50 ++++++++++++++++++++-------------
 drivers/gpu/drm/i915/i915_pmu.h |  6 +++-
 3 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 27964ac0638a..a384f51c91c1 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1150,9 +1150,13 @@ static int __init i915_init(void)
 		return 0;
 	}
 
+	i915_pmu_init();
+
 	err = pci_register_driver(&i915_pci_driver);
-	if (err)
+	if (err) {
+		i915_pmu_exit();
 		return err;
+	}
 
 	i915_perf_sysctl_register();
 	return 0;
@@ -1166,6 +1170,7 @@ static void __exit i915_exit(void)
 	i915_perf_sysctl_unregister();
 	pci_unregister_driver(&i915_pci_driver);
 	i915_globals_exit();
+	i915_pmu_exit();
 }
 
 module_init(i915_init);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 51ed7d0efcdc..0d6c0945621e 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -30,6 +30,7 @@
 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
 
 static cpumask_t i915_pmu_cpumask;
+static unsigned int i915_pmu_target_cpu = -1;
 
 static u8 engine_config_sample(u64 config)
 {
@@ -1049,25 +1050,32 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
 {
 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
-	unsigned int target;
+	unsigned int target = i915_pmu_target_cpu;
 
 	GEM_BUG_ON(!pmu->base.event_init);
 
 	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
 		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+
 		/* Migrate events if there is a valid target */
 		if (target < nr_cpu_ids) {
 			cpumask_set_cpu(target, &i915_pmu_cpumask);
-			perf_pmu_migrate_context(&pmu->base, cpu, target);
+			i915_pmu_target_cpu = target;
 		}
 	}
 
+	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
+		perf_pmu_migrate_context(&pmu->base, cpu, target);
+		pmu->cpuhp.cpu = target;
+	}
+
 	return 0;
 }
 
-static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
+static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
+
+void i915_pmu_init(void)
 {
-	enum cpuhp_state slot;
 	int ret;
 
 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
@@ -1075,27 +1083,29 @@ static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
 				      i915_pmu_cpu_online,
 				      i915_pmu_cpu_offline);
 	if (ret < 0)
-		return ret;
+		pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
+			  ret);
+	else
+		cpuhp_slot = ret;
+}
 
-	slot = ret;
-	ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node);
-	if (ret) {
-		cpuhp_remove_multi_state(slot);
-		return ret;
-	}
+void i915_pmu_exit(void)
+{
+	if (cpuhp_slot != CPUHP_INVALID)
+		cpuhp_remove_multi_state(cpuhp_slot);
+}
 
-	pmu->cpuhp.slot = slot;
-	return 0;
+static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
+{
+	if (cpuhp_slot == CPUHP_INVALID)
+		return -EINVAL;
+
+	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
 }
 
 static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
 {
-	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
-
-	drm_WARN_ON(&i915->drm, pmu->cpuhp.slot == CPUHP_INVALID);
-	drm_WARN_ON(&i915->drm, cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node));
-	cpuhp_remove_multi_state(pmu->cpuhp.slot);
-	pmu->cpuhp.slot = CPUHP_INVALID;
+	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
 }
 
 static bool is_igp(struct drm_i915_private *i915)
@@ -1129,7 +1139,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	spin_lock_init(&pmu->lock);
 	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	pmu->timer.function = i915_sample;
-	pmu->cpuhp.slot = CPUHP_INVALID;
+	pmu->cpuhp.cpu = -1;
 
 	if (!is_igp(i915)) {
 		pmu->name = kasprintf(GFP_KERNEL,
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 59a0d19afb67..a24885ab415c 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -43,7 +43,7 @@ struct i915_pmu {
 	 */
 	struct {
 		struct hlist_node node;
-		enum cpuhp_state slot;
+		unsigned int cpu;
 	} cpuhp;
 	/**
 	 * @base: PMU base.
@@ -126,11 +126,15 @@ struct i915_pmu {
 };
 
 #ifdef CONFIG_PERF_EVENTS
+void i915_pmu_init(void);
+void i915_pmu_exit(void);
 void i915_pmu_register(struct drm_i915_private *i915);
 void i915_pmu_unregister(struct drm_i915_private *i915);
 void i915_pmu_gt_parked(struct drm_i915_private *i915);
 void i915_pmu_gt_unparked(struct drm_i915_private *i915);
 #else
+static inline void i915_pmu_init(void) {}
+static inline void i915_pmu_exit(void) {}
 static inline void i915_pmu_register(struct drm_i915_private *i915) {}
 static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
 static inline void i915_pmu_gt_parked(struct drm_i915_private *i915) {}
-- 
2.25.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs
  2020-10-20 10:08 ` [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs Tvrtko Ursulin
@ 2020-10-20 11:59   ` Chris Wilson
  2020-10-20 12:10     ` Chris Wilson
  2020-10-20 16:11   ` [Intel-gfx] [PATCH v2 " Tvrtko Ursulin
  1 sibling, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-10-20 11:59 UTC (permalink / raw)
  To: Intel-gfx, Tvrtko Ursulin; +Cc: Daniel Vetter

Quoting Tvrtko Ursulin (2020-10-20 11:08:22)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Since we keep a driver global mask of online CPUs and base the decision
> whether PMU needs to be migrated upon it, we need to make sure the
> migration is done for all registered PMUs (so GPUs).
> 
> To do this we need to track the current CPU for each PMU and base the
> decision on whether to migrate on a comparison between global and local
> state.
> 
> At the same time, since dynamic CPU hotplug notification slots are a
> scarce resource and given how we already register the multi instance type
> state, we can and should add multiple instance of the i915 PMU to this
> same state and not allocate a new one for every GPU.
> 
> v2:
>  * Use pr_notice. (Chris)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Suggested-by: Daniel Vetter <daniel.vetter@intel.com> # dynamic slot optimisation
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_pci.c |  7 ++++-
>  drivers/gpu/drm/i915/i915_pmu.c | 50 ++++++++++++++++++++-------------
>  drivers/gpu/drm/i915/i915_pmu.h |  6 +++-
>  3 files changed, 41 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 27964ac0638a..a384f51c91c1 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1150,9 +1150,13 @@ static int __init i915_init(void)
>                 return 0;
>         }
>  
> +       i915_pmu_init();
> +
>         err = pci_register_driver(&i915_pci_driver);
> -       if (err)
> +       if (err) {
> +               i915_pmu_exit();
>                 return err;
> +       }
>  
>         i915_perf_sysctl_register();
>         return 0;
> @@ -1166,6 +1170,7 @@ static void __exit i915_exit(void)
>         i915_perf_sysctl_unregister();
>         pci_unregister_driver(&i915_pci_driver);
>         i915_globals_exit();
> +       i915_pmu_exit();
>  }
>  
>  module_init(i915_init);
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 51ed7d0efcdc..0d6c0945621e 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -30,6 +30,7 @@
>  #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
>  
>  static cpumask_t i915_pmu_cpumask;
> +static unsigned int i915_pmu_target_cpu = -1;
>  
>  static u8 engine_config_sample(u64 config)
>  {
> @@ -1049,25 +1050,32 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
>  static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
>  {
>         struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
> -       unsigned int target;
> +       unsigned int target = i915_pmu_target_cpu;

So we still have multiple callbacks, one per pmu. But each callback is
now stored in a list from the cpuhp_slot instead of each callback having
its own slot.

>  
>         GEM_BUG_ON(!pmu->base.event_init);
>  
>         if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {

On first callback...

>                 target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);

Pick any other cpu.

> +
>                 /* Migrate events if there is a valid target */
>                 if (target < nr_cpu_ids) {
>                         cpumask_set_cpu(target, &i915_pmu_cpumask);
> -                       perf_pmu_migrate_context(&pmu->base, cpu, target);
> +                       i915_pmu_target_cpu = target;

Store target for all callbacks.

>                 }
>         }
>  
> +       if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {

If global [i915_pmu_target_cpu] target has changed, update perf.

> +               perf_pmu_migrate_context(&pmu->base, cpu, target);
> +               pmu->cpuhp.cpu = target;

It is claimed that cpuhp_state_remove_instance() will call the offline
callback for all online cpus... Do we need a pmu->base.state != STOPPED
guard?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs
  2020-10-20 11:59   ` Chris Wilson
@ 2020-10-20 12:10     ` Chris Wilson
  2020-10-20 12:33       ` Tvrtko Ursulin
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-10-20 12:10 UTC (permalink / raw)
  To: Intel-gfx, Tvrtko Ursulin; +Cc: Daniel Vetter

Quoting Chris Wilson (2020-10-20 12:59:57)
> Quoting Tvrtko Ursulin (2020-10-20 11:08:22)
> > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > 
> > Since we keep a driver global mask of online CPUs and base the decision
> > whether PMU needs to be migrated upon it, we need to make sure the
> > migration is done for all registered PMUs (so GPUs).
> > 
> > To do this we need to track the current CPU for each PMU and base the
> > decision on whether to migrate on a comparison between global and local
> > state.
> > 
> > At the same time, since dynamic CPU hotplug notification slots are a
> > scarce resource and given how we already register the multi instance type
> > state, we can and should add multiple instance of the i915 PMU to this
> > same state and not allocate a new one for every GPU.
> > 
> > v2:
> >  * Use pr_notice. (Chris)
> > 
> > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Suggested-by: Daniel Vetter <daniel.vetter@intel.com> # dynamic slot optimisation
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_pci.c |  7 ++++-
> >  drivers/gpu/drm/i915/i915_pmu.c | 50 ++++++++++++++++++++-------------
> >  drivers/gpu/drm/i915/i915_pmu.h |  6 +++-
> >  3 files changed, 41 insertions(+), 22 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> > index 27964ac0638a..a384f51c91c1 100644
> > --- a/drivers/gpu/drm/i915/i915_pci.c
> > +++ b/drivers/gpu/drm/i915/i915_pci.c
> > @@ -1150,9 +1150,13 @@ static int __init i915_init(void)
> >                 return 0;
> >         }
> >  
> > +       i915_pmu_init();
> > +
> >         err = pci_register_driver(&i915_pci_driver);
> > -       if (err)
> > +       if (err) {
> > +               i915_pmu_exit();
> >                 return err;
> > +       }
> >  
> >         i915_perf_sysctl_register();
> >         return 0;
> > @@ -1166,6 +1170,7 @@ static void __exit i915_exit(void)
> >         i915_perf_sysctl_unregister();
> >         pci_unregister_driver(&i915_pci_driver);
> >         i915_globals_exit();
> > +       i915_pmu_exit();
> >  }
> >  
> >  module_init(i915_init);
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index 51ed7d0efcdc..0d6c0945621e 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -30,6 +30,7 @@
> >  #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
> >  
> >  static cpumask_t i915_pmu_cpumask;
> > +static unsigned int i915_pmu_target_cpu = -1;
> >  
> >  static u8 engine_config_sample(u64 config)
> >  {
> > @@ -1049,25 +1050,32 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
> >  static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
> >  {
> >         struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
> > -       unsigned int target;
> > +       unsigned int target = i915_pmu_target_cpu;
> 
> So we still have multiple callbacks, one per pmu. But each callback is
> now stored in a list from the cpuhp_slot instead of each callback having
> its own slot.
> 
> >  
> >         GEM_BUG_ON(!pmu->base.event_init);
> >  
> >         if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
> 
> On first callback...
> 
> >                 target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
> 
> Pick any other cpu.
> 
> > +
> >                 /* Migrate events if there is a valid target */
> >                 if (target < nr_cpu_ids) {
> >                         cpumask_set_cpu(target, &i915_pmu_cpumask);
> > -                       perf_pmu_migrate_context(&pmu->base, cpu, target);
> > +                       i915_pmu_target_cpu = target;
> 
> Store target for all callbacks.
> 
> >                 }
> >         }
> >  
> > +       if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
> 
> If global [i915_pmu_target_cpu] target has changed, update perf.
> 
> > +               perf_pmu_migrate_context(&pmu->base, cpu, target);
> > +               pmu->cpuhp.cpu = target;
> 
> It is claimed that cpuhp_state_remove_instance() will call the offline
> callback for all online cpus... Do we need a pmu->base.state != STOPPED
> guard?

s/claimed/it definitely does :)/

Or rather pmu->closed.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/pmu: Handle PCI unbind
  2020-10-20 10:08 [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Handle PCI unbind Tvrtko Ursulin
  2020-10-20 10:08 ` [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs Tvrtko Ursulin
@ 2020-10-20 12:25 ` Patchwork
  2020-10-20 15:19 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-10-20 12:25 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 4660 bytes --]

== Series Details ==

Series: series starting with [1/2] drm/i915/pmu: Handle PCI unbind
URL   : https://patchwork.freedesktop.org/series/82864/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_9169 -> Patchwork_18740
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/index.html

Known issues
------------

  Here are the changes found in Patchwork_18740 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-byt-j1900:       [PASS][1] -> [DMESG-WARN][2] ([i915#1982])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-byt-j1900/igt@i915_pm_rpm@basic-pci-d3-state.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/fi-byt-j1900/igt@i915_pm_rpm@basic-pci-d3-state.html

  * igt@kms_chamelium@hdmi-crc-fast:
    - fi-kbl-7500u:       [PASS][3] -> [DMESG-WARN][4] ([i915#2203])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-kbl-7500u/igt@kms_chamelium@hdmi-crc-fast.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/fi-kbl-7500u/igt@kms_chamelium@hdmi-crc-fast.html

  
#### Possible fixes ####

  * igt@i915_module_load@reload:
    - fi-icl-y:           [DMESG-WARN][5] ([i915#1982]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-icl-y/igt@i915_module_load@reload.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/fi-icl-y/igt@i915_module_load@reload.html

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-bsw-n3050:       [DMESG-WARN][7] ([i915#1982]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-bsw-n3050/igt@i915_pm_rpm@basic-pci-d3-state.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/fi-bsw-n3050/igt@i915_pm_rpm@basic-pci-d3-state.html

  * igt@i915_selftest@live@gt_heartbeat:
    - fi-kbl-soraka:      [DMESG-FAIL][9] ([i915#541]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-kbl-soraka/igt@i915_selftest@live@gt_heartbeat.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/fi-kbl-soraka/igt@i915_selftest@live@gt_heartbeat.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
    - {fi-kbl-7560u}:     [DMESG-WARN][11] ([i915#1982]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-kbl-7560u/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/fi-kbl-7560u/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
    - fi-bsw-kefka:       [DMESG-WARN][13] ([i915#1982]) -> [PASS][14] +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-bsw-kefka/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/fi-bsw-kefka/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html

  
#### Warnings ####

  * igt@i915_pm_rpm@basic-rte:
    - fi-kbl-guc:         [DMESG-FAIL][15] ([i915#2203]) -> [SKIP][16] ([fdo#109271])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-kbl-guc/igt@i915_pm_rpm@basic-rte.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/fi-kbl-guc/igt@i915_pm_rpm@basic-rte.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2203]: https://gitlab.freedesktop.org/drm/intel/issues/2203
  [i915#541]: https://gitlab.freedesktop.org/drm/intel/issues/541


Participating hosts (45 -> 39)
------------------------------

  Missing    (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_9169 -> Patchwork_18740

  CI-20190529: 20190529
  CI_DRM_9169: 8a581847c1bea831f8edfbb813225df47fe28a3a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5822: b4bcf05cb9839037128905deda7146434155cc41 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_18740: 4ff04849da48332f9715dcca1d196436b2320799 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

4ff04849da48 drm/i915/pmu: Fix CPU hotplug with multiple GPUs
7c0f98609c66 drm/i915/pmu: Handle PCI unbind

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/index.html

[-- Attachment #1.2: Type: text/html, Size: 5800 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs
  2020-10-20 12:10     ` Chris Wilson
@ 2020-10-20 12:33       ` Tvrtko Ursulin
  2020-10-20 12:40         ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Tvrtko Ursulin @ 2020-10-20 12:33 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx; +Cc: Daniel Vetter


On 20/10/2020 13:10, Chris Wilson wrote:
> Quoting Chris Wilson (2020-10-20 12:59:57)
>> Quoting Tvrtko Ursulin (2020-10-20 11:08:22)
>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>
>>> Since we keep a driver global mask of online CPUs and base the decision
>>> whether PMU needs to be migrated upon it, we need to make sure the
>>> migration is done for all registered PMUs (so GPUs).
>>>
>>> To do this we need to track the current CPU for each PMU and base the
>>> decision on whether to migrate on a comparison between global and local
>>> state.
>>>
>>> At the same time, since dynamic CPU hotplug notification slots are a
>>> scarce resource and given how we already register the multi instance type
>>> state, we can and should add multiple instance of the i915 PMU to this
>>> same state and not allocate a new one for every GPU.
>>>
>>> v2:
>>>   * Use pr_notice. (Chris)
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Suggested-by: Daniel Vetter <daniel.vetter@intel.com> # dynamic slot optimisation
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>   drivers/gpu/drm/i915/i915_pci.c |  7 ++++-
>>>   drivers/gpu/drm/i915/i915_pmu.c | 50 ++++++++++++++++++++-------------
>>>   drivers/gpu/drm/i915/i915_pmu.h |  6 +++-
>>>   3 files changed, 41 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>>> index 27964ac0638a..a384f51c91c1 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -1150,9 +1150,13 @@ static int __init i915_init(void)
>>>                  return 0;
>>>          }
>>>   
>>> +       i915_pmu_init();
>>> +
>>>          err = pci_register_driver(&i915_pci_driver);
>>> -       if (err)
>>> +       if (err) {
>>> +               i915_pmu_exit();
>>>                  return err;
>>> +       }
>>>   
>>>          i915_perf_sysctl_register();
>>>          return 0;
>>> @@ -1166,6 +1170,7 @@ static void __exit i915_exit(void)
>>>          i915_perf_sysctl_unregister();
>>>          pci_unregister_driver(&i915_pci_driver);
>>>          i915_globals_exit();
>>> +       i915_pmu_exit();
>>>   }
>>>   
>>>   module_init(i915_init);
>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>>> index 51ed7d0efcdc..0d6c0945621e 100644
>>> --- a/drivers/gpu/drm/i915/i915_pmu.c
>>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>>> @@ -30,6 +30,7 @@
>>>   #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
>>>   
>>>   static cpumask_t i915_pmu_cpumask;
>>> +static unsigned int i915_pmu_target_cpu = -1;
>>>   
>>>   static u8 engine_config_sample(u64 config)
>>>   {
>>> @@ -1049,25 +1050,32 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
>>>   static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
>>>   {
>>>          struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
>>> -       unsigned int target;
>>> +       unsigned int target = i915_pmu_target_cpu;
>>
>> So we still have multiple callbacks, one per pmu. But each callback is
>> now stored in a list from the cpuhp_slot instead of each callback having
>> its own slot.
>>
>>>   
>>>          GEM_BUG_ON(!pmu->base.event_init);
>>>   
>>>          if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
>>
>> On first callback...
>>
>>>                  target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
>>
>> Pick any other cpu.
>>
>>> +
>>>                  /* Migrate events if there is a valid target */
>>>                  if (target < nr_cpu_ids) {
>>>                          cpumask_set_cpu(target, &i915_pmu_cpumask);
>>> -                       perf_pmu_migrate_context(&pmu->base, cpu, target);
>>> +                       i915_pmu_target_cpu = target;
>>
>> Store target for all callbacks.
>>
>>>                  }
>>>          }
>>>   
>>> +       if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
>>
>> If global [i915_pmu_target_cpu] target has changed, update perf.
>>
>>> +               perf_pmu_migrate_context(&pmu->base, cpu, target);
>>> +               pmu->cpuhp.cpu = target;
>>
>> It is claimed that cpuhp_state_remove_instance() will call the offline
>> callback for all online cpus... Do we need a pmu->base.state != STOPPED
>> guard?
> 
> s/claimed/it definitely does :)/
> 
> Or rather pmu->closed.

Hm why? You think perf_pmu_migrate_context accesses something in the PMU 
outside of the already protected entry points?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs
  2020-10-20 12:33       ` Tvrtko Ursulin
@ 2020-10-20 12:40         ` Chris Wilson
  2020-10-20 13:05           ` Tvrtko Ursulin
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-10-20 12:40 UTC (permalink / raw)
  To: Intel-gfx, Tvrtko Ursulin; +Cc: Daniel Vetter

Quoting Tvrtko Ursulin (2020-10-20 13:33:12)
> 
> On 20/10/2020 13:10, Chris Wilson wrote:
> > Quoting Chris Wilson (2020-10-20 12:59:57)
> >> Quoting Tvrtko Ursulin (2020-10-20 11:08:22)
> >>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>
> >>> Since we keep a driver global mask of online CPUs and base the decision
> >>> whether PMU needs to be migrated upon it, we need to make sure the
> >>> migration is done for all registered PMUs (so GPUs).
> >>>
> >>> To do this we need to track the current CPU for each PMU and base the
> >>> decision on whether to migrate on a comparison between global and local
> >>> state.
> >>>
> >>> At the same time, since dynamic CPU hotplug notification slots are a
> >>> scarce resource and given how we already register the multi instance type
> >>> state, we can and should add multiple instance of the i915 PMU to this
> >>> same state and not allocate a new one for every GPU.
> >>>
> >>> v2:
> >>>   * Use pr_notice. (Chris)
> >>>
> >>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> Suggested-by: Daniel Vetter <daniel.vetter@intel.com> # dynamic slot optimisation
> >>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>> ---
> >>>   drivers/gpu/drm/i915/i915_pci.c |  7 ++++-
> >>>   drivers/gpu/drm/i915/i915_pmu.c | 50 ++++++++++++++++++++-------------
> >>>   drivers/gpu/drm/i915/i915_pmu.h |  6 +++-
> >>>   3 files changed, 41 insertions(+), 22 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> >>> index 27964ac0638a..a384f51c91c1 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pci.c
> >>> +++ b/drivers/gpu/drm/i915/i915_pci.c
> >>> @@ -1150,9 +1150,13 @@ static int __init i915_init(void)
> >>>                  return 0;
> >>>          }
> >>>   
> >>> +       i915_pmu_init();
> >>> +
> >>>          err = pci_register_driver(&i915_pci_driver);
> >>> -       if (err)
> >>> +       if (err) {
> >>> +               i915_pmu_exit();
> >>>                  return err;
> >>> +       }
> >>>   
> >>>          i915_perf_sysctl_register();
> >>>          return 0;
> >>> @@ -1166,6 +1170,7 @@ static void __exit i915_exit(void)
> >>>          i915_perf_sysctl_unregister();
> >>>          pci_unregister_driver(&i915_pci_driver);
> >>>          i915_globals_exit();
> >>> +       i915_pmu_exit();
> >>>   }
> >>>   
> >>>   module_init(i915_init);
> >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> >>> index 51ed7d0efcdc..0d6c0945621e 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pmu.c
> >>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> >>> @@ -30,6 +30,7 @@
> >>>   #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
> >>>   
> >>>   static cpumask_t i915_pmu_cpumask;
> >>> +static unsigned int i915_pmu_target_cpu = -1;
> >>>   
> >>>   static u8 engine_config_sample(u64 config)
> >>>   {
> >>> @@ -1049,25 +1050,32 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
> >>>   static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
> >>>   {
> >>>          struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
> >>> -       unsigned int target;
> >>> +       unsigned int target = i915_pmu_target_cpu;
> >>
> >> So we still have multiple callbacks, one per pmu. But each callback is
> >> now stored in a list from the cpuhp_slot instead of each callback having
> >> its own slot.
> >>
> >>>   
> >>>          GEM_BUG_ON(!pmu->base.event_init);
> >>>   
> >>>          if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
> >>
> >> On first callback...
> >>
> >>>                  target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
> >>
> >> Pick any other cpu.
> >>
> >>> +
> >>>                  /* Migrate events if there is a valid target */
> >>>                  if (target < nr_cpu_ids) {
> >>>                          cpumask_set_cpu(target, &i915_pmu_cpumask);
> >>> -                       perf_pmu_migrate_context(&pmu->base, cpu, target);
> >>> +                       i915_pmu_target_cpu = target;
> >>
> >> Store target for all callbacks.
> >>
> >>>                  }
> >>>          }
> >>>   
> >>> +       if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
> >>
> >> If global [i915_pmu_target_cpu] target has changed, update perf.
> >>
> >>> +               perf_pmu_migrate_context(&pmu->base, cpu, target);
> >>> +               pmu->cpuhp.cpu = target;
> >>
> >> It is claimed that cpuhp_state_remove_instance() will call the offline
> >> callback for all online cpus... Do we need a pmu->base.state != STOPPED
> >> guard?
> > 
> > s/claimed/it definitely does :)/
> > 
> > Or rather pmu->closed.
> 
> Hm why? You think perf_pmu_migrate_context accesses something in the PMU 
> outside of the already protected entry points?

If this callback is being called for every online cpu when we unplug one
device, we then believe that no cpus remain online for all other devices.
Should a cpu then be offlined, target is -1u, so greater than
nr_cpu_ids, and in the worst case we move the perf context to the void;
in the best case we fail to migrate the perf context off the dying cpu.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs
  2020-10-20 12:40         ` Chris Wilson
@ 2020-10-20 13:05           ` Tvrtko Ursulin
  0 siblings, 0 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2020-10-20 13:05 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx; +Cc: Daniel Vetter


On 20/10/2020 13:40, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-10-20 13:33:12)
>>
>> On 20/10/2020 13:10, Chris Wilson wrote:
>>> Quoting Chris Wilson (2020-10-20 12:59:57)
>>>> Quoting Tvrtko Ursulin (2020-10-20 11:08:22)
>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>
>>>>> Since we keep a driver global mask of online CPUs and base the decision
>>>>> whether PMU needs to be migrated upon it, we need to make sure the
>>>>> migration is done for all registered PMUs (so GPUs).
>>>>>
>>>>> To do this we need to track the current CPU for each PMU and base the
>>>>> decision on whether to migrate on a comparison between global and local
>>>>> state.
>>>>>
>>>>> At the same time, since dynamic CPU hotplug notification slots are a
>>>>> scarce resource and given how we already register the multi instance type
>>>>> state, we can and should add multiple instance of the i915 PMU to this
>>>>> same state and not allocate a new one for every GPU.
>>>>>
>>>>> v2:
>>>>>    * Use pr_notice. (Chris)
>>>>>
>>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> Suggested-by: Daniel Vetter <daniel.vetter@intel.com> # dynamic slot optimisation
>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> ---
>>>>>    drivers/gpu/drm/i915/i915_pci.c |  7 ++++-
>>>>>    drivers/gpu/drm/i915/i915_pmu.c | 50 ++++++++++++++++++++-------------
>>>>>    drivers/gpu/drm/i915/i915_pmu.h |  6 +++-
>>>>>    3 files changed, 41 insertions(+), 22 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>>>>> index 27964ac0638a..a384f51c91c1 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>>> @@ -1150,9 +1150,13 @@ static int __init i915_init(void)
>>>>>                   return 0;
>>>>>           }
>>>>>    
>>>>> +       i915_pmu_init();
>>>>> +
>>>>>           err = pci_register_driver(&i915_pci_driver);
>>>>> -       if (err)
>>>>> +       if (err) {
>>>>> +               i915_pmu_exit();
>>>>>                   return err;
>>>>> +       }
>>>>>    
>>>>>           i915_perf_sysctl_register();
>>>>>           return 0;
>>>>> @@ -1166,6 +1170,7 @@ static void __exit i915_exit(void)
>>>>>           i915_perf_sysctl_unregister();
>>>>>           pci_unregister_driver(&i915_pci_driver);
>>>>>           i915_globals_exit();
>>>>> +       i915_pmu_exit();
>>>>>    }
>>>>>    
>>>>>    module_init(i915_init);
>>>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>>>>> index 51ed7d0efcdc..0d6c0945621e 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_pmu.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>>>>> @@ -30,6 +30,7 @@
>>>>>    #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
>>>>>    
>>>>>    static cpumask_t i915_pmu_cpumask;
>>>>> +static unsigned int i915_pmu_target_cpu = -1;
>>>>>    
>>>>>    static u8 engine_config_sample(u64 config)
>>>>>    {
>>>>> @@ -1049,25 +1050,32 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
>>>>>    static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
>>>>>    {
>>>>>           struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
>>>>> -       unsigned int target;
>>>>> +       unsigned int target = i915_pmu_target_cpu;
>>>>
>>>> So we still have multiple callbacks, one per pmu. But each callback is
>>>> now stored in a list from the cpuhp_slot instead of each callback having
>>>> its own slot.
>>>>
>>>>>    
>>>>>           GEM_BUG_ON(!pmu->base.event_init);
>>>>>    
>>>>>           if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
>>>>
>>>> On first callback...
>>>>
>>>>>                   target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
>>>>
>>>> Pick any other cpu.
>>>>
>>>>> +
>>>>>                   /* Migrate events if there is a valid target */
>>>>>                   if (target < nr_cpu_ids) {
>>>>>                           cpumask_set_cpu(target, &i915_pmu_cpumask);
>>>>> -                       perf_pmu_migrate_context(&pmu->base, cpu, target);
>>>>> +                       i915_pmu_target_cpu = target;
>>>>
>>>> Store target for all callbacks.
>>>>
>>>>>                   }
>>>>>           }
>>>>>    
>>>>> +       if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
>>>>
>>>> If global [i915_pmu_target_cpu] target has changed, update perf.
>>>>
>>>>> +               perf_pmu_migrate_context(&pmu->base, cpu, target);
>>>>> +               pmu->cpuhp.cpu = target;
>>>>
>>>> It is claimed that cpuhp_state_remove_instance() will call the offline
>>>> callback for all online cpus... Do we need a pmu->base.state != STOPPED
>>>> guard?
>>>
>>> s/claimed/it definitely does :)/
>>>
>>> Or rather pmu->closed.
>>
>> Hm why? You think perf_pmu_migrate_context accesses something in the PMU
>> outside of the already protected entry points?
> 
> If this callback is being called for every online when we unplug one
> device, we then believe that no cpus remain online for all other devices.
> Should a cpu then be offlined, target is -1u so greater than
> nr_cpu_online and we move the perf context to the void, worst case, in
> the best case we fail to migrate the perf context off the dying cpu.

Well spotted nasty interaction, thanks.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [1/2] drm/i915/pmu: Handle PCI unbind
  2020-10-20 10:08 [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Handle PCI unbind Tvrtko Ursulin
  2020-10-20 10:08 ` [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs Tvrtko Ursulin
  2020-10-20 12:25 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/pmu: Handle PCI unbind Patchwork
@ 2020-10-20 15:19 ` Patchwork
  2020-10-20 17:28 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/pmu: Handle PCI unbind (rev2) Patchwork
  2020-10-20 19:47 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-10-20 15:19 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 17714 bytes --]

== Series Details ==

Series: series starting with [1/2] drm/i915/pmu: Handle PCI unbind
URL   : https://patchwork.freedesktop.org/series/82864/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_9169_full -> Patchwork_18740_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_18740_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_18740_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_18740_full:

### IGT changes ###

#### Possible regressions ####

  * igt@prime_vgem@coherency-blt:
    - shard-hsw:          [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-hsw1/igt@prime_vgem@coherency-blt.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-hsw6/igt@prime_vgem@coherency-blt.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * {igt@core_hotunplug@hotrebind}:
    - shard-hsw:          NOTRUN -> [WARN][3]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-hsw6/igt@core_hotunplug@hotrebind.html

  
Known issues
------------

  Here are the changes found in Patchwork_18740_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_whisper@basic-forked-all:
    - shard-glk:          [PASS][4] -> [DMESG-WARN][5] ([i915#118] / [i915#95])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-glk3/igt@gem_exec_whisper@basic-forked-all.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-glk8/igt@gem_exec_whisper@basic-forked-all.html

  * igt@gem_userptr_blits@unsync-unmap-cycles:
    - shard-skl:          [PASS][6] -> [TIMEOUT][7] ([i915#2424])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl3/igt@gem_userptr_blits@unsync-unmap-cycles.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl7/igt@gem_userptr_blits@unsync-unmap-cycles.html

  * igt@i915_selftest@live@execlists:
    - shard-skl:          [PASS][8] -> [INCOMPLETE][9] ([CI#80])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl2/igt@i915_selftest@live@execlists.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl3/igt@i915_selftest@live@execlists.html

  * igt@i915_suspend@debugfs-reader:
    - shard-kbl:          [PASS][10] -> [INCOMPLETE][11] ([i915#155])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-kbl7/igt@i915_suspend@debugfs-reader.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-kbl6/igt@i915_suspend@debugfs-reader.html

  * igt@kms_big_fb@y-tiled-8bpp-rotate-180:
    - shard-apl:          [PASS][12] -> [DMESG-WARN][13] ([i915#1635] / [i915#1982]) +1 similar issue
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-apl2/igt@kms_big_fb@y-tiled-8bpp-rotate-180.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-apl2/igt@kms_big_fb@y-tiled-8bpp-rotate-180.html

  * igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy:
    - shard-hsw:          [PASS][14] -> [FAIL][15] ([i915#96])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-hsw6/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-hsw1/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy.html

  * igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled:
    - shard-skl:          [PASS][16] -> [DMESG-WARN][17] ([i915#1982]) +8 similar issues
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl1/igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl1/igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled.html

  * igt@kms_draw_crc@draw-method-xrgb2101010-blt-untiled:
    - shard-snb:          [PASS][18] -> [FAIL][19] ([i915#54])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-snb2/igt@kms_draw_crc@draw-method-xrgb2101010-blt-untiled.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-snb2/igt@kms_draw_crc@draw-method-xrgb2101010-blt-untiled.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@c-edp1:
    - shard-skl:          [PASS][20] -> [FAIL][21] ([i915#79])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl7/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-edp1.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl10/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-edp1.html

  * igt@kms_flip@plain-flip-fb-recreate-interruptible@a-edp1:
    - shard-skl:          [PASS][22] -> [FAIL][23] ([i915#2122])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl8/igt@kms_flip@plain-flip-fb-recreate-interruptible@a-edp1.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl2/igt@kms_flip@plain-flip-fb-recreate-interruptible@a-edp1.html

  * igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-render:
    - shard-tglb:         [PASS][24] -> [DMESG-WARN][25] ([i915#1982]) +1 similar issue
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-tglb1/igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-render.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-tglb8/igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-render.html

  * igt@kms_hdr@bpc-switch-dpms:
    - shard-skl:          [PASS][26] -> [FAIL][27] ([i915#1188])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl6/igt@kms_hdr@bpc-switch-dpms.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl2/igt@kms_hdr@bpc-switch-dpms.html

  * igt@kms_psr@psr2_cursor_render:
    - shard-iclb:         [PASS][28] -> [SKIP][29] ([fdo#109441]) +2 similar issues
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-iclb2/igt@kms_psr@psr2_cursor_render.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-iclb5/igt@kms_psr@psr2_cursor_render.html

  * igt@kms_universal_plane@universal-plane-gen9-features-pipe-a:
    - shard-kbl:          [PASS][30] -> [DMESG-WARN][31] ([i915#1982])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-kbl4/igt@kms_universal_plane@universal-plane-gen9-features-pipe-a.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-kbl7/igt@kms_universal_plane@universal-plane-gen9-features-pipe-a.html

  * igt@kms_vblank@pipe-a-accuracy-idle:
    - shard-glk:          [PASS][32] -> [FAIL][33] ([i915#43])
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-glk9/igt@kms_vblank@pipe-a-accuracy-idle.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-glk4/igt@kms_vblank@pipe-a-accuracy-idle.html

  
#### Possible fixes ####

  * {igt@core_hotunplug@hotrebind}:
    - shard-iclb:         [DMESG-WARN][34] ([i915#1982]) -> [PASS][35]
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-iclb8/igt@core_hotunplug@hotrebind.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-iclb5/igt@core_hotunplug@hotrebind.html

  * igt@gem_exec_reloc@basic-many-active@vecs0:
    - shard-glk:          [FAIL][36] ([i915#2389]) -> [PASS][37] +1 similar issue
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-glk1/igt@gem_exec_reloc@basic-many-active@vecs0.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-glk7/igt@gem_exec_reloc@basic-many-active@vecs0.html

  * igt@gem_exec_whisper@basic-queues-priority-all:
    - shard-glk:          [FAIL][38] -> [PASS][39]
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-glk8/igt@gem_exec_whisper@basic-queues-priority-all.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-glk6/igt@gem_exec_whisper@basic-queues-priority-all.html

  * igt@kms_cursor_crc@pipe-c-cursor-128x128-onscreen:
    - shard-skl:          [FAIL][40] ([i915#54]) -> [PASS][41]
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl8/igt@kms_cursor_crc@pipe-c-cursor-128x128-onscreen.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl8/igt@kms_cursor_crc@pipe-c-cursor-128x128-onscreen.html

  * igt@kms_cursor_edge_walk@pipe-b-128x128-top-edge:
    - shard-glk:          [DMESG-WARN][42] ([i915#1982]) -> [PASS][43]
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-glk3/igt@kms_cursor_edge_walk@pipe-b-128x128-top-edge.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-glk8/igt@kms_cursor_edge_walk@pipe-b-128x128-top-edge.html

  * igt@kms_cursor_legacy@cursor-vs-flip-atomic-transitions-varying-size:
    - shard-skl:          [DMESG-WARN][44] ([i915#1982]) -> [PASS][45] +3 similar issues
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl3/igt@kms_cursor_legacy@cursor-vs-flip-atomic-transitions-varying-size.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl6/igt@kms_cursor_legacy@cursor-vs-flip-atomic-transitions-varying-size.html

  * igt@kms_cursor_legacy@cursor-vs-flip-varying-size:
    - shard-hsw:          [FAIL][46] ([i915#2370]) -> [PASS][47]
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-hsw6/igt@kms_cursor_legacy@cursor-vs-flip-varying-size.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-hsw2/igt@kms_cursor_legacy@cursor-vs-flip-varying-size.html

  * igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled:
    - shard-apl:          [DMESG-WARN][48] ([i915#1635] / [i915#1982]) -> [PASS][49] +1 similar issue
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-apl3/igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-apl8/igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled.html

  * igt@kms_draw_crc@draw-method-xrgb2101010-blt-untiled:
    - shard-skl:          [TIMEOUT][50] -> [PASS][51] +3 similar issues
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl10/igt@kms_draw_crc@draw-method-xrgb2101010-blt-untiled.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl5/igt@kms_draw_crc@draw-method-xrgb2101010-blt-untiled.html

  * igt@kms_flip@dpms-off-confusion-interruptible@a-dp1:
    - shard-kbl:          [DMESG-WARN][52] ([i915#1982]) -> [PASS][53]
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-kbl2/igt@kms_flip@dpms-off-confusion-interruptible@a-dp1.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-kbl7/igt@kms_flip@dpms-off-confusion-interruptible@a-dp1.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@c-hdmi-a2:
    - shard-glk:          [FAIL][54] ([i915#79]) -> [PASS][55]
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-glk9/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-hdmi-a2.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-glk2/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-hdmi-a2.html

  * igt@kms_flip@flip-vs-expired-vblank@c-edp1:
    - shard-skl:          [FAIL][56] ([i915#79]) -> [PASS][57] +1 similar issue
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl5/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl5/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html

  * igt@kms_hdr@bpc-switch:
    - shard-skl:          [FAIL][58] ([i915#1188]) -> [PASS][59]
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl4/igt@kms_hdr@bpc-switch.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl8/igt@kms_hdr@bpc-switch.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          [FAIL][60] ([fdo#108145] / [i915#265]) -> [PASS][61]
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl8/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl8/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [SKIP][62] ([fdo#109441]) -> [PASS][63] +1 similar issue
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-iclb4/igt@kms_psr@psr2_sprite_plane_move.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html

  * igt@kms_setmode@basic:
    - shard-hsw:          [FAIL][64] ([i915#31]) -> [PASS][65]
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-hsw4/igt@kms_setmode@basic.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-hsw1/igt@kms_setmode@basic.html

  * igt@perf@enable-disable:
    - shard-skl:          [FAIL][66] ([i915#1352]) -> [PASS][67]
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl5/igt@perf@enable-disable.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl7/igt@perf@enable-disable.html

  * igt@perf_pmu@module-unload:
    - shard-tglb:         [DMESG-WARN][68] ([i915#1982]) -> [PASS][69] +2 similar issues
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-tglb3/igt@perf_pmu@module-unload.html
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-tglb7/igt@perf_pmu@module-unload.html

  
#### Warnings ####

  * igt@kms_atomic_transition@1x-modeset-transitions-nonblocking:
    - shard-skl:          [TIMEOUT][70] -> [DMESG-WARN][71] ([i915#1982])
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl10/igt@kms_atomic_transition@1x-modeset-transitions-nonblocking.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl5/igt@kms_atomic_transition@1x-modeset-transitions-nonblocking.html

  * igt@kms_frontbuffer_tracking@fbcpsr-suspend:
    - shard-tglb:         [DMESG-WARN][72] ([i915#1982] / [i915#2411]) -> [DMESG-WARN][73] ([i915#2411])
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-tglb7/igt@kms_frontbuffer_tracking@fbcpsr-suspend.html
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-tglb6/igt@kms_frontbuffer_tracking@fbcpsr-suspend.html

  * igt@runner@aborted:
    - shard-skl:          [FAIL][74] ([i915#1814] / [i915#2029]) -> [FAIL][75] ([i915#1436] / [i915#2439])
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl3/igt@runner@aborted.html
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/shard-skl3/igt@runner@aborted.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [CI#80]: https://gitlab.freedesktop.org/gfx-ci/i915-infra/issues/80
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1352]: https://gitlab.freedesktop.org/drm/intel/issues/1352
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436
  [i915#155]: https://gitlab.freedesktop.org/drm/intel/issues/155
  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#1814]: https://gitlab.freedesktop.org/drm/intel/issues/1814
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2029]: https://gitlab.freedesktop.org/drm/intel/issues/2029
  [i915#2122]: https://gitlab.freedesktop.org/drm/intel/issues/2122
  [i915#2370]: https://gitlab.freedesktop.org/drm/intel/issues/2370
  [i915#2389]: https://gitlab.freedesktop.org/drm/intel/issues/2389
  [i915#2411]: https://gitlab.freedesktop.org/drm/intel/issues/2411
  [i915#2424]: https://gitlab.freedesktop.org/drm/intel/issues/2424
  [i915#2439]: https://gitlab.freedesktop.org/drm/intel/issues/2439
  [i915#2521]: https://gitlab.freedesktop.org/drm/intel/issues/2521
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#43]: https://gitlab.freedesktop.org/drm/intel/issues/43
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95
  [i915#96]: https://gitlab.freedesktop.org/drm/intel/issues/96


Participating hosts (11 -> 12)
------------------------------

  Additional (1): pig-snb-2600 


Build changes
-------------

  * Linux: CI_DRM_9169 -> Patchwork_18740

  CI-20190529: 20190529
  CI_DRM_9169: 8a581847c1bea831f8edfbb813225df47fe28a3a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5822: b4bcf05cb9839037128905deda7146434155cc41 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_18740: 4ff04849da48332f9715dcca1d196436b2320799 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18740/index.html

[-- Attachment #1.2: Type: text/html, Size: 20431 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Intel-gfx] [PATCH v2 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs
  2020-10-20 10:08 ` [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs Tvrtko Ursulin
  2020-10-20 11:59   ` Chris Wilson
@ 2020-10-20 16:11   ` Tvrtko Ursulin
  2020-10-20 16:19     ` Chris Wilson
  1 sibling, 1 reply; 14+ messages in thread
From: Tvrtko Ursulin @ 2020-10-20 16:11 UTC (permalink / raw)
  To: Intel-gfx; +Cc: Daniel Vetter, Chris Wilson

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Since we keep a driver global mask of online CPUs and base the decision
whether PMU needs to be migrated upon it, we need to make sure the
migration is done for all registered PMUs (so GPUs).

To do this we need to track the current CPU for each PMU and base the
decision on whether to migrate on a comparison between global and local
state.

At the same time, since dynamic CPU hotplug notification slots are a
scarce resource and given how we already register the multi-instance type
state, we can and should add multiple instances of the i915 PMU to this
same state and not allocate a new one for every GPU.

v2:
 * Use pr_notice. (Chris)

v3:
 * Handle a nasty interaction where unregistration triggers a false
   CPU offline event. (Chris)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Daniel Vetter <daniel.vetter@intel.com> # dynamic slot optimisation
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_pci.c |  7 +++-
 drivers/gpu/drm/i915/i915_pmu.c | 57 +++++++++++++++++++++------------
 drivers/gpu/drm/i915/i915_pmu.h |  6 +++-
 3 files changed, 48 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 27964ac0638a..a384f51c91c1 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1150,9 +1150,13 @@ static int __init i915_init(void)
 		return 0;
 	}
 
+	i915_pmu_init();
+
 	err = pci_register_driver(&i915_pci_driver);
-	if (err)
+	if (err) {
+		i915_pmu_exit();
 		return err;
+	}
 
 	i915_perf_sysctl_register();
 	return 0;
@@ -1166,6 +1170,7 @@ static void __exit i915_exit(void)
 	i915_perf_sysctl_unregister();
 	pci_unregister_driver(&i915_pci_driver);
 	i915_globals_exit();
+	i915_pmu_exit();
 }
 
 module_init(i915_init);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 51ed7d0efcdc..cd786ad12be7 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -30,6 +30,7 @@
 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
 
 static cpumask_t i915_pmu_cpumask;
+static unsigned int i915_pmu_target_cpu = -1;
 
 static u8 engine_config_sample(u64 config)
 {
@@ -1049,25 +1050,39 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
 {
 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
-	unsigned int target;
+	unsigned int target = i915_pmu_target_cpu;
 
 	GEM_BUG_ON(!pmu->base.event_init);
 
+	/*
+	 * Unregistering an instance generates a CPU offline event which we must
+	 * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
+	 */
+	if (pmu->closed)
+		return 0;
+
 	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
 		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+
 		/* Migrate events if there is a valid target */
 		if (target < nr_cpu_ids) {
 			cpumask_set_cpu(target, &i915_pmu_cpumask);
-			perf_pmu_migrate_context(&pmu->base, cpu, target);
+			i915_pmu_target_cpu = target;
 		}
 	}
 
+	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
+		perf_pmu_migrate_context(&pmu->base, cpu, target);
+		pmu->cpuhp.cpu = target;
+	}
+
 	return 0;
 }
 
-static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
+static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
+
+void i915_pmu_init(void)
 {
-	enum cpuhp_state slot;
 	int ret;
 
 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
@@ -1075,27 +1090,29 @@ static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
 				      i915_pmu_cpu_online,
 				      i915_pmu_cpu_offline);
 	if (ret < 0)
-		return ret;
+		pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
+			  ret);
+	else
+		cpuhp_slot = ret;
+}
 
-	slot = ret;
-	ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node);
-	if (ret) {
-		cpuhp_remove_multi_state(slot);
-		return ret;
-	}
+void i915_pmu_exit(void)
+{
+	if (cpuhp_slot != CPUHP_INVALID)
+		cpuhp_remove_multi_state(cpuhp_slot);
+}
 
-	pmu->cpuhp.slot = slot;
-	return 0;
+static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
+{
+	if (cpuhp_slot == CPUHP_INVALID)
+		return -EINVAL;
+
+	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
 }
 
 static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
 {
-	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
-
-	drm_WARN_ON(&i915->drm, pmu->cpuhp.slot == CPUHP_INVALID);
-	drm_WARN_ON(&i915->drm, cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node));
-	cpuhp_remove_multi_state(pmu->cpuhp.slot);
-	pmu->cpuhp.slot = CPUHP_INVALID;
+	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
 }
 
 static bool is_igp(struct drm_i915_private *i915)
@@ -1129,7 +1146,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	spin_lock_init(&pmu->lock);
 	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	pmu->timer.function = i915_sample;
-	pmu->cpuhp.slot = CPUHP_INVALID;
+	pmu->cpuhp.cpu = -1;
 
 	if (!is_igp(i915)) {
 		pmu->name = kasprintf(GFP_KERNEL,
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 59a0d19afb67..a24885ab415c 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -43,7 +43,7 @@ struct i915_pmu {
 	 */
 	struct {
 		struct hlist_node node;
-		enum cpuhp_state slot;
+		unsigned int cpu;
 	} cpuhp;
 	/**
 	 * @base: PMU base.
@@ -126,11 +126,15 @@ struct i915_pmu {
 };
 
 #ifdef CONFIG_PERF_EVENTS
+void i915_pmu_init(void);
+void i915_pmu_exit(void);
 void i915_pmu_register(struct drm_i915_private *i915);
 void i915_pmu_unregister(struct drm_i915_private *i915);
 void i915_pmu_gt_parked(struct drm_i915_private *i915);
 void i915_pmu_gt_unparked(struct drm_i915_private *i915);
 #else
+static inline void i915_pmu_init(void) {}
+static inline void i915_pmu_exit(void) {}
 static inline void i915_pmu_register(struct drm_i915_private *i915) {}
 static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
 static inline void i915_pmu_gt_parked(struct drm_i915_private *i915) {}
-- 
2.25.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH v2 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs
  2020-10-20 16:11   ` [Intel-gfx] [PATCH v2 " Tvrtko Ursulin
@ 2020-10-20 16:19     ` Chris Wilson
  2020-10-22  9:42       ` Tvrtko Ursulin
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-10-20 16:19 UTC (permalink / raw)
  To: Intel-gfx, Tvrtko Ursulin; +Cc: Daniel Vetter

Quoting Tvrtko Ursulin (2020-10-20 17:11:44)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Since we keep a driver global mask of online CPUs and base the decision
> whether PMU needs to be migrated upon it, we need to make sure the
> migration is done for all registered PMUs (so GPUs).
> 
> To do this we need to track the current CPU for each PMU and base the
> decision on whether to migrate on a comparison between global and local
> state.
> 
> At the same time, since dynamic CPU hotplug notification slots are a
> scarce resource and given how we already register the multi instance type
> state, we can and should add multiple instance of the i915 PMU to this
> same state and not allocate a new one for every GPU.
> 
> v2:
>  * Use pr_notice. (Chris)
> 
> v3:
>  * Handle a nasty interaction where unregistration which triggers a false
>    CPU offline event. (Chris)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Suggested-by: Daniel Vetter <daniel.vetter@intel.com> # dynamic slot optimisation
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_pci.c |  7 +++-
>  drivers/gpu/drm/i915/i915_pmu.c | 57 +++++++++++++++++++++------------
>  drivers/gpu/drm/i915/i915_pmu.h |  6 +++-
>  3 files changed, 48 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 27964ac0638a..a384f51c91c1 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1150,9 +1150,13 @@ static int __init i915_init(void)
>                 return 0;
>         }
>  
> +       i915_pmu_init();
> +
>         err = pci_register_driver(&i915_pci_driver);
> -       if (err)
> +       if (err) {
> +               i915_pmu_exit();
>                 return err;

We could do an onion bhaji and call i915_globals_exit() as well.

> +       }
>  
>         i915_perf_sysctl_register();
>         return 0;
> @@ -1166,6 +1170,7 @@ static void __exit i915_exit(void)
>         i915_perf_sysctl_unregister();
>         pci_unregister_driver(&i915_pci_driver);
>         i915_globals_exit();
> +       i915_pmu_exit();

pmu_exit then globals_exit to pair with i915_init?

>  }
>  
>  module_init(i915_init);
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 51ed7d0efcdc..cd786ad12be7 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -30,6 +30,7 @@
>  #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
>  
>  static cpumask_t i915_pmu_cpumask;
> +static unsigned int i915_pmu_target_cpu = -1;
>  
>  static u8 engine_config_sample(u64 config)
>  {
> @@ -1049,25 +1050,39 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
>  static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
>  {
>         struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
> -       unsigned int target;
> +       unsigned int target = i915_pmu_target_cpu;
>  
>         GEM_BUG_ON(!pmu->base.event_init);
>  
> +       /*
> +        * Unregistering an instance generates a CPU offline event which we must
> +        * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
> +        */
> +       if (pmu->closed)
> +               return 0;
> +
>         if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
>                 target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
> +
>                 /* Migrate events if there is a valid target */
>                 if (target < nr_cpu_ids) {
>                         cpumask_set_cpu(target, &i915_pmu_cpumask);
> -                       perf_pmu_migrate_context(&pmu->base, cpu, target);
> +                       i915_pmu_target_cpu = target;
>                 }
>         }
>  
> +       if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
> +               perf_pmu_migrate_context(&pmu->base, cpu, target);
> +               pmu->cpuhp.cpu = target;
> +       }
> +
>         return 0;
>  }
>  
> -static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
> +static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
> +
> +void i915_pmu_init(void)
>  {
> -       enum cpuhp_state slot;
>         int ret;
>  
>         ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
> @@ -1075,27 +1090,29 @@ static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
>                                       i915_pmu_cpu_online,
>                                       i915_pmu_cpu_offline);
>         if (ret < 0)
> -               return ret;
> +               pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
> +                         ret);
> +       else
> +               cpuhp_slot = ret;
> +}
>  
> -       slot = ret;
> -       ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node);
> -       if (ret) {
> -               cpuhp_remove_multi_state(slot);
> -               return ret;
> -       }
> +void i915_pmu_exit(void)
> +{
> +       if (cpuhp_slot != CPUHP_INVALID)
> +               cpuhp_remove_multi_state(cpuhp_slot);
> +}
>  
> -       pmu->cpuhp.slot = slot;
> -       return 0;
> +static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
> +{
> +       if (cpuhp_slot == CPUHP_INVALID)
> +               return -EINVAL;
> +
> +       return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
>  }
>  
>  static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
>  {
> -       struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
> -
> -       drm_WARN_ON(&i915->drm, pmu->cpuhp.slot == CPUHP_INVALID);
> -       drm_WARN_ON(&i915->drm, cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node));
> -       cpuhp_remove_multi_state(pmu->cpuhp.slot);
> -       pmu->cpuhp.slot = CPUHP_INVALID;
> +       cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
>  }
>  
>  static bool is_igp(struct drm_i915_private *i915)
> @@ -1129,7 +1146,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
>         spin_lock_init(&pmu->lock);
>         hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>         pmu->timer.function = i915_sample;
> -       pmu->cpuhp.slot = CPUHP_INVALID;
> +       pmu->cpuhp.cpu = -1;

To the best of my limited understanding of cpuhp,
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/pmu: Handle PCI unbind (rev2)
  2020-10-20 10:08 [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Handle PCI unbind Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2020-10-20 15:19 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
@ 2020-10-20 17:28 ` Patchwork
  2020-10-20 19:47 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-10-20 17:28 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 3773 bytes --]

== Series Details ==

Series: series starting with [1/2] drm/i915/pmu: Handle PCI unbind (rev2)
URL   : https://patchwork.freedesktop.org/series/82864/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_9169 -> Patchwork_18744
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/index.html

Known issues
------------

  Here are the changes found in Patchwork_18744 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live@gt_heartbeat:
    - fi-tgl-u2:          [PASS][1] -> [INCOMPLETE][2] ([i915#2557])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-tgl-u2/igt@i915_selftest@live@gt_heartbeat.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/fi-tgl-u2/igt@i915_selftest@live@gt_heartbeat.html

  
#### Possible fixes ####

  * igt@i915_module_load@reload:
    - fi-icl-y:           [DMESG-WARN][3] ([i915#1982]) -> [PASS][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-icl-y/igt@i915_module_load@reload.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/fi-icl-y/igt@i915_module_load@reload.html

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-bsw-n3050:       [DMESG-WARN][5] ([i915#1982]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-bsw-n3050/igt@i915_pm_rpm@basic-pci-d3-state.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/fi-bsw-n3050/igt@i915_pm_rpm@basic-pci-d3-state.html
    - fi-bsw-kefka:       [DMESG-WARN][7] ([i915#1982]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-bsw-kefka/igt@i915_pm_rpm@basic-pci-d3-state.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/fi-bsw-kefka/igt@i915_pm_rpm@basic-pci-d3-state.html

  * igt@i915_selftest@live@gt_heartbeat:
    - fi-kbl-soraka:      [DMESG-FAIL][9] ([i915#541]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-kbl-soraka/igt@i915_selftest@live@gt_heartbeat.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/fi-kbl-soraka/igt@i915_selftest@live@gt_heartbeat.html

  
#### Warnings ####

  * igt@i915_pm_rpm@basic-rte:
    - fi-kbl-guc:         [DMESG-FAIL][11] ([i915#2203]) -> [SKIP][12] ([fdo#109271])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/fi-kbl-guc/igt@i915_pm_rpm@basic-rte.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/fi-kbl-guc/igt@i915_pm_rpm@basic-rte.html

  
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2203]: https://gitlab.freedesktop.org/drm/intel/issues/2203
  [i915#2557]: https://gitlab.freedesktop.org/drm/intel/issues/2557
  [i915#541]: https://gitlab.freedesktop.org/drm/intel/issues/541


Participating hosts (45 -> 38)
------------------------------

  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-skl-guc fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_9169 -> Patchwork_18744

  CI-20190529: 20190529
  CI_DRM_9169: 8a581847c1bea831f8edfbb813225df47fe28a3a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5822: b4bcf05cb9839037128905deda7146434155cc41 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_18744: 982c1f290dce4481188b3d1673704195360813b6 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

982c1f290dce drm/i915/pmu: Fix CPU hotplug with multiple GPUs
716dbf8b3c33 drm/i915/pmu: Handle PCI unbind

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/index.html

[-- Attachment #1.2: Type: text/html, Size: 4640 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [1/2] drm/i915/pmu: Handle PCI unbind (rev2)
  2020-10-20 10:08 [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Handle PCI unbind Tvrtko Ursulin
                   ` (3 preceding siblings ...)
  2020-10-20 17:28 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/pmu: Handle PCI unbind (rev2) Patchwork
@ 2020-10-20 19:47 ` Patchwork
  4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-10-20 19:47 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 18368 bytes --]

== Series Details ==

Series: series starting with [1/2] drm/i915/pmu: Handle PCI unbind (rev2)
URL   : https://patchwork.freedesktop.org/series/82864/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_9169_full -> Patchwork_18744_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_18744_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_18744_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_18744_full:

### IGT changes ###

#### Possible regressions ####

  * igt@kms_flip@2x-plain-flip-ts-check@bc-vga1-hdmi-a1:
    - shard-hsw:          [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-hsw6/igt@kms_flip@2x-plain-flip-ts-check@bc-vga1-hdmi-a1.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-hsw2/igt@kms_flip@2x-plain-flip-ts-check@bc-vga1-hdmi-a1.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * {igt@core_hotunplug@hotrebind}:
    - shard-hsw:          NOTRUN -> [WARN][3]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-hsw6/igt@core_hotunplug@hotrebind.html

  
Known issues
------------

  Here are the changes found in Patchwork_18744_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_reloc@basic-many-active@vcs0:
    - shard-glk:          [PASS][4] -> [FAIL][5] ([i915#2389])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-glk1/igt@gem_exec_reloc@basic-many-active@vcs0.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-glk5/igt@gem_exec_reloc@basic-many-active@vcs0.html

  * igt@gem_userptr_blits@sync-unmap-cycles:
    - shard-skl:          [PASS][6] -> [TIMEOUT][7] ([i915#2424])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl2/igt@gem_userptr_blits@sync-unmap-cycles.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl1/igt@gem_userptr_blits@sync-unmap-cycles.html

  * igt@i915_pm_rc6_residency@rc6-fence:
    - shard-hsw:          [PASS][8] -> [WARN][9] ([i915#1519])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-hsw8/igt@i915_pm_rc6_residency@rc6-fence.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-hsw8/igt@i915_pm_rc6_residency@rc6-fence.html

  * igt@i915_suspend@debugfs-reader:
    - shard-kbl:          [PASS][10] -> [INCOMPLETE][11] ([i915#155])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-kbl7/igt@i915_suspend@debugfs-reader.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-kbl4/igt@i915_suspend@debugfs-reader.html

  * igt@kms_big_fb@linear-32bpp-rotate-180:
    - shard-glk:          [PASS][12] -> [DMESG-FAIL][13] ([i915#118] / [i915#95])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-glk9/igt@kms_big_fb@linear-32bpp-rotate-180.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-glk8/igt@kms_big_fb@linear-32bpp-rotate-180.html

  * igt@kms_big_fb@y-tiled-8bpp-rotate-180:
    - shard-kbl:          [PASS][14] -> [DMESG-WARN][15] ([i915#1982])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-kbl7/igt@kms_big_fb@y-tiled-8bpp-rotate-180.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-kbl4/igt@kms_big_fb@y-tiled-8bpp-rotate-180.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-skl:          [PASS][16] -> [INCOMPLETE][17] ([i915#300])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl1/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl8/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_cursor_legacy@flip-vs-cursor-varying-size:
    - shard-skl:          [PASS][18] -> [FAIL][19] ([i915#2346])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl8/igt@kms_cursor_legacy@flip-vs-cursor-varying-size.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl5/igt@kms_cursor_legacy@flip-vs-cursor-varying-size.html

  * igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled:
    - shard-skl:          [PASS][20] -> [DMESG-WARN][21] ([i915#1982]) +6 similar issues
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl1/igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl7/igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled.html

  * igt@kms_flip@plain-flip-ts-check-interruptible@a-edp1:
    - shard-skl:          [PASS][22] -> [FAIL][23] ([i915#2122])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl9/igt@kms_flip@plain-flip-ts-check-interruptible@a-edp1.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl3/igt@kms_flip@plain-flip-ts-check-interruptible@a-edp1.html

  * igt@kms_frontbuffer_tracking@fbc-modesetfrombusy:
    - shard-snb:          [PASS][24] -> [FAIL][25] ([i915#2546])
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-snb7/igt@kms_frontbuffer_tracking@fbc-modesetfrombusy.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-snb2/igt@kms_frontbuffer_tracking@fbc-modesetfrombusy.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-rte:
    - shard-tglb:         [PASS][26] -> [DMESG-WARN][27] ([i915#1982]) +2 similar issues
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-tglb2/igt@kms_frontbuffer_tracking@fbcpsr-1p-rte.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-tglb6/igt@kms_frontbuffer_tracking@fbcpsr-1p-rte.html

  * igt@kms_hdr@bpc-switch-dpms:
    - shard-skl:          [PASS][28] -> [FAIL][29] ([i915#1188])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl6/igt@kms_hdr@bpc-switch-dpms.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl7/igt@kms_hdr@bpc-switch-dpms.html

  * igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
    - shard-skl:          [PASS][30] -> [FAIL][31] ([fdo#108145] / [i915#265])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl8/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl5/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html

  * igt@kms_psr@psr2_cursor_render:
    - shard-iclb:         [PASS][32] -> [SKIP][33] ([fdo#109441]) +2 similar issues
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-iclb2/igt@kms_psr@psr2_cursor_render.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-iclb8/igt@kms_psr@psr2_cursor_render.html

  * igt@kms_vblank@pipe-c-ts-continuation-suspend:
    - shard-skl:          [PASS][34] -> [INCOMPLETE][35] ([i915#198])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl9/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl2/igt@kms_vblank@pipe-c-ts-continuation-suspend.html

  * igt@prime_vgem@coherency-blt:
    - shard-snb:          [PASS][36] -> [INCOMPLETE][37] ([i915#82])
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-snb2/igt@prime_vgem@coherency-blt.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-snb2/igt@prime_vgem@coherency-blt.html

  
#### Possible fixes ####

  * {igt@core_hotunplug@hotrebind}:
    - shard-iclb:         [DMESG-WARN][38] ([i915#1982]) -> [PASS][39]
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-iclb8/igt@core_hotunplug@hotrebind.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-iclb4/igt@core_hotunplug@hotrebind.html

  * igt@gem_exec_reloc@basic-many-active@rcs0:
    - shard-hsw:          [FAIL][40] ([i915#2389]) -> [PASS][41]
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-hsw2/igt@gem_exec_reloc@basic-many-active@rcs0.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-hsw8/igt@gem_exec_reloc@basic-many-active@rcs0.html

  * igt@gem_exec_whisper@basic-queues-forked:
    - shard-glk:          [DMESG-WARN][42] ([i915#118] / [i915#95]) -> [PASS][43]
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-glk7/igt@gem_exec_whisper@basic-queues-forked.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-glk8/igt@gem_exec_whisper@basic-queues-forked.html

  * igt@gem_exec_whisper@basic-queues-priority-all:
    - shard-glk:          [FAIL][44] -> [PASS][45]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-glk8/igt@gem_exec_whisper@basic-queues-priority-all.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-glk4/igt@gem_exec_whisper@basic-queues-priority-all.html

  * igt@kms_cursor_crc@pipe-c-cursor-128x128-onscreen:
    - shard-skl:          [FAIL][46] ([i915#54]) -> [PASS][47]
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl8/igt@kms_cursor_crc@pipe-c-cursor-128x128-onscreen.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl5/igt@kms_cursor_crc@pipe-c-cursor-128x128-onscreen.html

  * igt@kms_cursor_edge_walk@pipe-b-128x128-top-edge:
    - shard-glk:          [DMESG-WARN][48] ([i915#1982]) -> [PASS][49]
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-glk3/igt@kms_cursor_edge_walk@pipe-b-128x128-top-edge.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-glk2/igt@kms_cursor_edge_walk@pipe-b-128x128-top-edge.html

  * igt@kms_cursor_legacy@cursor-vs-flip-toggle:
    - shard-skl:          [DMESG-WARN][50] ([i915#1982]) -> [PASS][51] +4 similar issues
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl7/igt@kms_cursor_legacy@cursor-vs-flip-toggle.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl6/igt@kms_cursor_legacy@cursor-vs-flip-toggle.html

  * igt@kms_cursor_legacy@cursor-vs-flip-varying-size:
    - shard-hsw:          [FAIL][52] ([i915#2370]) -> [PASS][53]
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-hsw6/igt@kms_cursor_legacy@cursor-vs-flip-varying-size.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-hsw2/igt@kms_cursor_legacy@cursor-vs-flip-varying-size.html

  * igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled:
    - shard-apl:          [DMESG-WARN][54] ([i915#1635] / [i915#1982]) -> [PASS][55] +1 similar issue
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-apl3/igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-apl1/igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled.html

  * igt@kms_draw_crc@draw-method-xrgb2101010-blt-untiled:
    - shard-skl:          [TIMEOUT][56] -> [PASS][57] +3 similar issues
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl10/igt@kms_draw_crc@draw-method-xrgb2101010-blt-untiled.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl10/igt@kms_draw_crc@draw-method-xrgb2101010-blt-untiled.html

  * igt@kms_draw_crc@draw-method-xrgb8888-render-xtiled:
    - shard-snb:          [FAIL][58] ([i915#54]) -> [PASS][59]
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-snb7/igt@kms_draw_crc@draw-method-xrgb8888-render-xtiled.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-snb2/igt@kms_draw_crc@draw-method-xrgb8888-render-xtiled.html

  * igt@kms_flip@dpms-off-confusion-interruptible@a-dp1:
    - shard-kbl:          [DMESG-WARN][60] ([i915#1982]) -> [PASS][61]
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-kbl2/igt@kms_flip@dpms-off-confusion-interruptible@a-dp1.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-kbl4/igt@kms_flip@dpms-off-confusion-interruptible@a-dp1.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@c-hdmi-a2:
    - shard-glk:          [FAIL][62] ([i915#79]) -> [PASS][63]
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-glk9/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-hdmi-a2.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-glk7/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-hdmi-a2.html

  * igt@kms_flip@flip-vs-expired-vblank@c-edp1:
    - shard-skl:          [FAIL][64] ([i915#79]) -> [PASS][65] +1 similar issue
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl5/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl10/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          [FAIL][66] ([fdo#108145] / [i915#265]) -> [PASS][67]
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl8/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl5/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_psr@psr2_primary_mmap_cpu:
    - shard-iclb:         [SKIP][68] ([fdo#109441]) -> [PASS][69] +2 similar issues
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-iclb5/igt@kms_psr@psr2_primary_mmap_cpu.html
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-iclb2/igt@kms_psr@psr2_primary_mmap_cpu.html

  * igt@perf@enable-disable:
    - shard-skl:          [FAIL][70] ([i915#1352]) -> [PASS][71]
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl5/igt@perf@enable-disable.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl5/igt@perf@enable-disable.html

  * igt@perf_pmu@module-unload:
    - shard-tglb:         [DMESG-WARN][72] ([i915#1982]) -> [PASS][73] +2 similar issues
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-tglb3/igt@perf_pmu@module-unload.html
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-tglb5/igt@perf_pmu@module-unload.html

  
#### Warnings ####

  * igt@kms_atomic_transition@1x-modeset-transitions-nonblocking:
    - shard-skl:          [TIMEOUT][74] -> [DMESG-WARN][75] ([i915#1982])
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl10/igt@kms_atomic_transition@1x-modeset-transitions-nonblocking.html
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl10/igt@kms_atomic_transition@1x-modeset-transitions-nonblocking.html

  * igt@kms_dp_dsc@basic-dsc-enable-edp:
    - shard-iclb:         [SKIP][76] ([fdo#109349]) -> [DMESG-WARN][77] ([i915#1226])
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-iclb5/igt@kms_dp_dsc@basic-dsc-enable-edp.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html

  * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min:
    - shard-skl:          [FAIL][78] ([fdo#108145] / [i915#265]) -> [DMESG-WARN][79] ([i915#1982])
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9169/shard-skl8/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/shard-skl9/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109349]: https://bugs.freedesktop.org/show_bug.cgi?id=109349
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1226]: https://gitlab.freedesktop.org/drm/intel/issues/1226
  [i915#1352]: https://gitlab.freedesktop.org/drm/intel/issues/1352
  [i915#1519]: https://gitlab.freedesktop.org/drm/intel/issues/1519
  [i915#155]: https://gitlab.freedesktop.org/drm/intel/issues/155
  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#198]: https://gitlab.freedesktop.org/drm/intel/issues/198
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2122]: https://gitlab.freedesktop.org/drm/intel/issues/2122
  [i915#2346]: https://gitlab.freedesktop.org/drm/intel/issues/2346
  [i915#2370]: https://gitlab.freedesktop.org/drm/intel/issues/2370
  [i915#2389]: https://gitlab.freedesktop.org/drm/intel/issues/2389
  [i915#2424]: https://gitlab.freedesktop.org/drm/intel/issues/2424
  [i915#2521]: https://gitlab.freedesktop.org/drm/intel/issues/2521
  [i915#2546]: https://gitlab.freedesktop.org/drm/intel/issues/2546
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#300]: https://gitlab.freedesktop.org/drm/intel/issues/300
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#82]: https://gitlab.freedesktop.org/drm/intel/issues/82
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (11 -> 11)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * Linux: CI_DRM_9169 -> Patchwork_18744

  CI-20190529: 20190529
  CI_DRM_9169: 8a581847c1bea831f8edfbb813225df47fe28a3a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5822: b4bcf05cb9839037128905deda7146434155cc41 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_18744: 982c1f290dce4481188b3d1673704195360813b6 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18744/index.html

[-- Attachment #1.2: Type: text/html, Size: 21262 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH v2 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs
  2020-10-20 16:19     ` Chris Wilson
@ 2020-10-22  9:42       ` Tvrtko Ursulin
  0 siblings, 0 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2020-10-22  9:42 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx; +Cc: Daniel Vetter


On 20/10/2020 17:19, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-10-20 17:11:44)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Since we keep a driver global mask of online CPUs and base the decision
>> whether PMU needs to be migrated upon it, we need to make sure the
>> migration is done for all registered PMUs (so GPUs).
>>
>> To do this we need to track the current CPU for each PMU and base the
>> decision on whether to migrate on a comparison between global and local
>> state.
>>
>> At the same time, since dynamic CPU hotplug notification slots are a
>> scarce resource and given how we already register the multi instance type
>> state, we can and should add multiple instances of the i915 PMU to this
>> same state and not allocate a new one for every GPU.
>>
>> v2:
>>   * Use pr_notice. (Chris)
>>
>> v3:
>>   * Handle a nasty interaction where unregistration triggers a false
>>     CPU offline event. (Chris)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Suggested-by: Daniel Vetter <daniel.vetter@intel.com> # dynamic slot optimisation
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>   drivers/gpu/drm/i915/i915_pci.c |  7 +++-
>>   drivers/gpu/drm/i915/i915_pmu.c | 57 +++++++++++++++++++++------------
>>   drivers/gpu/drm/i915/i915_pmu.h |  6 +++-
>>   3 files changed, 48 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>> index 27964ac0638a..a384f51c91c1 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -1150,9 +1150,13 @@ static int __init i915_init(void)
>>                  return 0;
>>          }
>>   
>> +       i915_pmu_init();
>> +
>>          err = pci_register_driver(&i915_pci_driver);
>> -       if (err)
>> +       if (err) {
>> +               i915_pmu_exit();
>>                  return err;
> 
> We could do an onion bhaji and call i915_globals_exit() as well.
> 
>> +       }
>>   
>>          i915_perf_sysctl_register();
>>          return 0;
>> @@ -1166,6 +1170,7 @@ static void __exit i915_exit(void)
>>          i915_perf_sysctl_unregister();
>>          pci_unregister_driver(&i915_pci_driver);
>>          i915_globals_exit();
>> +       i915_pmu_exit();
> 
> pmu_exit then globals_exit to pair with i915_init?

Maybe later, sorry.

>>   }
>>   
>>   module_init(i915_init);
>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>> index 51ed7d0efcdc..cd786ad12be7 100644
>> --- a/drivers/gpu/drm/i915/i915_pmu.c
>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>> @@ -30,6 +30,7 @@
>>   #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
>>   
>>   static cpumask_t i915_pmu_cpumask;
>> +static unsigned int i915_pmu_target_cpu = -1;
>>   
>>   static u8 engine_config_sample(u64 config)
>>   {
>> @@ -1049,25 +1050,39 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
>>   static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
>>   {
>>          struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
>> -       unsigned int target;
>> +       unsigned int target = i915_pmu_target_cpu;
>>   
>>          GEM_BUG_ON(!pmu->base.event_init);
>>   
>> +       /*
>> +        * Unregistering an instance generates a CPU offline event which we must
>> +        * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
>> +        */
>> +       if (pmu->closed)
>> +               return 0;
>> +
>>          if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
>>                  target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
>> +
>>                  /* Migrate events if there is a valid target */
>>                  if (target < nr_cpu_ids) {
>>                          cpumask_set_cpu(target, &i915_pmu_cpumask);
>> -                       perf_pmu_migrate_context(&pmu->base, cpu, target);
>> +                       i915_pmu_target_cpu = target;
>>                  }
>>          }
>>   
>> +       if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
>> +               perf_pmu_migrate_context(&pmu->base, cpu, target);
>> +               pmu->cpuhp.cpu = target;
>> +       }
>> +
>>          return 0;
>>   }
>>   
>> -static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
>> +static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
>> +
>> +void i915_pmu_init(void)
>>   {
>> -       enum cpuhp_state slot;
>>          int ret;
>>   
>>          ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
>> @@ -1075,27 +1090,29 @@ static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
>>                                        i915_pmu_cpu_online,
>>                                        i915_pmu_cpu_offline);
>>          if (ret < 0)
>> -               return ret;
>> +               pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
>> +                         ret);
>> +       else
>> +               cpuhp_slot = ret;
>> +}
>>   
>> -       slot = ret;
>> -       ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node);
>> -       if (ret) {
>> -               cpuhp_remove_multi_state(slot);
>> -               return ret;
>> -       }
>> +void i915_pmu_exit(void)
>> +{
>> +       if (cpuhp_slot != CPUHP_INVALID)
>> +               cpuhp_remove_multi_state(cpuhp_slot);
>> +}
>>   
>> -       pmu->cpuhp.slot = slot;
>> -       return 0;
>> +static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
>> +{
>> +       if (cpuhp_slot == CPUHP_INVALID)
>> +               return -EINVAL;
>> +
>> +       return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
>>   }
>>   
>>   static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
>>   {
>> -       struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
>> -
>> -       drm_WARN_ON(&i915->drm, pmu->cpuhp.slot == CPUHP_INVALID);
>> -       drm_WARN_ON(&i915->drm, cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node));
>> -       cpuhp_remove_multi_state(pmu->cpuhp.slot);
>> -       pmu->cpuhp.slot = CPUHP_INVALID;
>> +       cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
>>   }
>>   
>>   static bool is_igp(struct drm_i915_private *i915)
>> @@ -1129,7 +1146,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
>>          spin_lock_init(&pmu->lock);
>>          hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>>          pmu->timer.function = i915_sample;
>> -       pmu->cpuhp.slot = CPUHP_INVALID;
>> +       pmu->cpuhp.cpu = -1;
> 
> To the best of my limited understanding of cpuhp,
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Pushed these two, having declared the shard run flip-flops unrelated.
Thanks!

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2020-10-22  9:42 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-20 10:08 [Intel-gfx] [PATCH 1/2] drm/i915/pmu: Handle PCI unbind Tvrtko Ursulin
2020-10-20 10:08 ` [Intel-gfx] [PATCH 2/2] drm/i915/pmu: Fix CPU hotplug with multiple GPUs Tvrtko Ursulin
2020-10-20 11:59   ` Chris Wilson
2020-10-20 12:10     ` Chris Wilson
2020-10-20 12:33       ` Tvrtko Ursulin
2020-10-20 12:40         ` Chris Wilson
2020-10-20 13:05           ` Tvrtko Ursulin
2020-10-20 16:11   ` [Intel-gfx] [PATCH v2 " Tvrtko Ursulin
2020-10-20 16:19     ` Chris Wilson
2020-10-22  9:42       ` Tvrtko Ursulin
2020-10-20 12:25 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/pmu: Handle PCI unbind Patchwork
2020-10-20 15:19 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
2020-10-20 17:28 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/pmu: Handle PCI unbind (rev2) Patchwork
2020-10-20 19:47 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.