All of lore.kernel.org
 help / color / mirror / Atom feed
From: Boris Brezillon <boris.brezillon@bootlin.com>
To: Eric Anholt <eric@anholt.net>, Daniel Vetter <daniel@ffwll.ch>
Cc: Boris Brezillon <boris.brezillon@bootlin.com>,
	dri-devel@lists.freedesktop.org
Subject: [PATCH v2 3/3] drm/vc4: Add a load tracker to prevent HVS underflow errors
Date: Thu, 25 Oct 2018 14:45:46 +0200	[thread overview]
Message-ID: <20181025124546.22145-4-boris.brezillon@bootlin.com> (raw)
In-Reply-To: <20181025124546.22145-1-boris.brezillon@bootlin.com>

The HVS block is supposed to fill the pixelvalve FIFOs fast enough to
meet the requested framerate. The problem is, the HVS and memory bus
bandwidths are limited, and if we don't take these limitations into
account we might end up with HVS underflow errors.

This patch is trying to model the per-plane HVS and memory bus bandwidth
consumption and take a decision at atomic_check() time whether the
estimated load will fit in the HVS and membus budget.

Note that we take an extra margin on the memory bus consumption to let
the system run smoothly when other blocks are doing heavy use of the
memory bus. Same goes for the HVS limit, except the margin is smaller in
this case, since the HVS is not used by external components.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
Changes in v2:
- Remove an unused var in vc4_load_tracker_atomic_check()
---
 drivers/gpu/drm/vc4/vc4_drv.c   |   1 +
 drivers/gpu/drm/vc4/vc4_drv.h   |  11 ++++
 drivers/gpu/drm/vc4/vc4_kms.c   | 103 +++++++++++++++++++++++++++++++-
 drivers/gpu/drm/vc4/vc4_plane.c |  60 +++++++++++++++++++
 4 files changed, 174 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index f6f5cd80c04d..7195a0bcceb3 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -313,6 +313,7 @@ static void vc4_drm_unbind(struct device *dev)
 
 	drm_mode_config_cleanup(drm);
 
+	drm_atomic_private_obj_fini(&vc4->load_tracker);
 	drm_atomic_private_obj_fini(&vc4->ctm_manager);
 
 	drm_dev_put(drm);
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index a6c67c65ffbc..11369da944b6 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -200,6 +200,7 @@ struct vc4_dev {
 
 	struct drm_modeset_lock ctm_state_lock;
 	struct drm_private_obj ctm_manager;
+	struct drm_private_obj load_tracker;
 };
 
 static inline struct vc4_dev *
@@ -369,6 +370,16 @@ struct vc4_plane_state {
 	 * to enable background color fill.
 	 */
 	bool needs_bg_fill;
+
+	/* Load of this plane on the HVS block. The load is expressed in HVS
+	 * cycles/sec.
+	 */
+	u64 hvs_load;
+
+	/* Memory bandwidth needed for this plane. This is expressed in
+	 * bytes/sec.
+	 */
+	u64 membus_load;
 };
 
 static inline struct vc4_plane_state *
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 8e0183b1e8bb..b905a19c1470 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -34,6 +34,18 @@ static struct vc4_ctm_state *to_vc4_ctm_state(struct drm_private_state *priv)
 	return container_of(priv, struct vc4_ctm_state, base);
 }
 
+struct vc4_load_tracker_state {
+	struct drm_private_state base;
+	u64 hvs_load;
+	u64 membus_load;
+};
+
+static struct vc4_load_tracker_state *
+to_vc4_load_tracker_state(struct drm_private_state *priv)
+{
+	return container_of(priv, struct vc4_load_tracker_state, base);
+}
+
 static struct vc4_ctm_state *vc4_get_ctm_state(struct drm_atomic_state *state,
 					       struct drm_private_obj *manager)
 {
@@ -383,6 +395,81 @@ vc4_ctm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
 	return 0;
 }
 
+static int vc4_load_tracker_atomic_check(struct drm_atomic_state *state)
+{
+	struct drm_plane_state *old_plane_state, *new_plane_state;
+	struct vc4_dev *vc4 = to_vc4_dev(state->dev);
+	struct vc4_load_tracker_state *load_state;
+	struct drm_private_state *priv_state;
+	struct drm_plane *plane;
+	int i;
+
+	priv_state = drm_atomic_get_private_obj_state(state,
+						      &vc4->load_tracker);
+	if (IS_ERR(priv_state))
+		return PTR_ERR(priv_state);
+
+	load_state = to_vc4_load_tracker_state(priv_state);
+	for_each_oldnew_plane_in_state(state, plane, old_plane_state,
+				       new_plane_state, i) {
+		struct vc4_plane_state *vc4_plane_state;
+
+		if (old_plane_state->fb && old_plane_state->crtc) {
+			vc4_plane_state = to_vc4_plane_state(old_plane_state);
+			load_state->membus_load -= vc4_plane_state->membus_load;
+			load_state->hvs_load -= vc4_plane_state->hvs_load;
+		}
+
+		if (new_plane_state->fb && new_plane_state->crtc) {
+			vc4_plane_state = to_vc4_plane_state(new_plane_state);
+			load_state->membus_load += vc4_plane_state->membus_load;
+			load_state->hvs_load += vc4_plane_state->hvs_load;
+		}
+	}
+
+	/* The abolsute limit is 2Gbyte/sec, but let's take a margin to let
+	 * the system work when other blocks are accessing the memory.
+	 */
+	if (load_state->membus_load > SZ_1G + SZ_512M)
+		return -ENOSPC;
+
+	/* HVS clock is supposed to run @ 250Mhz, let's take a margin and
+	 * consider the maximum number of cycles is 240M.
+	 */
+	if (load_state->hvs_load > 240000000ULL)
+		return -ENOSPC;
+
+	return 0;
+}
+
+static struct drm_private_state *
+vc4_load_tracker_duplicate_state(struct drm_private_obj *obj)
+{
+	struct vc4_load_tracker_state *state;
+
+	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
+
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
+
+	return &state->base;
+}
+
+static void vc4_load_tracker_destroy_state(struct drm_private_obj *obj,
+					   struct drm_private_state *state)
+{
+	struct vc4_load_tracker_state *load_state;
+
+	load_state = to_vc4_load_tracker_state(state);
+	kfree(load_state);
+}
+
+static const struct drm_private_state_funcs vc4_load_tracker_state_funcs = {
+	.atomic_duplicate_state = vc4_load_tracker_duplicate_state,
+	.atomic_destroy_state = vc4_load_tracker_destroy_state,
+};
+
 static int
 vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
 {
@@ -392,7 +479,11 @@ vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
 	if (ret < 0)
 		return ret;
 
-	return drm_atomic_helper_check(dev, state);
+	ret = drm_atomic_helper_check(dev, state);
+	if (ret)
+		return ret;
+
+	return vc4_load_tracker_atomic_check(state);
 }
 
 static const struct drm_mode_config_funcs vc4_mode_funcs = {
@@ -408,6 +499,7 @@ int vc4_kms_load(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_ctm_state *ctm_state;
+	struct vc4_load_tracker_state *load_state;
 	int ret;
 
 	sema_init(&vc4->async_modeset, 1);
@@ -437,6 +529,15 @@ int vc4_kms_load(struct drm_device *dev)
 	drm_atomic_private_obj_init(dev, &vc4->ctm_manager, &ctm_state->base,
 				    &vc4_ctm_state_funcs);
 
+	load_state = kzalloc(sizeof(*load_state), GFP_KERNEL);
+	if (!load_state) {
+		drm_atomic_private_obj_fini(&vc4->ctm_manager);
+		return -ENOMEM;
+	}
+
+	drm_atomic_private_obj_init(dev, &vc4->load_tracker, &load_state->base,
+				    &vc4_load_tracker_state_funcs);
+
 	drm_mode_config_reset(dev);
 
 	drm_kms_helper_poll_init(dev);
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 0f294d612769..5e9687406517 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -450,6 +450,64 @@ static void vc4_write_scaling_parameters(struct drm_plane_state *state,
 	}
 }
 
+static void vc4_plane_calc_load(struct drm_plane_state *state)
+{
+	unsigned int hvs_load_shift, vrefresh, i;
+	struct drm_framebuffer *fb = state->fb;
+	struct vc4_plane_state *vc4_state;
+	struct drm_crtc_state *crtc_state;
+	unsigned int vscale_factor;
+
+	vc4_state = to_vc4_plane_state(state);
+	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
+							state->crtc);
+	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);
+
+	/* The HVS is able to process 2 pixels/cycle when scaling the source,
+	 * 4 pixels/cycle otherwise.
+	 * Alpha blending step seems to be pipelined and it's always operating
+	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
+	 * scaler block.
+	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
+	 */
+	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
+	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
+	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
+	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
+		hvs_load_shift = 1;
+	else
+		hvs_load_shift = 2;
+
+	vc4_state->membus_load = 0;
+	vc4_state->hvs_load = 0;
+	for (i = 0; i < fb->format->num_planes; i++) {
+		unsigned long pixels_load;
+
+		/* Even if the bandwidth/plane required for a single frame is
+		 *
+		 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
+		 *
+		 * when downscaling, we have to read more pixels per line in
+		 * the time frame reserved for a single line, so the bandwidth
+		 * demand can be punctually higher. To account for that, we
+		 * calculate the down-scaling factor and multiply the plane
+		 * load by this number. We're likely over-estimating the read
+		 * demand, but that's better than under-estimating it.
+		 */
+		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
+					     vc4_state->crtc_h);
+		pixels_load = vc4_state->src_w[i] * vc4_state->src_h[i] *
+			      vscale_factor;
+
+		vc4_state->membus_load += fb->format->cpp[i] * pixels_load;
+		vc4_state->hvs_load += pixels_load;
+	}
+
+	vc4_state->hvs_load *= vrefresh;
+	vc4_state->hvs_load >>= hvs_load_shift;
+	vc4_state->membus_load *= vrefresh;
+}
+
 /* Writes out a full display list for an active plane to the plane's
  * private dlist state.
  */
@@ -769,6 +827,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
 				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;
 
+	vc4_plane_calc_load(state);
+
 	return 0;
 }
 
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

  parent reply	other threads:[~2018-10-25 12:46 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-25 12:45 [PATCH v2 0/3] drm/vc4: Add a load tracker Boris Brezillon
2018-10-25 12:45 ` [PATCH v2 1/3] drm/atomic: Add a generic infrastructure to track underrun errors Boris Brezillon
2018-10-26 10:33   ` Daniel Vetter
2018-10-26 12:36     ` Boris Brezillon
2018-10-26 13:36       ` Daniel Vetter
2018-10-26 14:13         ` Boris Brezillon
2018-10-26 14:23           ` Daniel Vetter
2018-10-25 12:45 ` [PATCH v2 2/3] drm/vc4: Report " Boris Brezillon
2018-10-25 12:45 ` Boris Brezillon [this message]
2018-10-30 23:12   ` [PATCH v2 3/3] drm/vc4: Add a load tracker to prevent HVS underflow errors Eric Anholt
2018-11-05 11:36     ` Boris Brezillon
2018-11-08 16:26       ` Eric Anholt
2018-11-08 16:50         ` Boris Brezillon
2018-11-08 17:53           ` Eric Anholt
2018-11-06 13:07     ` Boris Brezillon
2018-11-28  9:16 ` [PATCH v2 0/3] drm/vc4: Add a load tracker Paul Kocialkowski
2018-11-28  9:29   ` Boris Brezillon
2018-11-28 13:32     ` Paul Kocialkowski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181025124546.22145-4-boris.brezillon@bootlin.com \
    --to=boris.brezillon@bootlin.com \
    --cc=daniel@ffwll.ch \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=eric@anholt.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.