All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Hellstrom <thellstrom@vmware.com>
To: dri-devel@lists.freedesktop.org, linux-graphics-maintainer@vmware.com
Subject: [PATCH -next 02/11] drm/vmwgfx: Use the cpu blit utility for framebuffer to screen target blits
Date: Thu, 22 Mar 2018 11:23:43 +0100	[thread overview]
Message-ID: <20180322102352.2881-2-thellstrom@vmware.com> (raw)
In-Reply-To: <20180322102352.2881-1-thellstrom@vmware.com>

This blit was previously performed using two large vmaps, one of which
was teared down and remapped on each blit. Use the more resource-
conserving TTM cpu blit instead.

The blit is used in boundary-box computing mode which makes it possible
to minimize the bounding box used in host operations.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |  23 ++++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h    |   1 +
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c    |  50 ++++++++++------
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.h    |   4 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c   |   5 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c   | 105 +++++++++++----------------------
 6 files changed, 97 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 7177eecb8c9f..ddf71bef5359 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -185,6 +185,22 @@ static const struct ttm_place evictable_placement_flags[] = {
 	}
 };
 
+static const struct ttm_place nonfixed_placement_flags[] = {
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED
+	}
+};
+
 struct ttm_placement vmw_evictable_placement = {
 	.num_placement = 4,
 	.placement = evictable_placement_flags,
@@ -213,6 +229,13 @@ struct ttm_placement vmw_mob_ne_placement = {
 	.busy_placement = &mob_ne_placement_flags
 };
 
+struct ttm_placement vmw_nonfixed_placement = {
+	.num_placement = 3,
+	.placement = nonfixed_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags
+};
+
 struct vmw_ttm_tt {
 	struct ttm_dma_tt dma_ttm;
 	struct vmw_private *dev_priv;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 15b22e56b8d1..9368bcb492aa 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -767,6 +767,7 @@ extern struct ttm_placement vmw_evictable_placement;
 extern struct ttm_placement vmw_srf_placement;
 extern struct ttm_placement vmw_mob_placement;
 extern struct ttm_placement vmw_mob_ne_placement;
+extern struct ttm_placement vmw_nonfixed_placement;
 extern struct ttm_bo_driver vmw_bo_driver;
 extern int vmw_dma_quiescent(struct drm_device *dev);
 extern int vmw_bo_map_dma(struct ttm_buffer_object *bo);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 0f9c9cd78d83..63159674bf92 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -682,9 +682,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane)
 		return NULL;
 
 	vps->pinned = 0;
-
-	/* Mapping is managed by prepare_fb/cleanup_fb */
-	memset(&vps->host_map, 0, sizeof(vps->host_map));
 	vps->cpp = 0;
 
 	/* Each ref counted resource needs to be acquired again */
@@ -746,11 +743,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane,
 
 
 	/* Should have been freed by cleanup_fb */
-	if (vps->host_map.virtual) {
-		DRM_ERROR("Host mapping not freed\n");
-		ttm_bo_kunmap(&vps->host_map);
-	}
-
 	if (vps->surf)
 		vmw_surface_unreference(&vps->surf);
 
@@ -1129,12 +1121,14 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = {
 };
 
 /**
- * Pin the dmabuffer to the start of vram.
+ * Pin the dmabuffer in a location suitable for access by the
+ * display system.
  */
 static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb)
 {
 	struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
 	struct vmw_dma_buffer *buf;
+	struct ttm_placement *placement;
 	int ret;
 
 	buf = vfb->dmabuf ?  vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
@@ -1151,12 +1145,24 @@ static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb)
 		break;
 	case vmw_du_screen_object:
 	case vmw_du_screen_target:
-		if (vfb->dmabuf)
-			return vmw_dmabuf_pin_in_vram_or_gmr(dev_priv, buf,
-							     false);
+		if (vfb->dmabuf) {
+			if (dev_priv->capabilities & SVGA_CAP_3D) {
+				/*
+				 * Use surface DMA to get content to
+				 * sreen target surface.
+				 */
+				placement = &vmw_vram_gmr_placement;
+			} else {
+				/* Use CPU blit. */
+				placement = &vmw_sys_placement;
+			}
+		} else {
+			/* Use surface / image update */
+			placement = &vmw_mob_placement;
+		}
 
-		return vmw_dmabuf_pin_in_placement(dev_priv, buf,
-						   &vmw_mob_placement, false);
+		return vmw_dmabuf_pin_in_placement(dev_priv, buf, placement,
+						   false);
 	default:
 		return -EINVAL;
 	}
@@ -2419,14 +2425,21 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv,
 int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
 				  struct vmw_dma_buffer *buf,
 				  bool interruptible,
-				  bool validate_as_mob)
+				  bool validate_as_mob,
+				  bool for_cpu_blit)
 {
+	struct ttm_operation_ctx ctx = {
+		.interruptible = interruptible,
+		.no_wait_gpu = false};
 	struct ttm_buffer_object *bo = &buf->base;
 	int ret;
 
 	ttm_bo_reserve(bo, false, false, NULL);
-	ret = vmw_validate_single_buffer(dev_priv, bo, interruptible,
-					 validate_as_mob);
+	if (for_cpu_blit)
+		ret = ttm_bo_validate(bo, &vmw_nonfixed_placement, &ctx);
+	else
+		ret = vmw_validate_single_buffer(dev_priv, bo, interruptible,
+						 validate_as_mob);
 	if (ret)
 		ttm_bo_unreserve(bo);
 
@@ -2538,7 +2551,8 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
 	if (res->backup) {
 		ret = vmw_kms_helper_buffer_prepare(res->dev_priv, res->backup,
 						    interruptible,
-						    res->dev_priv->has_mob);
+						    res->dev_priv->has_mob,
+						    false);
 		if (ret)
 			goto out_unreserve;
 	}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 42b0f1589d3f..4e8749a8717e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -177,7 +177,6 @@ struct vmw_plane_state {
 	int pinned;
 
 	/* For CPU Blit */
-	struct ttm_bo_kmap_obj host_map;
 	unsigned int cpp;
 };
 
@@ -289,7 +288,8 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv,
 int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
 				  struct vmw_dma_buffer *buf,
 				  bool interruptible,
-				  bool validate_as_mob);
+				  bool validate_as_mob,
+				  bool for_cpu_blit);
 void vmw_kms_helper_buffer_revert(struct vmw_dma_buffer *buf);
 void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv,
 				  struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index 9f6deced4244..3b7bf7ca18b9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -1032,7 +1032,7 @@ int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
 	int ret;
 
 	ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible,
-					    false);
+					    false, false);
 	if (ret)
 		return ret;
 
@@ -1130,7 +1130,8 @@ int vmw_kms_sou_readback(struct vmw_private *dev_priv,
 	struct vmw_kms_dirty dirty;
 	int ret;
 
-	ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false);
+	ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false,
+					    false);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 6de28746f6f7..8eec88920851 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -114,7 +114,6 @@ struct vmw_screen_target_display_unit {
 	bool defined;
 
 	/* For CPU Blit */
-	struct ttm_bo_kmap_obj host_map;
 	unsigned int cpp;
 };
 
@@ -639,10 +638,9 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 		container_of(dirty->unit, typeof(*stdu), base);
 	s32 width, height;
 	s32 src_pitch, dst_pitch;
-	u8 *src, *dst;
-	bool not_used;
-	struct ttm_bo_kmap_obj guest_map;
-	int ret;
+	struct ttm_buffer_object *src_bo, *dst_bo;
+	u32 src_offset, dst_offset;
+	struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp);
 
 	if (!dirty->num_hits)
 		return;
@@ -653,57 +651,38 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 	if (width == 0 || height == 0)
 		return;
 
-	ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
-			  &guest_map);
-	if (ret) {
-		DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
-			  ret);
-		goto out_cleanup;
-	}
-
-	/* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
-	src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
-	src = ttm_kmap_obj_virtual(&stdu->host_map, &not_used);
-	src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
-
-	dst_pitch = ddirty->pitch;
-	dst = ttm_kmap_obj_virtual(&guest_map, &not_used);
-	dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
+	/* Assume we are blitting from Guest (dmabuf) to Host (display_srf) */
+	dst_pitch = stdu->display_srf->base_size.width * stdu->cpp;
+	dst_bo = &stdu->display_srf->res.backup->base;
+	dst_offset = ddirty->top * dst_pitch + ddirty->left * stdu->cpp;
 
+	src_pitch = ddirty->pitch;
+	src_bo = &ddirty->buf->base;
+	src_offset = ddirty->fb_top * src_pitch + ddirty->fb_left * stdu->cpp;
 
-	/* Figure out the real direction */
-	if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) {
-		u8 *tmp;
-		s32 tmp_pitch;
-
-		tmp = src;
-		tmp_pitch = src_pitch;
-
-		src = dst;
-		src_pitch = dst_pitch;
-
-		dst = tmp;
-		dst_pitch = tmp_pitch;
+	/* Swap src and dst if the assumption was wrong. */
+	if (ddirty->transfer != SVGA3D_WRITE_HOST_VRAM) {
+		swap(dst_pitch, src_pitch);
+		swap(dst_bo, src_bo);
+		swap(src_offset, dst_offset);
 	}
 
-	/* CPU Blit */
-	while (height-- > 0) {
-		memcpy(dst, src, width * stdu->cpp);
-		dst += dst_pitch;
-		src += src_pitch;
-	}
+	(void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch,
+			       src_bo, src_offset, src_pitch,
+			       width * stdu->cpp, height, &diff);
 
-	if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) {
+	if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM &&
+	    drm_rect_visible(&diff.rect)) {
 		struct vmw_private *dev_priv;
 		struct vmw_stdu_update *cmd;
 		struct drm_clip_rect region;
 		int ret;
 
 		/* We are updating the actual surface, not a proxy */
-		region.x1 = ddirty->left;
-		region.x2 = ddirty->right;
-		region.y1 = ddirty->top;
-		region.y2 = ddirty->bottom;
+		region.x1 = diff.rect.x1;
+		region.x2 = diff.rect.x2;
+		region.y1 = diff.rect.y1;
+		region.y2 = diff.rect.y2;
 		ret = vmw_kms_update_proxy(
 			(struct vmw_resource *) &stdu->display_srf->res,
 			(const struct drm_clip_rect *) &region, 1, 1);
@@ -720,13 +699,12 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 		}
 
 		vmw_stdu_populate_update(cmd, stdu->base.unit,
-					 ddirty->left, ddirty->right,
-					 ddirty->top, ddirty->bottom);
+					 region.x1, region.x2,
+					 region.y1, region.y2);
 
 		vmw_fifo_commit(dev_priv, sizeof(*cmd));
 	}
 
-	ttm_bo_kunmap(&guest_map);
 out_cleanup:
 	ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
 	ddirty->right = ddirty->bottom = S32_MIN;
@@ -772,9 +750,15 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
 		container_of(vfb, struct vmw_framebuffer_dmabuf, base)->buffer;
 	struct vmw_stdu_dirty ddirty;
 	int ret;
+	bool cpu_blit = !(dev_priv->capabilities & SVGA_CAP_3D);
 
+	/*
+	 * VMs without 3D support don't have the surface DMA command and
+	 * we'll be using a CPU blit, and the framebuffer should be moved out
+	 * of VRAM.
+	 */
 	ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible,
-					    false);
+					    false, cpu_blit);
 	if (ret)
 		return ret;
 
@@ -793,8 +777,8 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
 	if (to_surface)
 		ddirty.base.fifo_reserve_size += sizeof(struct vmw_stdu_update);
 
-	/* 2D VMs cannot use SVGA_3D_CMD_SURFACE_DMA so do CPU blit instead */
-	if (!(dev_priv->capabilities & SVGA_CAP_3D)) {
+
+	if (cpu_blit) {
 		ddirty.base.fifo_commit = vmw_stdu_dmabuf_cpu_commit;
 		ddirty.base.clip = vmw_stdu_dmabuf_cpu_clip;
 		ddirty.base.fifo_reserve_size = 0;
@@ -1071,9 +1055,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
 {
 	struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
 
-	if (vps->host_map.virtual)
-		ttm_bo_kunmap(&vps->host_map);
-
 	if (vps->surf)
 		WARN_ON(!vps->pinned);
 
@@ -1235,24 +1216,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
 	 * so cache these mappings
 	 */
 	if (vps->content_fb_type == SEPARATE_DMA &&
-	    !(dev_priv->capabilities & SVGA_CAP_3D)) {
-		ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
-				  vps->surf->res.backup->base.num_pages,
-				  &vps->host_map);
-		if (ret) {
-			DRM_ERROR("Failed to map display buffer to CPU\n");
-			goto out_srf_unpin;
-		}
-
+	    !(dev_priv->capabilities & SVGA_CAP_3D))
 		vps->cpp = new_fb->pitches[0] / new_fb->width;
-	}
 
 	return 0;
 
-out_srf_unpin:
-	vmw_resource_unpin(&vps->surf->res);
-	vps->pinned--;
-
 out_srf_unref:
 	vmw_surface_unreference(&vps->surf);
 	return ret;
@@ -1296,7 +1264,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
 		stdu->display_srf = vps->surf;
 		stdu->content_fb_type = vps->content_fb_type;
 		stdu->cpp = vps->cpp;
-		memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));
 
 		vclips.x = crtc->x;
 		vclips.y = crtc->y;
-- 
2.14.3

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

  reply	other threads:[~2018-03-22 10:24 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-22 10:23 [PATCH -next 01/11] drm/vmwgfx: Add a cpu blit utility that can be used for page-backed bos Thomas Hellstrom
2018-03-22 10:23 ` Thomas Hellstrom [this message]
2018-03-22 10:23 ` [PATCH -next 03/11] drm/vmwgfx: Fix multiple command buffer context use Thomas Hellstrom
2018-03-22 10:23 ` [PATCH -next 04/11] drm/vmwgfx: Avoid pinning fbdev framebuffers Thomas Hellstrom
2018-03-22 10:23 ` [PATCH -next 05/11] drm/vmwgfx: Improve on hibernation Thomas Hellstrom
2018-03-22 10:23 ` [PATCH -next 06/11] drm/vmwgfx: Get rid of the device-private suspended member Thomas Hellstrom
2018-03-22 10:23 ` [PATCH -next 07/11] drm/vmwgfx: Use kasprintf Thomas Hellstrom
2018-03-22 10:23 ` [PATCH -next 08/11] drm/vmwgfx: Stricter count of legacy surface device resources Thomas Hellstrom
2018-03-22 10:23 ` [PATCH -next 09/11] drm/vmwgfx: Unpin the screen object backup buffer when not used Thomas Hellstrom
2018-03-22 10:23 ` [PATCH -next 10/11] drm/vmwgfx: use monotonic event timestamps Thomas Hellstrom
2018-03-22 10:23 ` [PATCH -next 11/11] drm/vmwgfx: Bump version patchlevel and date Thomas Hellstrom

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180322102352.2881-2-thellstrom@vmware.com \
    --to=thellstrom@vmware.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=linux-graphics-maintainer@vmware.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.