nouveau.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [Nouveau] [PATCH v2] drm/nouveau/gr: enable memory loads on helper invocation on all channels
@ 2023-06-22 15:20 Karol Herbst
  2023-08-03  2:14 ` Dave Airlie
  0 siblings, 1 reply; 2+ messages in thread
From: Karol Herbst @ 2023-06-22 15:20 UTC (permalink / raw)
  To: linux-kernel; +Cc: nouveau, stable, Ben Skeggs

We have a lurking bug where Fragment Shader Helper Invocations can't load
from memory. Such loads are actually required by OpenGL, and their absence
is causing random hangs or failures in random shaders.

It is unknown how widespread this issue is, but shaders hitting this can
end up with infinite loops.

We enable those loads on all Kepler and newer GPUs, but only where we use
our own firmware.

Nvidia's firmware provides a way to set a kernelspace controlled list of
mmio registers in the gr space from push buffers via MME macros.

v2: drop code for gm200 and newer.

Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: David Airlie <airlied@gmail.com>
Cc: nouveau@lists.freedesktop.org
Cc: stable@vger.kernel.org
Signed-off-by: Karol Herbst <kherbst@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h  |  1 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c  |  4 +++-
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c  | 10 ++++++++++
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c |  1 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c  |  1 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c  |  1 +
 6 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
index 00dbeda7e346..de161e7a04aa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -117,6 +117,7 @@ void gk104_grctx_generate_r418800(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gk110_grctx;
 void gk110_grctx_generate_r419eb0(struct gf100_gr *);
+void gk110_grctx_generate_r419f78(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gk110b_grctx;
 extern const struct gf100_grctx_func gk208_grctx;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
index 94233d0119df..52a234b1ef01 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -906,7 +906,9 @@ static void
 gk104_grctx_generate_r419f78(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
+
+	/* bit 3 set disables loads in fp helper invocations, we need it enabled */
+	nvkm_mask(device, 0x419f78, 0x00000009, 0x00000000);
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
index 4391458e1fb2..3acdd9eeb74a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
@@ -820,6 +820,15 @@ gk110_grctx_generate_r419eb0(struct gf100_gr *gr)
 	nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000);
 }
 
+void
+gk110_grctx_generate_r419f78(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	/* bit 3 set disables loads in fp helper invocations, we need it enabled */
+	nvkm_mask(device, 0x419f78, 0x00000008, 0x00000000);
+}
+
 const struct gf100_grctx_func
 gk110_grctx = {
 	.main  = gf100_grctx_generate_main,
@@ -854,4 +863,5 @@ gk110_grctx = {
 	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
 	.r418800 = gk104_grctx_generate_r418800,
 	.r419eb0 = gk110_grctx_generate_r419eb0,
+	.r419f78 = gk110_grctx_generate_r419f78,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
index 7b9a34f9ec3c..5597e87624ac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
@@ -103,4 +103,5 @@ gk110b_grctx = {
 	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
 	.r418800 = gk104_grctx_generate_r418800,
 	.r419eb0 = gk110_grctx_generate_r419eb0,
+	.r419f78 = gk110_grctx_generate_r419f78,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
index c78d07a8bb7d..612656496541 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
@@ -568,4 +568,5 @@ gk208_grctx = {
 	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
 	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
 	.r418800 = gk104_grctx_generate_r418800,
+	.r419f78 = gk110_grctx_generate_r419f78,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
index beac66eb2a80..9906974ac3f0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
@@ -988,4 +988,5 @@ gm107_grctx = {
 	.r406500 = gm107_grctx_generate_r406500,
 	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
 	.r419e00 = gm107_grctx_generate_r419e00,
+	.r419f78 = gk110_grctx_generate_r419f78,
 };
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [Nouveau] [PATCH v2] drm/nouveau/gr: enable memory loads on helper invocation on all channels
  2023-06-22 15:20 [Nouveau] [PATCH v2] drm/nouveau/gr: enable memory loads on helper invocation on all channels Karol Herbst
@ 2023-08-03  2:14 ` Dave Airlie
  0 siblings, 0 replies; 2+ messages in thread
From: Dave Airlie @ 2023-08-03  2:14 UTC (permalink / raw)
  To: Karol Herbst; +Cc: nouveau, linux-kernel, stable, Ben Skeggs

On Fri, 23 Jun 2023 at 01:20, Karol Herbst <kherbst@redhat.com> wrote:
>
> We have a lurking bug where Fragment Shader Helper Invocations can't load
> from memory. Such loads are actually required by OpenGL, and their absence
> is causing random hangs or failures in random shaders.
>
> It is unknown how widespread this issue is, but shaders hitting this can
> end up with infinite loops.
>
> We enable those loads on all Kepler and newer GPUs, but only where we use
> our own firmware.
>
> Nvidia's firmware provides a way to set a kernelspace controlled list of
> mmio registers in the gr space from push buffers via MME macros.

seems sane,

Reviewed-by: Dave Airlie <airlied@redhat.com>
>
> v2: drop code for gm200 and newer.
>
> Cc: Ben Skeggs <bskeggs@redhat.com>
> Cc: David Airlie <airlied@gmail.com>
> Cc: nouveau@lists.freedesktop.org
> Cc: stable@vger.kernel.org
> Signed-off-by: Karol Herbst <kherbst@redhat.com>
> ---
>  drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h  |  1 +
>  drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c  |  4 +++-
>  drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c  | 10 ++++++++++
>  drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c |  1 +
>  drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c  |  1 +
>  drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c  |  1 +
>  6 files changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
> index 00dbeda7e346..de161e7a04aa 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
> @@ -117,6 +117,7 @@ void gk104_grctx_generate_r418800(struct gf100_gr *);
>
>  extern const struct gf100_grctx_func gk110_grctx;
>  void gk110_grctx_generate_r419eb0(struct gf100_gr *);
> +void gk110_grctx_generate_r419f78(struct gf100_gr *);
>
>  extern const struct gf100_grctx_func gk110b_grctx;
>  extern const struct gf100_grctx_func gk208_grctx;
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
> index 94233d0119df..52a234b1ef01 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
> @@ -906,7 +906,9 @@ static void
>  gk104_grctx_generate_r419f78(struct gf100_gr *gr)
>  {
>         struct nvkm_device *device = gr->base.engine.subdev.device;
> -       nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
> +
> +       /* bit 3 set disables loads in fp helper invocations, we need it enabled */
> +       nvkm_mask(device, 0x419f78, 0x00000009, 0x00000000);
>  }
>
>  void
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
> index 4391458e1fb2..3acdd9eeb74a 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
> @@ -820,6 +820,15 @@ gk110_grctx_generate_r419eb0(struct gf100_gr *gr)
>         nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000);
>  }
>
> +void
> +gk110_grctx_generate_r419f78(struct gf100_gr *gr)
> +{
> +       struct nvkm_device *device = gr->base.engine.subdev.device;
> +
> +       /* bit 3 set disables loads in fp helper invocations, we need it enabled */
> +       nvkm_mask(device, 0x419f78, 0x00000008, 0x00000000);
> +}
> +
>  const struct gf100_grctx_func
>  gk110_grctx = {
>         .main  = gf100_grctx_generate_main,
> @@ -854,4 +863,5 @@ gk110_grctx = {
>         .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
>         .r418800 = gk104_grctx_generate_r418800,
>         .r419eb0 = gk110_grctx_generate_r419eb0,
> +       .r419f78 = gk110_grctx_generate_r419f78,
>  };
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
> index 7b9a34f9ec3c..5597e87624ac 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
> @@ -103,4 +103,5 @@ gk110b_grctx = {
>         .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
>         .r418800 = gk104_grctx_generate_r418800,
>         .r419eb0 = gk110_grctx_generate_r419eb0,
> +       .r419f78 = gk110_grctx_generate_r419f78,
>  };
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
> index c78d07a8bb7d..612656496541 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
> @@ -568,4 +568,5 @@ gk208_grctx = {
>         .dist_skip_table = gf117_grctx_generate_dist_skip_table,
>         .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
>         .r418800 = gk104_grctx_generate_r418800,
> +       .r419f78 = gk110_grctx_generate_r419f78,
>  };
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
> index beac66eb2a80..9906974ac3f0 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
> @@ -988,4 +988,5 @@ gm107_grctx = {
>         .r406500 = gm107_grctx_generate_r406500,
>         .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
>         .r419e00 = gm107_grctx_generate_r419e00,
> +       .r419f78 = gk110_grctx_generate_r419f78,
>  };
> --
> 2.41.0
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-08-03  2:15 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-22 15:20 [Nouveau] [PATCH v2] drm/nouveau/gr: enable memory loads on helper invocation on all channels Karol Herbst
2023-08-03  2:14 ` Dave Airlie

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).