Paul Kocialkowski <paul.kocialkowski@bootlin.com> writes:

> The binner BO is not required until the V3D is in use, so avoid
> allocating it at probe and do it on the first non-dumb BO allocation.
>
> Keep track of which clients are using the V3D and liberate the buffer
> when there is none left, using a kref. Protect the logic with a
> mutex to avoid race conditions.
>
> The binner BO is created at the time of the first render ioctl and is
> destroyed when there is no client and no exec job using it left.
>
> The Out-Of-Memory (OOM) interrupt also gets some tweaking, to avoid
> enabling it before having allocated a binner bo.
>
> We also want to keep the BO alive during runtime suspend/resume to avoid
> failing to allocate it at resume. This happens when the CMA pool is
> full at that point and results in a hard crash.
>
> Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
> ---
>  drivers/gpu/drm/vc4/vc4_bo.c  | 45 +++++++++++++++++++++++++--
>  drivers/gpu/drm/vc4/vc4_drv.c |  6 ++++
>  drivers/gpu/drm/vc4/vc4_drv.h | 14 +++++++++
>  drivers/gpu/drm/vc4/vc4_gem.c | 13 ++++++++
>  drivers/gpu/drm/vc4/vc4_irq.c | 21 +++++++++----
>  drivers/gpu/drm/vc4/vc4_v3d.c | 58 +++++++++++++++++++++++++++--------
>  6 files changed, 135 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
> index 88ebd681d7eb..03a26de620de 100644
> --- a/drivers/gpu/drm/vc4/vc4_bo.c
> +++ b/drivers/gpu/drm/vc4/vc4_bo.c
> @@ -799,20 +799,47 @@ vc4_prime_import_sg_table(struct drm_device *dev,
>  	return obj;
>  }
>  
> +static int vc4_grab_bin_bo(struct vc4_dev *vc4, struct vc4_file *vc4file)
> +{
> +	int ret;
> +
> +	if (!vc4->v3d)
> +		return -ENODEV;
> +
> +	if (vc4file->bin_bo_used)
> +		return 0;
> +
> +	ret = vc4_v3d_bin_bo_get(vc4);
> +	if (ret)
> +		return ret;
> +
> +	vc4file->bin_bo_used = true;
> +
> +	return 0;
> +}
> +
>  int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv)
>  {
>  	struct drm_vc4_create_bo *args = data;
> +	struct vc4_file *vc4file = file_priv->driver_priv;
> +	struct vc4_dev *vc4 = to_vc4_dev(dev);
>  	struct vc4_bo *bo = NULL;
>  	int ret;
>  
> +	ret = vc4_grab_bin_bo(vc4, vc4file);
> +	if (ret)
> +		return ret;

Interesting note -- we'll now throw -ENODEV from this ioctl when v3d
isn't present.  I think that's actually totally fine and maybe I should
have done that for the !v3d patches originally.

>  	/*
>  	 * We can't allocate from the BO cache, because the BOs don't
>  	 * get zeroed, and that might leak data between users.
>  	 */
>  	bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_V3D);
> -	if (IS_ERR(bo))
> +	if (IS_ERR(bo)) {
> +		vc4_v3d_bin_bo_put(vc4);
>  		return PTR_ERR(bo);
> +	}

I actually don't think you want the bin_bo_put()s in the error paths
here -- vc4_grab_bin_bo() has set a flag in vc4file recording that we
need a bin_bo_put() when the file is closed, so if we take these error
paths there will be a refcount underflow once the file gets closed.

>  
>  	bo->madv = VC4_MADV_WILLNEED;
>  
> @@ -846,6 +873,8 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
>  			   struct drm_file *file_priv)
>  {
>  	struct drm_vc4_create_shader_bo *args = data;
> +	struct vc4_file *vc4file = file_priv->driver_priv;
> +	struct vc4_dev *vc4 = to_vc4_dev(dev);
>  	struct vc4_bo *bo = NULL;
>  	int ret;
>  
> @@ -865,9 +894,15 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
>  		return -EINVAL;
>  	}
>  
> +	ret = vc4_grab_bin_bo(vc4, vc4file);
> +	if (ret)
> +		return ret;
> +
>  	bo = vc4_bo_create(dev, args->size, true, VC4_BO_TYPE_V3D_SHADER);
> -	if (IS_ERR(bo))
> +	if (IS_ERR(bo)) {
> +		vc4_v3d_bin_bo_put(vc4);
>  		return PTR_ERR(bo);
> +	}
>  
>  	bo->madv = VC4_MADV_WILLNEED;
>  
> @@ -893,8 +928,12 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
>  	 * races for users to do doing things like mmap the shader BO.
>  	 */
>  	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
> +	goto complete;
> +
> +fail:
> +	vc4_v3d_bin_bo_put(vc4);
>  
> - fail:
> +complete:
>  	drm_gem_object_put_unlocked(&bo->base.base);
>  
>  	return ret;
> diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
> index 6d9be20a32be..0f99ad03614e 100644
> --- a/drivers/gpu/drm/vc4/vc4_drv.c
> +++ b/drivers/gpu/drm/vc4/vc4_drv.c
> @@ -128,8 +128,12 @@ static int vc4_open(struct drm_device *dev, struct drm_file *file)
>  
>  static void vc4_close(struct drm_device *dev, struct drm_file *file)
>  {
> +	struct vc4_dev *vc4 = to_vc4_dev(dev);
>  	struct vc4_file *vc4file = file->driver_priv;
>  
> +	if (vc4file->bin_bo_used)
> +		vc4_v3d_bin_bo_put(vc4);
> +
>  	vc4_perfmon_close_file(vc4file);
>  	kfree(vc4file);
>  }
> @@ -274,6 +278,8 @@ static int vc4_drm_bind(struct device *dev)
>  	drm->dev_private = vc4;
>  	INIT_LIST_HEAD(&vc4->debugfs_list);
>  
> +	mutex_init(&vc4->bin_bo_lock);
> +
>  	ret = vc4_bo_cache_init(drm);
>  	if (ret)
>  		goto dev_put;
> diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
> index 4f13f6262491..5bfca83deb8e 100644
> --- a/drivers/gpu/drm/vc4/vc4_drv.h
> +++ b/drivers/gpu/drm/vc4/vc4_drv.h
> @@ -216,6 +216,11 @@ struct vc4_dev {
>  	 * the minor is available (after drm_dev_register()).
>  	 */
>  	struct list_head debugfs_list;
> +
> +	/* Mutex for binner bo allocation. */
> +	struct mutex bin_bo_lock;
> +	/* Reference count for our binner bo. */
> +	struct kref bin_bo_kref;
>  };
>  
>  static inline struct vc4_dev *
> @@ -584,6 +589,11 @@ struct vc4_exec_info {
>  	 * NULL otherwise.
>  	 */
>  	struct vc4_perfmon *perfmon;
> +
> +	/* Whether the exec has taken a reference to the binner BO, which should
> +	 * happen with a VC4_PACKET_TILE_BINNING_MODE_CONFIG packet.
> +	 */
> +	bool bin_bo_used;
>  };
>  
>  /* Per-open file private data. Any driver-specific resource that has to be
> @@ -594,6 +604,8 @@ struct vc4_file {
>  		struct idr idr;
>  		struct mutex lock;
>  	} perfmon;
> +
> +	bool bin_bo_used;
>  };

> diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
> index 723dc86b4511..e226c24e543f 100644
> --- a/drivers/gpu/drm/vc4/vc4_irq.c
> +++ b/drivers/gpu/drm/vc4/vc4_irq.c
> @@ -59,18 +59,22 @@ vc4_overflow_mem_work(struct work_struct *work)
>  {
>  	struct vc4_dev *vc4 =
>  		container_of(work, struct vc4_dev, overflow_mem_work);
> -	struct vc4_bo *bo = vc4->bin_bo;
> +	struct vc4_bo *bo;
>  	int bin_bo_slot;
>  	struct vc4_exec_info *exec;
>  	unsigned long irqflags;
>  
> -	if (!bo)
> -		return;
> +	mutex_lock(&vc4->bin_bo_lock);
> +
> +	if (!vc4->bin_bo)
> +		goto complete;
> +
> +	bo = vc4->bin_bo;
>  
>  	bin_bo_slot = vc4_v3d_get_bin_slot(vc4);
>  	if (bin_bo_slot < 0) {
>  		DRM_ERROR("Couldn't allocate binner overflow mem\n");
> -		return;
> +		goto complete;
>  	}
>  
>  	spin_lock_irqsave(&vc4->job_lock, irqflags);
> @@ -101,6 +105,9 @@ vc4_overflow_mem_work(struct work_struct *work)
>  	V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
>  	V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
>  	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
> +
> +complete:
> +	mutex_unlock(&vc4->bin_bo_lock);
>  }
>  
>  static void
> @@ -252,8 +259,10 @@ vc4_irq_postinstall(struct drm_device *dev)
>  	if (!vc4->v3d)
>  		return 0;
>  
> -	/* Enable both the render done and out of memory interrupts. */
> -	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
> +	/* Enable the render done interrupts. The out-of-memory interrupt is
> +	 * enabled as soon as we have a binner BO allocated.
> +	 */
> +	V3D_WRITE(V3D_INTENA, V3D_INT_FLDONE | V3D_INT_FRDONE);
>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
> index c16db4665af6..55abaae71856 100644
> --- a/drivers/gpu/drm/vc4/vc4_v3d.c
> +++ b/drivers/gpu/drm/vc4/vc4_v3d.c
> @@ -294,6 +294,14 @@ static int bin_bo_alloc(struct vc4_dev *vc4)
>  			WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 !=
>  				     bo->base.base.size / vc4->bin_alloc_size);
>  
> +			kref_init(&vc4->bin_bo_kref);
> +
> +			/* Enable the out-of-memory interrupt to set our
> +			 * newly-allocated binner BO, potentially from an
> +			 * already-pending-but-masked interrupt.
> +			 */
> +			V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
> +
>  			break;
>  		}
>  
> @@ -313,6 +321,43 @@ static int bin_bo_alloc(struct vc4_dev *vc4)
>  	return ret;
>  }
>  
> +int vc4_v3d_bin_bo_get(struct vc4_dev *vc4)
> +{
> +	int ret = 0;
> +
> +	mutex_lock(&vc4->bin_bo_lock);
> +
> +	if (vc4->bin_bo) {
> +		kref_get(&vc4->bin_bo_kref);
> +		goto complete;
> +	}
> +
> +	ret = bin_bo_alloc(vc4);
> +
> +complete:
> +	mutex_unlock(&vc4->bin_bo_lock);
> +
> +	return ret;
> +}
> +
> +static void bin_bo_release(struct kref *ref)
> +{
> +	struct vc4_dev *vc4 = container_of(ref, struct vc4_dev, bin_bo_kref);
> +
> +	if (!vc4->bin_bo)
> +		return;

Could we WARN_ON_ONCE instead of returning silently?  If we're going from
1->0 refcount without a bin_bo allocated, something has gone terribly
wrong and we want to know.

> +	drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
> +	vc4->bin_bo = NULL;
> +}