From mboxrd@z Thu Jan 1 00:00:00 1970 From: Daniel Vetter Subject: Re: [PATCH 5/5] drm/i915:Use the coarse mechanism based on drm fd to dispatch the BSD command on BDW GT3 Date: Wed, 9 Apr 2014 16:34:27 +0200 Message-ID: <20140409143427.GD9262@phenom.ffwll.local> References: <1397008796-3595-1-git-send-email-yakui.zhao@intel.com> <1397008796-3595-6-git-send-email-yakui.zhao@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Received: from mail-ee0-f43.google.com (mail-ee0-f43.google.com [74.125.83.43]) by gabe.freedesktop.org (Postfix) with ESMTP id BB6E46E041 for ; Wed, 9 Apr 2014 07:34:30 -0700 (PDT) Received: by mail-ee0-f43.google.com with SMTP id e53so1969698eek.2 for ; Wed, 09 Apr 2014 07:34:29 -0700 (PDT) Content-Disposition: inline In-Reply-To: <1397008796-3595-6-git-send-email-yakui.zhao@intel.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" To: Zhao Yakui Cc: intel-gfx@lists.freedesktop.org List-Id: intel-gfx@lists.freedesktop.org On Wed, Apr 09, 2014 at 09:59:56AM +0800, Zhao Yakui wrote: > The BDW GT3 has two independent BSD rings, which can be used to process the > video commands. To be simpler, it is transparent to user-space driver/middleware. > Instead the kernel driver will decide which ring is to dispatch the BSD video > command. > > As every BSD ring is powerful, it is enough to dispatch the BSD video command > based on the drm fd. In such case the different BSD ring is used for video playing > back and encoding. At the same time the coarse dispatch mechanism can help to avoid > the object synchronization between the BSD rings. > > Signed-off-by: Zhao Yakui This looks way too complicated. First things first please get rid of the atomic_t usage. If you don't have _massive_ comments explaining the memory barriers you're most likely using linux kernel atomic_t wrong. They are fully unordered. With that out of the way this still looks a bit complicated really. Can't we just use a very simple static rule in gen8_dispatch_bsd_ring which hashed the pointer address of the file_priv? Just to get things going, once we have a clear need we can try to make things more intelligent. But in case of doubt I really prefer if we start with the dumbest possible approach first and add complexity instead of starting with something really complex and simplifying it. -Daniel > --- > drivers/gpu/drm/i915/i915_dma.c | 14 ++++++ > drivers/gpu/drm/i915/i915_drv.h | 3 ++ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 73 +++++++++++++++++++++++++++- > drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + > drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + > 5 files changed, 93 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index 0b38f88..8260463 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) > spin_lock_init(&dev_priv->backlight_lock); > spin_lock_init(&dev_priv->uncore.lock); > spin_lock_init(&dev_priv->mm.object_stat_lock); > + spin_lock_init(&dev_priv->bsd_lock); > mutex_init(&dev_priv->dpio_lock); > mutex_init(&dev_priv->modeset_restore_lock); > > @@ -1928,7 +1929,20 @@ void i915_driver_preclose(struct drm_device * dev, struct drm_file *file_priv) > void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) > { > struct drm_i915_file_private *file_priv = file->driver_priv; > + struct intel_ring_buffer *bsd_ring; > + struct drm_i915_private *dev_priv = dev->dev_private; > > + if (file_priv && file_priv->bsd_ring) { > + int cmd_counter; > + bsd_ring = file_priv->bsd_ring; > + file_priv->bsd_ring = NULL; > + spin_lock(&dev_priv->bsd_lock); > + cmd_counter = atomic_sub_return(1, &bsd_ring->bsd_cmd_counter); > + if (cmd_counter < 0) { > + atomic_set(&bsd_ring->bsd_cmd_counter, 0); > + } > + spin_unlock(&dev_priv->bsd_lock); > + } > kfree(file_priv); > } > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index d77f4e0..128639c 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1457,6 +1457,8 @@ struct drm_i915_private { > struct i915_dri1_state dri1; > /* Old ums support infrastructure, same warning applies. */ > struct i915_ums_state ums; > + /* the lock for dispatch video commands on two BSD rings */ > + spinlock_t bsd_lock; > }; > > static inline struct drm_i915_private *to_i915(const struct drm_device *dev) > @@ -1664,6 +1666,7 @@ struct drm_i915_file_private { > > struct i915_hw_context *private_default_ctx; > atomic_t rps_wait_boost; > + struct intel_ring_buffer *bsd_ring; > }; > > /* > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 3491402..75d8cc0 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -999,6 +999,70 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, > return 0; > } > > +/** > + * Find one BSD ring to dispatch the corresponding BSD command. > + * The Ring ID is returned. > + */ > +static int gen8_dispatch_bsd_ring(struct drm_device *dev, > + struct drm_file *file) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct drm_i915_file_private *file_priv = file->driver_priv; > + struct intel_ring_buffer *temp_ring, *bsd_ring; > + int bsd_counter, temp_counter; > + > + if (file_priv->bsd_ring) { > + /* Check whether the load balance is required.*/ > + spin_lock(&dev_priv->bsd_lock); > + bsd_counter = atomic_read(&(file_priv->bsd_ring->bsd_cmd_counter)); > + temp_ring = &dev_priv->ring[VCS]; > + temp_counter = atomic_read(&temp_ring->bsd_cmd_counter); > + bsd_ring = &dev_priv->ring[VCS]; > + > + temp_ring = &dev_priv->ring[VCS2]; > + if (atomic_read(&temp_ring->bsd_cmd_counter) < temp_counter) { > + temp_counter = atomic_read(&temp_ring->bsd_cmd_counter); > + bsd_ring = temp_ring; > + } > + /* > + * If it is already the ring with the minimum load, it is > + * unnecessary to switch it. > + */ > + if (bsd_ring == file_priv->bsd_ring) { > + spin_unlock(&dev_priv->bsd_lock); > + return bsd_ring->id; > + } > + /* > + * If the load delta between current ring and target ring is > + * small, it is unnecessary to switch it. > + */ > + if ((bsd_counter - temp_counter) <= 1) { > + spin_unlock(&dev_priv->bsd_lock); > + return file_priv->bsd_ring->id; > + } > + /* balance the load between current ring and target ring */ > + atomic_dec(&file_priv->bsd_ring->bsd_cmd_counter); > + atomic_inc(&bsd_ring->bsd_cmd_counter); > + spin_unlock(&dev_priv->bsd_lock); > + file_priv->bsd_ring = bsd_ring; > + return bsd_ring->id; > + } else { > + spin_lock(&dev_priv->bsd_lock); > + bsd_ring = &dev_priv->ring[VCS]; > + bsd_counter = atomic_read(&bsd_ring->bsd_cmd_counter); > + temp_ring = &dev_priv->ring[VCS2]; > + temp_counter = atomic_read(&temp_ring->bsd_cmd_counter); > + if (temp_counter < bsd_counter) { > + bsd_ring = temp_ring; > + bsd_counter = temp_counter; > + } > + atomic_inc(&bsd_ring->bsd_cmd_counter); > + file_priv->bsd_ring = bsd_ring; > + spin_unlock(&dev_priv->bsd_lock); > + return bsd_ring->id; > + } > +} > + > static int > i915_gem_do_execbuffer(struct drm_device *dev, void *data, > struct drm_file *file, > @@ -1043,7 +1107,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > > if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT) > ring = &dev_priv->ring[RCS]; > - else > + else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) { > + if (HAS_BSD2(dev)) { > + int ring_id; > + ring_id = gen8_dispatch_bsd_ring(dev, file); > + ring = &dev_priv->ring[ring_id]; > + } else > + ring = &dev_priv->ring[VCS]; > + } else > ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1]; > > if (!intel_ring_initialized(ring)) { > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 43e0227..852fc29 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -2123,6 +2123,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) > ring->dispatch_execbuffer = i965_dispatch_execbuffer; > } > ring->init = init_ring_common; > + atomic_set(&ring->bsd_cmd_counter, 0); > > return intel_init_ring_buffer(dev, ring); > } > @@ -2170,6 +2171,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) > > ring->init = init_ring_common; > > + atomic_set(&ring->bsd_cmd_counter, 0); > return intel_init_ring_buffer(dev, ring); > } > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 8ca4285..4f87b08 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -196,6 +196,8 @@ struct intel_ring_buffer { > * to encode the command length in the header). > */ > u32 (*get_cmd_length_mask)(u32 cmd_header); > + > + atomic_t bsd_cmd_counter; > }; > > static inline bool > -- > 1.7.10.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch