* (no subject)
@ 2018-07-06 14:42 Christian König
2018-07-06 14:42 ` [PATCH 1/6] dma-buf: add caching of sg_table Christian König
` (6 more replies)
0 siblings, 7 replies; 31+ messages in thread
From: Christian König @ 2018-07-06 14:42 UTC (permalink / raw)
To: intel-gfx
Next try of prework for unpinned DMA-buf operation.
Only send to intel-gfx to trigger unit tests on the following patches.
Christian.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH 1/6] dma-buf: add caching of sg_table
2018-07-06 14:42 (no subject) Christian König
@ 2018-07-06 14:42 ` Christian König
2018-07-06 14:42 ` [PATCH 2/6] drm: remove prime sg_table caching Christian König
` (5 subsequent siblings)
6 siblings, 0 replies; 31+ messages in thread
From: Christian König @ 2018-07-06 14:42 UTC (permalink / raw)
To: intel-gfx
To allow a smooth transition from pinning buffer objects to dynamic
invalidation we first start to cache the sg_table for an attachment
unless the driver explicitly says to not do so.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/dma-buf/dma-buf.c | 24 ++++++++++++++++++++++++
include/linux/dma-buf.h | 11 +++++++++++
2 files changed, 35 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 13884474d158..0bea5eecf554 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -574,6 +574,20 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
list_add(&attach->node, &dmabuf->attachments);
mutex_unlock(&dmabuf->lock);
+
+ if (!dmabuf->ops->no_sgt_cache) {
+ struct sg_table *sgt;
+
+ sgt = dmabuf->ops->map_dma_buf(attach, DMA_BIDIRECTIONAL);
+ if (!sgt)
+ sgt = ERR_PTR(-ENOMEM);
+ if (IS_ERR(sgt)) {
+ dma_buf_detach(dmabuf, attach);
+ return ERR_CAST(sgt);
+ }
+ attach->sgt = sgt;
+ }
+
return attach;
err_attach:
@@ -596,6 +610,10 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
if (WARN_ON(!dmabuf || !attach))
return;
+ if (attach->sgt)
+ dmabuf->ops->unmap_dma_buf(attach, attach->sgt,
+ DMA_BIDIRECTIONAL);
+
mutex_lock(&dmabuf->lock);
list_del(&attach->node);
if (dmabuf->ops->detach)
@@ -631,6 +649,9 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
if (WARN_ON(!attach || !attach->dmabuf))
return ERR_PTR(-EINVAL);
+ if (attach->sgt)
+ return attach->sgt;
+
sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
if (!sg_table)
sg_table = ERR_PTR(-ENOMEM);
@@ -658,6 +679,9 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
return;
+ if (attach->sgt == sg_table)
+ return;
+
attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
direction);
}
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 58725f890b5b..6534a6769e17 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -51,6 +51,16 @@ struct dma_buf_attachment;
* @vunmap: [optional] unmaps a vmap from the buffer
*/
struct dma_buf_ops {
+ /**
+ * @no_sgt_cache:
+ *
+ * Flag controlling the caching of the sg_table in the DMA-buf helpers.
+ * If not set the sg_table is created during device attaching, if set
+ * the sg_table is created dynamically when dma_buf_map_attachment() is
+ * called.
+ */
+ bool no_sgt_cache;
+
/**
* @attach:
*
@@ -323,6 +333,7 @@ struct dma_buf_attachment {
struct device *dev;
struct list_head node;
void *priv;
+ struct sg_table *sgt;
};
/**
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH 2/6] drm: remove prime sg_table caching
2018-07-06 14:42 (no subject) Christian König
2018-07-06 14:42 ` [PATCH 1/6] dma-buf: add caching of sg_table Christian König
@ 2018-07-06 14:42 ` Christian König
2018-07-06 14:42 ` [PATCH 3/6] dma-buf: add dma_buf_(un)map_attachment_locked variants v3 Christian König
` (4 subsequent siblings)
6 siblings, 0 replies; 31+ messages in thread
From: Christian König @ 2018-07-06 14:42 UTC (permalink / raw)
To: intel-gfx
That is now done by the DMA-buf helpers instead.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/drm_prime.c | 78 +++++++++++----------------------------------
1 file changed, 18 insertions(+), 60 deletions(-)
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 186db2e4c57a..f6bd37eb2a72 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -86,11 +86,6 @@ struct drm_prime_member {
struct rb_node handle_rb;
};
-struct drm_prime_attachment {
- struct sg_table *sgt;
- enum dma_data_direction dir;
-};
-
static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
struct dma_buf *dma_buf, uint32_t handle)
{
@@ -188,26 +183,17 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri
* @dma_buf: buffer to attach device to
* @attach: buffer attachment data
*
- * Allocates &drm_prime_attachment and calls &drm_driver.gem_prime_pin for
- * device specific attachment. This can be used as the &dma_buf_ops.attach
- * callback.
+ * Calls &drm_driver.gem_prime_pin for device specific handling. This can be
+ * used as the &dma_buf_ops.attach callback.
*
* Returns 0 on success, negative error code on failure.
*/
int drm_gem_map_attach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
- struct drm_prime_attachment *prime_attach;
struct drm_gem_object *obj = dma_buf->priv;
struct drm_device *dev = obj->dev;
- prime_attach = kzalloc(sizeof(*prime_attach), GFP_KERNEL);
- if (!prime_attach)
- return -ENOMEM;
-
- prime_attach->dir = DMA_NONE;
- attach->priv = prime_attach;
-
if (!dev->driver->gem_prime_pin)
return 0;
@@ -226,27 +212,9 @@ EXPORT_SYMBOL(drm_gem_map_attach);
void drm_gem_map_detach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
- struct drm_prime_attachment *prime_attach = attach->priv;
struct drm_gem_object *obj = dma_buf->priv;
struct drm_device *dev = obj->dev;
- if (prime_attach) {
- struct sg_table *sgt = prime_attach->sgt;
-
- if (sgt) {
- if (prime_attach->dir != DMA_NONE)
- dma_unmap_sg_attrs(attach->dev, sgt->sgl,
- sgt->nents,
- prime_attach->dir,
- DMA_ATTR_SKIP_CPU_SYNC);
- sg_free_table(sgt);
- }
-
- kfree(sgt);
- kfree(prime_attach);
- attach->priv = NULL;
- }
-
if (dev->driver->gem_prime_unpin)
dev->driver->gem_prime_unpin(obj);
}
@@ -292,36 +260,21 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach,
enum dma_data_direction dir)
{
- struct drm_prime_attachment *prime_attach = attach->priv;
struct drm_gem_object *obj = attach->dmabuf->priv;
struct sg_table *sgt;
- if (WARN_ON(dir == DMA_NONE || !prime_attach))
+ if (WARN_ON(dir == DMA_NONE))
return ERR_PTR(-EINVAL);
- /* return the cached mapping when possible */
- if (prime_attach->dir == dir)
- return prime_attach->sgt;
-
- /*
- * two mappings with different directions for the same attachment are
- * not allowed
- */
- if (WARN_ON(prime_attach->dir != DMA_NONE))
- return ERR_PTR(-EBUSY);
-
sgt = obj->dev->driver->gem_prime_get_sg_table(obj);
+ if (IS_ERR(sgt))
+ return sgt;
- if (!IS_ERR(sgt)) {
- if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
- DMA_ATTR_SKIP_CPU_SYNC)) {
- sg_free_table(sgt);
- kfree(sgt);
- sgt = ERR_PTR(-ENOMEM);
- } else {
- prime_attach->sgt = sgt;
- prime_attach->dir = dir;
- }
+ if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+ DMA_ATTR_SKIP_CPU_SYNC)) {
+ sg_free_table(sgt);
+ kfree(sgt);
+ sgt = ERR_PTR(-ENOMEM);
}
return sgt;
@@ -334,14 +287,19 @@ EXPORT_SYMBOL(drm_gem_map_dma_buf);
* @sgt: scatterlist info of the buffer to unmap
* @dir: direction of DMA transfer
*
- * Not implemented. The unmap is done at drm_gem_map_detach(). This can be
- * used as the &dma_buf_ops.unmap_dma_buf callback.
+ * This can be used as the &dma_buf_ops.unmap_dma_buf callback.
*/
void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
- /* nothing to be done here */
+ if (!sgt)
+ return;
+
+ dma_unmap_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ sg_free_table(sgt);
+ kfree(sgt);
}
EXPORT_SYMBOL(drm_gem_unmap_dma_buf);
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH 3/6] dma-buf: add dma_buf_(un)map_attachment_locked variants v3
2018-07-06 14:42 (no subject) Christian König
2018-07-06 14:42 ` [PATCH 1/6] dma-buf: add caching of sg_table Christian König
2018-07-06 14:42 ` [PATCH 2/6] drm: remove prime sg_table caching Christian König
@ 2018-07-06 14:42 ` Christian König
2018-07-06 14:42 ` [PATCH 4/6] dma-buf: lock the reservation object during (un)map_dma_buf v3 Christian König
` (3 subsequent siblings)
6 siblings, 0 replies; 31+ messages in thread
From: Christian König @ 2018-07-06 14:42 UTC (permalink / raw)
To: intel-gfx
Add function variants which can be called with the reservation lock
already held.
v2: reordered, add lockdep asserts, fix kerneldoc
v3: rebased on sgt caching
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/dma-buf/dma-buf.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++
include/linux/dma-buf.h | 5 ++++
2 files changed, 68 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 0bea5eecf554..e68322c953a9 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -624,6 +624,43 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
}
EXPORT_SYMBOL_GPL(dma_buf_detach);
+/**
+ * dma_buf_map_attachment_locked - Returns the scatterlist table of the
+ * attachment mapped into _device_ address space, with the reservation lock
+ * held. Is a wrapper for map_dma_buf() of the dma_buf_ops.
+ * @attach:	[in]	attachment whose scatterlist is to be returned
+ * @direction:	[in]	direction of DMA transfer
+ *
+ * Returns sg_table containing the scatterlist to be returned; returns ERR_PTR
+ * on error. May return -EINTR if it is interrupted by a signal.
+ *
+ * A mapping must be unmapped by using dma_buf_unmap_attachment_locked(). Note
+ * that the underlying backing storage is pinned for as long as a mapping
+ * exists, therefore users/importers should not hold onto a mapping for undue
+ * amounts of time.
+ */
+struct sg_table *
+dma_buf_map_attachment_locked(struct dma_buf_attachment *attach,
+ enum dma_data_direction direction)
+{
+ struct sg_table *sg_table;
+
+ might_sleep();
+ reservation_object_assert_held(attach->dmabuf->resv);
+
+ if (attach->sgt)
+ return attach->sgt;
+
+ sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
+ if (!sg_table)
+ sg_table = ERR_PTR(-ENOMEM);
+
+ return sg_table;
+}
+EXPORT_SYMBOL_GPL(dma_buf_map_attachment_locked);
+
/**
* dma_buf_map_attachment - Returns the scatterlist table of the attachment;
* mapped into _device_ address space. Is a wrapper for map_dma_buf() of the
@@ -660,6 +697,32 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
}
EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
+/**
+ * dma_buf_unmap_attachment_locked - unmaps the buffer with reservation lock
+ * held, should deallocate the associated scatterlist. Is a wrapper for
+ * unmap_dma_buf() of dma_buf_ops.
+ * @attach: [in] attachment to unmap buffer from
+ * @sg_table: [in] scatterlist info of the buffer to unmap
+ * @direction: [in] direction of DMA transfer
+ *
+ * This unmaps a DMA mapping for @attach obtained by
+ * dma_buf_map_attachment_locked().
+ */
+void dma_buf_unmap_attachment_locked(struct dma_buf_attachment *attach,
+ struct sg_table *sg_table,
+ enum dma_data_direction direction)
+{
+ might_sleep();
+ reservation_object_assert_held(attach->dmabuf->resv);
+
+ if (attach->sgt == sg_table)
+ return;
+
+ attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
+ direction);
+}
+EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment_locked);
+
/**
* dma_buf_unmap_attachment - unmaps and decreases usecount of the buffer;might
* deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 6534a6769e17..a40f8f586a95 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -395,8 +395,13 @@ int dma_buf_fd(struct dma_buf *dmabuf, int flags);
struct dma_buf *dma_buf_get(int fd);
void dma_buf_put(struct dma_buf *dmabuf);
+struct sg_table *dma_buf_map_attachment_locked(struct dma_buf_attachment *,
+ enum dma_data_direction);
struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
enum dma_data_direction);
+void dma_buf_unmap_attachment_locked(struct dma_buf_attachment *,
+ struct sg_table *,
+ enum dma_data_direction);
void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
enum dma_data_direction);
int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH 4/6] dma-buf: lock the reservation object during (un)map_dma_buf v3
2018-07-06 14:42 (no subject) Christian König
` (2 preceding siblings ...)
2018-07-06 14:42 ` [PATCH 3/6] dma-buf: add dma_buf_(un)map_attachment_locked variants v3 Christian König
@ 2018-07-06 14:42 ` Christian König
2018-07-06 14:42 ` [PATCH 5/6] drm/amdgpu: add independent DMA-buf export v3 Christian König
` (2 subsequent siblings)
6 siblings, 0 replies; 31+ messages in thread
From: Christian König @ 2018-07-06 14:42 UTC (permalink / raw)
To: intel-gfx
First step towards unpinned DMA buf operation.
I've checked the DRM drivers for potential locking of the reservation
object, but essentially we need to audit all implementations of the
dma_buf_ops for this to work.
v2: reordered
v3: rebased on sgt caching
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/dma-buf/dma-buf.c | 15 ++++++---------
include/linux/dma-buf.h | 6 ++++++
2 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index e68322c953a9..21608306349d 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -686,10 +686,9 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
if (WARN_ON(!attach || !attach->dmabuf))
return ERR_PTR(-EINVAL);
- if (attach->sgt)
- return attach->sgt;
-
- sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
+ reservation_object_lock(attach->dmabuf->resv, NULL);
+ sg_table = dma_buf_map_attachment_locked(attach, direction);
+ reservation_object_unlock(attach->dmabuf->resv);
if (!sg_table)
sg_table = ERR_PTR(-ENOMEM);
@@ -742,11 +741,9 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
return;
- if (attach->sgt == sg_table)
- return;
-
- attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
- direction);
+ reservation_object_lock(attach->dmabuf->resv, NULL);
+ dma_buf_unmap_attachment_locked(attach, sg_table, direction);
+ reservation_object_unlock(attach->dmabuf->resv);
}
EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index a40f8f586a95..e964077a7e7e 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -128,6 +128,9 @@ struct dma_buf_ops {
* any other kind of sharing that the exporter might wish to make
* available to buffer-users.
*
+ * This is always called with the dmabuf->resv object locked when
+ * no_sgt_cache is true.
+ *
* Returns:
*
* A &sg_table scatter list of or the backing storage of the DMA buffer,
@@ -148,6 +151,9 @@ struct dma_buf_ops {
* It should also unpin the backing storage if this is the last mapping
* of the DMA buffer, it the exporter supports backing storage
* migration.
+ *
+ * This is always called with the dmabuf->resv object locked when
+ * no_sgt_cache is true.
*/
void (*unmap_dma_buf)(struct dma_buf_attachment *,
struct sg_table *,
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH 5/6] drm/amdgpu: add independent DMA-buf export v3
2018-07-06 14:42 (no subject) Christian König
` (3 preceding siblings ...)
2018-07-06 14:42 ` [PATCH 4/6] dma-buf: lock the reservation object during (un)map_dma_buf v3 Christian König
@ 2018-07-06 14:42 ` Christian König
2018-07-06 14:42 ` [PATCH 6/6] drm/amdgpu: add independent DMA-buf import v4 Christian König
2018-07-06 16:02 ` ✗ Fi.CI.BAT: failure for series starting with [1/6] dma-buf: add caching of sg_table Patchwork
6 siblings, 0 replies; 31+ messages in thread
From: Christian König @ 2018-07-06 14:42 UTC (permalink / raw)
To: intel-gfx
The caching of SGT's is actually quite harmful and should probably removed
altogether when all drivers are audited.
Start by providing a separate DMA-buf export implementation in amdgpu. This is
also a prerequisite of unpinned DMA-buf handling.
v2: fix unintended recursion, remove debugging leftovers
v3: split out from unpinned DMA-buf work
v4: rebase on top of new no_sgt_cache flag
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 -
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 -
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 94 +++++++++++++------------------
3 files changed, 40 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8eaba0f4db10..269664fd00de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -374,7 +374,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv);
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 06aede194bf8..04ef967e128c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1138,7 +1138,6 @@ static struct drm_driver kms_driver = {
.gem_prime_export = amdgpu_gem_prime_export,
.gem_prime_import = amdgpu_gem_prime_import,
.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
- .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
.gem_prime_vmap = amdgpu_gem_prime_vmap,
.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 3ed02f472003..75c70ddbc080 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -40,22 +40,6 @@
static const struct dma_buf_ops amdgpu_dmabuf_ops;
-/**
- * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
- * implementation
- * @obj: GEM buffer object
- *
- * Returns:
- * A scatter/gather table for the pinned pages of the buffer object's memory.
- */
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
-{
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- int npages = bo->tbo.num_pages;
-
- return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
-}
-
/**
* amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
* @obj: GEM buffer object
@@ -189,34 +173,29 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
}
/**
- * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
- * @dma_buf: shared DMA buffer
+ * amdgpu_gem_map_dma_buf - &dma_buf_ops.map_dma_buf implementation
* @attach: DMA-buf attachment
+ * @dir: DMA direction
*
* Makes sure that the shared DMA buffer can be accessed by the target device.
* For now, simply pins it to the GTT domain, where it should be accessible by
* all DMA devices.
*
* Returns:
- * 0 on success or negative error code.
+ * sg_table filled with the DMA addresses to use or ERR_PTR with negative error
+ * code.
*/
-static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
- struct dma_buf_attachment *attach)
+static struct sg_table *
+amdgpu_gem_map_dma_buf(struct dma_buf_attachment *attach,
+ enum dma_data_direction dir)
{
+ struct dma_buf *dma_buf = attach->dmabuf;
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct sg_table *sgt;
long r;
- r = drm_gem_map_attach(dma_buf, attach);
- if (r)
- return r;
-
- r = amdgpu_bo_reserve(bo, false);
- if (unlikely(r != 0))
- goto error_detach;
-
-
if (attach->dev->driver != adev->dev->driver) {
/*
* Wait for all shared fences to complete before we switch to future
@@ -227,54 +206,62 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
MAX_SCHEDULE_TIMEOUT);
if (unlikely(r < 0)) {
DRM_DEBUG_PRIME("Fence wait failed: %li\n", r);
- goto error_unreserve;
+ return ERR_PTR(r);
}
}
/* pin buffer into GTT */
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r)
- goto error_unreserve;
+ return ERR_PTR(r);
+
+ sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages, bo->tbo.num_pages);
+ if (IS_ERR(sgt))
+ return sgt;
+
+ if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+ DMA_ATTR_SKIP_CPU_SYNC))
+ goto error_free;
if (attach->dev->driver != adev->dev->driver)
bo->prime_shared_count++;
-error_unreserve:
- amdgpu_bo_unreserve(bo);
+ return sgt;
-error_detach:
- if (r)
- drm_gem_map_detach(dma_buf, attach);
- return r;
+error_free:
+ sg_free_table(sgt);
+ kfree(sgt);
+ return ERR_PTR(-ENOMEM);
}
/**
- * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
- * @dma_buf: shared DMA buffer
+ * amdgpu_gem_unmap_dma_buf - &dma_buf_ops.unmap_dma_buf implementation
* @attach: DMA-buf attachment
+ * @sgt: sg_table to unmap
+ * @dir: DMA direction
*
* This is called when a shared DMA buffer no longer needs to be accessible by
* the other device. For now, simply unpins the buffer from GTT.
*/
-static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
- struct dma_buf_attachment *attach)
+static void amdgpu_gem_unmap_dma_buf(struct dma_buf_attachment *attach,
+ struct sg_table *sgt,
+ enum dma_data_direction dir)
{
+ struct dma_buf *dma_buf = attach->dmabuf;
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- int ret = 0;
-
- ret = amdgpu_bo_reserve(bo, true);
- if (unlikely(ret != 0))
- goto error;
amdgpu_bo_unpin(bo);
+
if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
bo->prime_shared_count--;
- amdgpu_bo_unreserve(bo);
-error:
- drm_gem_map_detach(dma_buf, attach);
+ if (sgt) {
+ dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
+ sg_free_table(sgt);
+ kfree(sgt);
+ }
}
/**
@@ -332,10 +319,9 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
}
static const struct dma_buf_ops amdgpu_dmabuf_ops = {
- .attach = amdgpu_gem_map_attach,
- .detach = amdgpu_gem_map_detach,
- .map_dma_buf = drm_gem_map_dma_buf,
- .unmap_dma_buf = drm_gem_unmap_dma_buf,
+ .no_sgt_cache = true,
+ .map_dma_buf = amdgpu_gem_map_dma_buf,
+ .unmap_dma_buf = amdgpu_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_gem_begin_cpu_access,
.map = drm_gem_dmabuf_kmap,
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH 6/6] drm/amdgpu: add independent DMA-buf import v4
2018-07-06 14:42 (no subject) Christian König
` (4 preceding siblings ...)
2018-07-06 14:42 ` [PATCH 5/6] drm/amdgpu: add independent DMA-buf export v3 Christian König
@ 2018-07-06 14:42 ` Christian König
2018-07-06 16:02 ` ✗ Fi.CI.BAT: failure for series starting with [1/6] dma-buf: add caching of sg_table Patchwork
6 siblings, 0 replies; 31+ messages in thread
From: Christian König @ 2018-07-06 14:42 UTC (permalink / raw)
To: intel-gfx
Instead of relying on the DRM functions just implement our own import
functions. This prepares support for taking care of unpinned DMA-buf.
v2: enable for all exporters, not just amdgpu, fix invalidation
handling, lock reservation object while setting callback
v3: change to new dma_buf attach interface
v4: split out from unpinned DMA-buf work
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ----
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 -
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 38 +++++++++++++++++++------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 34 +++++++++++++++++++++++----
4 files changed, 52 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 269664fd00de..31c6edd77f2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -374,10 +374,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv);
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
- struct dma_buf_attachment *attach,
- struct sg_table *sg);
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 04ef967e128c..c632e14257d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1138,7 +1138,6 @@ static struct drm_driver kms_driver = {
.gem_prime_export = amdgpu_gem_prime_export,
.gem_prime_import = amdgpu_gem_prime_import,
.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
- .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
.gem_prime_vmap = amdgpu_gem_prime_vmap,
.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
.gem_prime_mmap = amdgpu_gem_prime_mmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 75c70ddbc080..13033ff48cb1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -122,31 +122,28 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
}
/**
- * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
- * implementation
+ * amdgpu_gem_prime_create_obj - create BO for DMA-buf import
+ *
* @dev: DRM device
- * @attach: DMA-buf attachment
- * @sg: Scatter/gather table
+ * @dma_buf: DMA-buf
*
- * Import shared DMA buffer memory exported by another device.
+ * Creates an empty SG BO for DMA-buf import.
*
* Returns:
* A new GEM buffer object of the given DRM device, representing the memory
* described by the given DMA-buf attachment and scatter/gather table.
*/
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
- struct dma_buf_attachment *attach,
- struct sg_table *sg)
+static struct drm_gem_object *
+amdgpu_gem_prime_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
{
- struct reservation_object *resv = attach->dmabuf->resv;
+ struct reservation_object *resv = dma_buf->resv;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
int ret;
memset(&bp, 0, sizeof(bp));
- bp.size = attach->dmabuf->size;
+ bp.size = dma_buf->size;
bp.byte_align = PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_CPU;
bp.flags = 0;
@@ -157,11 +154,9 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
if (ret)
goto error;
- bo->tbo.sg = sg;
- bo->tbo.ttm->sg = sg;
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
- if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
bo->prime_shared_count = 1;
ww_mutex_unlock(&resv->lock);
@@ -377,6 +372,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf)
{
+ struct dma_buf_attachment *attach;
struct drm_gem_object *obj;
if (dma_buf->ops == &amdgpu_dmabuf_ops) {
@@ -391,5 +387,17 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
}
}
- return drm_gem_prime_import(dev, dma_buf);
+ obj = amdgpu_gem_prime_create_obj(dev, dma_buf);
+ if (IS_ERR(obj))
+ return obj;
+
+ attach = dma_buf_attach(dma_buf, dev->dev);
+ if (IS_ERR(attach)) {
+ drm_gem_object_put(obj);
+ return ERR_CAST(attach);
+ }
+
+ get_dma_buf(dma_buf);
+ obj->import_attach = attach;
+ return obj;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0e47f15b0f97..ee85e49e3b07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include <linux/iommu.h>
+#include <linux/dma-buf.h>
#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
@@ -798,6 +799,7 @@ struct amdgpu_ttm_gup_task_list {
struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
+ struct drm_gem_object *gobj;
u64 offset;
uint64_t userptr;
struct task_struct *usertask;
@@ -1222,6 +1224,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
return NULL;
}
gtt->ttm.ttm.func = &amdgpu_backend_func;
+ gtt->gobj = &ttm_to_amdgpu_bo(bo)->gem_base;
/* allocate space for the uninitialized page entries */
if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
@@ -1242,7 +1245,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
{
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
if (gtt && gtt->userptr) {
@@ -1255,7 +1257,20 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
return 0;
}
- if (slave && ttm->sg) {
+ if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
+ if (!ttm->sg) {
+ struct dma_buf_attachment *attach;
+ struct sg_table *sgt;
+
+ attach = gtt->gobj->import_attach;
+ sgt = dma_buf_map_attachment_locked(attach,
+ DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ ttm->sg = sgt;
+ }
+
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
gtt->ttm.dma_address,
ttm->num_pages);
@@ -1282,9 +1297,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
*/
static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
- struct amdgpu_device *adev;
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+ struct amdgpu_device *adev;
if (gtt && gtt->userptr) {
amdgpu_ttm_tt_set_user_pages(ttm, NULL);
@@ -1293,7 +1307,17 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
return;
}
- if (slave)
+ if (ttm->sg && gtt->gobj->import_attach) {
+ struct dma_buf_attachment *attach;
+
+ attach = gtt->gobj->import_attach;
+ dma_buf_unmap_attachment_locked(attach, ttm->sg,
+ DMA_BIDIRECTIONAL);
+ ttm->sg = NULL;
+ return;
+ }
+
+ if (ttm->page_flags & TTM_PAGE_FLAG_SG)
return;
adev = amdgpu_ttm_adev(ttm->bdev);
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 31+ messages in thread
* ✗ Fi.CI.BAT: failure for series starting with [1/6] dma-buf: add caching of sg_table
2018-07-06 14:42 (no subject) Christian König
` (5 preceding siblings ...)
2018-07-06 14:42 ` [PATCH 6/6] drm/amdgpu: add independent DMA-buf import v4 Christian König
@ 2018-07-06 16:02 ` Patchwork
6 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2018-07-06 16:02 UTC (permalink / raw)
To: Christian König; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/6] dma-buf: add caching of sg_table
URL : https://patchwork.freedesktop.org/series/46080/
State : failure
== Summary ==
Applying: dma-buf: add caching of sg_table
Applying: drm: remove prime sg_table caching
Applying: dma-buf: add dma_buf_(un)map_attachment_locked variants v3
Applying: dma-buf: lock the reservation object during (un)map_dma_buf v3
Applying: drm/amdgpu: add independent DMA-buf export v3
error: sha1 information is lacking or useless (drivers/gpu/drm/amd/amdgpu/amdgpu.h).
error: could not build fake ancestor
Patch failed at 0005 drm/amdgpu: add independent DMA-buf export v3
Use 'git am --show-current-patch' to see the failed patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* (no subject)
@ 2018-07-05 10:38 rosdi ablatiff
0 siblings, 0 replies; 31+ messages in thread
From: rosdi ablatiff @ 2018-07-05 10:38 UTC (permalink / raw)
To: intel-gfx
[-- Attachment #1.1: Type: text/plain, Size: 1 bytes --]
[-- Attachment #1.2: Type: text/html, Size: 1 bytes --]
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* (no subject)
@ 2017-01-16 16:28 Tony Whittam
0 siblings, 0 replies; 31+ messages in thread
From: Tony Whittam @ 2017-01-16 16:28 UTC (permalink / raw)
To: intel-gfx
[-- Attachment #1.1: Type: text/plain, Size: 2520 bytes --]
Hi everyone,
I don't know if this is too specialised for this list. Anyway, no harm in
asking the question :-)
*Preamble*
Build: Yocto from the Apollo Lake BSP release *gold*,
Hardware: Oxbow Hill Rev B CRB with Intel Atom E3950 and 4GB DDR3 RAM (one
SODIMM)
Build: core-image-sato-sdk
Installed on the onboard eMMC.
OpenCL: installed user space drivers from SRB4 https://software.intel.com/file/533571/download
I'm currently evaluating the Apollo Lake platform as a candidate to run our
embedded application. We already have this application running on less
powerful ARM based Linux systems with Mali GPU using OpenCL 1.2. We're now
evaluating the E3950 as a faster alternative. To evaluate the application I
need OpenCL 1.2 or later.
To verify the OpenCL installation I have built and run the Intel demo apps:
CapsBasic and Bitonic Sort. CapsBasic sees two devices: CPU and GPU and
Bitonic sort can run its kernels correctly on both the CPU and the GPU.
*The issue*
Simply put, the application has
- thread 1 (feeder): has a loop that feeds data into OpenCL and queues
kernels
- thread 2 (consumer): waits for results and reads output data.
- an OpenCL Host command queue with out-of-order execution enabled
When I run my app and select the GPU OpenCL device, the feeder thread *stalls
inside a blocking call to clEnqueueMapBuffer()*. At this point only one
thing has been queued on the command queue: a buffer unmap command for a
different buffer. This unmap is waiting for an OpenCL event that will
indicate data ready to be processed.
In contrast, when I run my app and select the *CPU OpenCL* device, it works
perfectly.
Does anyone have any ideas on
1. what might be causing this problem running with the GPU?
2. how to debug this on the Yocto platform?
Best regards,
Tony
--
Tony Whittam
Rapt Touch
--
Confidentiality Notice:
The information contained in this message, including any attachments
hereto, may be confidential and is intended to be read only by the
individual or entity to whom this message is addressed. If the reader of
this message is not the intended recipient or an agent or designee of the
intended recipient, please note that any review, use, disclosure or
distribution of this message or its attachments, in any form, is strictly
prohibited. If you have received this message in error, please immediately
notify the sender and/or Rapt Touch Ltd via email at info@rapttouch.com and
delete or destroy any copy of this message and its attachments.
[-- Attachment #1.2: Type: text/html, Size: 4121 bytes --]
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH i-g-t v5 1/4] lib: add igt_dummyload
@ 2016-11-11 16:16 Daniel Vetter
2016-11-14 18:24 ` (no subject) Abdiel Janulgue
0 siblings, 1 reply; 31+ messages in thread
From: Daniel Vetter @ 2016-11-11 16:16 UTC (permalink / raw)
To: Abdiel Janulgue; +Cc: Daniel Vetter, intel-gfx
On Fri, Nov 11, 2016 at 07:41:10PM +0200, Abdiel Janulgue wrote:
> A lot of igt testcases need some GPU workload to make sure a race
> window is big enough. Unfortunately having a fixed amount of
> workload leads to spurious test failures or overtly long runtimes
> on some fast/slow platforms. This library contains functionality
> to submit GPU workloads that should consume exactly a specific
> amount of time.
>
> v2 : Add recursive batch feature from Chris
> v3 : Drop auto-tuned stuff. Add bo dependecy to recursive batch
> by adding a dummy reloc to the bo as suggested by Ville.
> v4: Fix dependency reloc as write instead of read (Ville).
> Fix wrong handling of batchbuffer start on ILK causing
> test failure
>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> ---
> lib/Makefile.sources | 2 +
> lib/igt.h | 1 +
> lib/igt_dummyload.c | 276 +++++++++++++++++++++++++++++++++++++++++++++++++++
> lib/igt_dummyload.h | 42 ++++++++
Did you check that your new docs do show up in the generated
documentation? Iirc you need to edit some xml under docs/.
-Daniel
> 4 files changed, 321 insertions(+)
> create mode 100644 lib/igt_dummyload.c
> create mode 100644 lib/igt_dummyload.h
>
> diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> index e8e277b..7fc5ec2 100644
> --- a/lib/Makefile.sources
> +++ b/lib/Makefile.sources
> @@ -75,6 +75,8 @@ lib_source_list = \
> igt_draw.h \
> igt_pm.c \
> igt_pm.h \
> + igt_dummyload.c \
> + igt_dummyload.h \
> uwildmat/uwildmat.h \
> uwildmat/uwildmat.c \
> $(NULL)
> diff --git a/lib/igt.h b/lib/igt.h
> index d751f24..a0028d5 100644
> --- a/lib/igt.h
> +++ b/lib/igt.h
> @@ -32,6 +32,7 @@
> #include "igt_core.h"
> #include "igt_debugfs.h"
> #include "igt_draw.h"
> +#include "igt_dummyload.h"
> #include "igt_fb.h"
> #include "igt_gt.h"
> #include "igt_kms.h"
> diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
> new file mode 100644
> index 0000000..b934fd5
> --- /dev/null
> +++ b/lib/igt_dummyload.c
> @@ -0,0 +1,276 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "igt.h"
> +#include "igt_dummyload.h"
> +#include <time.h>
> +#include <signal.h>
> +#include <sys/syscall.h>
> +
> +/**
> + * SECTION:igt_dummyload
> + * @short_description: Library for submitting GPU workloads
> + * @title: Dummyload
> + * @include: igt.h
> + *
> + * A lot of igt testcases need some GPU workload to make sure a race window is
> + * big enough. Unfortunately having a fixed amount of workload leads to
> + * spurious test failures or overtly long runtimes on some fast/slow platforms.
> + * This library contains functionality to submit GPU workloads that should
> + * consume exactly a specific amount of time.
> + */
> +
> +#define NSEC_PER_SEC 1000000000L
> +
> +#define gettid() syscall(__NR_gettid)
> +#define sigev_notify_thread_id _sigev_un._tid
> +
> +#define LOCAL_I915_EXEC_BSD_SHIFT (13)
> +#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
> +
> +#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
> +
> +static void
> +fill_object(struct drm_i915_gem_exec_object2 *obj, uint32_t gem_handle,
> + struct drm_i915_gem_relocation_entry *relocs, uint32_t count)
> +{
> + memset(obj, 0, sizeof(*obj));
> + obj->handle = gem_handle;
> + obj->relocation_count = count;
> + obj->relocs_ptr = (uintptr_t)relocs;
> +}
> +
> +static void
> +fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
> + uint32_t gem_handle, uint32_t offset,
> + uint32_t read_domains, uint32_t write_domains)
> +{
> + reloc->target_handle = gem_handle;
> + reloc->delta = 0;
> + reloc->offset = offset * sizeof(uint32_t);
> + reloc->presumed_offset = 0;
> + reloc->read_domains = read_domains;
> + reloc->write_domain = write_domains;
> +}
> +
> +
> +static uint32_t *batch;
> +
> +static uint32_t emit_recursive_batch(int fd, int engine, unsigned dep_handle)
> +{
> + const int gen = intel_gen(intel_get_drm_devid(fd));
> + struct drm_i915_gem_exec_object2 obj[2];
> + struct drm_i915_gem_relocation_entry relocs[2];
> + struct drm_i915_gem_execbuffer2 execbuf;
> + unsigned engines[16];
> + unsigned nengine, handle;
> + int i = 0, reloc_count = 0, buf_count = 0;
> +
> + buf_count = 0;
> + nengine = 0;
> + if (engine < 0) {
> + for_each_engine(fd, engine)
> + if (engine)
> + engines[nengine++] = engine;
> + } else {
> + igt_require(gem_has_ring(fd, engine));
> + engines[nengine++] = engine;
> + }
> + igt_require(nengine);
> +
> + memset(&execbuf, 0, sizeof(execbuf));
> + memset(obj, 0, sizeof(obj));
> + memset(relocs, 0, sizeof(relocs));
> +
> + execbuf.buffers_ptr = (uintptr_t) obj;
> + handle = gem_create(fd, 4096);
> + batch = gem_mmap__gtt(fd, handle, 4096, PROT_WRITE);
> + gem_set_domain(fd, handle,
> + I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +
> + if (nengine == 1 && dep_handle > 0) {
> + /* dummy write to dependency */
> + fill_object(&obj[buf_count], dep_handle, NULL, 0);
> + buf_count++;
> +
> + fill_reloc(&relocs[reloc_count], dep_handle, i,
> + I915_GEM_DOMAIN_RENDER,
> + I915_GEM_DOMAIN_RENDER);
> + batch[i++] = 0; /* reloc */
> + reloc_count++;
> + batch[i++] = MI_NOOP;
> + }
> +
> + if (gen >= 8) {
> + batch[i++] = MI_BATCH_BUFFER_START | 1 << 8 | 1;
> + /* recurse */
> + fill_reloc(&relocs[reloc_count], handle, i,
> + I915_GEM_DOMAIN_COMMAND, 0);
> + batch[i++] = 0;
> + batch[i++] = 0;
> + } else if (gen >= 6) {
> + batch[i++] = MI_BATCH_BUFFER_START | 1 << 8;
> + /* recurse */
> + fill_reloc(&relocs[reloc_count], handle, i,
> + I915_GEM_DOMAIN_COMMAND, 0);
> + batch[i++] = 0;
> + } else {
> + batch[i++] = MI_BATCH_BUFFER_START | 2 << 6 |
> + ((gen < 4) ? 1 : 0);
> + /* recurse */
> + fill_reloc(&relocs[reloc_count], handle, i,
> + I915_GEM_DOMAIN_COMMAND, 0);
> + batch[i++] = 0;
> + if (gen < 4)
> + relocs[reloc_count].delta = 1;
> + }
> + reloc_count++;
> +
> + fill_object(&obj[buf_count], handle, relocs, reloc_count);
> + buf_count++;
> +
> + for (i = 0; i < nengine; i++) {
> + execbuf.flags &= ~ENGINE_MASK;
> + execbuf.flags = engines[i];
> + execbuf.buffer_count = buf_count;
> + gem_execbuf(fd, &execbuf);
> + }
> +
> + return handle;
> +}
> +
> +static void sigiter(int sig, siginfo_t *info, void *arg)
> +{
> + *batch = MI_BATCH_BUFFER_END;
> + __sync_synchronize();
> +}
> +
> +static timer_t setup_batch_exit_timer(int64_t ns)
> +{
> + timer_t timer;
> + struct sigevent sev;
> + struct sigaction act;
> + struct itimerspec its;
> +
> + memset(&sev, 0, sizeof(sev));
> + sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID;
> + sev.sigev_notify_thread_id = gettid();
> + sev.sigev_signo = SIGRTMIN + 1;
> + igt_assert(timer_create(CLOCK_MONOTONIC, &sev, &timer) == 0);
> + igt_assert(timer > 0);
> +
> + memset(&act, 0, sizeof(act));
> + act.sa_sigaction = sigiter;
> + act.sa_flags = SA_SIGINFO;
> + igt_assert(sigaction(SIGRTMIN + 1, &act, NULL) == 0);
> +
> + memset(&its, 0, sizeof(its));
> + its.it_value.tv_sec = ns / NSEC_PER_SEC;
> + its.it_value.tv_nsec = ns % NSEC_PER_SEC;
> + igt_assert(timer_settime(timer, 0, &its, NULL) == 0);
> +
> + return timer;
> +}
> +
> +/**
> + * igt_spin_batch:
> + * @fd: open i915 drm file descriptor
> + * @ns: amount of time in nanoseconds the batch executes after terminating.
> + * If value is less than 0, execute batch forever.
> + * @engine: Ring to execute batch OR'd with execbuf flags. If value is less
> + * than 0, execute on all available rings.
> + * @dep_handle: handle to a buffer object dependency. If greater than 0, add a
> + * relocation entry to this buffer within the batch.
> + *
> + * Start a recursive batch on a ring that terminates after an exact amount
> + * of time has elapsed. Immediately returns a #igt_spin_t that contains the
> + * batch's handle that can be waited upon. The returned structure must be passed to
> + * igt_post_spin_batch() for post-processing.
> + *
> + * Returns:
> + * Structure with helper internal state for igt_post_spin_batch().
> + */
> +igt_spin_t igt_spin_batch(int fd, int64_t ns, int engine, unsigned dep_handle)
> +{
> + timer_t timer;
> + uint32_t handle = emit_recursive_batch(fd, engine, dep_handle);
> + int64_t wait_timeout = 0;
> + igt_assert_eq(gem_wait(fd, handle, &wait_timeout), -ETIME);
> +
> + if (ns < 1) {
> + if (ns == 0) {
> + *batch = MI_BATCH_BUFFER_END;
> + __sync_synchronize();
> + return (igt_spin_t){ handle, batch, 0};
> + }
> + return (igt_spin_t){ handle, batch, 0 };
> + }
> + timer = setup_batch_exit_timer(ns);
> +
> + return (igt_spin_t){ handle, batch, timer };
> +}
> +
> +/**
> + * igt_post_spin_batch:
> + * @fd: open i915 drm file descriptor
> + * @arg: spin batch state from igt_spin_batch()
> + *
> + * This function does the necessary post-processing after starting a recursive
> + * batch with igt_spin_batch().
> + */
> +void igt_post_spin_batch(int fd, igt_spin_t arg)
> +{
> + if (arg.handle == 0)
> + return;
> +
> + if (arg.timer > 0)
> + timer_delete(arg.timer);
> +
> + gem_close(fd, arg.handle);
> + munmap(arg.batch, 4096);
> +}
> +
> +
> +/**
> + * igt_spin_batch_wait:
> + * @fd: open i915 drm file descriptor
> + * @ns: amount of time in nanoseconds the batch executes after terminating.
> + * If value is less than 0, execute batch forever.
> + * @engine: ring to execute batch OR'd with execbuf flags. If value is less
> + * than 0, execute on all available rings.
> + * @dep_handle: handle to a buffer object dependency. If greater than 0, include
> + * this buffer on the wait dependency
> + *
> + * This is similar to igt_spin_batch(), but waits on the recursive batch to finish
> + * instead of returning right away. The function also does the necessary
> + * post-processing automatically if set to timeout.
> + */
> +void igt_spin_batch_wait(int fd, int64_t ns, int engine, unsigned dep_handle)
> +{
> + igt_spin_t spin = igt_spin_batch(fd, ns, engine, dep_handle);
> + int64_t wait_timeout = ns + (0.5 * NSEC_PER_SEC);
> + igt_assert_eq(gem_wait(fd, spin.handle, &wait_timeout), 0);
> +
> + igt_post_spin_batch(fd, spin);
> +}
> diff --git a/lib/igt_dummyload.h b/lib/igt_dummyload.h
> new file mode 100644
> index 0000000..79ead2c
> --- /dev/null
> +++ b/lib/igt_dummyload.h
> @@ -0,0 +1,42 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __IGT_DUMMYLOAD_H__
> +#define __IGT_DUMMYLOAD_H__
> +
> +typedef struct igt_spin {
> + unsigned handle;
> + uint32_t *batch;
> + timer_t timer;
> +} igt_spin_t;
> +
> +
> +igt_spin_t igt_spin_batch(int fd, int64_t ns, int engine, unsigned dep_handle);
> +
> +void igt_post_spin_batch(int fd, igt_spin_t arg);
> +
> +void igt_spin_batch_wait(int fd, int64_t ns, int engine, unsigned dep_handle);
> +
> +
> +#endif /* __IGT_DUMMYLOAD_H__ */
> --
> 2.7.0
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* (no subject)
2016-11-11 16:16 [PATCH i-g-t v5 1/4] lib: add igt_dummyload Daniel Vetter
@ 2016-11-14 18:24 ` Abdiel Janulgue
0 siblings, 0 replies; 31+ messages in thread
From: Abdiel Janulgue @ 2016-11-14 18:24 UTC (permalink / raw)
To: intel-gfx
On 11.11.2016 18:16, Daniel Vetter wrote:
> On Fri, Nov 11, 2016 at 07:41:10PM +0200, Abdiel Janulgue wrote:
>> A lot of igt testcases need some GPU workload to make sure a race
>> window is big enough. Unfortunately having a fixed amount of
>> workload leads to spurious test failures or overtly long runtimes
>> on some fast/slow platforms. This library contains functionality
>> to submit GPU workloads that should consume exactly a specific
>> amount of time.
>>
>> v2 : Add recursive batch feature from Chris
>> v3 : Drop auto-tuned stuff. Add bo dependecy to recursive batch
>> by adding a dummy reloc to the bo as suggested by Ville.
>> v4: Fix dependency reloc as write instead of read (Ville).
>> Fix wrong handling of batchbuffer start on ILK causing
>> test failure
>>
>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
>> ---
>> lib/Makefile.sources | 2 +
>> lib/igt.h | 1 +
>> lib/igt_dummyload.c | 276 +++++++++++++++++++++++++++++++++++++++++++++++++++
>> lib/igt_dummyload.h | 42 ++++++++
>
> Did you check that your new docs do show up in the generated
> documentation? Iirc you need to edit some xml under docs/.
> -Daniel
>
Yeah I missed that. Updated now to include the docs in generated
documentation.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH 2/2] drm/i915: Rework order of operations in {__intel, logical}_ring_prepare()
2015-06-12 17:09 ` [PATCH v2] Resolve issues with ringbuffer space management Dave Gordon
@ 2015-06-12 17:09 Dave Gordon
[not found] ` <1433789441-8295-1-git-send-email-david.s.gordon@intel.com>
0 siblings, 1 reply; 31+ messages in thread
From: Dave Gordon @ 2015-06-12 17:09 UTC (permalink / raw)
To: intel-gfx
The original idea of preallocating the OLR was implemented in
> 9d773091 drm/i915: Preallocate next seqno before touching the ring
and the sequence of operations was to allocate the OLR, then wrap past
the end of the ring if necessary, then wait for space if necessary.
But subsequently intel_ring_begin() was refactored, in
> 304d695 drm/i915: Flush outstanding requests before allocating new seqno
to ensure that pending work that might need to be flushed used the old
and not the newly-allocated request. This changed the sequence to wrap
and/or wait, then allocate, although the comment still said
/* Preallocate the olr before touching the ring */
which was no longer true as intel_wrap_ring_buffer() touches the ring.
However, with the introduction of dynamic pinning, in
> 7ba717c drm/i915/bdw: Pin the ringbuffer backing object to GGTT on-demand
came the possibility that the ringbuffer might not be pinned to the GTT
or mapped into CPU address space when intel_ring_begin() is called. It
gets pinned when the request is allocated, so it's now important that
this comes *before* anything that can write into the ringbuffer, in this
case intel_wrap_ring_buffer(), as this will fault if (a) the ringbuffer
happens not to be mapped, and (b) tail happens to be sufficiently close
to the end of the ring to trigger wrapping.
So the correct order is neither the original allocate-wait-pad-wait, nor
the subsequent wait-pad-wait-allocate, but wait-allocate-pad, avoiding
both the problems described in the two commits mentioned above.
As a bonus, we eliminate the special case where a single ring_begin()
might end up waiting twice (once to be able to wrap, and then again
if that still hadn't actually freed enough space for the request). We
just precalculate the total amount of space we'll need *including* any
for padding the end of the ring and wait for that much in one go :)
In the time since this code was written, it has all been cloned from
the original ringbuffer model to become the execbuffer code, in
> 82e104c drm/i915/bdw: New logical ring submission mechanism
So now we have to fix it in both paths ...
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
drivers/gpu/drm/i915/intel_lrc.c | 64 +++++++++++++++----------------
drivers/gpu/drm/i915/intel_ringbuffer.c | 63 +++++++++++++++---------------
2 files changed, 64 insertions(+), 63 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 454e836..3ef5fb6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -740,39 +740,22 @@ intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf,
execlists_context_queue(ring, ctx, ringbuf->tail, request);
}
-static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf,
- struct intel_context *ctx)
-{
- uint32_t __iomem *virt;
- int rem = ringbuf->size - ringbuf->tail;
-
- if (ringbuf->space < rem) {
- int ret = logical_ring_wait_for_space(ringbuf, ctx, rem);
-
- if (ret)
- return ret;
- }
-
- virt = ringbuf->virtual_start + ringbuf->tail;
- rem /= 4;
- while (rem--)
- iowrite32(MI_NOOP, virt++);
-
- ringbuf->tail = 0;
- intel_ring_update_space(ringbuf);
-
- return 0;
-}
-
static int logical_ring_prepare(struct intel_ringbuffer *ringbuf,
struct intel_context *ctx, int bytes)
{
+ int fill = 0;
int ret;
+ /*
+ * If the request will not fit between 'tail' and the effective
+ * size of the ringbuffer, then we need to pad the end of the
+ * ringbuffer with NOOPs, then start the request at the top of
+ * the ring. This increases the total size that we need to check
+ * for by however much is left at the end of the ring ...
+ */
if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
- ret = logical_ring_wrap_buffer(ringbuf, ctx);
- if (unlikely(ret))
- return ret;
+ fill = ringbuf->size - ringbuf->tail;
+ bytes += fill;
}
if (unlikely(ringbuf->space < bytes)) {
@@ -781,6 +764,28 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf,
return ret;
}
+ /* Ensure we have a request before touching the ring */
+ if (!ringbuf->ring->outstanding_lazy_request) {
+ ret = i915_gem_request_alloc(ringbuf->ring, ctx);
+ if (ret)
+ return ret;
+ }
+
+ if (unlikely(fill)) {
+ uint32_t __iomem *virt = ringbuf->virtual_start + ringbuf->tail;
+
+ /* tail should not have moved */
+ if (WARN_ON(fill != ringbuf->size - ringbuf->tail))
+ fill = ringbuf->size - ringbuf->tail;
+
+ do
+ iowrite32(MI_NOOP, virt++);
+ while ((fill -= 4) > 0);
+
+ ringbuf->tail = 0;
+ intel_ring_update_space(ringbuf);
+ }
+
return 0;
}
@@ -814,11 +819,6 @@ static int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf,
if (ret)
return ret;
- /* Preallocate the olr before touching the ring */
- ret = i915_gem_request_alloc(ring, ctx);
- if (ret)
- return ret;
-
ringbuf->space -= num_dwords * sizeof(uint32_t);
return 0;
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index a3406b2..4c0bc29 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2137,29 +2137,6 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
return 0;
}
-static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
-{
- uint32_t __iomem *virt;
- struct intel_ringbuffer *ringbuf = ring->buffer;
- int rem = ringbuf->size - ringbuf->tail;
-
- if (ringbuf->space < rem) {
- int ret = ring_wait_for_space(ring, rem);
- if (ret)
- return ret;
- }
-
- virt = ringbuf->virtual_start + ringbuf->tail;
- rem /= 4;
- while (rem--)
- iowrite32(MI_NOOP, virt++);
-
- ringbuf->tail = 0;
- intel_ring_update_space(ringbuf);
-
- return 0;
-}
-
int intel_ring_idle(struct intel_engine_cs *ring)
{
struct drm_i915_gem_request *req;
@@ -2197,12 +2174,19 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring,
int bytes)
{
struct intel_ringbuffer *ringbuf = ring->buffer;
+ int fill = 0;
int ret;
+ /*
+ * If the request will not fit between 'tail' and the effective
+ * size of the ringbuffer, then we need to pad the end of the
+ * ringbuffer with NOOPs, then start the request at the top of
+ * the ring. This increases the total size that we need to check
+ * for by however much is left at the end of the ring ...
+ */
if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
- ret = intel_wrap_ring_buffer(ring);
- if (unlikely(ret))
- return ret;
+ fill = ringbuf->size - ringbuf->tail;
+ bytes += fill;
}
if (unlikely(ringbuf->space < bytes)) {
@@ -2211,6 +2195,28 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring,
return ret;
}
+ /* Ensure we have a request before touching the ring */
+ if (!ringbuf->ring->outstanding_lazy_request) {
+ ret = i915_gem_request_alloc(ringbuf->ring, ctx);
+ if (ret)
+ return ret;
+ }
+
+ if (unlikely(fill)) {
+ uint32_t __iomem *virt = ringbuf->virtual_start + ringbuf->tail;
+
+ /* tail should not have moved */
+ if (WARN_ON(fill != ringbuf->size - ringbuf->tail))
+ fill = ringbuf->size - ringbuf->tail;
+
+ do
+ iowrite32(MI_NOOP, virt++);
+ while ((fill -= 4) > 0);
+
+ ringbuf->tail = 0;
+ intel_ring_update_space(ringbuf);
+ }
+
return 0;
}
@@ -2229,11 +2235,6 @@ int intel_ring_begin(struct intel_engine_cs *ring,
if (ret)
return ret;
- /* Preallocate the olr before touching the ring */
- ret = i915_gem_request_alloc(ring, ring->default_context);
- if (ret)
- return ret;
-
ring->buffer->space -= num_dwords * sizeof(uint32_t);
return 0;
}
--
1.7.9.5
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 31+ messages in thread
* (no subject)
@ 2015-04-14 10:10 Mika Kahola
0 siblings, 0 replies; 31+ messages in thread
From: Mika Kahola @ 2015-04-14 10:10 UTC (permalink / raw)
To: intel-gfx
This series is revised based on Jani's good comments.
In this series the patch which read out DP link training
parameters from VBT is discarded as based on the comments
that I received.
Files changed:
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_drv.h
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH] drm/i915: (VLV2) Fix the hotplug detection bits
@ 2014-01-21 16:38 Todd Previte
2014-01-23 4:22 ` (no subject) Todd Previte
0 siblings, 1 reply; 31+ messages in thread
From: Todd Previte @ 2014-01-21 16:38 UTC (permalink / raw)
To: intel-gfx
These bits are in reverse order in the header from those defined in
the specification. Change the bit positions for ports B and D to
correctly match the spec.
---
drivers/gpu/drm/i915/i915_reg.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 10ecf90..2d77b51 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2083,9 +2083,9 @@
* Please check the detailed lore in the commit message for for experimental
* evidence.
*/
-#define PORTD_HOTPLUG_LIVE_STATUS (1 << 29)
+#define PORTD_HOTPLUG_LIVE_STATUS (1 << 27)
#define PORTC_HOTPLUG_LIVE_STATUS (1 << 28)
-#define PORTB_HOTPLUG_LIVE_STATUS (1 << 27)
+#define PORTB_HOTPLUG_LIVE_STATUS (1 << 29)
#define PORTD_HOTPLUG_INT_STATUS (3 << 21)
#define PORTC_HOTPLUG_INT_STATUS (3 << 19)
#define PORTB_HOTPLUG_INT_STATUS (3 << 17)
--
1.8.3.2
^ permalink raw reply related [flat|nested] 31+ messages in thread
* (no subject)
@ 2013-12-30 2:29 Oravil Nair
2014-01-07 7:32 ` Daniel Vetter
0 siblings, 1 reply; 31+ messages in thread
From: Oravil Nair @ 2013-12-30 2:29 UTC (permalink / raw)
To: intel-gfx
[-- Attachment #1.1: Type: text/plain, Size: 242 bytes --]
Hi,
i915_gem_object_pin(), during i915 driver create, seems to write to the
memory written by BIOS. Where can the start address be specified to
allocate memory so that the memory written by BIOS is not overwritten at
initialization?
Thanks
[-- Attachment #1.2: Type: text/html, Size: 331 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: (no subject)
2013-12-30 2:29 Oravil Nair
@ 2014-01-07 7:32 ` Daniel Vetter
0 siblings, 0 replies; 31+ messages in thread
From: Daniel Vetter @ 2014-01-07 7:32 UTC (permalink / raw)
To: Oravil Nair; +Cc: intel-gfx
On Mon, Dec 30, 2013 at 07:59:49AM +0530, Oravil Nair wrote:
> Hi,
>
> i915_gem_object_pin(), during i915 driver create, seems to write to the
> memory written by BIOS. Where can the start address be specified to
> allocate memory so that the memory written by BIOS is not overwritten at
> initialization?
I guess you want Jesse's patches to save the screen contents from the BIOS
modeset setup? But tbh I'm not clear at all what exactly you're talking
about.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 31+ messages in thread
[parent not found: <CAEyVMbDjLwcDFrQ7y4UtGp7HOT1wi5MB2EWLGTuOdJCKDWsUew@mail.gmail.com>]
* Re: (no subject)
[not found] <CAEyVMbDjLwcDFrQ7y4UtGp7HOT1wi5MB2EWLGTuOdJCKDWsUew@mail.gmail.com>
@ 2013-04-03 15:46 ` Daniel Vetter
0 siblings, 0 replies; 31+ messages in thread
From: Daniel Vetter @ 2013-04-03 15:46 UTC (permalink / raw)
To: Dihan Wickremasuriya; +Cc: Michael Siracusa, intel-gfx, Jeff Faneuff
[-- Attachment #1.1: Type: text/plain, Size: 1354 bytes --]
Hi all,
Two things:
- Please _always_ include a public mailing list when reporting bugs, your
dear maintainer sometimes slacks off.
- We need to see the error_state before we can assess what kind of hang you
have (it's like getting a SIGSEGV for a normal program, no two gpu hangs
are the same ...).
Cheers, Daniel
On Wed, Apr 3, 2013 at 5:42 PM, Dihan Wickremasuriya <
dwickremasuriya@rethinkrobotics.com> wrote:
> Hi Chris/Daniel,
>
> This is Dihan from Rethink Robotics and we were hoping you could help with
> the GPU hang problem in the i915 driver mentioned in bug #26345:
> https://bugs.freedesktop.org/show_bug.cgi?id=26345
>
> We are running into the same problem with the 3.8.5 kernel (which has the
> fix mentioned in comment #153 of the bug report) when running a Qt 5
> application in Gentoo. At times the entire X session would freeze. The
> x11-perf tests described in the bug report run without any issues though.
>
> Would you happen to know whether this is because of an issue in the driver
> that is not currently being addressed by the fix? I have attached the Xorg
> log, the dmesg output and i915_error_state from a hung session. Please let
> me know if you need any more info. Thanks in advance!
>
> Best regards,
> Dihan
>
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
[-- Attachment #1.2: Type: text/html, Size: 2095 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* (no subject)
@ 2012-07-19 20:00 Olivier Galibert
0 siblings, 0 replies; 31+ messages in thread
From: Olivier Galibert @ 2012-07-19 20:00 UTC (permalink / raw)
To: intel-gfx, mesa-dev
Hi,
This is the second verion of the clipping/interpolation patches.
Main differences:
- I tried to take all of Paul's remarks into account
- I exploded the first patch in 4 independent ones
- I've added a patch to ensure that integers pass through unscathed
Patch 4/9 is (slightly) controversial. There may be better ways to do
it, or at least more general ones. But it's simple, it works, and it
allows to validate the other 8. It's an easy one to revert if we
build an alternative.
Best,
OG.
[PATCH 1/9] intel gen4-5: fix the vue view in the fs.
[PATCH 2/9] intel gen4-5: simplify the bfc copy in the sf.
[PATCH 3/9] intel gen4-5: fix GL_VERTEX_PROGRAM_TWO_SIDE selection.
[PATCH 4/9] intel gen4-5: Fix backface/frontface selection when one
[PATCH 5/9] intel gen4-5: Compute the interpolation status for every
[PATCH 6/9] intel gen4-5: Correctly setup the parameters in the sf.
[PATCH 7/9] intel gen4-5: Correctly handle flat vs. non-flat in the
[PATCH 8/9] intel gen4-5: Make noperspective clipping work.
[PATCH 9/9] intel gen4-5: Don't touch flatshaded values when
^ permalink raw reply [flat|nested] 31+ messages in thread
* (no subject)
@ 2012-05-31 18:00 Muhammad Jamil
0 siblings, 0 replies; 31+ messages in thread
From: Muhammad Jamil @ 2012-05-31 18:00 UTC (permalink / raw)
To: e_wangi, intel-gfx, iswahyudiwardany, mail-noreply, sandyseteluk,
irdiansyah27, afia_gra
Learn How to Make Money Online
http://3ftechnologies.com/http102dx-2.php?qohranknumber=245
__________________
Why dont your juries hangmurderers? Because theyre afraid the mans friends will shoot them inthe back, in the dark--and its just what they WOULD do. kaelah winfrith
Thu, 31 May 2012 19:00:33
^ permalink raw reply [flat|nested] 31+ messages in thread
* (no subject)
@ 2012-04-12 0:55 Rodrigo Vivi
0 siblings, 0 replies; 31+ messages in thread
From: Rodrigo Vivi @ 2012-04-12 0:55 UTC (permalink / raw)
To: DRI Development; +Cc: Intel Graphics Development, Rodrigo Vivi
There are many bugs open on fd.o regarding missing modes that are supported on Windows and other closed source drivers.
From EDID spec we can (might?) infer modes using GTF and CVT when monitor allows it through range limited flag... obviously limiting by the range.
From our code:
* EDID spec says modes should be preferred in this order:
* - preferred detailed mode
* - other detailed modes from base block
* - detailed modes from extension blocks
* - CVT 3-byte code modes
* - standard timing codes
* - established timing codes
* - modes inferred from GTF or CVT range information
*
* We get this pretty much right.
Not actually so right... We were inferring just using GTF... not CVT or even GTF2.
This patch not just add some common cvt modes but also allows some modes been inferred when using gtf2 as well.
Cheers,
Rodrigo.
From 4b7a88d0d812583d850ca691d1ac491355230d11 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Wed, 11 Apr 2012 15:36:31 -0300
Subject: [PATCH] drm/edid: Adding common CVT inferred modes when monitor
allows range limited ones through EDID.
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
drivers/gpu/drm/drm_edid.c | 37 +++++++++++++-
drivers/gpu/drm/drm_edid_modes.h | 101 ++++++++++++++++++++++++++++++++++++++
2 files changed, 136 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 7ee7be1..3179572 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -1020,17 +1020,50 @@ drm_gtf_modes_for_range(struct drm_connector *connector, struct edid *edid,
return modes;
}
+static int
+drm_cvt_modes_for_range(struct drm_connector *connector, struct edid *edid,
+ struct detailed_timing *timing)
+{
+ int i, modes = 0;
+ struct drm_display_mode *newmode;
+ struct drm_device *dev = connector->dev;
+
+ for (i = 0; i < drm_num_cvt_inferred_modes; i++) {
+ if (mode_in_range(drm_cvt_inferred_modes + i, edid, timing)) {
+ newmode = drm_mode_duplicate(dev, &drm_cvt_inferred_modes[i]);
+ if (newmode) {
+ drm_mode_probed_add(connector, newmode);
+ modes++;
+ }
+ }
+ }
+
+ return modes;
+}
+
static void
do_inferred_modes(struct detailed_timing *timing, void *c)
{
struct detailed_mode_closure *closure = c;
struct detailed_non_pixel *data = &timing->data.other_data;
- int gtf = (closure->edid->features & DRM_EDID_FEATURE_DEFAULT_GTF);
+ int timing_level = standard_timing_level(closure->edid);
- if (gtf && data->type == EDID_DETAIL_MONITOR_RANGE)
+ if (data->type == EDID_DETAIL_MONITOR_RANGE)
+ switch (timing_level) {
+ case LEVEL_DMT:
+ break;
+ case LEVEL_GTF:
+ case LEVEL_GTF2:
closure->modes += drm_gtf_modes_for_range(closure->connector,
closure->edid,
timing);
+ break;
+ case LEVEL_CVT:
+ closure->modes += drm_cvt_modes_for_range(closure->connector,
+ closure->edid,
+ timing);
+ break;
+ }
}
static int
diff --git a/drivers/gpu/drm/drm_edid_modes.h b/drivers/gpu/drm/drm_edid_modes.h
index a91ffb1..7e14a32 100644
--- a/drivers/gpu/drm/drm_edid_modes.h
+++ b/drivers/gpu/drm/drm_edid_modes.h
@@ -266,6 +266,107 @@ static const struct drm_display_mode drm_dmt_modes[] = {
static const int drm_num_dmt_modes =
sizeof(drm_dmt_modes) / sizeof(struct drm_display_mode);
+static const struct drm_display_mode drm_cvt_inferred_modes[] = {
+ /* 640x480@60Hz */
+ { DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 23750, 640, 664,
+ 720, 800, 0, 480, 483, 487, 500, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 800x600@60Hz */
+ { DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 38250, 800, 832,
+ 912, 1024, 0, 600, 603, 607, 624, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 900x600@60Hz */
+ { DRM_MODE("900x600", DRM_MODE_TYPE_DRIVER, 45250, 960, 992,
+ 1088, 1216, 0, 600, 603, 609, 624, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1024x576@60Hz */
+ { DRM_MODE("1024x576", DRM_MODE_TYPE_DRIVER, 46500, 1024, 1064,
+ 1160, 1296, 0, 576, 579, 584, 599, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1024x768@60Hz */
+ { DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 63500, 1024, 1072,
+ 1176, 1328, 0, 768, 771, 775, 798, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1152x864@60Hz */
+ { DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 81750, 1152, 1216,
+ 1336, 1520, 0, 864, 867, 871, 897, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1280x720@60Hz */
+ { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74500, 1280, 1344,
+ 1472, 1664, 0, 720, 723, 728, 748, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1280x768@60Hz */
+ { DRM_MODE("1280x768", DRM_MODE_TYPE_DRIVER, 79500, 1280, 1344,
+ 1472, 1664, 0, 768, 771, 781, 798, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1280x800@60Hz */
+ { DRM_MODE("1280x800", DRM_MODE_TYPE_DRIVER, 83500, 1280, 1352,
+ 1480, 1680, 0, 800, 803, 809, 831, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1280x1024@60Hz */
+ { DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 109000, 1280, 1368,
+ 1496, 1712, 0, 1024, 1027, 1034, 1063, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1360x768@60Hz */
+ { DRM_MODE("1360x768", DRM_MODE_TYPE_DRIVER, 84750, 1360, 1432,
+ 1568, 1776, 0, 768, 771, 781, 798, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1366x768@60Hz */
+ { DRM_MODE("1366x768", DRM_MODE_TYPE_DRIVER, 85250, 1368, 1440,
+ 1576, 1784, 0, 768, 771, 781, 798, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1440x900@60Hz */
+ { DRM_MODE("1440x900", DRM_MODE_TYPE_DRIVER, 106500, 1440, 1528,
+ 1672, 1904, 0, 900, 903, 909, 934, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1400x1050@60Hz */
+ { DRM_MODE("1400x1050", DRM_MODE_TYPE_DRIVER, 121750, 1400, 1488,
+ 1632, 1864, 0, 1050, 1053, 1057, 1089, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1600x900@60Hz */
+ { DRM_MODE("1600x900", DRM_MODE_TYPE_DRIVER, 118250, 1600, 1696,
+ 1856, 2112, 0, 900, 903, 908, 934, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1600x1200@60Hz */
+ { DRM_MODE("1600x1200", DRM_MODE_TYPE_DRIVER, 161000, 1600, 1712,
+ 1880, 2160, 0, 1200, 1203, 1207, 1245, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1680x945@60Hz */
+ { DRM_MODE("1680x945", DRM_MODE_TYPE_DRIVER, 130750, 1680, 1776,
+ 1952, 2224, 0, 945, 948, 953, 981, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1680x1050@60Hz */
+ { DRM_MODE("1680x1050", DRM_MODE_TYPE_DRIVER, 146250, 1680, 1784,
+ 1960, 2240, 0, 1050, 1053, 1059, 1089, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1920x1080@60Hz */
+ { DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 173000, 1920, 2048,
+ 2248, 2576, 0, 1080, 1083, 1088, 1120, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1920x1200@60Hz */
+ { DRM_MODE("1920x1200", DRM_MODE_TYPE_DRIVER, 193250, 1920, 2056,
+ 2256, 2592, 0, 1200, 1203, 1209, 1245, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 1920x1440@60Hz */
+ { DRM_MODE("1920x1440", DRM_MODE_TYPE_DRIVER, 233500, 1920, 2064,
+ 2264, 2608, 0, 1440, 1443, 1447, 1493, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 2048x1152@60Hz */
+ { DRM_MODE("2048x1152", DRM_MODE_TYPE_DRIVER, 197000, 2048, 2184,
+ 2400, 2752, 0, 1152, 1155, 1160, 1195, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 2048x1536@60Hz */
+ { DRM_MODE("2048x1536", DRM_MODE_TYPE_DRIVER, 272000, 2048, 2208,
+ 2424, 2800, 0, 1563, 1566, 1576, 1620, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+ /* 2560x1600@60Hz */
+ { DRM_MODE("2560x1600", DRM_MODE_TYPE_DRIVER, 348500, 2560, 2760,
+ 3032, 3504, 0, 1600, 1603, 1609, 1658, 0,
+ DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+};
+static const int drm_num_cvt_inferred_modes =
+ sizeof(drm_cvt_inferred_modes) / sizeof(struct drm_display_mode);
+
static const struct drm_display_mode edid_est_modes[] = {
{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 40000, 800, 840,
968, 1056, 0, 600, 601, 605, 628, 0,
--
1.7.7.6
^ permalink raw reply related [flat|nested] 31+ messages in thread
* (no subject)
@ 2012-04-08 2:26 Muhammad Jamil
0 siblings, 0 replies; 31+ messages in thread
From: Muhammad Jamil @ 2012-04-08 2:26 UTC (permalink / raw)
To: alia2426, intel-gfx, anggita_chaonk, embapoenya, support,
semetgp, sandyseteluk
Make Income 0nline with revolutionary system
http://184.168.145.37/etdfgtim.php?pjtcamp=95
Sun, 8 Apr 2012 3:25:59
______________
"Billy took his seat with the others around a golden oak table, with a microphone all his own." (c) jerald visszapattant
^ permalink raw reply [flat|nested] 31+ messages in thread
* (no subject)
@ 2012-04-05 6:44 Muhammad Jamil
0 siblings, 0 replies; 31+ messages in thread
From: Muhammad Jamil @ 2012-04-05 6:44 UTC (permalink / raw)
To: intel-gfx, dianaoktavia81, ilham_syah, herman_suni,
friends_confession, ashley_nn30, eddy_susanto96
Learn H0w T0 Earn M0ney 0nline N0w
http://residentialtreatmentcenter.net/coffegold.php?evynumber=91
Thu, 5 Apr 2012 7:44:44
_________________________________
" You yell." (c) daelynn wulfgar
^ permalink raw reply [flat|nested] 31+ messages in thread
* (no subject)
@ 2012-04-03 18:25 Muhammad Jamil
0 siblings, 0 replies; 31+ messages in thread
From: Muhammad Jamil @ 2012-04-03 18:25 UTC (permalink / raw)
To: embapoenya, intel-gfx, iswahyudiwardany, semetgp, support
[-- Attachment #1.1: Type: text/plain, Size: 109 bytes --]
http://www.signsandsites.com/wp-content/themes/duotone/nav21.php
Muhammad Jamil
Sumintar
4/3/2012 11:25:11 AM
[-- Attachment #1.2: Type: text/html, Size: 366 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* (no subject)
@ 2012-04-03 12:42 Muhammad Jamil
0 siblings, 0 replies; 31+ messages in thread
From: Muhammad Jamil @ 2012-04-03 12:42 UTC (permalink / raw)
To: rahman_lyk, ivoximoet, psamawa, friends_confession, cute_mommy77,
intel-gfx, irdiansyah27
W0rking fr0m h0me Ieads t0 sh0cking m0ney resuIts!
http://jadehurtz.com/coffeemoney.php?apohgoto=64
Tue, 3 Apr 2012 13:42:09
______________
" So Tom took his goods out himself, and soughtemployers for Bert who did not know of this strain of poetry inhis nature" (c) britin wynton
^ permalink raw reply [flat|nested] 31+ messages in thread
* forcewake v4 (now with spinlock)
@ 2011-04-14 18:13 Ben Widawsky
2011-04-14 18:13 ` (no subject) Ben Widawsky
0 siblings, 1 reply; 31+ messages in thread
From: Ben Widawsky @ 2011-04-14 18:13 UTC (permalink / raw)
To: intel-gfx
As the patches say, I don't think adding this spinlock will have too
much of a performance impact (I couldn't notice anything in my limited
testing), because the serializing locks are already held when acquiring
this lock. I suppose it now serializes access between stuct_mutex and
config.lock, but in most cases I don't think that's big.
Perhaps the ugliest change is spin_lock() in debugfs. But as I argue in
the comments, if you can't get the lock there, register reads are also
failing, and hung somewhere else.
Ben
^ permalink raw reply [flat|nested] 31+ messages in thread
* forcewake junk, RFC, RFT(test)
@ 2011-04-08 17:47 Ben Widawsky
2011-04-09 20:26 ` forcewake junk, part2 Ben Widawsky
0 siblings, 1 reply; 31+ messages in thread
From: Ben Widawsky @ 2011-04-08 17:47 UTC (permalink / raw)
To: intel-gfx
I am requesting 3 things:
1. code review/request for comments
2. testing on SNB
3. performance regression on < SNB
review
------
The first two patches have nothing to do with the user's ability to
interact with registers. Those patches are about enforcing correctness
within our driver for newer generation products. In other words, if
patch 3 doesn't make sense, don't automatically drop 1, and 2.
review patch 1/2
----------------
The first change is straight forward. It attempts to fold the forcewake stuff
into our standard register read and write functions. Some overhead is added as
a result, but I'd guess it is nothing compared to the UC read about to happen,
and so will not be noticeable.
The existing method for doing the forcewake_get and put requires some
synchronization. For the most part it is protected by struct_mutex and life is
good. Adding a WARN to the get()/put() function is there more as documentation
to future developers, and reminders to the current ones.
To provide an interface to allow user space to use forcewake, I've decided to
change the mechanism that get()/put() operate by introducing a reference count.
The reference count itself must be protected by struct mutex (since we need
synchronization between the initial condition and the destructor). Imagine for
instance: Thread A does a get() and is in the middle of waking the GT (ref has
already been incremented), thread B comes in, thinks the GT is awake,
and incorrectly goes about its business. This does allow users of the
interface to only have to hold struct_mutex while doing the get() and
not for every read and write.
review patch 3
--------------
User space interface is mostly what you'd expect, except the in the case
of trying to get lockless access. This code is a bit meh, but to
remind everyone, it is root only debug code.
testing
-------
The assertion that forcewake is currently properly protected for the
most part, may not be true. People interested should run these patches
on SNB systems with their favorite graphics applications and report the
warnings that occur, they will be in the kernel log, ie. dmesg.
performance
-----------
Looking mostly for regressions on older systems. There is a slight
overhead added to all register reads and writes, which I think shouldn't
be noticeable, but who knows.
Thanks, and let the flames begin!
Ben
^ permalink raw reply [flat|nested] 31+ messages in thread
* forcewake junk, part2
2011-04-08 17:47 forcewake junk, RFC, RFT(test) Ben Widawsky
@ 2011-04-09 20:26 ` Ben Widawsky
2011-04-09 20:26 ` (no subject) Ben Widawsky
0 siblings, 1 reply; 31+ messages in thread
From: Ben Widawsky @ 2011-04-09 20:26 UTC (permalink / raw)
To: intel-gfx
The request to test, and report the warnings is still desired.
These address all of Chris' comments from IRC (I think). I tried to put
the reasoning in the code comments, as well as commits; but I'll add it
here for completeness.
Simplify the acquisition of the forcewake lock by removing the forced
entry. The motivation behind it is that the kernel offers sufficient
tools to get relevant data before/during/after a hang, and adding a
complex mechanism for userspace is not such a great idea. If a need to
do this comes up, we can always add it back on a situational basis.
Similarly, on releasing of the debugfs file, no more forced entry mechanism.
This does leave a potential for release to hang, but AFAICS if release hangs,
the system is going to need a reboot before i915 becomes usable again because
module unload would also hang.
^ permalink raw reply [flat|nested] 31+ messages in thread
* (no subject)
@ 2011-04-07 21:32 Jesse Barnes
0 siblings, 0 replies; 31+ messages in thread
From: Jesse Barnes @ 2011-04-07 21:32 UTC (permalink / raw)
To: intel-gfx
These are some prep patches I'd like to get feedback on. I've only
compile tested them so far (the actual hw support code this is for was
tested before the split), so testing would be appreciated as well.
Thanks,
Jesse
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH] drm/i915/ringbuffer: set force wake bit before reading ring register
@ 2010-11-09 9:17 Zou Nan hai
2010-11-09 9:17 ` Zou, Nanhai
0 siblings, 1 reply; 31+ messages in thread
From: Zou Nan hai @ 2010-11-09 9:17 UTC (permalink / raw)
To: intel-gfx, Chris Wilson
before reading ring register, set force wake bit to prevent GT core
power down to low power state. otherwise we may read stale value.
Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 14 ++++++++++++++
drivers/gpu/drm/i915/i915_reg.h | 1 +
drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ---
drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +++++++----
4 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 90414ae..53c0239 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1325,4 +1325,18 @@ static inline void i915_write(struct drm_i915_private *dev_priv, u32 reg,
#define PRIMARY_RINGBUFFER_SIZE (128*1024)
+/* on SNB platform,
+ before reading ring registers forcewake bit
+ must be set to prevent GT core from power down
+*/
+
+static inline u32 i915_safe_read(struct intel_ring_buffer *ring,
+ unsigned int offset)
+{
+ u32 ret;
+ drm_i915_private_t *dev_priv = ring->dev->dev_private;
+ if (IS_GEN6(ring->dev)) I915_WRITE(FORCEWAKE, 1);
+ ret = I915_READ(offset);
+ return ret;
+}
#endif
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 25ed911..4d994d2 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3052,4 +3052,5 @@
#define EDP_LINK_TRAIN_800MV_0DB_SNB_B (0x38<<22)
#define EDP_LINK_TRAIN_VOL_EMP_MASK_SNB (0x3f<<22)
+#define FORCEWAKE 0xA18C
#endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7c1f3ff..2820235 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -572,7 +572,6 @@ err:
int intel_init_ring_buffer(struct drm_device *dev,
struct intel_ring_buffer *ring)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj_priv;
struct drm_gem_object *obj;
int ret;
@@ -691,8 +690,6 @@ int intel_wait_ring_buffer(struct drm_device *dev,
struct intel_ring_buffer *ring, int n)
{
unsigned long end;
- drm_i915_private_t *dev_priv = dev->dev_private;
-
trace_i915_ring_wait_begin (dev);
end = jiffies + 3 * HZ;
do {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3126c26..cde1cdd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -7,13 +7,16 @@ struct intel_hw_status_page {
struct drm_gem_object *obj;
};
-#define I915_READ_TAIL(ring) I915_READ(RING_TAIL(ring->mmio_base))
+#define I915_READ_TAIL(ring) i915_safe_read(ring, RING_TAIL(ring->mmio_base))
#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL(ring->mmio_base), val)
-#define I915_READ_START(ring) I915_READ(RING_START(ring->mmio_base))
+
+#define I915_READ_START(ring) i915_safe_read(ring, RING_START(ring->mmio_base))
#define I915_WRITE_START(ring, val) I915_WRITE(RING_START(ring->mmio_base), val)
-#define I915_READ_HEAD(ring) I915_READ(RING_HEAD(ring->mmio_base))
+
+#define I915_READ_HEAD(ring) i915_safe_read(ring, RING_HEAD(ring->mmio_base))
#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD(ring->mmio_base), val)
-#define I915_READ_CTL(ring) I915_READ(RING_CTL(ring->mmio_base))
+
+#define I915_READ_CTL(ring) i915_safe_read(ring, RING_CTL(ring->mmio_base))
#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL(ring->mmio_base), val)
struct drm_i915_gem_execbuffer2;
--
1.7.1
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH] drm/i915/ringbuffer: set force wake bit before reading ring register
2010-11-09 9:17 [PATCH] drm/i915/ringbuffer: set force wake bit before reading ring register Zou Nan hai
@ 2010-11-09 9:17 ` Zou, Nanhai
2010-11-09 10:50 ` Chris Wilson
0 siblings, 1 reply; 31+ messages in thread
From: Zou, Nanhai @ 2010-11-09 9:17 UTC (permalink / raw)
To: intel-gfx, Chris Wilson, Zhao, Jian J
>>-----Original Message-----
>>From: Zou, Nanhai
>>Sent: 2010年11月9日 17:18
>>To: intel-gfx@lists.freedesktop.org; Chris Wilson
>>Cc: Zou, Nanhai
>>Subject: [PATCH] drm/i915/ringbuffer: set force wake bit before reading ring
>>register
>>
>>before reading ring register, set force wake bit to prevent GT core
>>power down to low power state. otherwise we may read stale value.
>>
>>Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
>>---
>> drivers/gpu/drm/i915/i915_drv.h | 14 ++++++++++++++
>> drivers/gpu/drm/i915/i915_reg.h | 1 +
>> drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ---
>> drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +++++++----
>> 4 files changed, 22 insertions(+), 7 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>b/drivers/gpu/drm/i915/i915_drv.h
>>index 90414ae..53c0239 100644
>>--- a/drivers/gpu/drm/i915/i915_drv.h
>>+++ b/drivers/gpu/drm/i915/i915_drv.h
>>@@ -1325,4 +1325,18 @@ static inline void i915_write(struct drm_i915_private
>>*dev_priv, u32 reg,
>>
>> #define PRIMARY_RINGBUFFER_SIZE (128*1024)
>>
>>+/* on SNB platform,
>>+ before reading ring registers forcewake bit
>>+ must be set to prevent GT core from power down
>>+*/
>>+
>>+static inline u32 i915_safe_read(struct intel_ring_buffer *ring,
>>+ unsigned int offset)
>>+{
>>+ u32 ret;
>>+ drm_i915_private_t *dev_priv = ring->dev->dev_private;
>>+ if (IS_GEN6(ring->dev)) I915_WRITE(FORCEWAKE, 1);
>>+ ret = I915_READ(offset);
>>+ return ret;
>>+}
>> #endif
>>diff --git a/drivers/gpu/drm/i915/i915_reg.h
>>b/drivers/gpu/drm/i915/i915_reg.h
>>index 25ed911..4d994d2 100644
>>--- a/drivers/gpu/drm/i915/i915_reg.h
>>+++ b/drivers/gpu/drm/i915/i915_reg.h
>>@@ -3052,4 +3052,5 @@
>> #define EDP_LINK_TRAIN_800MV_0DB_SNB_B (0x38<<22)
>> #define EDP_LINK_TRAIN_VOL_EMP_MASK_SNB (0x3f<<22)
>>
>>+#define FORCEWAKE 0xA18C
>> #endif /* _I915_REG_H_ */
>>diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c
>>b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>index 7c1f3ff..2820235 100644
>>--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>>+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>@@ -572,7 +572,6 @@ err:
>> int intel_init_ring_buffer(struct drm_device *dev,
>> struct intel_ring_buffer *ring)
>> {
>>- struct drm_i915_private *dev_priv = dev->dev_private;
>> struct drm_i915_gem_object *obj_priv;
>> struct drm_gem_object *obj;
>> int ret;
>>@@ -691,8 +690,6 @@ int intel_wait_ring_buffer(struct drm_device *dev,
>> struct intel_ring_buffer *ring, int n)
>> {
>> unsigned long end;
>>- drm_i915_private_t *dev_priv = dev->dev_private;
>>-
>> trace_i915_ring_wait_begin (dev);
>> end = jiffies + 3 * HZ;
>> do {
>>diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h
>>b/drivers/gpu/drm/i915/intel_ringbuffer.h
>>index 3126c26..cde1cdd 100644
>>--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>>+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>>@@ -7,13 +7,16 @@ struct intel_hw_status_page {
>> struct drm_gem_object *obj;
>> };
>>
>>-#define I915_READ_TAIL(ring) I915_READ(RING_TAIL(ring->mmio_base))
>>+#define I915_READ_TAIL(ring) i915_safe_read(ring,
>>RING_TAIL(ring->mmio_base))
>> #define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL(ring->mmio_base),
>>val)
>>-#define I915_READ_START(ring) I915_READ(RING_START(ring->mmio_base))
>>+
>>+#define I915_READ_START(ring) i915_safe_read(ring,
>>RING_START(ring->mmio_base))
>> #define I915_WRITE_START(ring, val) I915_WRITE(RING_START(ring->mmio_base),
>>val)
>>-#define I915_READ_HEAD(ring) I915_READ(RING_HEAD(ring->mmio_base))
>>+
>>+#define I915_READ_HEAD(ring) i915_safe_read(ring,
>>RING_HEAD(ring->mmio_base))
>> #define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD(ring->mmio_base),
>>val)
>>-#define I915_READ_CTL(ring) I915_READ(RING_CTL(ring->mmio_base))
>>+
>>+#define I915_READ_CTL(ring) i915_safe_read(ring,
>>RING_CTL(ring->mmio_base))
>> #define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL(ring->mmio_base),
>>val)
>>
>> struct drm_i915_gem_execbuffer2;
>>--
>>1.7.1
I have tested this patch with the read ring head from status page workaround patch reverted.
Seems it works on my SNB box.
Thanks
Zou Nanhai
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH] drm/i915/ringbuffer: set force wake bit before reading ring register
2010-11-09 9:17 ` Zou, Nanhai
@ 2010-11-09 10:50 ` Chris Wilson
2010-11-10 0:36 ` Zou, Nanhai
0 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2010-11-09 10:50 UTC (permalink / raw)
To: Zou, Nanhai, intel-gfx, Zhao, Jian J
On Tue, 9 Nov 2010 17:17:07 +0800, "Zou, Nanhai" <nanhai.zou@intel.com> wrote:
> I have tested this patch with the read ring head from status page workaround patch reverted.
> Seems it works on my SNB box.
I needed to add a udelay(100) to i915_safe_read for my rev 8. Can you
check if there is a recommended delay for FORCEWAKE?
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH] drm/i915/ringbuffer: set force wake bit before reading ring register
2010-11-09 10:50 ` Chris Wilson
@ 2010-11-10 0:36 ` Zou, Nanhai
2010-11-10 7:54 ` Chris Wilson
0 siblings, 1 reply; 31+ messages in thread
From: Zou, Nanhai @ 2010-11-10 0:36 UTC (permalink / raw)
To: Chris Wilson, intel-gfx, Zhao, Jian J
>>-----Original Message-----
>>From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
>>Sent: 2010年11月9日 18:50
>>To: Zou, Nanhai; intel-gfx@lists.freedesktop.org; Zhao, Jian J
>>Subject: RE: [PATCH] drm/i915/ringbuffer: set force wake bit before reading
>>ring register
>>
>>On Tue, 9 Nov 2010 17:17:07 +0800, "Zou, Nanhai" <nanhai.zou@intel.com> wrote:
>>
>>> I have tested this patch with the read ring head from status page workaround
>>patch reverted.
>>> Seems it works on my SNB box.
>>
>>I needed to add a udelay(100) to i915_safe_read for my rev 8. Can you
>>check if there is a recommended delay for FORCEWAKE?
>>-Chris
>>
Does a post read to FORCEWAKE help?
Thanks
Zou Nanhai
>>--
>>Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH] drm/i915/ringbuffer: set force wake bit before reading ring register
2010-11-10 0:36 ` Zou, Nanhai
@ 2010-11-10 7:54 ` Chris Wilson
2010-11-10 18:47 ` Jesse Barnes
0 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2010-11-10 7:54 UTC (permalink / raw)
To: Zou, Nanhai, intel-gfx, Zhao, Jian J
[-- Attachment #1: Type: text/plain, Size: 1035 bytes --]
On Wed, 10 Nov 2010 08:36:20 +0800, "Zou, Nanhai" <nanhai.zou@intel.com> wrote:
> >>-----Original Message-----
> >>From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
>>Sent: 2010年11月9日 18:50
> >>To: Zou, Nanhai; intel-gfx@lists.freedesktop.org; Zhao, Jian J
> >>Subject: RE: [PATCH] drm/i915/ringbuffer: set force wake bit before reading
> >>ring register
> >>
> >>On Tue, 9 Nov 2010 17:17:07 +0800, "Zou, Nanhai" <nanhai.zou@intel.com> wrote:
> >>
> >>> I have tested this patch with the read ring head from status page workaround
> >>patch reverted.
> >>> Seems it works on my SNB box.
> >>
> >>I needed to add a udelay(100) to i915_safe_read for my rev 8. Can you
> >>check if there is a recommended delay for FORCEWAKE?
> >>-Chris
> >>
> Does a post read to FORCEWAKE help?
No, tried a POSTING_READ(FORCEWAKE) first and it wasn't until I added the
udelay() between the READ(FORCEWAKE) and the READ(reg) that it returned
the correct results in a single call.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH] drm/i915/ringbuffer: set force wake bit before reading ring register
2010-11-10 7:54 ` Chris Wilson
@ 2010-11-10 18:47 ` Jesse Barnes
2010-11-17 22:52 ` (no subject) Thantry, Hariharan L
0 siblings, 1 reply; 31+ messages in thread
From: Jesse Barnes @ 2010-11-10 18:47 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Wed, 10 Nov 2010 07:54:12 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Wed, 10 Nov 2010 08:36:20 +0800, "Zou, Nanhai"
> <nanhai.zou@intel.com> wrote:
> > >>-----Original Message-----
> > >>From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> > >>Sent: 2010年11月9日 18:50
> > >>To: Zou, Nanhai; intel-gfx@lists.freedesktop.org; Zhao, Jian J
> > >>Subject: RE: [PATCH] drm/i915/ringbuffer: set force wake bit
> > >>before reading ring register
> > >>
> > >>On Tue, 9 Nov 2010 17:17:07 +0800, "Zou, Nanhai"
> > >><nanhai.zou@intel.com> wrote:
> > >>
> > >>> I have tested this patch with the read ring head from status
> > >>> page workaround
> > >>patch reverted.
> > >>> Seems it works on my SNB box.
> > >>
> > >>I needed to add a udelay(100) to i915_safe_read for my rev 8. Can
> > >>you check if there is a recommended delay for FORCEWAKE?
> > >>-Chris
> > >>
> > Does a post read to FORCEWAKE help?
>
> No, tried a POSTING_READ(FORCEWAKE) first and it wasn't until I added
> the udelay() between the READ(FORCEWAKE) and the READ(reg) that it
> returned the correct results in a single call.
I think these regs will be affected by the ucode on the GPU; we may
need specific delays after some operations. Have you tried asking the
hw guys what the best practices are here?
--
Jesse Barnes, Intel Open Source Technology Center
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 31+ messages in thread
* (no subject)
2010-11-10 18:47 ` Jesse Barnes
@ 2010-11-17 22:52 ` Thantry, Hariharan L
0 siblings, 0 replies; 31+ messages in thread
From: Thantry, Hariharan L @ 2010-11-17 22:52 UTC (permalink / raw)
To: intel-gfx
Hi folks,
I am a bit new to graphics, but had a few questions that I was hoping that someone could answer for me. I hope this is the right forum to ask these questions.
My interest is in seeing whether I can use the Intel integrated graphics part for non-graphics (GPGPU) work, while driving the display through another discrete card.
I have an Ironlake system (core setup with base Debian (no X-related packages), a basic PCI-E graphics card (NVIDIA NV37GL) and a 2.6.36 kernel with the following relevant config entries.
CONFIG_AGP=y
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
CONFIG_VGA_ARB=y
CONFIG_VGA_ARB_MAX_GPUS=16
# CONFIG_VGA_SWITCHEROO is not set
CONFIG_DRM=m
CONFIG_DRM_KMS_HELPER=m
CONFIG_DRM_TTM=m
CONFIG_DRM_R128=m
CONFIG_DRM_I810=m
CONFIG_DRM_I830=m
CONFIG_DRM_I915=m
CONFIG_DRM_I915_KMS=y
I have libdrm & libva installed, and was hoping to use libdrm APIs to do some basic operations on the integrated graphics.
I can insmod the DRM & the DRM_KMS_HELPER module fine, but when trying to insert the I915 driver, I get a "no such device error", even though the module object exists.
lspci doesn't seem to return the Intel integrated graphics PCI device either.
00:00.0 Host bridge: Intel Corporation Auburndale/Havendale DRAM Controller (rev 02)
00:01.0 PCI bridge: Intel Corporation Auburndale/Havendale PCI Express x16 Root Port (rev 02)
00:16.0 Communication controller: Intel Corporation Ibex Peak HECI Controller (rev 06)
00:16.2 IDE interface: Intel Corporation Ibex Peak PT IDER Controller (rev 06)
00:16.3 Serial controller: Intel Corporation Ibex Peak KT Controller (rev 06)
00:19.0 Ethernet controller: Intel Corporation Device 10f0 (rev 06)
00:1a.0 USB Controller: Intel Corporation Ibex Peak USB2 Enhanced Host Controller (rev 06)
00:1b.0 Audio device: Intel Corporation Ibex Peak High Definition Audio (rev 06)
00:1d.0 USB Controller: Intel Corporation Ibex Peak USB2 Enhanced Host Controller (rev 06)
00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev a6)
00:1f.0 ISA bridge: Intel Corporation Ibex Peak LPC Interface Controller (rev 06)
00:1f.2 SATA controller: Intel Corporation Ibex Peak 6 port SATA AHCI Controller (rev 06)
00:1f.3 SMBus: Intel Corporation Ibex Peak SMBus Controller (rev 06)
01:00.0 VGA compatible controller: nVidia Corporation NV37GL [Quadro PCI-E Series] (rev a2)
02:02.0 Ethernet controller: Intel Corporation 82557/8/9/0/1 Ethernet Pro 100 (rev 08)
First off, is there a way for the Intel integrated graphics to appear in the list of PCI devices when it's not being used for driving the display?
Secondly, can I simply use the libdrm APIs to directly perform operations on the Intel integrated part? Does there exist any documentation describing the DRM APIs?
Finally, can I use the DRM APIs for using the GPU "media pipe" (architecturally different from the 3D graphics pipe)?
Thanks,
Hari
^ permalink raw reply [flat|nested] 31+ messages in thread
end of thread, other threads:[~2018-07-06 16:02 UTC | newest]
Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-06 14:42 (no subject) Christian König
2018-07-06 14:42 ` [PATCH 1/6] dma-buf: add caching of sg_table Christian König
2018-07-06 14:42 ` [PATCH 2/6] drm: remove prime sg_table caching Christian König
2018-07-06 14:42 ` [PATCH 3/6] dma-buf: add dma_buf_(un)map_attachment_locked variants v3 Christian König
2018-07-06 14:42 ` [PATCH 4/6] dma-buf: lock the reservation object during (un)map_dma_buf v3 Christian König
2018-07-06 14:42 ` [PATCH 5/6] drm/amdgpu: add independent DMA-buf export v3 Christian König
2018-07-06 14:42 ` [PATCH 6/6] drm/amdgpu: add independent DMA-buf import v4 Christian König
2018-07-06 16:02 ` ✗ Fi.CI.BAT: failure for series starting with [1/6] dma-buf: add caching of sg_table Patchwork
-- strict thread matches above, loose matches on Subject: below --
2018-07-05 10:38 (no subject) rosdi ablatiff
2017-01-16 16:28 Tony Whittam
2016-11-11 16:16 [PATCH i-g-t v5 1/4] lib: add igt_dummyload Daniel Vetter
2016-11-14 18:24 ` (no subject) Abdiel Janulgue
2015-06-12 17:09 [PATCH 2/2] drm/i915: Rework order of operations in {__intel, logical}_ring_prepare() Dave Gordon
[not found] ` <1433789441-8295-1-git-send-email-david.s.gordon@intel.com>
2015-06-12 17:09 ` [PATCH v2] Resolve issues with ringbuffer space management Dave Gordon
2015-06-12 20:25 ` (no subject) Dave Gordon
2015-06-17 11:04 ` Daniel Vetter
2015-06-17 12:41 ` Jani Nikula
2015-06-18 10:30 ` Dave Gordon
2015-04-14 10:10 Mika Kahola
2014-01-21 16:38 [PATCH] drm/i915: (VLV2) Fix the hotplug detection bits Todd Previte
2014-01-23 4:22 ` (no subject) Todd Previte
2013-12-30 2:29 Oravil Nair
2014-01-07 7:32 ` Daniel Vetter
[not found] <CAEyVMbDjLwcDFrQ7y4UtGp7HOT1wi5MB2EWLGTuOdJCKDWsUew@mail.gmail.com>
2013-04-03 15:46 ` Daniel Vetter
2012-07-19 20:00 Olivier Galibert
2012-05-31 18:00 Muhammad Jamil
2012-04-12 0:55 Rodrigo Vivi
2012-04-08 2:26 Muhammad Jamil
2012-04-05 6:44 Muhammad Jamil
2012-04-03 18:25 Muhammad Jamil
2012-04-03 12:42 Muhammad Jamil
2011-04-14 18:13 forcewake v4 (now with spinlock) Ben Widawsky
2011-04-14 18:13 ` (no subject) Ben Widawsky
2011-04-08 17:47 forcewake junk, RFC, RFT(test) Ben Widawsky
2011-04-09 20:26 ` forcewake junk, part2 Ben Widawsky
2011-04-09 20:26 ` (no subject) Ben Widawsky
2011-04-07 21:32 Jesse Barnes
2010-11-09 9:17 [PATCH] drm/i915/ringbuffer: set force wake bit before reading ring register Zou Nan hai
2010-11-09 9:17 ` Zou, Nanhai
2010-11-09 10:50 ` Chris Wilson
2010-11-10 0:36 ` Zou, Nanhai
2010-11-10 7:54 ` Chris Wilson
2010-11-10 18:47 ` Jesse Barnes
2010-11-17 22:52 ` (no subject) Thantry, Hariharan L
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).