dri-devel.lists.freedesktop.org archive mirror
* [PATCH 1/2] drm/mm: expand rb_hole_addr augmented callbacks
@ 2020-05-19  8:44 Nirmoy Das
  2020-05-19  8:44 ` [PATCH 2/2] drm/mm: improve rb_hole_addr rbtree search Nirmoy Das
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Nirmoy Das @ 2020-05-19  8:44 UTC (permalink / raw)
  To: dri-devel; +Cc: nirmoy.aiemd, Nirmoy Das, christian.koenig, chris

Expand RB_DECLARE_CALLBACKS_MAX so that it is possible to store
an extra value (max hole alignment) in the rb_hole_addr augmented rbtree.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/drm_mm.c | 72 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index f4ca1ff80af9..91e90c635e05 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -241,9 +241,74 @@ static void insert_hole_size(struct rb_root_cached *root,
 	rb_insert_color_cached(&node->rb_hole_size, root, first);
 }

-RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
-			 struct drm_mm_node, rb_hole_addr,
-			 u64, subtree_max_hole, HOLE_SIZE)
+static inline bool
+augment_callbacks_compute_max_hole(struct drm_mm_node *node, bool exit)
+{
+	struct drm_mm_node *child;
+	u64 max = HOLE_SIZE(node);
+
+	if (node->rb_hole_addr.rb_left) {
+		child = rb_entry(node->rb_hole_addr.rb_left, struct drm_mm_node,
+				 rb_hole_addr);
+		if (child->subtree_max_hole > max)
+			max = child->subtree_max_hole;
+	}
+
+	if (node->rb_hole_addr.rb_right) {
+		child = rb_entry(node->rb_hole_addr.rb_right,
+				 struct drm_mm_node, rb_hole_addr);
+		if (child->subtree_max_hole > max)
+			max = child->subtree_max_hole;
+	}
+
+	if (exit && node->subtree_max_hole == max)
+		return true;
+
+	node->subtree_max_hole = max;
+	return false;
+}
+
+static inline void
+augment_callbacks_propagate(struct rb_node *rb, struct rb_node *stop)
+{
+	while (rb != stop) {
+		struct drm_mm_node *node = rb_entry(rb,  struct drm_mm_node,
+						    rb_hole_addr);
+		if (augment_callbacks_compute_max_hole(node, true))
+			break;
+
+		rb = rb_parent(&node->rb_hole_addr);
+	}
+}
+
+static inline void
+augment_callbacks_copy(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+	struct drm_mm_node *old = rb_entry(rb_old, struct drm_mm_node,
+					   rb_hole_addr);
+	struct drm_mm_node *new = rb_entry(rb_new, struct drm_mm_node,
+					   rb_hole_addr);
+
+	new->subtree_max_hole = old->subtree_max_hole;
+}
+
+static void
+augment_callbacks_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+	struct drm_mm_node *old = rb_entry(rb_old, struct drm_mm_node,
+					   rb_hole_addr);
+	struct drm_mm_node *new = rb_entry(rb_new, struct drm_mm_node,
+					   rb_hole_addr);
+
+	new->subtree_max_hole = old->subtree_max_hole;
+	augment_callbacks_compute_max_hole(old, false);
+}
+
+static const struct rb_augment_callbacks augment_callbacks = {
+	.propagate = augment_callbacks_propagate,
+	.copy = augment_callbacks_copy,
+	.rotate = augment_callbacks_rotate
+};

 static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
 {
@@ -256,6 +321,7 @@ static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
 		parent = rb_entry(rb_parent, struct drm_mm_node, rb_hole_addr);
 		if (parent->subtree_max_hole < subtree_max_hole)
 			parent->subtree_max_hole = subtree_max_hole;
+
 		if (start < HOLE_ADDR(parent))
 			link = &parent->rb_hole_addr.rb_left;
 		else
--
2.26.2


* [PATCH 2/2] drm/mm: improve rb_hole_addr rbtree search
  2020-05-19  8:44 [PATCH 1/2] drm/mm: expand rb_hole_addr augmented callbacks Nirmoy Das
@ 2020-05-19  8:44 ` Nirmoy Das
  2020-05-19 21:55   ` Chris Wilson
  2020-05-19 22:56   ` Chris Wilson
  2020-05-19 22:40 ` [PATCH 1/2] drm/mm: expand rb_hole_addr augmented callbacks Chris Wilson
  2020-05-20  7:00 ` Christian König
  2 siblings, 2 replies; 9+ messages in thread
From: Nirmoy Das @ 2020-05-19  8:44 UTC (permalink / raw)
  To: dri-devel; +Cc: nirmoy.aiemd, Nirmoy Das, christian.koenig, chris

Userspace can still abuse alignment while allocating buffer objects
to slow down the rb_hole_addr rbtree search. This patch improves the
search in a fragmented rb_hole_addr rbtree by storing the maximum
subtree hole alignment and using it to skip a complete subtree if
that subtree cannot fit a (size + alignment) request.

With this patch applied, 50k bo allocs of size 4k and alignment 9k
took ~0.24 sec on amdgpu, compared to 27 sec without it.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/drm_mm.c | 66 ++++++++++++++++++++++++++++++++++------
 include/drm/drm_mm.h     |  1 +
 2 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 91e90c635e05..1af0a211b660 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -212,8 +212,11 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
 				   &drm_mm_interval_tree_augment);
 }
 
+#define DRM_MM_ALIGN_SHIFT 6
 #define HOLE_SIZE(NODE) ((NODE)->hole_size)
 #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
+#define HOLE_SIZE_ALIGN(NODE) ((NODE->hole_size << DRM_MM_ALIGN_SHIFT) | \
+			       ffs(HOLE_ADDR(NODE)))
 
 static u64 rb_to_hole_size(struct rb_node *rb)
 {
@@ -241,6 +244,33 @@ static void insert_hole_size(struct rb_root_cached *root,
 	rb_insert_color_cached(&node->rb_hole_size, root, first);
 }
 
+static inline bool
+augment_callbacks_compute_max_hole_align(struct drm_mm_node *node, bool exit)
+{
+	struct drm_mm_node *child;
+	u64 max = HOLE_SIZE_ALIGN(node);
+
+	if (node->rb_hole_addr.rb_left) {
+		child = rb_entry(node->rb_hole_addr.rb_left, struct drm_mm_node,
+				 rb_hole_addr);
+		if (child->subtree_max_hole_align > max)
+			max = child->subtree_max_hole_align;
+	}
+
+	if (node->rb_hole_addr.rb_right) {
+		child = rb_entry(node->rb_hole_addr.rb_right,
+				 struct drm_mm_node, rb_hole_addr);
+		if (child->subtree_max_hole_align > max)
+			max = child->subtree_max_hole_align;
+	}
+
+	if (exit && node->subtree_max_hole_align == max)
+		return true;
+
+	node->subtree_max_hole_align = max;
+	return false;
+}
+
 static inline bool
 augment_callbacks_compute_max_hole(struct drm_mm_node *node, bool exit)
 {
@@ -271,10 +301,14 @@ augment_callbacks_compute_max_hole(struct drm_mm_node *node, bool exit)
 static inline void
 augment_callbacks_propagate(struct rb_node *rb, struct rb_node *stop)
 {
+	bool compute_max_hole, compute_max_hole_align;
+
 	while (rb != stop) {
 		struct drm_mm_node *node = rb_entry(rb,  struct drm_mm_node,
 						    rb_hole_addr);
-		if (augment_callbacks_compute_max_hole(node, true))
+		compute_max_hole = augment_callbacks_compute_max_hole(node, true);
+		compute_max_hole_align = augment_callbacks_compute_max_hole_align(node, true);
+		if (compute_max_hole && compute_max_hole_align)
 			break;
 
 		rb = rb_parent(&node->rb_hole_addr);
@@ -290,6 +324,7 @@ augment_callbacks_copy(struct rb_node *rb_old, struct rb_node *rb_new)
 					   rb_hole_addr);
 
 	new->subtree_max_hole = old->subtree_max_hole;
+	new->subtree_max_hole_align = old->subtree_max_hole_align;
 }
 
 static void
@@ -301,7 +336,9 @@ augment_callbacks_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
 					   rb_hole_addr);
 
 	new->subtree_max_hole = old->subtree_max_hole;
+	new->subtree_max_hole_align = old->subtree_max_hole_align;
 	augment_callbacks_compute_max_hole(old, false);
+	augment_callbacks_compute_max_hole_align(old, false);
 }
 
 static const struct rb_augment_callbacks augment_callbacks = {
@@ -313,7 +350,9 @@ static const struct rb_augment_callbacks augment_callbacks = {
 static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
 {
 	struct rb_node **link = &root->rb_node, *rb_parent = NULL;
-	u64 start = HOLE_ADDR(node), subtree_max_hole = node->subtree_max_hole;
+	u64 start = HOLE_ADDR(node);
+	u64 subtree_max_hole = node->subtree_max_hole;
+	u64 subtree_max_hole_align = node->subtree_max_hole_align;
 	struct drm_mm_node *parent;
 
 	while (*link) {
@@ -322,6 +361,9 @@ static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
 		if (parent->subtree_max_hole < subtree_max_hole)
 			parent->subtree_max_hole = subtree_max_hole;
 
+		if (parent->subtree_max_hole_align < subtree_max_hole_align)
+			parent->subtree_max_hole_align = subtree_max_hole_align;
+
 		if (start < HOLE_ADDR(parent))
 			link = &parent->rb_hole_addr.rb_left;
 		else
@@ -339,6 +381,7 @@ static void add_hole(struct drm_mm_node *node)
 	node->hole_size =
 		__drm_mm_hole_node_end(node) - __drm_mm_hole_node_start(node);
 	node->subtree_max_hole = node->hole_size;
+	node->subtree_max_hole_align = HOLE_SIZE_ALIGN(node);
 	DRM_MM_BUG_ON(!drm_mm_hole_follows(node));
 
 	insert_hole_size(&mm->holes_size, node);
@@ -355,8 +398,10 @@ static void rm_hole(struct drm_mm_node *node)
 	rb_erase_cached(&node->rb_hole_size, &node->mm->holes_size);
 	rb_erase_augmented(&node->rb_hole_addr, &node->mm->holes_addr,
 			   &augment_callbacks);
+
 	node->hole_size = 0;
 	node->subtree_max_hole = 0;
+	node->subtree_max_hole_align = 0;
 
 	DRM_MM_BUG_ON(drm_mm_hole_follows(node));
 }
@@ -458,10 +503,11 @@ first_hole(struct drm_mm *mm,
  * else return parent of @entry
  */
 static struct drm_mm_node *
-next_hole_high_addr(struct drm_mm_node *entry, u64 size)
+next_hole_high_addr(struct drm_mm_node *entry, u64 size, u64 alignment)
 {
 	struct rb_node *rb_node, *left_rb_node, *parent_rb_node;
 	struct drm_mm_node *left_node;
+	u64 req_align = (size + alignment) << DRM_MM_ALIGN_SHIFT;
 
 	if (!entry)
 		return NULL;
@@ -473,6 +519,7 @@ next_hole_high_addr(struct drm_mm_node *entry, u64 size)
 		left_node = rb_entry(left_rb_node,
 				     struct drm_mm_node, rb_hole_addr);
 		if ((left_node->subtree_max_hole < size ||
+		     left_node->subtree_max_hole_align < req_align ||
 		     entry->size == entry->subtree_max_hole) &&
 		    parent_rb_node && parent_rb_node->rb_left != rb_node)
 			return rb_hole_addr_to_node(parent_rb_node);
@@ -498,10 +545,11 @@ next_hole_high_addr(struct drm_mm_node *entry, u64 size)
  * else return parent of @entry
  */
 static struct drm_mm_node *
-next_hole_low_addr(struct drm_mm_node *entry, u64 size)
+next_hole_low_addr(struct drm_mm_node *entry, u64 size, u64 alignment)
 {
 	struct rb_node *rb_node, *right_rb_node, *parent_rb_node;
 	struct drm_mm_node *right_node;
+	u64 req_align = (size + alignment) << DRM_MM_ALIGN_SHIFT;
 
 	if (!entry)
 		return NULL;
@@ -513,6 +561,7 @@ next_hole_low_addr(struct drm_mm_node *entry, u64 size)
 		right_node = rb_entry(right_rb_node,
 				      struct drm_mm_node, rb_hole_addr);
 		if ((right_node->subtree_max_hole < size ||
+		     right_node->subtree_max_hole_align < req_align ||
 		     entry->size == entry->subtree_max_hole) &&
 		    parent_rb_node && parent_rb_node->rb_right != rb_node)
 			return rb_hole_addr_to_node(parent_rb_node);
@@ -524,7 +573,7 @@ next_hole_low_addr(struct drm_mm_node *entry, u64 size)
 static struct drm_mm_node *
 next_hole(struct drm_mm *mm,
 	  struct drm_mm_node *node,
-	  u64 size,
+	  u64 size, u64 alignment,
 	  enum drm_mm_insert_mode mode)
 {
 	switch (mode) {
@@ -533,10 +582,10 @@ next_hole(struct drm_mm *mm,
 		return rb_hole_size_to_node(rb_prev(&node->rb_hole_size));
 
 	case DRM_MM_INSERT_LOW:
-		return next_hole_low_addr(node, size);
+		return next_hole_low_addr(node, size, alignment);
 
 	case DRM_MM_INSERT_HIGH:
-		return next_hole_high_addr(node, size);
+		return next_hole_high_addr(node, size, alignment);
 
 	case DRM_MM_INSERT_EVICT:
 		node = list_next_entry(node, hole_stack);
@@ -650,7 +699,7 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 	remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
 	for (hole = first_hole(mm, range_start, range_end, size, mode);
 	     hole;
-	     hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+	     hole = once ? NULL : next_hole(mm, hole, size, alignment, mode)) {
 		u64 hole_start = __drm_mm_hole_node_start(hole);
 		u64 hole_end = hole_start + hole->hole_size;
 		u64 adj_start, adj_end;
@@ -713,7 +762,6 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 			add_hole(hole);
 		if (adj_start + size < hole_end)
 			add_hole(node);
-
 		save_stack(node);
 		return 0;
 	}
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index a01bc6fac83c..c280b535d9a9 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -169,6 +169,7 @@ struct drm_mm_node {
 	u64 __subtree_last;
 	u64 hole_size;
 	u64 subtree_max_hole;
+	u64 subtree_max_hole_align;
 	unsigned long flags;
 #define DRM_MM_NODE_ALLOCATED_BIT	0
 #define DRM_MM_NODE_SCANNED_BIT		1
-- 
2.26.2


* Re: [PATCH 2/2] drm/mm: improve rb_hole_addr rbtree search
  2020-05-19  8:44 ` [PATCH 2/2] drm/mm: improve rb_hole_addr rbtree search Nirmoy Das
@ 2020-05-19 21:55   ` Chris Wilson
  2020-05-19 22:56   ` Chris Wilson
  1 sibling, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2020-05-19 21:55 UTC (permalink / raw)
  To: Nirmoy Das, dri-devel; +Cc: Nirmoy Das, christian.koenig, nirmoy.aiemd

Quoting Nirmoy Das (2020-05-19 09:44:36)
> Userspace can still abuse alignment while allocating buffer objects
> to slow down the rb_hole_addr rbtree search. This patch improves the
> search in a fragmented rb_hole_addr rbtree by storing the maximum
> subtree hole alignment and using it to skip a complete subtree if
> that subtree cannot fit a (size + alignment) request.
> 
> With this patch applied, 50k bo allocs of size 4k and alignment 9k
> took ~0.24 sec on amdgpu, compared to 27 sec without it.
> 
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
>  drivers/gpu/drm/drm_mm.c | 66 ++++++++++++++++++++++++++++++++++------
>  include/drm/drm_mm.h     |  1 +
>  2 files changed, 58 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> index 91e90c635e05..1af0a211b660 100644
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -212,8 +212,11 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
>                                    &drm_mm_interval_tree_augment);
>  }
>  
> +#define DRM_MM_ALIGN_SHIFT 6
>  #define HOLE_SIZE(NODE) ((NODE)->hole_size)
>  #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
> +#define HOLE_SIZE_ALIGN(NODE) ((NODE->hole_size << DRM_MM_ALIGN_SHIFT) | \
> +                              ffs(HOLE_ADDR(NODE)))
>  
>  static u64 rb_to_hole_size(struct rb_node *rb)
>  {
> @@ -241,6 +244,33 @@ static void insert_hole_size(struct rb_root_cached *root,
>         rb_insert_color_cached(&node->rb_hole_size, root, first);
>  }
>  
> +static inline bool
> +augment_callbacks_compute_max_hole_align(struct drm_mm_node *node, bool exit)
> +{
> +       struct drm_mm_node *child;
> +       u64 max = HOLE_SIZE_ALIGN(node);
> +
> +       if (node->rb_hole_addr.rb_left) {
> +               child = rb_entry(node->rb_hole_addr.rb_left, struct drm_mm_node,
> +                                rb_hole_addr);
> +               if (child->subtree_max_hole_align > max)
> +                       max = child->subtree_max_hole_align;
> +       }
> +
> +       if (node->rb_hole_addr.rb_right) {
> +               child = rb_entry(node->rb_hole_addr.rb_right,
> +                                struct drm_mm_node, rb_hole_addr);
> +               if (child->subtree_max_hole_align > max)
> +                       max = child->subtree_max_hole_align;
> +       }
> +
> +       if (exit && node->subtree_max_hole_align == max)
> +               return true;
> +
> +       node->subtree_max_hole_align = max;
> +       return false;
> +}
> +
>  static inline bool
>  augment_callbacks_compute_max_hole(struct drm_mm_node *node, bool exit)
>  {
> @@ -271,10 +301,14 @@ augment_callbacks_compute_max_hole(struct drm_mm_node *node, bool exit)
>  static inline void
>  augment_callbacks_propagate(struct rb_node *rb, struct rb_node *stop)
>  {
> +       bool compute_max_hole, compute_max_hole_align;
> +
>         while (rb != stop) {
>                 struct drm_mm_node *node = rb_entry(rb,  struct drm_mm_node,
>                                                     rb_hole_addr);
> -               if (augment_callbacks_compute_max_hole(node, true))
> +               compute_max_hole = augment_callbacks_compute_max_hole(node, true);
> +               compute_max_hole_align = augment_callbacks_compute_max_hole_align(node, true);
> +               if (compute_max_hole && compute_max_hole_align)
>                         break;
>  
>                 rb = rb_parent(&node->rb_hole_addr);
> @@ -290,6 +324,7 @@ augment_callbacks_copy(struct rb_node *rb_old, struct rb_node *rb_new)
>                                            rb_hole_addr);
>  
>         new->subtree_max_hole = old->subtree_max_hole;
> +       new->subtree_max_hole_align = old->subtree_max_hole_align;
>  }
>  
>  static void
> @@ -301,7 +336,9 @@ augment_callbacks_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
>                                            rb_hole_addr);
>  
>         new->subtree_max_hole = old->subtree_max_hole;
> +       new->subtree_max_hole_align = old->subtree_max_hole_align;
>         augment_callbacks_compute_max_hole(old, false);
> +       augment_callbacks_compute_max_hole_align(old, false);
>  }
>  
>  static const struct rb_augment_callbacks augment_callbacks = {
> @@ -313,7 +350,9 @@ static const struct rb_augment_callbacks augment_callbacks = {
>  static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
>  {
>         struct rb_node **link = &root->rb_node, *rb_parent = NULL;
> -       u64 start = HOLE_ADDR(node), subtree_max_hole = node->subtree_max_hole;
> +       u64 start = HOLE_ADDR(node);
> +       u64 subtree_max_hole = node->subtree_max_hole;
> +       u64 subtree_max_hole_align = node->subtree_max_hole_align;
>         struct drm_mm_node *parent;
>  
>         while (*link) {
> @@ -322,6 +361,9 @@ static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
>                 if (parent->subtree_max_hole < subtree_max_hole)
>                         parent->subtree_max_hole = subtree_max_hole;
>  
> +               if (parent->subtree_max_hole_align < subtree_max_hole_align)
> +                       parent->subtree_max_hole_align = subtree_max_hole_align;
> +
>                 if (start < HOLE_ADDR(parent))
>                         link = &parent->rb_hole_addr.rb_left;
>                 else
> @@ -339,6 +381,7 @@ static void add_hole(struct drm_mm_node *node)
>         node->hole_size =
>                 __drm_mm_hole_node_end(node) - __drm_mm_hole_node_start(node);
>         node->subtree_max_hole = node->hole_size;
> +       node->subtree_max_hole_align = HOLE_SIZE_ALIGN(node);
>         DRM_MM_BUG_ON(!drm_mm_hole_follows(node));
>  
>         insert_hole_size(&mm->holes_size, node);
> @@ -355,8 +398,10 @@ static void rm_hole(struct drm_mm_node *node)
>         rb_erase_cached(&node->rb_hole_size, &node->mm->holes_size);
>         rb_erase_augmented(&node->rb_hole_addr, &node->mm->holes_addr,
>                            &augment_callbacks);
> +
>         node->hole_size = 0;
>         node->subtree_max_hole = 0;
> +       node->subtree_max_hole_align = 0;
>  
>         DRM_MM_BUG_ON(drm_mm_hole_follows(node));
>  }
> @@ -458,10 +503,11 @@ first_hole(struct drm_mm *mm,
>   * else return parent of @entry
>   */
>  static struct drm_mm_node *
> -next_hole_high_addr(struct drm_mm_node *entry, u64 size)
> +next_hole_high_addr(struct drm_mm_node *entry, u64 size, u64 alignment)
>  {
>         struct rb_node *rb_node, *left_rb_node, *parent_rb_node;
>         struct drm_mm_node *left_node;
> +       u64 req_align = (size + alignment) << DRM_MM_ALIGN_SHIFT;
>  
>         if (!entry)
>                 return NULL;
> @@ -473,6 +519,7 @@ next_hole_high_addr(struct drm_mm_node *entry, u64 size)
>                 left_node = rb_entry(left_rb_node,
>                                      struct drm_mm_node, rb_hole_addr);
>                 if ((left_node->subtree_max_hole < size ||
> +                    left_node->subtree_max_hole_align < req_align ||
>                      entry->size == entry->subtree_max_hole) &&
>                     parent_rb_node && parent_rb_node->rb_left != rb_node)
>                         return rb_hole_addr_to_node(parent_rb_node);
> @@ -498,10 +545,11 @@ next_hole_high_addr(struct drm_mm_node *entry, u64 size)
>   * else return parent of @entry
>   */
>  static struct drm_mm_node *
> -next_hole_low_addr(struct drm_mm_node *entry, u64 size)
> +next_hole_low_addr(struct drm_mm_node *entry, u64 size, u64 alignment)
>  {
>         struct rb_node *rb_node, *right_rb_node, *parent_rb_node;
>         struct drm_mm_node *right_node;
> +       u64 req_align = (size + alignment) << DRM_MM_ALIGN_SHIFT;
>  
>         if (!entry)
>                 return NULL;
> @@ -513,6 +561,7 @@ next_hole_low_addr(struct drm_mm_node *entry, u64 size)
>                 right_node = rb_entry(right_rb_node,
>                                       struct drm_mm_node, rb_hole_addr);
>                 if ((right_node->subtree_max_hole < size ||
> +                    right_node->subtree_max_hole_align < req_align ||
>                      entry->size == entry->subtree_max_hole) &&
>                     parent_rb_node && parent_rb_node->rb_right != rb_node)
>                         return rb_hole_addr_to_node(parent_rb_node);
> @@ -524,7 +573,7 @@ next_hole_low_addr(struct drm_mm_node *entry, u64 size)
>  static struct drm_mm_node *
>  next_hole(struct drm_mm *mm,
>           struct drm_mm_node *node,
> -         u64 size,
> +         u64 size, u64 alignment,
>           enum drm_mm_insert_mode mode)
>  {
>         switch (mode) {
> @@ -533,10 +582,10 @@ next_hole(struct drm_mm *mm,
>                 return rb_hole_size_to_node(rb_prev(&node->rb_hole_size));
>  
>         case DRM_MM_INSERT_LOW:
> -               return next_hole_low_addr(node, size);
> +               return next_hole_low_addr(node, size, alignment);
>  
>         case DRM_MM_INSERT_HIGH:
> -               return next_hole_high_addr(node, size);
> +               return next_hole_high_addr(node, size, alignment);

Our test case is of course hitting DRM_MM_INSERT_BEST, so it shows no
benefit. Though I do have an open-coded search to avoid the O(N^2)
behaviour that just ascends the size rbtree until it is happy (rather
than looking at all nodes within a size).

Searching top-down/bottom-up does have its merits though, so if with
this rbtree it's always quicker than best...
-Chris

* Re: [PATCH 1/2] drm/mm: expand rb_hole_addr augmented callbacks
  2020-05-19  8:44 [PATCH 1/2] drm/mm: expand rb_hole_addr augmented callbacks Nirmoy Das
  2020-05-19  8:44 ` [PATCH 2/2] drm/mm: improve rb_hole_addr rbtree search Nirmoy Das
@ 2020-05-19 22:40 ` Chris Wilson
  2020-05-20  7:00 ` Christian König
  2 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2020-05-19 22:40 UTC (permalink / raw)
  To: Nirmoy Das, dri-devel; +Cc: Nirmoy Das, christian.koenig, nirmoy.aiemd

Quoting Nirmoy Das (2020-05-19 09:44:35)
> Expand RB_DECLARE_CALLBACKS_MAX so that it is possible to store
> an extra value (max hole alignment) in the rb_hole_addr augmented rbtree.
> 
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
>  drivers/gpu/drm/drm_mm.c | 72 ++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 69 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> index f4ca1ff80af9..91e90c635e05 100644
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -241,9 +241,74 @@ static void insert_hole_size(struct rb_root_cached *root,
>         rb_insert_color_cached(&node->rb_hole_size, root, first);
>  }
> 
> -RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
> -                        struct drm_mm_node, rb_hole_addr,
> -                        u64, subtree_max_hole, HOLE_SIZE)
> +static inline bool
> +augment_callbacks_compute_max_hole(struct drm_mm_node *node, bool exit)
> +{
> +       struct drm_mm_node *child;
> +       u64 max = HOLE_SIZE(node);
> +
> +       if (node->rb_hole_addr.rb_left) {
> +               child = rb_entry(node->rb_hole_addr.rb_left, struct drm_mm_node,
> +                                rb_hole_addr);

We repeat rb_entry a lot. There's rb_hole_addr_to_node() which is
slightly more concise, but uses the _safe variant. Though you could

	child = rb_hole_addr_to_node(node->rb_hole_addr.rb_left);
	if (child && child->subtree_max_hole > max)
		max = child->subtree_max_hole;

which the compiler should generate equivalent code for.

But do have a think of a suitable name for a helper to tidy up all the
rb_entries that are overflowing 80cols.
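
Something as simple as this would do (sketch only, helper name made up):

	/* assumes the caller has already checked the rb pointer for NULL */
	static inline struct drm_mm_node *to_hole_node(struct rb_node *rb)
	{
		return rb_entry(rb, struct drm_mm_node, rb_hole_addr);
	}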

> +               if (child->subtree_max_hole > max)
> +                       max = child->subtree_max_hole;
> +       }
> +
> +       if (node->rb_hole_addr.rb_right) {
> +               child = rb_entry(node->rb_hole_addr.rb_right,
> +                                struct drm_mm_node, rb_hole_addr);
> +               if (child->subtree_max_hole > max)
> +                       max = child->subtree_max_hole;
> +       }
> +
> +       if (exit && node->subtree_max_hole == max)
> +               return true;
> +
> +       node->subtree_max_hole = max;
> +       return false;
> +}
> +
> +static inline void
> +augment_callbacks_propagate(struct rb_node *rb, struct rb_node *stop)
> +{
> +       while (rb != stop) {
> +               struct drm_mm_node *node = rb_entry(rb,  struct drm_mm_node,
> +                                                   rb_hole_addr);

Coding style / checkpatch will insist on a newline here after the vars.


> +               if (augment_callbacks_compute_max_hole(node, true))
> +                       break;
> +
> +               rb = rb_parent(&node->rb_hole_addr);
> +       }
> +}
> +
> +static inline void
> +augment_callbacks_copy(struct rb_node *rb_old, struct rb_node *rb_new)
> +{
> +       struct drm_mm_node *old = rb_entry(rb_old, struct drm_mm_node,
> +                                          rb_hole_addr);
> +       struct drm_mm_node *new = rb_entry(rb_new, struct drm_mm_node,
> +                                          rb_hole_addr);
> +
> +       new->subtree_max_hole = old->subtree_max_hole;
> +}
> +
> +static void
> +augment_callbacks_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
> +{
> +       struct drm_mm_node *old = rb_entry(rb_old, struct drm_mm_node,
> +                                          rb_hole_addr);
> +       struct drm_mm_node *new = rb_entry(rb_new, struct drm_mm_node,
> +                                          rb_hole_addr);
> +
> +       new->subtree_max_hole = old->subtree_max_hole;
> +       augment_callbacks_compute_max_hole(old, false);
> +}
> +
> +static const struct rb_augment_callbacks augment_callbacks = {
> +       .propagate = augment_callbacks_propagate,
> +       .copy = augment_callbacks_copy,
> +       .rotate = augment_callbacks_rotate
> +};

Faithful expansion of the macros,
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris

* Re: [PATCH 2/2] drm/mm: improve rb_hole_addr rbtree search
  2020-05-19  8:44 ` [PATCH 2/2] drm/mm: improve rb_hole_addr rbtree search Nirmoy Das
  2020-05-19 21:55   ` Chris Wilson
@ 2020-05-19 22:56   ` Chris Wilson
  2020-05-20 16:28     ` Nirmoy
  1 sibling, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2020-05-19 22:56 UTC (permalink / raw)
  To: Nirmoy Das, dri-devel; +Cc: Nirmoy Das, christian.koenig, nirmoy.aiemd

Quoting Nirmoy Das (2020-05-19 09:44:36)
> +#define DRM_MM_ALIGN_SHIFT 6
>  #define HOLE_SIZE(NODE) ((NODE)->hole_size)
>  #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
> +#define HOLE_SIZE_ALIGN(NODE) ((NODE->hole_size << DRM_MM_ALIGN_SHIFT) | \
> +                              ffs(HOLE_ADDR(NODE)))

Fwiw, max hole size of 58b, we would need to stop storing byte
extents...
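
To spell out the bit budget, a standalone userspace model of the packing
(illustrative only, not the kernel code):

	#include <stdint.h>
	#include <stdio.h>
	#include <strings.h> /* ffs() */

	#define DRM_MM_ALIGN_SHIFT 6

	/* hole_size << 6 leaves 64 - 6 = 58 bits for the size, while
	 * ffs() of the hole start (at most 32 for an int) fits in the
	 * low 6 bits -- hence byte extents above 2^58 no longer fit. */
	static uint64_t pack(uint64_t hole_size, uint64_t hole_addr)
	{
		return (hole_size << DRM_MM_ALIGN_SHIFT) |
		       (uint64_t)ffs((int)hole_addr);
	}

	int main(void)
	{
		/* a 4 KiB hole starting at an 8 KiB-aligned address */
		printf("%#llx\n", (unsigned long long)pack(4096, 8192));
		return 0;
	}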

>  static struct drm_mm_node *
> -next_hole_low_addr(struct drm_mm_node *entry, u64 size)
> +next_hole_low_addr(struct drm_mm_node *entry, u64 size, u64 alignment)
>  {
>         struct rb_node *rb_node, *right_rb_node, *parent_rb_node;
>         struct drm_mm_node *right_node;
> +       u64 req_align = (size + alignment) << DRM_MM_ALIGN_SHIFT;
>  
>         if (!entry)
>                 return NULL;
> @@ -513,6 +561,7 @@ next_hole_low_addr(struct drm_mm_node *entry, u64 size)
>                 right_node = rb_entry(right_rb_node,
>                                       struct drm_mm_node, rb_hole_addr);
>                 if ((right_node->subtree_max_hole < size ||
> +                    right_node->subtree_max_hole_align < req_align ||

What was the point in storing the packed alignment if we are just
searching for a hole big enough for (size + alignment)?
-Chris

* Re: [PATCH 1/2] drm/mm: expand rb_hole_addr augmented callbacks
  2020-05-19  8:44 [PATCH 1/2] drm/mm: expand rb_hole_addr augmented callbacks Nirmoy Das
  2020-05-19  8:44 ` [PATCH 2/2] drm/mm: improve rb_hole_addr rbtree search Nirmoy Das
  2020-05-19 22:40 ` [PATCH 1/2] drm/mm: expand rb_hole_addr augmented callbacks Chris Wilson
@ 2020-05-20  7:00 ` Christian König
  2 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2020-05-20  7:00 UTC (permalink / raw)
  To: Nirmoy Das, dri-devel; +Cc: Nirmoy Das, chris

Am 19.05.20 um 10:44 schrieb Nirmoy Das:
> Expand RB_DECLARE_CALLBACKS_MAX so that it is possible to store
> an extra value (max hole alignment) in the rb_hole_addr augmented rbtree.

This looks too complicated to me. Why not just extend the HOLE_SIZE 
definition to come up with a combined size/alignment value?
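
Roughly like this (sketch only; the macro name is made up, and it borrows
patch 2's DRM_MM_ALIGN_SHIFT):

	#define HOLE_SIZE_AND_ALIGN(NODE) \
		((HOLE_SIZE(NODE) << DRM_MM_ALIGN_SHIFT) | ffs(HOLE_ADDR(NODE)))

	RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
				 struct drm_mm_node, rb_hole_addr,
				 u64, subtree_max_hole, HOLE_SIZE_AND_ALIGN)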

Christian.

>
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
>   drivers/gpu/drm/drm_mm.c | 72 ++++++++++++++++++++++++++++++++++++++--
>   1 file changed, 69 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> index f4ca1ff80af9..91e90c635e05 100644
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -241,9 +241,74 @@ static void insert_hole_size(struct rb_root_cached *root,
>   	rb_insert_color_cached(&node->rb_hole_size, root, first);
>   }
>
> -RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
> -			 struct drm_mm_node, rb_hole_addr,
> -			 u64, subtree_max_hole, HOLE_SIZE)
> +static inline bool
> +augment_callbacks_compute_max_hole(struct drm_mm_node *node, bool exit)
> +{
> +	struct drm_mm_node *child;
> +	u64 max = HOLE_SIZE(node);
> +
> +	if (node->rb_hole_addr.rb_left) {
> +		child = rb_entry(node->rb_hole_addr.rb_left, struct drm_mm_node,
> +				 rb_hole_addr);
> +		if (child->subtree_max_hole > max)
> +			max = child->subtree_max_hole;
> +	}
> +
> +	if (node->rb_hole_addr.rb_right) {
> +		child = rb_entry(node->rb_hole_addr.rb_right,
> +				 struct drm_mm_node, rb_hole_addr);
> +		if (child->subtree_max_hole > max)
> +			max = child->subtree_max_hole;
> +	}
> +
> +	if (exit && node->subtree_max_hole == max)
> +		return true;
> +
> +	node->subtree_max_hole = max;
> +	return false;
> +}
> +
> +static inline void
> +augment_callbacks_propagate(struct rb_node *rb, struct rb_node *stop)
> +{
> +	while (rb != stop) {
> +		struct drm_mm_node *node = rb_entry(rb,  struct drm_mm_node,
> +						    rb_hole_addr);
> +		if (augment_callbacks_compute_max_hole(node, true))
> +			break;
> +
> +		rb = rb_parent(&node->rb_hole_addr);
> +	}
> +}
> +
> +static inline void
> +augment_callbacks_copy(struct rb_node *rb_old, struct rb_node *rb_new)
> +{
> +	struct drm_mm_node *old = rb_entry(rb_old, struct drm_mm_node,
> +					   rb_hole_addr);
> +	struct drm_mm_node *new = rb_entry(rb_new, struct drm_mm_node,
> +					   rb_hole_addr);
> +
> +	new->subtree_max_hole = old->subtree_max_hole;
> +}
> +
> +static void
> +augment_callbacks_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
> +{
> +	struct drm_mm_node *old = rb_entry(rb_old, struct drm_mm_node,
> +					   rb_hole_addr);
> +	struct drm_mm_node *new = rb_entry(rb_new, struct drm_mm_node,
> +					   rb_hole_addr);
> +
> +	new->subtree_max_hole = old->subtree_max_hole;
> +	augment_callbacks_compute_max_hole(old, false);
> +}
> +
> +static const struct rb_augment_callbacks augment_callbacks = {
> +	.propagate = augment_callbacks_propagate,
> +	.copy = augment_callbacks_copy,
> +	.rotate = augment_callbacks_rotate
> +};
>
>   static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
>   {
> @@ -256,6 +321,7 @@ static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
>   		parent = rb_entry(rb_parent, struct drm_mm_node, rb_hole_addr);
>   		if (parent->subtree_max_hole < subtree_max_hole)
>   			parent->subtree_max_hole = subtree_max_hole;
> +
>   		if (start < HOLE_ADDR(parent))
>   			link = &parent->rb_hole_addr.rb_left;
>   		else
> --
> 2.26.2
>


* Re: [PATCH 2/2] drm/mm: improve rb_hole_addr rbtree search
  2020-05-19 22:56   ` Chris Wilson
@ 2020-05-20 16:28     ` Nirmoy
  2020-05-20 16:35       ` Chris Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: Nirmoy @ 2020-05-20 16:28 UTC (permalink / raw)
  To: Chris Wilson; +Cc: dri-devel

Hi Chris,

On 5/20/20 12:56 AM, Chris Wilson wrote:
> Quoting Nirmoy Das (2020-05-19 09:44:36)
>> +#define DRM_MM_ALIGN_SHIFT 6
>>   #define HOLE_SIZE(NODE) ((NODE)->hole_size)
>>   #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
>> +#define HOLE_SIZE_ALIGN(NODE) ((NODE->hole_size << DRM_MM_ALIGN_SHIFT) | \
>> +                              ffs(HOLE_ADDR(NODE)))
> Fwiw, max hole size of 58b, we would need to stop storing byte
> extents...


Can you please explain the 2nd part of this statement?


>>   static struct drm_mm_node *
>> -next_hole_low_addr(struct drm_mm_node *entry, u64 size)
>> +next_hole_low_addr(struct drm_mm_node *entry, u64 size, u64 alignment)
>>   {
>>          struct rb_node *rb_node, *right_rb_node, *parent_rb_node;
>>          struct drm_mm_node *right_node;
>> +       u64 req_align = (size + alignment) << DRM_MM_ALIGN_SHIFT;
>>   
>>          if (!entry)
>>                  return NULL;
>> @@ -513,6 +561,7 @@ next_hole_low_addr(struct drm_mm_node *entry, u64 size)
>>                  right_node = rb_entry(right_rb_node,
>>                                        struct drm_mm_node, rb_hole_addr);
>>                  if ((right_node->subtree_max_hole < size ||
>> +                    right_node->subtree_max_hole_align < req_align ||
> What was the point in storing the packed alignment if we are just
> searching for a hole big enough for (size + alignment)?

Yes, I realized this is not correct :/

Still thinking about a better solution that captures alignment in the
subtree elimination.


Regards,

Nirmoy

> -Chris

* Re: [PATCH 2/2] drm/mm: improve rb_hole_addr rbtree search
  2020-05-20 16:28     ` Nirmoy
@ 2020-05-20 16:35       ` Chris Wilson
  2020-05-20 16:46         ` Nirmoy
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2020-05-20 16:35 UTC (permalink / raw)
  To: Nirmoy; +Cc: dri-devel

Quoting Nirmoy (2020-05-20 17:28:04)
> Hi Chris,
> 
> On 5/20/20 12:56 AM, Chris Wilson wrote:
> > Quoting Nirmoy Das (2020-05-19 09:44:36)
> >> +#define DRM_MM_ALIGN_SHIFT 6
> >>   #define HOLE_SIZE(NODE) ((NODE)->hole_size)
> >>   #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
> >> +#define HOLE_SIZE_ALIGN(NODE) ((NODE->hole_size << DRM_MM_ALIGN_SHIFT) | \
> >> +                              ffs(HOLE_ADDR(NODE)))
> > Fwiw, max hole size of 58b, we would need to stop storing byte
> > extents...
> 
> 
> Can you please explain the 2nd part of this statement?

Currently we [i915] use drm_mm with byte-addressing, so 58b is a tad too
close to the amount we actually need to track. If we used page tracking
instead of bytes, we regain 12b to play around with. It makes no
difference to the code at the moment (e.g. we still could not fit within
u32) so there has been no pressure to rewrite the extents handling. But
given sufficient reason, we could.
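
In rough numbers (illustrative only, 4 KiB pages assumed):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t hole_bytes = 1ull << 60;       /* byte extent */
		uint64_t hole_pages = hole_bytes >> 12; /* same hole in pages */

		/* 2^60 << 6 would overflow a u64; 2^48 << 6 fits easily */
		assert(((hole_pages << 6) >> 6) == hole_pages);
		return 0;
	}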
-Chris

* Re: [PATCH 2/2] drm/mm: improve rb_hole_addr rbtree search
  2020-05-20 16:35       ` Chris Wilson
@ 2020-05-20 16:46         ` Nirmoy
  0 siblings, 0 replies; 9+ messages in thread
From: Nirmoy @ 2020-05-20 16:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: dri-devel


On 5/20/20 6:35 PM, Chris Wilson wrote:
> Quoting Nirmoy (2020-05-20 17:28:04)
>> Hi Chris,
>>
>> On 5/20/20 12:56 AM, Chris Wilson wrote:
>>> Quoting Nirmoy Das (2020-05-19 09:44:36)
>>>> +#define DRM_MM_ALIGN_SHIFT 6
>>>>    #define HOLE_SIZE(NODE) ((NODE)->hole_size)
>>>>    #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
>>>> +#define HOLE_SIZE_ALIGN(NODE) ((NODE->hole_size << DRM_MM_ALIGN_SHIFT) | \
>>>> +                              ffs(HOLE_ADDR(NODE)))
>>> Fwiw, max hole size of 58b, we would need to stop storing byte
>>> extents...
>>
>> Can you please explain the 2nd part of this statement?
> Currently we [i915] use drm_mm with byte-addressing, so 58b is a tad too
> close to the amount we actually need to track. If we used page tracking
> instead of bytes, we regain 12b to play around with. It makes no
> difference to the code at the moment (e.g. we still could not fit within
> u32) so there has been no pressure to rewrite the extents handling. But
> given sufficient reason, we could.
> -Chris


Thanks for the detailed explanation.


Nirmoy

