Commit 85d3a316c714197f94e75c1e5b2d37607d66e5de

Authored by Michel Lespinasse
Committed by Linus Torvalds
1 parent 6b2dbba8b6

kmemleak: use rbtree instead of prio tree

kmemleak uses a tree where each node represents an allocated memory object
in order to quickly find out what object a given address is part of.
However, the objects don't overlap, so rbtrees are a better choice than
a prio tree for this use.  They are both faster and have lower memory
overhead.

Tested by booting a kernel with kmemleak enabled, loading the
kmemleak_test module, and looking for the expected messages.

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 51 additions and 49 deletions Side-by-side Diff

... ... @@ -29,7 +29,7 @@
29 29 * - kmemleak_lock (rwlock): protects the object_list modifications and
30 30 * accesses to the object_tree_root. The object_list is the main list
31 31 * holding the metadata (struct kmemleak_object) for the allocated memory
32   - * blocks. The object_tree_root is a priority search tree used to look-up
  32 + * blocks. The object_tree_root is a red black tree used to look-up
33 33 * metadata based on a pointer to the corresponding memory block. The
34 34 * kmemleak_object structures are added to the object_list and
35 35 * object_tree_root in the create_object() function called from the
... ... @@ -71,7 +71,7 @@
71 71 #include <linux/delay.h>
72 72 #include <linux/export.h>
73 73 #include <linux/kthread.h>
74   -#include <linux/prio_tree.h>
  74 +#include <linux/rbtree.h>
75 75 #include <linux/fs.h>
76 76 #include <linux/debugfs.h>
77 77 #include <linux/seq_file.h>
... ... @@ -132,7 +132,7 @@
132 132 * Structure holding the metadata for each allocated memory block.
133 133 * Modifications to such objects should be made while holding the
134 134 * object->lock. Insertions or deletions from object_list, gray_list or
135   - * tree_node are already protected by the corresponding locks or mutex (see
  135 + * rb_node are already protected by the corresponding locks or mutex (see
136 136 * the notes on locking above). These objects are reference-counted
137 137 * (use_count) and freed using the RCU mechanism.
138 138 */
... ... @@ -141,7 +141,7 @@
141 141 unsigned long flags; /* object status flags */
142 142 struct list_head object_list;
143 143 struct list_head gray_list;
144   - struct prio_tree_node tree_node;
  144 + struct rb_node rb_node;
145 145 struct rcu_head rcu; /* object_list lockless traversal */
146 146 /* object usage count; object freed when use_count == 0 */
147 147 atomic_t use_count;
... ... @@ -182,9 +182,9 @@
182 182 static LIST_HEAD(object_list);
183 183 /* the list of gray-colored objects (see color_gray comment below) */
184 184 static LIST_HEAD(gray_list);
185   -/* prio search tree for object boundaries */
186   -static struct prio_tree_root object_tree_root;
187   -/* rw_lock protecting the access to object_list and prio_tree_root */
  185 +/* search tree for object boundaries */
  186 +static struct rb_root object_tree_root = RB_ROOT;
  187 +/* rw_lock protecting the access to object_list and object_tree_root */
188 188 static DEFINE_RWLOCK(kmemleak_lock);
189 189  
190 190 /* allocation caches for kmemleak internal data */
... ... @@ -380,7 +380,7 @@
380 380 trace.entries = object->trace;
381 381  
382 382 pr_notice("Object 0x%08lx (size %zu):\n",
383   - object->tree_node.start, object->size);
  383 + object->pointer, object->size);
384 384 pr_notice(" comm \"%s\", pid %d, jiffies %lu\n",
385 385 object->comm, object->pid, object->jiffies);
386 386 pr_notice(" min_count = %d\n", object->min_count);
387 387  
388 388  
389 389  
390 390  
... ... @@ -392,32 +392,32 @@
392 392 }
393 393  
394 394 /*
395   - * Look-up a memory block metadata (kmemleak_object) in the priority search
  395 + * Look-up a memory block metadata (kmemleak_object) in the object search
396 396 * tree based on a pointer value. If alias is 0, only values pointing to the
397 397 * beginning of the memory block are allowed. The kmemleak_lock must be held
398 398 * when calling this function.
399 399 */
400 400 static struct kmemleak_object *lookup_object(unsigned long ptr, int alias)
401 401 {
402   - struct prio_tree_node *node;
403   - struct prio_tree_iter iter;
404   - struct kmemleak_object *object;
  402 + struct rb_node *rb = object_tree_root.rb_node;
405 403  
406   - prio_tree_iter_init(&iter, &object_tree_root, ptr, ptr);
407   - node = prio_tree_next(&iter);
408   - if (node) {
409   - object = prio_tree_entry(node, struct kmemleak_object,
410   - tree_node);
411   - if (!alias && object->pointer != ptr) {
  404 + while (rb) {
  405 + struct kmemleak_object *object =
  406 + rb_entry(rb, struct kmemleak_object, rb_node);
  407 + if (ptr < object->pointer)
  408 + rb = object->rb_node.rb_left;
  409 + else if (object->pointer + object->size <= ptr)
  410 + rb = object->rb_node.rb_right;
  411 + else if (object->pointer == ptr || alias)
  412 + return object;
  413 + else {
412 414 kmemleak_warn("Found object by alias at 0x%08lx\n",
413 415 ptr);
414 416 dump_object_info(object);
415   - object = NULL;
  417 + break;
416 418 }
417   - } else
418   - object = NULL;
419   -
420   - return object;
  419 + }
  420 + return NULL;
421 421 }
422 422  
423 423 /*
... ... @@ -471,7 +471,7 @@
471 471 }
472 472  
473 473 /*
474   - * Look up an object in the prio search tree and increase its use_count.
  474 + * Look up an object in the object search tree and increase its use_count.
475 475 */
476 476 static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
477 477 {
... ... @@ -516,8 +516,8 @@
516 516 int min_count, gfp_t gfp)
517 517 {
518 518 unsigned long flags;
519   - struct kmemleak_object *object;
520   - struct prio_tree_node *node;
  519 + struct kmemleak_object *object, *parent;
  520 + struct rb_node **link, *rb_parent;
521 521  
522 522 object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
523 523 if (!object) {
524 524  
525 525  
... ... @@ -560,31 +560,34 @@
560 560 /* kernel backtrace */
561 561 object->trace_len = __save_stack_trace(object->trace);
562 562  
563   - INIT_PRIO_TREE_NODE(&object->tree_node);
564   - object->tree_node.start = ptr;
565   - object->tree_node.last = ptr + size - 1;
566   -
567 563 write_lock_irqsave(&kmemleak_lock, flags);
568 564  
569 565 min_addr = min(min_addr, ptr);
570 566 max_addr = max(max_addr, ptr + size);
571   - node = prio_tree_insert(&object_tree_root, &object->tree_node);
572   - /*
573   - * The code calling the kernel does not yet have the pointer to the
574   - * memory block to be able to free it. However, we still hold the
575   - * kmemleak_lock here in case parts of the kernel started freeing
576   - * random memory blocks.
577   - */
578   - if (node != &object->tree_node) {
579   - kmemleak_stop("Cannot insert 0x%lx into the object search tree "
580   - "(already existing)\n", ptr);
581   - object = lookup_object(ptr, 1);
582   - spin_lock(&object->lock);
583   - dump_object_info(object);
584   - spin_unlock(&object->lock);
585   -
586   - goto out;
  567 + link = &object_tree_root.rb_node;
  568 + rb_parent = NULL;
  569 + while (*link) {
  570 + rb_parent = *link;
  571 + parent = rb_entry(rb_parent, struct kmemleak_object, rb_node);
  572 + if (ptr + size <= parent->pointer)
  573 + link = &parent->rb_node.rb_left;
  574 + else if (parent->pointer + parent->size <= ptr)
  575 + link = &parent->rb_node.rb_right;
  576 + else {
  577 + kmemleak_stop("Cannot insert 0x%lx into the object "
  578 + "search tree (overlaps existing)\n",
  579 + ptr);
  580 + kmem_cache_free(object_cache, object);
  581 + object = parent;
  582 + spin_lock(&object->lock);
  583 + dump_object_info(object);
  584 + spin_unlock(&object->lock);
  585 + goto out;
  586 + }
587 587 }
  588 + rb_link_node(&object->rb_node, rb_parent, link);
  589 + rb_insert_color(&object->rb_node, &object_tree_root);
  590 +
588 591 list_add_tail_rcu(&object->object_list, &object_list);
589 592 out:
590 593 write_unlock_irqrestore(&kmemleak_lock, flags);
... ... @@ -600,7 +603,7 @@
600 603 unsigned long flags;
601 604  
602 605 write_lock_irqsave(&kmemleak_lock, flags);
603   - prio_tree_remove(&object_tree_root, &object->tree_node);
  606 + rb_erase(&object->rb_node, &object_tree_root);
604 607 list_del_rcu(&object->object_list);
605 608 write_unlock_irqrestore(&kmemleak_lock, flags);
606 609  
... ... @@ -1766,7 +1769,6 @@
1766 1769  
1767 1770 object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE);
1768 1771 scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);
1769   - INIT_PRIO_TREE_ROOT(&object_tree_root);
1770 1772  
1771 1773 if (crt_early_log >= ARRAY_SIZE(early_log))
1772 1774 pr_warning("Early log buffer exceeded (%d), please increase "