Commit c4fd308ed62f292518363ea9c6c2adb3c2d95f9d

Authored by Linus Torvalds

Merge branch 'x86-pat-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-pat-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, pat: Update the page flags for memtype atomically instead of using memtype_lock
  x86, pat: In rbt_memtype_check_insert(), update new->type only if valid
  x86, pat: Migrate to rbtree only backend for pat memtype management
  x86, pat: Preparatory changes in pat.c for bigger rbtree change
  rbtree: Add support for augmented rbtrees

Showing 8 changed files

Documentation/rbtree.txt
... ... @@ -189,4 +189,63 @@
189 189 struct rb_node *node;
190 190 for (node = rb_first(&mytree); node; node = rb_next(node))
191 191 printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring);
  192 +
  193 +Support for Augmented rbtrees
  194 +-----------------------------
  195 +
  196 +Augmented rbtree is an rbtree with "some" additional data stored in each node.
  197 +This data can be used to add new functionality to the rbtree.
  198 +Augmented rbtree is an optional feature built on top of the basic rbtree
  199 +infrastructure. An rbtree user who wants this feature initializes the
  200 +augment callback function in rb_root.
  201 +
  202 +This callback function will be called from rbtree core routines whenever
  203 +a node has a change in one or both of its children. It is the responsibility
  204 +of the callback function to recalculate the additional data that is in the
  205 +rb node using new children information. Note that if this new additional
  206 +data affects the parent node's additional data, then the callback function
  207 +has to handle it and do the recursive updates.
  208 +
  209 +
  210 +An interval tree is an example of an augmented rbtree. Reference -
  211 +"Introduction to Algorithms" by Cormen, Leiserson, Rivest and Stein.
  212 +More details about interval trees:
  213 +
  214 +A classical rbtree has a single key and cannot be directly used to store
  215 +interval ranges like [lo:hi] and do a quick lookup for any overlap with a new
  216 +lo:hi, or to find whether there is an exact match for a new lo:hi.
  217 +
  218 +However, an rbtree can be augmented to store such interval ranges in a
  219 +structured way, making it possible to do efficient lookup and exact match.
  220 +
  221 +This "extra information" stored in each node is the maximum hi
  222 +(max_hi) value among all the nodes that are its descendants. This
  223 +information can be maintained at each node just by looking at the node
  224 +and its immediate children. This is then used in an O(log n) lookup
  225 +for lowest match (lowest start address among all possible matches)
  226 +with something like:
  227 +
  228 +find_lowest_match(lo, hi, node)
  229 +{
  230 +	lowest_match = NULL;
  231 +	while (node) {
  232 +		if (max_hi(node->left) > lo) {
  233 +			// Lowest overlap if any must be on left side
  234 +			node = node->left;
  235 +		} else if (overlap(lo, hi, node)) {
  236 +			lowest_match = node;
  237 +			break;
  238 +		} else if (lo > node->lo) {
  239 +			// Lowest overlap if any must be on right side
  240 +			node = node->right;
  241 +		} else {
  242 +			break;
  243 +		}
  244 +	}
  245 +	return lowest_match;
  246 +}
  247 +
  248 +Finding an exact match means first finding the lowest match and then following
  249 +successor nodes looking for an exact match, until the start of a node is beyond
  250 +the hi value we are looking for.
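
The documentation above leaves the callback contract implicit, so here is a rough sketch of how a user of the new interface might wire it up: keep the extra field next to the rb_node, define the root with RB_AUGMENT_ROOT(), and have the callback recompute the field from the node and its immediate children, walking up while ancestors are affected. The struct and helper names are illustrative only; pat_rbtree.c later in this commit is the real in-tree user.

	struct mynode {
		u64 lo, hi;
		u64 max_hi;	/* max hi over this node and all its descendants */
		struct rb_node rb;
	};

	/* Called by the rbtree core when this node's children may have changed. */
	static void mynode_augment_cb(struct rb_node *rbnode)
	{
		while (rbnode) {
			struct mynode *n = container_of(rbnode, struct mynode, rb);
			u64 max = n->hi;

			if (rbnode->rb_left) {
				struct mynode *l = container_of(rbnode->rb_left,
								struct mynode, rb);
				if (l->max_hi > max)
					max = l->max_hi;
			}
			if (rbnode->rb_right) {
				struct mynode *r = container_of(rbnode->rb_right,
								struct mynode, rb);
				if (r->max_hi > max)
					max = r->max_hi;
			}
			if (n->max_hi == max)
				break;			/* ancestors unaffected */
			n->max_hi = max;
			rbnode = rb_parent(rbnode);	/* propagate the change up */
		}
	}

	static struct rb_root mytree = RB_AUGMENT_ROOT(&mynode_augment_cb);

Insertion and removal use the ordinary rb_link_node()/rb_insert_color() and rb_erase() calls; with augment_cb set, the core invokes the callback from those paths, as the lib/rbtree.c hunks at the end of this commit show.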
arch/x86/include/asm/cacheflush.h
... ... @@ -44,9 +44,6 @@
44 44 memcpy(dst, src, len);
45 45 }
46 46  
47   -#define PG_WC PG_arch_1
48   -PAGEFLAG(WC, WC)
49   -
50 47 #ifdef CONFIG_X86_PAT
51 48 /*
52 49 * X86 PAT uses page flags WC and Uncached together to keep track of
53 50  
54 51  
55 52  
56 53  
... ... @@ -55,16 +52,24 @@
55 52 * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not
56 53 * been changed from its default (value of -1 used to denote this).
57 54 * Note we do not support _PAGE_CACHE_UC here.
58   - *
59   - * Caller must hold memtype_lock for atomicity.
60 55 */
  56 +
  57 +#define _PGMT_DEFAULT 0
  58 +#define _PGMT_WC (1UL << PG_arch_1)
  59 +#define _PGMT_UC_MINUS (1UL << PG_uncached)
  60 +#define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1)
  61 +#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1)
  62 +#define _PGMT_CLEAR_MASK (~_PGMT_MASK)
  63 +
61 64 static inline unsigned long get_page_memtype(struct page *pg)
62 65 {
63   - if (!PageUncached(pg) && !PageWC(pg))
  66 + unsigned long pg_flags = pg->flags & _PGMT_MASK;
  67 +
  68 + if (pg_flags == _PGMT_DEFAULT)
64 69 return -1;
65   - else if (!PageUncached(pg) && PageWC(pg))
  70 + else if (pg_flags == _PGMT_WC)
66 71 return _PAGE_CACHE_WC;
67   - else if (PageUncached(pg) && !PageWC(pg))
  72 + else if (pg_flags == _PGMT_UC_MINUS)
68 73 return _PAGE_CACHE_UC_MINUS;
69 74 else
70 75 return _PAGE_CACHE_WB;
71 76  
72 77  
73 78  
74 79  
75 80  
... ... @@ -72,25 +77,26 @@
72 77  
73 78 static inline void set_page_memtype(struct page *pg, unsigned long memtype)
74 79 {
  80 + unsigned long memtype_flags = _PGMT_DEFAULT;
  81 + unsigned long old_flags;
  82 + unsigned long new_flags;
  83 +
75 84 switch (memtype) {
76 85 case _PAGE_CACHE_WC:
77   - ClearPageUncached(pg);
78   - SetPageWC(pg);
  86 + memtype_flags = _PGMT_WC;
79 87 break;
80 88 case _PAGE_CACHE_UC_MINUS:
81   - SetPageUncached(pg);
82   - ClearPageWC(pg);
  89 + memtype_flags = _PGMT_UC_MINUS;
83 90 break;
84 91 case _PAGE_CACHE_WB:
85   - SetPageUncached(pg);
86   - SetPageWC(pg);
  92 + memtype_flags = _PGMT_WB;
87 93 break;
88   - default:
89   - case -1:
90   - ClearPageUncached(pg);
91   - ClearPageWC(pg);
92   - break;
93 94 }
  95 +
  96 + do {
  97 + old_flags = pg->flags;
  98 + new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags;
  99 + } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags);
94 100 }
95 101 #else
96 102 static inline unsigned long get_page_memtype(struct page *pg) { return -1; }
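
The replacement helpers encode four memtype states in the two page-flag bits PG_arch_1 and PG_uncached and update them with a compare-and-swap loop, which is what lets the memtype_lock requirement be dropped for RAM pages. Below is a minimal sketch of the same read-modify-write pattern on a bare flags word, with hypothetical names, only to show why the loop retries:

	/*
	 * Replace only the bits covered by 'mask', without losing concurrent
	 * updates to the other bits in *flags.
	 */
	static void update_flag_bits(unsigned long *flags,
				     unsigned long mask, unsigned long bits)
	{
		unsigned long old, new;

		do {
			old = *flags;			/* snapshot              */
			new = (old & ~mask) | bits;	/* rewrite only our bits */
		} while (cmpxchg(flags, old, new) != old);	/* retry if it moved */
	}

set_page_memtype() above is this pattern applied to page->flags with _PGMT_MASK and one of the _PGMT_* values.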
arch/x86/mm/Makefile
... ... @@ -6,6 +6,7 @@
6 6 CFLAGS_physaddr.o := $(nostackp)
7 7 CFLAGS_setup_nx.o := $(nostackp)
8 8  
  9 +obj-$(CONFIG_X86_PAT) += pat_rbtree.o
9 10 obj-$(CONFIG_SMP) += tlb.o
10 11  
11 12 obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
arch/x86/mm/pat.c
... ... @@ -30,6 +30,8 @@
30 30 #include <asm/pat.h>
31 31 #include <asm/io.h>
32 32  
  33 +#include "pat_internal.h"
  34 +
33 35 #ifdef CONFIG_X86_PAT
34 36 int __read_mostly pat_enabled = 1;
35 37  
36 38  
37 39  
... ... @@ -53,19 +55,15 @@
53 55 #endif
54 56  
55 57  
56   -static int debug_enable;
  58 +int pat_debug_enable;
57 59  
58 60 static int __init pat_debug_setup(char *str)
59 61 {
60   - debug_enable = 1;
  62 + pat_debug_enable = 1;
61 63 return 0;
62 64 }
63 65 __setup("debugpat", pat_debug_setup);
64 66  
65   -#define dprintk(fmt, arg...) \
66   - do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0)
67   -
68   -
69 67 static u64 __read_mostly boot_pat_state;
70 68  
71 69 enum {
72 70  
... ... @@ -132,86 +130,9 @@
132 130  
133 131 #undef PAT
134 132  
135   -static char *cattr_name(unsigned long flags)
136   -{
137   - switch (flags & _PAGE_CACHE_MASK) {
138   - case _PAGE_CACHE_UC: return "uncached";
139   - case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
140   - case _PAGE_CACHE_WB: return "write-back";
141   - case _PAGE_CACHE_WC: return "write-combining";
142   - default: return "broken";
143   - }
144   -}
  133 +static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */
145 134  
146 135 /*
147   - * The global memtype list keeps track of memory type for specific
148   - * physical memory areas. Conflicting memory types in different
149   - * mappings can cause CPU cache corruption. To avoid this we keep track.
150   - *
151   - * The list is sorted based on starting address and can contain multiple
152   - * entries for each address (this allows reference counting for overlapping
153   - * areas). All the aliases have the same cache attributes of course.
154   - * Zero attributes are represented as holes.
155   - *
156   - * The data structure is a list that is also organized as an rbtree
157   - * sorted on the start address of memtype range.
158   - *
159   - * memtype_lock protects both the linear list and rbtree.
160   - */
161   -
162   -struct memtype {
163   - u64 start;
164   - u64 end;
165   - unsigned long type;
166   - struct list_head nd;
167   - struct rb_node rb;
168   -};
169   -
170   -static struct rb_root memtype_rbroot = RB_ROOT;
171   -static LIST_HEAD(memtype_list);
172   -static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
173   -
174   -static struct memtype *memtype_rb_search(struct rb_root *root, u64 start)
175   -{
176   - struct rb_node *node = root->rb_node;
177   - struct memtype *last_lower = NULL;
178   -
179   - while (node) {
180   - struct memtype *data = container_of(node, struct memtype, rb);
181   -
182   - if (data->start < start) {
183   - last_lower = data;
184   - node = node->rb_right;
185   - } else if (data->start > start) {
186   - node = node->rb_left;
187   - } else
188   - return data;
189   - }
190   -
191   - /* Will return NULL if there is no entry with its start <= start */
192   - return last_lower;
193   -}
194   -
195   -static void memtype_rb_insert(struct rb_root *root, struct memtype *data)
196   -{
197   - struct rb_node **new = &(root->rb_node);
198   - struct rb_node *parent = NULL;
199   -
200   - while (*new) {
201   - struct memtype *this = container_of(*new, struct memtype, rb);
202   -
203   - parent = *new;
204   - if (data->start <= this->start)
205   - new = &((*new)->rb_left);
206   - else if (data->start > this->start)
207   - new = &((*new)->rb_right);
208   - }
209   -
210   - rb_link_node(&data->rb, parent, new);
211   - rb_insert_color(&data->rb, root);
212   -}
213   -
214   -/*
215 136 * Does intersection of PAT memory type and MTRR memory type and returns
216 137 * the resulting memory type as PAT understands it.
217 138 * (Type in pat and mtrr will not have same value)
... ... @@ -237,33 +158,6 @@
237 158 return req_type;
238 159 }
239 160  
240   -static int
241   -chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
242   -{
243   - if (new->type != entry->type) {
244   - if (type) {
245   - new->type = entry->type;
246   - *type = entry->type;
247   - } else
248   - goto conflict;
249   - }
250   -
251   - /* check overlaps with more than one entry in the list */
252   - list_for_each_entry_continue(entry, &memtype_list, nd) {
253   - if (new->end <= entry->start)
254   - break;
255   - else if (new->type != entry->type)
256   - goto conflict;
257   - }
258   - return 0;
259   -
260   - conflict:
261   - printk(KERN_INFO "%s:%d conflicting memory types "
262   - "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start,
263   - new->end, cattr_name(new->type), cattr_name(entry->type));
264   - return -EBUSY;
265   -}
266   -
267 161 static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
268 162 {
269 163 int ram_page = 0, not_rampage = 0;
... ... @@ -296,8 +190,6 @@
296 190 * Here we do two pass:
297 191 * - Find the memtype of all the pages in the range, look for any conflicts
298 192 * - In case of no conflicts, set the new memtype for pages in the range
299   - *
300   - * Caller must hold memtype_lock for atomicity.
301 193 */
302 194 static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
303 195 unsigned long *new_type)
304 196  
... ... @@ -364,9 +256,8 @@
364 256 int reserve_memtype(u64 start, u64 end, unsigned long req_type,
365 257 unsigned long *new_type)
366 258 {
367   - struct memtype *new, *entry;
  259 + struct memtype *new;
368 260 unsigned long actual_type;
369   - struct list_head *where;
370 261 int is_range_ram;
371 262 int err = 0;
372 263  
373 264  
... ... @@ -404,9 +295,7 @@
404 295 is_range_ram = pat_pagerange_is_ram(start, end);
405 296 if (is_range_ram == 1) {
406 297  
407   - spin_lock(&memtype_lock);
408 298 err = reserve_ram_pages_type(start, end, req_type, new_type);
409   - spin_unlock(&memtype_lock);
410 299  
411 300 return err;
412 301 } else if (is_range_ram < 0) {
... ... @@ -423,42 +312,7 @@
423 312  
424 313 spin_lock(&memtype_lock);
425 314  
426   - /* Search for existing mapping that overlaps the current range */
427   - where = NULL;
428   - list_for_each_entry(entry, &memtype_list, nd) {
429   - if (end <= entry->start) {
430   - where = entry->nd.prev;
431   - break;
432   - } else if (start <= entry->start) { /* end > entry->start */
433   - err = chk_conflict(new, entry, new_type);
434   - if (!err) {
435   - dprintk("Overlap at 0x%Lx-0x%Lx\n",
436   - entry->start, entry->end);
437   - where = entry->nd.prev;
438   - }
439   - break;
440   - } else if (start < entry->end) { /* start > entry->start */
441   - err = chk_conflict(new, entry, new_type);
442   - if (!err) {
443   - dprintk("Overlap at 0x%Lx-0x%Lx\n",
444   - entry->start, entry->end);
445   -
446   - /*
447   - * Move to right position in the linked
448   - * list to add this new entry
449   - */
450   - list_for_each_entry_continue(entry,
451   - &memtype_list, nd) {
452   - if (start <= entry->start) {
453   - where = entry->nd.prev;
454   - break;
455   - }
456   - }
457   - }
458   - break;
459   - }
460   - }
461   -
  315 + err = rbt_memtype_check_insert(new, new_type);
462 316 if (err) {
463 317 printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, "
464 318 "track %s, req %s\n",
... ... @@ -469,13 +323,6 @@
469 323 return err;
470 324 }
471 325  
472   - if (where)
473   - list_add(&new->nd, where);
474   - else
475   - list_add_tail(&new->nd, &memtype_list);
476   -
477   - memtype_rb_insert(&memtype_rbroot, new);
478   -
479 326 spin_unlock(&memtype_lock);
480 327  
481 328 dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
... ... @@ -487,7 +334,6 @@
487 334  
488 335 int free_memtype(u64 start, u64 end)
489 336 {
490   - struct memtype *entry, *saved_entry;
491 337 int err = -EINVAL;
492 338 int is_range_ram;
493 339  
494 340  
... ... @@ -501,9 +347,7 @@
501 347 is_range_ram = pat_pagerange_is_ram(start, end);
502 348 if (is_range_ram == 1) {
503 349  
504   - spin_lock(&memtype_lock);
505 350 err = free_ram_pages_type(start, end);
506   - spin_unlock(&memtype_lock);
507 351  
508 352 return err;
509 353 } else if (is_range_ram < 0) {
... ... @@ -511,46 +355,7 @@
511 355 }
512 356  
513 357 spin_lock(&memtype_lock);
514   -
515   - entry = memtype_rb_search(&memtype_rbroot, start);
516   - if (unlikely(entry == NULL))
517   - goto unlock_ret;
518   -
519   - /*
520   - * Saved entry points to an entry with start same or less than what
521   - * we searched for. Now go through the list in both directions to look
522   - * for the entry that matches with both start and end, with list stored
523   - * in sorted start address
524   - */
525   - saved_entry = entry;
526   - list_for_each_entry_from(entry, &memtype_list, nd) {
527   - if (entry->start == start && entry->end == end) {
528   - rb_erase(&entry->rb, &memtype_rbroot);
529   - list_del(&entry->nd);
530   - kfree(entry);
531   - err = 0;
532   - break;
533   - } else if (entry->start > start) {
534   - break;
535   - }
536   - }
537   -
538   - if (!err)
539   - goto unlock_ret;
540   -
541   - entry = saved_entry;
542   - list_for_each_entry_reverse(entry, &memtype_list, nd) {
543   - if (entry->start == start && entry->end == end) {
544   - rb_erase(&entry->rb, &memtype_rbroot);
545   - list_del(&entry->nd);
546   - kfree(entry);
547   - err = 0;
548   - break;
549   - } else if (entry->start < start) {
550   - break;
551   - }
552   - }
553   -unlock_ret:
  358 + err = rbt_memtype_erase(start, end);
554 359 spin_unlock(&memtype_lock);
555 360  
556 361 if (err) {
557 362  
... ... @@ -583,10 +388,8 @@
583 388  
584 389 if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
585 390 struct page *page;
586   - spin_lock(&memtype_lock);
587 391 page = pfn_to_page(paddr >> PAGE_SHIFT);
588 392 rettype = get_page_memtype(page);
589   - spin_unlock(&memtype_lock);
590 393 /*
591 394 * -1 from get_page_memtype() implies RAM page is in its
592 395 * default state and not reserved, and hence of type WB
... ... @@ -599,7 +402,7 @@
599 402  
600 403 spin_lock(&memtype_lock);
601 404  
602   - entry = memtype_rb_search(&memtype_rbroot, paddr);
  405 + entry = rbt_memtype_lookup(paddr);
603 406 if (entry != NULL)
604 407 rettype = entry->type;
605 408 else
606 409  
607 410  
608 411  
609 412  
610 413  
... ... @@ -936,29 +739,25 @@
936 739  
937 740 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
938 741  
939   -/* get Nth element of the linked list */
940 742 static struct memtype *memtype_get_idx(loff_t pos)
941 743 {
942   - struct memtype *list_node, *print_entry;
943   - int i = 1;
  744 + struct memtype *print_entry;
  745 + int ret;
944 746  
945   - print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
  747 + print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL);
946 748 if (!print_entry)
947 749 return NULL;
948 750  
949 751 spin_lock(&memtype_lock);
950   - list_for_each_entry(list_node, &memtype_list, nd) {
951   - if (pos == i) {
952   - *print_entry = *list_node;
953   - spin_unlock(&memtype_lock);
954   - return print_entry;
955   - }
956   - ++i;
957   - }
  752 + ret = rbt_memtype_copy_nth_element(print_entry, pos);
958 753 spin_unlock(&memtype_lock);
959   - kfree(print_entry);
960 754  
961   - return NULL;
  755 + if (!ret) {
  756 + return print_entry;
  757 + } else {
  758 + kfree(print_entry);
  759 + return NULL;
  760 + }
962 761 }
963 762  
964 763 static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
arch/x86/mm/pat_internal.h
  1 +#ifndef __PAT_INTERNAL_H_
  2 +#define __PAT_INTERNAL_H_
  3 +
  4 +extern int pat_debug_enable;
  5 +
  6 +#define dprintk(fmt, arg...) \
  7 + do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0)
  8 +
  9 +struct memtype {
  10 + u64 start;
  11 + u64 end;
  12 + u64 subtree_max_end;
  13 + unsigned long type;
  14 + struct rb_node rb;
  15 +};
  16 +
  17 +static inline char *cattr_name(unsigned long flags)
  18 +{
  19 + switch (flags & _PAGE_CACHE_MASK) {
  20 + case _PAGE_CACHE_UC: return "uncached";
  21 + case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
  22 + case _PAGE_CACHE_WB: return "write-back";
  23 + case _PAGE_CACHE_WC: return "write-combining";
  24 + default: return "broken";
  25 + }
  26 +}
  27 +
  28 +#ifdef CONFIG_X86_PAT
  29 +extern int rbt_memtype_check_insert(struct memtype *new,
  30 + unsigned long *new_type);
  31 +extern int rbt_memtype_erase(u64 start, u64 end);
  32 +extern struct memtype *rbt_memtype_lookup(u64 addr);
  33 +extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos);
  34 +#else
  35 +static inline int rbt_memtype_check_insert(struct memtype *new,
  36 + unsigned long *new_type)
  37 +{ return 0; }
  38 +static inline int rbt_memtype_erase(u64 start, u64 end)
  39 +{ return 0; }
  40 +static inline struct memtype *rbt_memtype_lookup(u64 addr)
  41 +{ return NULL; }
  42 +static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
  43 +{ return 0; }
  44 +#endif
  45 +
  46 +#endif /* __PAT_INTERNAL_H_ */
arch/x86/mm/pat_rbtree.c
  1 +/*
  2 + * Handle caching attributes in page tables (PAT)
  3 + *
  4 + * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
  5 + * Suresh B Siddha <suresh.b.siddha@intel.com>
  6 + *
  7 + * Interval tree (augmented rbtree) used to store the PAT memory type
  8 + * reservations.
  9 + */
  10 +
  11 +#include <linux/seq_file.h>
  12 +#include <linux/debugfs.h>
  13 +#include <linux/kernel.h>
  14 +#include <linux/module.h>
  15 +#include <linux/rbtree.h>
  16 +#include <linux/sched.h>
  17 +#include <linux/gfp.h>
  18 +
  19 +#include <asm/pgtable.h>
  20 +#include <asm/pat.h>
  21 +
  22 +#include "pat_internal.h"
  23 +
  24 +/*
  25 + * The memtype tree keeps track of memory type for specific
  26 + * physical memory areas. Without proper tracking, conflicting memory
  27 + * types in different mappings can cause CPU cache corruption.
  28 + *
  29 + * The tree is an interval tree (augmented rbtree) with tree ordered
  30 + * on starting address. Tree can contain multiple entries for
  31 + * different regions which overlap. All the aliases have the same
  32 + * cache attributes of course.
  33 + *
  34 + * memtype_lock protects the rbtree.
  35 + */
  36 +
  37 +static void memtype_rb_augment_cb(struct rb_node *node);
  38 +static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb);
  39 +
  40 +static int is_node_overlap(struct memtype *node, u64 start, u64 end)
  41 +{
  42 + if (node->start >= end || node->end <= start)
  43 + return 0;
  44 +
  45 + return 1;
  46 +}
  47 +
  48 +static u64 get_subtree_max_end(struct rb_node *node)
  49 +{
  50 + u64 ret = 0;
  51 + if (node) {
  52 + struct memtype *data = container_of(node, struct memtype, rb);
  53 + ret = data->subtree_max_end;
  54 + }
  55 + return ret;
  56 +}
  57 +
  58 +/* Update 'subtree_max_end' for a node, based on node and its children */
  59 +static void update_node_max_end(struct rb_node *node)
  60 +{
  61 + struct memtype *data;
  62 + u64 max_end, child_max_end;
  63 +
  64 + if (!node)
  65 + return;
  66 +
  67 + data = container_of(node, struct memtype, rb);
  68 + max_end = data->end;
  69 +
  70 + child_max_end = get_subtree_max_end(node->rb_right);
  71 + if (child_max_end > max_end)
  72 + max_end = child_max_end;
  73 +
  74 + child_max_end = get_subtree_max_end(node->rb_left);
  75 + if (child_max_end > max_end)
  76 + max_end = child_max_end;
  77 +
  78 + data->subtree_max_end = max_end;
  79 +}
  80 +
  81 +/* Update 'subtree_max_end' for a node and all its ancestors */
  82 +static void update_path_max_end(struct rb_node *node)
  83 +{
  84 + u64 old_max_end, new_max_end;
  85 +
  86 + while (node) {
  87 + struct memtype *data = container_of(node, struct memtype, rb);
  88 +
  89 + old_max_end = data->subtree_max_end;
  90 + update_node_max_end(node);
  91 + new_max_end = data->subtree_max_end;
  92 +
  93 + if (new_max_end == old_max_end)
  94 + break;
  95 +
  96 + node = rb_parent(node);
  97 + }
  98 +}
  99 +
  100 +/* Find the first (lowest start addr) overlapping range from rb tree */
  101 +static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
  102 + u64 start, u64 end)
  103 +{
  104 + struct rb_node *node = root->rb_node;
  105 + struct memtype *last_lower = NULL;
  106 +
  107 + while (node) {
  108 + struct memtype *data = container_of(node, struct memtype, rb);
  109 +
  110 + if (get_subtree_max_end(node->rb_left) > start) {
  111 + /* Lowest overlap if any must be on left side */
  112 + node = node->rb_left;
  113 + } else if (is_node_overlap(data, start, end)) {
  114 + last_lower = data;
  115 + break;
  116 + } else if (start >= data->start) {
  117 + /* Lowest overlap if any must be on right side */
  118 + node = node->rb_right;
  119 + } else {
  120 + break;
  121 + }
  122 + }
  123 + return last_lower; /* Returns NULL if there is no overlap */
  124 +}
  125 +
  126 +static struct memtype *memtype_rb_exact_match(struct rb_root *root,
  127 + u64 start, u64 end)
  128 +{
  129 + struct memtype *match;
  130 +
  131 + match = memtype_rb_lowest_match(root, start, end);
  132 + while (match != NULL && match->start < end) {
  133 + struct rb_node *node;
  134 +
  135 + if (match->start == start && match->end == end)
  136 + return match;
  137 +
  138 + node = rb_next(&match->rb);
  139 + if (node)
  140 + match = container_of(node, struct memtype, rb);
  141 + else
  142 + match = NULL;
  143 + }
  144 +
  145 + return NULL; /* Returns NULL if there is no exact match */
  146 +}
  147 +
  148 +static int memtype_rb_check_conflict(struct rb_root *root,
  149 + u64 start, u64 end,
  150 + unsigned long reqtype, unsigned long *newtype)
  151 +{
  152 + struct rb_node *node;
  153 + struct memtype *match;
  154 + int found_type = reqtype;
  155 +
  156 + match = memtype_rb_lowest_match(&memtype_rbroot, start, end);
  157 + if (match == NULL)
  158 + goto success;
  159 +
  160 + if (match->type != found_type && newtype == NULL)
  161 + goto failure;
  162 +
  163 + dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
  164 + found_type = match->type;
  165 +
  166 + node = rb_next(&match->rb);
  167 + while (node) {
  168 + match = container_of(node, struct memtype, rb);
  169 +
  170 + if (match->start >= end) /* Checked all possible matches */
  171 + goto success;
  172 +
  173 + if (is_node_overlap(match, start, end) &&
  174 + match->type != found_type) {
  175 + goto failure;
  176 + }
  177 +
  178 + node = rb_next(&match->rb);
  179 + }
  180 +success:
  181 + if (newtype)
  182 + *newtype = found_type;
  183 +
  184 + return 0;
  185 +
  186 +failure:
  187 + printk(KERN_INFO "%s:%d conflicting memory types "
  188 + "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start,
  189 + end, cattr_name(found_type), cattr_name(match->type));
  190 + return -EBUSY;
  191 +}
  192 +
  193 +static void memtype_rb_augment_cb(struct rb_node *node)
  194 +{
  195 + if (node)
  196 + update_path_max_end(node);
  197 +}
  198 +
  199 +static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
  200 +{
  201 + struct rb_node **node = &(root->rb_node);
  202 + struct rb_node *parent = NULL;
  203 +
  204 + while (*node) {
  205 + struct memtype *data = container_of(*node, struct memtype, rb);
  206 +
  207 + parent = *node;
  208 + if (newdata->start <= data->start)
  209 + node = &((*node)->rb_left);
  210 + else if (newdata->start > data->start)
  211 + node = &((*node)->rb_right);
  212 + }
  213 +
  214 + rb_link_node(&newdata->rb, parent, node);
  215 + rb_insert_color(&newdata->rb, root);
  216 +}
  217 +
  218 +int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
  219 +{
  220 + int err = 0;
  221 +
  222 + err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end,
  223 + new->type, ret_type);
  224 +
  225 + if (!err) {
  226 + if (ret_type)
  227 + new->type = *ret_type;
  228 +
  229 + memtype_rb_insert(&memtype_rbroot, new);
  230 + }
  231 + return err;
  232 +}
  233 +
  234 +int rbt_memtype_erase(u64 start, u64 end)
  235 +{
  236 + struct memtype *data;
  237 +
  238 + data = memtype_rb_exact_match(&memtype_rbroot, start, end);
  239 + if (!data)
  240 + return -EINVAL;
  241 +
  242 + rb_erase(&data->rb, &memtype_rbroot);
  243 + return 0;
  244 +}
  245 +
  246 +struct memtype *rbt_memtype_lookup(u64 addr)
  247 +{
  248 + struct memtype *data;
  249 + data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE);
  250 + return data;
  251 +}
  252 +
  253 +#if defined(CONFIG_DEBUG_FS)
  254 +int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
  255 +{
  256 + struct rb_node *node;
  257 + int i = 1;
  258 +
  259 + node = rb_first(&memtype_rbroot);
  260 + while (node && pos != i) {
  261 + node = rb_next(node);
  262 + i++;
  263 + }
  264 +
  265 + if (node) { /* pos == i */
  266 + struct memtype *this = container_of(node, struct memtype, rb);
  267 + *out = *this;
  268 + return 0;
  269 + } else {
  270 + return 1;
  271 + }
  272 +}
  273 +#endif
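
All four rbt_memtype_* entry points assume the caller holds memtype_lock; the pat.c hunks above keep taking it around the non-RAM reserve and free paths. Schematically, the call sites reduce to the following condensed sketch (not additional code from the commit):

	/* reserve path, after 'new' has been allocated and filled in */
	spin_lock(&memtype_lock);
	err = rbt_memtype_check_insert(new, new_type);	/* links 'new' into the tree on success */
	spin_unlock(&memtype_lock);

	/* free path */
	spin_lock(&memtype_lock);
	err = rbt_memtype_erase(start, end);		/* requires an exact start/end match */
	spin_unlock(&memtype_lock);

RAM-backed ranges never reach the tree: reserve_ram_pages_type() and free_ram_pages_type() track them entirely through the atomic page-flag helpers, which is why their memtype_lock acquisition could be removed in this merge.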
include/linux/rbtree.h
... ... @@ -110,6 +110,7 @@
110 110 struct rb_root
111 111 {
112 112 struct rb_node *rb_node;
  113 + void (*augment_cb)(struct rb_node *node);
113 114 };
114 115  
115 116  
... ... @@ -129,7 +130,9 @@
129 130 rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
130 131 }
131 132  
132   -#define RB_ROOT (struct rb_root) { NULL, }
  133 +#define RB_ROOT (struct rb_root) { NULL, NULL, }
  134 +#define RB_AUGMENT_ROOT(x) (struct rb_root) { NULL, x}
  135 +
133 136 #define rb_entry(ptr, type, member) container_of(ptr, type, member)
134 137  
135 138 #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
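
Existing rbtree users are unchanged: RB_ROOT now also initializes the new augment_cb member to NULL, and the core only calls the callback when it is non-NULL, so a plain tree pays one pointer test in the insert, erase and rotation paths. A tree picks one form or the other at definition time (the callback name below is illustrative):

	static struct rb_root plain_tree = RB_ROOT;	/* augment_cb == NULL */

	static void my_augment_cb(struct rb_node *node);
	static struct rb_root interval_tree = RB_AUGMENT_ROOT(&my_augment_cb);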
lib/rbtree.c
... ... @@ -44,6 +44,11 @@
44 44 else
45 45 root->rb_node = right;
46 46 rb_set_parent(node, right);
  47 +
  48 + if (root->augment_cb) {
  49 + root->augment_cb(node);
  50 + root->augment_cb(right);
  51 + }
47 52 }
48 53  
49 54 static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
50 55  
... ... @@ -67,12 +72,20 @@
67 72 else
68 73 root->rb_node = left;
69 74 rb_set_parent(node, left);
  75 +
  76 + if (root->augment_cb) {
  77 + root->augment_cb(node);
  78 + root->augment_cb(left);
  79 + }
70 80 }
71 81  
72 82 void rb_insert_color(struct rb_node *node, struct rb_root *root)
73 83 {
74 84 struct rb_node *parent, *gparent;
75 85  
  86 + if (root->augment_cb)
  87 + root->augment_cb(node);
  88 +
76 89 while ((parent = rb_parent(node)) && rb_is_red(parent))
77 90 {
78 91 gparent = rb_parent(parent);
79 92  
... ... @@ -227,12 +240,15 @@
227 240 else
228 241 {
229 242 struct rb_node *old = node, *left;
  243 + int old_parent_cb = 0;
  244 + int successor_parent_cb = 0;
230 245  
231 246 node = node->rb_right;
232 247 while ((left = node->rb_left) != NULL)
233 248 node = left;
234 249  
235 250 if (rb_parent(old)) {
  251 + old_parent_cb = 1;
236 252 if (rb_parent(old)->rb_left == old)
237 253 rb_parent(old)->rb_left = node;
238 254 else
239 255  
... ... @@ -247,8 +263,10 @@
247 263 if (parent == old) {
248 264 parent = node;
249 265 } else {
  266 + successor_parent_cb = 1;
250 267 if (child)
251 268 rb_set_parent(child, parent);
  269 +
252 270 parent->rb_left = child;
253 271  
254 272 node->rb_right = old->rb_right;
... ... @@ -259,6 +277,24 @@
259 277 node->rb_left = old->rb_left;
260 278 rb_set_parent(old->rb_left, node);
261 279  
  280 + if (root->augment_cb) {
  281 + /*
  282 + * Here, three different nodes can have new children.
  283 + * The parent of the successor node that was selected
  284 + * to replace the node to be erased.
  285 + * The node that is getting erased and is now replaced
  286 + * by its successor.
  287 + * The parent of the node getting erased-replaced.
  288 + */
  289 + if (successor_parent_cb)
  290 + root->augment_cb(parent);
  291 +
  292 + root->augment_cb(node);
  293 +
  294 + if (old_parent_cb)
  295 + root->augment_cb(rb_parent(old));
  296 + }
  297 +
262 298 goto color;
263 299 }
264 300  
265 301  
266 302  
... ... @@ -267,15 +303,19 @@
267 303  
268 304 if (child)
269 305 rb_set_parent(child, parent);
270   - if (parent)
271   - {
  306 +
  307 + if (parent) {
272 308 if (parent->rb_left == node)
273 309 parent->rb_left = child;
274 310 else
275 311 parent->rb_right = child;
276   - }
277   - else
  312 +
  313 + if (root->augment_cb)
  314 + root->augment_cb(parent);
  315 +
  316 + } else {
278 317 root->rb_node = child;
  318 + }
279 319  
280 320 color:
281 321 if (color == RB_BLACK)
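
For the two-child case of rb_erase(), the comment above ("three different nodes can have new children") is easier to follow on a concrete shape. In the illustrative sketch below, old is the node being erased, S its in-order successor (the leftmost node of old's right subtree), SP the successor's former parent and c the successor's right child:

	        P                   P
	        |                   |
	       old                  S
	      /   \     =>         / \
	     A     R              A   R
	          /                  /
	        SP                 SP
	        /                  /
	       S                  c
	        \
	         c

	Children change at exactly three nodes: SP, whose left child becomes c
	(successor_parent_cb); S, which inherits old's children A and R (the
	augment_cb(node) call); and P, whose child pointer now points to S
	(old_parent_cb).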