Commit e709ffd6169ccd259eb5874e853303e91e94e829

Authored by Rik van Riel
Committed by Linus Torvalds
1 parent edad9d2c33

mm: remove swap token code

The swap token code no longer fits in with the current VM model.  It
does not play well with cgroups or the better NUMA placement code in
development, since we have only one swap token globally.

It also has the potential to mess with scalability of the system, by
increasing the number of non-reclaimable pages on the active and
inactive anon LRU lists.

Last but not least, the swap token code has been broken for a year
without complaints, as reported by Konstantin Khlebnikov.  This suggests
we no longer have much use for it.

The days of sub-1G memory systems with heavy use of swap are over.  If
we ever need thrash-reducing code in the future, we will have to
implement something that does scale.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hughd@google.com>
Acked-by: Bob Picco <bpicco@meloft.net>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 10 changed files with 2 additions and 307 deletions Side-by-side Diff

include/linux/mm_types.h
... ... @@ -345,17 +345,6 @@
345 345 /* Architecture-specific MM context */
346 346 mm_context_t context;
347 347  
348   - /* Swap token stuff */
349   - /*
350   - * Last value of global fault stamp as seen by this process.
351   - * In other words, this value gives an indication of how long
352   - * it has been since this task got the token.
353   - * Look at mm/thrash.c
354   - */
355   - unsigned int faultstamp;
356   - unsigned int token_priority;
357   - unsigned int last_interval;
358   -
359 348 unsigned long flags; /* Must use atomic bitops to access the bits */
360 349  
361 350 struct core_state *core_state; /* coredumping support */
include/linux/swap.h
... ... @@ -355,23 +355,6 @@
355 355 extern int try_to_free_swap(struct page *);
356 356 struct backing_dev_info;
357 357  
358   -/* linux/mm/thrash.c */
359   -extern struct mm_struct *swap_token_mm;
360   -extern void grab_swap_token(struct mm_struct *);
361   -extern void __put_swap_token(struct mm_struct *);
362   -extern void disable_swap_token(struct mem_cgroup *memcg);
363   -
364   -static inline int has_swap_token(struct mm_struct *mm)
365   -{
366   - return (mm == swap_token_mm);
367   -}
368   -
369   -static inline void put_swap_token(struct mm_struct *mm)
370   -{
371   - if (has_swap_token(mm))
372   - __put_swap_token(mm);
373   -}
374   -
375 358 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
376 359 extern void
377 360 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
... ... @@ -474,24 +457,6 @@
474 457 swp_entry_t entry;
475 458 entry.val = 0;
476 459 return entry;
477   -}
478   -
479   -/* linux/mm/thrash.c */
480   -static inline void put_swap_token(struct mm_struct *mm)
481   -{
482   -}
483   -
484   -static inline void grab_swap_token(struct mm_struct *mm)
485   -{
486   -}
487   -
488   -static inline int has_swap_token(struct mm_struct *mm)
489   -{
490   - return 0;
491   -}
492   -
493   -static inline void disable_swap_token(struct mem_cgroup *memcg)
494   -{
495 460 }
496 461  
497 462 static inline void
include/trace/events/vmscan.h
... ... @@ -395,88 +395,6 @@
395 395 show_reclaim_flags(__entry->reclaim_flags))
396 396 );
397 397  
398   -TRACE_EVENT(replace_swap_token,
399   - TP_PROTO(struct mm_struct *old_mm,
400   - struct mm_struct *new_mm),
401   -
402   - TP_ARGS(old_mm, new_mm),
403   -
404   - TP_STRUCT__entry(
405   - __field(struct mm_struct*, old_mm)
406   - __field(unsigned int, old_prio)
407   - __field(struct mm_struct*, new_mm)
408   - __field(unsigned int, new_prio)
409   - ),
410   -
411   - TP_fast_assign(
412   - __entry->old_mm = old_mm;
413   - __entry->old_prio = old_mm ? old_mm->token_priority : 0;
414   - __entry->new_mm = new_mm;
415   - __entry->new_prio = new_mm->token_priority;
416   - ),
417   -
418   - TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u",
419   - __entry->old_mm, __entry->old_prio,
420   - __entry->new_mm, __entry->new_prio)
421   -);
422   -
423   -DECLARE_EVENT_CLASS(put_swap_token_template,
424   - TP_PROTO(struct mm_struct *swap_token_mm),
425   -
426   - TP_ARGS(swap_token_mm),
427   -
428   - TP_STRUCT__entry(
429   - __field(struct mm_struct*, swap_token_mm)
430   - ),
431   -
432   - TP_fast_assign(
433   - __entry->swap_token_mm = swap_token_mm;
434   - ),
435   -
436   - TP_printk("token_mm=%p", __entry->swap_token_mm)
437   -);
438   -
439   -DEFINE_EVENT(put_swap_token_template, put_swap_token,
440   - TP_PROTO(struct mm_struct *swap_token_mm),
441   - TP_ARGS(swap_token_mm)
442   -);
443   -
444   -DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token,
445   - TP_PROTO(struct mm_struct *swap_token_mm),
446   - TP_ARGS(swap_token_mm),
447   - TP_CONDITION(swap_token_mm != NULL)
448   -);
449   -
450   -TRACE_EVENT_CONDITION(update_swap_token_priority,
451   - TP_PROTO(struct mm_struct *mm,
452   - unsigned int old_prio,
453   - struct mm_struct *swap_token_mm),
454   -
455   - TP_ARGS(mm, old_prio, swap_token_mm),
456   -
457   - TP_CONDITION(mm->token_priority != old_prio),
458   -
459   - TP_STRUCT__entry(
460   - __field(struct mm_struct*, mm)
461   - __field(unsigned int, old_prio)
462   - __field(unsigned int, new_prio)
463   - __field(struct mm_struct*, swap_token_mm)
464   - __field(unsigned int, swap_token_prio)
465   - ),
466   -
467   - TP_fast_assign(
468   - __entry->mm = mm;
469   - __entry->old_prio = old_prio;
470   - __entry->new_prio = mm->token_priority;
471   - __entry->swap_token_mm = swap_token_mm;
472   - __entry->swap_token_prio = swap_token_mm ? swap_token_mm->token_priority : 0;
473   - ),
474   -
475   - TP_printk("mm=%p old_prio=%u new_prio=%u swap_token_mm=%p token_prio=%u",
476   - __entry->mm, __entry->old_prio, __entry->new_prio,
477   - __entry->swap_token_mm, __entry->swap_token_prio)
478   -);
479   -
480 398 #endif /* _TRACE_VMSCAN_H */
481 399  
482 400 /* This part must be outside protection */
... ... @@ -614,7 +614,6 @@
614 614 list_del(&mm->mmlist);
615 615 spin_unlock(&mmlist_lock);
616 616 }
617   - put_swap_token(mm);
618 617 if (mm->binfmt)
619 618 module_put(mm->binfmt->module);
620 619 mmdrop(mm);
... ... @@ -831,10 +830,6 @@
831 830 memcpy(mm, oldmm, sizeof(*mm));
832 831 mm_init_cpumask(mm);
833 832  
834   - /* Initializing for Swap token stuff */
835   - mm->token_priority = 0;
836   - mm->last_interval = 0;
837   -
838 833 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
839 834 mm->pmd_huge_pte = NULL;
840 835 #endif
... ... @@ -913,10 +908,6 @@
913 908 goto fail_nomem;
914 909  
915 910 good_mm:
916   - /* Initializing for Swap token stuff */
917   - mm->token_priority = 0;
918   - mm->last_interval = 0;
919   -
920 911 tsk->mm = mm;
921 912 tsk->active_mm = mm;
922 913 return 0;
... ... @@ -25,7 +25,7 @@
25 25 obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
26 26  
27 27 obj-$(CONFIG_BOUNCE) += bounce.o
28   -obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
  28 +obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
29 29 obj-$(CONFIG_HAS_DMA) += dmapool.o
30 30 obj-$(CONFIG_HUGETLBFS) += hugetlb.o
31 31 obj-$(CONFIG_NUMA) += mempolicy.o
... ... @@ -5598,7 +5598,6 @@
5598 5598 if (mm) {
5599 5599 if (mc.to)
5600 5600 mem_cgroup_move_charge(mm);
5601   - put_swap_token(mm);
5602 5601 mmput(mm);
5603 5602 }
5604 5603 if (mc.to)
... ... @@ -2908,7 +2908,6 @@
2908 2908 delayacct_set_flag(DELAYACCT_PF_SWAPIN);
2909 2909 page = lookup_swap_cache(entry);
2910 2910 if (!page) {
2911   - grab_swap_token(mm); /* Contend for token _before_ read-in */
2912 2911 page = swapin_readahead(entry,
2913 2912 GFP_HIGHUSER_MOVABLE, vma, address);
2914 2913 if (!page) {
... ... @@ -2938,6 +2937,7 @@
2938 2937 }
2939 2938  
2940 2939 locked = lock_page_or_retry(page, mm, flags);
  2940 +
2941 2941 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2942 2942 if (!locked) {
2943 2943 ret |= VM_FAULT_RETRY;
... ... @@ -755,12 +755,6 @@
755 755 pte_unmap_unlock(pte, ptl);
756 756 }
757 757  
758   - /* Pretend the page is referenced if the task has the
759   - swap token and is in the middle of a page fault. */
760   - if (mm != current->mm && has_swap_token(mm) &&
761   - rwsem_is_locked(&mm->mmap_sem))
762   - referenced++;
763   -
764 758 (*mapcount)--;
765 759  
766 760 if (referenced)
1   -/*
2   - * mm/thrash.c
3   - *
4   - * Copyright (C) 2004, Red Hat, Inc.
5   - * Copyright (C) 2004, Rik van Riel <riel@redhat.com>
6   - * Released under the GPL, see the file COPYING for details.
7   - *
8   - * Simple token based thrashing protection, using the algorithm
9   - * described in: http://www.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-1.html
10   - *
11   - * Sep 2006, Ashwin Chaugule <ashwin.chaugule@celunite.com>
12   - * Improved algorithm to pass token:
13   - * Each task has a priority which is incremented if it contended
14   - * for the token in an interval less than its previous attempt.
15   - * If the token is acquired, that task's priority is boosted to prevent
16   - * the token from bouncing around too often and to let the task make
17   - * some progress in its execution.
18   - */
19   -
20   -#include <linux/jiffies.h>
21   -#include <linux/mm.h>
22   -#include <linux/sched.h>
23   -#include <linux/swap.h>
24   -#include <linux/memcontrol.h>
25   -
26   -#include <trace/events/vmscan.h>
27   -
28   -#define TOKEN_AGING_INTERVAL (0xFF)
29   -
30   -static DEFINE_SPINLOCK(swap_token_lock);
31   -struct mm_struct *swap_token_mm;
32   -static struct mem_cgroup *swap_token_memcg;
33   -
34   -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
35   -static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm)
36   -{
37   - struct mem_cgroup *memcg;
38   -
39   - memcg = try_get_mem_cgroup_from_mm(mm);
40   - if (memcg)
41   - css_put(mem_cgroup_css(memcg));
42   -
43   - return memcg;
44   -}
45   -#else
46   -static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm)
47   -{
48   - return NULL;
49   -}
50   -#endif
51   -
52   -void grab_swap_token(struct mm_struct *mm)
53   -{
54   - int current_interval;
55   - unsigned int old_prio = mm->token_priority;
56   - static unsigned int global_faults;
57   - static unsigned int last_aging;
58   -
59   - global_faults++;
60   -
61   - current_interval = global_faults - mm->faultstamp;
62   -
63   - if (!spin_trylock(&swap_token_lock))
64   - return;
65   -
66   - /* First come first served */
67   - if (!swap_token_mm)
68   - goto replace_token;
69   -
70   - /*
71   - * Usually, we don't need priority aging because long interval faults
72   - * makes priority decrease quickly. But there is one exception. If the
73   - * token owner task is sleeping, it never make long interval faults.
74   - * Thus, we need a priority aging mechanism instead. The requirements
75   - * of priority aging are
76   - * 1) An aging interval is reasonable enough long. Too short aging
77   - * interval makes quick swap token lost and decrease performance.
78   - * 2) The swap token owner task have to get priority aging even if
79   - * it's under sleep.
80   - */
81   - if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) {
82   - swap_token_mm->token_priority /= 2;
83   - last_aging = global_faults;
84   - }
85   -
86   - if (mm == swap_token_mm) {
87   - mm->token_priority += 2;
88   - goto update_priority;
89   - }
90   -
91   - if (current_interval < mm->last_interval)
92   - mm->token_priority++;
93   - else {
94   - if (likely(mm->token_priority > 0))
95   - mm->token_priority--;
96   - }
97   -
98   - /* Check if we deserve the token */
99   - if (mm->token_priority > swap_token_mm->token_priority)
100   - goto replace_token;
101   -
102   -update_priority:
103   - trace_update_swap_token_priority(mm, old_prio, swap_token_mm);
104   -
105   -out:
106   - mm->faultstamp = global_faults;
107   - mm->last_interval = current_interval;
108   - spin_unlock(&swap_token_lock);
109   - return;
110   -
111   -replace_token:
112   - mm->token_priority += 2;
113   - trace_replace_swap_token(swap_token_mm, mm);
114   - swap_token_mm = mm;
115   - swap_token_memcg = swap_token_memcg_from_mm(mm);
116   - last_aging = global_faults;
117   - goto out;
118   -}
119   -
120   -/* Called on process exit. */
121   -void __put_swap_token(struct mm_struct *mm)
122   -{
123   - spin_lock(&swap_token_lock);
124   - if (likely(mm == swap_token_mm)) {
125   - trace_put_swap_token(swap_token_mm);
126   - swap_token_mm = NULL;
127   - swap_token_memcg = NULL;
128   - }
129   - spin_unlock(&swap_token_lock);
130   -}
131   -
132   -static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b)
133   -{
134   - if (!a)
135   - return true;
136   - if (!b)
137   - return true;
138   - if (a == b)
139   - return true;
140   - return false;
141   -}
142   -
143   -void disable_swap_token(struct mem_cgroup *memcg)
144   -{
145   - /* memcg reclaim don't disable unrelated mm token. */
146   - if (match_memcg(memcg, swap_token_memcg)) {
147   - spin_lock(&swap_token_lock);
148   - if (match_memcg(memcg, swap_token_memcg)) {
149   - trace_disable_swap_token(swap_token_mm);
150   - swap_token_mm = NULL;
151   - swap_token_memcg = NULL;
152   - }
153   - spin_unlock(&swap_token_lock);
154   - }
155   -}
... ... @@ -2352,8 +2352,6 @@
2352 2352  
2353 2353 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
2354 2354 sc->nr_scanned = 0;
2355   - if (!priority)
2356   - disable_swap_token(sc->target_mem_cgroup);
2357 2355 aborted_reclaim = shrink_zones(priority, zonelist, sc);
2358 2356  
2359 2357 /*
... ... @@ -2703,10 +2701,6 @@
2703 2701 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
2704 2702 unsigned long lru_pages = 0;
2705 2703 int has_under_min_watermark_zone = 0;
2706   -
2707   - /* The swap token gets in the way of swapout... */
2708   - if (!priority)
2709   - disable_swap_token(NULL);
2710 2704  
2711 2705 all_zones_ok = 1;
2712 2706 balanced = 0;