mm/swap_slots.c

  // SPDX-License-Identifier: GPL-2.0
  /*
   * Manage cache of swap slots to be used for and returned from
   * swap.
   *
   * Copyright(c) 2016 Intel Corporation.
   *
   * Author: Tim Chen <tim.c.chen@linux.intel.com>
   *
   * We allocate the swap slots from the global pool and put
   * them into local per cpu caches.  This has the advantage
   * of not needing to acquire the swap_info lock every time
   * we need a new slot.
   *
   * There is also the opportunity to simply return a slot
   * to the local cache without needing to acquire the swap_info
   * lock.  We do not reuse the returned slots directly but
   * move them back to the global pool in a batch.  This
   * allows the slots to coalesce and reduce fragmentation.
   *
   * The allocated swap entry is marked with the SWAP_HAS_CACHE
   * flag in its swap map count, which prevents it from being
   * allocated again from the global pool.
   *
   * The swap slots cache is protected by a mutex instead of
   * a spin lock as when we search for slots with scan_swap_map,
   * we can possibly sleep.
   */
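  
  /*
   * For reference, the per cpu cache used throughout this file is defined
   * in linux/swap_slots.h and looks roughly like this:
   *
   *	struct swap_slots_cache {
   *		bool		lock_initialized;
   *		struct mutex	alloc_lock;	// protects slots, nr, cur
   *		swp_entry_t	*slots;
   *		int		nr;
   *		int		cur;
   *		spinlock_t	free_lock;	// protects slots_ret, n_ret
   *		swp_entry_t	*slots_ret;
   *		int		n_ret;
   *	};
   *
   * slots/nr/cur hold pre-allocated entries that get_swap_page() hands
   * out under alloc_lock, while slots_ret/n_ret batch up entries returned
   * by free_swap_slot() under free_lock until they are flushed back to
   * the global pool.
   */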
  
  #include <linux/swap_slots.h>
  #include <linux/cpu.h>
  #include <linux/cpumask.h>
  #include <linux/vmalloc.h>
  #include <linux/mutex.h>
  #include <linux/mm.h>

  static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);
  static bool	swap_slot_cache_active;
  bool	swap_slot_cache_enabled;
  static bool	swap_slot_cache_initialized;
  static DEFINE_MUTEX(swap_slots_cache_mutex);
  /* Serialize swap slots cache enable/disable operations */
  static DEFINE_MUTEX(swap_slots_cache_enable_mutex);
  
  static void __drain_swap_slots_cache(unsigned int type);
  static void deactivate_swap_slots_cache(void);
  static void reactivate_swap_slots_cache(void);
  #define use_swap_slot_cache (swap_slot_cache_active && swap_slot_cache_enabled)
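  
  /*
   * Bit flags for __drain_swap_slots_cache() and drain_slots_cache_cpu():
   * SLOTS_CACHE selects the per cpu allocation cache (slots),
   * SLOTS_CACHE_RET the per cpu return cache (slots_ret).
   */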
  #define SLOTS_CACHE 0x1
  #define SLOTS_CACHE_RET 0x2
  
  static void deactivate_swap_slots_cache(void)
  {
  	mutex_lock(&swap_slots_cache_mutex);
  	swap_slot_cache_active = false;
  	__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
  	mutex_unlock(&swap_slots_cache_mutex);
  }
  
  static void reactivate_swap_slots_cache(void)
  {
  	mutex_lock(&swap_slots_cache_mutex);
  	swap_slot_cache_active = true;
  	mutex_unlock(&swap_slots_cache_mutex);
  }
  
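  /*
   * disable_swap_slots_cache_lock() and reenable_swap_slots_cache_unlock()
   * are used as a pair (e.g. around swapoff): the former disables and drains
   * the caches and leaves swap_slots_cache_enable_mutex held; the latter
   * re-evaluates whether the cache can be enabled and drops the mutex.
   */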
  /* Must not be called with cpu hot plug lock */
  void disable_swap_slots_cache_lock(void)
  {
  	mutex_lock(&swap_slots_cache_enable_mutex);
  	swap_slot_cache_enabled = false;
  	if (swap_slot_cache_initialized) {
  		/* serialize with cpu hotplug operations */
  		get_online_cpus();
  		__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
  		put_online_cpus();
  	}
  }
  
  static void __reenable_swap_slots_cache(void)
  {
  	swap_slot_cache_enabled = has_usable_swap();
  }
  
  void reenable_swap_slots_cache_unlock(void)
  {
  	__reenable_swap_slots_cache();
  	mutex_unlock(&swap_slots_cache_enable_mutex);
  }
  
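  /*
   * (De)activate the per cpu caches based on how much free swap is left in
   * the global pool, so that slots are not tied up in per cpu caches when
   * swap space runs low.  The THRESHOLD_*_SWAP_SLOTS_CACHE constants are
   * defined in linux/swap_slots.h.
   */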
  static bool check_cache_active(void)
  {
  	long pages;
  	if (!swap_slot_cache_enabled)
  		return false;
  
  	pages = get_nr_swap_pages();
  	if (!swap_slot_cache_active) {
  		if (pages > num_online_cpus() *
  		    THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE)
  			reactivate_swap_slots_cache();
  		goto out;
  	}
  
  	/* if the global pool of free slots is too low, deactivate the cache */
  	if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE)
  		deactivate_swap_slots_cache();
  out:
  	return swap_slot_cache_active;
  }
  
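  /*
   * CPU hotplug "online" callback, registered from enable_swap_slots_cache().
   * Allocates the two slot arrays for @cpu and initializes its cache.
   */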
  static int alloc_swap_slot_cache(unsigned int cpu)
  {
  	struct swap_slots_cache *cache;
  	swp_entry_t *slots, *slots_ret;
  
  	/*
  	 * Do allocation outside swap_slots_cache_mutex
  	 * as kvzalloc could trigger reclaim and get_swap_page,
  	 * which can lock swap_slots_cache_mutex.
  	 */
  	slots = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
  			 GFP_KERNEL);
  	if (!slots)
  		return -ENOMEM;
  	slots_ret = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
  			     GFP_KERNEL);
  	if (!slots_ret) {
  		kvfree(slots);
  		return -ENOMEM;
  	}
  
  	mutex_lock(&swap_slots_cache_mutex);
  	cache = &per_cpu(swp_slots, cpu);
  	if (cache->slots || cache->slots_ret) {
  		/* cache already allocated */
  		mutex_unlock(&swap_slots_cache_mutex);
  
  		kvfree(slots);
  		kvfree(slots_ret);
  
  		return 0;
  	}
  	if (!cache->lock_initialized) {
  		mutex_init(&cache->alloc_lock);
  		spin_lock_init(&cache->free_lock);
  		cache->lock_initialized = true;
  	}
  	cache->nr = 0;
  	cache->cur = 0;
  	cache->n_ret = 0;
  	/*
  	 * We initialized alloc_lock and free_lock earlier.  We use
  	 * !cache->slots or !cache->slots_ret to know if it is safe to acquire
  	 * the corresponding lock and use the cache.  Memory barrier below
  	 * ensures the assumption.
  	 */
  	mb();
  	cache->slots = slots;
  	cache->slots_ret = slots_ret;
  	mutex_unlock(&swap_slots_cache_mutex);
  	return 0;
  }
  
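  /*
   * Drain one cpu's cache(s) back to the global pool.  @type selects the
   * allocation and/or return cache; when @free_slots is true the backing
   * arrays are freed as well (cache teardown path).
   */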
  static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
  				  bool free_slots)
  {
  	struct swap_slots_cache *cache;
  	swp_entry_t *slots = NULL;
  
  	cache = &per_cpu(swp_slots, cpu);
  	if ((type & SLOTS_CACHE) && cache->slots) {
  		mutex_lock(&cache->alloc_lock);
  		swapcache_free_entries(cache->slots + cache->cur, cache->nr);
  		cache->cur = 0;
  		cache->nr = 0;
  		if (free_slots && cache->slots) {
  			kvfree(cache->slots);
  			cache->slots = NULL;
  		}
  		mutex_unlock(&cache->alloc_lock);
  	}
  	if ((type & SLOTS_CACHE_RET) && cache->slots_ret) {
  		spin_lock_irq(&cache->free_lock);
  		swapcache_free_entries(cache->slots_ret, cache->n_ret);
  		cache->n_ret = 0;
  		if (free_slots && cache->slots_ret) {
  			slots = cache->slots_ret;
  			cache->slots_ret = NULL;
  		}
  		spin_unlock_irq(&cache->free_lock);
  		if (slots)
  			kvfree(slots);
  	}
  }
  
  static void __drain_swap_slots_cache(unsigned int type)
  {
  	unsigned int cpu;
  
  	/*
  	 * This function is called during
  	 *	1) swapoff, when we have to make sure no
  	 *	   left over slots are in cache when we remove
  	 *	   a swap device;
  	 *      2) disabling of swap slot cache, when we run low
  	 *	   on swap slots when allocating memory and need
  	 *	   to return swap slots to global pool.
  	 *
  	 * We cannot acquire cpu hot plug lock here as
  	 * this function can be invoked in the cpu
  	 * hot plug path:
  	 * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback
  	 *   -> memory allocation -> direct reclaim -> get_swap_page
  	 *   -> drain_swap_slots_cache
  	 *
  	 * Hence the loop over online cpus below could miss a cpu that
  	 * is being brought online but not yet marked as online.
  	 * That is okay as we do not schedule and run anything on a
  	 * cpu before it has been marked online. Hence, we will not
  	 * fill any swap slots in slots cache of such cpu.
  	 * There are no slots on such cpu that need to be drained.
  	 */
  	for_each_online_cpu(cpu)
  		drain_slots_cache_cpu(cpu, type, false);
  }
  
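  /* CPU hotplug teardown callback: drain and free this cpu's caches. */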
  static int free_slot_cache(unsigned int cpu)
  {
  	mutex_lock(&swap_slots_cache_mutex);
  	drain_slots_cache_cpu(cpu, SLOTS_CACHE | SLOTS_CACHE_RET, true);
  	mutex_unlock(&swap_slots_cache_mutex);
  	return 0;
  }
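  
  /*
   * Set up the swap slots cache; called when a swap device is enabled.
   * Registers the cpu hotplug callbacks once, then (re)enables the cache
   * if there is usable swap.
   */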
  void enable_swap_slots_cache(void)
  {
  	mutex_lock(&swap_slots_cache_enable_mutex);
  	if (!swap_slot_cache_initialized) {
  		int ret;

  		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache",
  					alloc_swap_slot_cache, free_slot_cache);
  		if (WARN_ONCE(ret < 0, "Cache allocation failed (%s), operating "
  				       "without swap slots cache.
  ", __func__))
  			goto out_unlock;
  
  		swap_slot_cache_initialized = true;
  	}

  	__reenable_swap_slots_cache();
  out_unlock:
  	mutex_unlock(&swap_slots_cache_enable_mutex);
  }
  
  /* called with swap slot cache's alloc lock held */
  static int refill_swap_slots_cache(struct swap_slots_cache *cache)
  {
  	if (!use_swap_slot_cache || cache->nr)
  		return 0;
  
  	cache->cur = 0;
  	if (swap_slot_cache_active)
  		cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
  					   cache->slots, 1);
  
  	return cache->nr;
  }
  
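  /*
   * Return a freed swap entry to the current cpu's return cache.  Once the
   * cache fills up, or if it is unavailable, entries go straight back to
   * the global pool via swapcache_free_entries().
   */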
  int free_swap_slot(swp_entry_t entry)
  {
  	struct swap_slots_cache *cache;
  	cache = raw_cpu_ptr(&swp_slots);
  	if (likely(use_swap_slot_cache && cache->slots_ret)) {
  		spin_lock_irq(&cache->free_lock);
  		/* Swap slots cache may be deactivated before acquiring lock */
  		if (!use_swap_slot_cache || !cache->slots_ret) {
  			spin_unlock_irq(&cache->free_lock);
  			goto direct_free;
  		}
  		if (cache->n_ret >= SWAP_SLOTS_CACHE_SIZE) {
  			/*
  			 * Return slots to global pool.
  			 * The current swap_map value is SWAP_HAS_CACHE.
  			 * Set it to 0 to indicate it is available for
  			 * allocation in global pool
  			 */
  			swapcache_free_entries(cache->slots_ret, cache->n_ret);
  			cache->n_ret = 0;
  		}
  		cache->slots_ret[cache->n_ret++] = entry;
  		spin_unlock_irq(&cache->free_lock);
  	} else {
  direct_free:
  		swapcache_free_entries(&entry, 1);
  	}
  
  	return 0;
  }
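  
  /*
   * Allocate one swap entry for @page.  Order-0 pages are served from the
   * per cpu cache when it is active; THP pages (with CONFIG_THP_SWAP)
   * allocate HPAGE_PMD_NR contiguous slots directly from the global pool.
   * On failure, or if charging the swap to the memcg fails, the returned
   * entry has .val == 0.
   */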
  swp_entry_t get_swap_page(struct page *page)
  {
  	swp_entry_t entry;
  	struct swap_slots_cache *cache;
  	entry.val = 0;
  
  	if (PageTransHuge(page)) {
  		if (IS_ENABLED(CONFIG_THP_SWAP))
  			get_swap_pages(1, &entry, HPAGE_PMD_NR);
  		goto out;
  	}
  	/*
  	 * Preemption is allowed here, because we may sleep
  	 * in refill_swap_slots_cache().  But it is safe, because
  	 * accesses to the per-CPU data structure are protected by the
  	 * mutex cache->alloc_lock.
  	 *
  	 * The alloc path here does not touch cache->slots_ret
  	 * so cache->free_lock is not taken.
  	 */
  	cache = raw_cpu_ptr(&swp_slots);
  	if (likely(check_cache_active() && cache->slots)) {
  		mutex_lock(&cache->alloc_lock);
  		if (cache->slots) {
  repeat:
  			if (cache->nr) {
  				entry = cache->slots[cache->cur];
  				cache->slots[cache->cur++].val = 0;
  				cache->nr--;
  			} else if (refill_swap_slots_cache(cache)) {
  				goto repeat;
  			}
  		}
  		mutex_unlock(&cache->alloc_lock);
  		if (entry.val)
  			goto out;
  	}
  	get_swap_pages(1, &entry, 1);
  out:
  	if (mem_cgroup_try_charge_swap(page, entry)) {
  		put_swap_page(page, entry);
  		entry.val = 0;
  	}
  	return entry;
  }