mm/percpu-vm.c
  /*
   * mm/percpu-vm.c - vmalloc area based chunk allocation
   *
   * Copyright (C) 2010		SUSE Linux Products GmbH
   * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
   *
   * This file is released under the GPLv2.
   *
   * Chunks are mapped into vmalloc areas and populated page by page.
   * This is the default chunk allocator.
   */
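
  /*
   * Note: pcpu_page_idx() is defined in mm/percpu.c; in this version it
   * maps (cpu, page_idx) to pcpu_unit_map[cpu] * pcpu_unit_pages +
   * page_idx.  For example, with 8 pages per unit, unit 2 / page 5 uses
   * slot 2 * 8 + 5 = 21 of the temp pages array used below.
   */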
  
  static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
  				    unsigned int cpu, int page_idx)
  {
  	/* must not be used on pre-mapped chunk */
  	WARN_ON(chunk->immutable);
  
  	return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
  }
  
  /**
   * pcpu_get_pages - get temp pages array
   * @chunk: chunk of interest
   *
   * Returns pointer to array of pointers to struct page which can be indexed
   * with pcpu_page_idx().  Note that there is only one array and accesses
   * should be serialized by pcpu_alloc_mutex.
   *
   * RETURNS:
   * Pointer to temp pages array on success.
   */
  static struct page **pcpu_get_pages(struct pcpu_chunk *chunk)
  {
  	static struct page **pages;
  	size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);

  	lockdep_assert_held(&pcpu_alloc_mutex);
  
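  	/* allocated lazily on first use and then cached; never freed */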
  	if (!pages)
  		pages = pcpu_mem_zalloc(pages_size);
  	return pages;
  }
  
  /**
   * pcpu_free_pages - free pages which were allocated for @chunk
   * @chunk: chunk pages were allocated for
   * @pages: array of pages to be freed, indexed by pcpu_page_idx()
   * @page_start: page index of the first page to be freed
   * @page_end: page index of the last page to be freed + 1
   *
   * Free pages [@page_start, @page_end) in @pages for all units.
   * The pages were allocated for @chunk.
   */
  static void pcpu_free_pages(struct pcpu_chunk *chunk,
  			    struct page **pages, int page_start, int page_end)
  {
  	unsigned int cpu;
  	int i;
  
  	for_each_possible_cpu(cpu) {
  		for (i = page_start; i < page_end; i++) {
  			struct page *page = pages[pcpu_page_idx(cpu, i)];
  
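  			/* slots that were never populated may be NULL */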
  			if (page)
  				__free_page(page);
  		}
  	}
  }
  
  /**
   * pcpu_alloc_pages - allocate pages for @chunk
   * @chunk: target chunk
   * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
   * @page_start: page index of the first page to be allocated
   * @page_end: page index of the last page to be allocated + 1
   *
   * Allocate pages [@page_start,@page_end) into @pages for all units.
   * The allocation is for @chunk.  Percpu core doesn't care about the
   * content of @pages and will pass it verbatim to pcpu_map_pages().
   */
  static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
  			    struct page **pages, int page_start, int page_end)
  {
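  	/*
  	 * Highmem pages are fine here since each page gets mapped into
  	 * the vmalloc area rather than accessed through the linear map;
  	 * __GFP_COLD presumably because the pages are not touched here.
  	 */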
  	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
  	unsigned int cpu, tcpu;
  	int i;
  
  	for_each_possible_cpu(cpu) {
  		for (i = page_start; i < page_end; i++) {
  			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
  
  			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
  			if (!*pagep)
  				goto err;
  		}
  	}
  	return 0;
  
  err:
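  	/* first roll back the partially filled range on the failing cpu */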
  	while (--i >= page_start)
  		__free_page(pages[pcpu_page_idx(cpu, i)]);
  
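  	/* then free everything allocated for the cpus finished earlier */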
  	for_each_possible_cpu(tcpu) {
  		if (tcpu == cpu)
  			break;
  		for (i = page_start; i < page_end; i++)
  			__free_page(pages[pcpu_page_idx(tcpu, i)]);
  	}
  	return -ENOMEM;
  }
  
  /**
   * pcpu_pre_unmap_flush - flush cache prior to unmapping
   * @chunk: chunk the regions to be flushed belong to
   * @page_start: page index of the first page to be flushed
   * @page_end: page index of the last page to be flushed + 1
   *
   * Pages in [@page_start,@page_end) of @chunk are about to be
   * unmapped.  Flush cache.  As each flush can be very expensive,
   * flush the whole region at once rather than once per cpu.  This
   * may be overkill but scales better.
   */
  static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
  				 int page_start, int page_end)
  {
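  	/*
  	 * pcpu_low_unit_cpu and pcpu_high_unit_cpu are the cpus with the
  	 * lowest and highest unit addresses, so this single range covers
  	 * the pages of every unit.
  	 */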
  	flush_cache_vunmap(
  		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
  		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
  }
  
  static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
  {
  	unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
  }
  
  /**
   * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
   * @chunk: chunk of interest
   * @pages: pages array which can be used to pass information to free
   * @page_start: page index of the first page to unmap
   * @page_end: page index of the last page to unmap + 1
   *
   * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
   * Corresponding elements in @pages are filled with the unmapped pages
   * and carry them to pcpu_free_pages() which will be
   * called after all unmaps are finished.  The caller should call
   * proper pre/post flush functions.
   */
  static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
  			     struct page **pages, int page_start, int page_end)
  {
  	unsigned int cpu;
  	int i;
  
  	for_each_possible_cpu(cpu) {
  		for (i = page_start; i < page_end; i++) {
  			struct page *page;
  
  			page = pcpu_chunk_page(chunk, cpu, i);
  			WARN_ON(!page);
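  			/* stash the page so pcpu_free_pages() can free it */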
  			pages[pcpu_page_idx(cpu, i)] = page;
  		}
  		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
  				   page_end - page_start);
  	}
  }
  
  /**
   * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
   * @chunk: pcpu_chunk the regions to be flushed belong to
   * @page_start: page index of the first page to be flushed
   * @page_end: page index of the last page to be flushed + 1
   *
   * Pages [@page_start,@page_end) of @chunk have been unmapped.  Flush
   * TLB for the regions.  This can be skipped if the area is to be
   * returned to vmalloc as vmalloc will handle TLB flushing lazily.
   *
   * As with pcpu_pre_unmap_flush(), TLB flushing is also done at once
   * for the whole region.
   */
  static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
  				      int page_start, int page_end)
  {
  	flush_tlb_kernel_range(
  		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
  		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
  }
  
  static int __pcpu_map_pages(unsigned long addr, struct page **pages,
  			    int nr_pages)
  {
  	return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
  					PAGE_KERNEL, pages);
  }
  
  /**
   * pcpu_map_pages - map pages into a pcpu_chunk
   * @chunk: chunk of interest
   * @pages: pages array containing pages to be mapped
   * @page_start: page index of the first page to map
   * @page_end: page index of the last page to map + 1
   *
   * For each cpu, map pages [@page_start,@page_end) into @chunk.  The
   * caller is responsible for calling pcpu_post_map_flush() after all
   * mappings are complete.
   *
   * This function is responsible for setting up whatever is necessary for
   * reverse lookup (addr -> chunk).
   */
  static int pcpu_map_pages(struct pcpu_chunk *chunk,
  			  struct page **pages, int page_start, int page_end)
  {
  	unsigned int cpu, tcpu;
  	int i, err;
  
  	for_each_possible_cpu(cpu) {
  		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
  				       &pages[pcpu_page_idx(cpu, page_start)],
  				       page_end - page_start);
  		if (err < 0)
  			goto err;

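  		/* record the owning chunk for addr -> chunk reverse lookup */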
  		for (i = page_start; i < page_end; i++)
  			pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
  					    chunk);
  	}
  	return 0;
  err:
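  	/* unmap whatever was mapped on the cpus that finished earlier */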
  	for_each_possible_cpu(tcpu) {
  		if (tcpu == cpu)
  			break;
  		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
  				   page_end - page_start);
  	}
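  	/*
  	 * The partial mappings may already be visible in remote TLBs;
  	 * flush them before the pages are freed and reused.
  	 */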
  	pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
  	return err;
  }
  
  /**
   * pcpu_post_map_flush - flush cache after mapping
   * @chunk: pcpu_chunk the regions to be flushed belong to
   * @page_start: page index of the first page to be flushed
   * @page_end: page index of the last page to be flushed + 1
   *
   * Pages [@page_start,@page_end) of @chunk have been mapped.  Flush
   * cache.
   *
   * As with pcpu_pre_unmap_flush(), the cache flush is done at once
   * for the whole region.
   */
  static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
  				int page_start, int page_end)
  {
  	flush_cache_vmap(
  		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
  		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
  }
  
  /**
   * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
   * @chunk: chunk of interest
   * @page_start: the start page
   * @page_end: the end page
   *
   * For each cpu, populate and map pages [@page_start,@page_end) into
   * @chunk.
   *
   * CONTEXT:
   * pcpu_alloc_mutex, does GFP_KERNEL allocation.
   */
  static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
  			       int page_start, int page_end)
  {
  	struct page **pages;

  	pages = pcpu_get_pages(chunk);
  	if (!pages)
  		return -ENOMEM;
  	if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
  		return -ENOMEM;

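  	/* a map failure leaves the new pages unmapped; freeing is enough */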
  	if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
  		pcpu_free_pages(chunk, pages, page_start, page_end);
  		return -ENOMEM;
  	}
  	pcpu_post_map_flush(chunk, page_start, page_end);
  	return 0;
  }
  
  /**
   * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
   * @chunk: chunk to depopulate
   * @page_start: the start page
   * @page_end: the end page
   *
   * For each cpu, depopulate and unmap pages [@page_start,@page_end)
   * from @chunk.
   *
   * CONTEXT:
   * pcpu_alloc_mutex.
   */
  static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
  				  int page_start, int page_end)
  {
  	struct page **pages;

  	/*
  	 * If control reaches here, there must have been at least one
  	 * successful population attempt so the temp pages array must
  	 * be available now.
  	 */
  	pages = pcpu_get_pages(chunk);
  	BUG_ON(!pages);
  
  	/* unmap and free */
  	pcpu_pre_unmap_flush(chunk, page_start, page_end);
  	pcpu_unmap_pages(chunk, pages, page_start, page_end);
  
  	/* no need to flush tlb, vmalloc will handle it lazily */
  	pcpu_free_pages(chunk, pages, page_start, page_end);
  }
  
  static struct pcpu_chunk *pcpu_create_chunk(void)
  {
  	struct pcpu_chunk *chunk;
  	struct vm_struct **vms;
  
  	chunk = pcpu_alloc_chunk();
  	if (!chunk)
  		return NULL;
  
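  	/*
  	 * Reserve a vmalloc area for each cpu group; pages are mapped
  	 * into it on demand by pcpu_populate_chunk().
  	 */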
  	vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
  				pcpu_nr_groups, pcpu_atom_size);
  	if (!vms) {
  		pcpu_free_chunk(chunk);
  		return NULL;
  	}
  
  	chunk->data = vms;
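  	/* base_addr is group 0's area shifted back by its unit offset */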
  	chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
  	return chunk;
  }
  
  static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
  {
  	if (chunk && chunk->data)
  		pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
  	pcpu_free_chunk(chunk);
  }
  
  static struct page *pcpu_addr_to_page(void *addr)
  {
  	return vmalloc_to_page(addr);
  }
  
  static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
  {
  	/* no extra restriction */
  	return 0;
  }