Commit f4eb07c17df2e6cf9bd58bfcd9cc9e05e9489d07

Authored by Heiko Carstens
Committed by Martin Schwidefsky
1 parent 7f090145a1

[S390] Virtual memmap for s390.

Virtual memmap support for s390, inspired by the ia64 implementation.

Unlike ia64, we need a mechanism that allows us to dynamically attach
shared memory regions. These regions are accessed via the dcss device
driver, which implements the 'direct_access' operation and therefore
needs a struct page for every single shared page.
This implementation provides an interface to attach/detach shared
memory:

int add_shared_memory(unsigned long start, unsigned long size);
int remove_shared_memory(unsigned long start, unsigned long size);

add_shared_memory adds the given memory range to the 1:1 mapping,
makes sure that the corresponding range in the vmemmap is backed with
physical pages, and initialises the new struct pages.

remove_shared_memory, in turn, only invalidates the page table
entries in the 1:1 mapping. The page tables and the memory used for
struct pages in the vmemmap are currently not freed; they will be
reused when the next segment is attached.
Given that the maximum size of a shared memory region is 2GB and
that all regions must reside below 2GB, this is not much of a
restriction, but there is room for improvement.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
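
For illustration, a minimal caller sketch (not part of the patch; the
helper name and bounds are hypothetical) mirroring how segment_load()
and segment_unload() in the extmem.c hunks below use the interface:

	static int example_attach_segment(unsigned long start, unsigned long end)
	{
		int rc;

		rc = add_shared_memory(start, end - start + 1);
		if (rc) {
			/* -ENOSPC: overlaps storage or another segment,
			 * -ERANGE: exceeds the kernel mapping range */
			return rc;
		}
		/*
		 * The range is now part of the 1:1 mapping and every page in
		 * it has an initialised struct page, so direct_access style
		 * I/O may hand out its struct pages safely.
		 */
		remove_shared_memory(start, end - start + 1);
		return 0;
	}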

Showing 9 changed files with 488 additions and 210 deletions

... ... @@ -235,6 +235,9 @@
235 235  
236 236 source "mm/Kconfig"
237 237  
  238 +config HOLES_IN_ZONE
  239 + def_bool y
  240 +
238 241 comment "I/O subsystem configuration"
239 242  
240 243 config MACHCHK_WARNING
arch/s390/kernel/setup.c
... ... @@ -64,7 +64,7 @@
64 64 unsigned int console_irq = -1;
65 65 unsigned long machine_flags = 0;
66 66  
67   -struct mem_chunk memory_chunk[MEMORY_CHUNKS];
  67 +struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
68 68 volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
69 69 unsigned long __initdata zholes_size[MAX_NR_ZONES];
70 70 static unsigned long __initdata memory_end;
arch/s390/mm/Makefile
... ... @@ -2,6 +2,6 @@
2 2 # Makefile for the linux s390-specific parts of the memory manager.
3 3 #
4 4  
5   -obj-y := init.o fault.o ioremap.o extmem.o mmap.o
  5 +obj-y := init.o fault.o ioremap.o extmem.o mmap.o vmem.o
6 6 obj-$(CONFIG_CMM) += cmm.o
arch/s390/mm/extmem.c
... ... @@ -16,6 +16,7 @@
16 16 #include <linux/bootmem.h>
17 17 #include <linux/ctype.h>
18 18 #include <asm/page.h>
  19 +#include <asm/pgtable.h>
19 20 #include <asm/ebcdic.h>
20 21 #include <asm/errno.h>
21 22 #include <asm/extmem.h>
... ... @@ -238,65 +239,6 @@
238 239 }
239 240  
240 241 /*
241   - * check if the given segment collides with guest storage.
242   - * returns 1 if this is the case, 0 if no collision was found
243   - */
244   -static int
245   -segment_overlaps_storage(struct dcss_segment *seg)
246   -{
247   - int i;
248   -
249   - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
250   - if (memory_chunk[i].type != CHUNK_READ_WRITE)
251   - continue;
252   - if ((memory_chunk[i].addr >> 20) > (seg->end >> 20))
253   - continue;
254   - if (((memory_chunk[i].addr + memory_chunk[i].size - 1) >> 20)
255   - < (seg->start_addr >> 20))
256   - continue;
257   - return 1;
258   - }
259   - return 0;
260   -}
261   -
262   -/*
263   - * check if segment collides with other segments that are currently loaded
264   - * returns 1 if this is the case, 0 if no collision was found
265   - */
266   -static int
267   -segment_overlaps_others (struct dcss_segment *seg)
268   -{
269   - struct list_head *l;
270   - struct dcss_segment *tmp;
271   -
272   - BUG_ON(!mutex_is_locked(&dcss_lock));
273   - list_for_each(l, &dcss_list) {
274   - tmp = list_entry(l, struct dcss_segment, list);
275   - if ((tmp->start_addr >> 20) > (seg->end >> 20))
276   - continue;
277   - if ((tmp->end >> 20) < (seg->start_addr >> 20))
278   - continue;
279   - if (seg == tmp)
280   - continue;
281   - return 1;
282   - }
283   - return 0;
284   -}
285   -
286   -/*
287   - * check if segment exceeds the kernel mapping range (detected or set via mem=)
288   - * returns 1 if this is the case, 0 if segment fits into the range
289   - */
290   -static inline int
291   -segment_exceeds_range (struct dcss_segment *seg)
292   -{
293   - int seg_last_pfn = (seg->end) >> PAGE_SHIFT;
294   - if (seg_last_pfn > max_pfn)
295   - return 1;
296   - return 0;
297   -}
298   -
299   -/*
300 242 * get info about a segment
301 243 * possible return values:
302 244 * -ENOSYS : we are not running on VM
... ... @@ -341,24 +283,26 @@
341 283 rc = query_segment_type (seg);
342 284 if (rc < 0)
343 285 goto out_free;
344   - if (segment_exceeds_range(seg)) {
345   - PRINT_WARN ("segment_load: not loading segment %s - exceeds"
346   - " kernel mapping range\n",name);
347   - rc = -ERANGE;
  286 +
  287 + rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
  288 +
  289 + switch (rc) {
  290 + case 0:
  291 + break;
  292 + case -ENOSPC:
  293 + PRINT_WARN("segment_load: not loading segment %s - overlaps "
  294 + "storage/segment\n", name);
348 295 goto out_free;
349   - }
350   - if (segment_overlaps_storage(seg)) {
351   - PRINT_WARN ("segment_load: not loading segment %s - overlaps"
352   - " storage\n",name);
353   - rc = -ENOSPC;
  296 + case -ERANGE:
  297 + PRINT_WARN("segment_load: not loading segment %s - exceeds "
  298 + "kernel mapping range\n", name);
354 299 goto out_free;
355   - }
356   - if (segment_overlaps_others(seg)) {
357   - PRINT_WARN ("segment_load: not loading segment %s - overlaps"
358   - " other segments\n",name);
359   - rc = -EBUSY;
  300 + default:
  301 + PRINT_WARN("segment_load: not loading segment %s (rc: %d)\n",
  302 + name, rc);
360 303 goto out_free;
361 304 }
  305 +
362 306 if (do_nonshared)
363 307 dcss_command = DCSS_LOADNSR;
364 308 else
... ... @@ -372,7 +316,7 @@
372 316 rc = dcss_diag_translate_rc (seg->end);
373 317 dcss_diag(DCSS_PURGESEG, seg->dcss_name,
374 318 &seg->start_addr, &seg->end);
375   - goto out_free;
  319 + goto out_shared;
376 320 }
377 321 seg->do_nonshared = do_nonshared;
378 322 atomic_set(&seg->ref_count, 1);
... ... @@ -391,6 +335,8 @@
391 335 (void*)seg->start_addr, (void*)seg->end,
392 336 segtype_string[seg->vm_segtype]);
393 337 goto out;
  338 + out_shared:
  339 + remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
394 340 out_free:
395 341 kfree(seg);
396 342 out:
... ... @@ -530,12 +476,12 @@
530 476 "please report to linux390@de.ibm.com\n",name);
531 477 goto out_unlock;
532 478 }
533   - if (atomic_dec_return(&seg->ref_count) == 0) {
534   - list_del(&seg->list);
535   - dcss_diag(DCSS_PURGESEG, seg->dcss_name,
536   - &dummy, &dummy);
537   - kfree(seg);
538   - }
  479 + if (atomic_dec_return(&seg->ref_count) != 0)
  480 + goto out_unlock;
  481 + remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
  482 + list_del(&seg->list);
  483 + dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
  484 + kfree(seg);
539 485 out_unlock:
540 486 mutex_unlock(&dcss_lock);
541 487 }
... ... @@ -69,6 +69,8 @@
69 69 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
70 70 i = max_mapnr;
71 71 while (i-- > 0) {
  72 + if (!pfn_valid(i))
  73 + continue;
72 74 page = pfn_to_page(i);
73 75 total++;
74 76 if (PageReserved(page))
... ... @@ -84,68 +86,54 @@
84 86 printk("%d pages swap cached\n",cached);
85 87 }
86 88  
  89 +static void __init setup_ro_region(void)
  90 +{
  91 + pgd_t *pgd;
  92 + pmd_t *pmd;
  93 + pte_t *pte;
  94 + pte_t new_pte;
  95 + unsigned long address, end;
  96 +
  97 + address = ((unsigned long)&__start_rodata) & PAGE_MASK;
  98 + end = PFN_ALIGN((unsigned long)&__end_rodata);
  99 +
  100 + for (; address < end; address += PAGE_SIZE) {
  101 + pgd = pgd_offset_k(address);
  102 + pmd = pmd_offset(pgd, address);
  103 + pte = pte_offset_kernel(pmd, address);
  104 + new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
  105 + set_pte(pte, new_pte);
  106 + }
  107 +}
  108 +
87 109 extern unsigned long __initdata zholes_size[];
  110 +extern void vmem_map_init(void);
88 111 /*
89 112 * paging_init() sets up the page tables
90 113 */
91   -
92   -#ifndef CONFIG_64BIT
93 114 void __init paging_init(void)
94 115 {
95   - pgd_t * pg_dir;
96   - pte_t * pg_table;
97   - pte_t pte;
98   - int i;
99   - unsigned long tmp;
100   - unsigned long pfn = 0;
101   - unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
102   - static const int ssm_mask = 0x04000000L;
103   - unsigned long ro_start_pfn, ro_end_pfn;
  116 + pgd_t *pg_dir;
  117 + int i;
  118 + unsigned long pgdir_k;
  119 + static const int ssm_mask = 0x04000000L;
104 120 unsigned long zones_size[MAX_NR_ZONES];
  121 + unsigned long dma_pfn, high_pfn;
105 122  
106   - ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata);
107   - ro_end_pfn = PFN_UP((unsigned long)&__end_rodata);
108   -
109   - memset(zones_size, 0, sizeof(zones_size));
110   - zones_size[ZONE_DMA] = max_low_pfn;
111   - free_area_init_node(0, &contig_page_data, zones_size,
112   - __pa(PAGE_OFFSET) >> PAGE_SHIFT,
113   - zholes_size);
114   -
115   - /* unmap whole virtual address space */
  123 + pg_dir = swapper_pg_dir;
116 124  
117   - pg_dir = swapper_pg_dir;
118   -
  125 +#ifdef CONFIG_64BIT
  126 + pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERN_REGION_TABLE;
119 127 for (i = 0; i < PTRS_PER_PGD; i++)
120   - pmd_clear((pmd_t *) pg_dir++);
121   -
122   - /*
123   - * map whole physical memory to virtual memory (identity mapping)
124   - */
  128 + pgd_clear(pg_dir + i);
  129 +#else
  130 + pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
  131 + for (i = 0; i < PTRS_PER_PGD; i++)
  132 + pmd_clear((pmd_t *)(pg_dir + i));
  133 +#endif
  134 + vmem_map_init();
  135 + setup_ro_region();
125 136  
126   - pg_dir = swapper_pg_dir;
127   -
128   - while (pfn < max_low_pfn) {
129   - /*
130   - * pg_table is physical at this point
131   - */
132   - pg_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
133   -
134   - pmd_populate_kernel(&init_mm, (pmd_t *) pg_dir, pg_table);
135   - pg_dir++;
136   -
137   - for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
138   - if (pfn >= ro_start_pfn && pfn < ro_end_pfn)
139   - pte = pfn_pte(pfn, __pgprot(_PAGE_RO));
140   - else
141   - pte = pfn_pte(pfn, PAGE_KERNEL);
142   - if (pfn >= max_low_pfn)
143   - pte_val(pte) = _PAGE_TYPE_EMPTY;
144   - set_pte(pg_table, pte);
145   - pfn++;
146   - }
147   - }
148   -
149 137 S390_lowcore.kernel_asce = pgdir_k;
150 138  
151 139 /* enable virtual mapping in kernel mode */
152 140 	__ctl_load(pgdir_k, 1, 1);
... ... @@ -154,31 +142,9 @@
154 142 __ctl_load(pgdir_k, 13, 13);
155 143 __raw_local_irq_ssm(ssm_mask);
156 144  
157   - local_flush_tlb();
158   -}
159   -
160   -#else /* CONFIG_64BIT */
161   -
162   -void __init paging_init(void)
163   -{
164   - pgd_t * pg_dir;
165   - pmd_t * pm_dir;
166   - pte_t * pt_dir;
167   - pte_t pte;
168   - int i,j,k;
169   - unsigned long pfn = 0;
170   - unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) |
171   - _KERN_REGION_TABLE;
172   - static const int ssm_mask = 0x04000000L;
173   - unsigned long zones_size[MAX_NR_ZONES];
174   - unsigned long dma_pfn, high_pfn;
175   - unsigned long ro_start_pfn, ro_end_pfn;
176   -
177 145 memset(zones_size, 0, sizeof(zones_size));
178 146 dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT;
179 147 high_pfn = max_low_pfn;
180   - ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata);
181   - ro_end_pfn = PFN_UP((unsigned long)&__end_rodata);
182 148  
183 149 if (dma_pfn > high_pfn)
184 150 zones_size[ZONE_DMA] = high_pfn;
185 151  
... ... @@ -190,56 +156,7 @@
190 156 /* Initialize mem_map[]. */
191 157 free_area_init_node(0, &contig_page_data, zones_size,
192 158 __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
193   -
194   - /*
195   - * map whole physical memory to virtual memory (identity mapping)
196   - */
197   -
198   - pg_dir = swapper_pg_dir;
199   -
200   - for (i = 0 ; i < PTRS_PER_PGD ; i++,pg_dir++) {
201   -
202   - if (pfn >= max_low_pfn) {
203   - pgd_clear(pg_dir);
204   - continue;
205   - }
206   -
207   - pm_dir = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE * 4);
208   - pgd_populate(&init_mm, pg_dir, pm_dir);
209   -
210   - for (j = 0 ; j < PTRS_PER_PMD ; j++,pm_dir++) {
211   - if (pfn >= max_low_pfn) {
212   - pmd_clear(pm_dir);
213   - continue;
214   - }
215   -
216   - pt_dir = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
217   - pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
218   -
219   - for (k = 0 ; k < PTRS_PER_PTE ; k++,pt_dir++) {
220   - if (pfn >= ro_start_pfn && pfn < ro_end_pfn)
221   - pte = pfn_pte(pfn, __pgprot(_PAGE_RO));
222   - else
223   - pte = pfn_pte(pfn, PAGE_KERNEL);
224   - if (pfn >= max_low_pfn)
225   - pte_val(pte) = _PAGE_TYPE_EMPTY;
226   - set_pte(pt_dir, pte);
227   - pfn++;
228   - }
229   - }
230   - }
231   -
232   - S390_lowcore.kernel_asce = pgdir_k;
233   -
234   - /* enable virtual mapping in kernel mode */
235   - __ctl_load(pgdir_k, 1, 1);
236   - __ctl_load(pgdir_k, 7, 7);
237   - __ctl_load(pgdir_k, 13, 13);
238   - __raw_local_irq_ssm(ssm_mask);
239   -
240   - local_flush_tlb();
241 159 }
242   -#endif /* CONFIG_64BIT */
243 160  
244 161 void __init mem_init(void)
245 162 {
... ... @@ -269,6 +186,8 @@
269 186 printk("Write protected kernel read-only data: %#lx - %#lx\n",
270 187 (unsigned long)&__start_rodata,
271 188 PFN_ALIGN((unsigned long)&__end_rodata) - 1);
  189 + printk("Virtual memmap size: %ldk\n",
  190 + (max_pfn * sizeof(struct page)) >> 10);
272 191 }
273 192  
274 193 void free_initmem(void)
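
As a rough, hedged illustration of what the new "Virtual memmap size"
printk above reports (the 64 byte struct page size is an assumption,
not taken from the patch):

	2GB of storage, 4KB pages           ->  max_pfn = 524288
	524288 pfns * 64 bytes/struct page   =  33554432 bytes
	33554432 >> 10                       =  32768k of virtual memmap
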
  1 +/*
  2 + * arch/s390/mm/vmem.c
  3 + *
  4 + * Copyright IBM Corp. 2006
  5 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
  6 + */
  7 +
  8 +#include <linux/bootmem.h>
  9 +#include <linux/pfn.h>
  10 +#include <linux/mm.h>
  11 +#include <linux/module.h>
  12 +#include <linux/list.h>
  13 +#include <asm/pgalloc.h>
  14 +#include <asm/pgtable.h>
  15 +#include <asm/setup.h>
  16 +#include <asm/tlbflush.h>
  17 +
  18 +unsigned long vmalloc_end;
  19 +EXPORT_SYMBOL(vmalloc_end);
  20 +
  21 +static struct page *vmem_map;
  22 +static DEFINE_MUTEX(vmem_mutex);
  23 +
  24 +struct memory_segment {
  25 + struct list_head list;
  26 + unsigned long start;
  27 + unsigned long size;
  28 +};
  29 +
  30 +static LIST_HEAD(mem_segs);
  31 +
  32 +void memmap_init(unsigned long size, int nid, unsigned long zone,
  33 + unsigned long start_pfn)
  34 +{
  35 + struct page *start, *end;
  36 + struct page *map_start, *map_end;
  37 + int i;
  38 +
  39 + start = pfn_to_page(start_pfn);
  40 + end = start + size;
  41 +
  42 + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
  43 + unsigned long cstart, cend;
  44 +
  45 + cstart = PFN_DOWN(memory_chunk[i].addr);
  46 + cend = cstart + PFN_DOWN(memory_chunk[i].size);
  47 +
  48 + map_start = mem_map + cstart;
  49 + map_end = mem_map + cend;
  50 +
  51 + if (map_start < start)
  52 + map_start = start;
  53 + if (map_end > end)
  54 + map_end = end;
  55 +
  56 + map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1))
  57 + / sizeof(struct page);
  58 + map_end += ((PFN_ALIGN((unsigned long) map_end)
  59 + - (unsigned long) map_end)
  60 + / sizeof(struct page));
  61 +
  62 + if (map_start < map_end)
  63 + memmap_init_zone((unsigned long)(map_end - map_start),
  64 + nid, zone, page_to_pfn(map_start));
  65 + }
  66 +}
  67 +
  68 +static inline void *vmem_alloc_pages(unsigned int order)
  69 +{
  70 + if (slab_is_available())
  71 + return (void *)__get_free_pages(GFP_KERNEL, order);
  72 + return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
  73 +}
  74 +
  75 +static inline pmd_t *vmem_pmd_alloc(void)
  76 +{
  77 + pmd_t *pmd;
  78 + int i;
  79 +
  80 + pmd = vmem_alloc_pages(PMD_ALLOC_ORDER);
  81 + if (!pmd)
  82 + return NULL;
  83 + for (i = 0; i < PTRS_PER_PMD; i++)
  84 + pmd_clear(pmd + i);
  85 + return pmd;
  86 +}
  87 +
  88 +static inline pte_t *vmem_pte_alloc(void)
  89 +{
  90 + pte_t *pte;
  91 + pte_t empty_pte;
  92 + int i;
  93 +
  94 + pte = vmem_alloc_pages(PTE_ALLOC_ORDER);
  95 + if (!pte)
  96 + return NULL;
  97 + pte_val(empty_pte) = _PAGE_TYPE_EMPTY;
  98 + for (i = 0; i < PTRS_PER_PTE; i++)
  99 + set_pte(pte + i, empty_pte);
  100 + return pte;
  101 +}
  102 +
  103 +/*
  104 + * Add a physical memory range to the 1:1 mapping.
  105 + */
  106 +static int vmem_add_range(unsigned long start, unsigned long size)
  107 +{
  108 + unsigned long address;
  109 + pgd_t *pg_dir;
  110 + pmd_t *pm_dir;
  111 + pte_t *pt_dir;
  112 + pte_t pte;
  113 + int ret = -ENOMEM;
  114 +
  115 + for (address = start; address < start + size; address += PAGE_SIZE) {
  116 + pg_dir = pgd_offset_k(address);
  117 + if (pgd_none(*pg_dir)) {
  118 + pm_dir = vmem_pmd_alloc();
  119 + if (!pm_dir)
  120 + goto out;
  121 + pgd_populate(&init_mm, pg_dir, pm_dir);
  122 + }
  123 +
  124 + pm_dir = pmd_offset(pg_dir, address);
  125 + if (pmd_none(*pm_dir)) {
  126 + pt_dir = vmem_pte_alloc();
  127 + if (!pt_dir)
  128 + goto out;
  129 + pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
  130 + }
  131 +
  132 + pt_dir = pte_offset_kernel(pm_dir, address);
  133 + pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
  134 + set_pte(pt_dir, pte);
  135 + }
  136 + ret = 0;
  137 +out:
  138 + flush_tlb_kernel_range(start, start + size);
  139 + return ret;
  140 +}
  141 +
  142 +/*
  143 + * Remove a physical memory range from the 1:1 mapping.
  144 + * Currently only invalidates page table entries.
  145 + */
  146 +static void vmem_remove_range(unsigned long start, unsigned long size)
  147 +{
  148 + unsigned long address;
  149 + pgd_t *pg_dir;
  150 + pmd_t *pm_dir;
  151 + pte_t *pt_dir;
  152 + pte_t pte;
  153 +
  154 + pte_val(pte) = _PAGE_TYPE_EMPTY;
  155 + for (address = start; address < start + size; address += PAGE_SIZE) {
  156 + pg_dir = pgd_offset_k(address);
  157 + if (pgd_none(*pg_dir))
  158 + continue;
  159 + pm_dir = pmd_offset(pg_dir, address);
  160 + if (pmd_none(*pm_dir))
  161 + continue;
  162 + pt_dir = pte_offset_kernel(pm_dir, address);
  163 + set_pte(pt_dir, pte);
  164 + }
  165 + flush_tlb_kernel_range(start, start + size);
  166 +}
  167 +
  168 +/*
  169 + * Add a backed mem_map array to the virtual mem_map array.
  170 + */
  171 +static int vmem_add_mem_map(unsigned long start, unsigned long size)
  172 +{
  173 + unsigned long address, start_addr, end_addr;
  174 + struct page *map_start, *map_end;
  175 + pgd_t *pg_dir;
  176 + pmd_t *pm_dir;
  177 + pte_t *pt_dir;
  178 + pte_t pte;
  179 + int ret = -ENOMEM;
  180 +
  181 + map_start = vmem_map + PFN_DOWN(start);
  182 + map_end = vmem_map + PFN_DOWN(start + size);
  183 +
  184 + start_addr = (unsigned long) map_start & PAGE_MASK;
  185 + end_addr = PFN_ALIGN((unsigned long) map_end);
  186 +
  187 + for (address = start_addr; address < end_addr; address += PAGE_SIZE) {
  188 + pg_dir = pgd_offset_k(address);
  189 + if (pgd_none(*pg_dir)) {
  190 + pm_dir = vmem_pmd_alloc();
  191 + if (!pm_dir)
  192 + goto out;
  193 + pgd_populate(&init_mm, pg_dir, pm_dir);
  194 + }
  195 +
  196 + pm_dir = pmd_offset(pg_dir, address);
  197 + if (pmd_none(*pm_dir)) {
  198 + pt_dir = vmem_pte_alloc();
  199 + if (!pt_dir)
  200 + goto out;
  201 + pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
  202 + }
  203 +
  204 + pt_dir = pte_offset_kernel(pm_dir, address);
  205 + if (pte_none(*pt_dir)) {
  206 + unsigned long new_page;
  207 +
  208 + new_page =__pa(vmem_alloc_pages(0));
  209 + if (!new_page)
  210 + goto out;
  211 + pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
  212 + set_pte(pt_dir, pte);
  213 + }
  214 + }
  215 + ret = 0;
  216 +out:
  217 + flush_tlb_kernel_range(start_addr, end_addr);
  218 + return ret;
  219 +}
  220 +
  221 +static int vmem_add_mem(unsigned long start, unsigned long size)
  222 +{
  223 + int ret;
  224 +
  225 + ret = vmem_add_range(start, size);
  226 + if (ret)
  227 + return ret;
  228 + return vmem_add_mem_map(start, size);
  229 +}
  230 +
  231 +/*
  232 + * Add memory segment to the segment list if it doesn't overlap with
  233 + * an already present segment.
  234 + */
  235 +static int insert_memory_segment(struct memory_segment *seg)
  236 +{
  237 + struct memory_segment *tmp;
  238 +
  239 + if (PFN_DOWN(seg->start + seg->size) > max_pfn ||
  240 + seg->start + seg->size < seg->start)
  241 + return -ERANGE;
  242 +
  243 + list_for_each_entry(tmp, &mem_segs, list) {
  244 + if (seg->start >= tmp->start + tmp->size)
  245 + continue;
  246 + if (seg->start + seg->size <= tmp->start)
  247 + continue;
  248 + return -ENOSPC;
  249 + }
  250 + list_add(&seg->list, &mem_segs);
  251 + return 0;
  252 +}
  253 +
  254 +/*
  255 + * Remove memory segment from the segment list.
  256 + */
  257 +static void remove_memory_segment(struct memory_segment *seg)
  258 +{
  259 + list_del(&seg->list);
  260 +}
  261 +
  262 +static void __remove_shared_memory(struct memory_segment *seg)
  263 +{
  264 + remove_memory_segment(seg);
  265 + vmem_remove_range(seg->start, seg->size);
  266 +}
  267 +
  268 +int remove_shared_memory(unsigned long start, unsigned long size)
  269 +{
  270 + struct memory_segment *seg;
  271 + int ret;
  272 +
  273 + mutex_lock(&vmem_mutex);
  274 +
  275 + ret = -ENOENT;
  276 + list_for_each_entry(seg, &mem_segs, list) {
  277 + if (seg->start == start && seg->size == size)
  278 + break;
  279 + }
  280 +
  281 + if (seg->start != start || seg->size != size)
  282 + goto out;
  283 +
  284 + ret = 0;
  285 + __remove_shared_memory(seg);
  286 + kfree(seg);
  287 +out:
  288 + mutex_unlock(&vmem_mutex);
  289 + return ret;
  290 +}
  291 +
  292 +int add_shared_memory(unsigned long start, unsigned long size)
  293 +{
  294 + struct memory_segment *seg;
  295 + struct page *page;
  296 + unsigned long pfn, num_pfn, end_pfn;
  297 + int ret;
  298 +
  299 + mutex_lock(&vmem_mutex);
  300 + ret = -ENOMEM;
  301 + seg = kzalloc(sizeof(*seg), GFP_KERNEL);
  302 + if (!seg)
  303 + goto out;
  304 + seg->start = start;
  305 + seg->size = size;
  306 +
  307 + ret = insert_memory_segment(seg);
  308 + if (ret)
  309 + goto out_free;
  310 +
  311 + ret = vmem_add_mem(start, size);
  312 + if (ret)
  313 + goto out_remove;
  314 +
  315 + pfn = PFN_DOWN(start);
  316 + num_pfn = PFN_DOWN(size);
  317 + end_pfn = pfn + num_pfn;
  318 +
  319 + page = pfn_to_page(pfn);
  320 + memset(page, 0, num_pfn * sizeof(struct page));
  321 +
  322 + for (; pfn < end_pfn; pfn++) {
  323 + page = pfn_to_page(pfn);
  324 + init_page_count(page);
  325 + reset_page_mapcount(page);
  326 + SetPageReserved(page);
  327 + INIT_LIST_HEAD(&page->lru);
  328 + }
  329 + goto out;
  330 +
  331 +out_remove:
  332 + __remove_shared_memory(seg);
  333 +out_free:
  334 + kfree(seg);
  335 +out:
  336 + mutex_unlock(&vmem_mutex);
  337 + return ret;
  338 +}
  339 +
  340 +/*
  341 + * map whole physical memory to virtual memory (identity mapping)
  342 + */
  343 +void __init vmem_map_init(void)
  344 +{
  345 + unsigned long map_size;
  346 + int i;
  347 +
  348 + map_size = ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * sizeof(struct page);
  349 + vmalloc_end = PFN_ALIGN(VMALLOC_END_INIT) - PFN_ALIGN(map_size);
  350 + vmem_map = (struct page *) vmalloc_end;
  351 + NODE_DATA(0)->node_mem_map = vmem_map;
  352 +
  353 + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
  354 + vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
  355 +}
  356 +
  357 +/*
  358 + * Convert memory chunk array to a memory segment list so there is a single
  359 + * list that contains both r/w memory and shared memory segments.
  360 + */
  361 +static int __init vmem_convert_memory_chunk(void)
  362 +{
  363 + struct memory_segment *seg;
  364 + int i;
  365 +
  366 + mutex_lock(&vmem_mutex);
  367 + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
  368 + if (!memory_chunk[i].size)
  369 + continue;
  370 + seg = kzalloc(sizeof(*seg), GFP_KERNEL);
  371 + if (!seg)
  372 + panic("Out of memory...\n");
  373 + seg->start = memory_chunk[i].addr;
  374 + seg->size = memory_chunk[i].size;
  375 + insert_memory_segment(seg);
  376 + }
  377 + mutex_unlock(&vmem_mutex);
  378 + return 0;
  379 +}
  380 +
  381 +core_initcall(vmem_convert_memory_chunk);
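
To summarise what vmem_map_init() above establishes, a hedged sketch
(the diagram and the helper are illustrative, not part of the patch):
the struct page array is placed at the top of the former vmalloc
range, vmalloc_end is lowered accordingly, and since node 0's mem_map
points at vmem_map, the generic flat-memory pfn/page conversion
becomes plain pointer arithmetic.

	/*
	 *   VMALLOC_END_INIT  ------------------------------------
	 *                       vmem_map[]  (one struct page per pfn)
	 *   vmalloc_end       ------------------------------------  new VMALLOC_END
	 *                       vmalloc area
	 *   VMALLOC_START     ------------------------------------
	 */
	static inline struct page *example_pfn_to_page(unsigned long pfn)
	{
		/* The entry always exists virtually, but may be unbacked for
		 * holes; check pfn_valid() (see asm-s390/page.h below). */
		return vmem_map + pfn;
	}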
include/asm-s390/page.h
... ... @@ -127,6 +127,26 @@
127 127 return skey;
128 128 }
129 129  
  130 +extern unsigned long max_pfn;
  131 +
  132 +static inline int pfn_valid(unsigned long pfn)
  133 +{
  134 + unsigned long dummy;
  135 + int ccode;
  136 +
  137 + if (pfn >= max_pfn)
  138 + return 0;
  139 +
  140 + asm volatile(
  141 + " lra %0,0(%2)\n"
  142 + " ipm %1\n"
  143 + " srl %1,28\n"
  144 + : "=d" (dummy), "=d" (ccode)
  145 + : "a" (pfn << PAGE_SHIFT)
  146 + : "cc");
  147 + return !ccode;
  148 +}
  149 +
130 150 #endif /* !__ASSEMBLY__ */
131 151  
132 152 /* to align the pointer to the (next) page boundary */
... ... @@ -138,8 +158,6 @@
138 158 #define __va(x) (void *)(unsigned long)(x)
139 159 #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
140 160 #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
141   -
142   -#define pfn_valid(pfn) ((pfn) < max_mapnr)
143 161 #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
144 162  
145 163 #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
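
The LRA-based pfn_valid() above tests whether a pfn is actually backed
in the 1:1 mapping, since with the virtual memmap both the identity
mapping and the struct pages are only guaranteed to exist for real
memory chunks and attached segments. A hedged usage sketch, mirroring
the show_mem() change earlier in this patch:

	unsigned long pfn;
	struct page *page;

	for (pfn = 0; pfn < max_pfn; pfn++) {
		if (!pfn_valid(pfn))
			continue;	/* hole: its struct page may be unbacked */
		page = pfn_to_page(pfn);
		/* ... safe to inspect the page here ... */
	}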
include/asm-s390/pgalloc.h
... ... @@ -25,8 +25,11 @@
25 25 * Page allocation orders.
26 26 */
27 27 #ifndef __s390x__
  28 +# define PTE_ALLOC_ORDER 0
  29 +# define PMD_ALLOC_ORDER 0
28 30 # define PGD_ALLOC_ORDER 1
29 31 #else /* __s390x__ */
  32 +# define PTE_ALLOC_ORDER 0
30 33 # define PMD_ALLOC_ORDER 2
31 34 # define PGD_ALLOC_ORDER 2
32 35 #endif /* __s390x__ */
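
These allocation orders are what vmem_pmd_alloc() and vmem_pte_alloc()
in the new vmem.c rely on. As a cross-check against the bootmem
allocations removed from the old 64 bit paging_init() above:

	PTE_ALLOC_ORDER 0  ->  1 page   (was alloc_bootmem_pages(PAGE_SIZE))
	PMD_ALLOC_ORDER 2  ->  4 pages  (was alloc_bootmem_pages(PAGE_SIZE * 4))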
include/asm-s390/pgtable.h
... ... @@ -107,23 +107,25 @@
107 107 * The vmalloc() routines leaves a hole of 4kB between each vmalloced
108 108 * area for the same reason. ;)
109 109 */
  110 +extern unsigned long vmalloc_end;
110 111 #define VMALLOC_OFFSET (8*1024*1024)
111 112 #define VMALLOC_START (((unsigned long) high_memory + VMALLOC_OFFSET) \
112 113 & ~(VMALLOC_OFFSET-1))
  114 +#define VMALLOC_END vmalloc_end
113 115  
114 116 /*
115 117 * We need some free virtual space to be able to do vmalloc.
116 118 * VMALLOC_MIN_SIZE defines the minimum size of the vmalloc
117 119 * area. On a machine with 2GB memory we make sure that we
118 120 * have at least 128MB free space for vmalloc. On a machine
119   - * with 4TB we make sure we have at least 1GB.
  121 + * with 4TB we make sure we have at least 128GB.
120 122 */
121 123 #ifndef __s390x__
122 124 #define VMALLOC_MIN_SIZE 0x8000000UL
123   -#define VMALLOC_END 0x80000000UL
  125 +#define VMALLOC_END_INIT 0x80000000UL
124 126 #else /* __s390x__ */
125   -#define VMALLOC_MIN_SIZE 0x40000000UL
126   -#define VMALLOC_END 0x40000000000UL
  127 +#define VMALLOC_MIN_SIZE 0x2000000000UL
  128 +#define VMALLOC_END_INIT 0x40000000000UL
127 129 #endif /* __s390x__ */
128 130  
129 131 /*
130 132  
... ... @@ -815,10 +817,16 @@
815 817  
816 818 #define kern_addr_valid(addr) (1)
817 819  
  820 +extern int add_shared_memory(unsigned long start, unsigned long size);
  821 +extern int remove_shared_memory(unsigned long start, unsigned long size);
  822 +
818 823 /*
819 824 * No page table caches to initialise
820 825 */
821 826 #define pgtable_cache_init() do { } while (0)
  827 +
  828 +#define __HAVE_ARCH_MEMMAP_INIT
  829 +extern void memmap_init(unsigned long, int, unsigned long, unsigned long);
822 830  
823 831 #define __HAVE_ARCH_PTEP_ESTABLISH
824 832 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
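
For context, defining __HAVE_ARCH_MEMMAP_INIT suppresses the generic
fallback in mm/page_alloc.c, which otherwise expands memmap_init()
straight to memmap_init_zone() over the whole zone; the s390 override
in vmem.c instead initialises only the memmap ranges that lie inside
real memory chunks. The generic fallback looks roughly like this
(quoted from memory, not from this patch):

	#ifndef __HAVE_ARCH_MEMMAP_INIT
	#define memmap_init(size, nid, zone, start_pfn) \
		memmap_init_zone((size), (nid), (zone), (start_pfn))
	#endif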