Commit c74df32c724a1652ad8399b4891bb02c9d43743a

Authored by Hugh Dickins
Committed by Linus Torvalds
1 parent 1bb3630e89

[PATCH] mm: ptd_alloc take ptlock

Second step in pushing down the page_table_lock.  Remove the temporary
bridging hack from __pud_alloc, __pmd_alloc, __pte_alloc: expect callers not
to hold page_table_lock, whether it's on init_mm or a user mm; take
page_table_lock internally to check if a racing task already allocated.
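
As a simplified sketch (condensed from the mm/memory.c hunks below, and
ignoring the __ARCH_HAS_4LEVEL_HACK variant), the allocators now follow an
allocate-then-lock-then-recheck pattern:

    int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
    {
            pmd_t *new = pmd_alloc_one(mm, address);  /* no locks held: may sleep */
            if (!new)
                    return -ENOMEM;

            spin_lock(&mm->page_table_lock);
            if (pud_present(*pud))          /* a racing task populated it first */
                    pmd_free(new);
            else
                    pud_populate(mm, pud, new);
            spin_unlock(&mm->page_table_lock);
            return 0;
    }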

Convert their callers from common code.  But avoid coming back to change them
again later: instead of moving the spin_lock(&mm->page_table_lock) down,
switch over to new macros pte_alloc_map_lock and pte_unmap_unlock, which
encapsulate the mapping+locking and unlocking+unmapping together, and in the
end may use alternatives to the mm page_table_lock itself.
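
For illustration, a minimal caller following the converted sites below (the
function name example_set_pte and its arguments are made up here; only the
pte_alloc_map_lock/pte_unmap_unlock usage is taken from this patch):

    static int example_set_pte(struct mm_struct *mm, pmd_t *pmd,
                               unsigned long addr, pte_t pteval)
    {
            spinlock_t *ptl;
            pte_t *pte;

            /* caller holds mmap_sem, as all converted callers below do */
            pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);  /* map + lock */
            if (!pte)
                    return -ENOMEM;
            if (pte_none(*pte))
                    set_pte_at(mm, addr, pte, pteval);
            pte_unmap_unlock(pte, ptl);                     /* unlock + unmap */
            return 0;
    }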

These callers all hold mmap_sem (some exclusively, some not), so at no level
can a page table be whipped away from beneath them; and pte_alloc uses the
"atomic" pmd_present to test whether it needs to allocate.  It appears that on
all arches we can safely descend without page_table_lock.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 7 changed files with 90 additions and 135 deletions

... ... @@ -309,25 +309,24 @@
309 309 pud_t * pud;
310 310 pmd_t * pmd;
311 311 pte_t * pte;
  312 + spinlock_t *ptl;
312 313  
313 314 if (unlikely(anon_vma_prepare(vma)))
314   - goto out_sig;
  315 + goto out;
315 316  
316 317 flush_dcache_page(page);
317 318 pgd = pgd_offset(mm, address);
318   -
319   - spin_lock(&mm->page_table_lock);
320 319 pud = pud_alloc(mm, pgd, address);
321 320 if (!pud)
322 321 goto out;
323 322 pmd = pmd_alloc(mm, pud, address);
324 323 if (!pmd)
325 324 goto out;
326   - pte = pte_alloc_map(mm, pmd, address);
  325 + pte = pte_alloc_map_lock(mm, pmd, address, &ptl);
327 326 if (!pte)
328 327 goto out;
329 328 if (!pte_none(*pte)) {
330   - pte_unmap(pte);
  329 + pte_unmap_unlock(pte, ptl);
331 330 goto out;
332 331 }
333 332 inc_mm_counter(mm, anon_rss);
... ... @@ -335,14 +334,11 @@
335 334 set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
336 335 page, vma->vm_page_prot))));
337 336 page_add_anon_rmap(page, vma, address);
338   - pte_unmap(pte);
339   - spin_unlock(&mm->page_table_lock);
  337 + pte_unmap_unlock(pte, ptl);
340 338  
341 339 /* no need for flush_tlb */
342 340 return;
343 341 out:
344   - spin_unlock(&mm->page_table_lock);
345   -out_sig:
346 342 __free_page(page);
347 343 force_sig(SIGKILL, current);
348 344 }
... ... @@ -779,9 +779,27 @@
779 779 }
780 780 #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
781 781  
  782 +#define pte_offset_map_lock(mm, pmd, address, ptlp) \
  783 +({ \
  784 + spinlock_t *__ptl = &(mm)->page_table_lock; \
  785 + pte_t *__pte = pte_offset_map(pmd, address); \
  786 + *(ptlp) = __ptl; \
  787 + spin_lock(__ptl); \
  788 + __pte; \
  789 +})
  790 +
  791 +#define pte_unmap_unlock(pte, ptl) do { \
  792 + spin_unlock(ptl); \
  793 + pte_unmap(pte); \
  794 +} while (0)
  795 +
782 796 #define pte_alloc_map(mm, pmd, address) \
783 797 ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
784 798 NULL: pte_offset_map(pmd, address))
  799 +
  800 +#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
  801 + ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
  802 + NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
785 803  
786 804 #define pte_alloc_kernel(pmd, address) \
787 805 ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
... ... @@ -255,7 +255,6 @@
255 255 /*
256 256 * Link in the new vma and copy the page table entries.
257 257 */
258   - spin_lock(&mm->page_table_lock);
259 258 *pprev = tmp;
260 259 pprev = &tmp->vm_next;
261 260  
... ... @@ -265,7 +264,6 @@
265 264  
266 265 mm->map_count++;
267 266 retval = copy_page_range(mm, oldmm, tmp);
268   - spin_unlock(&mm->page_table_lock);
269 267  
270 268 if (tmp->vm_ops && tmp->vm_ops->open)
271 269 tmp->vm_ops->open(tmp);
... ... @@ -63,23 +63,20 @@
63 63 pud_t *pud;
64 64 pgd_t *pgd;
65 65 pte_t pte_val;
  66 + spinlock_t *ptl;
66 67  
67 68 BUG_ON(vma->vm_flags & VM_RESERVED);
68 69  
69 70 pgd = pgd_offset(mm, addr);
70   - spin_lock(&mm->page_table_lock);
71   -
72 71 pud = pud_alloc(mm, pgd, addr);
73 72 if (!pud)
74   - goto err_unlock;
75   -
  73 + goto out;
76 74 pmd = pmd_alloc(mm, pud, addr);
77 75 if (!pmd)
78   - goto err_unlock;
79   -
80   - pte = pte_alloc_map(mm, pmd, addr);
  76 + goto out;
  77 + pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
81 78 if (!pte)
82   - goto err_unlock;
  79 + goto out;
83 80  
84 81 /*
85 82 * This page may have been truncated. Tell the
86 83  
... ... @@ -89,10 +86,10 @@
89 86 inode = vma->vm_file->f_mapping->host;
90 87 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
91 88 if (!page->mapping || page->index >= size)
92   - goto err_unlock;
  89 + goto unlock;
93 90 err = -ENOMEM;
94 91 if (page_mapcount(page) > INT_MAX/2)
95   - goto err_unlock;
  92 + goto unlock;
96 93  
97 94 if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
98 95 inc_mm_counter(mm, file_rss);
... ... @@ -101,17 +98,15 @@
101 98 set_pte_at(mm, addr, pte, mk_pte(page, prot));
102 99 page_add_file_rmap(page);
103 100 pte_val = *pte;
104   - pte_unmap(pte);
105 101 update_mmu_cache(vma, addr, pte_val);
106   -
107 102 err = 0;
108   -err_unlock:
109   - spin_unlock(&mm->page_table_lock);
  103 +unlock:
  104 + pte_unmap_unlock(pte, ptl);
  105 +out:
110 106 return err;
111 107 }
112 108 EXPORT_SYMBOL(install_page);
113 109  
114   -
115 110 /*
116 111 * Install a file pte to a given virtual memory address, release any
117 112 * previously existing mapping.
... ... @@ -125,23 +120,20 @@
125 120 pud_t *pud;
126 121 pgd_t *pgd;
127 122 pte_t pte_val;
  123 + spinlock_t *ptl;
128 124  
129 125 BUG_ON(vma->vm_flags & VM_RESERVED);
130 126  
131 127 pgd = pgd_offset(mm, addr);
132   - spin_lock(&mm->page_table_lock);
133   -
134 128 pud = pud_alloc(mm, pgd, addr);
135 129 if (!pud)
136   - goto err_unlock;
137   -
  130 + goto out;
138 131 pmd = pmd_alloc(mm, pud, addr);
139 132 if (!pmd)
140   - goto err_unlock;
141   -
142   - pte = pte_alloc_map(mm, pmd, addr);
  133 + goto out;
  134 + pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
143 135 if (!pte)
144   - goto err_unlock;
  136 + goto out;
145 137  
146 138 if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) {
147 139 update_hiwater_rss(mm);
... ... @@ -150,16 +142,12 @@
150 142  
151 143 set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
152 144 pte_val = *pte;
153   - pte_unmap(pte);
154 145 update_mmu_cache(vma, addr, pte_val);
155   - spin_unlock(&mm->page_table_lock);
156   - return 0;
157   -
158   -err_unlock:
159   - spin_unlock(&mm->page_table_lock);
  146 + pte_unmap_unlock(pte, ptl);
  147 + err = 0;
  148 +out:
160 149 return err;
161 150 }
162   -
163 151  
164 152 /***
165 153 * sys_remap_file_pages - remap arbitrary pages of a shared backing store
... ... @@ -277,12 +277,15 @@
277 277 unsigned long addr;
278 278  
279 279 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
  280 + src_pte = huge_pte_offset(src, addr);
  281 + if (!src_pte)
  282 + continue;
280 283 dst_pte = huge_pte_alloc(dst, addr);
281 284 if (!dst_pte)
282 285 goto nomem;
  286 + spin_lock(&dst->page_table_lock);
283 287 spin_lock(&src->page_table_lock);
284   - src_pte = huge_pte_offset(src, addr);
285   - if (src_pte && !pte_none(*src_pte)) {
  288 + if (!pte_none(*src_pte)) {
286 289 entry = *src_pte;
287 290 ptepage = pte_page(entry);
288 291 get_page(ptepage);
... ... @@ -290,6 +293,7 @@
290 293 set_huge_pte_at(dst, addr, dst_pte, entry);
291 294 }
292 295 spin_unlock(&src->page_table_lock);
  296 + spin_unlock(&dst->page_table_lock);
293 297 }
294 298 return 0;
295 299  
... ... @@ -354,7 +358,6 @@
354 358  
355 359 hugetlb_prefault_arch_hook(mm);
356 360  
357   - spin_lock(&mm->page_table_lock);
358 361 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
359 362 unsigned long idx;
360 363 pte_t *pte = huge_pte_alloc(mm, addr);
... ... @@ -389,11 +392,12 @@
389 392 goto out;
390 393 }
391 394 }
  395 + spin_lock(&mm->page_table_lock);
392 396 add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE);
393 397 set_huge_pte_at(mm, addr, pte, make_huge_pte(vma, page));
  398 + spin_unlock(&mm->page_table_lock);
394 399 }
395 400 out:
396   - spin_unlock(&mm->page_table_lock);
397 401 return ret;
398 402 }
399 403  
... ... @@ -282,14 +282,11 @@
282 282  
283 283 int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
284 284 {
285   - struct page *new;
286   -
287   - spin_unlock(&mm->page_table_lock);
288   - new = pte_alloc_one(mm, address);
289   - spin_lock(&mm->page_table_lock);
  285 + struct page *new = pte_alloc_one(mm, address);
290 286 if (!new)
291 287 return -ENOMEM;
292 288  
  289 + spin_lock(&mm->page_table_lock);
293 290 if (pmd_present(*pmd)) /* Another has populated it */
294 291 pte_free(new);
295 292 else {
... ... @@ -297,6 +294,7 @@
297 294 inc_page_state(nr_page_table_pages);
298 295 pmd_populate(mm, pmd, new);
299 296 }
  297 + spin_unlock(&mm->page_table_lock);
300 298 return 0;
301 299 }
302 300  
... ... @@ -344,9 +342,6 @@
344 342 * copy one vm_area from one task to the other. Assumes the page tables
345 343 * already present in the new task to be cleared in the whole range
346 344 * covered by this vma.
347   - *
348   - * dst->page_table_lock is held on entry and exit,
349   - * but may be dropped within p[mg]d_alloc() and pte_alloc_map().
350 345 */
351 346  
352 347 static inline void
... ... @@ -419,17 +414,19 @@
419 414 unsigned long addr, unsigned long end)
420 415 {
421 416 pte_t *src_pte, *dst_pte;
  417 + spinlock_t *src_ptl, *dst_ptl;
422 418 int progress = 0;
423 419 int rss[2];
424 420  
425 421 again:
426 422 rss[1] = rss[0] = 0;
427   - dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
  423 + dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
428 424 if (!dst_pte)
429 425 return -ENOMEM;
430 426 src_pte = pte_offset_map_nested(src_pmd, addr);
  427 + src_ptl = &src_mm->page_table_lock;
  428 + spin_lock(src_ptl);
431 429  
432   - spin_lock(&src_mm->page_table_lock);
433 430 do {
434 431 /*
435 432 * We are holding two locks at this point - either of them
... ... @@ -438,8 +435,8 @@
438 435 if (progress >= 32) {
439 436 progress = 0;
440 437 if (need_resched() ||
441   - need_lockbreak(&src_mm->page_table_lock) ||
442   - need_lockbreak(&dst_mm->page_table_lock))
  438 + need_lockbreak(src_ptl) ||
  439 + need_lockbreak(dst_ptl))
443 440 break;
444 441 }
445 442 if (pte_none(*src_pte)) {
... ... @@ -449,12 +446,12 @@
449 446 copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
450 447 progress += 8;
451 448 } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
452   - spin_unlock(&src_mm->page_table_lock);
453 449  
  450 + spin_unlock(src_ptl);
454 451 pte_unmap_nested(src_pte - 1);
455   - pte_unmap(dst_pte - 1);
456 452 add_mm_rss(dst_mm, rss[0], rss[1]);
457   - cond_resched_lock(&dst_mm->page_table_lock);
  453 + pte_unmap_unlock(dst_pte - 1, dst_ptl);
  454 + cond_resched();
458 455 if (addr != end)
459 456 goto again;
460 457 return 0;
... ... @@ -1049,8 +1046,9 @@
1049 1046 unsigned long addr, unsigned long end, pgprot_t prot)
1050 1047 {
1051 1048 pte_t *pte;
  1049 + spinlock_t *ptl;
1052 1050  
1053   - pte = pte_alloc_map(mm, pmd, addr);
  1051 + pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
1054 1052 if (!pte)
1055 1053 return -ENOMEM;
1056 1054 do {
... ... @@ -1062,7 +1060,7 @@
1062 1060 BUG_ON(!pte_none(*pte));
1063 1061 set_pte_at(mm, addr, pte, zero_pte);
1064 1062 } while (pte++, addr += PAGE_SIZE, addr != end);
1065   - pte_unmap(pte - 1);
  1063 + pte_unmap_unlock(pte - 1, ptl);
1066 1064 return 0;
1067 1065 }
1068 1066  
... ... @@ -1112,14 +1110,12 @@
1112 1110 BUG_ON(addr >= end);
1113 1111 pgd = pgd_offset(mm, addr);
1114 1112 flush_cache_range(vma, addr, end);
1115   - spin_lock(&mm->page_table_lock);
1116 1113 do {
1117 1114 next = pgd_addr_end(addr, end);
1118 1115 err = zeromap_pud_range(mm, pgd, addr, next, prot);
1119 1116 if (err)
1120 1117 break;
1121 1118 } while (pgd++, addr = next, addr != end);
1122   - spin_unlock(&mm->page_table_lock);
1123 1119 return err;
1124 1120 }
1125 1121  
... ... @@ -1133,8 +1129,9 @@
1133 1129 unsigned long pfn, pgprot_t prot)
1134 1130 {
1135 1131 pte_t *pte;
  1132 + spinlock_t *ptl;
1136 1133  
1137   - pte = pte_alloc_map(mm, pmd, addr);
  1134 + pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
1138 1135 if (!pte)
1139 1136 return -ENOMEM;
1140 1137 do {
... ... @@ -1142,7 +1139,7 @@
1142 1139 set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
1143 1140 pfn++;
1144 1141 } while (pte++, addr += PAGE_SIZE, addr != end);
1145   - pte_unmap(pte - 1);
  1142 + pte_unmap_unlock(pte - 1, ptl);
1146 1143 return 0;
1147 1144 }
1148 1145  
... ... @@ -1210,7 +1207,6 @@
1210 1207 pfn -= addr >> PAGE_SHIFT;
1211 1208 pgd = pgd_offset(mm, addr);
1212 1209 flush_cache_range(vma, addr, end);
1213   - spin_lock(&mm->page_table_lock);
1214 1210 do {
1215 1211 next = pgd_addr_end(addr, end);
1216 1212 err = remap_pud_range(mm, pgd, addr, next,
... ... @@ -1218,7 +1214,6 @@
1218 1214 if (err)
1219 1215 break;
1220 1216 } while (pgd++, addr = next, addr != end);
1221   - spin_unlock(&mm->page_table_lock);
1222 1217 return err;
1223 1218 }
1224 1219 EXPORT_SYMBOL(remap_pfn_range);
... ... @@ -1985,17 +1980,9 @@
1985 1980 * with external mmu caches can use to update those (ie the Sparc or
1986 1981 * PowerPC hashed page tables that act as extended TLBs).
1987 1982 *
1988   - * Note the "page_table_lock". It is to protect against kswapd removing
1989   - * pages from under us. Note that kswapd only ever _removes_ pages, never
1990   - * adds them. As such, once we have noticed that the page is not present,
1991   - * we can drop the lock early.
1992   - *
1993   - * The adding of pages is protected by the MM semaphore (which we hold),
1994   - * so we don't need to worry about a page being suddenly been added into
1995   - * our VM.
1996   - *
1997   - * We enter with the pagetable spinlock held, we are supposed to
1998   - * release it when done.
  1983 + * We enter with non-exclusive mmap_sem (to exclude vma changes,
  1984 + * but allow concurrent faults), and pte mapped but not yet locked.
  1985 + * We return with mmap_sem still held, but pte unmapped and unlocked.
1999 1986 */
2000 1987 static inline int handle_pte_fault(struct mm_struct *mm,
2001 1988 struct vm_area_struct *vma, unsigned long address,
... ... @@ -2003,6 +1990,7 @@
2003 1990 {
2004 1991 pte_t entry;
2005 1992  
  1993 + spin_lock(&mm->page_table_lock);
2006 1994 entry = *pte;
2007 1995 if (!pte_present(entry)) {
2008 1996 if (pte_none(entry)) {
... ... @@ -2051,30 +2039,18 @@
2051 2039 if (unlikely(is_vm_hugetlb_page(vma)))
2052 2040 return hugetlb_fault(mm, vma, address, write_access);
2053 2041  
2054   - /*
2055   - * We need the page table lock to synchronize with kswapd
2056   - * and the SMP-safe atomic PTE updates.
2057   - */
2058 2042 pgd = pgd_offset(mm, address);
2059   - spin_lock(&mm->page_table_lock);
2060   -
2061 2043 pud = pud_alloc(mm, pgd, address);
2062 2044 if (!pud)
2063   - goto oom;
2064   -
  2045 + return VM_FAULT_OOM;
2065 2046 pmd = pmd_alloc(mm, pud, address);
2066 2047 if (!pmd)
2067   - goto oom;
2068   -
  2048 + return VM_FAULT_OOM;
2069 2049 pte = pte_alloc_map(mm, pmd, address);
2070 2050 if (!pte)
2071   - goto oom;
2072   -
2073   - return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
  2051 + return VM_FAULT_OOM;
2074 2052  
2075   - oom:
2076   - spin_unlock(&mm->page_table_lock);
2077   - return VM_FAULT_OOM;
  2053 + return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
2078 2054 }
2079 2055  
2080 2056 #ifndef __PAGETABLE_PUD_FOLDED
... ... @@ -2084,24 +2060,16 @@
2084 2060 */
2085 2061 int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
2086 2062 {
2087   - pud_t *new;
2088   -
2089   - if (mm != &init_mm) /* Temporary bridging hack */
2090   - spin_unlock(&mm->page_table_lock);
2091   - new = pud_alloc_one(mm, address);
2092   - if (!new) {
2093   - if (mm != &init_mm) /* Temporary bridging hack */
2094   - spin_lock(&mm->page_table_lock);
  2063 + pud_t *new = pud_alloc_one(mm, address);
  2064 + if (!new)
2095 2065 return -ENOMEM;
2096   - }
2097 2066  
2098 2067 spin_lock(&mm->page_table_lock);
2099 2068 if (pgd_present(*pgd)) /* Another has populated it */
2100 2069 pud_free(new);
2101 2070 else
2102 2071 pgd_populate(mm, pgd, new);
2103   - if (mm == &init_mm) /* Temporary bridging hack */
2104   - spin_unlock(&mm->page_table_lock);
  2072 + spin_unlock(&mm->page_table_lock);
2105 2073 return 0;
2106 2074 }
2107 2075 #endif /* __PAGETABLE_PUD_FOLDED */
... ... @@ -2113,16 +2081,9 @@
2113 2081 */
2114 2082 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
2115 2083 {
2116   - pmd_t *new;
2117   -
2118   - if (mm != &init_mm) /* Temporary bridging hack */
2119   - spin_unlock(&mm->page_table_lock);
2120   - new = pmd_alloc_one(mm, address);
2121   - if (!new) {
2122   - if (mm != &init_mm) /* Temporary bridging hack */
2123   - spin_lock(&mm->page_table_lock);
  2084 + pmd_t *new = pmd_alloc_one(mm, address);
  2085 + if (!new)
2124 2086 return -ENOMEM;
2125   - }
2126 2087  
2127 2088 spin_lock(&mm->page_table_lock);
2128 2089 #ifndef __ARCH_HAS_4LEVEL_HACK
... ... @@ -2136,8 +2097,7 @@
2136 2097 else
2137 2098 pgd_populate(mm, pud, new);
2138 2099 #endif /* __ARCH_HAS_4LEVEL_HACK */
2139   - if (mm == &init_mm) /* Temporary bridging hack */
2140   - spin_unlock(&mm->page_table_lock);
  2100 + spin_unlock(&mm->page_table_lock);
2141 2101 return 0;
2142 2102 }
2143 2103 #endif /* __PAGETABLE_PMD_FOLDED */
... ... @@ -28,9 +28,6 @@
28 28 pud_t *pud;
29 29 pmd_t *pmd;
30 30  
31   - /*
32   - * We don't need page_table_lock: we have mmap_sem exclusively.
33   - */
34 31 pgd = pgd_offset(mm, addr);
35 32 if (pgd_none_or_clear_bad(pgd))
36 33 return NULL;
... ... @@ -50,25 +47,20 @@
50 47 {
51 48 pgd_t *pgd;
52 49 pud_t *pud;
53   - pmd_t *pmd = NULL;
  50 + pmd_t *pmd;
54 51  
55   - /*
56   - * We do need page_table_lock: because allocators expect that.
57   - */
58   - spin_lock(&mm->page_table_lock);
59 52 pgd = pgd_offset(mm, addr);
60 53 pud = pud_alloc(mm, pgd, addr);
61 54 if (!pud)
62   - goto out;
  55 + return NULL;
63 56  
64 57 pmd = pmd_alloc(mm, pud, addr);
65 58 if (!pmd)
66   - goto out;
  59 + return NULL;
67 60  
68 61 if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr))
69   - pmd = NULL;
70   -out:
71   - spin_unlock(&mm->page_table_lock);
  62 + return NULL;
  63 +
72 64 return pmd;
73 65 }
74 66  
... ... @@ -80,6 +72,7 @@
80 72 struct address_space *mapping = NULL;
81 73 struct mm_struct *mm = vma->vm_mm;
82 74 pte_t *old_pte, *new_pte, pte;
  75 + spinlock_t *old_ptl;
83 76  
84 77 if (vma->vm_file) {
85 78 /*
... ... @@ -95,9 +88,8 @@
95 88 new_vma->vm_truncate_count = 0;
96 89 }
97 90  
98   - spin_lock(&mm->page_table_lock);
99   - old_pte = pte_offset_map(old_pmd, old_addr);
100   - new_pte = pte_offset_map_nested(new_pmd, new_addr);
  91 + old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
  92 + new_pte = pte_offset_map_nested(new_pmd, new_addr);
101 93  
102 94 for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
103 95 new_pte++, new_addr += PAGE_SIZE) {
... ... @@ -110,8 +102,7 @@
110 102 }
111 103  
112 104 pte_unmap_nested(new_pte - 1);
113   - pte_unmap(old_pte - 1);
114   - spin_unlock(&mm->page_table_lock);
  105 + pte_unmap_unlock(old_pte - 1, old_ptl);
115 106 if (mapping)
116 107 spin_unlock(&mapping->i_mmap_lock);
117 108 }