Commit 667b279baa529a1b5bd120d4ce3df643a5749263

Authored by Paul Mundt
1 parent a16382ce1c

sh: lockless get_user_pages_fast()

Implement get_user_pages_fast without locking in the fastpath on sh.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

Showing 2 changed files with 274 additions and 1 deletion
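For context, the interface this patch wires up on sh is the same get_user_pages_fast() interface other architectures already export. A minimal sketch of a caller, with a hypothetical helper name and abbreviated error handling (not part of this patch):

    #include <linux/mm.h>
    #include <linux/errno.h>

    /* Pin 'nr' pages of a user buffer for writing, then release them. */
    static int pin_user_buffer(unsigned long uaddr, int nr, struct page **pages)
    {
            int i, got;

            got = get_user_pages_fast(uaddr, nr, 1 /* write */, pages);
            if (got <= 0)
                    return got ? got : -EFAULT;

            /* ... use the pinned pages ... */

            for (i = 0; i < got; i++)
                    put_page(pages[i]);

            return got;
    }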

arch/sh/mm/Makefile
@@ -15,7 +15,7 @@
 obj-y			+= $(cacheops-y)
 
 mmu-y			:= nommu.o extable_32.o
-mmu-$(CONFIG_MMU)	:= extable_$(BITS).o fault_$(BITS).o \
+mmu-$(CONFIG_MMU)	:= extable_$(BITS).o fault_$(BITS).o gup.o \
			   ioremap.o kmap.o pgtable.o tlbflush_$(BITS).o
 
 obj-y			+= $(mmu-y)

arch/sh/mm/gup.c (new file)
+/*
+ * Lockless get_user_pages_fast for SuperH
+ *
+ * Copyright (C) 2009 - 2010 Paul Mundt
+ *
+ * Cloned from the x86 and PowerPC versions, by:
+ *
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
+
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+#ifndef CONFIG_X2TLB
+        return ACCESS_ONCE(*ptep);
+#else
+        /*
+         * With get_user_pages_fast, we walk down the pagetables without
+         * taking any locks. For this we would like to load the pointers
+         * atomically, but that is not possible with 64-bit PTEs. What
+         * we do have is the guarantee that a pte will only either go
+         * from not present to present, or present to not present or both
+         * -- it will not switch to a completely different present page
+         * without a TLB flush in between; something that we are blocking
+         * by holding interrupts off.
+         *
+         * Setting ptes from not present to present goes:
+         * ptep->pte_high = h;
+         * smp_wmb();
+         * ptep->pte_low = l;
+         *
+         * And present to not present goes:
+         * ptep->pte_low = 0;
+         * smp_wmb();
+         * ptep->pte_high = 0;
+         *
+         * We must ensure here that the load of pte_low sees l iff pte_high
+         * sees h. We load pte_high *after* loading pte_low, which ensures we
+         * don't see an older value of pte_high. *Then* we recheck pte_low,
+         * which ensures that we haven't picked up a changed pte high. We might
+         * have got rubbish values from pte_low and pte_high, but we are
+         * guaranteed that pte_low will not have the present bit set *unless*
+         * it is 'l'. And get_user_pages_fast only operates on present ptes, so
+         * we're safe.
+         *
+         * gup_get_pte should not be used or copied outside gup.c without being
+         * very careful -- it does not atomically load the pte or anything that
+         * is likely to be useful for you.
+         */
+        pte_t pte;
+
+retry:
+        pte.pte_low = ptep->pte_low;
+        smp_rmb();
+        pte.pte_high = ptep->pte_high;
+        smp_rmb();
+        if (unlikely(pte.pte_low != ptep->pte_low))
+                goto retry;
+
+        return pte;
+#endif
+}
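The retry loop above only works because of the store ordering the comment describes on the writer side. Written out as plain statements, that protocol is (a sketch of the ordering assumed by the comment, not code added by this patch):

    /* Making a pte present: publish the high word before the low word,
     * so a present pte_low implies a matching pte_high. */
    ptep->pte_high = h;
    smp_wmb();
    ptep->pte_low = l;

    /* Tearing a pte down: clear the low word (the present bit) first. */
    ptep->pte_low = 0;
    smp_wmb();
    ptep->pte_high = 0;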
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
+                unsigned long end, int write, struct page **pages, int *nr)
+{
+        u64 mask, result;
+        pte_t *ptep;
+
+#ifdef CONFIG_X2TLB
+        result = _PAGE_PRESENT | _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ);
+        if (write)
+                result |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE);
+#elif defined(CONFIG_SUPERH64)
+        result = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
+        if (write)
+                result |= _PAGE_WRITE;
+#else
+        result = _PAGE_PRESENT | _PAGE_USER;
+        if (write)
+                result |= _PAGE_RW;
+#endif
+
+        mask = result | _PAGE_SPECIAL;
+
+        ptep = pte_offset_map(&pmd, addr);
+        do {
+                pte_t pte = gup_get_pte(ptep);
+                struct page *page;
+
+                if ((pte_val(pte) & mask) != result) {
+                        pte_unmap(ptep);
+                        return 0;
+                }
+                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+                page = pte_page(pte);
+                get_page(page);
+                pages[*nr] = page;
+                (*nr)++;
+
+        } while (ptep++, addr += PAGE_SIZE, addr != end);
+        pte_unmap(ptep - 1);
+
+        return 1;
+}
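The single `(pte_val(pte) & mask) != result` test folds several checks into one: the pte must be present and user-accessible, writable when a write was requested, and must not be special (mask includes _PAGE_SPECIAL while result does not). A worked illustration with made-up bit values, since the real ones live in the sh pgtable headers (hypothetical names, not from this patch):

    /* Hypothetical bit assignments, for illustration only. */
    enum { X_PRESENT = 0x001, X_USER = 0x040, X_SPECIAL = 0x200 };

    static int fastpath_ok(unsigned long pteval)
    {
            unsigned long result = X_PRESENT | X_USER;   /* bits that must be set    */
            unsigned long mask   = result | X_SPECIAL;   /* bits we refuse to ignore */

            /* Ordinary present user pte (0x041): accepted.
             * Same pte with X_SPECIAL set (0x241): rejected, so pte_special()
             * mappings are left to the slow path. */
            return (pteval & mask) == result;
    }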
+
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+                int write, struct page **pages, int *nr)
+{
+        unsigned long next;
+        pmd_t *pmdp;
+
+        pmdp = pmd_offset(&pud, addr);
+        do {
+                pmd_t pmd = *pmdp;
+
+                next = pmd_addr_end(addr, end);
+                if (pmd_none(pmd))
+                        return 0;
+                if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+                        return 0;
+        } while (pmdp++, addr = next, addr != end);
+
+        return 1;
+}
+
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+                int write, struct page **pages, int *nr)
+{
+        unsigned long next;
+        pud_t *pudp;
+
+        pudp = pud_offset(&pgd, addr);
+        do {
+                pud_t pud = *pudp;
+
+                next = pud_addr_end(addr, end);
+                if (pud_none(pud))
+                        return 0;
+                if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+                        return 0;
+        } while (pudp++, addr = next, addr != end);
+
+        return 1;
+}
+
+/*
+ * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+                          struct page **pages)
+{
+        struct mm_struct *mm = current->mm;
+        unsigned long addr, len, end;
+        unsigned long next;
+        unsigned long flags;
+        pgd_t *pgdp;
+        int nr = 0;
+
+        start &= PAGE_MASK;
+        addr = start;
+        len = (unsigned long) nr_pages << PAGE_SHIFT;
+        end = start + len;
+        if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+                                        (void __user *)start, len)))
+                return 0;
+
+        /*
+         * This doesn't prevent pagetable teardown, but does prevent
+         * the pagetables and pages from being freed.
+         */
+        local_irq_save(flags);
+        pgdp = pgd_offset(mm, addr);
+        do {
+                pgd_t pgd = *pgdp;
+
+                next = pgd_addr_end(addr, end);
+                if (pgd_none(pgd))
+                        break;
+                if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+                        break;
+        } while (pgdp++, addr = next, addr != end);
+        local_irq_restore(flags);
+
+        return nr;
+}
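Because __get_user_pages_fast() never takes mmap_sem and never sleeps, a caller in atomic context has to cope with getting back fewer pages than it asked for rather than falling back to the regular GUP. A hedged sketch of such a caller (the helper name and policy are hypothetical):

    /* May be called with interrupts disabled; must not sleep. */
    static int try_pin_atomic(unsigned long uaddr, int nr, struct page **pages)
    {
            int got = __get_user_pages_fast(uaddr, nr, 0 /* read-only */, pages);

            if (got < nr) {
                    /* Can't fall back to the sleeping path here; undo and report. */
                    while (got--)
                            put_page(pages[got]);
                    return -EAGAIN;
            }
            return nr;
    }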
+
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start:	starting user address
+ * @nr_pages:	number of pages from start to pin
+ * @write:	whether pages will be written to
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+                        struct page **pages)
+{
+        struct mm_struct *mm = current->mm;
+        unsigned long addr, len, end;
+        unsigned long next;
+        pgd_t *pgdp;
+        int nr = 0;
+
+        start &= PAGE_MASK;
+        addr = start;
+        len = (unsigned long) nr_pages << PAGE_SHIFT;
+
+        end = start + len;
+        if (end < start)
+                goto slow_irqon;
+
+        local_irq_disable();
+        pgdp = pgd_offset(mm, addr);
+        do {
+                pgd_t pgd = *pgdp;
+
+                next = pgd_addr_end(addr, end);
+                if (pgd_none(pgd))
+                        goto slow;
+                if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+                        goto slow;
+        } while (pgdp++, addr = next, addr != end);
+        local_irq_enable();
+
+        VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+        return nr;
+
+        {
+                int ret;
+
+slow:
+                local_irq_enable();
+slow_irqon:
+                /* Try to get the remaining pages with get_user_pages */
+                start += nr << PAGE_SHIFT;
+                pages += nr;
+
+                down_read(&mm->mmap_sem);
+                ret = get_user_pages(current, mm, start,
+                        (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+                up_read(&mm->mmap_sem);
+
+                /* Have to be a bit careful with return values */
+                if (nr > 0) {
+                        if (ret < 0)
+                                ret = nr;
+                        else
+                                ret += nr;
+                }
+
+                return ret;
+        }
+}
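Per the kerneldoc and the return-value fixup at the end of the slow path, get_user_pages_fast() reports the number of pages actually pinned, which may be fewer than requested, and returns -errno only when nothing was pinned. A caller that needs the whole range therefore loops, treating a short count as partial progress. Roughly (a sketch with hypothetical names, not taken from the kernel):

    static int pin_whole_range(unsigned long uaddr, int nr, struct page **pages)
    {
            int done = 0;

            while (done < nr) {
                    int got = get_user_pages_fast(uaddr + ((unsigned long)done << PAGE_SHIFT),
                                                  nr - done, 1 /* write */, pages + done);
                    if (got <= 0) {
                            /* Hard failure: drop whatever was pinned so far. */
                            while (done--)
                                    put_page(pages[done]);
                            return got ? got : -EFAULT;
                    }
                    done += got;
            }
            return nr;
    }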