Commit 5899329b19100c0b82dc78e9b21ed8b920c9ffb3

Authored by venkatesh.pallipadi@intel.com
Committed by H. Peter Anvin
1 parent 2ab640379a

x86: PAT: implement track/untrack of pfnmap regions for x86 - v3

Impact: New mm functionality.

Hookup remap_pfn_range and vm_insert_pfn and corresponding copy and free
routines with reserve and free tracking.

The reserve and free here only take care of non-RAM region mappings. For RAM
regions, the driver should use set_memory_[uc|wc|wb] to set the cache type and
then set up the mapping for the user pte. We can bypass the reserve/free below
in that case.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

Showing 2 changed files with 246 additions and 0 deletions Side-by-side Diff

arch/x86/include/asm/pgtable.h
... ... @@ -219,6 +219,11 @@
219 219 return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
220 220 }
221 221  
  222 +static inline u64 pte_pa(pte_t pte)
  223 +{
  224 + return pte_val(pte) & PTE_PFN_MASK;
  225 +}
  226 +
222 227 #define pte_page(pte) pfn_to_page(pte_pfn(pte))
223 228  
224 229 static inline int pmd_large(pmd_t pte)
... ... @@ -327,6 +332,11 @@
327 332 #define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
328 333  
329 334 #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
  335 +
  336 +/* x86 supplies its own track_pfn_vma_new(), track_pfn_vma_copy() and untrack_pfn_vma(); each name is defined to itself as a marker (presumably so generic code can detect the override with #ifdef/#ifndef -- confirm against the generic pgtable header) */
  337 +#define track_pfn_vma_new track_pfn_vma_new
  338 +#define track_pfn_vma_copy track_pfn_vma_copy
  339 +#define untrack_pfn_vma untrack_pfn_vma
330 340  
331 341 #ifndef __ASSEMBLY__
332 342 #define __HAVE_PHYS_MEM_ACCESS_PROT
... ... @@ -596,6 +596,242 @@
596 596 free_memtype(addr, addr + size);
597 597 }
598 598  
  599 +/*
  600 + * Internal interface to reserve a range of physical memory with prot.
  601 + * Reserved non RAM regions only and after successful reserve_memtype,
  602 + * this func also keeps identity mapping (if any) in sync with this new prot.
  603 + */
  604 +static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot)
  605 +{
  606 + int is_ram = 0;
  607 + int id_sz, ret;
  608 + unsigned long flags;
  609 + unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
  610 +
  611 + is_ram = pagerange_is_ram(paddr, paddr + size);
  612 +
  613 + if (is_ram != 0) {
  614 + /*
  615 + * For mapping RAM pages, drivers need to call
  616 + * set_memory_[uc|wc|wb] directly, for reserve and free, before
  617 + * setting up the PTE.
  618 + */
  619 + WARN_ON_ONCE(1);
  620 + return 0;
  621 + }
  622 +
  623 + ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
  624 + if (ret)
  625 + return ret;
  626 +
  627 + if (flags != want_flags) {
  628 + free_memtype(paddr, paddr + size);
  629 + printk(KERN_ERR
  630 + "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n",
  631 + current->comm, current->pid,
  632 + cattr_name(want_flags),
  633 + (unsigned long long)paddr,
  634 + (unsigned long long)(paddr + size),
  635 + cattr_name(flags));
  636 + return -EINVAL;
  637 + }
  638 +
  639 + /* Need to keep identity mapping in sync */
  640 + if (paddr >= __pa(high_memory))
  641 + return 0;
  642 +
  643 + id_sz = (__pa(high_memory) < paddr + size) ?
  644 + __pa(high_memory) - paddr :
  645 + size;
  646 +
  647 + if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
  648 + free_memtype(paddr, paddr + size);
  649 + printk(KERN_ERR
  650 + "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
  651 + "for %Lx-%Lx\n",
  652 + current->comm, current->pid,
  653 + cattr_name(flags),
  654 + (unsigned long long)paddr,
  655 + (unsigned long long)(paddr + size));
  656 + return -EINVAL;
  657 + }
  658 + return 0;
  659 +}
  660 +
  661 +/*
  662 + * Internal interface to free a range of physical memory.
  663 + * Frees non RAM regions only.
  664 + */
  665 +static void free_pfn_range(u64 paddr, unsigned long size)
  666 +{
  667 + int is_ram;
  668 +
  669 + is_ram = pagerange_is_ram(paddr, paddr + size);
  670 + if (is_ram == 0)
  671 + free_memtype(paddr, paddr + size);
  672 +}
  673 +
  674 +/*
  675 + * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
  676 + * copied through copy_page_range().
  677 + *
  678 + * If the vma has a linear pfn mapping for the entire range, we get the prot
  679 + * from pte and reserve the entire vma range with single reserve_pfn_range call.
  680 + * Otherwise, we reserve the entire vma range, my ging through the PTEs page
  681 + * by page to get physical address and protection.
  682 + */
  683 +int track_pfn_vma_copy(struct vm_area_struct *vma)
  684 +{
  685 + int retval = 0;
  686 + unsigned long i, j;
  687 + u64 paddr;
  688 + pgprot_t prot;
  689 + pte_t pte;
  690 + unsigned long vma_start = vma->vm_start;
  691 + unsigned long vma_end = vma->vm_end;
  692 + unsigned long vma_size = vma_end - vma_start;
  693 +
  694 + if (!pat_enabled)
  695 + return 0;
  696 +
  697 + if (is_linear_pfn_mapping(vma)) {
  698 + /*
  699 + * reserve the whole chunk starting from vm_pgoff,
  700 + * But, we have to get the protection from pte.
  701 + */
  702 + if (follow_pfnmap_pte(vma, vma_start, &pte)) {
  703 + WARN_ON_ONCE(1);
  704 + return -1;
  705 + }
  706 + prot = pte_pgprot(pte);
  707 + paddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
  708 + return reserve_pfn_range(paddr, vma_size, prot);
  709 + }
  710 +
  711 + /* reserve entire vma page by page, using pfn and prot from pte */
  712 + for (i = 0; i < vma_size; i += PAGE_SIZE) {
  713 + if (follow_pfnmap_pte(vma, vma_start + i, &pte))
  714 + continue;
  715 +
  716 + paddr = pte_pa(pte);
  717 + prot = pte_pgprot(pte);
  718 + retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
  719 + if (retval)
  720 + goto cleanup_ret;
  721 + }
  722 + return 0;
  723 +
  724 +cleanup_ret:
  725 + /* Reserve error: Cleanup partial reservation and return error */
  726 + for (j = 0; j < i; j += PAGE_SIZE) {
  727 + if (follow_pfnmap_pte(vma, vma_start + j, &pte))
  728 + continue;
  729 +
  730 + paddr = pte_pa(pte);
  731 + free_pfn_range(paddr, PAGE_SIZE);
  732 + }
  733 +
  734 + return retval;
  735 +}
  736 +
  737 +/*
  738 + * track_pfn_vma_new is called when a _new_ pfn mapping is being established
  739 + * for physical range indicated by pfn and size.
  740 + *
  741 + * prot is passed in as a parameter for the new mapping. If the vma has a
  742 + * linear pfn mapping for the entire range reserve the entire vma range with
  743 + * single reserve_pfn_range call.
  744 + * Otherwise, we look t the pfn and size and reserve only the specified range
  745 + * page by page.
  746 + *
  747 + * Note that this function can be called with caller trying to map only a
  748 + * subrange/page inside the vma.
  749 + */
  750 +int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
  751 + unsigned long pfn, unsigned long size)
  752 +{
  753 + int retval = 0;
  754 + unsigned long i, j;
  755 + u64 base_paddr;
  756 + u64 paddr;
  757 + unsigned long vma_start = vma->vm_start;
  758 + unsigned long vma_end = vma->vm_end;
  759 + unsigned long vma_size = vma_end - vma_start;
  760 +
  761 + if (!pat_enabled)
  762 + return 0;
  763 +
  764 + if (is_linear_pfn_mapping(vma)) {
  765 + /* reserve the whole chunk starting from vm_pgoff */
  766 + paddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
  767 + return reserve_pfn_range(paddr, vma_size, prot);
  768 + }
  769 +
  770 + /* reserve page by page using pfn and size */
  771 + base_paddr = (u64)pfn << PAGE_SHIFT;
  772 + for (i = 0; i < size; i += PAGE_SIZE) {
  773 + paddr = base_paddr + i;
  774 + retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
  775 + if (retval)
  776 + goto cleanup_ret;
  777 + }
  778 + return 0;
  779 +
  780 +cleanup_ret:
  781 + /* Reserve error: Cleanup partial reservation and return error */
  782 + for (j = 0; j < i; j += PAGE_SIZE) {
  783 + paddr = base_paddr + j;
  784 + free_pfn_range(paddr, PAGE_SIZE);
  785 + }
  786 +
  787 + return retval;
  788 +}
  789 +
  790 +/*
  791 + * untrack_pfn_vma is called while unmapping a pfnmap for a region.
  792 + * untrack can be called for a specific region indicated by pfn and size or
  793 + * can be for the entire vma (in which case size can be zero).
  794 + */
  795 +void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
  796 + unsigned long size)
  797 +{
  798 + unsigned long i;
  799 + u64 paddr;
  800 + unsigned long vma_start = vma->vm_start;
  801 + unsigned long vma_end = vma->vm_end;
  802 + unsigned long vma_size = vma_end - vma_start;
  803 +
  804 + if (!pat_enabled)
  805 + return;
  806 +
  807 + if (is_linear_pfn_mapping(vma)) {
  808 + /* free the whole chunk starting from vm_pgoff */
  809 + paddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
  810 + free_pfn_range(paddr, vma_size);
  811 + return;
  812 + }
  813 +
  814 + if (size != 0 && size != vma_size) {
  815 + /* free page by page, using pfn and size */
  816 + paddr = (u64)pfn << PAGE_SHIFT;
  817 + for (i = 0; i < size; i += PAGE_SIZE) {
  818 + paddr = paddr + i;
  819 + free_pfn_range(paddr, PAGE_SIZE);
  820 + }
  821 + } else {
  822 + /* free entire vma, page by page, using the pfn from pte */
  823 + for (i = 0; i < vma_size; i += PAGE_SIZE) {
  824 + pte_t pte;
  825 +
  826 + if (follow_pfnmap_pte(vma, vma_start + i, &pte))
  827 + continue;
  828 +
  829 + paddr = pte_pa(pte);
  830 + free_pfn_range(paddr, PAGE_SIZE);
  831 + }
  832 + }
  833 +}
  834 +
599 835 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
600 836  
601 837 /* get Nth element of the linked list */