Commit c79095092834a18ae74cfc08def1a5a101dc106c

Authored by Marek Szyprowski
1 parent 0a2b9a6ea9

ARM: integrate CMA with DMA-mapping subsystem

This patch adds support for CMA to the dma-mapping subsystem for the ARM
architecture. By default a global CMA area is used, but specific devices
are allowed to have their own private memory areas if required (these can
be created with the dma_declare_contiguous() function during board
initialisation).
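
As a rough sketch (not part of this patch), a board file could set aside
such a private area from its early reserve hook. The device, function
names and sizes below are invented for illustration, and the
dma_declare_contiguous(dev, size, base, limit) form is assumed from the
companion CMA core patches:

    /*
     * Hypothetical board support code: reserve a 16 MiB private CMA
     * area for one device instead of using the global CMA area.
     */
    #include <linux/kernel.h>
    #include <linux/platform_device.h>
    #include <linux/dma-contiguous.h>
    #include <asm/sizes.h>

    static struct platform_device foo_mfc_dev = {
            .name = "foo-mfc",
            .id   = -1,
    };

    static void __init foo_reserve(void)
    {
            /* base/limit of 0: let the allocator pick a spot in lowmem */
            if (dma_declare_contiguous(&foo_mfc_dev.dev, SZ_16M, 0, 0))
                    pr_warn("foo-mfc: unable to reserve private CMA area\n");
    }

On ARM this would typically hang off the machine's ->reserve() callback,
which the arm_memblock_init() hunk further down invokes just before
dma_contiguous_reserve().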

Contiguous memory areas reserved for DMA are remapped with 2-level page
tables on boot. Once a buffer is requested, the low-memory kernel mapping
is updated to match the requested memory access type.
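
From a driver's point of view the access type follows from which
allocation call it makes. A minimal, hypothetical sketch (device and
size invented) using the existing dma_alloc_writecombine() /
dma_alloc_coherent() entry points:

    /*
     * Sketch of the driver-side view: the kernel low-memory mapping of
     * the returned buffer is switched to match the access type implied
     * by the allocation call.
     */
    #include <linux/gfp.h>
    #include <linux/dma-mapping.h>
    #include <asm/sizes.h>

    static void *fb_cpu;
    static dma_addr_t fb_dma;

    static int foo_lcd_alloc(struct device *dev)
    {
            /*
             * Write-combined framebuffer; dma_alloc_coherent() would
             * give an uncached, coherent mapping instead.
             */
            fb_cpu = dma_alloc_writecombine(dev, SZ_1M, &fb_dma, GFP_KERNEL);
            return fb_cpu ? 0 : -ENOMEM;
    }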

GFP_ATOMIC allocations are performed from a special pool which is created
early during boot. This way, remapping page attributes is not needed at
allocation time.
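
A hypothetical example of the situation the pool serves; only the
GFP_ATOMIC handling and the coherent_pool= parameter are taken from the
patch, everything else below is invented:

    /*
     * Sketch: in atomic context (IRQ handler, spinlock held) the
     * allocation cannot sleep or rewrite page tables, so it is served
     * from the pre-allocated coherent pool instead of CMA.
     */
    #include <linux/kernel.h>
    #include <linux/mm.h>
    #include <linux/dma-mapping.h>

    static void *desc;
    static dma_addr_t desc_dma;

    static void foo_refill_desc(struct device *dev)
    {
            desc = dma_alloc_coherent(dev, PAGE_SIZE, &desc_dma, GFP_ATOMIC);
            if (!desc)
                    pr_err("foo: atomic coherent pool exhausted\n");
    }

The pool defaults to one eighth of the consistent DMA region (see
coherent_pool_size in the dma-mapping.c hunk below) and can be resized
on the kernel command line, e.g. coherent_pool=2M, as documented in the
kernel-parameters.txt hunk below.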

CMA has been enabled unconditionally for ARMv6+ systems.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
CC: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Rob Clark <rob.clark@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Tested-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Tested-by: Robert Nelson <robertcnelson@gmail.com>
Tested-by: Barry Song <Baohua.Song@csr.com>

Showing 9 changed files with 370 additions and 88 deletions

Documentation/kernel-parameters.txt
... ... @@ -520,6 +520,10 @@
520 520 a hypervisor.
521 521 Default: yes
522 522  
  523 + coherent_pool=nn[KMG] [ARM,KNL]
  524 + Sets the size of memory pool for coherent, atomic dma
  525 + allocations if Contiguous Memory Allocator (CMA) is used.
  526 +
523 527 code_bytes [X86] How many bytes of object code to print
524 528 in an oops report.
525 529 Range: 0 - 8192
arch/arm/Kconfig
... ... @@ -4,6 +4,8 @@
4 4 select HAVE_AOUT
5 5 select HAVE_DMA_API_DEBUG
6 6 select HAVE_IDE if PCI || ISA || PCMCIA
  7 + select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7)
  8 + select CMA if (CPU_V6 || CPU_V6K || CPU_V7)
7 9 select HAVE_MEMBLOCK
8 10 select RTC_LIB
9 11 select SYS_SUPPORTS_APM_EMULATION
arch/arm/include/asm/dma-contiguous.h
  1 +#ifndef ASMARM_DMA_CONTIGUOUS_H
  2 +#define ASMARM_DMA_CONTIGUOUS_H
  3 +
  4 +#ifdef __KERNEL__
  5 +#ifdef CONFIG_CMA
  6 +
  7 +#include <linux/types.h>
  8 +#include <asm-generic/dma-contiguous.h>
  9 +
  10 +void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size);
  11 +
  12 +#endif
  13 +#endif
  14 +
  15 +#endif
arch/arm/include/asm/mach/map.h
... ... @@ -30,6 +30,7 @@
30 30 #define MT_MEMORY_DTCM 12
31 31 #define MT_MEMORY_ITCM 13
32 32 #define MT_MEMORY_SO 14
  33 +#define MT_MEMORY_DMA_READY 15
33 34  
34 35 #ifdef CONFIG_MMU
35 36 extern void iotable_init(struct map_desc *, int);
arch/arm/kernel/setup.c
... ... @@ -81,6 +81,7 @@
81 81 extern void paging_init(struct machine_desc *desc);
82 82 extern void sanity_check_meminfo(void);
83 83 extern void reboot_setup(char *str);
  84 +extern void setup_dma_zone(struct machine_desc *desc);
84 85  
85 86 unsigned int processor_id;
86 87 EXPORT_SYMBOL(processor_id);
... ... @@ -939,12 +940,8 @@
939 940 machine_desc = mdesc;
940 941 machine_name = mdesc->name;
941 942  
942   -#ifdef CONFIG_ZONE_DMA
943   - if (mdesc->dma_zone_size) {
944   - extern unsigned long arm_dma_zone_size;
945   - arm_dma_zone_size = mdesc->dma_zone_size;
946   - }
947   -#endif
  943 + setup_dma_zone(mdesc);
  944 +
948 945 if (mdesc->restart_mode)
949 946 reboot_setup(&mdesc->restart_mode);
950 947  
arch/arm/mm/dma-mapping.c
... ... @@ -17,7 +17,9 @@
17 17 #include <linux/init.h>
18 18 #include <linux/device.h>
19 19 #include <linux/dma-mapping.h>
  20 +#include <linux/dma-contiguous.h>
20 21 #include <linux/highmem.h>
  22 +#include <linux/memblock.h>
21 23 #include <linux/slab.h>
22 24  
23 25 #include <asm/memory.h>
... ... @@ -26,6 +28,9 @@
26 28 #include <asm/tlbflush.h>
27 29 #include <asm/sizes.h>
28 30 #include <asm/mach/arch.h>
  31 +#include <asm/mach/map.h>
  32 +#include <asm/system_info.h>
  33 +#include <asm/dma-contiguous.h>
29 34  
30 35 #include "mm.h"
31 36  
... ... @@ -56,6 +61,19 @@
56 61 return mask;
57 62 }
58 63  
  64 +static void __dma_clear_buffer(struct page *page, size_t size)
  65 +{
  66 + void *ptr;
  67 + /*
  68 + * Ensure that the allocated pages are zeroed, and that any data
  69 + * lurking in the kernel direct-mapped region is invalidated.
  70 + */
  71 + ptr = page_address(page);
  72 + memset(ptr, 0, size);
  73 + dmac_flush_range(ptr, ptr + size);
  74 + outer_flush_range(__pa(ptr), __pa(ptr) + size);
  75 +}
  76 +
59 77 /*
60 78 * Allocate a DMA buffer for 'dev' of size 'size' using the
61 79 * specified gfp mask. Note that 'size' must be page aligned.
62 80  
... ... @@ -64,24 +82,7 @@
64 82 {
65 83 unsigned long order = get_order(size);
66 84 struct page *page, *p, *e;
67   - void *ptr;
68   - u64 mask = get_coherent_dma_mask(dev);
69 85  
70   -#ifdef CONFIG_DMA_API_DEBUG
71   - u64 limit = (mask + 1) & ~mask;
72   - if (limit && size >= limit) {
73   - dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
74   - size, mask);
75   - return NULL;
76   - }
77   -#endif
78   -
79   - if (!mask)
80   - return NULL;
81   -
82   - if (mask < 0xffffffffULL)
83   - gfp |= GFP_DMA;
84   -
85 86 page = alloc_pages(gfp, order);
86 87 if (!page)
87 88 return NULL;
... ... @@ -93,14 +94,7 @@
93 94 for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
94 95 __free_page(p);
95 96  
96   - /*
97   - * Ensure that the allocated pages are zeroed, and that any data
98   - * lurking in the kernel direct-mapped region is invalidated.
99   - */
100   - ptr = page_address(page);
101   - memset(ptr, 0, size);
102   - dmac_flush_range(ptr, ptr + size);
103   - outer_flush_range(__pa(ptr), __pa(ptr) + size);
  97 + __dma_clear_buffer(page, size);
104 98  
105 99 return page;
106 100 }
... ... @@ -170,6 +164,9 @@
170 164 unsigned long base = consistent_base;
171 165 unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
172 166  
  167 + if (cpu_architecture() >= CPU_ARCH_ARMv6)
  168 + return 0;
  169 +
173 170 consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
174 171 if (!consistent_pte) {
175 172 pr_err("%s: no memory\n", __func__);
176 173  
... ... @@ -210,9 +207,101 @@
210 207  
211 208 return ret;
212 209 }
213   -
214 210 core_initcall(consistent_init);
215 211  
  212 +static void *__alloc_from_contiguous(struct device *dev, size_t size,
  213 + pgprot_t prot, struct page **ret_page);
  214 +
  215 +static struct arm_vmregion_head coherent_head = {
  216 + .vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock),
  217 + .vm_list = LIST_HEAD_INIT(coherent_head.vm_list),
  218 +};
  219 +
  220 +size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8;
  221 +
  222 +static int __init early_coherent_pool(char *p)
  223 +{
  224 + coherent_pool_size = memparse(p, &p);
  225 + return 0;
  226 +}
  227 +early_param("coherent_pool", early_coherent_pool);
  228 +
  229 +/*
  230 + * Initialise the coherent pool for atomic allocations.
  231 + */
  232 +static int __init coherent_init(void)
  233 +{
  234 + pgprot_t prot = pgprot_dmacoherent(pgprot_kernel);
  235 + size_t size = coherent_pool_size;
  236 + struct page *page;
  237 + void *ptr;
  238 +
  239 + if (cpu_architecture() < CPU_ARCH_ARMv6)
  240 + return 0;
  241 +
  242 + ptr = __alloc_from_contiguous(NULL, size, prot, &page);
  243 + if (ptr) {
  244 + coherent_head.vm_start = (unsigned long) ptr;
  245 + coherent_head.vm_end = (unsigned long) ptr + size;
  246 + printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n",
  247 + (unsigned)size / 1024);
  248 + return 0;
  249 + }
  250 + printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
  251 + (unsigned)size / 1024);
  252 + return -ENOMEM;
  253 +}
  254 +/*
  255 + * CMA is activated by core_initcall, so we must be called after it.
  256 + */
  257 +postcore_initcall(coherent_init);
  258 +
  259 +struct dma_contig_early_reserve {
  260 + phys_addr_t base;
  261 + unsigned long size;
  262 +};
  263 +
  264 +static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;
  265 +
  266 +static int dma_mmu_remap_num __initdata;
  267 +
  268 +void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
  269 +{
  270 + dma_mmu_remap[dma_mmu_remap_num].base = base;
  271 + dma_mmu_remap[dma_mmu_remap_num].size = size;
  272 + dma_mmu_remap_num++;
  273 +}
  274 +
  275 +void __init dma_contiguous_remap(void)
  276 +{
  277 + int i;
  278 + for (i = 0; i < dma_mmu_remap_num; i++) {
  279 + phys_addr_t start = dma_mmu_remap[i].base;
  280 + phys_addr_t end = start + dma_mmu_remap[i].size;
  281 + struct map_desc map;
  282 + unsigned long addr;
  283 +
  284 + if (end > arm_lowmem_limit)
  285 + end = arm_lowmem_limit;
  286 + if (start >= end)
  287 + return;
  288 +
  289 + map.pfn = __phys_to_pfn(start);
  290 + map.virtual = __phys_to_virt(start);
  291 + map.length = end - start;
  292 + map.type = MT_MEMORY_DMA_READY;
  293 +
  294 + /*
  295 + * Clear previous low-memory mapping
  296 + */
  297 + for (addr = __phys_to_virt(start); addr < __phys_to_virt(end);
  298 + addr += PGDIR_SIZE)
  299 + pmd_clear(pmd_off_k(addr));
  300 +
  301 + iotable_init(&map, 1);
  302 + }
  303 +}
  304 +
216 305 static void *
217 306 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
218 307 const void *caller)
219 308  
220 309  
221 310  
222 311  
223 312  
... ... @@ -319,20 +408,173 @@
319 408 arm_vmregion_free(&consistent_head, c);
320 409 }
321 410  
  411 +static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
  412 + void *data)
  413 +{
  414 + struct page *page = virt_to_page(addr);
  415 + pgprot_t prot = *(pgprot_t *)data;
  416 +
  417 + set_pte_ext(pte, mk_pte(page, prot), 0);
  418 + return 0;
  419 +}
  420 +
  421 +static void __dma_remap(struct page *page, size_t size, pgprot_t prot)
  422 +{
  423 + unsigned long start = (unsigned long) page_address(page);
  424 + unsigned end = start + size;
  425 +
  426 + apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot);
  427 + dsb();
  428 + flush_tlb_kernel_range(start, end);
  429 +}
  430 +
  431 +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
  432 + pgprot_t prot, struct page **ret_page,
  433 + const void *caller)
  434 +{
  435 + struct page *page;
  436 + void *ptr;
  437 + page = __dma_alloc_buffer(dev, size, gfp);
  438 + if (!page)
  439 + return NULL;
  440 +
  441 + ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
  442 + if (!ptr) {
  443 + __dma_free_buffer(page, size);
  444 + return NULL;
  445 + }
  446 +
  447 + *ret_page = page;
  448 + return ptr;
  449 +}
  450 +
  451 +static void *__alloc_from_pool(struct device *dev, size_t size,
  452 + struct page **ret_page, const void *caller)
  453 +{
  454 + struct arm_vmregion *c;
  455 + size_t align;
  456 +
  457 + if (!coherent_head.vm_start) {
  458 + printk(KERN_ERR "%s: coherent pool not initialised!\n",
  459 + __func__);
  460 + dump_stack();
  461 + return NULL;
  462 + }
  463 +
  464 + /*
  465 + * Align the region allocation - allocations from pool are rather
  466 + * small, so align them to their order in pages, minimum is a page
  467 + * size. This helps reduce fragmentation of the DMA space.
  468 + */
  469 + align = PAGE_SIZE << get_order(size);
  470 + c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller);
  471 + if (c) {
  472 + void *ptr = (void *)c->vm_start;
  473 + struct page *page = virt_to_page(ptr);
  474 + *ret_page = page;
  475 + return ptr;
  476 + }
  477 + return NULL;
  478 +}
  479 +
  480 +static int __free_from_pool(void *cpu_addr, size_t size)
  481 +{
  482 + unsigned long start = (unsigned long)cpu_addr;
  483 + unsigned long end = start + size;
  484 + struct arm_vmregion *c;
  485 +
  486 + if (start < coherent_head.vm_start || end > coherent_head.vm_end)
  487 + return 0;
  488 +
  489 + c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start);
  490 +
  491 + if ((c->vm_end - c->vm_start) != size) {
  492 + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
  493 + __func__, c->vm_end - c->vm_start, size);
  494 + dump_stack();
  495 + size = c->vm_end - c->vm_start;
  496 + }
  497 +
  498 + arm_vmregion_free(&coherent_head, c);
  499 + return 1;
  500 +}
  501 +
  502 +static void *__alloc_from_contiguous(struct device *dev, size_t size,
  503 + pgprot_t prot, struct page **ret_page)
  504 +{
  505 + unsigned long order = get_order(size);
  506 + size_t count = size >> PAGE_SHIFT;
  507 + struct page *page;
  508 +
  509 + page = dma_alloc_from_contiguous(dev, count, order);
  510 + if (!page)
  511 + return NULL;
  512 +
  513 + __dma_clear_buffer(page, size);
  514 + __dma_remap(page, size, prot);
  515 +
  516 + *ret_page = page;
  517 + return page_address(page);
  518 +}
  519 +
  520 +static void __free_from_contiguous(struct device *dev, struct page *page,
  521 + size_t size)
  522 +{
  523 + __dma_remap(page, size, pgprot_kernel);
  524 + dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
  525 +}
  526 +
  527 +#define nommu() 0
  528 +
322 529 #else /* !CONFIG_MMU */
323 530  
324   -#define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page)
325   -#define __dma_free_remap(addr, size) do { } while (0)
  531 +#define nommu() 1
326 532  
  533 +#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL
  534 +#define __alloc_from_pool(dev, size, ret_page, c) NULL
  535 +#define __alloc_from_contiguous(dev, size, prot, ret) NULL
  536 +#define __free_from_pool(cpu_addr, size) 0
  537 +#define __free_from_contiguous(dev, page, size) do { } while (0)
  538 +#define __dma_free_remap(cpu_addr, size) do { } while (0)
  539 +
327 540 #endif /* CONFIG_MMU */
328 541  
329   -static void *
330   -__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
331   - pgprot_t prot, const void *caller)
  542 +static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
  543 + struct page **ret_page)
332 544 {
333 545 struct page *page;
  546 + page = __dma_alloc_buffer(dev, size, gfp);
  547 + if (!page)
  548 + return NULL;
  549 +
  550 + *ret_page = page;
  551 + return page_address(page);
  552 +}
  553 +
  554 +
  555 +
  556 +static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
  557 + gfp_t gfp, pgprot_t prot, const void *caller)
  558 +{
  559 + u64 mask = get_coherent_dma_mask(dev);
  560 + struct page *page;
334 561 void *addr;
335 562  
  563 +#ifdef CONFIG_DMA_API_DEBUG
  564 + u64 limit = (mask + 1) & ~mask;
  565 + if (limit && size >= limit) {
  566 + dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
  567 + size, mask);
  568 + return NULL;
  569 + }
  570 +#endif
  571 +
  572 + if (!mask)
  573 + return NULL;
  574 +
  575 + if (mask < 0xffffffffULL)
  576 + gfp |= GFP_DMA;
  577 +
336 578 /*
337 579 * Following is a work-around (a.k.a. hack) to prevent pages
338 580 * with __GFP_COMP being passed to split_page() which cannot
339 581  
340 582  
... ... @@ -345,19 +587,17 @@
345 587 *handle = ~0;
346 588 size = PAGE_ALIGN(size);
347 589  
348   - page = __dma_alloc_buffer(dev, size, gfp);
349   - if (!page)
350   - return NULL;
351   -
352   - if (!arch_is_coherent())
353   - addr = __dma_alloc_remap(page, size, gfp, prot, caller);
  590 + if (arch_is_coherent() || nommu())
  591 + addr = __alloc_simple_buffer(dev, size, gfp, &page);
  592 + else if (cpu_architecture() < CPU_ARCH_ARMv6)
  593 + addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
  594 + else if (gfp & GFP_ATOMIC)
  595 + addr = __alloc_from_pool(dev, size, &page, caller);
354 596 else
355   - addr = page_address(page);
  597 + addr = __alloc_from_contiguous(dev, size, prot, &page);
356 598  
357 599 if (addr)
358 600 *handle = pfn_to_dma(dev, page_to_pfn(page));
359   - else
360   - __dma_free_buffer(page, size);
361 601  
362 602 return addr;
363 603 }
... ... @@ -366,8 +606,8 @@
366 606 * Allocate DMA-coherent memory space and return both the kernel remapped
367 607 * virtual and bus address for that space.
368 608 */
369   -void *
370   -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
  609 +void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle,
  610 + gfp_t gfp)
371 611 {
372 612 void *memory;
373 613  
... ... @@ -398,25 +638,11 @@
398 638 {
399 639 int ret = -ENXIO;
400 640 #ifdef CONFIG_MMU
401   - unsigned long user_size, kern_size;
402   - struct arm_vmregion *c;
403   -
404   - user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
405   -
406   - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
407   - if (c) {
408   - unsigned long off = vma->vm_pgoff;
409   -
410   - kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
411   -
412   - if (off < kern_size &&
413   - user_size <= (kern_size - off)) {
414   - ret = remap_pfn_range(vma, vma->vm_start,
415   - page_to_pfn(c->vm_pages) + off,
416   - user_size << PAGE_SHIFT,
417   - vma->vm_page_prot);
418   - }
419   - }
  641 + unsigned long pfn = dma_to_pfn(dev, dma_addr);
  642 + ret = remap_pfn_range(vma, vma->vm_start,
  643 + pfn + vma->vm_pgoff,
  644 + vma->vm_end - vma->vm_start,
  645 + vma->vm_page_prot);
420 646 #endif /* CONFIG_MMU */
421 647  
422 648 return ret;
423 649  
424 650  
425 651  
426 652  
... ... @@ -438,23 +664,33 @@
438 664 }
439 665 EXPORT_SYMBOL(dma_mmap_writecombine);
440 666  
  667 +
441 668 /*
442   - * free a page as defined by the above mapping.
443   - * Must not be called with IRQs disabled.
  669 + * Free a buffer as defined by the above mapping.
444 670 */
445 671 void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
446 672 {
447   - WARN_ON(irqs_disabled());
  673 + struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
448 674  
449 675 if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
450 676 return;
451 677  
452 678 size = PAGE_ALIGN(size);
453 679  
454   - if (!arch_is_coherent())
  680 + if (arch_is_coherent() || nommu()) {
  681 + __dma_free_buffer(page, size);
  682 + } else if (cpu_architecture() < CPU_ARCH_ARMv6) {
455 683 __dma_free_remap(cpu_addr, size);
456   -
457   - __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size);
  684 + __dma_free_buffer(page, size);
  685 + } else {
  686 + if (__free_from_pool(cpu_addr, size))
  687 + return;
  688 + /*
  689 + * Non-atomic allocations cannot be freed with IRQs disabled
  690 + */
  691 + WARN_ON(irqs_disabled());
  692 + __free_from_contiguous(dev, page, size);
  693 + }
458 694 }
459 695 EXPORT_SYMBOL(dma_free_coherent);
460 696  
arch/arm/mm/init.c
... ... @@ -20,6 +20,7 @@
20 20 #include <linux/highmem.h>
21 21 #include <linux/gfp.h>
22 22 #include <linux/memblock.h>
  23 +#include <linux/dma-contiguous.h>
23 24  
24 25 #include <asm/mach-types.h>
25 26 #include <asm/memblock.h>
... ... @@ -226,6 +227,17 @@
226 227 }
227 228 #endif
228 229  
  230 +void __init setup_dma_zone(struct machine_desc *mdesc)
  231 +{
  232 +#ifdef CONFIG_ZONE_DMA
  233 + if (mdesc->dma_zone_size) {
  234 + arm_dma_zone_size = mdesc->dma_zone_size;
  235 + arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1;
  236 + } else
  237 + arm_dma_limit = 0xffffffff;
  238 +#endif
  239 +}
  240 +
229 241 static void __init arm_bootmem_free(unsigned long min, unsigned long max_low,
230 242 unsigned long max_high)
231 243 {
232 244  
... ... @@ -273,12 +285,9 @@
273 285 * Adjust the sizes according to any special requirements for
274 286 * this machine type.
275 287 */
276   - if (arm_dma_zone_size) {
  288 + if (arm_dma_zone_size)
277 289 arm_adjust_dma_zone(zone_size, zhole_size,
278 290 arm_dma_zone_size >> PAGE_SHIFT);
279   - arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1;
280   - } else
281   - arm_dma_limit = 0xffffffff;
282 291 #endif
283 292  
284 293 free_area_init_node(0, zone_size, min, zhole_size);
... ... @@ -363,6 +372,12 @@
363 372 /* reserve any platform specific memblock areas */
364 373 if (mdesc->reserve)
365 374 mdesc->reserve();
  375 +
  376 + /*
  377 + * reserve memory for DMA contigouos allocations,
  378 + * must come from DMA area inside low memory
  379 + */
  380 + dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));
366 381  
367 382 arm_memblock_steal_permitted = false;
368 383 memblock_allow_resize();
arch/arm/mm/mm.h
... ... @@ -67,6 +67,9 @@
67 67 #define arm_dma_limit ((u32)~0)
68 68 #endif
69 69  
  70 +extern phys_addr_t arm_lowmem_limit;
  71 +
70 72 void __init bootmem_init(void);
71 73 void arm_mm_memblock_reserve(void);
  74 +void dma_contiguous_remap(void);
arch/arm/mm/mmu.c
... ... @@ -288,6 +288,11 @@
288 288 PMD_SECT_UNCACHED | PMD_SECT_XN,
289 289 .domain = DOMAIN_KERNEL,
290 290 },
  291 + [MT_MEMORY_DMA_READY] = {
  292 + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
  293 + .prot_l1 = PMD_TYPE_TABLE,
  294 + .domain = DOMAIN_KERNEL,
  295 + },
291 296 };
292 297  
293 298 const struct mem_type *get_mem_type(unsigned int type)
... ... @@ -429,6 +434,7 @@
429 434 if (arch_is_coherent() && cpu_is_xsc3()) {
430 435 mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
431 436 mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
  437 + mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
432 438 mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
433 439 mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
434 440 }
... ... @@ -460,6 +466,7 @@
460 466 mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
461 467 mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
462 468 mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
  469 + mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
463 470 mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
464 471 mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
465 472 }
... ... @@ -512,6 +519,7 @@
512 519 mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
513 520 mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
514 521 mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
  522 + mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
515 523 mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask;
516 524 mem_types[MT_ROM].prot_sect |= cp->pmd;
517 525  
... ... @@ -596,7 +604,7 @@
596 604 * L1 entries, whereas PGDs refer to a group of L1 entries making
597 605 * up one logical pointer to an L2 table.
598 606 */
599   - if (((addr | end | phys) & ~SECTION_MASK) == 0) {
  607 + if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) {
600 608 pmd_t *p = pmd;
601 609  
602 610 #ifndef CONFIG_ARM_LPAE
... ... @@ -814,7 +822,7 @@
814 822 }
815 823 early_param("vmalloc", early_vmalloc);
816 824  
817   -static phys_addr_t lowmem_limit __initdata = 0;
  825 +phys_addr_t arm_lowmem_limit __initdata = 0;
818 826  
819 827 void __init sanity_check_meminfo(void)
820 828 {
... ... @@ -897,8 +905,8 @@
897 905 bank->size = newsize;
898 906 }
899 907 #endif
900   - if (!bank->highmem && bank->start + bank->size > lowmem_limit)
901   - lowmem_limit = bank->start + bank->size;
  908 + if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit)
  909 + arm_lowmem_limit = bank->start + bank->size;
902 910  
903 911 j++;
904 912 }
... ... @@ -923,8 +931,8 @@
923 931 }
924 932 #endif
925 933 meminfo.nr_banks = j;
926   - high_memory = __va(lowmem_limit - 1) + 1;
927   - memblock_set_current_limit(lowmem_limit);
  934 + high_memory = __va(arm_lowmem_limit - 1) + 1;
  935 + memblock_set_current_limit(arm_lowmem_limit);
928 936 }
929 937  
930 938 static inline void prepare_page_table(void)
... ... @@ -949,8 +957,8 @@
949 957 * Find the end of the first block of lowmem.
950 958 */
951 959 end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
952   - if (end >= lowmem_limit)
953   - end = lowmem_limit;
  960 + if (end >= arm_lowmem_limit)
  961 + end = arm_lowmem_limit;
954 962  
955 963 /*
956 964 * Clear out all the kernel space mappings, except for the first
... ... @@ -1093,8 +1101,8 @@
1093 1101 phys_addr_t end = start + reg->size;
1094 1102 struct map_desc map;
1095 1103  
1096   - if (end > lowmem_limit)
1097   - end = lowmem_limit;
  1104 + if (end > arm_lowmem_limit)
  1105 + end = arm_lowmem_limit;
1098 1106 if (start >= end)
1099 1107 break;
1100 1108  
1101 1109  
... ... @@ -1115,11 +1123,12 @@
1115 1123 {
1116 1124 void *zero_page;
1117 1125  
1118   - memblock_set_current_limit(lowmem_limit);
  1126 + memblock_set_current_limit(arm_lowmem_limit);
1119 1127  
1120 1128 build_mem_type_table();
1121 1129 prepare_page_table();
1122 1130 map_lowmem();
  1131 + dma_contiguous_remap();
1123 1132 devicemaps_init(mdesc);
1124 1133 kmap_init();
1125 1134