lib/swiotlb.c

/*
 * Dynamic DMA mapping support.
 *
 * This implementation is a fallback for platforms that do not support
 * I/O TLBs (aka DMA address translation hardware).
 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
 * Copyright (C) 2000, 2003 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 03/05/07 davidm	Switch from PCI-DMA to generic device DMA API.
 * 00/12/13 davidm	Rename to swiotlb.c and add mark_clean() to avoid
 *			unnecessary i-cache flushing.
 * 04/07/.. ak		Better overflow handling. Assorted fixes.
 * 05/09/10 linville	Add support for syncing ranges, support syncing for
 *			DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
 * 08/12/11 beckyb	Add highmem support
 */

#include <linux/cache.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/swiotlb.h>
#include <linux/pfn.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/highmem.h>

#include <asm/io.h>
#include <asm/dma.h>
#include <asm/scatterlist.h>

#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/iommu-helper.h>

#define OFFSET(val,align) ((unsigned long)	\
	( (val) & ( (align) - 1)))

#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))

/*
 * Minimum IO TLB size to bother booting with.  Systems with mainly
 * 64bit capable cards will only lightly use the swiotlb.  If we can't
 * allocate a contiguous 1MB, we're probably in trouble anyway.
 */
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)

/*
 * Enumeration for sync targets
 */
enum dma_sync_target {
	SYNC_FOR_CPU = 0,
	SYNC_FOR_DEVICE = 1,
};

int swiotlb_force;

/*
 * Used to do a quick range check in unmap_single and
 * sync_single_*, to see if the memory was in fact allocated by this
 * API.
 */
static char *io_tlb_start, *io_tlb_end;

/*
 * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
 * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
 */
static unsigned long io_tlb_nslabs;

/*
 * When the IOMMU overflows we return a fallback buffer. This sets the size.
 */
static unsigned long io_tlb_overflow = 32*1024;

void *io_tlb_overflow_buffer;

/*
 * This is a free list describing the number of free entries available from
 * each index
 */
static unsigned int *io_tlb_list;
static unsigned int io_tlb_index;

/*
 * We need to save away the original address corresponding to a mapped entry
 * for the sync operations.
 */
static phys_addr_t *io_tlb_orig_addr;

/*
 * Protect the above data structures in the map and unmap calls
 */
static DEFINE_SPINLOCK(io_tlb_lock);

static int late_alloc;

static int __init
setup_io_tlb_npages(char *str)
{
	if (isdigit(*str)) {
		io_tlb_nslabs = simple_strtoul(str, &str, 0);
		/* avoid tail segment of size < IO_TLB_SEGSIZE */
		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
	}
	if (*str == ',')
		++str;
	if (!strcmp(str, "force"))
		swiotlb_force = 1;

	return 1;
}
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */
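
/*
 * Example (editor's note, not part of the original file): the parser
 * above accepts an optional slab count followed by an optional "force"
 * flag on the kernel command line, e.g.:
 *
 *	swiotlb=65536		(reserve 65536 slabs)
 *	swiotlb=65536,force	(and bounce even DMA-capable addresses)
 *	swiotlb=force		(keep the default size, force bouncing)
 *
 * A digit-leading string sets io_tlb_nslabs (rounded up to a multiple of
 * IO_TLB_SEGSIZE); a trailing or lone "force" sets swiotlb_force.
 */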

/* Note that this doesn't work with highmem page */
static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
				      volatile void *address)
{
	return phys_to_dma(hwdev, virt_to_phys(address));
}

void swiotlb_print_info(void)
{
	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
	phys_addr_t pstart, pend;

	pstart = virt_to_phys(io_tlb_start);
	pend = virt_to_phys(io_tlb_end);

	printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n",
	       bytes >> 20, io_tlb_start, io_tlb_end);
	printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n",
	       (unsigned long long)pstart,
	       (unsigned long long)pend);
}

/*
 * Statically reserve bounce buffer space and initialize bounce buffer data
 * structures for the software IO TLB used to implement the DMA API.
 */
void __init
swiotlb_init_with_default_size(size_t default_size, int verbose)
{
	unsigned long i, bytes;

	if (!io_tlb_nslabs) {
		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
	}

	bytes = io_tlb_nslabs << IO_TLB_SHIFT;

	/*
	 * Get IO TLB memory from the low pages
	 */
	io_tlb_start = alloc_bootmem_low_pages(bytes);
	if (!io_tlb_start)
		panic("Cannot allocate SWIOTLB buffer");
	io_tlb_end = io_tlb_start + bytes;

	/*
	 * Allocate and initialize the free list array.  This array is used
	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
	 * between io_tlb_start and io_tlb_end.
	 */
	io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
	for (i = 0; i < io_tlb_nslabs; i++)
		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
	io_tlb_index = 0;
	io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));

	/*
	 * Get the overflow emergency buffer
	 */
	io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
	if (!io_tlb_overflow_buffer)
		panic("Cannot allocate SWIOTLB overflow buffer!\n");
	if (verbose)
		swiotlb_print_info();
}

void __init
swiotlb_init(int verbose)
{
	swiotlb_init_with_default_size(64 * (1<<20), verbose);	/* default to 64MB */
}
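
/*
 * Sizing sketch (editor's note): with the IO_TLB_SHIFT of 11 defined in
 * <linux/swiotlb.h> at this time, each slab is 2KB, so the 64MB default
 * works out to:
 *
 *	io_tlb_nslabs = (64 << 20) >> 11 = 32768 slabs
 *
 * and, with IO_TLB_SEGSIZE = 128, a single mapping can span at most
 * 128 * 2KB = 256KB of contiguous bounce space.
 */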

/*
 * Systems with larger DMA zones (those that don't support ISA) can
 * initialize the swiotlb later using the slab allocator if needed.
 * This should be just like above, but with some error catching.
 */
int
swiotlb_late_init_with_default_size(size_t default_size)
{
	unsigned long i, bytes, req_nslabs = io_tlb_nslabs;
	unsigned int order;

	if (!io_tlb_nslabs) {
		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
	}

	/*
	 * Get IO TLB memory from the low pages
	 */
	order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
	io_tlb_nslabs = SLABS_PER_PAGE << order;
	bytes = io_tlb_nslabs << IO_TLB_SHIFT;

	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
		io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
							order);
		if (io_tlb_start)
			break;
		order--;
	}

	if (!io_tlb_start)
		goto cleanup1;

	if (order != get_order(bytes)) {
		printk(KERN_WARNING "Warning: only able to allocate %ld MB "
		       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
		io_tlb_nslabs = SLABS_PER_PAGE << order;
		bytes = io_tlb_nslabs << IO_TLB_SHIFT;
	}
	io_tlb_end = io_tlb_start + bytes;
	memset(io_tlb_start, 0, bytes);

	/*
	 * Allocate and initialize the free list array.  This array is used
	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
	 * between io_tlb_start and io_tlb_end.
	 */
	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
	                              get_order(io_tlb_nslabs * sizeof(int)));
	if (!io_tlb_list)
		goto cleanup2;

	for (i = 0; i < io_tlb_nslabs; i++)
		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
	io_tlb_index = 0;

	io_tlb_orig_addr = (phys_addr_t *)
		__get_free_pages(GFP_KERNEL,
				 get_order(io_tlb_nslabs *
					   sizeof(phys_addr_t)));
	if (!io_tlb_orig_addr)
		goto cleanup3;

	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));

	/*
	 * Get the overflow emergency buffer
	 */
	io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
	                                          get_order(io_tlb_overflow));
	if (!io_tlb_overflow_buffer)
		goto cleanup4;

	swiotlb_print_info();

	late_alloc = 1;

	return 0;

cleanup4:
	free_pages((unsigned long)io_tlb_orig_addr,
		   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
	io_tlb_orig_addr = NULL;
cleanup3:
	free_pages((unsigned long)io_tlb_list,
		   get_order(io_tlb_nslabs * sizeof(int)));
	io_tlb_list = NULL;
cleanup2:
	io_tlb_end = NULL;
	free_pages((unsigned long)io_tlb_start, order);
	io_tlb_start = NULL;
cleanup1:
	io_tlb_nslabs = req_nslabs;
	return -ENOMEM;
}

void __init swiotlb_free(void)
{
	if (!io_tlb_overflow_buffer)
		return;

	if (late_alloc) {
		free_pages((unsigned long)io_tlb_overflow_buffer,
			   get_order(io_tlb_overflow));
		free_pages((unsigned long)io_tlb_orig_addr,
			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
		free_pages((unsigned long)io_tlb_list,
			   get_order(io_tlb_nslabs * sizeof(int)));
		free_pages((unsigned long)io_tlb_start,
			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
	} else {
		free_bootmem_late(__pa(io_tlb_overflow_buffer),
				  io_tlb_overflow);
		free_bootmem_late(__pa(io_tlb_orig_addr),
				  io_tlb_nslabs * sizeof(phys_addr_t));
		free_bootmem_late(__pa(io_tlb_list),
				  io_tlb_nslabs * sizeof(int));
		free_bootmem_late(__pa(io_tlb_start),
				  io_tlb_nslabs << IO_TLB_SHIFT);
	}
}

static int is_swiotlb_buffer(phys_addr_t paddr)
{
	return paddr >= virt_to_phys(io_tlb_start) &&
		paddr < virt_to_phys(io_tlb_end);
}

/*
 * Bounce: copy the swiotlb buffer back to the original dma location
 */
static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
			   enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(phys);

	if (PageHighMem(pfn_to_page(pfn))) {
		/* The buffer does not have a mapping.  Map it in and copy */
		unsigned int offset = phys & ~PAGE_MASK;
		char *buffer;
		unsigned int sz = 0;
		unsigned long flags;

		while (size) {
			sz = min_t(size_t, PAGE_SIZE - offset, size);

			local_irq_save(flags);
			buffer = kmap_atomic(pfn_to_page(pfn),
					     KM_BOUNCE_READ);
			if (dir == DMA_TO_DEVICE)
				memcpy(dma_addr, buffer + offset, sz);
			else
				memcpy(buffer + offset, dma_addr, sz);
			kunmap_atomic(buffer, KM_BOUNCE_READ);
			local_irq_restore(flags);

			size -= sz;
			pfn++;
			dma_addr += sz;
			offset = 0;
		}
	} else {
		if (dir == DMA_TO_DEVICE)
			memcpy(dma_addr, phys_to_virt(phys), size);
		else
			memcpy(phys_to_virt(phys), dma_addr, size);
	}
}
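
/*
 * Worked example (editor's note) of the io_tlb_list encoding consumed by
 * map_single() and restored by do_unmap_single() below: each entry holds
 * the number of contiguous free slabs starting at that index, counting
 * down toward the next allocated slab or segment boundary.  For an
 * illustrative segment of 6 slabs (the real segment size is
 * IO_TLB_SEGSIZE):
 *
 *	index:       0  1  2  3  4  5
 *	all free:    6  5  4  3  2  1
 *	after a 2-slab allocation at index 2:
 *	             2  1  0  0  2  1
 *
 * A request for nslots slabs can therefore test
 * io_tlb_list[index] >= nslots at a single index instead of scanning
 * nslots entries.
 */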

/*
 * Allocates bounce buffer and returns its kernel virtual address.
 */
static void *
map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
{
	unsigned long flags;
	char *dma_addr;
	unsigned int nslots, stride, index, wrap;
	int i;
	unsigned long start_dma_addr;
	unsigned long mask;
	unsigned long offset_slots;
	unsigned long max_slots;

	mask = dma_get_seg_boundary(hwdev);
	start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask;

	offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;

	/*
	 * Carefully handle integer overflow which can occur when mask == ~0UL.
	 */
	max_slots = mask + 1
		    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);

	/*
	 * For mappings greater than a page, we limit the stride (and
	 * hence alignment) to a page size.
	 */
	nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
	if (size > PAGE_SIZE)
		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
	else
		stride = 1;

	BUG_ON(!nslots);

	/*
	 * Find suitable number of IO TLB entries size that will fit this
	 * request and allocate a buffer from that IO TLB pool.
	 */
	spin_lock_irqsave(&io_tlb_lock, flags);
	index = ALIGN(io_tlb_index, stride);
	if (index >= io_tlb_nslabs)
		index = 0;
	wrap = index;

	do {
		while (iommu_is_span_boundary(index, nslots, offset_slots,
					      max_slots)) {
			index += stride;
			if (index >= io_tlb_nslabs)
				index = 0;
			if (index == wrap)
				goto not_found;
		}

		/*
		 * If we find a slot that indicates we have 'nslots' number of
		 * contiguous buffers, we allocate the buffers from that slot
		 * and mark the entries as '0' indicating unavailable.
		 */
		if (io_tlb_list[index] >= nslots) {
			int count = 0;

			for (i = index; i < (int) (index + nslots); i++)
				io_tlb_list[i] = 0;
			for (i = index - 1;
			     (OFFSET(i, IO_TLB_SEGSIZE) !=
			      IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
				io_tlb_list[i] = ++count;
			dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);

			/*
			 * Update the indices to avoid searching in the next
			 * round.
			 */
			io_tlb_index = ((index + nslots) < io_tlb_nslabs
					? (index + nslots) : 0);

			goto found;
		}
		index += stride;
		if (index >= io_tlb_nslabs)
			index = 0;
	} while (index != wrap);

not_found:
	spin_unlock_irqrestore(&io_tlb_lock, flags);
	return NULL;
found:
	spin_unlock_irqrestore(&io_tlb_lock, flags);

	/*
	 * Save away the mapping from the original address to the DMA address.
	 * This is needed when we sync the memory.  Then we sync the buffer if
	 * needed.
	 */
	for (i = 0; i < nslots; i++)
		io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
		swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);

	return dma_addr;
}

/*
 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
 */
static void
do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
{
	unsigned long flags;
	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
	phys_addr_t phys = io_tlb_orig_addr[index];

	/*
	 * First, sync the memory before unmapping the entry
	 */
	if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
		swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);

	/*
	 * Return the buffer to the free list by setting the corresponding
	 * entries to indicate the number of contiguous entries available.
	 * While returning the entries to the free list, we merge the entries
	 * with slots below and above the pool being returned.
	 */
	spin_lock_irqsave(&io_tlb_lock, flags);
	{
		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
			 io_tlb_list[index + nslots] : 0);
		/*
		 * Step 1: return the slots to the free list, merging the
		 * slots with succeeding slots
		 */
		for (i = index + nslots - 1; i >= index; i--)
			io_tlb_list[i] = ++count;
		/*
		 * Step 2: merge the returned slots with the preceding slots,
		 * if available (non zero)
		 */
		for (i = index - 1;
		     (OFFSET(i, IO_TLB_SEGSIZE) !=
		      IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
			io_tlb_list[i] = ++count;
	}
	spin_unlock_irqrestore(&io_tlb_lock, flags);
}

static void
sync_single(struct device *hwdev, char *dma_addr, size_t size,
	    int dir, int target)
{
	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
	phys_addr_t phys = io_tlb_orig_addr[index];

	phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));

	switch (target) {
	case SYNC_FOR_CPU:
		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
			swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
		else
			BUG_ON(dir != DMA_TO_DEVICE);
		break;
	case SYNC_FOR_DEVICE:
		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
			swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
		else
			BUG_ON(dir != DMA_FROM_DEVICE);
		break;
	default:
		BUG();
	}
}

void *
swiotlb_alloc_coherent(struct device *hwdev, size_t size,
		       dma_addr_t *dma_handle, gfp_t flags)
{
	dma_addr_t dev_addr;
	void *ret;
	int order = get_order(size);
	u64 dma_mask = DMA_BIT_MASK(32);

	if (hwdev && hwdev->coherent_dma_mask)
		dma_mask = hwdev->coherent_dma_mask;

	ret = (void *)__get_free_pages(flags, order);
	if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) {
		/*
		 * The allocated memory isn't reachable by the device.
		 */
		free_pages((unsigned long) ret, order);
		ret = NULL;
	}
	if (!ret) {
		/*
		 * We are either out of memory or the device can't DMA
		 * to GFP_DMA memory; fall back on map_single(), which
		 * will grab memory from the lowest available address range.
		 */
		ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
		if (!ret)
			return NULL;
	}

	memset(ret, 0, size);
	dev_addr = swiotlb_virt_to_bus(hwdev, ret);

	/* Confirm address can be DMA'd by device */
	if (dev_addr + size - 1 > dma_mask) {
		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
		       (unsigned long long)dma_mask,
		       (unsigned long long)dev_addr);

		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
		do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
		return NULL;
	}
	*dma_handle = dev_addr;
	return ret;
}
EXPORT_SYMBOL(swiotlb_alloc_coherent);
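
/*
 * Usage sketch (editor's note): drivers normally reach this through the
 * generic DMA API rather than calling it directly.  Assuming a driver
 * with a valid struct device *dev whose dma_ops route here, the pattern
 * is roughly:
 *
 *	dma_addr_t handle;
 *	void *cpu = swiotlb_alloc_coherent(dev, 4096, &handle, GFP_KERNEL);
 *
 *	if (!cpu)
 *		return -ENOMEM;
 *	... program the device with 'handle', touch 'cpu' from the CPU ...
 *	swiotlb_free_coherent(dev, 4096, cpu, handle);
 */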

void
swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
		      dma_addr_t dev_addr)
{
	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

	WARN_ON(irqs_disabled());
	if (!is_swiotlb_buffer(paddr))
		free_pages((unsigned long)vaddr, get_order(size));
	else
		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
		do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
}
EXPORT_SYMBOL(swiotlb_free_coherent);

static void
swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
{
	/*
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly
	 * unless they check for dma_mapping_error (most don't).
	 * When the mapping is small enough return a static buffer to limit
	 * the damage, or panic when the transfer is too big.
	 */
	printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at "
	       "device %s\n", size, dev ? dev_name(dev) : "?");

	if (size <= io_tlb_overflow || !do_panic)
		return;

	if (dir == DMA_BIDIRECTIONAL)
		panic("DMA: Random memory could be DMA accessed\n");
	if (dir == DMA_FROM_DEVICE)
		panic("DMA: Random memory could be DMA written\n");
	if (dir == DMA_TO_DEVICE)
		panic("DMA: Random memory could be DMA read\n");
}

/*
 * Map a single buffer of the indicated size for DMA in streaming mode.  The
 * physical address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either swiotlb_unmap_page or swiotlb_sync_single_for_cpu is performed.
 */
dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
			    unsigned long offset, size_t size,
			    enum dma_data_direction dir,
			    struct dma_attrs *attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	dma_addr_t dev_addr = phys_to_dma(dev, phys);
	void *map;

	BUG_ON(dir == DMA_NONE);
	/*
	 * If the address happens to be in the device's DMA window,
	 * we can safely return the device addr and not worry about bounce
	 * buffering it.
	 */
	if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
		return dev_addr;

	/*
	 * Oh well, have to allocate and map a bounce buffer.
	 */
	map = map_single(dev, phys, size, dir);
	if (!map) {
		swiotlb_full(dev, size, dir, 1);
		map = io_tlb_overflow_buffer;
	}

	dev_addr = swiotlb_virt_to_bus(dev, map);

	/*
	 * Ensure that the address returned is DMA'ble
	 */
	if (!dma_capable(dev, dev_addr, size))
		panic("map_single: bounce buffer is not DMA'ble");

	return dev_addr;
}
EXPORT_SYMBOL_GPL(swiotlb_map_page);
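
/*
 * Usage sketch (editor's note): a streaming mapping through this entry
 * point, with the overflow-buffer error check from
 * swiotlb_dma_mapping_error() (defined near the end of this file);
 * 'page' and 'len' are assumed to describe the driver's buffer:
 *
 *	dma_addr_t dev_addr = swiotlb_map_page(dev, page, 0, len,
 *					       DMA_TO_DEVICE, NULL);
 *	if (swiotlb_dma_mapping_error(dev, dev_addr))
 *		return -EIO;
 *	... let the device read 'len' bytes at 'dev_addr' ...
 *	swiotlb_unmap_page(dev, dev_addr, len, DMA_TO_DEVICE, NULL);
 */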

/*
 * Unmap a single streaming mode DMA translation.  The dma_addr and size must
 * match what was provided in a previous swiotlb_map_page call.  All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
			 size_t size, int dir)
{
	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

	BUG_ON(dir == DMA_NONE);

	if (is_swiotlb_buffer(paddr)) {
		do_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
		return;
	}

	if (dir != DMA_FROM_DEVICE)
		return;

	/*
	 * phys_to_virt doesn't work with highmem page but we could
	 * call dma_mark_clean() with highmem page here. However, we
	 * are fine since dma_mark_clean() is null on POWERPC. We can
	 * make dma_mark_clean() take a physical address if necessary.
	 */
	dma_mark_clean(phys_to_virt(paddr), size);
}

void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
			size_t size, enum dma_data_direction dir,
			struct dma_attrs *attrs)
{
	unmap_single(hwdev, dev_addr, size, dir);
}
EXPORT_SYMBOL_GPL(swiotlb_unmap_page);

/*
 * Make physical memory consistent for a single streaming mode DMA translation
 * after a transfer.
 *
 * If you perform a swiotlb_map_page() but wish to interrogate the buffer
 * using the cpu, yet do not wish to teardown the dma mapping, you must
 * call this function before doing so.  At the next point you give the dma
 * address back to the card, you must first perform a
 * swiotlb_sync_single_for_device, and then the device again owns the buffer
 */
static void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
		    size_t size, int dir, int target)
{
	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

	BUG_ON(dir == DMA_NONE);

	if (is_swiotlb_buffer(paddr)) {
		sync_single(hwdev, phys_to_virt(paddr), size, dir, target);
		return;
	}

	if (dir != DMA_FROM_DEVICE)
		return;

	dma_mark_clean(phys_to_virt(paddr), size);
}

void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
			    size_t size, enum dma_data_direction dir)
{
	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);

void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir)
{
	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
}
EXPORT_SYMBOL(swiotlb_sync_single_for_device);
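
/*
 * Usage sketch (editor's note): peeking at a still-mapped
 * DMA_FROM_DEVICE buffer between transfers, per the ownership rules in
 * the comment above:
 *
 *	swiotlb_sync_single_for_cpu(dev, dev_addr, len, DMA_FROM_DEVICE);
 *	... the CPU may now read what the device wrote ...
 *	swiotlb_sync_single_for_device(dev, dev_addr, len, DMA_FROM_DEVICE);
 *	... the device owns the buffer again ...
 */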

/*
 * Same as above, but for a sub-range of the mapping.
 */
static void
swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
			  unsigned long offset, size_t size,
			  int dir, int target)
{
	swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
}

void
swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
				  unsigned long offset, size_t size,
				  enum dma_data_direction dir)
{
	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
				  SYNC_FOR_CPU);
}
EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);

void
swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir)
{
	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
				  SYNC_FOR_DEVICE);
}
EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);

/*
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the above swiotlb_map_page
 * interface.  Here the scatter gather list elements are each tagged with the
 * appropriate dma address and length.  They are obtained via
 * sg_dma_{address,length}(SG).
 *
 * NOTE: An implementation may be able to use a smaller number of
 *       DMA address/length pairs than there are SG table elements.
 *       (for example via virtual mapping capabilities)
 *       The routine returns the number of addr/length pairs actually
 *       used, at most nents.
 *
 * Device ownership issues as mentioned above for swiotlb_map_page are the
 * same here.
 */
int
swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
		     enum dma_data_direction dir, struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(dir == DMA_NONE);

	for_each_sg(sgl, sg, nelems, i) {
		phys_addr_t paddr = sg_phys(sg);
		dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);

		if (swiotlb_force ||
		    !dma_capable(hwdev, dev_addr, sg->length)) {
			void *map = map_single(hwdev, sg_phys(sg),
					       sg->length, dir);
			if (!map) {
				/* Don't panic here, we expect map_sg users
				   to do proper error handling. */
				swiotlb_full(hwdev, sg->length, dir, 0);
				swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
						       attrs);
				sgl[0].dma_length = 0;
				return 0;
			}
			sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
		} else
			sg->dma_address = dev_addr;
		sg->dma_length = sg->length;
	}
	return nelems;
}
EXPORT_SYMBOL(swiotlb_map_sg_attrs);

int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
	       int dir)
{
	return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
EXPORT_SYMBOL(swiotlb_map_sg);
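
/*
 * Usage sketch (editor's note): mapping a short scatterlist and walking
 * the resulting DMA addresses, assuming 'sgl' was initialized with
 * sg_init_table()/sg_set_page() beforehand:
 *
 *	struct scatterlist *sg;
 *	int i, n = swiotlb_map_sg_attrs(dev, sgl, nents,
 *					DMA_TO_DEVICE, NULL);
 *
 *	if (!n)
 *		return -ENOMEM;		(mapping failed, nothing to unmap)
 *	for_each_sg(sgl, sg, n, i)
 *		program_hw_entry(i, sg->dma_address, sg->dma_length);
 *	swiotlb_unmap_sg_attrs(dev, sgl, n, DMA_TO_DEVICE, NULL);
 *
 * program_hw_entry() is a hypothetical device-specific helper.
 */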

/*
 * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
 * concerning calls here are the same as for swiotlb_unmap_page() above.
 */
void
swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
		       int nelems, enum dma_data_direction dir,
		       struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(dir == DMA_NONE);

	for_each_sg(sgl, sg, nelems, i)
		unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);

}
EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);

void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
		 int dir)
{
	return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
EXPORT_SYMBOL(swiotlb_unmap_sg);

/*
 * Make physical memory consistent for a set of streaming mode DMA translations
 * after a transfer.
 *
 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
 * and usage.
 */
static void
swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
		int nelems, int dir, int target)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nelems, i)
		swiotlb_sync_single(hwdev, sg->dma_address,
				    sg->dma_length, dir, target);
}

void
swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
			int nelems, enum dma_data_direction dir)
{
	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);

void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
			   int nelems, enum dma_data_direction dir)
{
	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
}
EXPORT_SYMBOL(swiotlb_sync_sg_for_device);

int
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
	return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer));
}
EXPORT_SYMBOL(swiotlb_dma_mapping_error);

/*
 * Return whether the given device DMA address mask can be supported
 * properly.  For example, if your device can only drive the low 24-bits
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
int
swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
	return swiotlb_virt_to_bus(hwdev, io_tlb_end - 1) <= mask;
}
EXPORT_SYMBOL(swiotlb_dma_supported);