Commit e34f44b3517fe545f7fd45a8c2f6ee1e5e4432d3
1 parent: a35a345514
Exists in master and in 4 other branches
pool: Improve memory usage for devices which can't cross boundaries
The previous implementation simply refused to allocate more than a
boundary's worth of data from an entire page. Some users didn't know
this, so specified things like SMP_CACHE_BYTES, not realising the
horrible waste of memory that this was. It's fairly easy to correct
this problem, just by ensuring we don't cross a boundary within a page.
This even helps drivers like EHCI (which can't cross a 4k boundary)
on machines with larger page sizes.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Acked-by: David S. Miller <davem@davemloft.net>
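As an illustration (a hypothetical caller, not part of this commit): a driver whose DMA descriptors must not cross a 4 KiB boundary now passes that limit as the final argument, and the pool packs blocks densely up to each boundary instead of handing out only one boundary's worth per underlying allocation. The pool name, helper, and sizes below are invented:

    #include <linux/dmapool.h>

    /*
     * Hypothetical probe-time helper; "mydev_desc" and the sizes are
     * made up. Blocks are 512 bytes, aligned to 32 bytes, and are
     * guaranteed not to straddle any 4 KiB boundary.
     */
    static struct dma_pool *mydev_create_desc_pool(struct device *dev)
    {
            return dma_pool_create("mydev_desc", dev, 512, 32, 4096);
    }

On a machine with pages larger than 4 KiB, the whole page is now carved into such blocks (the free list simply skips over each 4 KiB mark), which is the EHCI-style case the commit message mentions.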
Showing 1 changed file with 20 additions and 16 deletions
mm/dmapool.c
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -43,6 +43,7 @@
 	size_t size;
 	struct device *dev;
 	size_t allocation;
+	size_t boundary;
 	char name[32];
 	wait_queue_head_t waitq;
 	struct list_head pools;
@@ -107,7 +108,7 @@
  * @dev: device that will be doing the DMA
  * @size: size of the blocks in this pool.
  * @align: alignment requirement for blocks; must be a power of two
- * @allocation: returned blocks won't cross this boundary (or zero)
+ * @boundary: returned blocks won't cross this power of two boundary
  * Context: !in_interrupt()
  *
  * Returns a dma allocation pool with the requested characteristics, or
@@ -117,15 +118,16 @@
  * cache flushing primitives. The actual size of blocks allocated may be
  * larger than requested because of alignment.
  *
- * If allocation is nonzero, objects returned from dma_pool_alloc() won't
+ * If @boundary is nonzero, objects returned from dma_pool_alloc() won't
  * cross that size boundary. This is useful for devices which have
  * addressing restrictions on individual DMA transfers, such as not crossing
  * boundaries of 4KBytes.
  */
 struct dma_pool *dma_pool_create(const char *name, struct device *dev,
-				 size_t size, size_t align, size_t allocation)
+				 size_t size, size_t align, size_t boundary)
 {
 	struct dma_pool *retval;
+	size_t allocation;
 
 	if (align == 0) {
 		align = 1;
@@ -142,27 +144,26 @@
 	if ((size % align) != 0)
 		size = ALIGN(size, align);
 
-	if (allocation == 0) {
-		if (PAGE_SIZE < size)
-			allocation = size;
-		else
-			allocation = PAGE_SIZE;
-		/* FIXME: round up for less fragmentation */
-	} else if (allocation < size)
+	allocation = max_t(size_t, size, PAGE_SIZE);
+
+	if (!boundary) {
+		boundary = allocation;
+	} else if ((boundary < size) || (boundary & (boundary - 1))) {
 		return NULL;
+	}
 
-	if (!
-	    (retval =
-	     kmalloc_node(sizeof *retval, GFP_KERNEL, dev_to_node(dev))))
+	retval = kmalloc_node(sizeof(*retval), GFP_KERNEL, dev_to_node(dev));
+	if (!retval)
 		return retval;
 
-	strlcpy(retval->name, name, sizeof retval->name);
+	strlcpy(retval->name, name, sizeof(retval->name));
 
 	retval->dev = dev;
 
 	INIT_LIST_HEAD(&retval->page_list);
 	spin_lock_init(&retval->lock);
 	retval->size = size;
+	retval->boundary = boundary;
 	retval->allocation = allocation;
 	init_waitqueue_head(&retval->waitq);
 
@@ -192,11 +193,14 @@
 static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page)
 {
 	unsigned int offset = 0;
+	unsigned int next_boundary = pool->boundary;
 
 	do {
 		unsigned int next = offset + pool->size;
-		if (unlikely((next + pool->size) >= pool->allocation))
-			next = pool->allocation;
+		if (unlikely((next + pool->size) >= next_boundary)) {
+			next = next_boundary;
+			next_boundary += pool->boundary;
+		}
 		*(int *)(page->vaddr + offset) = next;
 		offset = next;
 	} while (offset < pool->allocation);
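To see what the new free-list chaining in pool_initialise_page() produces, here is a standalone userspace sketch of the same loop; the sizes (16 KiB allocation, 4 KiB boundary, 1536-byte blocks) are assumptions chosen for illustration, not kernel code:

    #include <stdio.h>

    int main(void)
    {
            unsigned int allocation = 16384;        /* stands in for pool->allocation */
            unsigned int boundary = 4096;           /* stands in for pool->boundary */
            unsigned int size = 1536;               /* stands in for pool->size */
            unsigned int offset = 0;
            unsigned int next_boundary = boundary;

            do {
                    unsigned int next = offset + size;
                    /* If the block after this one would reach past the
                     * next boundary, chain straight to the boundary
                     * instead, exactly as the patched kernel loop does. */
                    if ((next + size) >= next_boundary) {
                            next = next_boundary;
                            next_boundary += boundary;
                    }
                    printf("free block at %5u -> next at %5u\n", offset, next);
                    offset = next;
            } while (offset < allocation);
            return 0;
    }

With these numbers the chain yields eight 1536-byte blocks per 16 KiB allocation, two in each 4 KiB region, jumping over every boundary; the old code would have served at most one boundary's worth of blocks from the entire allocation.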