Commit 10cef6029502915bdb3cf0821d425cf9dc30c817

Authored by Matt Mackall
Committed by Linus Torvalds
1 parent 30992c97ae

[PATCH] slob: introduce the SLOB allocator

configurable replacement for slab allocator

This adds a CONFIG_SLAB option under CONFIG_EMBEDDED.  When CONFIG_SLAB is
disabled, the kernel falls back to using the 'SLOB' allocator.

SLOB is a traditional K&R/UNIX allocator with a SLAB emulation layer,
similar to the original Linux kmalloc allocator that SLAB replaced.  It's
signicantly smaller code and is more memory efficient.  But like all
similar allocators, it scales poorly and suffers from fragmentation more
than SLAB, so it's only appropriate for small systems.

It's been tested extensively in the Linux-tiny tree.  I've also
stress-tested it with make -j 8 compiles on a 3G SMP+PREEMPT box (not
recommended).

Here's a comparison for otherwise identical builds, showing SLOB saving
nearly half a megabyte of RAM:

$ size vmlinux*
   text    data     bss     dec     hex filename
3336372  529360  190812 4056544  3de5e0 vmlinux-slab
3323208  527948  190684 4041840  3dac70 vmlinux-slob

$ size mm/{slab,slob}.o
   text    data     bss     dec     hex filename
  13221     752      48   14021    36c5 mm/slab.o
   1896      52       8    1956     7a4 mm/slob.o

/proc/meminfo:
                  SLAB          SLOB      delta
MemTotal:        27964 kB      27980 kB     +16 kB
MemFree:         24596 kB      25092 kB    +496 kB
Buffers:            36 kB         36 kB       0 kB
Cached:           1188 kB       1188 kB       0 kB
SwapCached:          0 kB          0 kB       0 kB
Active:            608 kB        600 kB      -8 kB
Inactive:          808 kB        812 kB      +4 kB
HighTotal:           0 kB          0 kB       0 kB
HighFree:            0 kB          0 kB       0 kB
LowTotal:        27964 kB      27980 kB     +16 kB
LowFree:         24596 kB      25092 kB    +496 kB
SwapTotal:           0 kB          0 kB       0 kB
SwapFree:            0 kB          0 kB       0 kB
Dirty:               4 kB         12 kB      +8 kB
Writeback:           0 kB          0 kB       0 kB
Mapped:            560 kB        556 kB      -4 kB
Slab:             1756 kB          0 kB   -1756 kB
CommitLimit:     13980 kB      13988 kB      +8 kB
Committed_AS:     4208 kB       4208 kB       0 kB
PageTables:         28 kB         28 kB       0 kB
VmallocTotal:  1007312 kB    1007312 kB       0 kB
VmallocUsed:        48 kB         48 kB       0 kB
VmallocChunk:  1007264 kB    1007264 kB       0 kB

(this work has been sponsored in part by CELF)

From: Ingo Molnar <mingo@elte.hu>

   Fix 32-bitness bugs in mm/slob.c.

Signed-off-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 5 changed files with 440 additions and 1 deletions Side-by-side Diff

... ... @@ -323,6 +323,7 @@
323 323 };
324 324 #endif
325 325  
  326 +#ifdef CONFIG_SLAB
326 327 extern struct seq_operations slabinfo_op;
327 328 extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *);
328 329 static int slabinfo_open(struct inode *inode, struct file *file)
... ... @@ -336,6 +337,7 @@
336 337 .llseek = seq_lseek,
337 338 .release = seq_release,
338 339 };
  340 +#endif
339 341  
340 342 static int show_stat(struct seq_file *p, void *v)
341 343 {
342 344  
... ... @@ -600,7 +602,9 @@
600 602 create_seq_entry("partitions", 0, &proc_partitions_operations);
601 603 create_seq_entry("stat", 0, &proc_stat_operations);
602 604 create_seq_entry("interrupts", 0, &proc_interrupts_operations);
  605 +#ifdef CONFIG_SLAB
603 606 create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
  607 +#endif
604 608 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
605 609 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
606 610 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
include/linux/slab.h
... ... @@ -53,6 +53,8 @@
53 53 #define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */
54 54 #define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */
55 55  
  56 +#ifndef CONFIG_SLOB
  57 +
56 58 /* prototypes */
57 59 extern void __init kmem_cache_init(void);
58 60  
... ... @@ -133,6 +135,39 @@
133 135  
134 136 extern int FASTCALL(kmem_cache_reap(int));
135 137 extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));
  138 +
  139 +#else /* CONFIG_SLOB */
  140 +
  141 +/* SLOB allocator routines */
  142 +
  143 +void kmem_cache_init(void);
  144 +struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags);
  145 +struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t,
  146 + unsigned long,
  147 + void (*)(void *, struct kmem_cache *, unsigned long),
  148 + void (*)(void *, struct kmem_cache *, unsigned long));
  149 +int kmem_cache_destroy(struct kmem_cache *c);
  150 +void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags);
  151 +void kmem_cache_free(struct kmem_cache *c, void *b);
  152 +const char *kmem_cache_name(struct kmem_cache *);
  153 +void *kmalloc(size_t size, gfp_t flags);
  154 +void *kzalloc(size_t size, gfp_t flags);
  155 +void kfree(const void *m);
  156 +unsigned int ksize(const void *m);
  157 +unsigned int kmem_cache_size(struct kmem_cache *c);
  158 +
  159 +static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
  160 +{
  161 + return kzalloc(n * size, flags);
  162 +}
  163 +
  164 +#define kmem_cache_shrink(d) (0)
  165 +#define kmem_cache_reap(a)
  166 +#define kmem_ptr_validate(a, b) (0)
  167 +#define kmem_cache_alloc_node(c, f, n) kmem_cache_alloc(c, f)
  168 +#define kmalloc_node(s, f, n) kmalloc(s, f)
  169 +
  170 +#endif /* CONFIG_SLOB */
136 171  
137 172 /* System wide caches */
138 173 extern kmem_cache_t *vm_area_cachep;
... ... @@ -380,6 +380,15 @@
380 380 no dummy operations need be executed.
381 381 Zero means use compiler's default.
382 382  
  383 +config SLAB
  384 + default y
  385 + bool "Use full SLAB allocator" if EMBEDDED
  386 + help
  387 + Disabling this replaces the advanced SLAB allocator and
  388 + kmalloc support with the drastically simpler SLOB allocator.
  389 + SLOB is more space efficient but does not scale well and is
  390 + more susceptible to fragmentation.
  391 +
383 392 endmenu # General setup
384 393  
385 394 config TINY_SHMEM
... ... @@ -390,6 +399,10 @@
390 399 int
391 400 default 0 if BASE_FULL
392 401 default 1 if !BASE_FULL
  402 +
  403 +config SLOB
  404 + default !SLAB
  405 + bool
393 406  
394 407 menu "Loadable module support"
395 408  
... ... @@ -9,7 +9,7 @@
9 9  
10 10 obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
11 11 page_alloc.o page-writeback.o pdflush.o \
12   - readahead.o slab.o swap.o truncate.o vmscan.o \
  12 + readahead.o swap.o truncate.o vmscan.o \
13 13 prio_tree.o util.o $(mmu-y)
14 14  
15 15 obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
... ... @@ -18,6 +18,8 @@
18 18 obj-$(CONFIG_SPARSEMEM) += sparse.o
19 19 obj-$(CONFIG_SHMEM) += shmem.o
20 20 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
  21 +obj-$(CONFIG_SLOB) += slob.o
  22 +obj-$(CONFIG_SLAB) += slab.o
21 23 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
22 24 obj-$(CONFIG_FS_XIP) += filemap_xip.o
  1 +/*
  2 + * SLOB Allocator: Simple List Of Blocks
  3 + *
  4 + * Matt Mackall <mpm@selenic.com> 12/30/03
  5 + *
  6 + * How SLOB works:
  7 + *
  8 + * The core of SLOB is a traditional K&R style heap allocator, with
  9 + * support for returning aligned objects. The granularity of this
  10 + * allocator is 8 bytes on x86, though it's perhaps possible to reduce
  11 + * this to 4 if it's deemed worth the effort. The slob heap is a
  12 + * singly-linked list of pages from __get_free_page, grown on demand
  13 + * and allocation from the heap is currently first-fit.
  14 + *
  15 + * Above this is an implementation of kmalloc/kfree. Blocks returned
  16 + * from kmalloc are 8-byte aligned and prepended with a 8-byte header.
  17 + * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
  18 + * __get_free_pages directly so that it can return page-aligned blocks
  19 + * and keeps a linked list of such pages and their orders. These
  20 + * objects are detected in kfree() by their page alignment.
  21 + *
  22 + * SLAB is emulated on top of SLOB by simply calling constructors and
  23 + * destructors for every SLAB allocation. Objects are returned with
  24 + * the 8-byte alignment unless the SLAB_MUST_HWCACHE_ALIGN flag is
  25 + * set, in which case the low-level allocator will fragment blocks to
  26 + * create the proper alignment. Again, objects of page-size or greater
  27 + * are allocated by calling __get_free_pages. As SLAB objects know
  28 + * their size, no separate size bookkeeping is necessary and there is
  29 + * essentially no allocation space overhead.
  30 + */
  31 +
  32 +#include <linux/config.h>
  33 +#include <linux/slab.h>
  34 +#include <linux/mm.h>
  35 +#include <linux/cache.h>
  36 +#include <linux/init.h>
  37 +#include <linux/module.h>
  38 +#include <linux/timer.h>
  39 +
  40 +struct slob_block {
  41 + int units;
  42 + struct slob_block *next;
  43 +};
  44 +typedef struct slob_block slob_t;
  45 +
  46 +#define SLOB_UNIT sizeof(slob_t)
  47 +#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
  48 +#define SLOB_ALIGN L1_CACHE_BYTES
  49 +
  50 +struct bigblock {
  51 + int order;
  52 + void *pages;
  53 + struct bigblock *next;
  54 +};
  55 +typedef struct bigblock bigblock_t;
  56 +
  57 +static slob_t arena = { .next = &arena, .units = 1 };
  58 +static slob_t *slobfree = &arena;
  59 +static bigblock_t *bigblocks;
  60 +static DEFINE_SPINLOCK(slob_lock);
  61 +static DEFINE_SPINLOCK(block_lock);
  62 +
  63 +static void slob_free(void *b, int size);
  64 +
  65 +static void *slob_alloc(size_t size, gfp_t gfp, int align)
  66 +{
  67 + slob_t *prev, *cur, *aligned = 0;
  68 + int delta = 0, units = SLOB_UNITS(size);
  69 + unsigned long flags;
  70 +
  71 + spin_lock_irqsave(&slob_lock, flags);
  72 + prev = slobfree;
  73 + for (cur = prev->next; ; prev = cur, cur = cur->next) {
  74 + if (align) {
  75 + aligned = (slob_t *)ALIGN((unsigned long)cur, align);
  76 + delta = aligned - cur;
  77 + }
  78 + if (cur->units >= units + delta) { /* room enough? */
  79 + if (delta) { /* need to fragment head to align? */
  80 + aligned->units = cur->units - delta;
  81 + aligned->next = cur->next;
  82 + cur->next = aligned;
  83 + cur->units = delta;
  84 + prev = cur;
  85 + cur = aligned;
  86 + }
  87 +
  88 + if (cur->units == units) /* exact fit? */
  89 + prev->next = cur->next; /* unlink */
  90 + else { /* fragment */
  91 + prev->next = cur + units;
  92 + prev->next->units = cur->units - units;
  93 + prev->next->next = cur->next;
  94 + cur->units = units;
  95 + }
  96 +
  97 + slobfree = prev;
  98 + spin_unlock_irqrestore(&slob_lock, flags);
  99 + return cur;
  100 + }
  101 + if (cur == slobfree) {
  102 + spin_unlock_irqrestore(&slob_lock, flags);
  103 +
  104 + if (size == PAGE_SIZE) /* trying to shrink arena? */
  105 + return 0;
  106 +
  107 + cur = (slob_t *)__get_free_page(gfp);
  108 + if (!cur)
  109 + return 0;
  110 +
  111 + slob_free(cur, PAGE_SIZE);
  112 + spin_lock_irqsave(&slob_lock, flags);
  113 + cur = slobfree;
  114 + }
  115 + }
  116 +}
  117 +
  118 +static void slob_free(void *block, int size)
  119 +{
  120 + slob_t *cur, *b = (slob_t *)block;
  121 + unsigned long flags;
  122 +
  123 + if (!block)
  124 + return;
  125 +
  126 + if (size)
  127 + b->units = SLOB_UNITS(size);
  128 +
  129 + /* Find reinsertion point */
  130 + spin_lock_irqsave(&slob_lock, flags);
  131 + for (cur = slobfree; !(b > cur && b < cur->next); cur = cur->next)
  132 + if (cur >= cur->next && (b > cur || b < cur->next))
  133 + break;
  134 +
  135 + if (b + b->units == cur->next) {
  136 + b->units += cur->next->units;
  137 + b->next = cur->next->next;
  138 + } else
  139 + b->next = cur->next;
  140 +
  141 + if (cur + cur->units == b) {
  142 + cur->units += b->units;
  143 + cur->next = b->next;
  144 + } else
  145 + cur->next = b;
  146 +
  147 + slobfree = cur;
  148 +
  149 + spin_unlock_irqrestore(&slob_lock, flags);
  150 +}
  151 +
  152 +static int FASTCALL(find_order(int size));
  153 +static int fastcall find_order(int size)
  154 +{
  155 + int order = 0;
  156 + for ( ; size > 4096 ; size >>=1)
  157 + order++;
  158 + return order;
  159 +}
  160 +
  161 +void *kmalloc(size_t size, gfp_t gfp)
  162 +{
  163 + slob_t *m;
  164 + bigblock_t *bb;
  165 + unsigned long flags;
  166 +
  167 + if (size < PAGE_SIZE - SLOB_UNIT) {
  168 + m = slob_alloc(size + SLOB_UNIT, gfp, 0);
  169 + return m ? (void *)(m + 1) : 0;
  170 + }
  171 +
  172 + bb = slob_alloc(sizeof(bigblock_t), gfp, 0);
  173 + if (!bb)
  174 + return 0;
  175 +
  176 + bb->order = find_order(size);
  177 + bb->pages = (void *)__get_free_pages(gfp, bb->order);
  178 +
  179 + if (bb->pages) {
  180 + spin_lock_irqsave(&block_lock, flags);
  181 + bb->next = bigblocks;
  182 + bigblocks = bb;
  183 + spin_unlock_irqrestore(&block_lock, flags);
  184 + return bb->pages;
  185 + }
  186 +
  187 + slob_free(bb, sizeof(bigblock_t));
  188 + return 0;
  189 +}
  190 +
  191 +EXPORT_SYMBOL(kmalloc);
  192 +
  193 +void kfree(const void *block)
  194 +{
  195 + bigblock_t *bb, **last = &bigblocks;
  196 + unsigned long flags;
  197 +
  198 + if (!block)
  199 + return;
  200 +
  201 + if (!((unsigned long)block & (PAGE_SIZE-1))) {
  202 + /* might be on the big block list */
  203 + spin_lock_irqsave(&block_lock, flags);
  204 + for (bb = bigblocks; bb; last = &bb->next, bb = bb->next) {
  205 + if (bb->pages == block) {
  206 + *last = bb->next;
  207 + spin_unlock_irqrestore(&block_lock, flags);
  208 + free_pages((unsigned long)block, bb->order);
  209 + slob_free(bb, sizeof(bigblock_t));
  210 + return;
  211 + }
  212 + }
  213 + spin_unlock_irqrestore(&block_lock, flags);
  214 + }
  215 +
  216 + slob_free((slob_t *)block - 1, 0);
  217 + return;
  218 +}
  219 +
  220 +EXPORT_SYMBOL(kfree);
  221 +
  222 +unsigned int ksize(const void *block)
  223 +{
  224 + bigblock_t *bb;
  225 + unsigned long flags;
  226 +
  227 + if (!block)
  228 + return 0;
  229 +
  230 + if (!((unsigned long)block & (PAGE_SIZE-1))) {
  231 + spin_lock_irqsave(&block_lock, flags);
  232 + for (bb = bigblocks; bb; bb = bb->next)
  233 + if (bb->pages == block) {
  234 + spin_unlock_irqrestore(&slob_lock, flags);
  235 + return PAGE_SIZE << bb->order;
  236 + }
  237 + spin_unlock_irqrestore(&block_lock, flags);
  238 + }
  239 +
  240 + return ((slob_t *)block - 1)->units * SLOB_UNIT;
  241 +}
  242 +
  243 +struct kmem_cache {
  244 + unsigned int size, align;
  245 + const char *name;
  246 + void (*ctor)(void *, struct kmem_cache *, unsigned long);
  247 + void (*dtor)(void *, struct kmem_cache *, unsigned long);
  248 +};
  249 +
  250 +struct kmem_cache *kmem_cache_create(const char *name, size_t size,
  251 + size_t align, unsigned long flags,
  252 + void (*ctor)(void*, struct kmem_cache *, unsigned long),
  253 + void (*dtor)(void*, struct kmem_cache *, unsigned long))
  254 +{
  255 + struct kmem_cache *c;
  256 +
  257 + c = slob_alloc(sizeof(struct kmem_cache), flags, 0);
  258 +
  259 + if (c) {
  260 + c->name = name;
  261 + c->size = size;
  262 + c->ctor = ctor;
  263 + c->dtor = dtor;
  264 + /* ignore alignment unless it's forced */
  265 + c->align = (flags & SLAB_MUST_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
  266 + if (c->align < align)
  267 + c->align = align;
  268 + }
  269 +
  270 + return c;
  271 +}
  272 +EXPORT_SYMBOL(kmem_cache_create);
  273 +
  274 +int kmem_cache_destroy(struct kmem_cache *c)
  275 +{
  276 + slob_free(c, sizeof(struct kmem_cache));
  277 + return 0;
  278 +}
  279 +EXPORT_SYMBOL(kmem_cache_destroy);
  280 +
  281 +void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags)
  282 +{
  283 + void *b;
  284 +
  285 + if (c->size < PAGE_SIZE)
  286 + b = slob_alloc(c->size, flags, c->align);
  287 + else
  288 + b = (void *)__get_free_pages(flags, find_order(c->size));
  289 +
  290 + if (c->ctor)
  291 + c->ctor(b, c, SLAB_CTOR_CONSTRUCTOR);
  292 +
  293 + return b;
  294 +}
  295 +EXPORT_SYMBOL(kmem_cache_alloc);
  296 +
  297 +void kmem_cache_free(struct kmem_cache *c, void *b)
  298 +{
  299 + if (c->dtor)
  300 + c->dtor(b, c, 0);
  301 +
  302 + if (c->size < PAGE_SIZE)
  303 + slob_free(b, c->size);
  304 + else
  305 + free_pages((unsigned long)b, find_order(c->size));
  306 +}
  307 +EXPORT_SYMBOL(kmem_cache_free);
  308 +
  309 +unsigned int kmem_cache_size(struct kmem_cache *c)
  310 +{
  311 + return c->size;
  312 +}
  313 +EXPORT_SYMBOL(kmem_cache_size);
  314 +
  315 +const char *kmem_cache_name(struct kmem_cache *c)
  316 +{
  317 + return c->name;
  318 +}
  319 +EXPORT_SYMBOL(kmem_cache_name);
  320 +
  321 +static struct timer_list slob_timer = TIMER_INITIALIZER(
  322 + (void (*)(unsigned long))kmem_cache_init, 0, 0);
  323 +
  324 +void kmem_cache_init(void)
  325 +{
  326 + void *p = slob_alloc(PAGE_SIZE, 0, PAGE_SIZE-1);
  327 +
  328 + if (p)
  329 + free_page((unsigned long)p);
  330 +
  331 + mod_timer(&slob_timer, jiffies + HZ);
  332 +}
  333 +
  334 +atomic_t slab_reclaim_pages = ATOMIC_INIT(0);
  335 +EXPORT_SYMBOL(slab_reclaim_pages);
  336 +
  337 +#ifdef CONFIG_SMP
  338 +
  339 +void *__alloc_percpu(size_t size, size_t align)
  340 +{
  341 + int i;
  342 + struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL);
  343 +
  344 + if (!pdata)
  345 + return NULL;
  346 +
  347 + for (i = 0; i < NR_CPUS; i++) {
  348 + if (!cpu_possible(i))
  349 + continue;
  350 + pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
  351 + if (!pdata->ptrs[i])
  352 + goto unwind_oom;
  353 + memset(pdata->ptrs[i], 0, size);
  354 + }
  355 +
  356 + /* Catch derefs w/o wrappers */
  357 + return (void *) (~(unsigned long) pdata);
  358 +
  359 +unwind_oom:
  360 + while (--i >= 0) {
  361 + if (!cpu_possible(i))
  362 + continue;
  363 + kfree(pdata->ptrs[i]);
  364 + }
  365 + kfree(pdata);
  366 + return NULL;
  367 +}
  368 +EXPORT_SYMBOL(__alloc_percpu);
  369 +
  370 +void
  371 +free_percpu(const void *objp)
  372 +{
  373 + int i;
  374 + struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
  375 +
  376 + for (i = 0; i < NR_CPUS; i++) {
  377 + if (!cpu_possible(i))
  378 + continue;
  379 + kfree(p->ptrs[i]);
  380 + }
  381 + kfree(p);
  382 +}
  383 +EXPORT_SYMBOL(free_percpu);
  384 +
  385 +#endif