Commit 189d3c4a94ef19fca2a71a6a336e9fda900e25e7
Committed by
Linus Torvalds
1 parent
b6f2fcbcfc
Exists in
master
and in
39 other branches
mm: bdi: allow setting a minimum for the bdi dirty limit
Under normal circumstances each device is given a part of the total write-back cache that relates to its current avg writeout speed in relation to the other devices. min_ratio - allows one to assign a minimum portion of the write-back cache to a particular device. This is useful in situations where you might want to provide a minimum QoS. (One request for this feature came from flash based storage people who wanted to avoid writing out at all costs - they of course needed some pdflush hacks as well) max_ratio - allows one to assign a maximum portion of the dirty limit to a particular device. This is useful in situations where you want to avoid one device taking all or most of the write-back cache. Eg. an NFS mount that is prone to get stuck, or a FUSE mount which you don't trust to play fair. Add "min_ratio" to /sys/class/bdi. This indicates the minimum percentage of the global dirty threshold allocated to this bdi. [mszeredi@suse.cz] - fix parsing in min_ratio_store() - document new sysfs attribute Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 4 changed files with 57 additions and 1 deletions Side-by-side Diff
Documentation/ABI/testing/sysfs-class-bdi
... | ... | @@ -43,4 +43,11 @@ |
43 | 43 | |
44 | 44 | Current threshold on this BDI for reclaimable + writeback |
45 | 45 | memory |
46 | + | |
47 | +min_ratio (read-write) | |
48 | + | |
49 | + Minimal percentage of global dirty threshold allocated to this | |
50 | + | bdi. If the value written to this file would make the sum | |
51 | + of all min_ratio values exceed 100, then EINVAL is returned. | |
52 | + The default is zero |
include/linux/backing-dev.h
... | ... | @@ -51,6 +51,8 @@ |
51 | 51 | struct prop_local_percpu completions; |
52 | 52 | int dirty_exceeded; |
53 | 53 | |
54 | + unsigned int min_ratio; | |
55 | + | |
54 | 56 | struct device *dev; |
55 | 57 | }; |
56 | 58 | |
... | ... | @@ -136,6 +138,8 @@ |
136 | 138 | return 1; |
137 | 139 | #endif |
138 | 140 | } |
141 | + | |
142 | +int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); | |
139 | 143 | |
140 | 144 | /* |
141 | 145 | * Flags in backing_dev_info::capability |
mm/backing-dev.c
... | ... | @@ -55,6 +55,24 @@ |
55 | 55 | BDI_SHOW(dirty_kb, K(get_dirty(bdi, 1))) |
56 | 56 | BDI_SHOW(bdi_dirty_kb, K(get_dirty(bdi, 2))) |
57 | 57 | |
58 | +static ssize_t min_ratio_store(struct device *dev, | |
59 | + struct device_attribute *attr, const char *buf, size_t count) | |
60 | +{ | |
61 | + struct backing_dev_info *bdi = dev_get_drvdata(dev); | |
62 | + char *end; | |
63 | + unsigned int ratio; | |
64 | + ssize_t ret = -EINVAL; | |
65 | + | |
66 | + ratio = simple_strtoul(buf, &end, 10); | |
67 | + if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) { | |
68 | + ret = bdi_set_min_ratio(bdi, ratio); | |
69 | + if (!ret) | |
70 | + ret = count; | |
71 | + } | |
72 | + return ret; | |
73 | +} | |
74 | +BDI_SHOW(min_ratio, bdi->min_ratio) | |
75 | + | |
58 | 76 | #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) |
59 | 77 | |
60 | 78 | static struct device_attribute bdi_dev_attrs[] = { |
... | ... | @@ -63,6 +81,7 @@ |
63 | 81 | __ATTR_RO(writeback_kb), |
64 | 82 | __ATTR_RO(dirty_kb), |
65 | 83 | __ATTR_RO(bdi_dirty_kb), |
84 | + __ATTR_RW(min_ratio), | |
66 | 85 | __ATTR_NULL, |
67 | 86 | }; |
68 | 87 | |
... | ... | @@ -126,6 +145,8 @@ |
126 | 145 | int err; |
127 | 146 | |
128 | 147 | bdi->dev = NULL; |
148 | + | |
149 | + bdi->min_ratio = 0; | |
129 | 150 | |
130 | 151 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { |
131 | 152 | err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); |
mm/page-writeback.c
... | ... | @@ -243,6 +243,29 @@ |
243 | 243 | } |
244 | 244 | |
245 | 245 | /* |
246 | + * | |
247 | + */ | |
248 | +static DEFINE_SPINLOCK(bdi_lock); | |
249 | +static unsigned int bdi_min_ratio; | |
250 | + | |
251 | +int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) | |
252 | +{ | |
253 | + int ret = 0; | |
254 | + unsigned long flags; | |
255 | + | |
256 | + spin_lock_irqsave(&bdi_lock, flags); | |
257 | + min_ratio -= bdi->min_ratio; | |
258 | + if (bdi_min_ratio + min_ratio < 100) { | |
259 | + bdi_min_ratio += min_ratio; | |
260 | + bdi->min_ratio += min_ratio; | |
261 | + } else | |
262 | + ret = -EINVAL; | |
263 | + spin_unlock_irqrestore(&bdi_lock, flags); | |
264 | + | |
265 | + return ret; | |
266 | +} | |
267 | + | |
268 | +/* | |
246 | 269 | * Work out the current dirty-memory clamping and background writeout |
247 | 270 | * thresholds. |
248 | 271 | * |
... | ... | @@ -330,7 +353,7 @@ |
330 | 353 | *pdirty = dirty; |
331 | 354 | |
332 | 355 | if (bdi) { |
333 | - u64 bdi_dirty = dirty; | |
356 | + u64 bdi_dirty; | |
334 | 357 | long numerator, denominator; |
335 | 358 | |
336 | 359 | /* |
337 | 360 | |
... | ... | @@ -338,8 +361,10 @@ |
338 | 361 | */ |
339 | 362 | bdi_writeout_fraction(bdi, &numerator, &denominator); |
340 | 363 | |
364 | + bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100; | |
341 | 365 | bdi_dirty *= numerator; |
342 | 366 | do_div(bdi_dirty, denominator); |
367 | + bdi_dirty += (dirty * bdi->min_ratio) / 100; | |
343 | 368 | |
344 | 369 | *pbdi_dirty = bdi_dirty; |
345 | 370 | clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty); |