Blame view
mm/page-writeback.c
86.8 KB
457c89965 treewide: Add SPD... |
1 |
// SPDX-License-Identifier: GPL-2.0-only |
1da177e4c Linux-2.6.12-rc2 |
2 |
/* |
f30c22695 fix file specific... |
3 |
* mm/page-writeback.c |
1da177e4c Linux-2.6.12-rc2 |
4 5 |
* * Copyright (C) 2002, Linus Torvalds. |
90eec103b treewide: Remove ... |
6 |
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra |
1da177e4c Linux-2.6.12-rc2 |
7 8 9 10 |
* * Contains functions related to writing back dirty pages at the * address_space level. * |
e1f8e8744 Remove Andrew Mor... |
11 |
* 10Apr2002 Andrew Morton |
1da177e4c Linux-2.6.12-rc2 |
12 13 14 15 |
* Initial version */ #include <linux/kernel.h> |
b95f1b31b mm: Map most file... |
16 |
#include <linux/export.h> |
1da177e4c Linux-2.6.12-rc2 |
17 18 19 20 21 22 23 24 25 |
#include <linux/spinlock.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/writeback.h> #include <linux/init.h> #include <linux/backing-dev.h> |
55e829af0 [PATCH] io-accoun... |
26 |
#include <linux/task_io_accounting_ops.h> |
1da177e4c Linux-2.6.12-rc2 |
27 28 |
#include <linux/blkdev.h> #include <linux/mpage.h> |
d08b3851d [PATCH] mm: track... |
29 |
#include <linux/rmap.h> |
1da177e4c Linux-2.6.12-rc2 |
30 |
#include <linux/percpu.h> |
1da177e4c Linux-2.6.12-rc2 |
31 32 33 34 |
#include <linux/smp.h> #include <linux/sysctl.h> #include <linux/cpu.h> #include <linux/syscalls.h> |
811d736f9 [PATCH] BLOCK: Di... |
35 |
#include <linux/pagevec.h> |
eb608e3a3 block: Convert BD... |
36 |
#include <linux/timer.h> |
8bd75c77b sched/rt: Move rt... |
37 |
#include <linux/sched/rt.h> |
f361bf4a6 sched/headers: Pr... |
38 |
#include <linux/sched/signal.h> |
6e543d578 mm: vmscan: fix d... |
39 |
#include <linux/mm_inline.h> |
028c2dd18 writeback: Add tr... |
40 |
#include <trace/events/writeback.h> |
1da177e4c Linux-2.6.12-rc2 |
41 |
|
6e543d578 mm: vmscan: fix d... |
42 |
#include "internal.h" |
1da177e4c Linux-2.6.12-rc2 |
43 |
/* |
ffd1f609a writeback: introd... |
44 45 46 47 48 |
* Sleep at most 200ms at a time in balance_dirty_pages(). */ #define MAX_PAUSE max(HZ/5, 1) /* |
5b9b35743 writeback: avoid ... |
49 50 51 52 53 54 |
* Try to keep balance_dirty_pages() call intervals higher than this many pages * by raising pause time to max_pause when falls below it. */ #define DIRTY_POLL_THRESH (128 >> (PAGE_SHIFT - 10)) /* |
e98be2d59 writeback: bdi wr... |
55 56 57 |
* Estimate write bandwidth at 200ms intervals. */ #define BANDWIDTH_INTERVAL max(HZ/5, 1) |
6c14ae1e9 writeback: dirty ... |
58 |
#define RATELIMIT_CALC_SHIFT 10 |
e98be2d59 writeback: bdi wr... |
59 |
/* |
1da177e4c Linux-2.6.12-rc2 |
60 61 62 63 |
* After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited * will look to see if it needs to force writeback or throttling. */ static long ratelimit_pages = 32; |
1da177e4c Linux-2.6.12-rc2 |
64 65 66 |
/* The following parameters are exported via /proc/sys/vm */ /* |
5b0830cb9 writeback: get ri... |
67 |
* Start background writeback (via writeback threads) at this percentage |
1da177e4c Linux-2.6.12-rc2 |
68 |
*/ |
1b5e62b42 writeback: double... |
69 |
int dirty_background_ratio = 10; |
1da177e4c Linux-2.6.12-rc2 |
70 71 |
/* |
2da02997e mm: add dirty_bac... |
72 73 74 75 76 77 |
* dirty_background_bytes starts at 0 (disabled) so that it is a function of * dirty_background_ratio * the amount of dirtyable memory */ unsigned long dirty_background_bytes; /* |
195cf453d mm/page-writeback... |
78 79 80 81 82 83 |
* free highmem will not be subtracted from the total free memory * for calculating free ratios if vm_highmem_is_dirtyable is true */ int vm_highmem_is_dirtyable; /* |
1da177e4c Linux-2.6.12-rc2 |
84 85 |
* The generator of dirty data starts writeback at this percentage */ |
1b5e62b42 writeback: double... |
86 |
int vm_dirty_ratio = 20; |
1da177e4c Linux-2.6.12-rc2 |
87 88 |
/* |
2da02997e mm: add dirty_bac... |
89 90 91 92 93 94 |
* vm_dirty_bytes starts at 0 (disabled) so that it is a function of * vm_dirty_ratio * the amount of dirtyable memory */ unsigned long vm_dirty_bytes; /* |
704503d83 mm: fix proc_doin... |
95 |
* The interval between `kupdate'-style writebacks |
1da177e4c Linux-2.6.12-rc2 |
96 |
*/ |
22ef37eed page-writeback: f... |
97 |
unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */ |
1da177e4c Linux-2.6.12-rc2 |
98 |
|
91913a294 mm: export dirty_... |
99 |
EXPORT_SYMBOL_GPL(dirty_writeback_interval); |
1da177e4c Linux-2.6.12-rc2 |
100 |
/* |
704503d83 mm: fix proc_doin... |
101 |
* The longest time for which data is allowed to remain dirty |
1da177e4c Linux-2.6.12-rc2 |
102 |
*/ |
22ef37eed page-writeback: f... |
103 |
unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */ |
1da177e4c Linux-2.6.12-rc2 |
104 105 |
/* |
ed5b43f15 [PATCH] Represent... |
106 107 |
* Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies: * a full sync is triggered after this time elapses without any disk activity. |
1da177e4c Linux-2.6.12-rc2 |
108 109 110 111 112 113 |
*/ int laptop_mode; EXPORT_SYMBOL(laptop_mode); /* End of sysctl-exported parameters */ |
dcc25ae76 writeback: move g... |
114 |
struct wb_domain global_wb_domain; |
1da177e4c Linux-2.6.12-rc2 |
115 |
|
2bc00aef0 writeback: consol... |
116 117 |
/* consolidated parameters for balance_dirty_pages() and its subroutines */ struct dirty_throttle_control { |
e9f07dfd7 writeback: add di... |
118 119 |
#ifdef CONFIG_CGROUP_WRITEBACK struct wb_domain *dom; |
9fc3a43e1 writeback: separa... |
120 |
struct dirty_throttle_control *gdtc; /* only set in memcg dtc's */ |
e9f07dfd7 writeback: add di... |
121 |
#endif |
2bc00aef0 writeback: consol... |
122 |
struct bdi_writeback *wb; |
e9770b348 writeback: add di... |
123 |
struct fprop_local_percpu *wb_completions; |
eb608e3a3 block: Convert BD... |
124 |
|
9fc3a43e1 writeback: separa... |
125 |
unsigned long avail; /* dirtyable */ |
2bc00aef0 writeback: consol... |
126 127 128 129 130 131 |
unsigned long dirty; /* file_dirty + write + nfs */ unsigned long thresh; /* dirty threshold */ unsigned long bg_thresh; /* dirty background threshold */ unsigned long wb_dirty; /* per-wb counterparts */ unsigned long wb_thresh; |
970fb01ad writeback: add di... |
132 |
unsigned long wb_bg_thresh; |
daddfa3cb writeback: add di... |
133 134 |
unsigned long pos_ratio; |
2bc00aef0 writeback: consol... |
135 |
}; |
eb608e3a3 block: Convert BD... |
136 137 138 139 140 141 |
/* * Length of period for aging writeout fractions of bdis. This is an * arbitrarily chosen number. The longer the period, the slower fractions will * reflect changes in current writeout rate. */ #define VM_COMPLETIONS_PERIOD_LEN (3*HZ) |
04fbfdc14 mm: per device di... |
142 |
|
693108a8a writeback: make b... |
143 |
#ifdef CONFIG_CGROUP_WRITEBACK |
d60d1bddd writeback: memcg ... |
144 145 146 |
#define GDTC_INIT(__wb) .wb = (__wb), \ .dom = &global_wb_domain, \ .wb_completions = &(__wb)->completions |
9fc3a43e1 writeback: separa... |
147 |
#define GDTC_INIT_NO_WB .dom = &global_wb_domain |
d60d1bddd writeback: memcg ... |
148 149 150 151 152 |
#define MDTC_INIT(__wb, __gdtc) .wb = (__wb), \ .dom = mem_cgroup_wb_domain(__wb), \ .wb_completions = &(__wb)->memcg_completions, \ .gdtc = __gdtc |
c2aa723a6 writeback: implem... |
153 154 155 156 157 |
static bool mdtc_valid(struct dirty_throttle_control *dtc) { return dtc->dom; } |
e9f07dfd7 writeback: add di... |
158 159 160 161 162 |
static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc) { return dtc->dom; } |
9fc3a43e1 writeback: separa... |
163 164 165 166 |
static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc) { return mdtc->gdtc; } |
841710aa6 writeback: implem... |
167 168 169 170 |
static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb) { return &wb->memcg_completions; } |
693108a8a writeback: make b... |
171 172 173 |
static void wb_min_max_ratio(struct bdi_writeback *wb, unsigned long *minp, unsigned long *maxp) { |
20792ebf3 writeback: use RE... |
174 |
unsigned long this_bw = READ_ONCE(wb->avg_write_bandwidth); |
693108a8a writeback: make b... |
175 176 177 178 179 180 181 182 183 184 185 |
unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth); unsigned long long min = wb->bdi->min_ratio; unsigned long long max = wb->bdi->max_ratio; /* * @wb may already be clean by the time control reaches here and * the total may not include its bw. */ if (this_bw < tot_bw) { if (min) { min *= this_bw; |
6d9e8c651 mm/page-writeback... |
186 |
min = div64_ul(min, tot_bw); |
693108a8a writeback: make b... |
187 188 189 |
} if (max < 100) { max *= this_bw; |
6d9e8c651 mm/page-writeback... |
190 |
max = div64_ul(max, tot_bw); |
693108a8a writeback: make b... |
191 192 193 194 195 196 197 198 |
} } *minp = min; *maxp = max; } #else /* CONFIG_CGROUP_WRITEBACK */ |
d60d1bddd writeback: memcg ... |
199 200 |
#define GDTC_INIT(__wb) .wb = (__wb), \ .wb_completions = &(__wb)->completions |
9fc3a43e1 writeback: separa... |
201 |
#define GDTC_INIT_NO_WB |
c2aa723a6 writeback: implem... |
202 203 204 205 206 207 |
#define MDTC_INIT(__wb, __gdtc) static bool mdtc_valid(struct dirty_throttle_control *dtc) { return false; } |
e9f07dfd7 writeback: add di... |
208 209 210 211 212 |
static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc) { return &global_wb_domain; } |
9fc3a43e1 writeback: separa... |
213 214 215 216 |
static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc) { return NULL; } |
841710aa6 writeback: implem... |
217 218 219 220 |
static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb) { return NULL; } |
693108a8a writeback: make b... |
221 222 223 224 225 226 227 228 |
static void wb_min_max_ratio(struct bdi_writeback *wb, unsigned long *minp, unsigned long *maxp) { *minp = wb->bdi->min_ratio; *maxp = wb->bdi->max_ratio; } #endif /* CONFIG_CGROUP_WRITEBACK */ |
04fbfdc14 mm: per device di... |
229 |
/* |
a756cf590 mm: try to distri... |
230 231 232 233 234 235 236 |
* In a memory zone, there is a certain amount of pages we consider * available for the page cache, which is essentially the number of * free and reclaimable pages, minus some zone reserves to protect * lowmem and the ability to uphold the zone's watermarks without * requiring writeback. * * This number of dirtyable pages is the base value of which the |
e0857cf5a mm/page-writeback... |
237 |
* user-configurable dirty ratio is the effective number of pages that |
a756cf590 mm: try to distri... |
238 239 240 241 242 243 244 245 |
* are allowed to be actually dirtied. Per individual zone, or * globally by using the sum of dirtyable pages over all zones. * * Because the user is allowed to specify the dirty limit globally as * absolute number of bytes, calculating the per-zone dirty limit can * require translating the configured limit into a percentage of * global dirtyable memory first. */ |
a804552b9 mm/page-writeback... |
246 |
/** |
281e37265 mm, page_alloc: c... |
247 248 |
* node_dirtyable_memory - number of dirtyable pages in a node * @pgdat: the node |
a804552b9 mm/page-writeback... |
249 |
* |
a862f68a8 docs/core-api/mm:... |
250 |
* Return: the node's number of pages potentially available for dirty |
281e37265 mm, page_alloc: c... |
251 |
* page cache. This is the base value for the per-node dirty limits. |
a804552b9 mm/page-writeback... |
252 |
*/ |
281e37265 mm, page_alloc: c... |
253 |
static unsigned long node_dirtyable_memory(struct pglist_data *pgdat) |
a804552b9 mm/page-writeback... |
254 |
{ |
281e37265 mm, page_alloc: c... |
255 256 257 258 259 260 261 262 263 264 265 |
unsigned long nr_pages = 0; int z; for (z = 0; z < MAX_NR_ZONES; z++) { struct zone *zone = pgdat->node_zones + z; if (!populated_zone(zone)) continue; nr_pages += zone_page_state(zone, NR_FREE_PAGES); } |
a804552b9 mm/page-writeback... |
266 |
|
a8d014373 mm: page_alloc: g... |
267 268 269 270 271 |
/* * Pages reserved for the kernel should not be considered * dirtyable, to prevent a situation where reclaim has to * clean pages in order to balance the zones. */ |
281e37265 mm, page_alloc: c... |
272 |
nr_pages -= min(nr_pages, pgdat->totalreserve_pages); |
a804552b9 mm/page-writeback... |
273 |
|
281e37265 mm, page_alloc: c... |
274 275 |
nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE); nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE); |
a804552b9 mm/page-writeback... |
276 277 278 |
return nr_pages; } |
1edf22348 mm/page-writeback... |
279 280 281 282 |
static unsigned long highmem_dirtyable_memory(unsigned long total) { #ifdef CONFIG_HIGHMEM int node; |
bb4cc2bea mm, vmscan: remov... |
283 |
unsigned long x = 0; |
09b4ab3c4 mm/writeback: cor... |
284 |
int i; |
1edf22348 mm/page-writeback... |
285 286 |
for_each_node_state(node, N_HIGH_MEMORY) { |
281e37265 mm, page_alloc: c... |
287 288 |
for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) { struct zone *z; |
9cb937e21 mm, page_alloc: f... |
289 |
unsigned long nr_pages; |
281e37265 mm, page_alloc: c... |
290 291 292 293 294 |
if (!is_highmem_idx(i)) continue; z = &NODE_DATA(node)->node_zones[i]; |
9cb937e21 mm, page_alloc: f... |
295 296 |
if (!populated_zone(z)) continue; |
1edf22348 mm/page-writeback... |
297 |
|
9cb937e21 mm, page_alloc: f... |
298 |
nr_pages = zone_page_state(z, NR_FREE_PAGES); |
281e37265 mm, page_alloc: c... |
299 |
/* watch for underflows */ |
9cb937e21 mm, page_alloc: f... |
300 |
nr_pages -= min(nr_pages, high_wmark_pages(z)); |
bb4cc2bea mm, vmscan: remov... |
301 302 303 |
nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE); nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE); x += nr_pages; |
09b4ab3c4 mm/writeback: cor... |
304 |
} |
1edf22348 mm/page-writeback... |
305 |
} |
281e37265 mm, page_alloc: c... |
306 |
|
1edf22348 mm/page-writeback... |
307 |
/* |
c8b74c2f6 mm: fix calculati... |
308 309 310 311 312 313 314 315 316 317 318 319 |
* Unreclaimable memory (kernel memory or anonymous memory * without swap) can bring down the dirtyable pages below * the zone's dirty balance reserve and the above calculation * will underflow. However we still want to add in nodes * which are below threshold (negative values) to get a more * accurate calculation but make sure that the total never * underflows. */ if ((long)x < 0) x = 0; /* |
1edf22348 mm/page-writeback... |
320 321 322 323 324 325 326 327 328 329 330 331 |
* Make sure that the number of highmem pages is never larger * than the number of the total dirtyable memory. This can only * occur in very strange VM situations but we want to make sure * that this does not occur. */ return min(x, total); #else return 0; #endif } /** |
ccafa2879 mm: writeback: cl... |
332 |
* global_dirtyable_memory - number of globally dirtyable pages |
1edf22348 mm/page-writeback... |
333 |
* |
a862f68a8 docs/core-api/mm:... |
334 |
* Return: the global number of pages potentially available for dirty |
ccafa2879 mm: writeback: cl... |
335 |
* page cache. This is the base value for the global dirty limits. |
1edf22348 mm/page-writeback... |
336 |
*/ |
18cf8cf8b mm: page-writebac... |
337 |
static unsigned long global_dirtyable_memory(void) |
1edf22348 mm/page-writeback... |
338 339 |
{ unsigned long x; |
c41f012ad mm: rename global... |
340 |
x = global_zone_page_state(NR_FREE_PAGES); |
a8d014373 mm: page_alloc: g... |
341 342 343 344 345 346 |
/* * Pages reserved for the kernel should not be considered * dirtyable, to prevent a situation where reclaim has to * clean pages in order to balance the zones. */ x -= min(x, totalreserve_pages); |
1edf22348 mm/page-writeback... |
347 |
|
599d0c954 mm, vmscan: move ... |
348 349 |
x += global_node_page_state(NR_INACTIVE_FILE); x += global_node_page_state(NR_ACTIVE_FILE); |
a804552b9 mm/page-writeback... |
350 |
|
1edf22348 mm/page-writeback... |
351 352 353 354 355 |
if (!vm_highmem_is_dirtyable) x -= highmem_dirtyable_memory(x); return x + 1; /* Ensure that we never return 0 */ } |
9fc3a43e1 writeback: separa... |
356 357 358 |
/** * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain * @dtc: dirty_throttle_control of interest |
ccafa2879 mm: writeback: cl... |
359 |
* |
9fc3a43e1 writeback: separa... |
360 361 362 |
* Calculate @dtc->thresh and ->bg_thresh considering * vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}. The caller * must ensure that @dtc->avail is set before calling this function. The |
a37b0715d mm/writeback: rep... |
363 |
* dirty limits will be lifted by 1/4 for real-time tasks. |
ccafa2879 mm: writeback: cl... |
364 |
*/ |
9fc3a43e1 writeback: separa... |
365 |
static void domain_dirty_limits(struct dirty_throttle_control *dtc) |
ccafa2879 mm: writeback: cl... |
366 |
{ |
9fc3a43e1 writeback: separa... |
367 368 369 370 |
const unsigned long available_memory = dtc->avail; struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc); unsigned long bytes = vm_dirty_bytes; unsigned long bg_bytes = dirty_background_bytes; |
62a584fe0 writeback: use hi... |
371 372 373 |
/* convert ratios to per-PAGE_SIZE for higher precision */ unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100; unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100; |
9fc3a43e1 writeback: separa... |
374 375 |
unsigned long thresh; unsigned long bg_thresh; |
ccafa2879 mm: writeback: cl... |
376 |
struct task_struct *tsk; |
9fc3a43e1 writeback: separa... |
377 378 379 380 381 382 383 |
/* gdtc is !NULL iff @dtc is for memcg domain */ if (gdtc) { unsigned long global_avail = gdtc->avail; /* * The byte settings can't be applied directly to memcg * domains. Convert them to ratios by scaling against |
62a584fe0 writeback: use hi... |
384 385 386 |
* globally available memory. As the ratios are in * per-PAGE_SIZE, they can be obtained by dividing bytes by * number of pages. |
9fc3a43e1 writeback: separa... |
387 388 |
*/ if (bytes) |
62a584fe0 writeback: use hi... |
389 390 |
ratio = min(DIV_ROUND_UP(bytes, global_avail), PAGE_SIZE); |
9fc3a43e1 writeback: separa... |
391 |
if (bg_bytes) |
62a584fe0 writeback: use hi... |
392 393 |
bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail), PAGE_SIZE); |
9fc3a43e1 writeback: separa... |
394 395 396 397 398 |
bytes = bg_bytes = 0; } if (bytes) thresh = DIV_ROUND_UP(bytes, PAGE_SIZE); |
ccafa2879 mm: writeback: cl... |
399 |
else |
62a584fe0 writeback: use hi... |
400 |
thresh = (ratio * available_memory) / PAGE_SIZE; |
ccafa2879 mm: writeback: cl... |
401 |
|
9fc3a43e1 writeback: separa... |
402 403 |
if (bg_bytes) bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE); |
ccafa2879 mm: writeback: cl... |
404 |
else |
62a584fe0 writeback: use hi... |
405 |
bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; |
ccafa2879 mm: writeback: cl... |
406 |
|
90daf3062 Revert "mm/page-w... |
407 |
if (bg_thresh >= thresh) |
9fc3a43e1 writeback: separa... |
408 |
bg_thresh = thresh / 2; |
ccafa2879 mm: writeback: cl... |
409 |
tsk = current; |
a37b0715d mm/writeback: rep... |
410 |
if (rt_task(tsk)) { |
a53eaff8c MM: increase safe... |
411 412 |
bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; thresh += thresh / 4 + global_wb_domain.dirty_limit / 32; |
ccafa2879 mm: writeback: cl... |
413 |
} |
9fc3a43e1 writeback: separa... |
414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 |
dtc->thresh = thresh; dtc->bg_thresh = bg_thresh; /* we should eventually report the domain in the TP */ if (!gdtc) trace_global_dirty_state(bg_thresh, thresh); } /** * global_dirty_limits - background-writeback and dirty-throttling thresholds * @pbackground: out parameter for bg_thresh * @pdirty: out parameter for thresh * * Calculate bg_thresh and thresh for global_wb_domain. See * domain_dirty_limits() for details. */ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty) { struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB }; gdtc.avail = global_dirtyable_memory(); domain_dirty_limits(&gdtc); *pbackground = gdtc.bg_thresh; *pdirty = gdtc.thresh; |
ccafa2879 mm: writeback: cl... |
439 |
} |
a756cf590 mm: try to distri... |
440 |
/** |
281e37265 mm, page_alloc: c... |
441 442 |
* node_dirty_limit - maximum number of dirty pages allowed in a node * @pgdat: the node |
a756cf590 mm: try to distri... |
443 |
* |
a862f68a8 docs/core-api/mm:... |
444 |
* Return: the maximum number of dirty pages allowed in a node, based |
281e37265 mm, page_alloc: c... |
445 |
* on the node's dirtyable memory. |
a756cf590 mm: try to distri... |
446 |
*/ |
281e37265 mm, page_alloc: c... |
447 |
static unsigned long node_dirty_limit(struct pglist_data *pgdat) |
a756cf590 mm: try to distri... |
448 |
{ |
281e37265 mm, page_alloc: c... |
449 |
unsigned long node_memory = node_dirtyable_memory(pgdat); |
a756cf590 mm: try to distri... |
450 451 452 453 454 |
struct task_struct *tsk = current; unsigned long dirty; if (vm_dirty_bytes) dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) * |
281e37265 mm, page_alloc: c... |
455 |
node_memory / global_dirtyable_memory(); |
a756cf590 mm: try to distri... |
456 |
else |
281e37265 mm, page_alloc: c... |
457 |
dirty = vm_dirty_ratio * node_memory / 100; |
a756cf590 mm: try to distri... |
458 |
|
a37b0715d mm/writeback: rep... |
459 |
if (rt_task(tsk)) |
a756cf590 mm: try to distri... |
460 461 462 463 464 465 |
dirty += dirty / 4; return dirty; } /** |
281e37265 mm, page_alloc: c... |
466 467 |
* node_dirty_ok - tells whether a node is within its dirty limits * @pgdat: the node to check |
a756cf590 mm: try to distri... |
468 |
* |
a862f68a8 docs/core-api/mm:... |
469 |
* Return: %true when the dirty pages in @pgdat are within the node's |
a756cf590 mm: try to distri... |
470 471 |
* dirty limit, %false if the limit is exceeded. */ |
281e37265 mm, page_alloc: c... |
472 |
bool node_dirty_ok(struct pglist_data *pgdat) |
a756cf590 mm: try to distri... |
473 |
{ |
281e37265 mm, page_alloc: c... |
474 475 |
unsigned long limit = node_dirty_limit(pgdat); unsigned long nr_pages = 0; |
11fb99898 mm: move most fil... |
476 |
nr_pages += node_page_state(pgdat, NR_FILE_DIRTY); |
11fb99898 mm: move most fil... |
477 |
nr_pages += node_page_state(pgdat, NR_WRITEBACK); |
a756cf590 mm: try to distri... |
478 |
|
281e37265 mm, page_alloc: c... |
479 |
return nr_pages <= limit; |
a756cf590 mm: try to distri... |
480 |
} |
2da02997e mm: add dirty_bac... |
481 |
int dirty_background_ratio_handler(struct ctl_table *table, int write, |
32927393d sysctl: pass kern... |
482 |
void *buffer, size_t *lenp, loff_t *ppos) |
2da02997e mm: add dirty_bac... |
483 484 |
{ int ret; |
8d65af789 sysctl: remove "s... |
485 |
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
2da02997e mm: add dirty_bac... |
486 487 488 489 490 491 |
if (ret == 0 && write) dirty_background_bytes = 0; return ret; } int dirty_background_bytes_handler(struct ctl_table *table, int write, |
32927393d sysctl: pass kern... |
492 |
void *buffer, size_t *lenp, loff_t *ppos) |
2da02997e mm: add dirty_bac... |
493 494 |
{ int ret; |
8d65af789 sysctl: remove "s... |
495 |
ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
2da02997e mm: add dirty_bac... |
496 497 498 499 |
if (ret == 0 && write) dirty_background_ratio = 0; return ret; } |
32927393d sysctl: pass kern... |
500 501 |
int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) |
04fbfdc14 mm: per device di... |
502 503 |
{ int old_ratio = vm_dirty_ratio; |
2da02997e mm: add dirty_bac... |
504 |
int ret; |
8d65af789 sysctl: remove "s... |
505 |
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
04fbfdc14 mm: per device di... |
506 |
if (ret == 0 && write && vm_dirty_ratio != old_ratio) { |
eb608e3a3 block: Convert BD... |
507 |
writeback_set_ratelimit(); |
2da02997e mm: add dirty_bac... |
508 509 510 511 |
vm_dirty_bytes = 0; } return ret; } |
2da02997e mm: add dirty_bac... |
512 |
int dirty_bytes_handler(struct ctl_table *table, int write, |
32927393d sysctl: pass kern... |
513 |
void *buffer, size_t *lenp, loff_t *ppos) |
2da02997e mm: add dirty_bac... |
514 |
{ |
fc3501d41 mm: fix dirty_byt... |
515 |
unsigned long old_bytes = vm_dirty_bytes; |
2da02997e mm: add dirty_bac... |
516 |
int ret; |
8d65af789 sysctl: remove "s... |
517 |
ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
2da02997e mm: add dirty_bac... |
518 |
if (ret == 0 && write && vm_dirty_bytes != old_bytes) { |
eb608e3a3 block: Convert BD... |
519 |
writeback_set_ratelimit(); |
2da02997e mm: add dirty_bac... |
520 |
vm_dirty_ratio = 0; |
04fbfdc14 mm: per device di... |
521 522 523 |
} return ret; } |
eb608e3a3 block: Convert BD... |
524 525 526 527 528 529 530 531 |
static unsigned long wp_next_time(unsigned long cur_time) { cur_time += VM_COMPLETIONS_PERIOD_LEN; /* 0 has a special meaning... */ if (!cur_time) return 1; return cur_time; } |
c7981433e writeback: make _... |
532 533 534 |
static void wb_domain_writeout_inc(struct wb_domain *dom, struct fprop_local_percpu *completions, unsigned int max_prop_frac) |
04fbfdc14 mm: per device di... |
535 |
{ |
c7981433e writeback: make _... |
536 537 |
__fprop_inc_percpu_max(&dom->completions, completions, max_prop_frac); |
eb608e3a3 block: Convert BD... |
538 |
/* First event after period switching was turned off? */ |
517663edd mm/page-writeback... |
539 |
if (unlikely(!dom->period_time)) { |
eb608e3a3 block: Convert BD... |
540 541 542 543 544 545 |
/* * We can race with other __bdi_writeout_inc calls here but * it does not cause any harm since the resulting time when * timer will fire and what is in writeout_period_time will be * roughly the same. */ |
380c27ca3 writeback: implem... |
546 547 |
dom->period_time = wp_next_time(jiffies); mod_timer(&dom->period_timer, dom->period_time); |
eb608e3a3 block: Convert BD... |
548 |
} |
04fbfdc14 mm: per device di... |
549 |
} |
c7981433e writeback: make _... |
550 551 552 553 554 |
/* * Increment @wb's writeout completion count and the global writeout * completion count. Called from test_clear_page_writeback(). */ static inline void __wb_writeout_inc(struct bdi_writeback *wb) |
dd5656e59 mm: bdi: export b... |
555 |
{ |
841710aa6 writeback: implem... |
556 |
struct wb_domain *cgdom; |
dd5656e59 mm: bdi: export b... |
557 |
|
3e8f399da writeback: rework... |
558 |
inc_wb_stat(wb, WB_WRITTEN); |
c7981433e writeback: make _... |
559 560 |
wb_domain_writeout_inc(&global_wb_domain, &wb->completions, wb->bdi->max_prop_frac); |
841710aa6 writeback: implem... |
561 562 563 564 565 |
cgdom = mem_cgroup_wb_domain(wb); if (cgdom) wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb), wb->bdi->max_prop_frac); |
dd5656e59 mm: bdi: export b... |
566 |
} |
dd5656e59 mm: bdi: export b... |
567 |
|
93f78d882 writeback: move b... |
568 |
void wb_writeout_inc(struct bdi_writeback *wb) |
04fbfdc14 mm: per device di... |
569 |
{ |
dd5656e59 mm: bdi: export b... |
570 571 572 |
unsigned long flags; local_irq_save(flags); |
93f78d882 writeback: move b... |
573 |
__wb_writeout_inc(wb); |
dd5656e59 mm: bdi: export b... |
574 |
local_irq_restore(flags); |
04fbfdc14 mm: per device di... |
575 |
} |
93f78d882 writeback: move b... |
576 |
EXPORT_SYMBOL_GPL(wb_writeout_inc); |
04fbfdc14 mm: per device di... |
577 |
|
04fbfdc14 mm: per device di... |
578 |
/* |
eb608e3a3 block: Convert BD... |
579 580 581 |
* On idle system, we can be called long after we scheduled because we use * deferred timers so count with missed periods. */ |
9823e51bf mm/page-writeback... |
582 |
static void writeout_period(struct timer_list *t) |
eb608e3a3 block: Convert BD... |
583 |
{ |
9823e51bf mm/page-writeback... |
584 |
struct wb_domain *dom = from_timer(dom, t, period_timer); |
380c27ca3 writeback: implem... |
585 |
int miss_periods = (jiffies - dom->period_time) / |
eb608e3a3 block: Convert BD... |
586 |
VM_COMPLETIONS_PERIOD_LEN; |
380c27ca3 writeback: implem... |
587 588 |
if (fprop_new_period(&dom->completions, miss_periods + 1)) { dom->period_time = wp_next_time(dom->period_time + |
eb608e3a3 block: Convert BD... |
589 |
miss_periods * VM_COMPLETIONS_PERIOD_LEN); |
380c27ca3 writeback: implem... |
590 |
mod_timer(&dom->period_timer, dom->period_time); |
eb608e3a3 block: Convert BD... |
591 592 593 594 595 |
} else { /* * Aging has zeroed all fractions. Stop wasting CPU on period * updates. */ |
380c27ca3 writeback: implem... |
596 |
dom->period_time = 0; |
eb608e3a3 block: Convert BD... |
597 598 |
} } |
380c27ca3 writeback: implem... |
599 600 601 |
int wb_domain_init(struct wb_domain *dom, gfp_t gfp) { memset(dom, 0, sizeof(*dom)); |
dcc25ae76 writeback: move g... |
602 603 |
spin_lock_init(&dom->lock); |
9823e51bf mm/page-writeback... |
604 |
timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE); |
dcc25ae76 writeback: move g... |
605 606 |
dom->dirty_limit_tstamp = jiffies; |
380c27ca3 writeback: implem... |
607 608 |
return fprop_global_init(&dom->completions, gfp); } |
841710aa6 writeback: implem... |
609 610 611 612 613 614 615 |
#ifdef CONFIG_CGROUP_WRITEBACK void wb_domain_exit(struct wb_domain *dom) { del_timer_sync(&dom->period_timer); fprop_global_destroy(&dom->completions); } #endif |
eb608e3a3 block: Convert BD... |
616 |
/* |
d08c429b0 mm/page-writeback... |
617 618 619 |
* bdi_min_ratio keeps the sum of the minimum dirty shares of all * registered backing devices, which, for obvious reasons, can not * exceed 100%. |
189d3c4a9 mm: bdi: allow se... |
620 |
*/ |
189d3c4a9 mm: bdi: allow se... |
621 622 623 624 625 |
static unsigned int bdi_min_ratio; int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { int ret = 0; |
189d3c4a9 mm: bdi: allow se... |
626 |
|
cfc4ba536 writeback: use RC... |
627 |
spin_lock_bh(&bdi_lock); |
a42dde041 mm: bdi: allow se... |
628 |
if (min_ratio > bdi->max_ratio) { |
189d3c4a9 mm: bdi: allow se... |
629 |
ret = -EINVAL; |
a42dde041 mm: bdi: allow se... |
630 631 632 633 634 635 636 637 638 |
} else { min_ratio -= bdi->min_ratio; if (bdi_min_ratio + min_ratio < 100) { bdi_min_ratio += min_ratio; bdi->min_ratio += min_ratio; } else { ret = -EINVAL; } } |
cfc4ba536 writeback: use RC... |
639 |
spin_unlock_bh(&bdi_lock); |
a42dde041 mm: bdi: allow se... |
640 641 642 643 644 645 |
return ret; } int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) { |
a42dde041 mm: bdi: allow se... |
646 647 648 649 |
int ret = 0; if (max_ratio > 100) return -EINVAL; |
cfc4ba536 writeback: use RC... |
650 |
spin_lock_bh(&bdi_lock); |
a42dde041 mm: bdi: allow se... |
651 652 653 654 |
if (bdi->min_ratio > max_ratio) { ret = -EINVAL; } else { bdi->max_ratio = max_ratio; |
eb608e3a3 block: Convert BD... |
655 |
bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100; |
a42dde041 mm: bdi: allow se... |
656 |
} |
cfc4ba536 writeback: use RC... |
657 |
spin_unlock_bh(&bdi_lock); |
189d3c4a9 mm: bdi: allow se... |
658 659 660 |
return ret; } |
a42dde041 mm: bdi: allow se... |
661 |
EXPORT_SYMBOL(bdi_set_max_ratio); |
189d3c4a9 mm: bdi: allow se... |
662 |
|
6c14ae1e9 writeback: dirty ... |
663 664 665 666 667 |
static unsigned long dirty_freerun_ceiling(unsigned long thresh, unsigned long bg_thresh) { return (thresh + bg_thresh) / 2; } |
c7981433e writeback: make _... |
668 669 |
static unsigned long hard_dirty_limit(struct wb_domain *dom, unsigned long thresh) |
ffd1f609a writeback: introd... |
670 |
{ |
dcc25ae76 writeback: move g... |
671 |
return max(thresh, dom->dirty_limit); |
ffd1f609a writeback: introd... |
672 |
} |
c5edf9cdc writeback: fix in... |
673 674 675 676 677 678 |
/* * Memory which can be further allocated to a memcg domain is capped by * system-wide clean memory excluding the amount being used in the domain. */ static void mdtc_calc_avail(struct dirty_throttle_control *mdtc, unsigned long filepages, unsigned long headroom) |
c2aa723a6 writeback: implem... |
679 680 |
{ struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc); |
c5edf9cdc writeback: fix in... |
681 682 683 |
unsigned long clean = filepages - min(filepages, mdtc->dirty); unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty); unsigned long other_clean = global_clean - min(global_clean, clean); |
c2aa723a6 writeback: implem... |
684 |
|
c5edf9cdc writeback: fix in... |
685 |
mdtc->avail = filepages + min(headroom, other_clean); |
ffd1f609a writeback: introd... |
686 |
} |
6f7186562 writeback: add bd... |
687 |
/** |
b1cbc6d40 writeback: make _... |
688 689 |
* __wb_calc_thresh - @wb's share of dirty throttling threshold * @dtc: dirty_throttle_context of interest |
1babe1838 writeback: add co... |
690 |
* |
aed21ad28 writeback: commen... |
691 692 693 694 695 |
* Note that balance_dirty_pages() will only seriously take it as a hard limit * when sleeping max_pause per page is not enough to keep the dirty pages under * control. For example, when the device is completely stalled due to some error * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key. * In the other normal situations, it acts more gently by throttling the tasks |
a88a341a7 writeback: move b... |
696 |
* more (rather than completely block them) when the wb dirty pages go high. |
1babe1838 writeback: add co... |
697 |
* |
6f7186562 writeback: add bd... |
698 |
* It allocates high/low dirty limits to fast/slow devices, in order to prevent |
1babe1838 writeback: add co... |
699 700 701 |
* - starving fast devices * - piling up dirty pages (that will take long time to sync) on slow devices * |
a88a341a7 writeback: move b... |
702 |
* The wb's share of dirty limit will be adapting to its throughput and |
1babe1838 writeback: add co... |
703 |
* bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set. |
a862f68a8 docs/core-api/mm:... |
704 705 |
* * Return: @wb's dirty limit in pages. The term "dirty" in the context of |
8d92890bd mm/writeback: dis... |
706 |
* dirty balancing includes all PG_dirty and PG_writeback pages. |
1babe1838 writeback: add co... |
707 |
*/ |
b1cbc6d40 writeback: make _... |
708 |
static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc) |
16c4042f0 writeback: avoid ... |
709 |
{ |
e9f07dfd7 writeback: add di... |
710 |
struct wb_domain *dom = dtc_dom(dtc); |
b1cbc6d40 writeback: make _... |
711 |
unsigned long thresh = dtc->thresh; |
0d960a383 writeback: clean ... |
712 |
u64 wb_thresh; |
d3ac946ec mm/page-writeback... |
713 |
unsigned long numerator, denominator; |
693108a8a writeback: make b... |
714 |
unsigned long wb_min_ratio, wb_max_ratio; |
04fbfdc14 mm: per device di... |
715 |
|
16c4042f0 writeback: avoid ... |
716 |
/* |
0d960a383 writeback: clean ... |
717 |
* Calculate this BDI's share of the thresh ratio. |
16c4042f0 writeback: avoid ... |
718 |
*/ |
e9770b348 writeback: add di... |
719 |
fprop_fraction_percpu(&dom->completions, dtc->wb_completions, |
380c27ca3 writeback: implem... |
720 |
&numerator, &denominator); |
04fbfdc14 mm: per device di... |
721 |
|
0d960a383 writeback: clean ... |
722 723 |
wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100; wb_thresh *= numerator; |
d3ac946ec mm/page-writeback... |
724 |
wb_thresh = div64_ul(wb_thresh, denominator); |
04fbfdc14 mm: per device di... |
725 |
|
b1cbc6d40 writeback: make _... |
726 |
wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio); |
04fbfdc14 mm: per device di... |
727 |
|
0d960a383 writeback: clean ... |
728 729 730 |
wb_thresh += (thresh * wb_min_ratio) / 100; if (wb_thresh > (thresh * wb_max_ratio) / 100) wb_thresh = thresh * wb_max_ratio / 100; |
16c4042f0 writeback: avoid ... |
731 |
|
0d960a383 writeback: clean ... |
732 |
return wb_thresh; |
1da177e4c Linux-2.6.12-rc2 |
733 |
} |
/*
 * wb_calc_thresh - compute @wb's share of the dirty threshold @thresh.
 *
 * Thin convenience wrapper: builds a global (GDTC) dirty_throttle_control
 * for @wb with the given threshold and delegates to __wb_calc_thresh().
 */
unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
					       .thresh = thresh };

	return __wb_calc_thresh(&gdtc);
}
6c14ae1e9 writeback: dirty ... |
740 |
/* |
5a5374856 mm/page-writeback... |
741 742 743 744 745 746 747 748 749 750 751 752 753 |
* setpoint - dirty 3 * f(dirty) := 1.0 + (----------------) * limit - setpoint * * it's a 3rd order polynomial that subjects to * * (1) f(freerun) = 2.0 => rampup dirty_ratelimit reasonably fast * (2) f(setpoint) = 1.0 => the balance point * (3) f(limit) = 0 => the hard limit * (4) df/dx <= 0 => negative feedback control * (5) the closer to setpoint, the smaller |df/dx| (and the reverse) * => fast response on large errors; small oscillation near setpoint */ |
d5c9fde3d mm/page-writeback... |
754 |
static long long pos_ratio_polynom(unsigned long setpoint, |
5a5374856 mm/page-writeback... |
755 756 757 758 759 |
unsigned long dirty, unsigned long limit) { long long pos_ratio; long x; |
d5c9fde3d mm/page-writeback... |
760 |
x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, |
464d1387a writeback: use |1... |
761 |
(limit - setpoint) | 1); |
5a5374856 mm/page-writeback... |
762 763 764 765 766 767 768 769 770 |
pos_ratio = x; pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; pos_ratio += 1 << RATELIMIT_CALC_SHIFT; return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT); } /* |
6c14ae1e9 writeback: dirty ... |
771 772 773 774 |
* Dirty position control. * * (o) global/bdi setpoints * |
de1fff37b writeback: s/bdi/... |
775 |
* We want the dirty pages be balanced around the global/wb setpoints. |
6c14ae1e9 writeback: dirty ... |
776 777 778 779 780 781 782 783 784 |
* When the number of dirty pages is higher/lower than the setpoint, the * dirty position control ratio (and hence task dirty ratelimit) will be * decreased/increased to bring the dirty pages back to the setpoint. * * pos_ratio = 1 << RATELIMIT_CALC_SHIFT * * if (dirty < setpoint) scale up pos_ratio * if (dirty > setpoint) scale down pos_ratio * |
de1fff37b writeback: s/bdi/... |
785 786 |
* if (wb_dirty < wb_setpoint) scale up pos_ratio * if (wb_dirty > wb_setpoint) scale down pos_ratio |
6c14ae1e9 writeback: dirty ... |
787 788 789 790 791 792 793 794 |
* * task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT * * (o) global control line * * ^ pos_ratio * | * | |<===== global dirty control scope ======>| |
032315543 mm/page-writeback... |
795 |
* 2.0 * * * * * * * |
6c14ae1e9 writeback: dirty ... |
796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 |
* | .* * | . * * | . * * | . * * | . * * | . * * 1.0 ................................* * | . . * * | . . * * | . . * * | . . * * | . . * * 0 +------------.------------------.----------------------*-------------> * freerun^ setpoint^ limit^ dirty pages * |
de1fff37b writeback: s/bdi/... |
811 |
* (o) wb control line |
6c14ae1e9 writeback: dirty ... |
812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 |
* * ^ pos_ratio * | * | * * | * * | * * | * * | * |<=========== span ============>| * 1.0 .......................* * | . * * | . * * | . * * | . * * | . * * | . * * | . * * | . * * | . * * | . * * | . * * 1/4 ...............................................* * * * * * * * * * * * * | . . * | . . * | . . * 0 +----------------------.-------------------------------.-------------> |
de1fff37b writeback: s/bdi/... |
837 |
* wb_setpoint^ x_intercept^ |
6c14ae1e9 writeback: dirty ... |
838 |
* |
de1fff37b writeback: s/bdi/... |
839 |
* The wb control line won't drop below pos_ratio=1/4, so that wb_dirty can |
6c14ae1e9 writeback: dirty ... |
840 841 |
* be smoothly throttled down to normal if it starts high in situations like * - start writing to a slow SD card and a fast disk at the same time. The SD |
de1fff37b writeback: s/bdi/... |
842 843 |
* card's wb_dirty may rush to many times higher than wb_setpoint. * - the wb dirty thresh drops quickly due to change of JBOD workload |
6c14ae1e9 writeback: dirty ... |
844 |
*/ |
/*
 * Compute dtc->pos_ratio, the dirty-position control factor (fixed point,
 * RATELIMIT_CALC_SHIFT) used to scale a task's dirty ratelimit up/down
 * depending on how far the dirty counts are from their setpoints.  See the
 * "Dirty position control" block comment above for the control lines.
 */
static void wb_position_ratio(struct dirty_throttle_control *dtc)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth);
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long wb_thresh = dtc->wb_thresh;
	unsigned long x_intercept;
	unsigned long setpoint;		/* dirty pages' target balance point */
	unsigned long wb_setpoint;
	unsigned long span;
	long long pos_ratio;		/* for scaling up/down the rate limit */
	long x;

	dtc->pos_ratio = 0;

	/* at/over the hard limit: leave pos_ratio 0 => maximum throttling */
	if (unlikely(dtc->dirty >= limit))
		return;

	/*
	 * global setpoint
	 *
	 * See comment for pos_ratio_polynom().
	 */
	setpoint = (freerun + limit) / 2;
	pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);

	/*
	 * The strictlimit feature is a tool preventing mistrusted filesystems
	 * from growing a large number of dirty pages before throttling. For
	 * such filesystems balance_dirty_pages always checks wb counters
	 * against wb limits. Even if global "nr_dirty" is under "freerun".
	 * This is especially important for fuse which sets bdi->max_ratio to
	 * 1% by default. Without strictlimit feature, fuse writeback may
	 * consume arbitrary amount of RAM because it is accounted in
	 * NR_WRITEBACK_TEMP which is not involved in calculating "nr_dirty".
	 *
	 * Here, in wb_position_ratio(), we calculate pos_ratio based on
	 * two values: wb_dirty and wb_thresh. Let's consider an example:
	 * total amount of RAM is 16GB, bdi->max_ratio is equal to 1%, global
	 * limits are set by default to 10% and 20% (background and throttle).
	 * Then wb_thresh is 1% of 20% of 16GB. This amounts to ~8K pages.
	 * wb_calc_thresh(wb, bg_thresh) is about ~4K pages. wb_setpoint is
	 * about ~6K pages (as the average of background and throttle wb
	 * limits). The 3rd order polynomial will provide positive feedback if
	 * wb_dirty is under wb_setpoint and vice versa.
	 *
	 * Note, that we cannot use global counters in these calculations
	 * because we want to throttle process writing to a strictlimit wb
	 * much earlier than global "freerun" is reached (~23MB vs. ~2.3GB
	 * in the example above).
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		long long wb_pos_ratio;

		/* almost no wb dirty pages: ramp up aggressively, cap at 2.0 */
		if (dtc->wb_dirty < 8) {
			dtc->pos_ratio = min_t(long long, pos_ratio * 2,
					   2 << RATELIMIT_CALC_SHIFT);
			return;
		}

		if (dtc->wb_dirty >= wb_thresh)
			return;

		wb_setpoint = dirty_freerun_ceiling(wb_thresh,
						    dtc->wb_bg_thresh);

		/* degenerate setpoint: the polynomial would be meaningless */
		if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
			return;

		wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
						 wb_thresh);

		/*
		 * Typically, for strictlimit case, wb_setpoint << setpoint
		 * and pos_ratio >> wb_pos_ratio. In the other words global
		 * state ("dirty") is not limiting factor and we have to
		 * make decision based on wb counters. But there is an
		 * important case when global pos_ratio should get precedence:
		 * global limits are exceeded (e.g. due to activities on other
		 * wb's) while given strictlimit wb is below limit.
		 *
		 * "pos_ratio * wb_pos_ratio" would work for the case above,
		 * but it would look too non-natural for the case of all
		 * activity in the system coming from a single strictlimit wb
		 * with bdi->max_ratio == 100%.
		 *
		 * Note that min() below somewhat changes the dynamics of the
		 * control system. Normally, pos_ratio value can be well over 3
		 * (when globally we are at freerun and wb is well below wb
		 * setpoint). Now the maximum pos_ratio in the same situation
		 * is 2. We might want to tweak this if we observe the control
		 * system is too slow to adapt.
		 */
		dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
		return;
	}

	/*
	 * We have computed basic pos_ratio above based on global situation. If
	 * the wb is over/under its share of dirty pages, we want to scale
	 * pos_ratio further down/up. That is done by the following mechanism.
	 */

	/*
	 * wb setpoint
	 *
	 *        f(wb_dirty) := 1.0 + k * (wb_dirty - wb_setpoint)
	 *
	 *                        x_intercept - wb_dirty
	 *                     := --------------------------
	 *                        x_intercept - wb_setpoint
	 *
	 * The main wb control line is a linear function that subjects to
	 *
	 * (1) f(wb_setpoint) = 1.0
	 * (2) k = - 1 / (8 * write_bw)  (in single wb case)
	 *     or equally: x_intercept = wb_setpoint + 8 * write_bw
	 *
	 * For single wb case, the dirty pages are observed to fluctuate
	 * regularly within range
	 *        [wb_setpoint - write_bw/2, wb_setpoint + write_bw/2]
	 * for various filesystems, where (2) can yield in a reasonable 12.5%
	 * fluctuation range for pos_ratio.
	 *
	 * For JBOD case, wb_thresh (not wb_dirty!) could fluctuate up to its
	 * own size, so move the slope over accordingly and choose a slope that
	 * yields 100% pos_ratio fluctuation on suddenly doubled wb_thresh.
	 */
	if (unlikely(wb_thresh > dtc->thresh))
		wb_thresh = dtc->thresh;
	/*
	 * It's very possible that wb_thresh is close to 0 not because the
	 * device is slow, but that it has remained inactive for long time.
	 * Honour such devices a reasonable good (hopefully IO efficient)
	 * threshold, so that the occasional writes won't be blocked and active
	 * writes can rampup the threshold quickly.
	 */
	wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
	/*
	 * scale global setpoint to wb's:
	 *	wb_setpoint = setpoint * wb_thresh / thresh
	 * ("| 1" guards the division against thresh == 0)
	 */
	x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
	wb_setpoint = setpoint * (u64)x >> 16;
	/*
	 * Use span=(8*write_bw) in single wb case as indicated by
	 * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case.
	 *
	 *        wb_thresh                    thresh - wb_thresh
	 * span = --------- * (8 * write_bw) + ------------------ * wb_thresh
	 *          thresh                          thresh
	 */
	span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
	x_intercept = wb_setpoint + span;

	if (dtc->wb_dirty < x_intercept - span / 4) {
		pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
				      (x_intercept - wb_setpoint) | 1);
	} else
		pos_ratio /= 4;

	/*
	 * wb reserve area, safeguard against dirty pool underrun and disk idle
	 * It may push the desired control point of global dirty pages higher
	 * than setpoint.
	 */
	x_intercept = wb_thresh / 2;
	if (dtc->wb_dirty < x_intercept) {
		if (dtc->wb_dirty > x_intercept / 8)
			pos_ratio = div_u64(pos_ratio * x_intercept,
					    dtc->wb_dirty);
		else
			pos_ratio *= 8;
	}

	dtc->pos_ratio = pos_ratio;
}
/*
 * Estimate @wb's write bandwidth from the pages written during the last
 * @elapsed jiffies and fold it into the period-weighted running values
 * wb->write_bandwidth and wb->avg_write_bandwidth.
 *
 * @elapsed: jiffies since wb->bw_time_stamp (caller guarantees >= 1)
 * @written: current WB_WRITTEN counter snapshot
 */
static void wb_update_write_bandwidth(struct bdi_writeback *wb,
				      unsigned long elapsed,
				      unsigned long written)
{
	const unsigned long period = roundup_pow_of_two(3 * HZ);
	unsigned long avg = wb->avg_write_bandwidth;
	unsigned long old = wb->write_bandwidth;
	u64 bw;

	/*
	 * bw = written * HZ / elapsed
	 *
	 *                   bw * elapsed + write_bandwidth * (period - elapsed)
	 * write_bandwidth = ---------------------------------------------------
	 *                                          period
	 *
	 * @written may have decreased due to account_page_redirty().
	 * Avoid underflowing @bw calculation.
	 */
	bw = written - min(written, wb->written_stamp);
	bw *= HZ;
	if (unlikely(elapsed > period)) {
		/* sampling window overran the period: use the raw rate */
		bw = div64_ul(bw, elapsed);
		avg = bw;
		goto out;
	}
	bw += (u64)wb->write_bandwidth * (period - elapsed);
	bw >>= ilog2(period);	/* period is a power of two, so >> divides */

	/*
	 * one more level of smoothing, for filtering out sudden spikes
	 */
	if (avg > old && old >= (unsigned long)bw)
		avg -= (avg - old) >> 3;

	if (avg < old && old <= (unsigned long)bw)
		avg += (old - avg) >> 3;

out:
	/* keep avg > 0 to guarantee that tot > 0 if there are dirty wbs */
	avg = max(avg, 1LU);
	if (wb_has_dirty_io(wb)) {
		long delta = avg - wb->avg_write_bandwidth;
		WARN_ON_ONCE(atomic_long_add_return(delta,
					&wb->bdi->tot_write_bandwidth) <= 0);
	}
	wb->write_bandwidth = bw;
	/* paired with lockless READ_ONCE() readers of avg_write_bandwidth */
	WRITE_ONCE(wb->avg_write_bandwidth, avg);
}
2bc00aef0 writeback: consol... |
1068 |
static void update_dirty_limit(struct dirty_throttle_control *dtc) |
c42843f2f writeback: introd... |
1069 |
{ |
e9f07dfd7 writeback: add di... |
1070 |
struct wb_domain *dom = dtc_dom(dtc); |
2bc00aef0 writeback: consol... |
1071 |
unsigned long thresh = dtc->thresh; |
dcc25ae76 writeback: move g... |
1072 |
unsigned long limit = dom->dirty_limit; |
c42843f2f writeback: introd... |
1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 |
/* * Follow up in one step. */ if (limit < thresh) { limit = thresh; goto update; } /* * Follow down slowly. Use the higher one as the target, because thresh * may drop below dirty. This is exactly the reason to introduce |
dcc25ae76 writeback: move g... |
1085 |
* dom->dirty_limit which is guaranteed to lie above the dirty pages. |
c42843f2f writeback: introd... |
1086 |
*/ |
2bc00aef0 writeback: consol... |
1087 |
thresh = max(thresh, dtc->dirty); |
c42843f2f writeback: introd... |
1088 1089 1090 1091 1092 1093 |
if (limit > thresh) { limit -= (limit - thresh) >> 5; goto update; } return; update: |
dcc25ae76 writeback: move g... |
1094 |
dom->dirty_limit = limit; |
c42843f2f writeback: introd... |
1095 |
} |
/*
 * Refresh dom->dirty_limit at most once per BANDWIDTH_INTERVAL.
 *
 * Uses the classic check / lock / re-check pattern: the timestamp is
 * inspected locklessly first so the common (too-soon) path stays
 * lock-free, and re-checked under dom->lock before updating so that
 * concurrent callers perform the update only once per interval.
 */
static void domain_update_dirty_limit(struct dirty_throttle_control *dtc,
				      unsigned long now)
{
	struct wb_domain *dom = dtc_dom(dtc);

	/*
	 * check locklessly first to optimize away locking for the most time
	 */
	if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
		return;

	spin_lock(&dom->lock);
	if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
		update_dirty_limit(dtc);
		dom->dirty_limit_tstamp = now;
	}
	spin_unlock(&dom->lock);
}
/*
 * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
 *
 * Normal wb tasks will be curbed at or below it in long term.
 * Obviously it should be around (write_bw / N) when there are N dd tasks.
 *
 * @dirtied: current WB_DIRTIED counter snapshot
 * @elapsed: jiffies since the last update (caller guarantees >= 1)
 */
static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
				      unsigned long dirtied,
				      unsigned long elapsed)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long dirty = dtc->dirty;
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long setpoint = (freerun + limit) / 2;
	unsigned long write_bw = wb->avg_write_bandwidth;
	unsigned long dirty_ratelimit = wb->dirty_ratelimit;
	unsigned long dirty_rate;
	unsigned long task_ratelimit;
	unsigned long balanced_dirty_ratelimit;
	unsigned long step;
	unsigned long x;
	unsigned long shift;

	/*
	 * The dirty rate will match the writeout rate in long term, except
	 * when dirty pages are truncated by userspace or re-dirtied by FS.
	 */
	dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;

	/*
	 * task_ratelimit reflects each dd's dirty rate for the past 200ms.
	 */
	task_ratelimit = (u64)dirty_ratelimit *
					dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
	task_ratelimit++; /* it helps rampup dirty_ratelimit from tiny values */

	/*
	 * A linear estimation of the "balanced" throttle rate. The theory is,
	 * if there are N dd tasks, each throttled at task_ratelimit, the wb's
	 * dirty_rate will be measured to be (N * task_ratelimit). So the below
	 * formula will yield the balanced rate limit (write_bw / N).
	 *
	 * Note that the expanded form is not a pure rate feedback:
	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate)		     (1)
	 * but also takes pos_ratio into account:
	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate) * pos_ratio  (2)
	 *
	 * (1) is not realistic because pos_ratio also takes part in balancing
	 * the dirty rate.  Consider the state
	 *	pos_ratio = 0.5						     (3)
	 *	rate = 2 * (write_bw / N)				     (4)
	 * If (1) is used, it will stuck in that state! Because each dd will
	 * be throttled at
	 *	task_ratelimit = pos_ratio * rate = (write_bw / N)	     (5)
	 * yielding
	 *	dirty_rate = N * task_ratelimit = write_bw		     (6)
	 * put (6) into (1) we get
	 *	rate_(i+1) = rate_(i)					     (7)
	 *
	 * So we end up using (2) to always keep
	 *	rate_(i+1) ~= (write_bw / N)				     (8)
	 * regardless of the value of pos_ratio. As long as (8) is satisfied,
	 * pos_ratio is able to drive itself to 1.0, which is not only where
	 * the dirty count meet the setpoint, but also where the slope of
	 * pos_ratio is most flat and hence task_ratelimit is least fluctuated.
	 */
	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
					   dirty_rate | 1);
	/*
	 * balanced_dirty_ratelimit ~= (write_bw / N) <= write_bw
	 */
	if (unlikely(balanced_dirty_ratelimit > write_bw))
		balanced_dirty_ratelimit = write_bw;

	/*
	 * We could safely do this and return immediately:
	 *
	 *	wb->dirty_ratelimit = balanced_dirty_ratelimit;
	 *
	 * However to get a more stable dirty_ratelimit, the below elaborated
	 * code makes use of task_ratelimit to filter out singular points and
	 * limit the step size.
	 *
	 * The below code essentially only uses the relative value of
	 *
	 *	task_ratelimit - dirty_ratelimit
	 *	= (pos_ratio - 1) * dirty_ratelimit
	 *
	 * which reflects the direction and size of dirty position error.
	 */

	/*
	 * dirty_ratelimit will follow balanced_dirty_ratelimit iff
	 * task_ratelimit is on the same side of dirty_ratelimit, too.
	 * For example, when
	 * - dirty_ratelimit > balanced_dirty_ratelimit
	 * - dirty_ratelimit > task_ratelimit (dirty pages are above setpoint)
	 * lowering dirty_ratelimit will help meet both the position and rate
	 * control targets. Otherwise, don't update dirty_ratelimit if it will
	 * only help meet the rate target. After all, what the users ultimately
	 * feel and care are stable dirty rate and small position error.
	 *
	 * |task_ratelimit - dirty_ratelimit| is used to limit the step size
	 * and filter out the singular points of balanced_dirty_ratelimit. Which
	 * keeps jumping around randomly and can even leap far away at times
	 * due to the small 200ms estimation period of dirty_rate (we want to
	 * keep that period small to reduce time lags).
	 */
	step = 0;

	/*
	 * For strictlimit case, calculations above were based on wb counters
	 * and limits (starting from pos_ratio = wb_position_ratio() and up to
	 * balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate).
	 * Hence, to calculate "step" properly, we have to use wb_dirty as
	 * "dirty" and wb_setpoint as "setpoint".
	 *
	 * We rampup dirty_ratelimit forcibly if wb_dirty is low because
	 * it's possible that wb_thresh is close to zero due to inactivity
	 * of backing device.
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		dirty = dtc->wb_dirty;
		if (dtc->wb_dirty < 8)
			setpoint = dtc->wb_dirty + 1;
		else
			setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
	}

	if (dirty < setpoint) {
		x = min3(wb->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit < x)
			step = x - dirty_ratelimit;
	} else {
		x = max3(wb->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit > x)
			step = dirty_ratelimit - x;
	}

	/*
	 * Don't pursue 100% rate matching. It's impossible since the balanced
	 * rate itself is constantly fluctuating. So decrease the track speed
	 * when it gets close to the target. Helps eliminate pointless tremors.
	 */
	shift = dirty_ratelimit / (2 * step + 1);
	if (shift < BITS_PER_LONG)
		step = DIV_ROUND_UP(step >> shift, 8);
	else
		step = 0;

	if (dirty_ratelimit < balanced_dirty_ratelimit)
		dirty_ratelimit += step;
	else
		dirty_ratelimit -= step;

	/* paired with lockless READ_ONCE() readers; never let the rate hit 0 */
	WRITE_ONCE(wb->dirty_ratelimit, max(dirty_ratelimit, 1UL));
	wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;

	trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
}
c2aa723a6 writeback: implem... |
/*
 * Recompute @wb's write bandwidth and, when @update_ratelimit is set, also
 * refresh the dirty limits and per-wb dirty ratelimits for the global
 * domain @gdtc and (if non-NULL) the memcg domain @mdtc.  All updates run
 * under wb->list_lock, which serializes the dirtied/written stamps and
 * bw_time_stamp against concurrent estimators.
 */
static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
				  struct dirty_throttle_control *mdtc,
				  bool update_ratelimit)
{
	struct bdi_writeback *wb = gdtc->wb;
	unsigned long now = jiffies;
	unsigned long elapsed;
	unsigned long dirtied;
	unsigned long written;

	spin_lock(&wb->list_lock);

	/*
	 * Lockless checks for elapsed time are racy and delayed update after
	 * IO completion doesn't do it at all (to make sure written pages are
	 * accounted reasonably quickly). Make sure elapsed >= 1 to avoid
	 * division errors.
	 */
	elapsed = max(now - wb->bw_time_stamp, 1UL);
	/* approximate (fast, percpu) counts of pages dirtied/written so far */
	dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
	written = percpu_counter_read(&wb->stat[WB_WRITTEN]);

	if (update_ratelimit) {
		domain_update_dirty_limit(gdtc, now);
		wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);

		/*
		 * @mdtc is always NULL if !CGROUP_WRITEBACK but the
		 * compiler has no way to figure that out. Help it.
		 */
		if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
			domain_update_dirty_limit(mdtc, now);
			wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
		}
	}
	wb_update_write_bandwidth(wb, elapsed, written);

	/* remember the snapshot this estimation round was based on */
	wb->dirtied_stamp = dirtied;
	wb->written_stamp = written;
	WRITE_ONCE(wb->bw_time_stamp, now);
	spin_unlock(&wb->list_lock);
}
45a2966fd writeback: fix ba... |
1315 |
void wb_update_bandwidth(struct bdi_writeback *wb) |
e98be2d59 writeback: bdi wr... |
1316 |
{ |
2bc00aef0 writeback: consol... |
1317 |
struct dirty_throttle_control gdtc = { GDTC_INIT(wb) }; |
fee468fdf writeback: reliab... |
1318 |
__wb_update_bandwidth(&gdtc, NULL, false); |
fee468fdf writeback: reliab... |
1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 |
} /* Interval after which we consider wb idle and don't estimate bandwidth */ #define WB_BANDWIDTH_IDLE_JIF (HZ) static void wb_bandwidth_estimate_start(struct bdi_writeback *wb) { unsigned long now = jiffies; unsigned long elapsed = now - READ_ONCE(wb->bw_time_stamp); if (elapsed > WB_BANDWIDTH_IDLE_JIF && !atomic_read(&wb->writeback_inodes)) { spin_lock(&wb->list_lock); wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED); wb->written_stamp = wb_stat(wb, WB_WRITTEN); |
20792ebf3 writeback: use RE... |
1334 |
WRITE_ONCE(wb->bw_time_stamp, now); |
fee468fdf writeback: reliab... |
1335 1336 |
spin_unlock(&wb->list_lock); } |
e98be2d59 writeback: bdi wr... |
1337 |
} |
/*
 * After a task has dirtied this many pages, balance_dirty_pages_ratelimited()
 * checks whether dirty throttling must kick in.
 *
 * Too small an interval makes big NUMA machines hammer the expensive
 * global_zone_page_state(), so scale it roughly with the square root of the
 * safety margin (pages we may still dirty before hitting the limit).
 */
static unsigned long dirty_poll_interval(unsigned long dirty,
					 unsigned long thresh)
{
	unsigned long margin;

	if (dirty >= thresh)
		return 1;

	margin = thresh - dirty;
	return 1UL << (ilog2(margin) >> 1);
}
a88a341a7 writeback: move b... |
1354 |
static unsigned long wb_max_pause(struct bdi_writeback *wb, |
de1fff37b writeback: s/bdi/... |
1355 |
unsigned long wb_dirty) |
c8462cc9d writeback: limit ... |
1356 |
{ |
20792ebf3 writeback: use RE... |
1357 |
unsigned long bw = READ_ONCE(wb->avg_write_bandwidth); |
e3b6c655b writeback: fix ne... |
1358 |
unsigned long t; |
c8462cc9d writeback: limit ... |
1359 |
|
7ccb9ad53 writeback: max, m... |
1360 1361 1362 1363 1364 1365 1366 |
/* * Limit pause time for small memory systems. If sleeping for too long * time, a small pool of dirty/writeback pages may go empty and disk go * idle. * * 8 serves as the safety ratio. */ |
de1fff37b writeback: s/bdi/... |
1367 |
t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8)); |
7ccb9ad53 writeback: max, m... |
1368 |
t++; |
e3b6c655b writeback: fix ne... |
1369 |
return min_t(unsigned long, t, MAX_PAUSE); |
7ccb9ad53 writeback: max, m... |
1370 |
} |
a88a341a7 writeback: move b... |
1371 1372 1373 1374 1375 |
static long wb_min_pause(struct bdi_writeback *wb, long max_pause, unsigned long task_ratelimit, unsigned long dirty_ratelimit, int *nr_dirtied_pause) |
c8462cc9d writeback: limit ... |
1376 |
{ |
20792ebf3 writeback: use RE... |
1377 1378 |
long hi = ilog2(READ_ONCE(wb->avg_write_bandwidth)); long lo = ilog2(READ_ONCE(wb->dirty_ratelimit)); |
7ccb9ad53 writeback: max, m... |
1379 1380 1381 |
long t; /* target pause */ long pause; /* estimated next pause */ int pages; /* target nr_dirtied_pause */ |
c8462cc9d writeback: limit ... |
1382 |
|
7ccb9ad53 writeback: max, m... |
1383 1384 |
/* target for 10ms pause on 1-dd case */ t = max(1, HZ / 100); |
c8462cc9d writeback: limit ... |
1385 1386 1387 1388 1389 |
/* * Scale up pause time for concurrent dirtiers in order to reduce CPU * overheads. * |
7ccb9ad53 writeback: max, m... |
1390 |
* (N * 10ms) on 2^N concurrent tasks. |
c8462cc9d writeback: limit ... |
1391 1392 |
*/ if (hi > lo) |
7ccb9ad53 writeback: max, m... |
1393 |
t += (hi - lo) * (10 * HZ) / 1024; |
c8462cc9d writeback: limit ... |
1394 1395 |
/* |
7ccb9ad53 writeback: max, m... |
1396 1397 1398 1399 1400 1401 1402 1403 |
* This is a bit convoluted. We try to base the next nr_dirtied_pause * on the much more stable dirty_ratelimit. However the next pause time * will be computed based on task_ratelimit and the two rate limits may * depart considerably at some time. Especially if task_ratelimit goes * below dirty_ratelimit/2 and the target pause is max_pause, the next * pause time will be max_pause*2 _trimmed down_ to max_pause. As a * result task_ratelimit won't be executed faithfully, which could * eventually bring down dirty_ratelimit. |
c8462cc9d writeback: limit ... |
1404 |
* |
7ccb9ad53 writeback: max, m... |
1405 1406 1407 1408 1409 1410 1411 |
* We apply two rules to fix it up: * 1) try to estimate the next pause time and if necessary, use a lower * nr_dirtied_pause so as not to exceed max_pause. When this happens, * nr_dirtied_pause will be "dancing" with task_ratelimit. * 2) limit the target pause time to max_pause/2, so that the normal * small fluctuations of task_ratelimit won't trigger rule (1) and * nr_dirtied_pause will remain as stable as dirty_ratelimit. |
c8462cc9d writeback: limit ... |
1412 |
*/ |
7ccb9ad53 writeback: max, m... |
1413 1414 |
t = min(t, 1 + max_pause / 2); pages = dirty_ratelimit * t / roundup_pow_of_two(HZ); |
c8462cc9d writeback: limit ... |
1415 1416 |
/* |
5b9b35743 writeback: avoid ... |
1417 1418 1419 1420 1421 1422 |
* Tiny nr_dirtied_pause is found to hurt I/O performance in the test * case fio-mmap-randwrite-64k, which does 16*{sync read, async write}. * When the 16 consecutive reads are often interrupted by some dirty * throttling pause during the async writes, cfq will go into idles * (deadline is fine). So push nr_dirtied_pause as high as possible * until reaches DIRTY_POLL_THRESH=32 pages. |
c8462cc9d writeback: limit ... |
1423 |
*/ |
5b9b35743 writeback: avoid ... |
1424 1425 1426 1427 1428 1429 1430 1431 |
if (pages < DIRTY_POLL_THRESH) { t = max_pause; pages = dirty_ratelimit * t / roundup_pow_of_two(HZ); if (pages > DIRTY_POLL_THRESH) { pages = DIRTY_POLL_THRESH; t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit; } } |
7ccb9ad53 writeback: max, m... |
1432 1433 1434 1435 1436 |
pause = HZ * pages / (task_ratelimit + 1); if (pause > max_pause) { t = max_pause; pages = task_ratelimit * t / roundup_pow_of_two(HZ); } |
c8462cc9d writeback: limit ... |
1437 |
|
7ccb9ad53 writeback: max, m... |
1438 |
*nr_dirtied_pause = pages; |
c8462cc9d writeback: limit ... |
1439 |
/* |
7ccb9ad53 writeback: max, m... |
1440 |
* The minimal pause time will normally be half the target pause time. |
c8462cc9d writeback: limit ... |
1441 |
*/ |
5b9b35743 writeback: avoid ... |
1442 |
return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t; |
c8462cc9d writeback: limit ... |
1443 |
} |
970fb01ad writeback: add di... |
1444 |
static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) |
5a5374856 mm/page-writeback... |
1445 |
{ |
2bc00aef0 writeback: consol... |
1446 |
struct bdi_writeback *wb = dtc->wb; |
93f78d882 writeback: move b... |
1447 |
unsigned long wb_reclaimable; |
5a5374856 mm/page-writeback... |
1448 1449 |
/* |
de1fff37b writeback: s/bdi/... |
1450 |
* wb_thresh is not treated as some limiting factor as |
5a5374856 mm/page-writeback... |
1451 |
* dirty_thresh, due to reasons |
de1fff37b writeback: s/bdi/... |
1452 |
* - in JBOD setup, wb_thresh can fluctuate a lot |
5a5374856 mm/page-writeback... |
1453 |
* - in a system with HDD and USB key, the USB key may somehow |
de1fff37b writeback: s/bdi/... |
1454 1455 |
* go into state (wb_dirty >> wb_thresh) either because * wb_dirty starts high, or because wb_thresh drops low. |
5a5374856 mm/page-writeback... |
1456 |
* In this case we don't want to hard throttle the USB key |
de1fff37b writeback: s/bdi/... |
1457 1458 |
* dirtiers for 100 seconds until wb_dirty drops under * wb_thresh. Instead the auxiliary wb control line in |
a88a341a7 writeback: move b... |
1459 |
* wb_position_ratio() will let the dirtier task progress |
de1fff37b writeback: s/bdi/... |
1460 |
* at some rate <= (write_bw / 2) for bringing down wb_dirty. |
5a5374856 mm/page-writeback... |
1461 |
*/ |
b1cbc6d40 writeback: make _... |
1462 |
dtc->wb_thresh = __wb_calc_thresh(dtc); |
970fb01ad writeback: add di... |
1463 1464 |
dtc->wb_bg_thresh = dtc->thresh ? div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; |
5a5374856 mm/page-writeback... |
1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 |
/* * In order to avoid the stacked BDI deadlock we need * to ensure we accurately count the 'dirty' pages when * the threshold is low. * * Otherwise it would be possible to get thresh+n pages * reported dirty, even though there are thresh-m pages * actually dirty; with m+n sitting in the percpu * deltas. */ |
2bce774e8 writeback: remove... |
1476 |
if (dtc->wb_thresh < 2 * wb_stat_error()) { |
93f78d882 writeback: move b... |
1477 |
wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE); |
2bc00aef0 writeback: consol... |
1478 |
dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK); |
5a5374856 mm/page-writeback... |
1479 |
} else { |
93f78d882 writeback: move b... |
1480 |
wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE); |
2bc00aef0 writeback: consol... |
1481 |
dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK); |
5a5374856 mm/page-writeback... |
1482 1483 |
} } |
/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2.
 * If we're over `background_thresh' then the writeback threads are woken to
 * perform some writeout.
 *
 * @wb: the bdi_writeback the caller is dirtying against
 * @pages_dirtied: pages the caller dirtied since its last throttling check
 *
 * Loops: each iteration re-samples the dirty state, and either exits in the
 * freerun region (resetting the task's poll interval) or sleeps for a pause
 * computed from the wb's dirty ratelimit, until the task has "paid" for the
 * pages it dirtied or a fatal signal is pending.
 */
static void balance_dirty_pages(struct bdi_writeback *wb,
				unsigned long pages_dirtied)
{
	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
	struct dirty_throttle_control * const gdtc = &gdtc_stor;
	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
						     &mdtc_stor : NULL;
	/* sdtc: the domain (global or memcg) actually throttled against */
	struct dirty_throttle_control *sdtc;
	unsigned long nr_reclaimable;	/* = file_dirty */
	long period;
	long pause;
	long max_pause;
	long min_pause;
	int nr_dirtied_pause;
	bool dirty_exceeded = false;
	unsigned long task_ratelimit;
	unsigned long dirty_ratelimit;
	struct backing_dev_info *bdi = wb->bdi;
	bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
	unsigned long start_time = jiffies;

	for (;;) {
		unsigned long now = jiffies;
		unsigned long dirty, thresh, bg_thresh;
		unsigned long m_dirty = 0;	/* stop bogus uninit warnings */
		unsigned long m_thresh = 0;
		unsigned long m_bg_thresh = 0;

		nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
		gdtc->avail = global_dirtyable_memory();
		gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);

		domain_dirty_limits(gdtc);

		/* strictlimit throttles on the per-wb counters/limits instead */
		if (unlikely(strictlimit)) {
			wb_dirty_limits(gdtc);

			dirty = gdtc->wb_dirty;
			thresh = gdtc->wb_thresh;
			bg_thresh = gdtc->wb_bg_thresh;
		} else {
			dirty = gdtc->dirty;
			thresh = gdtc->thresh;
			bg_thresh = gdtc->bg_thresh;
		}

		if (mdtc) {
			unsigned long filepages, headroom, writeback;

			/*
			 * If @wb belongs to !root memcg, repeat the same
			 * basic calculations for the memcg domain.
			 */
			mem_cgroup_wb_stats(wb, &filepages, &headroom,
					    &mdtc->dirty, &writeback);
			mdtc->dirty += writeback;
			mdtc_calc_avail(mdtc, filepages, headroom);

			domain_dirty_limits(mdtc);

			if (unlikely(strictlimit)) {
				wb_dirty_limits(mdtc);
				m_dirty = mdtc->wb_dirty;
				m_thresh = mdtc->wb_thresh;
				m_bg_thresh = mdtc->wb_bg_thresh;
			} else {
				m_dirty = mdtc->dirty;
				m_thresh = mdtc->thresh;
				m_bg_thresh = mdtc->bg_thresh;
			}
		}

		/*
		 * Throttle it only when the background writeback cannot
		 * catch-up. This avoids (excessively) small writeouts
		 * when the wb limits are ramping up in case of !strictlimit.
		 *
		 * In strictlimit case make decision based on the wb counters
		 * and limits. Small writeouts when the wb limits are ramping
		 * up are the price we consciously pay for strictlimit-ing.
		 *
		 * If memcg domain is in effect, @dirty should be under
		 * both global and memcg freerun ceilings.
		 */
		if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
		    (!mdtc ||
		     m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
			unsigned long intv;
			unsigned long m_intv;

			/* also reached via goto when a PF_LOCAL_THROTTLE
			 * task is below its per-wb freerun ceiling */
free_running:
			intv = dirty_poll_interval(dirty, thresh);
			m_intv = ULONG_MAX;

			current->dirty_paused_when = now;
			current->nr_dirtied = 0;
			if (mdtc)
				m_intv = dirty_poll_interval(m_dirty, m_thresh);
			current->nr_dirtied_pause = min(intv, m_intv);
			break;
		}

		if (unlikely(!writeback_in_progress(wb)))
			wb_start_background_writeback(wb);

		mem_cgroup_flush_foreign(wb);

		/*
		 * Calculate global domain's pos_ratio and select the
		 * global dtc by default.
		 */
		if (!strictlimit) {
			wb_dirty_limits(gdtc);

			if ((current->flags & PF_LOCAL_THROTTLE) &&
			    gdtc->wb_dirty <
			    dirty_freerun_ceiling(gdtc->wb_thresh,
						  gdtc->wb_bg_thresh))
				/*
				 * LOCAL_THROTTLE tasks must not be throttled
				 * when below the per-wb freerun ceiling.
				 */
				goto free_running;
		}

		dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
			((gdtc->dirty > gdtc->thresh) || strictlimit);

		wb_position_ratio(gdtc);
		sdtc = gdtc;

		if (mdtc) {
			/*
			 * If memcg domain is in effect, calculate its
			 * pos_ratio.  @wb should satisfy constraints from
			 * both global and memcg domains.  Choose the one
			 * w/ lower pos_ratio.
			 */
			if (!strictlimit) {
				wb_dirty_limits(mdtc);

				if ((current->flags & PF_LOCAL_THROTTLE) &&
				    mdtc->wb_dirty <
				    dirty_freerun_ceiling(mdtc->wb_thresh,
							  mdtc->wb_bg_thresh))
					/*
					 * LOCAL_THROTTLE tasks must not be
					 * throttled when below the per-wb
					 * freerun ceiling.
					 */
					goto free_running;
			}
			dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
				((mdtc->dirty > mdtc->thresh) || strictlimit);

			wb_position_ratio(mdtc);
			if (mdtc->pos_ratio < gdtc->pos_ratio)
				sdtc = mdtc;
		}

		if (dirty_exceeded && !wb->dirty_exceeded)
			wb->dirty_exceeded = 1;

		if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
					   BANDWIDTH_INTERVAL))
			__wb_update_bandwidth(gdtc, mdtc, true);

		/* throttle according to the chosen dtc */
		dirty_ratelimit = READ_ONCE(wb->dirty_ratelimit);
		task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
							RATELIMIT_CALC_SHIFT;
		max_pause = wb_max_pause(wb, sdtc->wb_dirty);
		min_pause = wb_min_pause(wb, max_pause,
					 task_ratelimit, dirty_ratelimit,
					 &nr_dirtied_pause);

		if (unlikely(task_ratelimit == 0)) {
			period = max_pause;
			pause = max_pause;
			goto pause;
		}
		period = HZ * pages_dirtied / task_ratelimit;
		pause = period;
		if (current->dirty_paused_when)
			pause -= now - current->dirty_paused_when;
		/*
		 * For less than 1s think time (ext3/4 may block the dirtier
		 * for up to 800ms from time to time on 1-HDD; so does xfs,
		 * however at much less frequency), try to compensate it in
		 * future periods by updating the virtual time; otherwise just
		 * do a reset, as it may be a light dirtier.
		 */
		if (pause < min_pause) {
			trace_balance_dirty_pages(wb,
						  sdtc->thresh,
						  sdtc->bg_thresh,
						  sdtc->dirty,
						  sdtc->wb_thresh,
						  sdtc->wb_dirty,
						  dirty_ratelimit,
						  task_ratelimit,
						  pages_dirtied,
						  period,
						  min(pause, 0L),
						  start_time);
			if (pause < -HZ) {
				current->dirty_paused_when = now;
				current->nr_dirtied = 0;
			} else if (period) {
				current->dirty_paused_when += period;
				current->nr_dirtied = 0;
			} else if (current->nr_dirtied_pause <= pages_dirtied)
				current->nr_dirtied_pause += pages_dirtied;
			break;
		}
		if (unlikely(pause > max_pause)) {
			/* for occasional dropped task_ratelimit */
			now += min(pause - max_pause, max_pause);
			pause = max_pause;
		}

pause:
		trace_balance_dirty_pages(wb,
					  sdtc->thresh,
					  sdtc->bg_thresh,
					  sdtc->dirty,
					  sdtc->wb_thresh,
					  sdtc->wb_dirty,
					  dirty_ratelimit,
					  task_ratelimit,
					  pages_dirtied,
					  period,
					  pause,
					  start_time);
		__set_current_state(TASK_KILLABLE);
		wb->dirty_sleep = now;
		io_schedule_timeout(pause);

		current->dirty_paused_when = now + pause;
		current->nr_dirtied = 0;
		current->nr_dirtied_pause = nr_dirtied_pause;

		/*
		 * This is typically equal to (dirty < thresh) and can also
		 * keep "1000+ dd on a slow USB stick" under control.
		 */
		if (task_ratelimit)
			break;

		/*
		 * In the case of an unresponsive NFS server and the NFS dirty
		 * pages exceeds dirty_thresh, give the other good wb's a pipe
		 * to go through, so that tasks on them still remain responsive.
		 *
		 * In theory 1 page is enough to keep the consumer-producer
		 * pipe going: the flusher cleans 1 page => the task dirties 1
		 * more page. However wb_dirty has accounting errors.  So use
		 * the larger and more IO friendly wb_stat_error.
		 */
		if (sdtc->wb_dirty <= wb_stat_error())
			break;

		if (fatal_signal_pending(current))
			break;
	}

	if (!dirty_exceeded && wb->dirty_exceeded)
		wb->dirty_exceeded = 0;

	if (writeback_in_progress(wb))
		return;

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk activity.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if (laptop_mode)
		return;
	if (nr_reclaimable > gdtc->bg_thresh)
		wb_start_background_writeback(wb);
}
/*
 * Per-CPU count of pages dirtied since this CPU last reset it in
 * balance_dirty_pages_ratelimited().  Once it reaches ratelimit_pages, the
 * next dirtier on this CPU is forced into balance_dirty_pages() even if its
 * own nr_dirtied is still below its pause threshold.
 */
static DEFINE_PER_CPU(int, bdp_ratelimits);

/*
 * Normal tasks are throttled by
 *	loop {
 *		dirty tsk->nr_dirtied_pause pages;
 *		take a snap in balance_dirty_pages();
 *	}
 * However there is a worst case. If every task exit immediately when dirtied
 * (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never be
 * called to throttle the page dirties. The solution is to save the not yet
 * throttled page dirties in dirty_throttle_leaks on task exit and charge them
 * randomly into the running tasks. This works well for the above worst case,
 * as the new task will pick up and accumulate the old task's leaked dirty
 * count and eventually get throttled.
 */
DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
/**
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * Once we're over the dirty memory limit we decrease the ratelimiting
 * by a lot, to prevent individual processes from overshooting the limit
 * by (ratelimit_pages) each.
 */
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
	struct inode *inode = mapping->host;
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct bdi_writeback *wb = NULL;
	int ratelimit;
	int *p;

	/* backing device doesn't do writeback (e.g. ramfs-like) - nothing to balance */
	if (!(bdi->capabilities & BDI_CAP_WRITEBACK))
		return;

	/* charge the current task's memcg wb when cgroup writeback is on;
	 * fall back to the bdi's root wb otherwise (or on allocation failure) */
	if (inode_cgwb_enabled(inode))
		wb = wb_get_create_current(bdi, GFP_KERNEL);
	if (!wb)
		wb = &bdi->wb;

	ratelimit = current->nr_dirtied_pause;
	/* over the dirty limit: clamp bursts to 32KB worth of pages */
	if (wb->dirty_exceeded)
		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));

	/* per-cpu counters below must not migrate between CPUs */
	preempt_disable();
	/*
	 * This prevents one CPU to accumulate too many dirtied pages without
	 * calling into balance_dirty_pages(), which can happen when there are
	 * 1000+ tasks, all of them start dirtying pages at exactly the same
	 * time, hence all honoured too large initial task->nr_dirtied_pause.
	 */
	p = this_cpu_ptr(&bdp_ratelimits);
	if (unlikely(current->nr_dirtied >= ratelimit))
		*p = 0;
	else if (unlikely(*p >= ratelimit_pages)) {
		/* force a throttling check for this task */
		*p = 0;
		ratelimit = 0;
	}
	/*
	 * Pick up the dirtied pages by the exited tasks. This avoids lots of
	 * short-lived tasks (eg. gcc invocations in a kernel build) escaping
	 * the dirty throttling and livelock other long-run dirtiers.
	 */
	p = this_cpu_ptr(&dirty_throttle_leaks);
	if (*p > 0 && current->nr_dirtied < ratelimit) {
		unsigned long nr_pages_dirtied;
		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
		*p -= nr_pages_dirtied;
		current->nr_dirtied += nr_pages_dirtied;
	}
	preempt_enable();

	if (unlikely(current->nr_dirtied >= ratelimit))
		balance_dirty_pages(wb, current->nr_dirtied);

	/* drop the ref taken by wb_get_create_current() (no-op for &bdi->wb) */
	wb_put(wb);
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
1da177e4c Linux-2.6.12-rc2 |
1850 |
|
aa661bbe1 writeback: move o... |
1851 1852 1853 1854 1855 |
/** * wb_over_bg_thresh - does @wb need to be written back? * @wb: bdi_writeback of interest * * Determines whether background writeback should keep writing @wb or it's |
a862f68a8 docs/core-api/mm:... |
1856 1857 1858 |
* clean enough. * * Return: %true if writeback should continue. |
aa661bbe1 writeback: move o... |
1859 1860 1861 |
*/ bool wb_over_bg_thresh(struct bdi_writeback *wb) { |
947e9762a writeback: update... |
1862 |
struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) }; |
c2aa723a6 writeback: implem... |
1863 |
struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) }; |
947e9762a writeback: update... |
1864 |
struct dirty_throttle_control * const gdtc = &gdtc_stor; |
c2aa723a6 writeback: implem... |
1865 1866 |
struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ? &mdtc_stor : NULL; |
ab19939a6 mm/page-writeback... |
1867 1868 |
unsigned long reclaimable; unsigned long thresh; |
aa661bbe1 writeback: move o... |
1869 |
|
947e9762a writeback: update... |
1870 1871 1872 1873 1874 |
/* * Similar to balance_dirty_pages() but ignores pages being written * as we're trying to decide whether to put more under writeback. */ gdtc->avail = global_dirtyable_memory(); |
8d92890bd mm/writeback: dis... |
1875 |
gdtc->dirty = global_node_page_state(NR_FILE_DIRTY); |
947e9762a writeback: update... |
1876 |
domain_dirty_limits(gdtc); |
aa661bbe1 writeback: move o... |
1877 |
|
947e9762a writeback: update... |
1878 |
if (gdtc->dirty > gdtc->bg_thresh) |
aa661bbe1 writeback: move o... |
1879 |
return true; |
ab19939a6 mm/page-writeback... |
1880 1881 1882 1883 1884 1885 1886 |
thresh = wb_calc_thresh(gdtc->wb, gdtc->bg_thresh); if (thresh < 2 * wb_stat_error()) reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE); else reclaimable = wb_stat(wb, WB_RECLAIMABLE); if (reclaimable > thresh) |
aa661bbe1 writeback: move o... |
1887 |
return true; |
c2aa723a6 writeback: implem... |
1888 |
if (mdtc) { |
c5edf9cdc writeback: fix in... |
1889 |
unsigned long filepages, headroom, writeback; |
c2aa723a6 writeback: implem... |
1890 |
|
c5edf9cdc writeback: fix in... |
1891 1892 1893 |
mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty, &writeback); mdtc_calc_avail(mdtc, filepages, headroom); |
c2aa723a6 writeback: implem... |
1894 1895 1896 1897 |
domain_dirty_limits(mdtc); /* ditto, ignore writeback */ if (mdtc->dirty > mdtc->bg_thresh) return true; |
ab19939a6 mm/page-writeback... |
1898 1899 1900 1901 1902 1903 1904 |
thresh = wb_calc_thresh(mdtc->wb, mdtc->bg_thresh); if (thresh < 2 * wb_stat_error()) reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE); else reclaimable = wb_stat(wb, WB_RECLAIMABLE); if (reclaimable > thresh) |
c2aa723a6 writeback: implem... |
1905 1906 |
return true; } |
aa661bbe1 writeback: move o... |
1907 1908 |
return false; } |
1da177e4c Linux-2.6.12-rc2 |
1909 |
/* |
1da177e4c Linux-2.6.12-rc2 |
1910 1911 |
* sysctl handler for /proc/sys/vm/dirty_writeback_centisecs */ |
cccad5b98 mm: convert use o... |
1912 |
int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, |
32927393d sysctl: pass kern... |
1913 |
void *buffer, size_t *length, loff_t *ppos) |
1da177e4c Linux-2.6.12-rc2 |
1914 |
{ |
94af58469 writeback: schedu... |
1915 1916 1917 1918 |
unsigned int old_interval = dirty_writeback_interval; int ret; ret = proc_dointvec(table, write, buffer, length, ppos); |
515c24c13 mm/page-writeback... |
1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 |
/* * Writing 0 to dirty_writeback_interval will disable periodic writeback * and a different non-zero value will wakeup the writeback threads. * wb_wakeup_delayed() would be more appropriate, but it's a pain to * iterate over all bdis and wbs. * The reason we do this is to make the change take effect immediately. */ if (!ret && write && dirty_writeback_interval && dirty_writeback_interval != old_interval) |
94af58469 writeback: schedu... |
1929 1930 1931 |
wakeup_flusher_threads(WB_REASON_PERIODIC); return ret; |
1da177e4c Linux-2.6.12-rc2 |
1932 |
} |
bca237a52 block/laptop_mode... |
1933 |
void laptop_mode_timer_fn(struct timer_list *t) |
1da177e4c Linux-2.6.12-rc2 |
1934 |
{ |
bca237a52 block/laptop_mode... |
1935 1936 |
struct backing_dev_info *backing_dev_info = from_timer(backing_dev_info, t, laptop_mode_wb_timer); |
1da177e4c Linux-2.6.12-rc2 |
1937 |
|
bca237a52 block/laptop_mode... |
1938 |
wakeup_flusher_threads_bdi(backing_dev_info, WB_REASON_LAPTOP_TIMER); |
1da177e4c Linux-2.6.12-rc2 |
1939 1940 1941 1942 1943 1944 1945 |
} /* * We've spun up the disk and we're in laptop mode: schedule writeback * of all dirty data a few seconds from now. If the flush is already scheduled * then push it back - the user is still using the disk. */ |
31373d09d laptop-mode: Make... |
1946 |
void laptop_io_completion(struct backing_dev_info *info) |
1da177e4c Linux-2.6.12-rc2 |
1947 |
{ |
31373d09d laptop-mode: Make... |
1948 |
mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode); |
1da177e4c Linux-2.6.12-rc2 |
1949 1950 1951 1952 1953 1954 1955 1956 1957 |
} /* * We're in laptop mode and we've just synced. The sync's writes will have * caused another writeback to be scheduled by laptop_io_completion. * Nothing needs to be written back anymore, so we unschedule the writeback. */ void laptop_sync_completion(void) { |
31373d09d laptop-mode: Make... |
1958 1959 1960 1961 1962 1963 1964 1965 |
struct backing_dev_info *bdi; rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) del_timer(&bdi->laptop_mode_wb_timer); rcu_read_unlock(); |
1da177e4c Linux-2.6.12-rc2 |
1966 1967 1968 1969 1970 |
} /* * If ratelimit_pages is too high then we can get into dirty-data overload * if a large number of processes all perform writes at the same time. |
1da177e4c Linux-2.6.12-rc2 |
1971 1972 1973 |
* * Here we set ratelimit_pages to a level which ensures that when all CPUs are * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory |
9d823e8f6 writeback: per ta... |
1974 |
* thresholds. |
1da177e4c Linux-2.6.12-rc2 |
1975 |
*/ |
2d1d43f6a [PATCH] call mm/p... |
1976 |
void writeback_set_ratelimit(void) |
1da177e4c Linux-2.6.12-rc2 |
1977 |
{ |
dcc25ae76 writeback: move g... |
1978 |
struct wb_domain *dom = &global_wb_domain; |
9d823e8f6 writeback: per ta... |
1979 1980 |
unsigned long background_thresh; unsigned long dirty_thresh; |
dcc25ae76 writeback: move g... |
1981 |
|
9d823e8f6 writeback: per ta... |
1982 |
global_dirty_limits(&background_thresh, &dirty_thresh); |
dcc25ae76 writeback: move g... |
1983 |
dom->dirty_limit = dirty_thresh; |
9d823e8f6 writeback: per ta... |
1984 |
ratelimit_pages = dirty_thresh / (num_online_cpus() * 32); |
1da177e4c Linux-2.6.12-rc2 |
1985 1986 |
if (ratelimit_pages < 16) ratelimit_pages = 16; |
1da177e4c Linux-2.6.12-rc2 |
1987 |
} |
/*
 * CPU hotplug callback (registered for both online and dead transitions in
 * page_writeback_init()): re-derive the per-CPU dirty ratelimit, which is
 * scaled by num_online_cpus() in writeback_set_ratelimit().
 */
static int page_writeback_cpu_online(unsigned int cpu)
{
	writeback_set_ratelimit();
	return 0;
}
/*
 * Called early on to tune the page writeback dirty limits.
 *
 * We used to scale dirty pages according to how total memory
 * related to pages that could be allocated for buffers.
 *
 * However, that was when we used "dirty_ratio" to scale with
 * all memory, and we don't do that any more. "dirty_ratio"
 * is now applied to total non-HIGHPAGE memory, and as such we can't
 * get into the old insane situation any more where we had
 * large amounts of dirty pages compared to a small amount of
 * non-HIGHMEM memory.
 *
 * But we might still want to scale the dirty_ratio by how
 * much memory the box has..
 */
void __init page_writeback_init(void)
{
	/* the global writeback domain must exist; boot cannot proceed without it */
	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));

	/*
	 * Keep ratelimit_pages in sync with the online CPU count; the same
	 * callback serves both hotplug directions.
	 */
	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
			  page_writeback_cpu_online, NULL);
	cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
			  page_writeback_cpu_online);
}
/**
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages
 * @mapping: address space structure to write
 * @start: starting page index
 * @end: ending page index (inclusive)
 *
 * This function scans the page range from @start to @end (inclusive) and tags
 * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is
 * that write_cache_pages (or whoever calls this function) will then use
 * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
 * used to avoid livelocking of writeback by a process steadily creating new
 * dirty pages in the file (thus it is important for this function to be quick
 * so that it can tag pages faster than a dirtying process can create them).
 */
void tag_pages_for_writeback(struct address_space *mapping,
			     pgoff_t start, pgoff_t end)
{
	XA_STATE(xas, &mapping->i_pages, start);
	unsigned int tagged = 0;
	void *page;

	xas_lock_irq(&xas);
	xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
		xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
		if (++tagged % XA_CHECK_SCHED)
			continue;

		/*
		 * Every XA_CHECK_SCHED tags, drop the irq-disabled lock and
		 * reschedule so a long run doesn't starve other users.
		 */
		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
}
EXPORT_SYMBOL(tag_pages_for_writeback);

/**
 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them. If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 *
 * To avoid livelocks (when other process dirties new pages), we first tag
 * pages which should be written back with TOWRITE tag and only then start
 * writing them. For data-integrity sync we have to be careful so that we do
 * not miss some pages (e.g., because some other process has cleared TOWRITE
 * tag we set). The rule we follow is that TOWRITE tag can be cleared only
 * by the process clearing the DIRTY tag (and submitting the page for IO).
 *
 * To avoid deadlocks between range_cyclic writeback and callers that hold
 * pages in PageWriteback to aggregate IO until write_cache_pages() returns,
 * we do not loop back to the start of the file. Doing so causes a page
 * lock/page writeback access order inversion - we should only ever lock
 * multiple pages in ascending page->index order, and looping back to the start
 * of the file violates that rule and causes deadlocks.
 *
 * Return: %0 on success, negative error code otherwise
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	int ret = 0;
	int done = 0;
	int error;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;	/* index of the last page handled (see below) */
	int range_whole = 0;
	xa_mark_t tag;

	pagevec_init(&pvec);
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
	}
	/*
	 * For integrity writeback, pre-tag the currently-dirty pages TOWRITE
	 * and only look for that tag, so pages dirtied after this point
	 * cannot livelock us (see the function comment above).
	 */
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
		tag_pages_for_writeback(mapping, index, end);
		tag = PAGECACHE_TAG_TOWRITE;
	} else {
		tag = PAGECACHE_TAG_DIRTY;
	}
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			done_index = page->index;

			lock_page(page);

			/*
			 * Page truncated or invalidated. We can freely skip it
			 * then, even for data integrity operations: the page
			 * has disappeared concurrently, so there could be no
			 * real expectation of this data integrity operation
			 * even if there is now a new, dirty page at the same
			 * pagecache address.
			 */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
			error = (*writepage)(page, wbc, data);
			if (unlikely(error)) {
				/*
				 * Handle errors according to the type of
				 * writeback. There's no need to continue for
				 * background writeback. Just push done_index
				 * past this page so media errors won't choke
				 * writeout for the entire file. For integrity
				 * writeback, we must process the entire dirty
				 * set regardless of errors because the fs may
				 * still have state to clear for each page. In
				 * that case we continue processing and return
				 * the first error.
				 */
				if (error == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					error = 0;
				} else if (wbc->sync_mode != WB_SYNC_ALL) {
					ret = error;
					done_index = page->index + 1;
					done = 1;
					break;
				}
				if (!ret)
					ret = error;
			}

			/*
			 * We stop writing back only if we are not doing
			 * integrity sync. In case of integrity sync we have to
			 * keep going until we have written all the pages
			 * we tagged for writeback prior to entering this loop.
			 */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	/*
	 * If we hit the last page and there is more work to be done: wrap
	 * back the index back to the start of the file for the next
	 * time we are called.
	 */
	if (wbc->range_cyclic && !done)
		done_index = 0;
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
0ea971801 consolidate gener... |
2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 |
EXPORT_SYMBOL(write_cache_pages); /* * Function used by generic_writepages to call the real writepage * function and set the mapping flags on error */ static int __writepage(struct page *page, struct writeback_control *wbc, void *data) { struct address_space *mapping = data; int ret = mapping->a_ops->writepage(page, wbc); mapping_set_error(mapping, ret); return ret; } /** * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them. * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * * This is a library function, which implements the writepages() * address_space_operation. |
a862f68a8 docs/core-api/mm:... |
2235 2236 |
* * Return: %0 on success, negative error code otherwise |
0ea971801 consolidate gener... |
2237 2238 2239 2240 |
*/ int generic_writepages(struct address_space *mapping, struct writeback_control *wbc) { |
9b6096a65 mm: make generic_... |
2241 2242 |
struct blk_plug plug; int ret; |
0ea971801 consolidate gener... |
2243 2244 2245 |
/* deal with chardevs and other special file */ if (!mapping->a_ops->writepage) return 0; |
9b6096a65 mm: make generic_... |
2246 2247 2248 2249 |
blk_start_plug(&plug); ret = write_cache_pages(mapping, wbc, __writepage, mapping); blk_finish_plug(&plug); return ret; |
0ea971801 consolidate gener... |
2250 |
} |
811d736f9 [PATCH] BLOCK: Di... |
2251 2252 |
EXPORT_SYMBOL(generic_writepages); |
/*
 * Write back some of @mapping's dirty pages, using the filesystem's
 * ->writepages() if it has one and generic_writepages() otherwise.
 * For integrity sync (WB_SYNC_ALL), -ENOMEM is retried after a short
 * congestion wait rather than returned, since giving up would break the
 * sync guarantee.  Also feeds the wb bandwidth estimator.
 */
int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	int ret;
	struct bdi_writeback *wb;

	if (wbc->nr_to_write <= 0)
		return 0;
	wb = inode_to_wb_wbc(mapping->host, wbc);
	wb_bandwidth_estimate_start(wb);
	while (1) {
		if (mapping->a_ops->writepages)
			ret = mapping->a_ops->writepages(mapping, wbc);
		else
			ret = generic_writepages(mapping, wbc);
		if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
			break;
		cond_resched();
		congestion_wait(BLK_RW_ASYNC, HZ/50);
	}
	/*
	 * Usually few pages are written by now from those we've just submitted
	 * but if there's constant writeback being submitted, this makes sure
	 * writeback bandwidth is updated once in a while.
	 */
	if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
				   BANDWIDTH_INTERVAL))
		wb_update_bandwidth(wb);
	return ret;
}

/**
 * write_one_page - write out a single page and wait on I/O
 * @page: the page to write
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
 * Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this
 * function returns.
 *
 * Return: %0 on success, negative error code otherwise
 */
int write_one_page(struct page *page)
{
	struct address_space *mapping = page->mapping;
	int ret = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	BUG_ON(!PageLocked(page));

	/* wait out any writeback already in flight before resubmitting */
	wait_on_page_writeback(page);

	if (clear_page_dirty_for_io(page)) {
		/* hold an extra ref across ->writepage and the wait below */
		get_page(page);
		ret = mapping->a_ops->writepage(page, &wbc);
		if (ret == 0)
			wait_on_page_writeback(page);
		put_page(page);
	} else {
		/* nothing to write; ->writepage would have unlocked for us */
		unlock_page(page);
	}

	if (!ret)
		ret = filemap_check_errors(mapping);
	return ret;
}
EXPORT_SYMBOL(write_one_page);

/*
 * For address_spaces which do not use buffers nor write back.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	/* returns 1 only if this call transitioned the page to dirty */
	if (!PageDirty(page))
		return !TestSetPageDirty(page);
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_no_writeback);
/*
 * Helper function for set_page_dirty family.
 *
 * Caller must hold lock_page_memcg().
 *
 * NOTE: This relies on being atomic wrt interrupts.
 */
static void account_page_dirtied(struct page *page,
		struct address_space *mapping)
{
	struct inode *inode = mapping->host;

	trace_writeback_dirty_page(page, mapping);

	if (mapping_can_writeback(mapping)) {
		struct bdi_writeback *wb;

		/* associate the inode with a wb before charging stats to it */
		inode_attach_wb(inode, page);
		wb = inode_to_wb(inode);

		/*
		 * __inc_* variants are used here; safe per the irq-atomicity
		 * requirement documented above.
		 */
		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		__inc_node_page_state(page, NR_DIRTIED);
		inc_wb_stat(wb, WB_RECLAIMABLE);
		inc_wb_stat(wb, WB_DIRTIED);
		task_io_account_write(PAGE_SIZE);
		/* feeds the per-task dirty throttling in balance_dirty_pages() */
		current->nr_dirtied++;
		__this_cpu_inc(bdp_ratelimits);

		mem_cgroup_track_foreign_dirty(page, wb);
	}
}

/*
 * Helper function for deaccounting dirty page without writeback.
 *
 * Caller must hold lock_page_memcg().
 */
void account_page_cleaned(struct page *page, struct address_space *mapping,
			  struct bdi_writeback *wb)
{
	if (mapping_can_writeback(mapping)) {
		/* undo the NR_FILE_DIRTY/WB_RECLAIMABLE side of dirtying */
		dec_lruvec_page_state(page, NR_FILE_DIRTY);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		dec_wb_stat(wb, WB_RECLAIMABLE);
		task_io_account_cancelled_write(PAGE_SIZE);
	}
}
/*
 * Mark the page dirty, and set it dirty in the page cache, and mark the inode
 * dirty.
 *
 * If warn is true, then emit a warning if the page is not uptodate and has
 * not been truncated.
 *
 * The caller must hold lock_page_memcg().
 */
void __set_page_dirty(struct page *page, struct address_space *mapping,
			     int warn)
{
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	if (page->mapping) {	/* Race with truncate? */
		WARN_ON_ONCE(warn && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		__xa_set_mark(&mapping->i_pages, page_index(page),
				PAGECACHE_TAG_DIRTY);
	}
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

/*
 * For address_spaces which do not use buffers. Just tag the page as dirty in
 * the xarray.
 *
 * This is also used when a single buffer is being dirtied: we want to set the
 * page dirty in that case, but not all the buffers. This is a "bottom-up"
 * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
 *
 * The caller must ensure this doesn't race with truncation. Most will simply
 * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
 * the pte lock held, which also locks out truncation.
 */
int __set_page_dirty_nobuffers(struct page *page)
{
	lock_page_memcg(page);
	if (!TestSetPageDirty(page)) {
		struct address_space *mapping = page_mapping(page);

		if (!mapping) {
			/* anon/unmapped page: the flag alone is enough */
			unlock_page_memcg(page);
			return 1;
		}
		/* warn on !uptodate only if no fs-private data is attached */
		__set_page_dirty(page, mapping, !PagePrivate(page));
		unlock_page_memcg(page);

		if (mapping->host) {
			/* !PageAnon && !swapper_space */
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		}
		return 1;
	}
	unlock_page_memcg(page);
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

/*
 * Call this whenever redirtying a page, to de-account the dirty counters
 * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written
 * counters (NR_WRITTEN, WB_WRITTEN) in long term. The mismatches will lead to
 * systematic errors in balanced_dirty_ratelimit and the dirty pages position
 * control.
 */
void account_page_redirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping && mapping_can_writeback(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		/* pin the inode's wb association while touching its stats */
		wb = unlocked_inode_to_wb_begin(inode, &cookie);
		current->nr_dirtied--;
		dec_node_page_state(page, NR_DIRTIED);
		dec_wb_stat(wb, WB_DIRTIED);
		unlocked_inode_to_wb_end(inode, &cookie);
	}
}
EXPORT_SYMBOL(account_page_redirty);

/*
 * When a writepage implementation decides that it doesn't want to write this
 * page for some reason, it should redirty the locked page via
 * redirty_page_for_writepage() and it should then unlock the page and return 0
 */
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
{
	int ret;

	/* tell the writeback walker this page was deliberately skipped */
	wbc->pages_skipped++;
	ret = __set_page_dirty_nobuffers(page);
	/* undo the dirtied-counter bump the line above just made */
	account_page_redirty(page);
	return ret;
}
EXPORT_SYMBOL(redirty_page_for_writepage);

/*
 * Dirty a page.
 *
 * For pages with a mapping this should be done under the page lock for the
 * benefit of asynchronous memory errors who prefer a consistent dirty state.
 * This rule can be broken in some special cases, but should be better not to.
 */
int set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	/* always operate on the head page of a compound page */
	page = compound_head(page);
	if (likely(mapping)) {
		/*
		 * readahead/lru_deactivate_page could remain
		 * PG_readahead/PG_reclaim due to race with end_page_writeback
		 * About readahead, if the page is written, the flags would be
		 * reset. So no problem.
		 * About lru_deactivate_page, if the page is redirty, the flag
		 * will be reset. So no problem. but if the page is used by readahead
		 * it will confuse readahead and make it restart the size rampup
		 * process. But it's a trivial problem.
		 */
		if (PageReclaim(page))
			ClearPageReclaim(page);
		return mapping->a_ops->set_page_dirty(page);
	}
	if (!PageDirty(page)) {
		if (!TestSetPageDirty(page))
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL(set_page_dirty);

/*
 * set_page_dirty() is racy if the caller has no reference against
 * page->mapping->host, and if the page is unlocked. This is because another
 * CPU could truncate the page off the mapping and then free the mapping.
 *
 * Usually, the page _is_ locked, or the caller is a user-space process which
 * holds a reference on the inode by having an open file.
 *
 * In other cases, the page should be locked before running set_page_dirty().
 */
int set_page_dirty_lock(struct page *page)
{
	int ret;

	lock_page(page);
	ret = set_page_dirty(page);
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(set_page_dirty_lock);

/*
 * This cancels just the dirty bit on the kernel page itself, it does NOT
 * actually remove dirty bits on any mmap's that may be around. It also
 * leaves the page tagged dirty, so any sync activity will still find it on
 * the dirty lists, and in particular, clear_page_dirty_for_io() will still
 * look at the dirty bits in the VM.
 *
 * Doing this should *normally* only ever be done when a page is truncated,
 * and is not actually mapped anywhere at all. However, fs/buffer.c does
 * this when it notices that somebody has cleaned out all the buffers on a
 * page without actually doing it through the VM. Can you say "ext3 is
 * horribly ugly"? Thought you could.
 */
void __cancel_dirty_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (mapping_can_writeback(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		/*
		 * lock_page_memcg() is required by account_page_cleaned();
		 * the wb begin/end pair pins the inode's wb association.
		 */
		lock_page_memcg(page);
		wb = unlocked_inode_to_wb_begin(inode, &cookie);

		if (TestClearPageDirty(page))
			account_page_cleaned(page, mapping, wb);

		unlocked_inode_to_wb_end(inode, &cookie);
		unlock_page_memcg(page);
	} else {
		/* no writeback accounting to unwind; just drop the flag */
		ClearPageDirty(page);
	}
}
EXPORT_SYMBOL(__cancel_dirty_page);
11f81becc page_writeback: r... |
2568 2569 |
/* |
1da177e4c Linux-2.6.12-rc2 |
2570 2571 2572 2573 |
* Clear a page's dirty flag, while caring for dirty memory accounting. * Returns true if the page was previously dirty. * * This is for preparing to put the page under writeout. We leave the page |
ff9c745b8 mm: Convert page-... |
2574 |
* tagged as dirty in the xarray so that a concurrent write-for-sync |
1da177e4c Linux-2.6.12-rc2 |
2575 2576 |
* can discover it via a PAGECACHE_TAG_DIRTY walk. The ->writepage * implementation will run either set_page_writeback() or set_page_dirty(), |
ff9c745b8 mm: Convert page-... |
2577 |
* at which stage we bring the page's dirty flag and xarray dirty tag |
1da177e4c Linux-2.6.12-rc2 |
2578 2579 |
* back into sync. * |
ff9c745b8 mm: Convert page-... |
2580 |
* This incoherency between the page's dirty flag and xarray tag is |
1da177e4c Linux-2.6.12-rc2 |
2581 2582 2583 2584 2585 |
* unfortunate, but it only exists while the page is locked. */ int clear_page_dirty_for_io(struct page *page) { struct address_space *mapping = page_mapping(page); |
c4843a759 memcg: add per cg... |
2586 |
int ret = 0; |
1da177e4c Linux-2.6.12-rc2 |
2587 |
|
184b4fef5 mm/page-writeback... |
2588 |
VM_BUG_ON_PAGE(!PageLocked(page), page); |
79352894b mm: fix clear_pag... |
2589 |
|
f56753ac2 bdi: replace BDI_... |
2590 |
if (mapping && mapping_can_writeback(mapping)) { |
682aa8e1a writeback: implem... |
2591 2592 |
struct inode *inode = mapping->host; struct bdi_writeback *wb; |
2e898e4c0 writeback: safer ... |
2593 |
struct wb_lock_cookie cookie = {}; |
682aa8e1a writeback: implem... |
2594 |
|
7658cc289 VM: Fix nasty and... |
2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 |
/* * Yes, Virginia, this is indeed insane. * * We use this sequence to make sure that * (a) we account for dirty stats properly * (b) we tell the low-level filesystem to * mark the whole page dirty if it was * dirty in a pagetable. Only to then * (c) clean the page again and return 1 to * cause the writeback. * * This way we avoid all nasty races with the * dirty bit in multiple places and clearing * them concurrently from different threads. * * Note! Normally the "set_page_dirty(page)" * has no effect on the actual dirty bit - since * that will already usually be set. But we * need the side effects, and it can help us * avoid races. * * We basically use the page "master dirty bit" * as a serialization point for all the different * threads doing their things. |
7658cc289 VM: Fix nasty and... |
2619 2620 2621 |
*/ if (page_mkclean(page)) set_page_dirty(page); |
79352894b mm: fix clear_pag... |
2622 2623 2624 |
/* * We carefully synchronise fault handlers against * installing a dirty pte and marking the page dirty |
2d6d7f982 mm: protect set_p... |
2625 2626 2627 2628 |
* at this point. We do this by having them hold the * page lock while dirtying the page, and pages are * always locked coming in here, so we get the desired * exclusion. |
79352894b mm: fix clear_pag... |
2629 |
*/ |
2e898e4c0 writeback: safer ... |
2630 |
wb = unlocked_inode_to_wb_begin(inode, &cookie); |
7658cc289 VM: Fix nasty and... |
2631 |
if (TestClearPageDirty(page)) { |
00f3ca2c2 mm: memcontrol: p... |
2632 |
dec_lruvec_page_state(page, NR_FILE_DIRTY); |
5a1c84b40 mm: remove reclai... |
2633 |
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); |
682aa8e1a writeback: implem... |
2634 |
dec_wb_stat(wb, WB_RECLAIMABLE); |
c4843a759 memcg: add per cg... |
2635 |
ret = 1; |
1da177e4c Linux-2.6.12-rc2 |
2636 |
} |
2e898e4c0 writeback: safer ... |
2637 |
unlocked_inode_to_wb_end(inode, &cookie); |
c4843a759 memcg: add per cg... |
2638 |
return ret; |
1da177e4c Linux-2.6.12-rc2 |
2639 |
} |
7658cc289 VM: Fix nasty and... |
2640 |
return TestClearPageDirty(page); |
1da177e4c Linux-2.6.12-rc2 |
2641 |
} |
58bb01a9c [PATCH] re-export... |
2642 |
EXPORT_SYMBOL(clear_page_dirty_for_io); |
/* An inode on @wb has entered writeback: bump the inode-under-writeback count. */
static void wb_inode_writeback_start(struct bdi_writeback *wb)
{
	atomic_inc(&wb->writeback_inodes);
}

/* An inode on @wb has finished writeback: drop the count and schedule a
 * (batched) bandwidth estimate update. */
static void wb_inode_writeback_end(struct bdi_writeback *wb)
{
	unsigned long flags;

	atomic_dec(&wb->writeback_inodes);
	/*
	 * Make sure estimate of writeback throughput gets updated after
	 * writeback completed. We delay the update by BANDWIDTH_INTERVAL
	 * (which is the interval other bandwidth updates use for batching) so
	 * that if multiple inodes end writeback at a similar time, they get
	 * batched into one bandwidth update.
	 */
	spin_lock_irqsave(&wb->work_lock, flags);
	if (test_bit(WB_registered, &wb->state))
		queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
	spin_unlock_irqrestore(&wb->work_lock, flags);
}
/*
 * Atomically clear PG_writeback on @page, keeping the xarray
 * PAGECACHE_TAG_WRITEBACK mark and the writeback statistics in sync.
 * Returns the old writeback state (non-zero if the page was under
 * writeback).
 */
int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	lock_page_memcg(page);
	if (mapping && mapping_use_writeback_tags(mapping)) {
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		/* Tag and flag must change together under the i_pages lock. */
		xa_lock_irqsave(&mapping->i_pages, flags);
		ret = TestClearPageWriteback(page);
		if (ret) {
			__xa_clear_mark(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
				struct bdi_writeback *wb = inode_to_wb(inode);

				dec_wb_stat(wb, WB_WRITEBACK);
				__wb_writeout_inc(wb);
				/* Last writeback page of this inode gone? */
				if (!mapping_tagged(mapping,
						    PAGECACHE_TAG_WRITEBACK))
					wb_inode_writeback_end(wb);
			}
		}

		if (mapping->host && !mapping_tagged(mapping,
						     PAGECACHE_TAG_WRITEBACK))
			sb_clear_inode_writeback(mapping->host);

		xa_unlock_irqrestore(&mapping->i_pages, flags);
	} else {
		ret = TestClearPageWriteback(page);
	}
	if (ret) {
		dec_lruvec_page_state(page, NR_WRITEBACK);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		inc_node_page_state(page, NR_WRITTEN);
	}
	unlock_page_memcg(page);
	return ret;
}
/*
 * Atomically set PG_writeback on @page and the matching xarray
 * PAGECACHE_TAG_WRITEBACK mark, updating writeback statistics.
 * @keep_write: when true, leave PAGECACHE_TAG_TOWRITE in place so a
 * tagged write_cache_pages() sweep will still revisit the page.
 * Returns the old writeback state.
 */
int __test_set_page_writeback(struct page *page, bool keep_write)
{
	struct address_space *mapping = page_mapping(page);
	int ret, access_ret;

	lock_page_memcg(page);
	if (mapping && mapping_use_writeback_tags(mapping)) {
		XA_STATE(xas, &mapping->i_pages, page_index(page));
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		xas_lock_irqsave(&xas, flags);
		xas_load(&xas);
		ret = TestSetPageWriteback(page);
		if (!ret) {
			bool on_wblist;

			/* Sample before tagging: was this inode already writing back? */
			on_wblist = mapping_tagged(mapping,
						   PAGECACHE_TAG_WRITEBACK);

			xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
				struct bdi_writeback *wb = inode_to_wb(inode);

				inc_wb_stat(wb, WB_WRITEBACK);
				if (!on_wblist)
					wb_inode_writeback_start(wb);
			}

			/*
			 * We can come through here when swapping anonymous
			 * pages, so we don't necessarily have an inode to track
			 * for sync.
			 */
			if (mapping->host && !on_wblist)
				sb_mark_inode_writeback(mapping->host);
		}
		if (!PageDirty(page))
			xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
		if (!keep_write)
			xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
		xas_unlock_irqrestore(&xas, flags);
	} else {
		ret = TestSetPageWriteback(page);
	}
	if (!ret) {
		inc_lruvec_page_state(page, NR_WRITEBACK);
		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
	}
	unlock_page_memcg(page);
	access_ret = arch_make_page_accessible(page);
	/*
	 * If writeback has been triggered on a page that cannot be made
	 * accessible, it is too late to recover here.
	 */
	VM_BUG_ON_PAGE(access_ret != 0, page);

	return ret;
}
EXPORT_SYMBOL(__test_set_page_writeback);
1da177e4c Linux-2.6.12-rc2 |
2764 |
|
19343b5bd mm/page-writeback... |
2765 2766 2767 2768 2769 |
/* * Wait for a page to complete writeback */ void wait_on_page_writeback(struct page *page) { |
c2407cf7d mm: make wait_on_... |
2770 |
while (PageWriteback(page)) { |
19343b5bd mm/page-writeback... |
2771 2772 2773 2774 2775 |
trace_wait_on_page_writeback(page, page_mapping(page)); wait_on_page_bit(page, PG_writeback); } } EXPORT_SYMBOL_GPL(wait_on_page_writeback); |
e5dbd3321 mm/writeback: Add... |
2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 |
/* * Wait for a page to complete writeback. Returns -EINTR if we get a * fatal signal while waiting. */ int wait_on_page_writeback_killable(struct page *page) { while (PageWriteback(page)) { trace_wait_on_page_writeback(page, page_mapping(page)); if (wait_on_page_bit_killable(page, PG_writeback)) return -EINTR; } return 0; } EXPORT_SYMBOL_GPL(wait_on_page_writeback_killable); |
1d1d1a767 mm: only enforce ... |
2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 |
/** * wait_for_stable_page() - wait for writeback to finish, if necessary. * @page: The page to wait on. * * This function determines if the given page is related to a backing device * that requires page contents to be held stable during writeback. If so, then * it will wait for any pending writeback to complete. */ void wait_for_stable_page(struct page *page) { |
8854a6a72 mm/page-writeback... |
2801 |
page = thp_head(page); |
1cb039f3d bdi: replace BDI_... |
2802 |
if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) |
de1414a65 fs: export inode_... |
2803 |
wait_on_page_writeback(page); |
1d1d1a767 mm: only enforce ... |
2804 2805 |
} EXPORT_SYMBOL_GPL(wait_for_stable_page); |