Eric Lee / smarc-fsl-linux-kernel

Blame view

mm/page-writeback.c 38.8 KB

1da177e4c Linus Torvalds Linux-2.6.12-rc2	1	/*
f30c22695 Uwe Zeisberger fix file specific...	2	* mm/page-writeback.c
1da177e4c Linus Torvalds Linux-2.6.12-rc2	3 4	* * Copyright (C) 2002, Linus Torvalds.
04fbfdc14 Peter Zijlstra mm: per device di...	5	* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
1da177e4c Linus Torvalds Linux-2.6.12-rc2	6 7 8 9	* * Contains functions related to writing back dirty pages at the * address_space level. *
e1f8e8744 Francois Cami Remove Andrew Mor...	10	* 10Apr2002 Andrew Morton
1da177e4c Linus Torvalds Linux-2.6.12-rc2	11 12 13 14 15 16 17 18 19 20 21 22 23 24	* Initial version */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/writeback.h> #include <linux/init.h> #include <linux/backing-dev.h>
55e829af0 Andrew Morton [PATCH] io-accoun...	25	#include <linux/task_io_accounting_ops.h>
1da177e4c Linus Torvalds Linux-2.6.12-rc2	26 27	#include <linux/blkdev.h> #include <linux/mpage.h>
d08b3851d Peter Zijlstra [PATCH] mm: track...	28	#include <linux/rmap.h>
1da177e4c Linus Torvalds Linux-2.6.12-rc2	29 30 31 32 33 34	#include <linux/percpu.h> #include <linux/notifier.h> #include <linux/smp.h> #include <linux/sysctl.h> #include <linux/cpu.h> #include <linux/syscalls.h>
cf9a2ae8d David Howells [PATCH] BLOCK: Mo...	35	#include <linux/buffer_head.h>
811d736f9 David Howells [PATCH] BLOCK: Di...	36	#include <linux/pagevec.h>
028c2dd18 Dave Chinner writeback: Add tr...	37	#include <trace/events/writeback.h>
/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;

/*
 * When balance_dirty_pages decides that the caller needs to perform some
 * non-background writeback, this is how many pages it will attempt to write.
 * It should be somewhat larger than dirtied pages to ensure that reasonably
 * large amounts of I/O are submitted.
 *
 * The input is first raised to at least ratelimit_pages, then scaled by 1.5.
 */
static inline long sync_writeback_pages(unsigned long dirtied)
{
	unsigned long min_chunk = ratelimit_pages;
	unsigned long chunk = (dirtied < min_chunk) ? min_chunk : dirtied;

	return chunk + chunk / 2;
}

/* The following parameters are exported via /proc/sys/vm */

/*
 * Start background writeback (via writeback threads) at this percentage
 */
1b5e62b42 Wu Fengguang writeback: double...	63	int dirty_background_ratio = 10;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	64 65	/*
2da02997e David Rientjes mm: add dirty_bac...	66 67 68 69 70 71	* dirty_background_bytes starts at 0 (disabled) so that it is a function of * dirty_background_ratio * the amount of dirtyable memory / unsigned long dirty_background_bytes; /
195cf453d Bron Gondwana mm/page-writeback...	72 73 74 75 76 77	* free highmem will not be subtracted from the total free memory * for calculating free ratios if vm_highmem_is_dirtyable is true / int vm_highmem_is_dirtyable; /
1da177e4c Linus Torvalds Linux-2.6.12-rc2	78 79	* The generator of dirty data starts writeback at this percentage */
1b5e62b42 Wu Fengguang writeback: double...	80	int vm_dirty_ratio = 20;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	81 82	/*
2da02997e David Rientjes mm: add dirty_bac...	83 84 85 86 87 88	* vm_dirty_bytes starts at 0 (disabled) so that it is a function of * vm_dirty_ratio * the amount of dirtyable memory / unsigned long vm_dirty_bytes; /
704503d83 Alexey Dobriyan mm: fix proc_doin...	89	* The interval between `kupdate'-style writebacks
1da177e4c Linus Torvalds Linux-2.6.12-rc2	90	*/
22ef37eed Toshiyuki Okajima page-writeback: f...	91	unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */
1da177e4c Linus Torvalds Linux-2.6.12-rc2	92 93	/*
704503d83 Alexey Dobriyan mm: fix proc_doin...	94	* The longest time for which data is allowed to remain dirty
1da177e4c Linus Torvalds Linux-2.6.12-rc2	95	*/
22ef37eed Toshiyuki Okajima page-writeback: f...	96	unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */
1da177e4c Linus Torvalds Linux-2.6.12-rc2	97 98 99 100 101 102 103	/* * Flag that makes the machine dump writes/reads and block dirtyings. / int block_dump; /
ed5b43f15 Bart Samwel [PATCH] Represent...	104 105	* Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies: * a full sync is triggered after this time elapses without any disk activity.
1da177e4c Linus Torvalds Linux-2.6.12-rc2	106 107 108 109 110 111	/ int laptop_mode; EXPORT_SYMBOL(laptop_mode); / End of sysctl-exported parameters */
1da177e4c Linus Torvalds Linux-2.6.12-rc2	112	/*
04fbfdc14 Peter Zijlstra mm: per device di...	113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128	* Scale the writeback cache size proportional to the relative writeout speeds. * * We do this by keeping a floating proportion between BDIs, based on page * writeback completions [end_page_writeback()]. Those devices that write out * pages fastest will get the larger share, while the slower will get a smaller * share. * * We use page writeout completions because we are interested in getting rid of * dirty pages. Having them written out is the primary goal. * * We introduce a concept of time, a period over which we measure these events, * because demand can/will vary over time. The length of this period itself is * measured in page writeback completions. * */ static struct prop_descriptor vm_completions;
3e26c149c Peter Zijlstra mm: dirty balanci...	129	static struct prop_descriptor vm_dirties;
04fbfdc14 Peter Zijlstra mm: per device di...	130
04fbfdc14 Peter Zijlstra mm: per device di...	131 132 133 134 135 136 137 138	/* * couple the period to the dirty_ratio: * * period/2 ~ roundup_pow_of_two(dirty limit) */ static int calc_period_shift(void) { unsigned long dirty_total;
2da02997e David Rientjes mm: add dirty_bac...	139 140 141 142 143	if (vm_dirty_bytes) dirty_total = vm_dirty_bytes / PAGE_SIZE; else dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100;
04fbfdc14 Peter Zijlstra mm: per device di...	144 145 146 147	return 2 + ilog2(dirty_total - 1); } /*
2da02997e David Rientjes mm: add dirty_bac...	148	* update the period when the dirty threshold changes.
04fbfdc14 Peter Zijlstra mm: per device di...	149	*/
2da02997e David Rientjes mm: add dirty_bac...	150 151 152 153 154 155 156 157	static void update_completion_period(void) { int shift = calc_period_shift(); prop_change_shift(&vm_completions, shift); prop_change_shift(&vm_dirties, shift); } int dirty_background_ratio_handler(struct ctl_table *table, int write,
8d65af789 Alexey Dobriyan sysctl: remove "s...	158	void __user buffer, size_t lenp,
2da02997e David Rientjes mm: add dirty_bac...	159 160 161	loff_t *ppos) { int ret;
8d65af789 Alexey Dobriyan sysctl: remove "s...	162	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2da02997e David Rientjes mm: add dirty_bac...	163 164 165 166 167 168	if (ret == 0 && write) dirty_background_bytes = 0; return ret; } int dirty_background_bytes_handler(struct ctl_table *table, int write,
8d65af789 Alexey Dobriyan sysctl: remove "s...	169	void __user buffer, size_t lenp,
2da02997e David Rientjes mm: add dirty_bac...	170 171 172	loff_t *ppos) { int ret;
8d65af789 Alexey Dobriyan sysctl: remove "s...	173	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
2da02997e David Rientjes mm: add dirty_bac...	174 175 176 177	if (ret == 0 && write) dirty_background_ratio = 0; return ret; }
04fbfdc14 Peter Zijlstra mm: per device di...	178	int dirty_ratio_handler(struct ctl_table *table, int write,
8d65af789 Alexey Dobriyan sysctl: remove "s...	179	void __user buffer, size_t lenp,
04fbfdc14 Peter Zijlstra mm: per device di...	180 181 182	loff_t *ppos) { int old_ratio = vm_dirty_ratio;
2da02997e David Rientjes mm: add dirty_bac...	183	int ret;
8d65af789 Alexey Dobriyan sysctl: remove "s...	184	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
04fbfdc14 Peter Zijlstra mm: per device di...	185	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
2da02997e David Rientjes mm: add dirty_bac...	186 187 188 189 190 191 192 193	update_completion_period(); vm_dirty_bytes = 0; } return ret; } int dirty_bytes_handler(struct ctl_table *table, int write,
8d65af789 Alexey Dobriyan sysctl: remove "s...	194	void __user buffer, size_t lenp,
2da02997e David Rientjes mm: add dirty_bac...	195 196	loff_t *ppos) {
fc3501d41 Sven Wegener mm: fix dirty_byt...	197	unsigned long old_bytes = vm_dirty_bytes;
2da02997e David Rientjes mm: add dirty_bac...	198	int ret;
8d65af789 Alexey Dobriyan sysctl: remove "s...	199	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
2da02997e David Rientjes mm: add dirty_bac...	200 201 202	if (ret == 0 && write && vm_dirty_bytes != old_bytes) { update_completion_period(); vm_dirty_ratio = 0;
04fbfdc14 Peter Zijlstra mm: per device di...	203 204 205 206 207 208 209 210 211 212	} return ret; } /* * Increment the BDI's writeout completion count and the global writeout * completion count. Called from test_clear_page_writeback(). / static inline void __bdi_writeout_inc(struct backing_dev_info bdi) {
a42dde041 Peter Zijlstra mm: bdi: allow se...	213 214	__prop_inc_percpu_max(&vm_completions, &bdi->completions, bdi->max_prop_frac);
04fbfdc14 Peter Zijlstra mm: per device di...	215	}
dd5656e59 Miklos Szeredi mm: bdi: export b...	216 217 218 219 220 221 222 223 224	void bdi_writeout_inc(struct backing_dev_info *bdi) { unsigned long flags; local_irq_save(flags); __bdi_writeout_inc(bdi); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(bdi_writeout_inc);
1cf6e7d83 Nick Piggin mm: task dirty ac...	225	void task_dirty_inc(struct task_struct *tsk)
3e26c149c Peter Zijlstra mm: dirty balanci...	226 227 228	{ prop_inc_single(&vm_dirties, &tsk->dirties); }
04fbfdc14 Peter Zijlstra mm: per device di...	229 230 231 232 233 234 235 236 237 238 239 240 241 242	/* * Obtain an accurate fraction of the BDI's portion. / static void bdi_writeout_fraction(struct backing_dev_info bdi, long numerator, long denominator) { if (bdi_cap_writeback_dirty(bdi)) { prop_fraction_percpu(&vm_completions, &bdi->completions, numerator, denominator); } else { numerator = 0; denominator = 1; } }
3e26c149c Peter Zijlstra mm: dirty balanci...	243 244 245 246 247 248 249 250	static inline void task_dirties_fraction(struct task_struct tsk, long numerator, long denominator) { prop_fraction_single(&vm_dirties, &tsk->dirties, numerator, denominator); } /
1babe1838 Wu Fengguang writeback: add co...	251	* task_dirty_limit - scale down dirty throttling threshold for one task
3e26c149c Peter Zijlstra mm: dirty balanci...	252 253 254 255	* * task specific dirty limit: * * dirty -= (dirty/8) * p_{t}
1babe1838 Wu Fengguang writeback: add co...	256 257 258 259 260 261 262	* * To protect light/slow dirtying tasks from heavier/fast ones, we start * throttling individual tasks before reaching the bdi dirty limit. * Relatively low thresholds will be allocated to heavy dirtiers. So when * dirty pages grow large, heavy dirtiers will be throttled first, which will * effectively curb the growth of dirty pages. Light dirtiers with high enough * dirty threshold may never get throttled.
3e26c149c Peter Zijlstra mm: dirty balanci...	263	*/
16c4042f0 Wu Fengguang writeback: avoid ...	264 265	static unsigned long task_dirty_limit(struct task_struct *tsk, unsigned long bdi_dirty)
3e26c149c Peter Zijlstra mm: dirty balanci...	266 267	{ long numerator, denominator;
16c4042f0 Wu Fengguang writeback: avoid ...	268	unsigned long dirty = bdi_dirty;
3e26c149c Peter Zijlstra mm: dirty balanci...	269 270 271 272 273 274 275	u64 inv = dirty >> 3; task_dirties_fraction(tsk, &numerator, &denominator); inv *= numerator; do_div(inv, denominator); dirty -= inv;
3e26c149c Peter Zijlstra mm: dirty balanci...	276
16c4042f0 Wu Fengguang writeback: avoid ...	277	return max(dirty, bdi_dirty/2);
3e26c149c Peter Zijlstra mm: dirty balanci...	278	}
04fbfdc14 Peter Zijlstra mm: per device di...	279	/*
189d3c4a9 Peter Zijlstra mm: bdi: allow se...	280 281	* */
189d3c4a9 Peter Zijlstra mm: bdi: allow se...	282 283 284 285 286	static unsigned int bdi_min_ratio; int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { int ret = 0;
189d3c4a9 Peter Zijlstra mm: bdi: allow se...	287
cfc4ba536 Jens Axboe writeback: use RC...	288	spin_lock_bh(&bdi_lock);
a42dde041 Peter Zijlstra mm: bdi: allow se...	289	if (min_ratio > bdi->max_ratio) {
189d3c4a9 Peter Zijlstra mm: bdi: allow se...	290	ret = -EINVAL;
a42dde041 Peter Zijlstra mm: bdi: allow se...	291 292 293 294 295 296 297 298 299	} else { min_ratio -= bdi->min_ratio; if (bdi_min_ratio + min_ratio < 100) { bdi_min_ratio += min_ratio; bdi->min_ratio += min_ratio; } else { ret = -EINVAL; } }
cfc4ba536 Jens Axboe writeback: use RC...	300	spin_unlock_bh(&bdi_lock);
a42dde041 Peter Zijlstra mm: bdi: allow se...	301 302 303 304 305 306	return ret; } int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) {
a42dde041 Peter Zijlstra mm: bdi: allow se...	307 308 309 310	int ret = 0; if (max_ratio > 100) return -EINVAL;
cfc4ba536 Jens Axboe writeback: use RC...	311	spin_lock_bh(&bdi_lock);
a42dde041 Peter Zijlstra mm: bdi: allow se...	312 313 314 315 316 317	if (bdi->min_ratio > max_ratio) { ret = -EINVAL; } else { bdi->max_ratio = max_ratio; bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; }
cfc4ba536 Jens Axboe writeback: use RC...	318	spin_unlock_bh(&bdi_lock);
189d3c4a9 Peter Zijlstra mm: bdi: allow se...	319 320 321	return ret; }
a42dde041 Peter Zijlstra mm: bdi: allow se...	322	EXPORT_SYMBOL(bdi_set_max_ratio);
189d3c4a9 Peter Zijlstra mm: bdi: allow se...	323 324	/*
1da177e4c Linus Torvalds Linux-2.6.12-rc2	325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340	* Work out the current dirty-memory clamping and background writeout * thresholds. * * The main aim here is to lower them aggressively if there is a lot of mapped * memory around. To avoid stressing page reclaim with lots of unreclaimable * pages. It is better to clamp down on writers than to start swapping, and * performing lots of scanning. * * We only allow 1/2 of the currently-unmapped memory to be dirtied. * * We don't permit the clamping level to fall below 5% - that is getting rather * excessive. * * We make sure that the background writeout level is below the adjusted * clamping level. */
/*
 * Sum the free and reclaimable pages of every node's ZONE_HIGHMEM zone,
 * capped at @total.  Without CONFIG_HIGHMEM this is always 0.
 */
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	unsigned long highmem_pages = 0;
	int nid;

	for_each_node_state(nid, N_HIGH_MEMORY) {
		struct zone *highmem =
			&NODE_DATA(nid)->node_zones[ZONE_HIGHMEM];

		highmem_pages += zone_page_state(highmem, NR_FREE_PAGES);
		highmem_pages += zone_reclaimable_pages(highmem);
	}

	/*
	 * Make sure that the number of highmem pages is never larger
	 * than the number of the total dirtyable memory. This can only
	 * occur in very strange VM situations but we want to make sure
	 * that this does not occur.
	 */
	return min(highmem_pages, total);
#else
	return 0;
#endif
}
3eefae994 Steven Rostedt ftrace: limit tra...	364 365 366 367 368 369 370	/** * determine_dirtyable_memory - amount of memory that may be used * * Returns the numebr of pages that can currently be freed and used * by the kernel for direct mappings. */ unsigned long determine_dirtyable_memory(void)
1b4244647 Christoph Lameter Use ZVC counters ...	371 372	{ unsigned long x;
adea02a1b Wu Fengguang mm: count only re...	373	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
195cf453d Bron Gondwana mm/page-writeback...	374 375 376	if (!vm_highmem_is_dirtyable) x -= highmem_dirtyable_memory(x);
1b4244647 Christoph Lameter Use ZVC counters ...	377 378	return x + 1; /* Ensure that we never return 0 */ }
03ab450f0 Randy Dunlap mm/page-writeback...	379	/*
1babe1838 Wu Fengguang writeback: add co...	380 381 382 383 384 385 386 387	* global_dirty_limits - background-writeback and dirty-throttling thresholds * * Calculate the dirty thresholds based on sysctl parameters * - vm.dirty_background_ratio or vm.dirty_background_bytes * - vm.dirty_ratio or vm.dirty_bytes * The dirty limits will be lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd) and * runtime tasks. */
16c4042f0 Wu Fengguang writeback: avoid ...	388	void global_dirty_limits(unsigned long pbackground, unsigned long pdirty)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	389	{
364aeb284 David Rientjes mm: change dirty ...	390 391	unsigned long background; unsigned long dirty;
1b4244647 Christoph Lameter Use ZVC counters ...	392	unsigned long available_memory = determine_dirtyable_memory();
1da177e4c Linus Torvalds Linux-2.6.12-rc2	393	struct task_struct *tsk;
2da02997e David Rientjes mm: add dirty_bac...	394 395 396 397 398 399 400 401 402 403	if (vm_dirty_bytes) dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE); else { int dirty_ratio; dirty_ratio = vm_dirty_ratio; if (dirty_ratio < 5) dirty_ratio = 5; dirty = (dirty_ratio * available_memory) / 100; }
1da177e4c Linus Torvalds Linux-2.6.12-rc2	404
2da02997e David Rientjes mm: add dirty_bac...	405 406 407 408	if (dirty_background_bytes) background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE); else background = (dirty_background_ratio * available_memory) / 100;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	409
2da02997e David Rientjes mm: add dirty_bac...	410 411	if (background >= dirty) background = dirty / 2;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	412 413 414 415 416 417 418	tsk = current; if (tsk->flags & PF_LESS_THROTTLE \|\| rt_task(tsk)) { background += background / 4; dirty += dirty / 4; } pbackground = background; pdirty = dirty;
16c4042f0 Wu Fengguang writeback: avoid ...	419	}
04fbfdc14 Peter Zijlstra mm: per device di...	420
03ab450f0 Randy Dunlap mm/page-writeback...	421	/*
1babe1838 Wu Fengguang writeback: add co...	422 423 424 425 426 427 428 429 430 431	* bdi_dirty_limit - @bdi's share of dirty throttling threshold * * Allocate high/low dirty limits to fast/slow devices, in order to prevent * - starving fast devices * - piling up dirty pages (that will take long time to sync) on slow devices * * The bdi's share of dirty limit will be adapting to its throughput and * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set. / unsigned long bdi_dirty_limit(struct backing_dev_info bdi, unsigned long dirty)
16c4042f0 Wu Fengguang writeback: avoid ...	432 433 434	{ u64 bdi_dirty; long numerator, denominator;
04fbfdc14 Peter Zijlstra mm: per device di...	435
16c4042f0 Wu Fengguang writeback: avoid ...	436 437 438 439	/* * Calculate this BDI's share of the dirty ratio. */ bdi_writeout_fraction(bdi, &numerator, &denominator);
04fbfdc14 Peter Zijlstra mm: per device di...	440
16c4042f0 Wu Fengguang writeback: avoid ...	441 442 443	bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100; bdi_dirty *= numerator; do_div(bdi_dirty, denominator);
04fbfdc14 Peter Zijlstra mm: per device di...	444
16c4042f0 Wu Fengguang writeback: avoid ...	445 446 447 448 449	bdi_dirty += (dirty * bdi->min_ratio) / 100; if (bdi_dirty > (dirty * bdi->max_ratio) / 100) bdi_dirty = dirty * bdi->max_ratio / 100; return bdi_dirty;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	450 451 452 453 454 455	} /* * balance_dirty_pages() must be called by processes which are generating dirty * data. It looks at the number of dirty pages in the machine and will force * the caller to perform writeback if the system is over `vm_dirty_ratio'.
5b0830cb9 Jens Axboe writeback: get ri...	456 457	* If we're over `background_thresh' then the writeback threads are woken to * perform some writeout.
1da177e4c Linus Torvalds Linux-2.6.12-rc2	458	*/
3a2e9a5a2 Wu Fengguang writeback: balanc...	459 460	static void balance_dirty_pages(struct address_space *mapping, unsigned long write_chunk)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	461	{
5fce25a9d Peter Zijlstra mm: speed up writ...	462 463	long nr_reclaimable, bdi_nr_reclaimable; long nr_writeback, bdi_nr_writeback;
364aeb284 David Rientjes mm: change dirty ...	464 465 466	unsigned long background_thresh; unsigned long dirty_thresh; unsigned long bdi_thresh;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	467	unsigned long pages_written = 0;
87c6a9b25 Jens Axboe writeback: make b...	468	unsigned long pause = 1;
e50e37201 Wu Fengguang writeback: balanc...	469	bool dirty_exceeded = false;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	470 471 472 473	struct backing_dev_info *bdi = mapping->backing_dev_info; for (;;) { struct writeback_control wbc = {
1da177e4c Linus Torvalds Linux-2.6.12-rc2	474 475 476	.sync_mode = WB_SYNC_NONE, .older_than_this = NULL, .nr_to_write = write_chunk,
111ebb6e6 OGAWA Hirofumi [PATCH] writeback...	477	.range_cyclic = 1,
1da177e4c Linus Torvalds Linux-2.6.12-rc2	478	};
5fce25a9d Peter Zijlstra mm: speed up writ...	479 480 481	nr_reclaimable = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); nr_writeback = global_page_state(NR_WRITEBACK);
16c4042f0 Wu Fengguang writeback: avoid ...	482 483 484 485 486 487 488 489 490 491 492 493 494	global_dirty_limits(&background_thresh, &dirty_thresh); /* * Throttle it only when the background writeback cannot * catch-up. This avoids (excessively) small writeouts * when the bdi limits are ramping up. */ if (nr_reclaimable + nr_writeback < (background_thresh + dirty_thresh) / 2) break; bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); bdi_thresh = task_dirty_limit(current, bdi_thresh);
e50e37201 Wu Fengguang writeback: balanc...	495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511	/* * In order to avoid the stacked BDI deadlock we need * to ensure we accurately count the 'dirty' pages when * the threshold is low. * * Otherwise it would be possible to get thresh+n pages * reported dirty, even though there are thresh-m pages * actually dirty; with m+n sitting in the percpu * deltas. / if (bdi_thresh < 2bdi_stat_error(bdi)) { bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK); } else { bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK); }
5fce25a9d Peter Zijlstra mm: speed up writ...	512
e50e37201 Wu Fengguang writeback: balanc...	513 514 515 516 517 518 519 520 521 522 523	/* * The bdi thresh is somehow "soft" limit derived from the * global "hard" limit. The former helps to prevent heavy IO * bdi or process from holding back light ones; The latter is * the last resort safeguard. */ dirty_exceeded = (bdi_nr_reclaimable + bdi_nr_writeback >= bdi_thresh) \|\| (nr_reclaimable + nr_writeback >= dirty_thresh); if (!dirty_exceeded)
04fbfdc14 Peter Zijlstra mm: per device di...	524	break;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	525
04fbfdc14 Peter Zijlstra mm: per device di...	526 527	if (!bdi->dirty_exceeded) bdi->dirty_exceeded = 1;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	528 529 530 531 532 533	/* Note: nr_reclaimable denotes nr_dirty + nr_unstable. * Unstable writes are a feature of certain networked * filesystems (i.e. NFS) in which data may have been * written to the server's write cache, but has not yet * been flushed to permanent storage.
d7831a0bd Richard Kennedy mm: prevent balan...	534 535 536	* Only move pages to writeback if this bdi is over its * threshold otherwise wait until the disk writes catch * up.
1da177e4c Linus Torvalds Linux-2.6.12-rc2	537	*/
028c2dd18 Dave Chinner writeback: Add tr...	538	trace_wbc_balance_dirty_start(&wbc, bdi);
d7831a0bd Richard Kennedy mm: prevent balan...	539	if (bdi_nr_reclaimable > bdi_thresh) {
9c3a8ee8a Christoph Hellwig writeback: remove...	540	writeback_inodes_wb(&bdi->wb, &wbc);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	541	pages_written += write_chunk - wbc.nr_to_write;
028c2dd18 Dave Chinner writeback: Add tr...	542	trace_wbc_balance_dirty_written(&wbc, bdi);
e50e37201 Wu Fengguang writeback: balanc...	543 544	if (pages_written >= write_chunk) break; /* We've done our duty */
04fbfdc14 Peter Zijlstra mm: per device di...	545	}
028c2dd18 Dave Chinner writeback: Add tr...	546	trace_wbc_balance_dirty_wait(&wbc, bdi);
d25105e89 Wu Fengguang writeback: accoun...	547 548	__set_current_state(TASK_INTERRUPTIBLE); io_schedule_timeout(pause);
87c6a9b25 Jens Axboe writeback: make b...	549 550 551 552 553 554 555 556	/* * Increase the delay for each loop, up to our previous * default of taking a 100ms nap. */ pause <<= 1; if (pause > HZ / 10) pause = HZ / 10;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	557	}
e50e37201 Wu Fengguang writeback: balanc...	558	if (!dirty_exceeded && bdi->dirty_exceeded)
04fbfdc14 Peter Zijlstra mm: per device di...	559	bdi->dirty_exceeded = 0;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	560 561	if (writeback_in_progress(bdi))
5b0830cb9 Jens Axboe writeback: get ri...	562	return;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	563 564 565 566 567 568 569 570 571 572	/* * In laptop mode, we wait until hitting the higher threshold before * starting background writeout, and then write out all the way down * to the lower threshold. So slow writers cause minimal disk activity. * * In normal mode, we start background writeout at the lower * background_thresh, to keep the amount of dirty memory low. */ if ((laptop_mode && pages_written) \|\|
e50e37201 Wu Fengguang writeback: balanc...	573	(!laptop_mode && (nr_reclaimable > background_thresh)))
c5444198c Christoph Hellwig writeback: simpli...	574	bdi_start_background_writeback(bdi);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	575	}
/*
 * Mark @page dirty and, if it was newly dirtied or @page_mkwrite is set,
 * run the rate-limited dirty balancing for the page's mapping (if any).
 */
void set_page_dirty_balance(struct page *page, int page_mkwrite)
{
	struct address_space *mapping;

	if (!set_page_dirty(page) && !page_mkwrite)
		return;

	mapping = page_mapping(page);
	if (mapping)
		balance_dirty_pages_ratelimited(mapping);
}
245b2e70e Tejun Heo percpu: clean up ...	585	static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	586	/**
fa5a734e4 Andrew Morton [PATCH] balance_d...	587	* balance_dirty_pages_ratelimited_nr - balance dirty memory state
67be2dd1b Martin Waitz [PATCH] DocBook: ...	588	* @mapping: address_space which was dirtied
a580290c3 Martin Waitz Documentation: fi...	589	* @nr_pages_dirtied: number of pages which the caller has just dirtied
1da177e4c Linus Torvalds Linux-2.6.12-rc2	590 591 592 593 594 595 596 597 598 599	* * Processes which are dirtying memory should call in here once for each page * which was newly dirtied. The function will periodically check the system's * dirty state and will initiate writeback if needed. * * On really big machines, get_writeback_state is expensive, so try to avoid * calling it too often (ratelimiting). But once we're over the dirty memory * limit we decrease the ratelimiting by a lot, to prevent individual processes * from overshooting the limit by (ratelimit_pages) each. */
fa5a734e4 Andrew Morton [PATCH] balance_d...	600 601	void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, unsigned long nr_pages_dirtied)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	602	{
fa5a734e4 Andrew Morton [PATCH] balance_d...	603 604	unsigned long ratelimit; unsigned long *p;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	605 606	ratelimit = ratelimit_pages;
04fbfdc14 Peter Zijlstra mm: per device di...	607	if (mapping->backing_dev_info->dirty_exceeded)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	608 609 610 611 612 613	ratelimit = 8; /* * Check the rate limiting. Also, we do not want to throttle real-time * tasks in balance_dirty_pages(). Period. */
fa5a734e4 Andrew Morton [PATCH] balance_d...	614	preempt_disable();
245b2e70e Tejun Heo percpu: clean up ...	615	p = &__get_cpu_var(bdp_ratelimits);
fa5a734e4 Andrew Morton [PATCH] balance_d...	616 617	p += nr_pages_dirtied; if (unlikely(p >= ratelimit)) {
3a2e9a5a2 Wu Fengguang writeback: balanc...	618	ratelimit = sync_writeback_pages(*p);
fa5a734e4 Andrew Morton [PATCH] balance_d...	619 620	*p = 0; preempt_enable();
3a2e9a5a2 Wu Fengguang writeback: balanc...	621	balance_dirty_pages(mapping, ratelimit);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	622 623	return; }
fa5a734e4 Andrew Morton [PATCH] balance_d...	624	preempt_enable();
1da177e4c Linus Torvalds Linux-2.6.12-rc2	625	}
fa5a734e4 Andrew Morton [PATCH] balance_d...	626	EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	627
232ea4d69 Andrew Morton [PATCH] throttle_...	628	void throttle_vm_writeout(gfp_t gfp_mask)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	629	{
364aeb284 David Rientjes mm: change dirty ...	630 631	unsigned long background_thresh; unsigned long dirty_thresh;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	632 633	for ( ; ; ) {
16c4042f0 Wu Fengguang writeback: avoid ...	634	global_dirty_limits(&background_thresh, &dirty_thresh);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	635 636 637 638 639 640	/* * Boost the allowable dirty threshold a bit for page * allocators so they don't get DoS'ed by heavy writers / dirty_thresh += dirty_thresh / 10; / wheeee... */
c24f21bda Christoph Lameter [PATCH] zoned vm ...	641 642 643	if (global_page_state(NR_UNSTABLE_NFS) + global_page_state(NR_WRITEBACK) <= dirty_thresh) break;
8aa7e847d Jens Axboe Fix congestion_wa...	644	congestion_wait(BLK_RW_ASYNC, HZ/10);
369f2389e Fengguang Wu writeback: remove...	645 646 647 648 649 650 651 652	/* * The caller might hold locks which can prevent IO completion * or progress in the filesystem. So we cannot just sit here * waiting for IO to complete. */ if ((gfp_mask & (__GFP_FS\|__GFP_IO)) != (__GFP_FS\|__GFP_IO)) break;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	653 654	} }
1da177e4c Linus Torvalds Linux-2.6.12-rc2	655	/*
1da177e4c Linus Torvalds Linux-2.6.12-rc2	656 657 658	* sysctl handler for /proc/sys/vm/dirty_writeback_centisecs / int dirty_writeback_centisecs_handler(ctl_table table, int write,
8d65af789 Alexey Dobriyan sysctl: remove "s...	659	void __user buffer, size_t length, loff_t *ppos)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	660	{
8d65af789 Alexey Dobriyan sysctl: remove "s...	661	proc_dointvec(table, write, buffer, length, ppos);
6423104b6 Jens Axboe writeback: fixups...	662	bdi_arm_supers_timer();
1da177e4c Linus Torvalds Linux-2.6.12-rc2	663 664	return 0; }
#ifdef CONFIG_BLOCK
/*
 * Timer callback; @data carries the request_queue pointer.  Starts writeback
 * of the current global dirty + unstable-NFS page count on that queue's BDI
 * if the BDI has dirty IO.
 */
void laptop_mode_timer_fn(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;
	int nr_pages = global_page_state(NR_FILE_DIRTY) +
		global_page_state(NR_UNSTABLE_NFS);

	if (!bdi_has_dirty_io(&q->backing_dev_info))
		return;

	/*
	 * We want to write everything out, not just down to the dirty
	 * threshold
	 */
	bdi_start_writeback(&q->backing_dev_info, nr_pages);
}

/*
 * We've spun up the disk and we're in laptop mode: schedule writeback
 * of all dirty data a few seconds from now.  If the flush is already scheduled
 * then push it back - the user is still using the disk.
 */
void laptop_io_completion(struct backing_dev_info *info)
{
	unsigned long expires = jiffies + laptop_mode;

	mod_timer(&info->laptop_mode_wb_timer, expires);
}

/*
 * We're in laptop mode and we've just synced. The sync's writes will have
 * caused another writeback to be scheduled by laptop_io_completion.
 * Nothing needs to be written back anymore, so we unschedule the writeback.
 */
void laptop_sync_completion(void)
{
	struct backing_dev_info *bdi;

	/* Walk every BDI on bdi_list under RCU and cancel its timer. */
	rcu_read_lock();
	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
		del_timer(&bdi->laptop_mode_wb_timer);
	rcu_read_unlock();
}
#endif
1da177e4c Linus Torvalds Linux-2.6.12-rc2	707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723	/* * If ratelimit_pages is too high then we can get into dirty-data overload * if a large number of processes all perform writes at the same time. * If it is too low then SMP machines will call the (expensive) * get_writeback_state too often. * * Here we set ratelimit_pages to a level which ensures that when all CPUs are * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory * thresholds before writeback cuts in. * * But the limit should not be set too high. Because it also controls the * amount of memory which the balance_dirty_pages() caller has to write back. * If this is too large then the caller will block on the IO queue all the * time. So limit it to four megabytes - the balance_dirty_pages() caller * will write six megabyte chunks, max. */
2d1d43f6a Chandra Seetharaman [PATCH] call mm/p...	724	void writeback_set_ratelimit(void)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	725	{
40c99aae2 Chandra Seetharaman [PATCH] remove st...	726	ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	727 728 729 730 731	if (ratelimit_pages < 16) ratelimit_pages = 16; if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024) ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE; }
26c2143b6 Chandra Seetharaman [PATCH] cpu hotpl...	732	static int __cpuinit
1da177e4c Linus Torvalds Linux-2.6.12-rc2	733 734	ratelimit_handler(struct notifier_block self, unsigned long u, void v) {
2d1d43f6a Chandra Seetharaman [PATCH] call mm/p...	735	writeback_set_ratelimit();
aa0f03037 Paul E. McKenney [PATCH] Change co...	736	return NOTIFY_DONE;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	737	}
74b85f379 Chandra Seetharaman [PATCH] cpu hotpl...	738	static struct notifier_block __cpuinitdata ratelimit_nb = {
1da177e4c Linus Torvalds Linux-2.6.12-rc2	739 740 741 742 743	.notifier_call = ratelimit_handler, .next = NULL, }; /*
dc6e29da9 Linus Torvalds Fix balance_dirty...	744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759	* Called early on to tune the page writeback dirty limits. * * We used to scale dirty pages according to how total memory * related to pages that could be allocated for buffers (by * comparing nr_free_buffer_pages() to vm_total_pages. * * However, that was when we used "dirty_ratio" to scale with * all memory, and we don't do that any more. "dirty_ratio" * is now applied to total non-HIGHPAGE memory (by subtracting * totalhigh_pages from vm_total_pages), and as such we can't * get into the old insane situation any more where we had * large amounts of dirty pages compared to a small amount of * non-HIGHMEM memory. * * But we might still want to scale the dirty_ratio by how * much memory the box has..
1da177e4c Linus Torvalds Linux-2.6.12-rc2	760 761 762	*/ void __init page_writeback_init(void) {
04fbfdc14 Peter Zijlstra mm: per device di...	763	int shift;
2d1d43f6a Chandra Seetharaman [PATCH] call mm/p...	764	writeback_set_ratelimit();
1da177e4c Linus Torvalds Linux-2.6.12-rc2	765	register_cpu_notifier(&ratelimit_nb);
04fbfdc14 Peter Zijlstra mm: per device di...	766 767 768	shift = calc_period_shift(); prop_descriptor_init(&vm_completions, shift);
3e26c149c Peter Zijlstra mm: dirty balanci...	769	prop_descriptor_init(&vm_dirties, shift);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	770	}
811d736f9 David Howells [PATCH] BLOCK: Di...	771	/**
f446daaea Jan Kara mm: implement wri...	772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787	* tag_pages_for_writeback - tag pages to be written by write_cache_pages * @mapping: address space structure to write * @start: starting page index * @end: ending page index (inclusive) * * This function scans the page range from @start to @end (inclusive) and tags * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is * that write_cache_pages (or whoever calls this function) will then use * TOWRITE tag to identify pages eligible for writeback. This mechanism is * used to avoid livelocking of writeback by a process steadily creating new * dirty pages in the file (thus it is important for this function to be quick * so that it can tag pages faster than a dirtying process can create them). / / * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce tree_lock latency. */
f446daaea Jan Kara mm: implement wri...	788 789 790	void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end) {
3c111a071 Randy Dunlap mm: fix fatal ker...	791	#define WRITEBACK_TAG_BATCH 4096
f446daaea Jan Kara mm: implement wri...	792 793 794 795 796 797 798 799 800 801	unsigned long tagged; do { spin_lock_irq(&mapping->tree_lock); tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree, &start, end, WRITEBACK_TAG_BATCH, PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE); spin_unlock_irq(&mapping->tree_lock); WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH); cond_resched();
d5ed3a4af Jan Kara lib/radix-tree.c:...	802 803	/* We check 'start' to handle wrapping when end == ~0UL */ } while (tagged >= WRITEBACK_TAG_BATCH && start);
f446daaea Jan Kara mm: implement wri...	804 805 806 807	} EXPORT_SYMBOL(tag_pages_for_writeback); /**
0ea971801 Miklos Szeredi consolidate gener...	808	* write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
811d736f9 David Howells [PATCH] BLOCK: Di...	809 810	* @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write
0ea971801 Miklos Szeredi consolidate gener...	811 812	* @writepage: function called for each page * @data: data passed to writepage function
811d736f9 David Howells [PATCH] BLOCK: Di...	813	*
0ea971801 Miklos Szeredi consolidate gener...	814	* If a page is already under I/O, write_cache_pages() skips it, even
811d736f9 David Howells [PATCH] BLOCK: Di...	815 816 817 818 819 820	* if it's dirty. This is desirable behaviour for memory-cleaning writeback, * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() * and msync() need to guarantee that all the data which was dirty at the time * the call was made get new I/O started against them. If wbc->sync_mode is * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete.
f446daaea Jan Kara mm: implement wri...	821 822 823 824 825 826 827	* * To avoid livelocks (when other process dirties new pages), we first tag * pages which should be written back with TOWRITE tag and only then start * writing them. For data-integrity sync we have to be careful so that we do * not miss some pages (e.g., because some other process has cleared TOWRITE * tag we set). The rule we follow is that TOWRITE tag can be cleared only * by the process clearing the DIRTY tag (and submitting the page for IO).
811d736f9 David Howells [PATCH] BLOCK: Di...	828	*/
0ea971801 Miklos Szeredi consolidate gener...	829 830 831	int write_cache_pages(struct address_space mapping, struct writeback_control wbc, writepage_t writepage, void *data)
811d736f9 David Howells [PATCH] BLOCK: Di...	832	{
811d736f9 David Howells [PATCH] BLOCK: Di...	833 834	int ret = 0; int done = 0;
811d736f9 David Howells [PATCH] BLOCK: Di...	835 836	struct pagevec pvec; int nr_pages;
31a12666d Nick Piggin mm: write_cache_p...	837	pgoff_t uninitialized_var(writeback_index);
811d736f9 David Howells [PATCH] BLOCK: Di...	838 839	pgoff_t index; pgoff_t end; /* Inclusive */
bd19e012f Nick Piggin mm: write_cache_p...	840	pgoff_t done_index;
31a12666d Nick Piggin mm: write_cache_p...	841	int cycled;
811d736f9 David Howells [PATCH] BLOCK: Di...	842	int range_whole = 0;
f446daaea Jan Kara mm: implement wri...	843	int tag;
811d736f9 David Howells [PATCH] BLOCK: Di...	844
811d736f9 David Howells [PATCH] BLOCK: Di...	845 846	pagevec_init(&pvec, 0); if (wbc->range_cyclic) {
31a12666d Nick Piggin mm: write_cache_p...	847 848 849 850 851 852	writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; if (index == 0) cycled = 1; else cycled = 0;
811d736f9 David Howells [PATCH] BLOCK: Di...	853 854 855 856 857 858	end = -1; } else { index = wbc->range_start >> PAGE_CACHE_SHIFT; end = wbc->range_end >> PAGE_CACHE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1;
31a12666d Nick Piggin mm: write_cache_p...	859	cycled = 1; /* ignore range_cyclic tests */
811d736f9 David Howells [PATCH] BLOCK: Di...	860	}
f446daaea Jan Kara mm: implement wri...	861 862 863 864	if (wbc->sync_mode == WB_SYNC_ALL) tag = PAGECACHE_TAG_TOWRITE; else tag = PAGECACHE_TAG_DIRTY;
811d736f9 David Howells [PATCH] BLOCK: Di...	865	retry:
f446daaea Jan Kara mm: implement wri...	866 867	if (wbc->sync_mode == WB_SYNC_ALL) tag_pages_for_writeback(mapping, index, end);
bd19e012f Nick Piggin mm: write_cache_p...	868	done_index = index;
5a3d5c981 Nick Piggin mm: write_cache_p...	869 870	while (!done && (index <= end)) { int i;
f446daaea Jan Kara mm: implement wri...	871	nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
5a3d5c981 Nick Piggin mm: write_cache_p...	872 873 874	min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); if (nr_pages == 0) break;
811d736f9 David Howells [PATCH] BLOCK: Di...	875
811d736f9 David Howells [PATCH] BLOCK: Di...	876 877 878 879	for (i = 0; i < nr_pages; i++) { struct page page = pvec.pages[i]; /
d5482cdf8 Nick Piggin mm: write_cache_p...	880 881 882 883 884	* At this point, the page may be truncated or * invalidated (changing page->mapping to NULL), or * even swizzled back from swapper_space to tmpfs file * mapping. However, page->index will not change * because we have a reference on the page.
811d736f9 David Howells [PATCH] BLOCK: Di...	885	*/
d5482cdf8 Nick Piggin mm: write_cache_p...	886 887 888 889 890 891 892 893 894 895	if (page->index > end) { /* * can't be range_cyclic (1st pass) because * end == -1 in that case. */ done = 1; break; } done_index = page->index + 1;
811d736f9 David Howells [PATCH] BLOCK: Di...	896	lock_page(page);
5a3d5c981 Nick Piggin mm: write_cache_p...	897 898 899 900 901 902 903 904	/* * Page truncated or invalidated. We can freely skip it * then, even for data integrity operations: the page * has disappeared concurrently, so there could be no * real expectation of this data interity operation * even if there is now a new, dirty page at the same * pagecache address. */
811d736f9 David Howells [PATCH] BLOCK: Di...	905	if (unlikely(page->mapping != mapping)) {
5a3d5c981 Nick Piggin mm: write_cache_p...	906	continue_unlock:
811d736f9 David Howells [PATCH] BLOCK: Di...	907 908 909	unlock_page(page); continue; }
515f4a037 Nick Piggin mm: write_cache_p...	910 911 912 913 914 915 916 917 918 919 920	if (!PageDirty(page)) { /* someone wrote it for us */ goto continue_unlock; } if (PageWriteback(page)) { if (wbc->sync_mode != WB_SYNC_NONE) wait_on_page_writeback(page); else goto continue_unlock; }
811d736f9 David Howells [PATCH] BLOCK: Di...	921
515f4a037 Nick Piggin mm: write_cache_p...	922 923	BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page))
5a3d5c981 Nick Piggin mm: write_cache_p...	924	goto continue_unlock;
811d736f9 David Howells [PATCH] BLOCK: Di...	925
9e094383b Dave Chinner writeback: Add tr...	926	trace_wbc_writepage(wbc, mapping->backing_dev_info);
0ea971801 Miklos Szeredi consolidate gener...	927	ret = (*writepage)(page, wbc, data);
00266770b Nick Piggin mm: write_cache_p...	928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944	if (unlikely(ret)) { if (ret == AOP_WRITEPAGE_ACTIVATE) { unlock_page(page); ret = 0; } else { /* * done_index is set past this page, * so media errors will not choke * background writeout for the entire * file. This has consequences for * range_cyclic semantics (ie. it may * not be suitable for data integrity * writeout). */ done = 1; break; }
0b5649278 Dave Chinner writeback: pay at...	945	}
00266770b Nick Piggin mm: write_cache_p...	946
546a19242 Dave Chinner writeback: write_...	947 948 949 950 951 952 953 954 955 956	/* * We stop writing back only if we are not doing * integrity sync. In case of integrity sync we have to * keep going until we have written all the pages * we tagged for writeback prior to entering this loop. */ if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) { done = 1; break;
05fe478dd Nick Piggin mm: write_cache_p...	957	}
811d736f9 David Howells [PATCH] BLOCK: Di...	958 959 960 961	} pagevec_release(&pvec); cond_resched(); }
3a4c6800f Nick Piggin Fix page writebac...	962	if (!cycled && !done) {
811d736f9 David Howells [PATCH] BLOCK: Di...	963	/*
31a12666d Nick Piggin mm: write_cache_p...	964	* range_cyclic:
811d736f9 David Howells [PATCH] BLOCK: Di...	965 966 967	* We hit the last page and there is more work to be done: wrap * back to the start of the file */
31a12666d Nick Piggin mm: write_cache_p...	968	cycled = 1;
811d736f9 David Howells [PATCH] BLOCK: Di...	969	index = 0;
31a12666d Nick Piggin mm: write_cache_p...	970	end = writeback_index - 1;
811d736f9 David Howells [PATCH] BLOCK: Di...	971 972	goto retry; }
0b5649278 Dave Chinner writeback: pay at...	973 974	if (wbc->range_cyclic \|\| (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index;
06d6cf695 Aneesh Kumar K.V mm: Add range_con...	975
811d736f9 David Howells [PATCH] BLOCK: Di...	976 977	return ret; }
0ea971801 Miklos Szeredi consolidate gener...	978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009	EXPORT_SYMBOL(write_cache_pages); /* * Function used by generic_writepages to call the real writepage * function and set the mapping flags on error / static int __writepage(struct page page, struct writeback_control wbc, void data) { struct address_space mapping = data; int ret = mapping->a_ops->writepage(page, wbc); mapping_set_error(mapping, ret); return ret; } /* * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them. * @mapping: address space structure to write * @wbc: subtract the number of written pages from @wbc->nr_to_write * This is a library function, which implements the writepages() * address_space_operation. / int generic_writepages(struct address_space mapping, struct writeback_control wbc) { / deal with chardevs and other special file */ if (!mapping->a_ops->writepage) return 0; return write_cache_pages(mapping, wbc, __writepage, mapping); }
811d736f9 David Howells [PATCH] BLOCK: Di...	1010 1011	EXPORT_SYMBOL(generic_writepages);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1012 1013	int do_writepages(struct address_space mapping, struct writeback_control wbc) {
22905f775 Andrew Morton identify multipag...	1014	int ret;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1015 1016 1017	if (wbc->nr_to_write <= 0) return 0; if (mapping->a_ops->writepages)
d08b3851d Peter Zijlstra [PATCH] mm: track...	1018	ret = mapping->a_ops->writepages(mapping, wbc);
22905f775 Andrew Morton identify multipag...	1019 1020	else ret = generic_writepages(mapping, wbc);
22905f775 Andrew Morton identify multipag...	1021	return ret;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1022 1023 1024 1025	} /** * write_one_page - write out a single page and optionally wait on I/O
67be2dd1b Martin Waitz [PATCH] DocBook: ...	1026 1027	* @page: the page to write * @wait: if true, wait on writeout
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063	* * The page must be locked by the caller and will be unlocked upon return. * * write_one_page() returns a negative error code if I/O failed. / int write_one_page(struct page page, int wait) { struct address_space mapping = page->mapping; int ret = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 1, }; BUG_ON(!PageLocked(page)); if (wait) wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { page_cache_get(page); ret = mapping->a_ops->writepage(page, &wbc); if (ret == 0 && wait) { wait_on_page_writeback(page); if (PageError(page)) ret = -EIO; } page_cache_release(page); } else { unlock_page(page); } return ret; } EXPORT_SYMBOL(write_one_page); /
767193253 Ken Chen [PATCH] simplify ...	1064 1065 1066 1067 1068 1069 1070 1071 1072 1073	* For address_spaces which do not use buffers nor write back. / int __set_page_dirty_no_writeback(struct page page) { if (!PageDirty(page)) SetPageDirty(page); return 0; } /*
e3a7cca1e Edward Shishkin vfs: add/use acco...	1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085	* Helper function for set_page_dirty family. * NOTE: This relies on being atomic wrt interrupts. / void account_page_dirtied(struct page page, struct address_space *mapping) { if (mapping_cap_account_dirty(mapping)) { __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); task_dirty_inc(current); task_io_account_write(PAGE_CACHE_SIZE); } }
679ceace8 Michael Rubin mm: exporting acc...	1086	EXPORT_SYMBOL(account_page_dirtied);
e3a7cca1e Edward Shishkin vfs: add/use acco...	1087 1088	/*
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100	* For address_spaces which do not use buffers. Just tag the page as dirty in * its radix tree. * * This is also used when a single buffer is being dirtied: we want to set the * page dirty in that case, but not all the buffers. This is a "bottom-up" * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying. * * Most callers have locked the page, which pins the address_space in memory. * But zap_pte_range() does not lock the page, however in that case the * mapping is pinned by the vma's ->vm_file reference. * * We take care to handle the case where the page was truncated from the
183ff22bb Simon Arlott spelling fixes: mm/	1101	* mapping by re-checking page_mapping() inside tree_lock.
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1102 1103 1104	/ int __set_page_dirty_nobuffers(struct page page) {
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1105 1106 1107	if (!TestSetPageDirty(page)) { struct address_space mapping = page_mapping(page); struct address_space mapping2;
8c08540f8 Andrew Morton [PATCH] clean up ...	1108 1109	if (!mapping) return 1;
19fd62312 Nick Piggin mm: spinlock tree...	1110	spin_lock_irq(&mapping->tree_lock);
8c08540f8 Andrew Morton [PATCH] clean up ...	1111 1112 1113	mapping2 = page_mapping(page); if (mapping2) { /* Race with truncate? */ BUG_ON(mapping2 != mapping);
787d2214c Nick Piggin fs: introduce som...	1114	WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
e3a7cca1e Edward Shishkin vfs: add/use acco...	1115	account_page_dirtied(page, mapping);
8c08540f8 Andrew Morton [PATCH] clean up ...	1116 1117 1118	radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); }
19fd62312 Nick Piggin mm: spinlock tree...	1119	spin_unlock_irq(&mapping->tree_lock);
8c08540f8 Andrew Morton [PATCH] clean up ...	1120 1121 1122	if (mapping->host) { /* !PageAnon && !swapper_space */ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1123	}
4741c9fd3 Andrew Morton [PATCH] set_page_...	1124	return 1;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1125	}
4741c9fd3 Andrew Morton [PATCH] set_page_...	1126	return 0;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142	} EXPORT_SYMBOL(__set_page_dirty_nobuffers); /* * When a writepage implementation decides that it doesn't want to write this * page for some reason, it should redirty the locked page via * redirty_page_for_writepage() and it should then unlock the page and return 0 / int redirty_page_for_writepage(struct writeback_control wbc, struct page page) { wbc->pages_skipped++; return __set_page_dirty_nobuffers(page); } EXPORT_SYMBOL(redirty_page_for_writepage); /
6746aff74 Wu Fengguang HWPOISON: shmem: ...	1143 1144 1145 1146 1147 1148 1149	* Dirty a page. * * For pages with a mapping this should be done under the page lock * for the benefit of asynchronous memory errors who prefer a consistent * dirty state. This rule can be broken in some special cases, * but should be better not to. *
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1150 1151 1152	* If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. */
1cf6e7d83 Nick Piggin mm: task dirty ac...	1153	int set_page_dirty(struct page *page)
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1154 1155 1156 1157 1158	{ struct address_space mapping = page_mapping(page); if (likely(mapping)) { int (spd)(struct page *) = mapping->a_ops->set_page_dirty;
9361401eb David Howells [PATCH] BLOCK: Ma...	1159 1160 1161 1162 1163	#ifdef CONFIG_BLOCK if (!spd) spd = __set_page_dirty_buffers; #endif return (*spd)(page);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1164	}
4741c9fd3 Andrew Morton [PATCH] set_page_...	1165 1166 1167 1168	if (!PageDirty(page)) { if (!TestSetPageDirty(page)) return 1; }
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185	return 0; } EXPORT_SYMBOL(set_page_dirty); /* * set_page_dirty() is racy if the caller has no reference against * page->mapping->host, and if the page is unlocked. This is because another * CPU could truncate the page off the mapping and then free the mapping. * * Usually, the page _is_ locked, or the caller is a user-space process which * holds a reference on the inode by having an open file. * * In other cases, the page should be locked before running set_page_dirty(). / int set_page_dirty_lock(struct page page) { int ret;
db37648cd Nick Piggin [PATCH] mm: non s...	1186	lock_page_nosync(page);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1187 1188 1189 1190 1191 1192 1193	ret = set_page_dirty(page); unlock_page(page); return ret; } EXPORT_SYMBOL(set_page_dirty_lock); /*
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209	* Clear a page's dirty flag, while caring for dirty memory accounting. * Returns true if the page was previously dirty. * * This is for preparing to put the page under writeout. We leave the page * tagged as dirty in the radix tree so that a concurrent write-for-sync * can discover it via a PAGECACHE_TAG_DIRTY walk. The ->writepage * implementation will run either set_page_writeback() or set_page_dirty(), * at which stage we bring the page's dirty flag and radix-tree dirty tag * back into sync. * * This incoherency between the page's dirty flag and radix-tree tag is * unfortunate, but it only exists while the page is locked. / int clear_page_dirty_for_io(struct page page) { struct address_space *mapping = page_mapping(page);
79352894b Nick Piggin mm: fix clear_pag...	1210	BUG_ON(!PageLocked(page));
fe3cba17c Fengguang Wu mm: share PG_read...	1211	ClearPageReclaim(page);
7658cc289 Linus Torvalds VM: Fix nasty and...	1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236	if (mapping && mapping_cap_account_dirty(mapping)) { /* * Yes, Virginia, this is indeed insane. * * We use this sequence to make sure that * (a) we account for dirty stats properly * (b) we tell the low-level filesystem to * mark the whole page dirty if it was * dirty in a pagetable. Only to then * (c) clean the page again and return 1 to * cause the writeback. * * This way we avoid all nasty races with the * dirty bit in multiple places and clearing * them concurrently from different threads. * * Note! Normally the "set_page_dirty(page)" * has no effect on the actual dirty bit - since * that will already usually be set. But we * need the side effects, and it can help us * avoid races. * * We basically use the page "master dirty bit" * as a serialization point for all the different * threads doing their things.
7658cc289 Linus Torvalds VM: Fix nasty and...	1237 1238 1239	*/ if (page_mkclean(page)) set_page_dirty(page);
79352894b Nick Piggin mm: fix clear_pag...	1240 1241 1242 1243 1244 1245 1246 1247 1248 1249	/* * We carefully synchronise fault handlers against * installing a dirty pte and marking the page dirty * at this point. We do this by having them hold the * page lock at some point after installing their * pte, but before marking the page dirty. * Pages are always locked coming in here, so we get * the desired exclusion. See mm/memory.c:do_wp_page() * for more comments. */
7658cc289 Linus Torvalds VM: Fix nasty and...	1250	if (TestClearPageDirty(page)) {
8c08540f8 Andrew Morton [PATCH] clean up ...	1251	dec_zone_page_state(page, NR_FILE_DIRTY);
c9e51e418 Peter Zijlstra mm: count reclaim...	1252 1253	dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
7658cc289 Linus Torvalds VM: Fix nasty and...	1254	return 1;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1255	}
7658cc289 Linus Torvalds VM: Fix nasty and...	1256	return 0;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1257	}
7658cc289 Linus Torvalds VM: Fix nasty and...	1258	return TestClearPageDirty(page);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1259	}
58bb01a9c Hans Reiser [PATCH] re-export...	1260	EXPORT_SYMBOL(clear_page_dirty_for_io);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1261 1262 1263 1264 1265 1266 1267	int test_clear_page_writeback(struct page page) { struct address_space mapping = page_mapping(page); int ret; if (mapping) {
69cb51d18 Peter Zijlstra mm: count writeba...	1268	struct backing_dev_info *bdi = mapping->backing_dev_info;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1269	unsigned long flags;
19fd62312 Nick Piggin mm: spinlock tree...	1270	spin_lock_irqsave(&mapping->tree_lock, flags);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1271	ret = TestClearPageWriteback(page);
69cb51d18 Peter Zijlstra mm: count writeba...	1272	if (ret) {
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1273 1274 1275	radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_WRITEBACK);
e4ad08fe6 Miklos Szeredi mm: bdi: add sepa...	1276	if (bdi_cap_account_writeback(bdi)) {
69cb51d18 Peter Zijlstra mm: count writeba...	1277	__dec_bdi_stat(bdi, BDI_WRITEBACK);
04fbfdc14 Peter Zijlstra mm: per device di...	1278 1279	__bdi_writeout_inc(bdi); }
69cb51d18 Peter Zijlstra mm: count writeba...	1280	}
19fd62312 Nick Piggin mm: spinlock tree...	1281	spin_unlock_irqrestore(&mapping->tree_lock, flags);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1282 1283 1284	} else { ret = TestClearPageWriteback(page); }
d688abf50 Andrew Morton move page writeba...	1285 1286	if (ret) dec_zone_page_state(page, NR_WRITEBACK);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1287 1288 1289 1290 1291 1292 1293 1294 1295	return ret; } int test_set_page_writeback(struct page page) { struct address_space mapping = page_mapping(page); int ret; if (mapping) {
69cb51d18 Peter Zijlstra mm: count writeba...	1296	struct backing_dev_info *bdi = mapping->backing_dev_info;
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1297	unsigned long flags;
19fd62312 Nick Piggin mm: spinlock tree...	1298	spin_lock_irqsave(&mapping->tree_lock, flags);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1299	ret = TestSetPageWriteback(page);
69cb51d18 Peter Zijlstra mm: count writeba...	1300	if (!ret) {
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1301 1302 1303	radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_WRITEBACK);
e4ad08fe6 Miklos Szeredi mm: bdi: add sepa...	1304	if (bdi_cap_account_writeback(bdi))
69cb51d18 Peter Zijlstra mm: count writeba...	1305 1306	__inc_bdi_stat(bdi, BDI_WRITEBACK); }
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1307 1308 1309 1310	if (!PageDirty(page)) radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY);
f446daaea Jan Kara mm: implement wri...	1311 1312 1313	radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_TOWRITE);
19fd62312 Nick Piggin mm: spinlock tree...	1314	spin_unlock_irqrestore(&mapping->tree_lock, flags);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1315 1316 1317	} else { ret = TestSetPageWriteback(page); }
d688abf50 Andrew Morton move page writeba...	1318 1319	if (!ret) inc_zone_page_state(page, NR_WRITEBACK);
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1320 1321 1322 1323 1324 1325	return ret; } EXPORT_SYMBOL(test_set_page_writeback); /*
001281881 Nick Piggin mm: use lockless ...	1326	* Return true if any of the pages in the mapping are marked with the
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1327 1328 1329 1330	* passed tag. / int mapping_tagged(struct address_space mapping, int tag) {
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1331	int ret;
001281881 Nick Piggin mm: use lockless ...	1332	rcu_read_lock();
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1333	ret = radix_tree_tagged(&mapping->page_tree, tag);
001281881 Nick Piggin mm: use lockless ...	1334	rcu_read_unlock();
1da177e4c Linus Torvalds Linux-2.6.12-rc2	1335 1336 1337	return ret; } EXPORT_SYMBOL(mapping_tagged);