Blame view

fs/ntfs/aops.c 48.1 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
  /**
   * aops.c - NTFS kernel address space operations and page cache handling.
   *	    Part of the Linux-NTFS project.
   *
bfab36e81   Anton Altaparmakov   NTFS: Fix a mount...
5
   * Copyright (c) 2001-2007 Anton Altaparmakov
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
   * Copyright (c) 2002 Richard Russon
   *
   * This program/include file is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public License as published
   * by the Free Software Foundation; either version 2 of the License, or
   * (at your option) any later version.
   *
   * This program/include file is distributed in the hope that it will be
   * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
   * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
   * along with this program (in the main directory of the Linux-NTFS
   * distribution in the file COPYING); if not, write to the Free Software
   * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   */
  
  #include <linux/errno.h>
78264bd9c   Anton Altaparmakov   NTFS: Use buffer_...
25
  #include <linux/fs.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
26
  #include <linux/gfp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
28
29
30
31
  #include <linux/mm.h>
  #include <linux/pagemap.h>
  #include <linux/swap.h>
  #include <linux/buffer_head.h>
  #include <linux/writeback.h>
b4012a989   Andrew Morton   [PATCH] ntfs buil...
32
  #include <linux/bit_spinlock.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
  
  #include "aops.h"
  #include "attrib.h"
  #include "debug.h"
  #include "inode.h"
  #include "mft.h"
  #include "runlist.h"
  #include "types.h"
  #include "ntfs.h"
  
  /**
   * ntfs_end_buffer_async_read - async io completion for reading attributes
   * @bh:		buffer head on which io is completed
   * @uptodate:	whether @bh is now uptodate or not
   *
   * Asynchronous I/O completion handler for reading pages belonging to the
   * attribute address space of an inode.  The inodes can either be files or
   * directories or they can be fake inodes describing some attribute.
   *
   * If NInoMstProtected(), perform the post read mst fixups when all IO on the
   * page has been completed and mark the page uptodate or set the error bit on
   * the page.  To determine the size of the records that need fixing up, we
   * cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
   * record size, and index_block_size_bits, to the log(base 2) of the ntfs
   * record size.
   */
  static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
61
  	unsigned long flags;
e604635c8   Anton Altaparmakov   NTFS: Improve sca...
62
  	struct buffer_head *first, *tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
63
  	struct page *page;
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
64
  	struct inode *vi;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
65
66
67
68
  	ntfs_inode *ni;
  	int page_uptodate = 1;
  
  	page = bh->b_page;
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
69
70
  	vi = page->mapping->host;
  	ni = NTFS_I(vi);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
71
72
  
  	if (likely(uptodate)) {
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
73
74
  		loff_t i_size;
  		s64 file_ofs, init_size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
75
76
77
78
79
  
  		set_buffer_uptodate(bh);
  
  		file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
  				bh_offset(bh);
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
80
  		read_lock_irqsave(&ni->size_lock, flags);
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
81
82
  		init_size = ni->initialized_size;
  		i_size = i_size_read(vi);
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
83
  		read_unlock_irqrestore(&ni->size_lock, flags);
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
84
85
86
87
  		if (unlikely(init_size > i_size)) {
  			/* Race with shrinking truncate. */
  			init_size = i_size;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
88
  		/* Check for the current buffer head overflowing. */
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
89
  		if (unlikely(file_ofs + bh->b_size > init_size)) {
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
90
  			int ofs;
eebd2aa35   Christoph Lameter   Pagecache zeroing...
91
  			void *kaddr;
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
92
93
94
95
  
  			ofs = 0;
  			if (file_ofs < init_size)
  				ofs = init_size - file_ofs;
fa8609da9   Andrew Morton   [PATCH] ntfs: kma...
96
  			local_irq_save(flags);
eebd2aa35   Christoph Lameter   Pagecache zeroing...
97
98
99
100
101
  			kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
  			memset(kaddr + bh_offset(bh) + ofs, 0,
  					bh->b_size - ofs);
  			flush_dcache_page(page);
  			kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
fa8609da9   Andrew Morton   [PATCH] ntfs: kma...
102
  			local_irq_restore(flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
103
104
105
  		}
  	} else {
  		clear_buffer_uptodate(bh);
e604635c8   Anton Altaparmakov   NTFS: Improve sca...
106
  		SetPageError(page);
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
107
108
  		ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
  				"0x%llx.", (unsigned long long)bh->b_blocknr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
109
  	}
e604635c8   Anton Altaparmakov   NTFS: Improve sca...
110
111
112
  	first = page_buffers(page);
  	local_irq_save(flags);
  	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
113
114
115
116
117
118
119
120
121
122
123
124
125
126
  	clear_buffer_async_read(bh);
  	unlock_buffer(bh);
  	tmp = bh;
  	do {
  		if (!buffer_uptodate(tmp))
  			page_uptodate = 0;
  		if (buffer_async_read(tmp)) {
  			if (likely(buffer_locked(tmp)))
  				goto still_busy;
  			/* Async buffers must be locked. */
  			BUG();
  		}
  		tmp = tmp->b_this_page;
  	} while (tmp != bh);
e604635c8   Anton Altaparmakov   NTFS: Improve sca...
127
128
  	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  	local_irq_restore(flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
129
130
131
132
133
134
135
136
137
138
139
140
  	/*
  	 * If none of the buffers had errors then we can set the page uptodate,
  	 * but we first have to perform the post read mst fixups, if the
  	 * attribute is mst protected, i.e. if NInoMstProteced(ni) is true.
  	 * Note we ignore fixup errors as those are detected when
  	 * map_mft_record() is called which gives us per record granularity
  	 * rather than per page granularity.
  	 */
  	if (!NInoMstProtected(ni)) {
  		if (likely(page_uptodate && !PageError(page)))
  			SetPageUptodate(page);
  	} else {
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
141
  		u8 *kaddr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
142
143
144
145
146
147
148
  		unsigned int i, recs;
  		u32 rec_size;
  
  		rec_size = ni->itype.index.block_size;
  		recs = PAGE_CACHE_SIZE / rec_size;
  		/* Should have been verified before we got here... */
  		BUG_ON(!recs);
fa8609da9   Andrew Morton   [PATCH] ntfs: kma...
149
  		local_irq_save(flags);
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
150
  		kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
151
  		for (i = 0; i < recs; i++)
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
152
  			post_read_mst_fixup((NTFS_RECORD*)(kaddr +
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
153
  					i * rec_size), rec_size);
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
154
  		kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
fa8609da9   Andrew Morton   [PATCH] ntfs: kma...
155
  		local_irq_restore(flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
156
  		flush_dcache_page(page);
b6ad6c52f   Anton Altaparmakov   NTFS: - Split ntf...
157
  		if (likely(page_uptodate && !PageError(page)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
158
159
160
161
162
  			SetPageUptodate(page);
  	}
  	unlock_page(page);
  	return;
  still_busy:
e604635c8   Anton Altaparmakov   NTFS: Improve sca...
163
164
  	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  	local_irq_restore(flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
  	return;
  }
  
  /**
   * ntfs_read_block - fill a @page of an address space with data
   * @page:	page cache page to fill with data
   *
   * Fill the page @page of the address space belonging to the @page->host inode.
   * We read each buffer asynchronously and when all buffers are read in, our io
   * completion handler ntfs_end_buffer_read_async(), if required, automatically
   * applies the mst fixups to the page before finally marking it uptodate and
   * unlocking it.
   *
   * We only enforce allocated_size limit because i_size is checked for in
   * generic_file_read().
   *
   * Return 0 on success and -errno on error.
   *
   * Contains an adapted version of fs/buffer.c::block_read_full_page().
   */
  static int ntfs_read_block(struct page *page)
  {
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
187
  	loff_t i_size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
188
189
  	VCN vcn;
  	LCN lcn;
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
190
191
  	s64 init_size;
  	struct inode *vi;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
192
193
194
195
196
  	ntfs_inode *ni;
  	ntfs_volume *vol;
  	runlist_element *rl;
  	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
  	sector_t iblock, lblock, zblock;
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
197
  	unsigned long flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
198
199
200
  	unsigned int blocksize, vcn_ofs;
  	int i, nr;
  	unsigned char blocksize_bits;
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
201
202
  	vi = page->mapping->host;
  	ni = NTFS_I(vi);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
203
204
205
206
  	vol = ni->vol;
  
  	/* $MFT/$DATA must have its complete runlist in memory at all times. */
  	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
78af34f03   Anton Altaparmakov   NTFS: Implement s...
207
208
  	blocksize = vol->sb->s_blocksize;
  	blocksize_bits = vol->sb->s_blocksize_bits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
209

a01ac532b   Anton Altaparmakov   NTFS: Fix page_ha...
210
  	if (!page_has_buffers(page)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
211
  		create_empty_buffers(page, blocksize, 0);
a01ac532b   Anton Altaparmakov   NTFS: Fix page_ha...
212
213
214
215
  		if (unlikely(!page_has_buffers(page))) {
  			unlock_page(page);
  			return -ENOMEM;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
216
  	}
a01ac532b   Anton Altaparmakov   NTFS: Fix page_ha...
217
218
  	bh = head = page_buffers(page);
  	BUG_ON(!bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
219

f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
220
221
222
223
224
225
226
227
228
229
230
  	/*
  	 * We may be racing with truncate.  To avoid some of the problems we
  	 * now take a snapshot of the various sizes and use those for the whole
  	 * of the function.  In case of an extending truncate it just means we
  	 * may leave some buffers unmapped which are now allocated.  This is
  	 * not a problem since these buffers will just get mapped when a write
  	 * occurs.  In case of a shrinking truncate, we will detect this later
  	 * on due to the runlist being incomplete and if the page is being
  	 * fully truncated, truncate will throw it away as soon as we unlock
  	 * it so no need to worry what we do with it.
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
231
  	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
232
  	read_lock_irqsave(&ni->size_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
233
  	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
234
235
  	init_size = ni->initialized_size;
  	i_size = i_size_read(vi);
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
236
  	read_unlock_irqrestore(&ni->size_lock, flags);
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
237
238
239
240
241
  	if (unlikely(init_size > i_size)) {
  		/* Race with shrinking truncate. */
  		init_size = i_size;
  	}
  	zblock = (init_size + blocksize - 1) >> blocksize_bits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
242
243
244
245
246
  
  	/* Loop through all the buffers in the page. */
  	rl = NULL;
  	nr = i = 0;
  	do {
e3bf460f3   Nate Diller   ntfs: use zero_us...
247
  		int err = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
248
249
250
251
252
253
254
255
256
257
  
  		if (unlikely(buffer_uptodate(bh)))
  			continue;
  		if (unlikely(buffer_mapped(bh))) {
  			arr[nr++] = bh;
  			continue;
  		}
  		bh->b_bdev = vol->sb->s_bdev;
  		/* Is the block within the allowed limits? */
  		if (iblock < lblock) {
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
258
  			bool is_retry = false;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
  
  			/* Convert iblock into corresponding vcn and offset. */
  			vcn = (VCN)iblock << blocksize_bits >>
  					vol->cluster_size_bits;
  			vcn_ofs = ((VCN)iblock << blocksize_bits) &
  					vol->cluster_size_mask;
  			if (!rl) {
  lock_retry_remap:
  				down_read(&ni->runlist.lock);
  				rl = ni->runlist.rl;
  			}
  			if (likely(rl != NULL)) {
  				/* Seek to element containing target vcn. */
  				while (rl->length && rl[1].vcn <= vcn)
  					rl++;
  				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
  			} else
  				lcn = LCN_RL_NOT_MAPPED;
  			/* Successful remap. */
  			if (lcn >= 0) {
  				/* Setup buffer head to correct block. */
  				bh->b_blocknr = ((lcn << vol->cluster_size_bits)
  						+ vcn_ofs) >> blocksize_bits;
  				set_buffer_mapped(bh);
  				/* Only read initialized data blocks. */
  				if (iblock < zblock) {
  					arr[nr++] = bh;
  					continue;
  				}
  				/* Fully non-initialized data block, zero it. */
  				goto handle_zblock;
  			}
  			/* It is a hole, need to zero it. */
  			if (lcn == LCN_HOLE)
  				goto handle_hole;
  			/* If first try and runlist unmapped, map and retry. */
  			if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
296
  				is_retry = true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
297
298
299
300
301
302
303
304
305
  				/*
  				 * Attempt to map runlist, dropping lock for
  				 * the duration.
  				 */
  				up_read(&ni->runlist.lock);
  				err = ntfs_map_runlist(ni, vcn);
  				if (likely(!err))
  					goto lock_retry_remap;
  				rl = NULL;
9f993fe46   Anton Altaparmakov   NTFS: Fix a bug i...
306
307
  			} else if (!rl)
  				up_read(&ni->runlist.lock);
8273d5d4c   Anton Altaparmakov   NTFS: Fix fs/ntfs...
308
309
310
311
312
313
314
315
316
  			/*
  			 * If buffer is outside the runlist, treat it as a
  			 * hole.  This can happen due to concurrent truncate
  			 * for example.
  			 */
  			if (err == -ENOENT || lcn == LCN_ENOENT) {
  				err = 0;
  				goto handle_hole;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
317
  			/* Hard error, zero out region. */
8273d5d4c   Anton Altaparmakov   NTFS: Fix fs/ntfs...
318
319
  			if (!err)
  				err = -EIO;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
320
321
322
323
324
325
  			bh->b_blocknr = -1;
  			SetPageError(page);
  			ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
  					"attribute type 0x%x, vcn 0x%llx, "
  					"offset 0x%x because its location on "
  					"disk could not be determined%s "
8273d5d4c   Anton Altaparmakov   NTFS: Fix fs/ntfs...
326
  					"(error code %i).", ni->mft_no,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
327
328
  					ni->type, (unsigned long long)vcn,
  					vcn_ofs, is_retry ? " even after "
8273d5d4c   Anton Altaparmakov   NTFS: Fix fs/ntfs...
329
  					"retrying" : "", err);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
330
331
332
333
334
335
336
337
338
339
  		}
  		/*
  		 * Either iblock was outside lblock limits or
  		 * ntfs_rl_vcn_to_lcn() returned error.  Just zero that portion
  		 * of the page and set the buffer uptodate.
  		 */
  handle_hole:
  		bh->b_blocknr = -1UL;
  		clear_buffer_mapped(bh);
  handle_zblock:
eebd2aa35   Christoph Lameter   Pagecache zeroing...
340
  		zero_user(page, i * blocksize, blocksize);
8273d5d4c   Anton Altaparmakov   NTFS: Fix fs/ntfs...
341
342
  		if (likely(!err))
  			set_buffer_uptodate(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
  	} while (i++, iblock++, (bh = bh->b_this_page) != head);
  
  	/* Release the lock if we took it. */
  	if (rl)
  		up_read(&ni->runlist.lock);
  
  	/* Check we have at least one buffer ready for i/o. */
  	if (nr) {
  		struct buffer_head *tbh;
  
  		/* Lock the buffers. */
  		for (i = 0; i < nr; i++) {
  			tbh = arr[i];
  			lock_buffer(tbh);
  			tbh->b_end_io = ntfs_end_buffer_async_read;
  			set_buffer_async_read(tbh);
  		}
  		/* Finally, start i/o on the buffers. */
  		for (i = 0; i < nr; i++) {
  			tbh = arr[i];
  			if (likely(!buffer_uptodate(tbh)))
  				submit_bh(READ, tbh);
  			else
  				ntfs_end_buffer_async_read(tbh, 1);
  		}
  		return 0;
  	}
  	/* No i/o was scheduled on any of the buffers. */
  	if (likely(!PageError(page)))
  		SetPageUptodate(page);
  	else /* Signal synchronous i/o error. */
  		nr = -EIO;
  	unlock_page(page);
  	return nr;
  }
  
  /**
   * ntfs_readpage - fill a @page of a @file with data from the device
   * @file:	open file to which the page @page belongs or NULL
   * @page:	page cache page to fill with data
   *
   * For non-resident attributes, ntfs_readpage() fills the @page of the open
   * file @file by calling the ntfs version of the generic block_read_full_page()
   * function, ntfs_read_block(), which in turn creates and reads in the buffers
   * associated with the page asynchronously.
   *
   * For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
   * data from the mft record (which at this stage is most likely in memory) and
   * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
   * even if the mft record is not cached at this point in time, we need to wait
   * for it to be read in before we can do the copy.
   *
   * Return 0 on success and -errno on error.
   */
  static int ntfs_readpage(struct file *file, struct page *page)
  {
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
399
400
  	loff_t i_size;
  	struct inode *vi;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
401
  	ntfs_inode *ni, *base_ni;
bfab36e81   Anton Altaparmakov   NTFS: Fix a mount...
402
  	u8 *addr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
403
404
  	ntfs_attr_search_ctx *ctx;
  	MFT_RECORD *mrec;
b6ad6c52f   Anton Altaparmakov   NTFS: - Split ntf...
405
  	unsigned long flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
406
407
  	u32 attr_len;
  	int err = 0;
905685f68   Anton Altaparmakov   NTFS: - Modify ->...
408
  retry_readpage:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
409
  	BUG_ON(!PageLocked(page));
ebab89909   Anton Altaparmakov   NTFS: Fix read re...
410
411
412
413
414
  	vi = page->mapping->host;
  	i_size = i_size_read(vi);
  	/* Is the page fully outside i_size? (truncate in progress) */
  	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
  			PAGE_CACHE_SHIFT)) {
eebd2aa35   Christoph Lameter   Pagecache zeroing...
415
  		zero_user(page, 0, PAGE_CACHE_SIZE);
ebab89909   Anton Altaparmakov   NTFS: Fix read re...
416
417
418
  		ntfs_debug("Read outside i_size - truncated?");
  		goto done;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
419
420
421
422
423
424
425
426
  	/*
  	 * This can potentially happen because we clear PageUptodate() during
  	 * ntfs_writepage() of MstProtected() attributes.
  	 */
  	if (PageUptodate(page)) {
  		unlock_page(page);
  		return 0;
  	}
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
427
  	ni = NTFS_I(vi);
311120eca   Anton Altaparmakov   NTFS: Fixup handl...
428
429
430
431
  	/*
  	 * Only $DATA attributes can be encrypted and only unnamed $DATA
  	 * attributes can be compressed.  Index root can have the flags set but
  	 * this means to create compressed/encrypted files, not that the
4e64c8869   Anton Altaparmakov   NTFS: Fix handlin...
432
433
434
  	 * attribute is compressed/encrypted.  Note we need to check for
  	 * AT_INDEX_ALLOCATION since this is the type of both directory and
  	 * index inodes.
311120eca   Anton Altaparmakov   NTFS: Fixup handl...
435
  	 */
4e64c8869   Anton Altaparmakov   NTFS: Fix handlin...
436
  	if (ni->type != AT_INDEX_ALLOCATION) {
311120eca   Anton Altaparmakov   NTFS: Fixup handl...
437
438
439
440
441
442
443
444
445
446
447
448
449
  		/* If attribute is encrypted, deny access, just like NT4. */
  		if (NInoEncrypted(ni)) {
  			BUG_ON(ni->type != AT_DATA);
  			err = -EACCES;
  			goto err_out;
  		}
  		/* Compressed data streams are handled in compress.c. */
  		if (NInoNonResident(ni) && NInoCompressed(ni)) {
  			BUG_ON(ni->type != AT_DATA);
  			BUG_ON(ni->name_len);
  			return ntfs_read_compressed_block(page);
  		}
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
450
451
  	/* NInoNonResident() == NInoIndexAllocPresent() */
  	if (NInoNonResident(ni)) {
311120eca   Anton Altaparmakov   NTFS: Fixup handl...
452
  		/* Normal, non-resident data stream. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
453
454
455
456
457
458
  		return ntfs_read_block(page);
  	}
  	/*
  	 * Attribute is resident, implying it is not compressed or encrypted.
  	 * This also means the attribute is smaller than an mft record and
  	 * hence smaller than a page, so can simply zero out any pages with
311120eca   Anton Altaparmakov   NTFS: Fixup handl...
459
460
461
  	 * index above 0.  Note the attribute can actually be marked compressed
  	 * but if it is resident the actual data is not compressed so we are
  	 * ok to ignore the compressed flag here.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
462
  	 */
b6ad6c52f   Anton Altaparmakov   NTFS: - Split ntf...
463
  	if (unlikely(page->index > 0)) {
eebd2aa35   Christoph Lameter   Pagecache zeroing...
464
  		zero_user(page, 0, PAGE_CACHE_SIZE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
465
466
467
468
469
470
471
472
473
474
475
476
  		goto done;
  	}
  	if (!NInoAttr(ni))
  		base_ni = ni;
  	else
  		base_ni = ni->ext.base_ntfs_ino;
  	/* Map, pin, and lock the mft record. */
  	mrec = map_mft_record(base_ni);
  	if (IS_ERR(mrec)) {
  		err = PTR_ERR(mrec);
  		goto err_out;
  	}
905685f68   Anton Altaparmakov   NTFS: - Modify ->...
477
478
479
480
481
482
483
484
  	/*
  	 * If a parallel write made the attribute non-resident, drop the mft
  	 * record and retry the readpage.
  	 */
  	if (unlikely(NInoNonResident(ni))) {
  		unmap_mft_record(base_ni);
  		goto retry_readpage;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
485
486
487
488
489
490
491
492
493
494
  	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
  	if (unlikely(!ctx)) {
  		err = -ENOMEM;
  		goto unm_err_out;
  	}
  	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
  			CASE_SENSITIVE, 0, NULL, 0, ctx);
  	if (unlikely(err))
  		goto put_unm_err_out;
  	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
b6ad6c52f   Anton Altaparmakov   NTFS: - Split ntf...
495
496
497
  	read_lock_irqsave(&ni->size_lock, flags);
  	if (unlikely(attr_len > ni->initialized_size))
  		attr_len = ni->initialized_size;
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
498
  	i_size = i_size_read(vi);
b6ad6c52f   Anton Altaparmakov   NTFS: - Split ntf...
499
  	read_unlock_irqrestore(&ni->size_lock, flags);
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
500
501
502
503
  	if (unlikely(attr_len > i_size)) {
  		/* Race with shrinking truncate. */
  		attr_len = i_size;
  	}
bfab36e81   Anton Altaparmakov   NTFS: Fix a mount...
504
  	addr = kmap_atomic(page, KM_USER0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
505
  	/* Copy the data to the page. */
bfab36e81   Anton Altaparmakov   NTFS: Fix a mount...
506
  	memcpy(addr, (u8*)ctx->attr +
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
507
508
509
  			le16_to_cpu(ctx->attr->data.resident.value_offset),
  			attr_len);
  	/* Zero the remainder of the page. */
bfab36e81   Anton Altaparmakov   NTFS: Fix a mount...
510
  	memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
511
  	flush_dcache_page(page);
bfab36e81   Anton Altaparmakov   NTFS: Fix a mount...
512
  	kunmap_atomic(addr, KM_USER0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
  put_unm_err_out:
  	ntfs_attr_put_search_ctx(ctx);
  unm_err_out:
  	unmap_mft_record(base_ni);
  done:
  	SetPageUptodate(page);
  err_out:
  	unlock_page(page);
  	return err;
  }
  
  #ifdef NTFS_RW
  
  /**
   * ntfs_write_block - write a @page to the backing store
   * @page:	page cache page to write out
   * @wbc:	writeback control structure
   *
   * This function is for writing pages belonging to non-resident, non-mst
   * protected attributes to their backing store.
   *
   * For a page with buffers, map and write the dirty buffers asynchronously
   * under page writeback. For a page without buffers, create buffers for the
   * page, then proceed as above.
   *
   * If a page doesn't have buffers the page dirty state is definitive. If a page
   * does have buffers, the page dirty state is just a hint, and the buffer dirty
   * state is definitive. (A hint which has rules: dirty buffers against a clean
   * page is illegal. Other combinations are legal and need to be handled. In
   * particular a dirty page containing clean buffers for example.)
   *
   * Return 0 on success and -errno on error.
   *
   * Based on ntfs_read_block() and __block_write_full_page().
   */
  static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
  {
  	VCN vcn;
  	LCN lcn;
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
552
553
  	s64 initialized_size;
  	loff_t i_size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
554
555
556
557
558
559
  	sector_t block, dblock, iblock;
  	struct inode *vi;
  	ntfs_inode *ni;
  	ntfs_volume *vol;
  	runlist_element *rl;
  	struct buffer_head *bh, *head;
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
560
  	unsigned long flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
561
562
  	unsigned int blocksize, vcn_ofs;
  	int err;
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
563
  	bool need_end_writeback;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
564
565
566
567
568
569
570
571
572
573
574
  	unsigned char blocksize_bits;
  
  	vi = page->mapping->host;
  	ni = NTFS_I(vi);
  	vol = ni->vol;
  
  	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
  			"0x%lx.", ni->mft_no, ni->type, page->index);
  
  	BUG_ON(!NInoNonResident(ni));
  	BUG_ON(NInoMstProtected(ni));
78af34f03   Anton Altaparmakov   NTFS: Implement s...
575
576
  	blocksize = vol->sb->s_blocksize;
  	blocksize_bits = vol->sb->s_blocksize_bits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
577
578
579
580
  	if (!page_has_buffers(page)) {
  		BUG_ON(!PageUptodate(page));
  		create_empty_buffers(page, blocksize,
  				(1 << BH_Uptodate) | (1 << BH_Dirty));
a01ac532b   Anton Altaparmakov   NTFS: Fix page_ha...
581
582
583
584
585
586
587
588
589
590
591
592
  		if (unlikely(!page_has_buffers(page))) {
  			ntfs_warning(vol->sb, "Error allocating page "
  					"buffers.  Redirtying page so we try "
  					"again later.");
  			/*
  			 * Put the page back on mapping->dirty_pages, but leave
  			 * its buffers' dirty state as-is.
  			 */
  			redirty_page_for_writepage(wbc, page);
  			unlock_page(page);
  			return 0;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
593
594
  	}
  	bh = head = page_buffers(page);
a01ac532b   Anton Altaparmakov   NTFS: Fix page_ha...
595
  	BUG_ON(!bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
596
597
598
599
600
  
  	/* NOTE: Different naming scheme to ntfs_read_block()! */
  
  	/* The first block in the page. */
  	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
601
602
603
604
  	read_lock_irqsave(&ni->size_lock, flags);
  	i_size = i_size_read(vi);
  	initialized_size = ni->initialized_size;
  	read_unlock_irqrestore(&ni->size_lock, flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
605
  	/* The first out of bounds block for the data size. */
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
606
  	dblock = (i_size + blocksize - 1) >> blocksize_bits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
607
608
  
  	/* The last (fully or partially) initialized block. */
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
609
  	iblock = initialized_size >> blocksize_bits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
  
  	/*
  	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
  	 * here, and the (potentially unmapped) buffers may become dirty at
  	 * any time.  If a buffer becomes dirty here after we've inspected it
  	 * then we just miss that fact, and the page stays dirty.
  	 *
  	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
  	 * handle that here by just cleaning them.
  	 */
  
  	/*
  	 * Loop through all the buffers in the page, mapping all the dirty
  	 * buffers to disk addresses and handling any aliases from the
  	 * underlying block device's mapping.
  	 */
  	rl = NULL;
  	err = 0;
  	do {
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
629
  		bool is_retry = false;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
  
  		if (unlikely(block >= dblock)) {
  			/*
  			 * Mapped buffers outside i_size will occur, because
  			 * this page can be outside i_size when there is a
  			 * truncate in progress. The contents of such buffers
  			 * were zeroed by ntfs_writepage().
  			 *
  			 * FIXME: What about the small race window where
  			 * ntfs_writepage() has not done any clearing because
  			 * the page was within i_size but before we get here,
  			 * vmtruncate() modifies i_size?
  			 */
  			clear_buffer_dirty(bh);
  			set_buffer_uptodate(bh);
  			continue;
  		}
  
  		/* Clean buffers are not written out, so no need to map them. */
  		if (!buffer_dirty(bh))
  			continue;
  
  		/* Make sure we have enough initialized size. */
  		if (unlikely((block >= iblock) &&
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
654
  				(initialized_size < i_size))) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
  			/*
  			 * If this page is fully outside initialized size, zero
  			 * out all pages between the current initialized size
  			 * and the current page. Just use ntfs_readpage() to do
  			 * the zeroing transparently.
  			 */
  			if (block > iblock) {
  				// TODO:
  				// For each page do:
  				// - read_cache_page()
  				// Again for each page do:
  				// - wait_on_page_locked()
  				// - Check (PageUptodate(page) &&
  				//			!PageError(page))
  				// Update initialized size in the attribute and
  				// in the inode.
  				// Again, for each page do:
  				//	__set_page_dirty_buffers();
  				// page_cache_release()
  				// We don't need to wait on the writes.
  				// Update iblock.
  			}
  			/*
  			 * The current page straddles initialized size. Zero
  			 * all non-uptodate buffers and set them uptodate (and
  			 * dirty?). Note, there aren't any non-uptodate buffers
  			 * if the page is uptodate.
  			 * FIXME: For an uptodate page, the buffers may need to
  			 * be written out because they were not initialized on
  			 * disk before.
  			 */
  			if (!PageUptodate(page)) {
  				// TODO:
  				// Zero any non-uptodate buffers up to i_size.
  				// Set them uptodate and dirty.
  			}
  			// TODO:
  			// Update initialized size in the attribute and in the
  			// inode (up to i_size).
  			// Update iblock.
  			// FIXME: This is inefficient. Try to batch the two
  			// size changes to happen in one go.
  			ntfs_error(vol->sb, "Writing beyond initialized size "
  					"is not supported yet. Sorry.");
  			err = -EOPNOTSUPP;
  			break;
  			// Do NOT set_buffer_new() BUT DO clear buffer range
  			// outside write request range.
  			// set_buffer_uptodate() on complete buffers as well as
  			// set_buffer_dirty().
  		}
  
  		/* No need to map buffers that are already mapped. */
  		if (buffer_mapped(bh))
  			continue;
  
  		/* Unmapped, dirty buffer. Need to map it. */
  		bh->b_bdev = vol->sb->s_bdev;
  
  		/* Convert block into corresponding vcn and offset. */
  		vcn = (VCN)block << blocksize_bits;
  		vcn_ofs = vcn & vol->cluster_size_mask;
  		vcn >>= vol->cluster_size_bits;
  		if (!rl) {
  lock_retry_remap:
  			down_read(&ni->runlist.lock);
  			rl = ni->runlist.rl;
  		}
  		if (likely(rl != NULL)) {
  			/* Seek to element containing target vcn. */
  			while (rl->length && rl[1].vcn <= vcn)
  				rl++;
  			lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
  		} else
  			lcn = LCN_RL_NOT_MAPPED;
  		/* Successful remap. */
  		if (lcn >= 0) {
  			/* Setup buffer head to point to correct block. */
  			bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
  					vcn_ofs) >> blocksize_bits;
  			set_buffer_mapped(bh);
  			continue;
  		}
  		/* It is a hole, need to instantiate it. */
  		if (lcn == LCN_HOLE) {
8dcdebafb   Anton Altaparmakov   NTFS: Make ntfs_w...
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
  			u8 *kaddr;
  			unsigned long *bpos, *bend;
  
  			/* Check if the buffer is zero. */
  			kaddr = kmap_atomic(page, KM_USER0);
  			bpos = (unsigned long *)(kaddr + bh_offset(bh));
  			bend = (unsigned long *)((u8*)bpos + blocksize);
  			do {
  				if (unlikely(*bpos))
  					break;
  			} while (likely(++bpos < bend));
  			kunmap_atomic(kaddr, KM_USER0);
  			if (bpos == bend) {
  				/*
  				 * Buffer is zero and sparse, no need to write
  				 * it.
  				 */
  				bh->b_blocknr = -1;
  				clear_buffer_dirty(bh);
  				continue;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
761
762
763
764
765
766
767
768
769
770
  			// TODO: Instantiate the hole.
  			// clear_buffer_new(bh);
  			// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
  			ntfs_error(vol->sb, "Writing into sparse regions is "
  					"not supported yet. Sorry.");
  			err = -EOPNOTSUPP;
  			break;
  		}
  		/* If first try and runlist unmapped, map and retry. */
  		if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
771
  			is_retry = true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
772
773
774
775
776
777
778
779
780
  			/*
  			 * Attempt to map runlist, dropping lock for
  			 * the duration.
  			 */
  			up_read(&ni->runlist.lock);
  			err = ntfs_map_runlist(ni, vcn);
  			if (likely(!err))
  				goto lock_retry_remap;
  			rl = NULL;
9f993fe46   Anton Altaparmakov   NTFS: Fix a bug i...
781
782
  		} else if (!rl)
  			up_read(&ni->runlist.lock);
8273d5d4c   Anton Altaparmakov   NTFS: Fix fs/ntfs...
783
784
785
786
787
788
  		/*
  		 * If buffer is outside the runlist, truncate has cut it out
  		 * of the runlist.  Just clean and clear the buffer and set it
  		 * uptodate so it can get discarded by the VM.
  		 */
  		if (err == -ENOENT || lcn == LCN_ENOENT) {
8273d5d4c   Anton Altaparmakov   NTFS: Fix fs/ntfs...
789
790
  			bh->b_blocknr = -1;
  			clear_buffer_dirty(bh);
eebd2aa35   Christoph Lameter   Pagecache zeroing...
791
  			zero_user(page, bh_offset(bh), blocksize);
8273d5d4c   Anton Altaparmakov   NTFS: Fix fs/ntfs...
792
793
794
795
  			set_buffer_uptodate(bh);
  			err = 0;
  			continue;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
796
  		/* Failed to map the buffer, even after retrying. */
8273d5d4c   Anton Altaparmakov   NTFS: Fix fs/ntfs...
797
798
  		if (!err)
  			err = -EIO;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
799
800
801
802
  		bh->b_blocknr = -1;
  		ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
  				"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
  				"because its location on disk could not be "
8273d5d4c   Anton Altaparmakov   NTFS: Fix fs/ntfs...
803
  				"determined%s (error code %i).", ni->mft_no,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
804
805
  				ni->type, (unsigned long long)vcn,
  				vcn_ofs, is_retry ? " even after "
8273d5d4c   Anton Altaparmakov   NTFS: Fix fs/ntfs...
806
  				"retrying" : "", err);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
807
808
809
810
811
812
813
814
815
  		break;
  	} while (block++, (bh = bh->b_this_page) != head);
  
  	/* Release the lock if we took it. */
  	if (rl)
  		up_read(&ni->runlist.lock);
  
  	/* For the error case, need to reset bh to the beginning. */
  	bh = head;
54b02eb01   Anton Altaparmakov   NTFS: Optimize fs...
816
  	/* Just an optimization, so ->readpage() is not called later. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
  	if (unlikely(!PageUptodate(page))) {
  		int uptodate = 1;
  		do {
  			if (!buffer_uptodate(bh)) {
  				uptodate = 0;
  				bh = head;
  				break;
  			}
  		} while ((bh = bh->b_this_page) != head);
  		if (uptodate)
  			SetPageUptodate(page);
  	}
  
  	/* Setup all mapped, dirty buffers for async write i/o. */
  	do {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
  		if (buffer_mapped(bh) && buffer_dirty(bh)) {
  			lock_buffer(bh);
  			if (test_clear_buffer_dirty(bh)) {
  				BUG_ON(!buffer_uptodate(bh));
  				mark_buffer_async_write(bh);
  			} else
  				unlock_buffer(bh);
  		} else if (unlikely(err)) {
  			/*
  			 * For the error case. The buffer may have been set
  			 * dirty during attachment to a dirty page.
  			 */
  			if (err != -ENOMEM)
  				clear_buffer_dirty(bh);
  		}
  	} while ((bh = bh->b_this_page) != head);
  
  	if (unlikely(err)) {
  		// TODO: Remove the -EOPNOTSUPP check later on...
  		if (unlikely(err == -EOPNOTSUPP))
  			err = 0;
  		else if (err == -ENOMEM) {
  			ntfs_warning(vol->sb, "Error allocating memory. "
  					"Redirtying page so we try again "
  					"later.");
  			/*
  			 * Put the page back on mapping->dirty_pages, but
  			 * leave its buffer's dirty state as-is.
  			 */
  			redirty_page_for_writepage(wbc, page);
  			err = 0;
  		} else
  			SetPageError(page);
  	}
  
  	BUG_ON(PageWriteback(page));
  	set_page_writeback(page);	/* Keeps try_to_free_buffers() away. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
869

54b02eb01   Anton Altaparmakov   NTFS: Optimize fs...
870
  	/* Submit the prepared buffers for i/o. */
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
871
  	need_end_writeback = true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
872
873
874
875
  	do {
  		struct buffer_head *next = bh->b_this_page;
  		if (buffer_async_write(bh)) {
  			submit_bh(WRITE, bh);
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
876
  			need_end_writeback = false;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
877
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
878
879
  		bh = next;
  	} while (bh != head);
54b02eb01   Anton Altaparmakov   NTFS: Optimize fs...
880
  	unlock_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
  
  	/* If no i/o was started, need to end_page_writeback(). */
  	if (unlikely(need_end_writeback))
  		end_page_writeback(page);
  
  	ntfs_debug("Done.");
  	return err;
  }
  
  /**
   * ntfs_write_mst_block - write a @page to the backing store
   * @page:	page cache page to write out
   * @wbc:	writeback control structure
   *
   * This function is for writing pages belonging to non-resident, mst protected
   * attributes to their backing store.  The only supported attributes are index
   * allocation and $MFT/$DATA.  Both directory inodes and index inodes are
   * supported for the index allocation case.
   *
   * The page must remain locked for the duration of the write because we apply
   * the mst fixups, write, and then undo the fixups, so if we were to unlock the
   * page before undoing the fixups, any other user of the page will see the
   * page contents as corrupt.
   *
   * We clear the page uptodate flag for the duration of the function to ensure
   * exclusion for the $MFT/$DATA case against someone mapping an mft record we
   * are about to apply the mst fixups to.
   *
   * If a record cannot be written at this time, either because
   * ntfs_may_write_mft_record() declines it or because mapping the runlist
   * failed with -ENOMEM, the page is redirtied with
   * redirty_page_for_writepage() before returning.
   *
   * Return 0 on success and -errno on error.
   *
   * Based on ntfs_write_block(), ntfs_mft_writepage(), and
   * write_mft_record_nolock().
   */
  static int ntfs_write_mst_block(struct page *page,
  		struct writeback_control *wbc)
  {
  	sector_t block, dblock, rec_block;
  	struct inode *vi = page->mapping->host;
  	ntfs_inode *ni = NTFS_I(vi);
  	ntfs_volume *vol = ni->vol;
  	u8 *kaddr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
922
923
924
  	unsigned int rec_size = ni->itype.index.block_size;
  	/*
  	 * Inodes handed back by ntfs_may_write_mft_record(); they are
  	 * unlocked and released again at unm_done.
  	 */
  	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
  	struct buffer_head *bh, *head, *tbh, *rec_start_bh;
d53ee3222   Anton Altaparmakov   NTFS: Use MAX_BUF...
925
  	struct buffer_head *bhs[MAX_BUF_PER_PAGE];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
926
  	runlist_element *rl;
d53ee3222   Anton Altaparmakov   NTFS: Use MAX_BUF...
927
928
  	int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
  	unsigned bh_size, rec_size_bits;
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
929
  	bool sync, is_mft, page_is_dirty, rec_is_dirty;
d53ee3222   Anton Altaparmakov   NTFS: Use MAX_BUF...
930
  	unsigned char bh_size_bits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
931
932
933
934
935
936
937
938
939
940
941
942
943
944
  
  	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
  			"0x%lx.", vi->i_ino, ni->type, page->index);
  	BUG_ON(!NInoNonResident(ni));
  	BUG_ON(!NInoMstProtected(ni));
  	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
  	/*
  	 * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
  	 * in its page cache were to be marked dirty.  However this should
  	 * never happen with the current driver and considering we do not
  	 * handle this case here we do want to BUG(), at least for now.
  	 */
  	BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
  			(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
78af34f03   Anton Altaparmakov   NTFS: Implement s...
945
946
  	bh_size = vol->sb->s_blocksize;
  	bh_size_bits = vol->sb->s_blocksize_bits;
d53ee3222   Anton Altaparmakov   NTFS: Use MAX_BUF...
947
  	max_bhs = PAGE_CACHE_SIZE / bh_size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
948
  	BUG_ON(!max_bhs);
d53ee3222   Anton Altaparmakov   NTFS: Use MAX_BUF...
949
  	BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
950
951
952
953
954
  
  	/* Were we called for sync purposes? */
  	sync = (wbc->sync_mode == WB_SYNC_ALL);
  
  	/* Make sure we have mapped buffers. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
955
956
957
958
959
960
961
962
963
964
965
966
967
  	bh = head = page_buffers(page);
  	BUG_ON(!bh);
  
  	rec_size_bits = ni->itype.index.block_size_bits;
  	BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
  	bhs_per_rec = rec_size >> bh_size_bits;
  	BUG_ON(!bhs_per_rec);
  
  	/* The first block in the page. */
  	rec_block = block = (sector_t)page->index <<
  			(PAGE_CACHE_SHIFT - bh_size_bits);
  
  	/* The first out of bounds block for the data size. */
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
968
  	dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
969
970
971
  
  	/*
  	 * Walk every buffer in the page.  Buffers belonging to dirty records
  	 * are mapped if necessary and collected in bhs[] for writing.  @err2
  	 * holds the status of the current record (it is reset at the start of
  	 * each record) and @err accumulates the error that is finally
  	 * returned.
  	 */
  	rl = NULL;
  	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
972
  	page_is_dirty = rec_is_dirty = false;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
973
974
  	rec_start_bh = NULL;
  	do {
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
975
  		bool is_retry = false;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
976
977
978
979
  
  		if (likely(block < rec_block)) {
  			if (unlikely(block >= dblock)) {
  				clear_buffer_dirty(bh);
946929d81   Anton Altaparmakov   NTFS: Fixup the r...
980
  				set_buffer_uptodate(bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
  				continue;
  			}
  			/*
  			 * This block is not the first one in the record.  We
  			 * ignore the buffer's dirty state because we could
  			 * have raced with a parallel mark_ntfs_record_dirty().
  			 */
  			if (!rec_is_dirty)
  				continue;
  			if (unlikely(err2)) {
  				if (err2 != -ENOMEM)
  					clear_buffer_dirty(bh);
  				continue;
  			}
  		} else /* if (block == rec_block) */ {
  			BUG_ON(block > rec_block);
  			/* This block is the first one in the record. */
  			rec_block += bhs_per_rec;
  			err2 = 0;
  			if (unlikely(block >= dblock)) {
  				clear_buffer_dirty(bh);
  				continue;
  			}
  			if (!buffer_dirty(bh)) {
  				/* Clean records are not written out. */
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
1006
  				rec_is_dirty = false;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1007
1008
  				continue;
  			}
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
1009
  			rec_is_dirty = true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1010
1011
1012
1013
1014
1015
1016
  			rec_start_bh = bh;
  		}
  		/* Need to map the buffer if it is not mapped already. */
  		if (unlikely(!buffer_mapped(bh))) {
  			VCN vcn;
  			LCN lcn;
  			unsigned int vcn_ofs;
481d03742   Anton Altaparmakov   NTFS: Complete th...
1017
  			bh->b_bdev = vol->sb->s_bdev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
  			/* Obtain the vcn and offset of the current block. */
  			vcn = (VCN)block << bh_size_bits;
  			vcn_ofs = vcn & vol->cluster_size_mask;
  			vcn >>= vol->cluster_size_bits;
  			if (!rl) {
  lock_retry_remap:
  				down_read(&ni->runlist.lock);
  				rl = ni->runlist.rl;
  			}
  			if (likely(rl != NULL)) {
  				/* Seek to element containing target vcn. */
  				while (rl->length && rl[1].vcn <= vcn)
  					rl++;
  				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
  			} else
  				lcn = LCN_RL_NOT_MAPPED;
  			/* Successful remap. */
  			if (likely(lcn >= 0)) {
  				/* Setup buffer head to correct block. */
  				bh->b_blocknr = ((lcn <<
  						vol->cluster_size_bits) +
  						vcn_ofs) >> bh_size_bits;
  				set_buffer_mapped(bh);
  			} else {
  				/*
  				 * Remap failed.  Retry to map the runlist once
  				 * unless we are working on $MFT which always
  				 * has the whole of its runlist in memory.
  				 */
  				if (!is_mft && !is_retry &&
  						lcn == LCN_RL_NOT_MAPPED) {
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
1049
  					is_retry = true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1050
1051
1052
1053
1054
1055
1056
1057
1058
  					/*
  					 * Attempt to map runlist, dropping
  					 * lock for the duration.
  					 */
  					up_read(&ni->runlist.lock);
  					err2 = ntfs_map_runlist(ni, vcn);
  					if (likely(!err2))
  						goto lock_retry_remap;
  					if (err2 == -ENOMEM)
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
1059
  						page_is_dirty = true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1060
  					/* Carry the error code in lcn for the message below. */
  					lcn = err2;
9f993fe46   Anton Altaparmakov   NTFS: Fix a bug i...
1061
  				} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1062
  					err2 = -EIO;
9f993fe46   Anton Altaparmakov   NTFS: Fix a bug i...
1063
1064
1065
  					if (!rl)
  						up_read(&ni->runlist.lock);
  				}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1066
1067
1068
1069
1070
1071
1072
1073
1074
  				/* Hard error.  Abort writing this record. */
  				if (!err || err == -ENOMEM)
  					err = err2;
  				bh->b_blocknr = -1;
  				ntfs_error(vol->sb, "Cannot write ntfs record "
  						"0x%llx (inode 0x%lx, "
  						"attribute type 0x%x) because "
  						"its location on disk could "
  						"not be determined (error "
8907547d4   Randy Dunlap   NTFS: Fix printk ...
1075
1076
  						"code %lli).",
  						(long long)block <<
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
  						bh_size_bits >>
  						vol->mft_record_size_bits,
  						ni->mft_no, ni->type,
  						(long long)lcn);
  				/*
  				 * If this is not the first buffer, remove the
  				 * buffers in this record from the list of
  				 * buffers to write and clear their dirty bit
  				 * if not error -ENOMEM.
  				 */
  				if (rec_start_bh != bh) {
  					while (bhs[--nr_bhs] != rec_start_bh)
  						;
  					if (err2 != -ENOMEM) {
  						do {
  							clear_buffer_dirty(
  								rec_start_bh);
  						} while ((rec_start_bh =
  								rec_start_bh->
  								b_this_page) !=
  								bh);
  					}
  				}
  				continue;
  			}
  		}
  		BUG_ON(!buffer_uptodate(bh));
  		BUG_ON(nr_bhs >= max_bhs);
  		bhs[nr_bhs++] = bh;
  	} while (block++, (bh = bh->b_this_page) != head);
  	if (unlikely(rl))
  		up_read(&ni->runlist.lock);
  	/* If there were no dirty buffers, we are done. */
  	if (!nr_bhs)
  		goto done;
  	/* Map the page so we can access its contents. */
  	kaddr = kmap(page);
  	/* Clear the page uptodate flag whilst the mst fixups are applied. */
  	BUG_ON(!PageUptodate(page));
  	ClearPageUptodate(page);
  	for (i = 0; i < nr_bhs; i++) {
  		unsigned int ofs;
  
  		/* Skip buffers which are not at the beginning of records. */
  		if (i % bhs_per_rec)
  			continue;
  		tbh = bhs[i];
  		ofs = bh_offset(tbh);
  		if (is_mft) {
  			ntfs_inode *tni;
  			unsigned long mft_no;
  
  			/* Get the mft record number. */
  			mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
  					>> rec_size_bits;
  			/* Check whether to write this mft record. */
  			tni = NULL;
  			if (!ntfs_may_write_mft_record(vol, mft_no,
  					(MFT_RECORD*)(kaddr + ofs), &tni)) {
  				/*
  				 * The record should not be written.  This
  				 * means we need to redirty the page before
  				 * returning.
  				 */
c49c31115   Richard Knutsson   [PATCH] fs/ntfs: ...
1141
  				page_is_dirty = true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
  				/*
  				 * Remove the buffers in this mft record from
  				 * the list of buffers to write.
  				 */
  				do {
  					bhs[i] = NULL;
  				} while (++i % bhs_per_rec);
  				continue;
  			}
  			/*
  			 * The record should be written.  If a locked ntfs
  			 * inode was returned, add it to the array of locked
  			 * ntfs inodes.
  			 */
  			if (tni)
  				locked_nis[nr_locked_nis++] = tni;
  		}
  		/* Apply the mst protection fixups. */
  		err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
  				rec_size);
  		if (unlikely(err2)) {
  			if (!err || err == -ENOMEM)
  				err = -EIO;
  			ntfs_error(vol->sb, "Failed to apply mst fixups "
  					"(inode 0x%lx, attribute type 0x%x, "
  					"page index 0x%lx, page offset 0x%x)!"
  					"  Unmount and run chkdsk.", vi->i_ino,
  					ni->type, page->index, ofs);
  			/*
  			 * Mark all the buffers in this record clean as we do
  			 * not want to write corrupt data to disk.
  			 */
  			do {
  				clear_buffer_dirty(bhs[i]);
  				bhs[i] = NULL;
  			} while (++i % bhs_per_rec);
  			continue;
  		}
  		nr_recs++;
  	}
  	/* If no records are to be written out, we are done. */
  	if (!nr_recs)
  		goto unm_done;
  	flush_dcache_page(page);
  	/* Lock buffers and start synchronous write i/o on them. */
  	for (i = 0; i < nr_bhs; i++) {
  		tbh = bhs[i];
  		if (!tbh)
  			continue;
ca5de404f   Nick Piggin   fs: rename buffer...
1191
  		if (!trylock_buffer(tbh))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
  			BUG();
  		/* The buffer dirty state is now irrelevant, just clean it. */
  		clear_buffer_dirty(tbh);
  		BUG_ON(!buffer_uptodate(tbh));
  		BUG_ON(!buffer_mapped(tbh));
  		get_bh(tbh);
  		tbh->b_end_io = end_buffer_write_sync;
  		submit_bh(WRITE, tbh);
  	}
  	/*
  	 * Synchronize the mft mirror now if not @sync.  In that case the
  	 * do_mirror code jumps back to do_wait afterwards; if @sync, we wait
  	 * first and synchronize the mirror after the wait.
  	 */
  	if (is_mft && !sync)
  		goto do_mirror;
  do_wait:
  	/* Wait on i/o completion of buffers. */
  	for (i = 0; i < nr_bhs; i++) {
  		tbh = bhs[i];
  		if (!tbh)
  			continue;
  		wait_on_buffer(tbh);
  		if (unlikely(!buffer_uptodate(tbh))) {
  			ntfs_error(vol->sb, "I/O error while writing ntfs "
  					"record buffer (inode 0x%lx, "
  					"attribute type 0x%x, page index "
  					"0x%lx, page offset 0x%lx)!  Unmount "
  					"and run chkdsk.", vi->i_ino, ni->type,
  					page->index, bh_offset(tbh));
  			if (!err || err == -ENOMEM)
  				err = -EIO;
  			/*
  			 * Set the buffer uptodate so the page and buffer
  			 * states do not become out of sync.
  			 */
  			set_buffer_uptodate(tbh);
  		}
  	}
  	/* If @sync, now synchronize the mft mirror. */
  	if (is_mft && sync) {
  do_mirror:
  		for (i = 0; i < nr_bhs; i++) {
  			unsigned long mft_no;
  			unsigned int ofs;
  
  			/*
  			 * Skip buffers which are not at the beginning of
  			 * records.
  			 */
  			if (i % bhs_per_rec)
  				continue;
  			tbh = bhs[i];
  			/* Skip removed buffers (and hence records). */
  			if (!tbh)
  				continue;
  			ofs = bh_offset(tbh);
  			/* Get the mft record number. */
  			mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
  					>> rec_size_bits;
  			if (mft_no < vol->mftmirr_size)
  				ntfs_sync_mft_mirror(vol, mft_no,
  						(MFT_RECORD*)(kaddr + ofs),
  						sync);
  		}
  		if (!sync)
  			goto do_wait;
  	}
  	/* Remove the mst protection fixups again. */
  	for (i = 0; i < nr_bhs; i++) {
  		if (!(i % bhs_per_rec)) {
  			tbh = bhs[i];
  			if (!tbh)
  				continue;
  			post_write_mst_fixup((NTFS_RECORD*)(kaddr +
  					bh_offset(tbh)));
  		}
  	}
  	flush_dcache_page(page);
  unm_done:
  	/* Unlock any locked inodes. */
  	while (nr_locked_nis-- > 0) {
  		ntfs_inode *tni, *base_tni;
  		
  		tni = locked_nis[nr_locked_nis];
  		/* Get the base inode. */
4e5e529ad   Ingo Molnar   NTFS: Semaphore t...
1274
  		mutex_lock(&tni->extent_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1275
1276
1277
1278
1279
1280
  		if (tni->nr_extents >= 0)
  			base_tni = tni;
  		else {
  			base_tni = tni->ext.base_ntfs_ino;
  			BUG_ON(!base_tni);
  		}
4e5e529ad   Ingo Molnar   NTFS: Semaphore t...
1281
  		mutex_unlock(&tni->extent_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1282
1283
1284
  		ntfs_debug("Unlocking %s inode 0x%lx.",
  				tni == base_tni ? "base" : "extent",
  				tni->mft_no);
4e5e529ad   Ingo Molnar   NTFS: Semaphore t...
1285
  		mutex_unlock(&tni->mrec_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
  		atomic_dec(&tni->count);
  		iput(VFS_I(base_tni));
  	}
  	SetPageUptodate(page);
  	kunmap(page);
  done:
  	if (unlikely(err && err != -ENOMEM)) {
  		/*
  		 * Set page error if there is only one ntfs record in the page.
  		 * Otherwise we would lose per-record granularity.
  		 */
  		if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
  			SetPageError(page);
  		NVolSetErrors(vol);
  	}
  	if (page_is_dirty) {
  		ntfs_debug("Page still contains one or more dirty ntfs "
  				"records.  Redirtying the page starting at "
  				"record 0x%lx.", page->index <<
  				(PAGE_CACHE_SHIFT - rec_size_bits));
  		redirty_page_for_writepage(wbc, page);
  		unlock_page(page);
  	} else {
  		/*
  		 * Keep the VM happy.  This must be done otherwise the
  		 * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
  		 * the page is clean.
  		 */
  		BUG_ON(PageWriteback(page));
  		set_page_writeback(page);
  		unlock_page(page);
  		end_page_writeback(page);
  	}
  	if (likely(!err))
  		ntfs_debug("Done.");
  	return err;
  }
  
  /**
   * ntfs_writepage - write a @page to the backing store
   * @page:	page cache page to write out
   * @wbc:	writeback control structure
   *
   * This is called from the VM when it wants to have a dirty ntfs page cache
   * page cleaned.  The VM has already locked the page and marked it clean.
   *
   * For non-resident attributes, ntfs_writepage() writes the @page by calling
   * the ntfs version of the generic block_write_full_page() function,
   * ntfs_write_block(), which in turn if necessary creates and writes the
   * buffers associated with the page asynchronously.
   *
   * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
   * the data to the mft record (which at this stage is most likely in memory).
   * The mft record is then marked dirty and written out asynchronously via the
   * vfs inode dirty code path for the inode the mft record belongs to or via the
   * vm page dirty code path for the page the mft record is in.
   *
   * Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
   *
   * Return 0 on success and -errno on error.
   */
  static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
  {
  	loff_t i_size;
149f0c520   Anton Altaparmakov   NTFS: Repeat a fa...
1350
1351
  	struct inode *vi = page->mapping->host;
  	ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
bfab36e81   Anton Altaparmakov   NTFS: Fix a mount...
1352
  	char *addr;
149f0c520   Anton Altaparmakov   NTFS: Repeat a fa...
1353
1354
  	ntfs_attr_search_ctx *ctx = NULL;
  	MFT_RECORD *m = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1355
1356
  	u32 attr_len;
  	int err;
905685f68   Anton Altaparmakov   NTFS: - Modify ->...
1357
  retry_writepage:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1358
  	BUG_ON(!PageLocked(page));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1359
  	i_size = i_size_read(vi);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
  	/* Is the page fully outside i_size? (truncate in progress) */
  	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
  			PAGE_CACHE_SHIFT)) {
  		/*
  		 * The page may have dirty, unmapped buffers.  Make them
  		 * freeable here, so the page does not leak.
  		 */
  		block_invalidatepage(page, 0);
  		unlock_page(page);
  		ntfs_debug("Write outside i_size - truncated?");
  		return 0;
  	}
bd45fdd20   Anton Altaparmakov   NTFS: Fixup handl...
1372
1373
1374
1375
  	/*
  	 * Only $DATA attributes can be encrypted and only unnamed $DATA
  	 * attributes can be compressed.  Index root can have the flags set but
  	 * this means to create compressed/encrypted files, not that the
4e64c8869   Anton Altaparmakov   NTFS: Fix handlin...
1376
1377
1378
  	 * attribute is compressed/encrypted.  Note we need to check for
  	 * AT_INDEX_ALLOCATION since this is the type of both directory and
  	 * index inodes.
bd45fdd20   Anton Altaparmakov   NTFS: Fixup handl...
1379
  	 */
4e64c8869   Anton Altaparmakov   NTFS: Fix handlin...
1380
  	if (ni->type != AT_INDEX_ALLOCATION) {
bd45fdd20   Anton Altaparmakov   NTFS: Fixup handl...
1381
1382
1383
1384
  		/* If file is encrypted, deny access, just like NT4. */
  		if (NInoEncrypted(ni)) {
  			unlock_page(page);
  			BUG_ON(ni->type != AT_DATA);
7d0ffdb27   Anton Altaparmakov   NTFS: $EA attribu...
1385
  			ntfs_debug("Denying write access to encrypted file.");
bd45fdd20   Anton Altaparmakov   NTFS: Fixup handl...
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
  			return -EACCES;
  		}
  		/* Compressed data streams are handled in compress.c. */
  		if (NInoNonResident(ni) && NInoCompressed(ni)) {
  			BUG_ON(ni->type != AT_DATA);
  			BUG_ON(ni->name_len);
  			// TODO: Implement and replace this with
  			// return ntfs_write_compressed_block(page);
  			unlock_page(page);
  			ntfs_error(vi->i_sb, "Writing to compressed files is "
  					"not supported yet.  Sorry.");
  			return -EOPNOTSUPP;
  		}
  		// TODO: Implement and remove this check.
  		if (NInoNonResident(ni) && NInoSparse(ni)) {
  			unlock_page(page);
  			ntfs_error(vi->i_sb, "Writing to sparse files is not "
  					"supported yet.  Sorry.");
  			return -EOPNOTSUPP;
  		}
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1407
1408
  	/* NInoNonResident() == NInoIndexAllocPresent() */
  	if (NInoNonResident(ni)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1409
1410
1411
1412
  		/* We have to zero every time due to mmap-at-end-of-file. */
  		if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
  			/* The page straddles i_size. */
  			unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
eebd2aa35   Christoph Lameter   Pagecache zeroing...
1413
  			zero_user_segment(page, ofs, PAGE_CACHE_SIZE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1414
1415
1416
1417
  		}
  		/* Handle mst protected attributes. */
  		if (NInoMstProtected(ni))
  			return ntfs_write_mst_block(page, wbc);
bd45fdd20   Anton Altaparmakov   NTFS: Fixup handl...
1418
  		/* Normal, non-resident data stream. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1419
1420
1421
  		return ntfs_write_block(page, wbc);
  	}
  	/*
bd45fdd20   Anton Altaparmakov   NTFS: Fixup handl...
1422
1423
1424
1425
1426
1427
  	 * Attribute is resident, implying it is not compressed, encrypted, or
  	 * mst protected.  This also means the attribute is smaller than an mft
  	 * record and hence smaller than a page, so can simply return error on
  	 * any pages with index above 0.  Note the attribute can actually be
  	 * marked compressed but if it is resident the actual data is not
  	 * compressed so we are ok to ignore the compressed flag here.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
  	 */
  	BUG_ON(page_has_buffers(page));
  	BUG_ON(!PageUptodate(page));
  	if (unlikely(page->index > 0)) {
  		ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0.  "
  				"Aborting write.", page->index);
  		BUG_ON(PageWriteback(page));
  		set_page_writeback(page);
  		unlock_page(page);
  		end_page_writeback(page);
  		return -EIO;
  	}
  	if (!NInoAttr(ni))
  		base_ni = ni;
  	else
  		base_ni = ni->ext.base_ntfs_ino;
  	/* Map, pin, and lock the mft record. */
  	m = map_mft_record(base_ni);
  	if (IS_ERR(m)) {
  		err = PTR_ERR(m);
  		m = NULL;
  		ctx = NULL;
  		goto err_out;
  	}
905685f68   Anton Altaparmakov   NTFS: - Modify ->...
1452
1453
1454
1455
1456
1457
1458
1459
  	/*
  	 * If a parallel write made the attribute non-resident, drop the mft
  	 * record and retry the writepage.
  	 */
  	if (unlikely(NInoNonResident(ni))) {
  		unmap_mft_record(base_ni);
  		goto retry_writepage;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
  	ctx = ntfs_attr_get_search_ctx(base_ni, m);
  	if (unlikely(!ctx)) {
  		err = -ENOMEM;
  		goto err_out;
  	}
  	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
  			CASE_SENSITIVE, 0, NULL, 0, ctx);
  	if (unlikely(err))
  		goto err_out;
  	/*
  	 * Keep the VM happy.  This must be done otherwise the radix-tree tag
  	 * PAGECACHE_TAG_DIRTY remains set even though the page is clean.
  	 */
  	BUG_ON(PageWriteback(page));
  	set_page_writeback(page);
  	unlock_page(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1476
  	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
07a4e2da7   Anton Altaparmakov   NTFS: Use i_size_...
1477
  	i_size = i_size_read(vi);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1478
  	if (unlikely(attr_len > i_size)) {
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
1479
  		/* Race with shrinking truncate or a failed truncate. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1480
  		attr_len = i_size;
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
1481
1482
1483
1484
1485
1486
1487
1488
  		/*
  		 * If the truncate failed, fix it up now.  If a concurrent
  		 * truncate, we do its job, so it does not have to do anything.
  		 */
  		err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
  				attr_len);
  		/* Shrinking cannot fail. */
  		BUG_ON(err);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1489
  	}
bfab36e81   Anton Altaparmakov   NTFS: Fix a mount...
1490
  	addr = kmap_atomic(page, KM_USER0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1491
1492
1493
  	/* Copy the data from the page to the mft record. */
  	memcpy((u8*)ctx->attr +
  			le16_to_cpu(ctx->attr->data.resident.value_offset),
bfab36e81   Anton Altaparmakov   NTFS: Fix a mount...
1494
  			addr, attr_len);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1495
  	/* Zero out of bounds area in the page cache page. */
bfab36e81   Anton Altaparmakov   NTFS: Fix a mount...
1496
1497
  	memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
  	kunmap_atomic(addr, KM_USER0);
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
1498
  	flush_dcache_page(page);
7d0ffdb27   Anton Altaparmakov   NTFS: $EA attribu...
1499
  	flush_dcache_mft_record_page(ctx->ntfs_ino);
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
1500
  	/* We are done with the page. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1501
  	end_page_writeback(page);
f6098cf44   Anton Altaparmakov   NTFS: Fix ntfs_{r...
1502
  	/* Finally, mark the mft record dirty, so it gets written back. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
  	mark_mft_record_dirty(ctx->ntfs_ino);
  	ntfs_attr_put_search_ctx(ctx);
  	unmap_mft_record(base_ni);
  	return 0;
  err_out:
  	if (err == -ENOMEM) {
  		ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
  				"page so we try again later.");
  		/*
  		 * Put the page back on mapping->dirty_pages, but leave its
  		 * buffers' dirty state as-is.
  		 */
  		redirty_page_for_writepage(wbc, page);
  		err = 0;
  	} else {
  		ntfs_error(vi->i_sb, "Resident attribute write failed with "
149f0c520   Anton Altaparmakov   NTFS: Repeat a fa...
1519
  				"error %i.", err);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1520
  		SetPageError(page);
149f0c520   Anton Altaparmakov   NTFS: Repeat a fa...
1521
  		NVolSetErrors(ni->vol);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1522
1523
1524
1525
1526
1527
1528
1529
  	}
  	unlock_page(page);
  	if (ctx)
  		ntfs_attr_put_search_ctx(ctx);
  	if (m)
  		unmap_mft_record(base_ni);
  	return err;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1530
1531
1532
1533
1534
  #endif	/* NTFS_RW */
  
  /**
   * ntfs_aops - general address space operations for inodes and attributes
   */
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
1535
  const struct address_space_operations ntfs_aops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1536
  	.readpage	= ntfs_readpage,	/* Fill page with data. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1537
1538
  #ifdef NTFS_RW
  	.writepage	= ntfs_writepage,	/* Write dirty page to disk. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1539
  #endif /* NTFS_RW */
78264bd9c   Anton Altaparmakov   NTFS: Use buffer_...
1540
1541
1542
  	.migratepage	= buffer_migrate_page,	/* Move a page cache page from
  						   one physical page to an
  						   other. */
aa261f549   Andi Kleen   HWPOISON: Enable ...
1543
  	.error_remove_page = generic_error_remove_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1544
1545
1546
1547
1548
1549
  };
  
  /**
   * ntfs_mst_aops - general address space operations for mst protected inodes
   *		   and attributes
   */
f5e54d6e5   Christoph Hellwig   [PATCH] mark addr...
1550
  const struct address_space_operations ntfs_mst_aops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1551
  	.readpage	= ntfs_readpage,	/* Fill page with data. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1552
1553
1554
1555
1556
1557
  #ifdef NTFS_RW
  	.writepage	= ntfs_writepage,	/* Write dirty page to disk. */
  	.set_page_dirty	= __set_page_dirty_nobuffers,	/* Set the page dirty
  						   without touching the buffers
  						   belonging to the page. */
  #endif /* NTFS_RW */
78264bd9c   Anton Altaparmakov   NTFS: Use buffer_...
1558
1559
1560
  	.migratepage	= buffer_migrate_page,	/* Move a page cache page from
  						   one physical page to an
  						   other. */
aa261f549   Andi Kleen   HWPOISON: Enable ...
1561
  	.error_remove_page = generic_error_remove_page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
  };
  
  #ifdef NTFS_RW
  
  /**
   * mark_ntfs_record_dirty - mark an ntfs record dirty
   * @page:	page containing the ntfs record to mark dirty
   * @ofs:	byte offset within @page at which the ntfs record begins
   *
   * Set the buffers and the page in which the ntfs record is located dirty.
   *
   * The latter also marks the vfs inode the ntfs record belongs to dirty
   * (I_DIRTY_PAGES only).
   *
   * If the page does not have buffers, we create them and set them uptodate.
   * The page may not be locked which is why we need to handle the buffers under
   * the mapping->private_lock.  Once the buffers are marked dirty we no longer
   * need the lock since try_to_free_buffers() does not free dirty buffers.
   */
  void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
  	struct address_space *mapping = page->mapping;
  	ntfs_inode *ni = NTFS_I(mapping->host);
  	struct buffer_head *bh, *head, *buffers_to_free = NULL;
  	unsigned int end, bh_size, bh_ofs;
  
  	BUG_ON(!PageUptodate(page));
  	end = ofs + ni->itype.index.block_size;
78af34f03   Anton Altaparmakov   NTFS: Implement s...
1589
  	bh_size = VFS_I(ni)->i_sb->s_blocksize;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
  	spin_lock(&mapping->private_lock);
  	if (unlikely(!page_has_buffers(page))) {
  		spin_unlock(&mapping->private_lock);
  		bh = head = alloc_page_buffers(page, bh_size, 1);
  		spin_lock(&mapping->private_lock);
  		if (likely(!page_has_buffers(page))) {
  			struct buffer_head *tail;
  
  			do {
  				set_buffer_uptodate(bh);
  				tail = bh;
  				bh = bh->b_this_page;
  			} while (bh);
  			tail->b_this_page = head;
  			attach_page_buffers(page, head);
  		} else
  			buffers_to_free = bh;
  	}
  	bh = head = page_buffers(page);
a01ac532b   Anton Altaparmakov   NTFS: Fix page_ha...
1609
  	BUG_ON(!bh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
  	do {
  		bh_ofs = bh_offset(bh);
  		if (bh_ofs + bh_size <= ofs)
  			continue;
  		if (unlikely(bh_ofs >= end))
  			break;
  		set_buffer_dirty(bh);
  	} while ((bh = bh->b_this_page) != head);
  	spin_unlock(&mapping->private_lock);
  	__set_page_dirty_nobuffers(page);
  	if (unlikely(buffers_to_free)) {
  		do {
  			bh = buffers_to_free->b_this_page;
  			free_buffer_head(buffers_to_free);
  			buffers_to_free = bh;
  		} while (buffers_to_free);
  	}
  }
  
  #endif /* NTFS_RW */