Commit 797b4cffdf79b9ed66759b8d2d5252eba965fb18

Authored by Vladimir Saveliev
Committed by Linus Torvalds
1 parent f870618428

reiserfs: use generic write

Make reiserfs write via generic routines.
The original reiserfs write path, optimized for big writes, is deadlock prone.

Signed-off-by: Vladimir Saveliev <vs@namesys.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 1 additions and 1239 deletions Side-by-side Diff

Changes suppressed. Click to show
... ... @@ -153,608 +153,6 @@
153 153 return (n_err < 0) ? -EIO : 0;
154 154 }
155 155  
156   -/* I really do not want to play with memory shortage right now, so
157   - to simplify the code, we are not going to write more than this much pages at
158   - a time. This still should considerably improve performance compared to 4k
159   - at a time case. This is 32 pages of 4k size. */
160   -#define REISERFS_WRITE_PAGES_AT_A_TIME (128 * 1024) / PAGE_CACHE_SIZE
161   -
162   -/* Allocates blocks for a file to fulfil write request.
163   - Maps all unmapped but prepared pages from the list.
164   - Updates metadata with newly allocated blocknumbers as needed */
165   -static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode we work with */
166   - loff_t pos, /* Writing position */
167   - int num_pages, /* number of pages write going
168   - to touch */
169   - int write_bytes, /* amount of bytes to write */
170   - struct page **prepared_pages, /* array of
171   - prepared pages
172   - */
173   - int blocks_to_allocate /* Amount of blocks we
174   - need to allocate to
175   - fit the data into file
176   - */
177   - )
178   -{
179   - struct cpu_key key; // cpu key of item that we are going to deal with
180   - struct item_head *ih; // pointer to item head that we are going to deal with
181   - struct buffer_head *bh; // Buffer head that contains items that we are going to deal with
182   - __le32 *item; // pointer to item we are going to deal with
183   - INITIALIZE_PATH(path); // path to item, that we are going to deal with.
184   - b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored.
185   - reiserfs_blocknr_hint_t hint; // hint structure for block allocator.
186   - size_t res; // return value of various functions that we call.
187   - int curr_block; // current block used to keep track of unmapped blocks.
188   - int i; // loop counter
189   - int itempos; // position in item
190   - unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in
191   - // first page
192   - unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */
193   - __u64 hole_size; // amount of blocks for a file hole, if it needed to be created.
194   - int modifying_this_item = 0; // Flag for items traversal code to keep track
195   - // of the fact that we already prepared
196   - // current block for journal
197   - int will_prealloc = 0;
198   - RFALSE(!blocks_to_allocate,
199   - "green-9004: tried to allocate zero blocks?");
200   -
201   - /* only preallocate if this is a small write */
202   - if (REISERFS_I(inode)->i_prealloc_count ||
203   - (!(write_bytes & (inode->i_sb->s_blocksize - 1)) &&
204   - blocks_to_allocate <
205   - REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize))
206   - will_prealloc =
207   - REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize;
208   -
209   - allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) *
210   - sizeof(b_blocknr_t), GFP_NOFS);
211   - if (!allocated_blocks)
212   - return -ENOMEM;
213   -
214   - /* First we compose a key to point at the writing position, we want to do
215   - that outside of any locking region. */
216   - make_cpu_key(&key, inode, pos + 1, TYPE_ANY, 3 /*key length */ );
217   -
218   - /* If we came here, it means we absolutely need to open a transaction,
219   - since we need to allocate some blocks */
220   - reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that.
221   - res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough
222   - if (res)
223   - goto error_exit;
224   - reiserfs_update_inode_transaction(inode);
225   -
226   - /* Look for the in-tree position of our write, need path for block allocator */
227   - res = search_for_position_by_key(inode->i_sb, &key, &path);
228   - if (res == IO_ERROR) {
229   - res = -EIO;
230   - goto error_exit;
231   - }
232   -
233   - /* Allocate blocks */
234   - /* First fill in "hint" structure for block allocator */
235   - hint.th = th; // transaction handle.
236   - hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine.
237   - hint.inode = inode; // Inode is needed by block allocator too.
238   - hint.search_start = 0; // We have no hint on where to search free blocks for block allocator.
239   - hint.key = key.on_disk_key; // on disk key of file.
240   - hint.block = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); // Number of disk blocks this file occupies already.
241   - hint.formatted_node = 0; // We are allocating blocks for unformatted node.
242   - hint.preallocate = will_prealloc;
243   -
244   - /* Call block allocator to allocate blocks */
245   - res =
246   - reiserfs_allocate_blocknrs(&hint, allocated_blocks,
247   - blocks_to_allocate, blocks_to_allocate);
248   - if (res != CARRY_ON) {
249   - if (res == NO_DISK_SPACE) {
250   - /* We flush the transaction in case of no space. This way some
251   - blocks might become free */
252   - SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
253   - res = restart_transaction(th, inode, &path);
254   - if (res)
255   - goto error_exit;
256   -
257   - /* We might have scheduled, so search again */
258   - res =
259   - search_for_position_by_key(inode->i_sb, &key,
260   - &path);
261   - if (res == IO_ERROR) {
262   - res = -EIO;
263   - goto error_exit;
264   - }
265   -
266   - /* update changed info for hint structure. */
267   - res =
268   - reiserfs_allocate_blocknrs(&hint, allocated_blocks,
269   - blocks_to_allocate,
270   - blocks_to_allocate);
271   - if (res != CARRY_ON) {
272   - res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC;
273   - pathrelse(&path);
274   - goto error_exit;
275   - }
276   - } else {
277   - res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC;
278   - pathrelse(&path);
279   - goto error_exit;
280   - }
281   - }
282   -#ifdef __BIG_ENDIAN
283   - // Too bad, I have not found any way to convert a given region from
284   - // cpu format to little endian format
285   - {
286   - int i;
287   - for (i = 0; i < blocks_to_allocate; i++)
288   - allocated_blocks[i] = cpu_to_le32(allocated_blocks[i]);
289   - }
290   -#endif
291   -
292   - /* Blocks allocating well might have scheduled and tree might have changed,
293   - let's search the tree again */
294   - /* find where in the tree our write should go */
295   - res = search_for_position_by_key(inode->i_sb, &key, &path);
296   - if (res == IO_ERROR) {
297   - res = -EIO;
298   - goto error_exit_free_blocks;
299   - }
300   -
301   - bh = get_last_bh(&path); // Get a bufferhead for last element in path.
302   - ih = get_ih(&path); // Get a pointer to last item head in path.
303   - item = get_item(&path); // Get a pointer to last item in path
304   -
305   - /* Let's see what we have found */
306   - if (res != POSITION_FOUND) { /* position not found, this means that we
307   - might need to append file with holes
308   - first */
309   - // Since we are writing past the file's end, we need to find out if
310   - // there is a hole that needs to be inserted before our writing
311   - // position, and how many blocks it is going to cover (we need to
312   - // populate pointers to file blocks representing the hole with zeros)
313   -
314   - {
315   - int item_offset = 1;
316   - /*
317   - * if ih is stat data, its offset is 0 and we don't want to
318   - * add 1 to pos in the hole_size calculation
319   - */
320   - if (is_statdata_le_ih(ih))
321   - item_offset = 0;
322   - hole_size = (pos + item_offset -
323   - (le_key_k_offset
324   - (get_inode_item_key_version(inode),
325   - &(ih->ih_key)) + op_bytes_number(ih,
326   - inode->
327   - i_sb->
328   - s_blocksize)))
329   - >> inode->i_sb->s_blocksize_bits;
330   - }
331   -
332   - if (hole_size > 0) {
333   - int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize) / UNFM_P_SIZE); // How much data to insert first time.
334   - /* area filled with zeroes, to supply as list of zero blocknumbers
335   - We allocate it outside of loop just in case loop would spin for
336   - several iterations. */
337   - char *zeros = kzalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway.
338   - if (!zeros) {
339   - res = -ENOMEM;
340   - goto error_exit_free_blocks;
341   - }
342   - do {
343   - to_paste =
344   - min_t(__u64, hole_size,
345   - MAX_ITEM_LEN(inode->i_sb->
346   - s_blocksize) /
347   - UNFM_P_SIZE);
348   - if (is_indirect_le_ih(ih)) {
349   - /* Ok, there is existing indirect item already. Need to append it */
350   - /* Calculate position past inserted item */
351   - make_cpu_key(&key, inode,
352   - le_key_k_offset
353   - (get_inode_item_key_version
354   - (inode),
355   - &(ih->ih_key)) +
356   - op_bytes_number(ih,
357   - inode->
358   - i_sb->
359   - s_blocksize),
360   - TYPE_INDIRECT, 3);
361   - res =
362   - reiserfs_paste_into_item(th, &path,
363   - &key,
364   - inode,
365   - (char *)
366   - zeros,
367   - UNFM_P_SIZE
368   - *
369   - to_paste);
370   - if (res) {
371   - kfree(zeros);
372   - goto error_exit_free_blocks;
373   - }
374   - } else if (is_statdata_le_ih(ih)) {
375   - /* No existing item, create it */
376   - /* item head for new item */
377   - struct item_head ins_ih;
378   -
379   - /* create a key for our new item */
380   - make_cpu_key(&key, inode, 1,
381   - TYPE_INDIRECT, 3);
382   -
383   - /* Create new item head for our new item */
384   - make_le_item_head(&ins_ih, &key,
385   - key.version, 1,
386   - TYPE_INDIRECT,
387   - to_paste *
388   - UNFM_P_SIZE,
389   - 0 /* free space */ );
390   -
391   - /* Find where such item should live in the tree */
392   - res =
393   - search_item(inode->i_sb, &key,
394   - &path);
395   - if (res != ITEM_NOT_FOUND) {
396   - /* item should not exist, otherwise we have error */
397   - if (res != -ENOSPC) {
398   - reiserfs_warning(inode->
399   - i_sb,
400   - "green-9008: search_by_key (%K) returned %d",
401   - &key,
402   - res);
403   - }
404   - res = -EIO;
405   - kfree(zeros);
406   - goto error_exit_free_blocks;
407   - }
408   - res =
409   - reiserfs_insert_item(th, &path,
410   - &key, &ins_ih,
411   - inode,
412   - (char *)zeros);
413   - } else {
414   - reiserfs_panic(inode->i_sb,
415   - "green-9011: Unexpected key type %K\n",
416   - &key);
417   - }
418   - if (res) {
419   - kfree(zeros);
420   - goto error_exit_free_blocks;
421   - }
422   - /* Now we want to check if transaction is too full, and if it is
423   - we restart it. This will also free the path. */
424   - if (journal_transaction_should_end
425   - (th, th->t_blocks_allocated)) {
426   - inode->i_size = cpu_key_k_offset(&key) +
427   - (to_paste << inode->i_blkbits);
428   - res =
429   - restart_transaction(th, inode,
430   - &path);
431   - if (res) {
432   - pathrelse(&path);
433   - kfree(zeros);
434   - goto error_exit;
435   - }
436   - }
437   -
438   - /* Well, need to recalculate path and stuff */
439   - set_cpu_key_k_offset(&key,
440   - cpu_key_k_offset(&key) +
441   - (to_paste << inode->
442   - i_blkbits));
443   - res =
444   - search_for_position_by_key(inode->i_sb,
445   - &key, &path);
446   - if (res == IO_ERROR) {
447   - res = -EIO;
448   - kfree(zeros);
449   - goto error_exit_free_blocks;
450   - }
451   - bh = get_last_bh(&path);
452   - ih = get_ih(&path);
453   - item = get_item(&path);
454   - hole_size -= to_paste;
455   - } while (hole_size);
456   - kfree(zeros);
457   - }
458   - }
459   - // Go through existing indirect items first
460   - // replace all zeroes with blocknumbers from list
461   - // Note that if no corresponding item was found, by previous search,
462   - // it means there are no existing in-tree representation for file area
463   - // we are going to overwrite, so there is nothing to scan through for holes.
464   - for (curr_block = 0, itempos = path.pos_in_item;
465   - curr_block < blocks_to_allocate && res == POSITION_FOUND;) {
466   - retry:
467   -
468   - if (itempos >= ih_item_len(ih) / UNFM_P_SIZE) {
469   - /* We run out of data in this indirect item, let's look for another
470   - one. */
471   - /* First if we are already modifying current item, log it */
472   - if (modifying_this_item) {
473   - journal_mark_dirty(th, inode->i_sb, bh);
474   - modifying_this_item = 0;
475   - }
476   - /* Then set the key to look for a new indirect item (offset of old
477   - item is added to old item length */
478   - set_cpu_key_k_offset(&key,
479   - le_key_k_offset
480   - (get_inode_item_key_version(inode),
481   - &(ih->ih_key)) +
482   - op_bytes_number(ih,
483   - inode->i_sb->
484   - s_blocksize));
485   - /* Search ofor position of new key in the tree. */
486   - res =
487   - search_for_position_by_key(inode->i_sb, &key,
488   - &path);
489   - if (res == IO_ERROR) {
490   - res = -EIO;
491   - goto error_exit_free_blocks;
492   - }
493   - bh = get_last_bh(&path);
494   - ih = get_ih(&path);
495   - item = get_item(&path);
496   - itempos = path.pos_in_item;
497   - continue; // loop to check all kinds of conditions and so on.
498   - }
499   - /* Ok, we have correct position in item now, so let's see if it is
500   - representing file hole (blocknumber is zero) and fill it if needed */
501   - if (!item[itempos]) {
502   - /* Ok, a hole. Now we need to check if we already prepared this
503   - block to be journaled */
504   - while (!modifying_this_item) { // loop until succeed
505   - /* Well, this item is not journaled yet, so we must prepare
506   - it for journal first, before we can change it */
507   - struct item_head tmp_ih; // We copy item head of found item,
508   - // here to detect if fs changed under
509   - // us while we were preparing for
510   - // journal.
511   - int fs_gen; // We store fs generation here to find if someone
512   - // changes fs under our feet
513   -
514   - copy_item_head(&tmp_ih, ih); // Remember itemhead
515   - fs_gen = get_generation(inode->i_sb); // remember fs generation
516   - reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing.
517   - if (fs_changed(fs_gen, inode->i_sb)
518   - && item_moved(&tmp_ih, &path)) {
519   - // Sigh, fs was changed under us, we need to look for new
520   - // location of item we are working with
521   -
522   - /* unmark prepaerd area as journaled and search for it's
523   - new position */
524   - reiserfs_restore_prepared_buffer(inode->
525   - i_sb,
526   - bh);
527   - res =
528   - search_for_position_by_key(inode->
529   - i_sb,
530   - &key,
531   - &path);
532   - if (res == IO_ERROR) {
533   - res = -EIO;
534   - goto error_exit_free_blocks;
535   - }
536   - bh = get_last_bh(&path);
537   - ih = get_ih(&path);
538   - item = get_item(&path);
539   - itempos = path.pos_in_item;
540   - goto retry;
541   - }
542   - modifying_this_item = 1;
543   - }
544   - item[itempos] = allocated_blocks[curr_block]; // Assign new block
545   - curr_block++;
546   - }
547   - itempos++;
548   - }
549   -
550   - if (modifying_this_item) { // We need to log last-accessed block, if it
551   - // was modified, but not logged yet.
552   - journal_mark_dirty(th, inode->i_sb, bh);
553   - }
554   -
555   - if (curr_block < blocks_to_allocate) {
556   - // Oh, well need to append to indirect item, or to create indirect item
557   - // if there weren't any
558   - if (is_indirect_le_ih(ih)) {
559   - // Existing indirect item - append. First calculate key for append
560   - // position. We do not need to recalculate path as it should
561   - // already point to correct place.
562   - make_cpu_key(&key, inode,
563   - le_key_k_offset(get_inode_item_key_version
564   - (inode),
565   - &(ih->ih_key)) +
566   - op_bytes_number(ih,
567   - inode->i_sb->s_blocksize),
568   - TYPE_INDIRECT, 3);
569   - res =
570   - reiserfs_paste_into_item(th, &path, &key, inode,
571   - (char *)(allocated_blocks +
572   - curr_block),
573   - UNFM_P_SIZE *
574   - (blocks_to_allocate -
575   - curr_block));
576   - if (res) {
577   - goto error_exit_free_blocks;
578   - }
579   - } else if (is_statdata_le_ih(ih)) {
580   - // Last found item was statdata. That means we need to create indirect item.
581   - struct item_head ins_ih; /* itemhead for new item */
582   -
583   - /* create a key for our new item */
584   - make_cpu_key(&key, inode, 1, TYPE_INDIRECT, 3); // Position one,
585   - // because that's
586   - // where first
587   - // indirect item
588   - // begins
589   - /* Create new item head for our new item */
590   - make_le_item_head(&ins_ih, &key, key.version, 1,
591   - TYPE_INDIRECT,
592   - (blocks_to_allocate -
593   - curr_block) * UNFM_P_SIZE,
594   - 0 /* free space */ );
595   - /* Find where such item should live in the tree */
596   - res = search_item(inode->i_sb, &key, &path);
597   - if (res != ITEM_NOT_FOUND) {
598   - /* Well, if we have found such item already, or some error
599   - occured, we need to warn user and return error */
600   - if (res != -ENOSPC) {
601   - reiserfs_warning(inode->i_sb,
602   - "green-9009: search_by_key (%K) "
603   - "returned %d", &key,
604   - res);
605   - }
606   - res = -EIO;
607   - goto error_exit_free_blocks;
608   - }
609   - /* Insert item into the tree with the data as its body */
610   - res =
611   - reiserfs_insert_item(th, &path, &key, &ins_ih,
612   - inode,
613   - (char *)(allocated_blocks +
614   - curr_block));
615   - } else {
616   - reiserfs_panic(inode->i_sb,
617   - "green-9010: unexpected item type for key %K\n",
618   - &key);
619   - }
620   - }
621   - // the caller is responsible for closing the transaction
622   - // unless we return an error, they are also responsible for logging
623   - // the inode.
624   - //
625   - pathrelse(&path);
626   - /*
627   - * cleanup prellocation from previous writes
628   - * if this is a partial block write
629   - */
630   - if (write_bytes & (inode->i_sb->s_blocksize - 1))
631   - reiserfs_discard_prealloc(th, inode);
632   - reiserfs_write_unlock(inode->i_sb);
633   -
634   - // go through all the pages/buffers and map the buffers to newly allocated
635   - // blocks (so that system knows where to write these pages later).
636   - curr_block = 0;
637   - for (i = 0; i < num_pages; i++) {
638   - struct page *page = prepared_pages[i]; //current page
639   - struct buffer_head *head = page_buffers(page); // first buffer for a page
640   - int block_start, block_end; // in-page offsets for buffers.
641   -
642   - if (!page_buffers(page))
643   - reiserfs_panic(inode->i_sb,
644   - "green-9005: No buffers for prepared page???");
645   -
646   - /* For each buffer in page */
647   - for (bh = head, block_start = 0; bh != head || !block_start;
648   - block_start = block_end, bh = bh->b_this_page) {
649   - if (!bh)
650   - reiserfs_panic(inode->i_sb,
651   - "green-9006: Allocated but absent buffer for a page?");
652   - block_end = block_start + inode->i_sb->s_blocksize;
653   - if (i == 0 && block_end <= from)
654   - /* if this buffer is before requested data to map, skip it */
655   - continue;
656   - if (i == num_pages - 1 && block_start >= to)
657   - /* If this buffer is after requested data to map, abort
658   - processing of current page */
659   - break;
660   -
661   - if (!buffer_mapped(bh)) { // Ok, unmapped buffer, need to map it
662   - map_bh(bh, inode->i_sb,
663   - le32_to_cpu(allocated_blocks
664   - [curr_block]));
665   - curr_block++;
666   - set_buffer_new(bh);
667   - }
668   - }
669   - }
670   -
671   - RFALSE(curr_block > blocks_to_allocate,
672   - "green-9007: Used too many blocks? weird");
673   -
674   - kfree(allocated_blocks);
675   - return 0;
676   -
677   -// Need to deal with transaction here.
678   - error_exit_free_blocks:
679   - pathrelse(&path);
680   - // free blocks
681   - for (i = 0; i < blocks_to_allocate; i++)
682   - reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]),
683   - 1);
684   -
685   - error_exit:
686   - if (th->t_trans_id) {
687   - int err;
688   - // update any changes we made to blk count
689   - mark_inode_dirty(inode);
690   - err =
691   - journal_end(th, inode->i_sb,
692   - JOURNAL_PER_BALANCE_CNT * 3 + 1 +
693   - 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));
694   - if (err)
695   - res = err;
696   - }
697   - reiserfs_write_unlock(inode->i_sb);
698   - kfree(allocated_blocks);
699   -
700   - return res;
701   -}
702   -
703   -/* Unlock pages prepared by reiserfs_prepare_file_region_for_write */
704   -static void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */
705   - size_t num_pages /* amount of pages */ )
706   -{
707   - int i; // loop counter
708   -
709   - for (i = 0; i < num_pages; i++) {
710   - struct page *page = prepared_pages[i];
711   -
712   - try_to_free_buffers(page);
713   - unlock_page(page);
714   - page_cache_release(page);
715   - }
716   -}
717   -
718   -/* This function will copy data from userspace to specified pages within
719   - supplied byte range */
720   -static int reiserfs_copy_from_user_to_file_region(loff_t pos, /* In-file position */
721   - int num_pages, /* Number of pages affected */
722   - int write_bytes, /* Amount of bytes to write */
723   - struct page **prepared_pages, /* pointer to
724   - array to
725   - prepared pages
726   - */
727   - const char __user * buf /* Pointer to user-supplied
728   - data */
729   - )
730   -{
731   - long page_fault = 0; // status of copy_from_user.
732   - int i; // loop counter.
733   - int offset; // offset in page
734   -
735   - for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages;
736   - i++, offset = 0) {
737   - size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page
738   - struct page *page = prepared_pages[i]; // Current page we process.
739   -
740   - fault_in_pages_readable(buf, count);
741   -
742   - /* Copy data from userspace to the current page */
743   - kmap(page);
744   - page_fault = __copy_from_user(page_address(page) + offset, buf, count); // Copy the data.
745   - /* Flush processor's dcache for this page */
746   - flush_dcache_page(page);
747   - kunmap(page);
748   - buf += count;
749   - write_bytes -= count;
750   -
751   - if (page_fault)
752   - break; // Was there a fault? abort.
753   - }
754   -
755   - return page_fault ? -EFAULT : 0;
756   -}
757   -
758 156 /* taken fs/buffer.c:__block_commit_write */
759 157 int reiserfs_commit_page(struct inode *inode, struct page *page,
760 158 unsigned from, unsigned to)
... ... @@ -824,432 +222,6 @@
824 222 return ret;
825 223 }
826 224  
827   -/* Submit pages for write. This was separated from actual file copying
828   - because we might want to allocate block numbers in-between.
829   - This function assumes that caller will adjust file size to correct value. */
830   -static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_handle *th, struct inode *inode, loff_t pos, /* Writing position offset */
831   - size_t num_pages, /* Number of pages to write */
832   - size_t write_bytes, /* number of bytes to write */
833   - struct page **prepared_pages /* list of pages */
834   - )
835   -{
836   - int status; // return status of block_commit_write.
837   - int retval = 0; // Return value we are going to return.
838   - int i; // loop counter
839   - int offset; // Writing offset in page.
840   - int orig_write_bytes = write_bytes;
841   - int sd_update = 0;
842   -
843   - for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages;
844   - i++, offset = 0) {
845   - int count = min_t(int, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page
846   - struct page *page = prepared_pages[i]; // Current page we process.
847   -
848   - status =
849   - reiserfs_commit_page(inode, page, offset, offset + count);
850   - if (status)
851   - retval = status; // To not overcomplicate matters We are going to
852   - // submit all the pages even if there was error.
853   - // we only remember error status to report it on
854   - // exit.
855   - write_bytes -= count;
856   - }
857   - /* now that we've gotten all the ordered buffers marked dirty,
858   - * we can safely update i_size and close any running transaction
859   - */
860   - if (pos + orig_write_bytes > inode->i_size) {
861   - inode->i_size = pos + orig_write_bytes; // Set new size
862   - /* If the file have grown so much that tail packing is no
863   - * longer possible, reset "need to pack" flag */
864   - if ((have_large_tails(inode->i_sb) &&
865   - inode->i_size > i_block_size(inode) * 4) ||
866   - (have_small_tails(inode->i_sb) &&
867   - inode->i_size > i_block_size(inode)))
868   - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
869   - else if ((have_large_tails(inode->i_sb) &&
870   - inode->i_size < i_block_size(inode) * 4) ||
871   - (have_small_tails(inode->i_sb) &&
872   - inode->i_size < i_block_size(inode)))
873   - REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;
874   -
875   - if (th->t_trans_id) {
876   - reiserfs_write_lock(inode->i_sb);
877   - // this sets the proper flags for O_SYNC to trigger a commit
878   - mark_inode_dirty(inode);
879   - reiserfs_write_unlock(inode->i_sb);
880   - } else {
881   - reiserfs_write_lock(inode->i_sb);
882   - reiserfs_update_inode_transaction(inode);
883   - mark_inode_dirty(inode);
884   - reiserfs_write_unlock(inode->i_sb);
885   - }
886   -
887   - sd_update = 1;
888   - }
889   - if (th->t_trans_id) {
890   - reiserfs_write_lock(inode->i_sb);
891   - if (!sd_update)
892   - mark_inode_dirty(inode);
893   - status = journal_end(th, th->t_super, th->t_blocks_allocated);
894   - if (status)
895   - retval = status;
896   - reiserfs_write_unlock(inode->i_sb);
897   - }
898   - th->t_trans_id = 0;
899   -
900   - /*
901   - * we have to unlock the pages after updating i_size, otherwise
902   - * we race with writepage
903   - */
904   - for (i = 0; i < num_pages; i++) {
905   - struct page *page = prepared_pages[i];
906   - unlock_page(page);
907   - mark_page_accessed(page);
908   - page_cache_release(page);
909   - }
910   - return retval;
911   -}
912   -
913   -/* Look if passed writing region is going to touch file's tail
914   - (if it is present). And if it is, convert the tail to unformatted node */
915   -static int reiserfs_check_for_tail_and_convert(struct inode *inode, /* inode to deal with */
916   - loff_t pos, /* Writing position */
917   - int write_bytes /* amount of bytes to write */
918   - )
919   -{
920   - INITIALIZE_PATH(path); // needed for search_for_position
921   - struct cpu_key key; // Key that would represent last touched writing byte.
922   - struct item_head *ih; // item header of found block;
923   - int res; // Return value of various functions we call.
924   - int cont_expand_offset; // We will put offset for generic_cont_expand here
925   - // This can be int just because tails are created
926   - // only for small files.
927   -
928   -/* this embodies a dependency on a particular tail policy */
929   - if (inode->i_size >= inode->i_sb->s_blocksize * 4) {
930   - /* such a big files do not have tails, so we won't bother ourselves
931   - to look for tails, simply return */
932   - return 0;
933   - }
934   -
935   - reiserfs_write_lock(inode->i_sb);
936   - /* find the item containing the last byte to be written, or if
937   - * writing past the end of the file then the last item of the
938   - * file (and then we check its type). */
939   - make_cpu_key(&key, inode, pos + write_bytes + 1, TYPE_ANY,
940   - 3 /*key length */ );
941   - res = search_for_position_by_key(inode->i_sb, &key, &path);
942   - if (res == IO_ERROR) {
943   - reiserfs_write_unlock(inode->i_sb);
944   - return -EIO;
945   - }
946   - ih = get_ih(&path);
947   - res = 0;
948   - if (is_direct_le_ih(ih)) {
949   - /* Ok, closest item is file tail (tails are stored in "direct"
950   - * items), so we need to unpack it. */
951   - /* To not overcomplicate matters, we just call generic_cont_expand
952   - which will in turn call other stuff and finally will boil down to
953   - reiserfs_get_block() that would do necessary conversion. */
954   - cont_expand_offset =
955   - le_key_k_offset(get_inode_item_key_version(inode),
956   - &(ih->ih_key));
957   - pathrelse(&path);
958   - res = generic_cont_expand(inode, cont_expand_offset);
959   - } else
960   - pathrelse(&path);
961   -
962   - reiserfs_write_unlock(inode->i_sb);
963   - return res;
964   -}
965   -
966   -/* This function locks pages starting from @pos for @inode.
967   - @num_pages pages are locked and stored in
968   - @prepared_pages array. Also buffers are allocated for these pages.
969   - First and last page of the region is read if it is overwritten only
970   - partially. If last page did not exist before write (file hole or file
971   - append), it is zeroed, then.
972   - Returns number of unallocated blocks that should be allocated to cover
973   - new file data.*/
974   -static int reiserfs_prepare_file_region_for_write(struct inode *inode
975   - /* Inode of the file */ ,
976   - loff_t pos, /* position in the file */
977   - size_t num_pages, /* number of pages to
978   - prepare */
979   - size_t write_bytes, /* Amount of bytes to be
980   - overwritten from
981   - @pos */
982   - struct page **prepared_pages /* pointer to array
983   - where to store
984   - prepared pages */
985   - )
986   -{
987   - int res = 0; // Return values of different functions we call.
988   - unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages.
989   - int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page
990   - int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1;
991   - /* offset of last modified byte in last
992   - page */
993   - struct address_space *mapping = inode->i_mapping; // Pages are mapped here.
994   - int i; // Simple counter
995   - int blocks = 0; /* Return value (blocks that should be allocated) */
996   - struct buffer_head *bh, *head; // Current bufferhead and first bufferhead
997   - // of a page.
998   - unsigned block_start, block_end; // Starting and ending offsets of current
999   - // buffer in the page.
1000   - struct buffer_head *wait[2], **wait_bh = wait; // Buffers for page, if
1001   - // Page appeared to be not up
1002   - // to date. Note how we have
1003   - // at most 2 buffers, this is
1004   - // because we at most may
1005   - // partially overwrite two
1006   - // buffers for one page. One at // the beginning of write area
1007   - // and one at the end.
1008   - // Everything inthe middle gets // overwritten totally.
1009   -
1010   - struct cpu_key key; // cpu key of item that we are going to deal with
1011   - struct item_head *ih = NULL; // pointer to item head that we are going to deal with
1012   - struct buffer_head *itembuf = NULL; // Buffer head that contains items that we are going to deal with
1013   - INITIALIZE_PATH(path); // path to item, that we are going to deal with.
1014   - __le32 *item = NULL; // pointer to item we are going to deal with
1015   - int item_pos = -1; /* Position in indirect item */
1016   -
1017   - if (num_pages < 1) {
1018   - reiserfs_warning(inode->i_sb,
1019   - "green-9001: reiserfs_prepare_file_region_for_write "
1020   - "called with zero number of pages to process");
1021   - return -EFAULT;
1022   - }
1023   -
1024   - /* We have 2 loops for pages. In first loop we grab and lock the pages, so
1025   - that nobody would touch these until we release the pages. Then
1026   - we'd start to deal with mapping buffers to blocks. */
1027   - for (i = 0; i < num_pages; i++) {
1028   - prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page
1029   - if (!prepared_pages[i]) {
1030   - res = -ENOMEM;
1031   - goto failed_page_grabbing;
1032   - }
1033   - if (!page_has_buffers(prepared_pages[i]))
1034   - create_empty_buffers(prepared_pages[i],
1035   - inode->i_sb->s_blocksize, 0);
1036   - }
1037   -
1038   - /* Let's count amount of blocks for a case where all the blocks
1039   - overwritten are new (we will substract already allocated blocks later) */
1040   - if (num_pages > 2)
1041   - /* These are full-overwritten pages so we count all the blocks in
1042   - these pages are counted as needed to be allocated */
1043   - blocks =
1044   - (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1045   -
1046   - /* count blocks needed for first page (possibly partially written) */
1047   - blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) + !!(from & (inode->i_sb->s_blocksize - 1)); /* roundup */
1048   -
1049   - /* Now we account for last page. If last page == first page (we
1050   - overwrite only one page), we substract all the blocks past the
1051   - last writing position in a page out of already calculated number
1052   - of blocks */
1053   - blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT - inode->i_blkbits)) -
1054   - ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits);
1055   - /* Note how we do not roundup here since partial blocks still
1056   - should be allocated */
1057   -
1058   - /* Now if all the write area lies past the file end, no point in
1059   - maping blocks, since there is none, so we just zero out remaining
1060   - parts of first and last pages in write area (if needed) */
1061   - if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) {
1062   - if (from != 0) /* First page needs to be partially zeroed */
1063   - zero_user_page(prepared_pages[0], 0, from, KM_USER0);
1064   -
1065   - if (to != PAGE_CACHE_SIZE) /* Last page needs to be partially zeroed */
1066   - zero_user_page(prepared_pages[num_pages-1], to,
1067   - PAGE_CACHE_SIZE - to, KM_USER0);
1068   -
1069   - /* Since all blocks are new - use already calculated value */
1070   - return blocks;
1071   - }
1072   -
1073   - /* Well, since we write somewhere into the middle of a file, there is
1074   - possibility we are writing over some already allocated blocks, so
1075   - let's map these blocks and substract number of such blocks out of blocks
1076   - we need to allocate (calculated above) */
1077   - /* Mask write position to start on blocksize, we do it out of the
1078   - loop for performance reasons */
1079   - pos &= ~((loff_t) inode->i_sb->s_blocksize - 1);
1080   - /* Set cpu key to the starting position in a file (on left block boundary) */
1081   - make_cpu_key(&key, inode,
1082   - 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)),
1083   - TYPE_ANY, 3 /*key length */ );
1084   -
1085   - reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key()
1086   - for (i = 0; i < num_pages; i++) {
1087   -
1088   - head = page_buffers(prepared_pages[i]);
1089   - /* For each buffer in the page */
1090   - for (bh = head, block_start = 0; bh != head || !block_start;
1091   - block_start = block_end, bh = bh->b_this_page) {
1092   - if (!bh)
1093   - reiserfs_panic(inode->i_sb,
1094   - "green-9002: Allocated but absent buffer for a page?");
1095   - /* Find where this buffer ends */
1096   - block_end = block_start + inode->i_sb->s_blocksize;
1097   - if (i == 0 && block_end <= from)
1098   - /* if this buffer is before requested data to map, skip it */
1099   - continue;
1100   -
1101   - if (i == num_pages - 1 && block_start >= to) {
1102   - /* If this buffer is after requested data to map, abort
1103   - processing of current page */
1104   - break;
1105   - }
1106   -
1107   - if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1108   - /* This is optimisation for a case where buffer is mapped
1109   - and have blocknumber assigned. In case significant amount
1110   - of such buffers are present, we may avoid some amount
1111   - of search_by_key calls.
1112   - Probably it would be possible to move parts of this code
1113   - out of BKL, but I afraid that would overcomplicate code
1114   - without any noticeable benefit.
1115   - */
1116   - item_pos++;
1117   - /* Update the key */
1118   - set_cpu_key_k_offset(&key,
1119   - cpu_key_k_offset(&key) +
1120   - inode->i_sb->s_blocksize);
1121   - blocks--; // Decrease the amount of blocks that need to be
1122   - // allocated
1123   - continue; // Go to the next buffer
1124   - }
1125   -
1126   - if (!itembuf || /* if first iteration */
1127   - item_pos >= ih_item_len(ih) / UNFM_P_SIZE) { /* or if we progressed past the
1128   - current unformatted_item */
1129   - /* Try to find next item */
1130   - res =
1131   - search_for_position_by_key(inode->i_sb,
1132   - &key, &path);
1133   - /* Abort if no more items */
1134   - if (res != POSITION_FOUND) {
1135   - /* make sure later loops don't use this item */
1136   - itembuf = NULL;
1137   - item = NULL;
1138   - break;
1139   - }
1140   -
1141   - /* Update information about current indirect item */
1142   - itembuf = get_last_bh(&path);
1143   - ih = get_ih(&path);
1144   - item = get_item(&path);
1145   - item_pos = path.pos_in_item;
1146   -
1147   - RFALSE(!is_indirect_le_ih(ih),
1148   - "green-9003: indirect item expected");
1149   - }
1150   -
1151   - /* See if there is some block associated with the file
1152   - at that position, map the buffer to this block */
1153   - if (get_block_num(item, item_pos)) {
1154   - map_bh(bh, inode->i_sb,
1155   - get_block_num(item, item_pos));
1156   - blocks--; // Decrease the amount of blocks that need to be
1157   - // allocated
1158   - }
1159   - item_pos++;
1160   - /* Update the key */
1161   - set_cpu_key_k_offset(&key,
1162   - cpu_key_k_offset(&key) +
1163   - inode->i_sb->s_blocksize);
1164   - }
1165   - }
1166   - pathrelse(&path); // Free the path
1167   - reiserfs_write_unlock(inode->i_sb);
1168   -
1169   - /* Now zero out unmappend buffers for the first and last pages of
1170   - write area or issue read requests if page is mapped. */
1171   - /* First page, see if it is not uptodate */
1172   - if (!PageUptodate(prepared_pages[0])) {
1173   - head = page_buffers(prepared_pages[0]);
1174   -
1175   - /* For each buffer in page */
1176   - for (bh = head, block_start = 0; bh != head || !block_start;
1177   - block_start = block_end, bh = bh->b_this_page) {
1178   -
1179   - if (!bh)
1180   - reiserfs_panic(inode->i_sb,
1181   - "green-9002: Allocated but absent buffer for a page?");
1182   - /* Find where this buffer ends */
1183   - block_end = block_start + inode->i_sb->s_blocksize;
1184   - if (block_end <= from)
1185   - /* if this buffer is before requested data to map, skip it */
1186   - continue;
1187   - if (block_start < from) { /* Aha, our partial buffer */
1188   - if (buffer_mapped(bh)) { /* If it is mapped, we need to
1189   - issue READ request for it to
1190   - not loose data */
1191   - ll_rw_block(READ, 1, &bh);
1192   - *wait_bh++ = bh;
1193   - } else { /* Not mapped, zero it */
1194   - zero_user_page(prepared_pages[0],
1195   - block_start,
1196   - from - block_start, KM_USER0);
1197   - set_buffer_uptodate(bh);
1198   - }
1199   - }
1200   - }
1201   - }
1202   -
1203   - /* Last page, see if it is not uptodate, or if the last page is past the end of the file. */
1204   - if (!PageUptodate(prepared_pages[num_pages - 1]) ||
1205   - ((pos + write_bytes) >> PAGE_CACHE_SHIFT) >
1206   - (inode->i_size >> PAGE_CACHE_SHIFT)) {
1207   - head = page_buffers(prepared_pages[num_pages - 1]);
1208   -
1209   - /* for each buffer in page */
1210   - for (bh = head, block_start = 0; bh != head || !block_start;
1211   - block_start = block_end, bh = bh->b_this_page) {
1212   -
1213   - if (!bh)
1214   - reiserfs_panic(inode->i_sb,
1215   - "green-9002: Allocated but absent buffer for a page?");
1216   - /* Find where this buffer ends */
1217   - block_end = block_start + inode->i_sb->s_blocksize;
1218   - if (block_start >= to)
1219   - /* if this buffer is after requested data to map, skip it */
1220   - break;
1221   - if (block_end > to) { /* Aha, our partial buffer */
1222   - if (buffer_mapped(bh)) { /* If it is mapped, we need to
1223   - issue READ request for it to
1224   - not loose data */
1225   - ll_rw_block(READ, 1, &bh);
1226   - *wait_bh++ = bh;
1227   - } else { /* Not mapped, zero it */
1228   - zero_user_page(prepared_pages[num_pages-1],
1229   - to, block_end - to, KM_USER0);
1230   - set_buffer_uptodate(bh);
1231   - }
1232   - }
1233   - }
1234   - }
1235   -
1236   - /* Wait for read requests we made to happen, if necessary */
1237   - while (wait_bh > wait) {
1238   - wait_on_buffer(*--wait_bh);
1239   - if (!buffer_uptodate(*wait_bh)) {
1240   - res = -EIO;
1241   - goto failed_read;
1242   - }
1243   - }
1244   -
1245   - return blocks;
1246   - failed_page_grabbing:
1247   - num_pages = i;
1248   - failed_read:
1249   - reiserfs_unprepare_pages(prepared_pages, num_pages);
1250   - return res;
1251   -}
1252   -
1253 225 /* Write @count bytes at position @ppos in a file indicated by @file
1254 226 from the buffer @buf.
1255 227  
1256 228  
... ... @@ -1284,14 +256,9 @@
1284 256 * new current position before returning. */
1285 257 )
1286 258 {
1287   - size_t already_written = 0; // Number of bytes already written to the file.
1288   - loff_t pos; // Current position in the file.
1289   - ssize_t res; // return value of various functions that we call.
1290   - int err = 0;
1291 259 struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to.
1292 260 /* To simplify coding at this time, we store
1293 261 locked pages in array for now */
1294   - struct page *prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME];
1295 262 struct reiserfs_transaction_handle th;
1296 263 th.t_trans_id = 0;
1297 264  
... ... @@ -1311,212 +278,7 @@
1311 278 count = MAX_NON_LFS - (unsigned long)*ppos;
1312 279 }
1313 280  
1314   - if (file->f_flags & O_DIRECT)
1315   - return do_sync_write(file, buf, count, ppos);
1316   -
1317   - if (unlikely((ssize_t) count < 0))
1318   - return -EINVAL;
1319   -
1320   - if (unlikely(!access_ok(VERIFY_READ, buf, count)))
1321   - return -EFAULT;
1322   -
1323   - mutex_lock(&inode->i_mutex); // locks the entire file for just us
1324   -
1325   - pos = *ppos;
1326   -
1327   - /* Check if we can write to specified region of file, file
1328   - is not overly big and this kind of stuff. Adjust pos and
1329   - count, if needed */
1330   - res = generic_write_checks(file, &pos, &count, 0);
1331   - if (res)
1332   - goto out;
1333   -
1334   - if (count == 0)
1335   - goto out;
1336   -
1337   - res = remove_suid(file->f_path.dentry);
1338   - if (res)
1339   - goto out;
1340   -
1341   - file_update_time(file);
1342   -
1343   - // Ok, we are done with all the checks.
1344   -
1345   - // Now we should start real work
1346   -
1347   - /* If we are going to write past the file's packed tail or if we are going
1348   - to overwrite part of the tail, we need that tail to be converted into
1349   - unformatted node */
1350   - res = reiserfs_check_for_tail_and_convert(inode, pos, count);
1351   - if (res)
1352   - goto out;
1353   -
1354   - while (count > 0) {
1355   - /* This is the main loop in which we running until some error occures
1356   - or until we write all of the data. */
1357   - size_t num_pages; /* amount of pages we are going to write this iteration */
1358   - size_t write_bytes; /* amount of bytes to write during this iteration */
1359   - size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */
1360   -
1361   - /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos */
1362   - num_pages = !!((pos + count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial
1363   - pages */
1364   - ((count +
1365   - (pos & (PAGE_CACHE_SIZE - 1))) >> PAGE_CACHE_SHIFT);
1366   - /* convert size to amount of
1367   - pages */
1368   - reiserfs_write_lock(inode->i_sb);
1369   - if (num_pages > REISERFS_WRITE_PAGES_AT_A_TIME
1370   - || num_pages > reiserfs_can_fit_pages(inode->i_sb)) {
1371   - /* If we were asked to write more data than we want to or if there
1372   - is not that much space, then we shorten amount of data to write
1373   - for this iteration. */
1374   - num_pages =
1375   - min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME,
1376   - reiserfs_can_fit_pages(inode->i_sb));
1377   - /* Also we should not forget to set size in bytes accordingly */
1378   - write_bytes = (num_pages << PAGE_CACHE_SHIFT) -
1379   - (pos & (PAGE_CACHE_SIZE - 1));
1380   - /* If position is not on the
1381   - start of the page, we need
1382   - to substract the offset
1383   - within page */
1384   - } else
1385   - write_bytes = count;
1386   -
1387   - /* reserve the blocks to be allocated later, so that later on
1388   - we still have the space to write the blocks to */
1389   - reiserfs_claim_blocks_to_be_allocated(inode->i_sb,
1390   - num_pages <<
1391   - (PAGE_CACHE_SHIFT -
1392   - inode->i_blkbits));
1393   - reiserfs_write_unlock(inode->i_sb);
1394   -
1395   - if (!num_pages) { /* If we do not have enough space even for a single page... */
1396   - if (pos >
1397   - inode->i_size + inode->i_sb->s_blocksize -
1398   - (pos & (inode->i_sb->s_blocksize - 1))) {
1399   - res = -ENOSPC;
1400   - break; // In case we are writing past the end of the last file block, break.
1401   - }
1402   - // Otherwise we are possibly overwriting the file, so
1403   - // let's set write size to be equal or less than blocksize.
1404   - // This way we get it correctly for file holes.
1405   - // But overwriting files on absolutelly full volumes would not
1406   - // be very efficient. Well, people are not supposed to fill
1407   - // 100% of disk space anyway.
1408   - write_bytes =
1409   - min_t(size_t, count,
1410   - inode->i_sb->s_blocksize -
1411   - (pos & (inode->i_sb->s_blocksize - 1)));
1412   - num_pages = 1;
1413   - // No blocks were claimed before, so do it now.
1414   - reiserfs_claim_blocks_to_be_allocated(inode->i_sb,
1415   - 1 <<
1416   - (PAGE_CACHE_SHIFT
1417   - -
1418   - inode->
1419   - i_blkbits));
1420   - }
1421   -
1422   - /* Prepare for writing into the region, read in all the
1423   - partially overwritten pages, if needed. And lock the pages,
1424   - so that nobody else can access these until we are done.
1425   - We get number of actual blocks needed as a result. */
1426   - res = reiserfs_prepare_file_region_for_write(inode, pos,
1427   - num_pages,
1428   - write_bytes,
1429   - prepared_pages);
1430   - if (res < 0) {
1431   - reiserfs_release_claimed_blocks(inode->i_sb,
1432   - num_pages <<
1433   - (PAGE_CACHE_SHIFT -
1434   - inode->i_blkbits));
1435   - break;
1436   - }
1437   -
1438   - blocks_to_allocate = res;
1439   -
1440   - /* First we correct our estimate of how many blocks we need */
1441   - reiserfs_release_claimed_blocks(inode->i_sb,
1442   - (num_pages <<
1443   - (PAGE_CACHE_SHIFT -
1444   - inode->i_sb->
1445   - s_blocksize_bits)) -
1446   - blocks_to_allocate);
1447   -
1448   - if (blocks_to_allocate > 0) { /*We only allocate blocks if we need to */
1449   - /* Fill in all the possible holes and append the file if needed */
1450   - res =
1451   - reiserfs_allocate_blocks_for_region(&th, inode, pos,
1452   - num_pages,
1453   - write_bytes,
1454   - prepared_pages,
1455   - blocks_to_allocate);
1456   - }
1457   -
1458   - /* well, we have allocated the blocks, so it is time to free
1459   - the reservation we made earlier. */
1460   - reiserfs_release_claimed_blocks(inode->i_sb,
1461   - blocks_to_allocate);
1462   - if (res) {
1463   - reiserfs_unprepare_pages(prepared_pages, num_pages);
1464   - break;
1465   - }
1466   -
1467   -/* NOTE that allocating blocks and filling blocks can be done in reverse order
1468   - and probably we would do that just to get rid of garbage in files after a
1469   - crash */
1470   -
1471   - /* Copy data from user-supplied buffer to file's pages */
1472   - res =
1473   - reiserfs_copy_from_user_to_file_region(pos, num_pages,
1474   - write_bytes,
1475   - prepared_pages, buf);
1476   - if (res) {
1477   - reiserfs_unprepare_pages(prepared_pages, num_pages);
1478   - break;
1479   - }
1480   -
1481   - /* Send the pages to disk and unlock them. */
1482   - res =
1483   - reiserfs_submit_file_region_for_write(&th, inode, pos,
1484   - num_pages,
1485   - write_bytes,
1486   - prepared_pages);
1487   - if (res)
1488   - break;
1489   -
1490   - already_written += write_bytes;
1491   - buf += write_bytes;
1492   - *ppos = pos += write_bytes;
1493   - count -= write_bytes;
1494   - balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
1495   - }
1496   -
1497   - /* this is only true on error */
1498   - if (th.t_trans_id) {
1499   - reiserfs_write_lock(inode->i_sb);
1500   - err = journal_end(&th, th.t_super, th.t_blocks_allocated);
1501   - reiserfs_write_unlock(inode->i_sb);
1502   - if (err) {
1503   - res = err;
1504   - goto out;
1505   - }
1506   - }
1507   -
1508   - if (likely(res >= 0) &&
1509   - (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))))
1510   - res = generic_osync_inode(inode, file->f_mapping,
1511   - OSYNC_METADATA | OSYNC_DATA);
1512   -
1513   - mutex_unlock(&inode->i_mutex);
1514   - reiserfs_async_progress_wait(inode->i_sb);
1515   - return (already_written != 0) ? already_written : res;
1516   -
1517   - out:
1518   - mutex_unlock(&inode->i_mutex); // unlock the file on exit.
1519   - return res;
  281 + return do_sync_write(file, buf, count, ppos);
1520 282 }
1521 283  
1522 284 const struct file_operations reiserfs_file_operations = {