Commit 70c8038dd698b44daf7c8fc7e2eca142bec694c4

Authored by Linus Torvalds

Merge tag 'for-f2fs-3.17-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs bug fixes from Jaegeuk Kim:
 "This series includes patches to:

   - fix recovery routines
   - fix bugs related to inline_data/xattr
   - fix when casting the dentry names
   - handle EIO or ENOMEM correctly
   - fix memory leak
   - fix lock coverage"

* tag 'for-f2fs-3.17-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (28 commits)
  f2fs: reposition unlock_new_inode to prevent accessing invalid inode
  f2fs: fix wrong casting for dentry name
  f2fs: simplify by using a literal
  f2fs: truncate stale block for inline_data
  f2fs: use macro for code readability
  f2fs: introduce need_do_checkpoint for readability
  f2fs: fix incorrect calculation with total/free inode num
  f2fs: remove rename and use rename2
  f2fs: skip if inline_data was converted already
  f2fs: remove rewrite_node_page
  f2fs: avoid double lock in truncate_blocks
  f2fs: prevent checkpoint during roll-forward
  f2fs: add WARN_ON in f2fs_bug_on
  f2fs: handle EIO not to break fs consistency
  f2fs: check s_dirty under cp_mutex
  f2fs: unlock_page when node page is redirtied out
  f2fs: introduce f2fs_cp_error for readability
  f2fs: give a chance to mount again when encountering errors
  f2fs: trigger release_dirty_inode in f2fs_put_super
  f2fs: don't skip checkpoint if there is no dirty node pages
  ...

Showing 19 changed files Side-by-side Diff

... ... @@ -23,7 +23,7 @@
23 23 mounted as f2fs. Each file shows the whole f2fs information.
24 24  
25 25 /sys/kernel/debug/f2fs/status includes:
26   - - major file system information managed by f2fs currently
  26 + - major filesystem information managed by f2fs currently
27 27 - average SIT information about whole segments
28 28 - current memory footprint consumed by f2fs.
29 29  
... ... @@ -68,7 +68,7 @@
68 68 bool "F2FS consistency checking feature"
69 69 depends on F2FS_FS
70 70 help
71   - Enables BUG_ONs which check the file system consistency in runtime.
  71 + Enables BUG_ONs which check the filesystem consistency in runtime.
72 72  
73 73 If you want to improve the performance, say N.
fs/f2fs/checkpoint.c
... ... @@ -160,14 +160,11 @@
160 160 goto redirty_out;
161 161 if (wbc->for_reclaim)
162 162 goto redirty_out;
  163 + if (unlikely(f2fs_cp_error(sbi)))
  164 + goto redirty_out;
163 165  
164   - /* Should not write any meta pages, if any IO error was occurred */
165   - if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
166   - goto no_write;
167   -
168 166 f2fs_wait_on_page_writeback(page, META);
169 167 write_meta_page(sbi, page);
170   -no_write:
171 168 dec_page_count(sbi, F2FS_DIRTY_META);
172 169 unlock_page(page);
173 170 return 0;
... ... @@ -348,7 +345,7 @@
348 345 return e ? true : false;
349 346 }
350 347  
351   -static void release_dirty_inode(struct f2fs_sb_info *sbi)
  348 +void release_dirty_inode(struct f2fs_sb_info *sbi)
352 349 {
353 350 struct ino_entry *e, *tmp;
354 351 int i;
... ... @@ -446,8 +443,8 @@
446 443 struct f2fs_orphan_block *orphan_blk = NULL;
447 444 unsigned int nentries = 0;
448 445 unsigned short index;
449   - unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
450   - (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
  446 + unsigned short orphan_blocks =
  447 + (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans);
451 448 struct page *page = NULL;
452 449 struct ino_entry *orphan = NULL;
453 450  
... ... @@ -737,7 +734,7 @@
737 734 /*
738 735 * Freeze all the FS-operations for checkpoint.
739 736 */
740   -static void block_operations(struct f2fs_sb_info *sbi)
  737 +static int block_operations(struct f2fs_sb_info *sbi)
741 738 {
742 739 struct writeback_control wbc = {
743 740 .sync_mode = WB_SYNC_ALL,
... ... @@ -745,6 +742,7 @@
745 742 .for_reclaim = 0,
746 743 };
747 744 struct blk_plug plug;
  745 + int err = 0;
748 746  
749 747 blk_start_plug(&plug);
750 748  
751 749  
... ... @@ -754,11 +752,15 @@
754 752 if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
755 753 f2fs_unlock_all(sbi);
756 754 sync_dirty_dir_inodes(sbi);
  755 + if (unlikely(f2fs_cp_error(sbi))) {
  756 + err = -EIO;
  757 + goto out;
  758 + }
757 759 goto retry_flush_dents;
758 760 }
759 761  
760 762 /*
761   - * POR: we should ensure that there is no dirty node pages
  763 + * POR: we should ensure that there are no dirty node pages
762 764 * until finishing nat/sit flush.
763 765 */
764 766 retry_flush_nodes:
765 767  
766 768  
... ... @@ -767,9 +769,16 @@
767 769 if (get_pages(sbi, F2FS_DIRTY_NODES)) {
768 770 up_write(&sbi->node_write);
769 771 sync_node_pages(sbi, 0, &wbc);
  772 + if (unlikely(f2fs_cp_error(sbi))) {
  773 + f2fs_unlock_all(sbi);
  774 + err = -EIO;
  775 + goto out;
  776 + }
770 777 goto retry_flush_nodes;
771 778 }
  779 +out:
772 780 blk_finish_plug(&plug);
  781 + return err;
773 782 }
774 783  
775 784 static void unblock_operations(struct f2fs_sb_info *sbi)
776 785  
... ... @@ -813,8 +822,11 @@
813 822 discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
814 823  
815 824 /* Flush all the NAT/SIT pages */
816   - while (get_pages(sbi, F2FS_DIRTY_META))
  825 + while (get_pages(sbi, F2FS_DIRTY_META)) {
817 826 sync_meta_pages(sbi, META, LONG_MAX);
  827 + if (unlikely(f2fs_cp_error(sbi)))
  828 + return;
  829 + }
818 830  
819 831 next_free_nid(sbi, &last_nid);
820 832  
... ... @@ -825,7 +837,7 @@
825 837 ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
826 838 ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
827 839 ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
828   - for (i = 0; i < 3; i++) {
  840 + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
829 841 ckpt->cur_node_segno[i] =
830 842 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
831 843 ckpt->cur_node_blkoff[i] =
... ... @@ -833,7 +845,7 @@
833 845 ckpt->alloc_type[i + CURSEG_HOT_NODE] =
834 846 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
835 847 }
836   - for (i = 0; i < 3; i++) {
  848 + for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
837 849 ckpt->cur_data_segno[i] =
838 850 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
839 851 ckpt->cur_data_blkoff[i] =
840 852  
841 853  
842 854  
... ... @@ -848,24 +860,23 @@
848 860  
849 861 /* 2 cp + n data seg summary + orphan inode blocks */
850 862 data_sum_blocks = npages_for_summary_flush(sbi);
851   - if (data_sum_blocks < 3)
  863 + if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
852 864 set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
853 865 else
854 866 clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
855 867  
856   - orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
857   - / F2FS_ORPHANS_PER_BLOCK;
  868 + orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans);
858 869 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
859 870 orphan_blocks);
860 871  
861 872 if (is_umount) {
862 873 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
863   - ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
  874 + ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
864 875 cp_payload_blks + data_sum_blocks +
865 876 orphan_blocks + NR_CURSEG_NODE_TYPE);
866 877 } else {
867 878 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
868   - ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
  879 + ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
869 880 cp_payload_blks + data_sum_blocks +
870 881 orphan_blocks);
871 882 }
... ... @@ -924,6 +935,9 @@
924 935 /* wait for previous submitted node/meta pages writeback */
925 936 wait_on_all_pages_writeback(sbi);
926 937  
  938 + if (unlikely(f2fs_cp_error(sbi)))
  939 + return;
  940 +
927 941 filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
928 942 filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
929 943  
930 944  
... ... @@ -934,15 +948,17 @@
934 948 /* Here, we only have one bio having CP pack */
935 949 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
936 950  
937   - if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
938   - clear_prefree_segments(sbi);
939   - release_dirty_inode(sbi);
940   - F2FS_RESET_SB_DIRT(sbi);
941   - }
  951 + release_dirty_inode(sbi);
  952 +
  953 + if (unlikely(f2fs_cp_error(sbi)))
  954 + return;
  955 +
  956 + clear_prefree_segments(sbi);
  957 + F2FS_RESET_SB_DIRT(sbi);
942 958 }
943 959  
944 960 /*
945   - * We guarantee that this checkpoint procedure should not fail.
  961 + * We guarantee that this checkpoint procedure will not fail.
946 962 */
947 963 void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
948 964 {
949 965  
... ... @@ -952,8 +968,14 @@
952 968 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");
953 969  
954 970 mutex_lock(&sbi->cp_mutex);
955   - block_operations(sbi);
956 971  
  972 + if (!sbi->s_dirty)
  973 + goto out;
  974 + if (unlikely(f2fs_cp_error(sbi)))
  975 + goto out;
  976 + if (block_operations(sbi))
  977 + goto out;
  978 +
957 979 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
958 980  
959 981 f2fs_submit_merged_bio(sbi, DATA, WRITE);
960 982  
... ... @@ -976,9 +998,9 @@
976 998 do_checkpoint(sbi, is_umount);
977 999  
978 1000 unblock_operations(sbi);
979   - mutex_unlock(&sbi->cp_mutex);
980   -
981 1001 stat_inc_cp_count(sbi->stat_info);
  1002 +out:
  1003 + mutex_unlock(&sbi->cp_mutex);
982 1004 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
983 1005 }
984 1006  
... ... @@ -999,8 +1021,8 @@
999 1021 * for cp pack we can have max 1020*504 orphan entries
1000 1022 */
1001 1023 sbi->n_orphans = 0;
1002   - sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
1003   - * F2FS_ORPHANS_PER_BLOCK;
  1024 + sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
  1025 + NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
1004 1026 }
1005 1027  
1006 1028 int __init create_checkpoint_caches(void)
... ... @@ -53,7 +53,7 @@
53 53 struct page *page = bvec->bv_page;
54 54  
55 55 if (unlikely(err)) {
56   - SetPageError(page);
  56 + set_page_dirty(page);
57 57 set_bit(AS_EIO, &page->mapping->flags);
58 58 f2fs_stop_checkpoint(sbi);
59 59 }
... ... @@ -691,7 +691,7 @@
691 691 allocated = true;
692 692 blkaddr = dn.data_blkaddr;
693 693 }
694   - /* Give more consecutive addresses for the read ahead */
  694 + /* Give more consecutive addresses for the readahead */
695 695 if (blkaddr == (bh_result->b_blocknr + ofs)) {
696 696 ofs++;
697 697 dn.ofs_in_node++;
... ... @@ -739,7 +739,7 @@
739 739  
740 740 trace_f2fs_readpage(page, DATA);
741 741  
742   - /* If the file has inline data, try to read it directlly */
  742 + /* If the file has inline data, try to read it directly */
743 743 if (f2fs_has_inline_data(inode))
744 744 ret = f2fs_read_inline_data(inode, page);
745 745 else
746 746  
... ... @@ -836,10 +836,19 @@
836 836  
837 837 /* Dentry blocks are controlled by checkpoint */
838 838 if (S_ISDIR(inode->i_mode)) {
  839 + if (unlikely(f2fs_cp_error(sbi)))
  840 + goto redirty_out;
839 841 err = do_write_data_page(page, &fio);
840 842 goto done;
841 843 }
842 844  
  845 + /* we should bypass data pages to let the kworker jobs proceed */
  846 + if (unlikely(f2fs_cp_error(sbi))) {
  847 + SetPageError(page);
  848 + unlock_page(page);
  849 + return 0;
  850 + }
  851 +
843 852 if (!wbc->for_reclaim)
844 853 need_balance_fs = true;
845 854 else if (has_not_enough_free_secs(sbi, 0))
... ... @@ -927,7 +936,7 @@
927 936  
928 937 if (to > inode->i_size) {
929 938 truncate_pagecache(inode, inode->i_size);
930   - truncate_blocks(inode, inode->i_size);
  939 + truncate_blocks(inode, inode->i_size, true);
931 940 }
932 941 }
933 942  
... ... @@ -946,7 +955,7 @@
946 955  
947 956 f2fs_balance_fs(sbi);
948 957 repeat:
949   - err = f2fs_convert_inline_data(inode, pos + len);
  958 + err = f2fs_convert_inline_data(inode, pos + len, NULL);
950 959 if (err)
951 960 goto fail;
952 961  
... ... @@ -32,7 +32,7 @@
32 32 struct f2fs_stat_info *si = F2FS_STAT(sbi);
33 33 int i;
34 34  
35   - /* valid check of the segment numbers */
  35 + /* validation check of the segment numbers */
36 36 si->hit_ext = sbi->read_hit_ext;
37 37 si->total_ext = sbi->total_hit_ext;
38 38 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
... ... @@ -152,7 +152,7 @@
152 152 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi));
153 153 si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi));
154 154  
155   - /* buld nm */
  155 + /* build nm */
156 156 si->base_mem += sizeof(struct f2fs_nm_info);
157 157 si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
158 158  
... ... @@ -124,7 +124,7 @@
124 124  
125 125 /*
126 126 * For the most part, it should be a bug when name_len is zero.
127   - * We stop here for figuring out where the bugs are occurred.
  127 + * We stop here for figuring out where the bug has occurred.
128 128 */
129 129 f2fs_bug_on(!de->name_len);
130 130  
... ... @@ -391,7 +391,7 @@
391 391 error:
392 392 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
393 393 truncate_inode_pages(&inode->i_data, 0);
394   - truncate_blocks(inode, 0);
  394 + truncate_blocks(inode, 0, false);
395 395 remove_dirty_dir_inode(inode);
396 396 remove_inode_page(inode);
397 397 return ERR_PTR(err);
... ... @@ -563,7 +563,7 @@
563 563 }
564 564  
565 565 /*
566   - * It only removes the dentry from the dentry page,corresponding name
  566 + * It only removes the dentry from the dentry page, corresponding name
567 567 * entry in name page does not need to be touched during deletion.
568 568 */
569 569 void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
... ... @@ -24,7 +24,7 @@
24 24 #define f2fs_bug_on(condition) BUG_ON(condition)
25 25 #define f2fs_down_write(x, y) down_write_nest_lock(x, y)
26 26 #else
27   -#define f2fs_bug_on(condition)
  27 +#define f2fs_bug_on(condition) WARN_ON(condition)
28 28 #define f2fs_down_write(x, y) down_write(x)
29 29 #endif
30 30  
... ... @@ -395,7 +395,7 @@
395 395 };
396 396  
397 397 /*
398   - * The below are the page types of bios used in submti_bio().
  398 + * The below are the page types of bios used in submit_bio().
399 399 * The available types are:
400 400 * DATA User data pages. It operates as async mode.
401 401 * NODE Node pages. It operates as async mode.
... ... @@ -470,7 +470,7 @@
470 470 struct list_head dir_inode_list; /* dir inode list */
471 471 spinlock_t dir_inode_lock; /* for dir inode list lock */
472 472  
473   - /* basic file system units */
  473 + /* basic filesystem units */
474 474 unsigned int log_sectors_per_block; /* log2 sectors per block */
475 475 unsigned int log_blocksize; /* log2 block size */
476 476 unsigned int blocksize; /* block size */
... ... @@ -799,7 +799,7 @@
799 799  
800 800 /*
801 801 * odd numbered checkpoint should at cp segment 0
802   - * and even segent must be at cp segment 1
  802 + * and even segment must be at cp segment 1
803 803 */
804 804 if (!(ckpt_version & 1))
805 805 start_addr += sbi->blocks_per_seg;
... ... @@ -1096,6 +1096,11 @@
1096 1096 return sb->s_flags & MS_RDONLY;
1097 1097 }
1098 1098  
  1099 +static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi)
  1100 +{
  1101 + return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
  1102 +}
  1103 +
1099 1104 static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
1100 1105 {
1101 1106 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
... ... @@ -1117,7 +1122,7 @@
1117 1122 */
1118 1123 int f2fs_sync_file(struct file *, loff_t, loff_t, int);
1119 1124 void truncate_data_blocks(struct dnode_of_data *);
1120   -int truncate_blocks(struct inode *, u64);
  1125 +int truncate_blocks(struct inode *, u64, bool);
1121 1126 void f2fs_truncate(struct inode *);
1122 1127 int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
1123 1128 int f2fs_setattr(struct dentry *, struct iattr *);
1124 1129  
... ... @@ -1202,10 +1207,8 @@
1202 1207 bool alloc_nid(struct f2fs_sb_info *, nid_t *);
1203 1208 void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1204 1209 void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
1205   -void recover_node_page(struct f2fs_sb_info *, struct page *,
1206   - struct f2fs_summary *, struct node_info *, block_t);
1207 1210 void recover_inline_xattr(struct inode *, struct page *);
1208   -bool recover_xattr_data(struct inode *, struct page *, block_t);
  1211 +void recover_xattr_data(struct inode *, struct page *, block_t);
1209 1212 int recover_inode_page(struct f2fs_sb_info *, struct page *);
1210 1213 int restore_node_summary(struct f2fs_sb_info *, unsigned int,
1211 1214 struct f2fs_summary_block *);
... ... @@ -1238,8 +1241,6 @@
1238 1241 void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *);
1239 1242 void recover_data_page(struct f2fs_sb_info *, struct page *,
1240 1243 struct f2fs_summary *, block_t, block_t);
1241   -void rewrite_node_page(struct f2fs_sb_info *, struct page *,
1242   - struct f2fs_summary *, block_t, block_t);
1243 1244 void allocate_data_block(struct f2fs_sb_info *, struct page *,
1244 1245 block_t, block_t *, struct f2fs_summary *, int);
1245 1246 void f2fs_wait_on_page_writeback(struct page *, enum page_type);
... ... @@ -1262,6 +1263,7 @@
1262 1263 long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1263 1264 void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1264 1265 void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
  1266 +void release_dirty_inode(struct f2fs_sb_info *);
1265 1267 bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
1266 1268 int acquire_orphan_inode(struct f2fs_sb_info *);
1267 1269 void release_orphan_inode(struct f2fs_sb_info *);
1268 1270  
... ... @@ -1439,9 +1441,9 @@
1439 1441 */
1440 1442 bool f2fs_may_inline(struct inode *);
1441 1443 int f2fs_read_inline_data(struct inode *, struct page *);
1442   -int f2fs_convert_inline_data(struct inode *, pgoff_t);
  1444 +int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *);
1443 1445 int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
1444 1446 void truncate_inline_data(struct inode *, u64);
1445   -int recover_inline_data(struct inode *, struct page *);
  1447 +bool recover_inline_data(struct inode *, struct page *);
1446 1448 #endif
... ... @@ -41,6 +41,11 @@
41 41  
42 42 sb_start_pagefault(inode->i_sb);
43 43  
  44 + /* force to convert with normal data indices */
  45 + err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page);
  46 + if (err)
  47 + goto out;
  48 +
44 49 /* block allocation */
45 50 f2fs_lock_op(sbi);
46 51 set_new_dnode(&dn, inode, NULL, NULL, 0);
... ... @@ -110,6 +115,25 @@
110 115 return 1;
111 116 }
112 117  
  118 +static inline bool need_do_checkpoint(struct inode *inode)
  119 +{
  120 + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
  121 + bool need_cp = false;
  122 +
  123 + if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
  124 + need_cp = true;
  125 + else if (file_wrong_pino(inode))
  126 + need_cp = true;
  127 + else if (!space_for_roll_forward(sbi))
  128 + need_cp = true;
  129 + else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
  130 + need_cp = true;
  131 + else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
  132 + need_cp = true;
  133 +
  134 + return need_cp;
  135 +}
  136 +
113 137 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
114 138 {
115 139 struct inode *inode = file->f_mapping->host;
116 140  
... ... @@ -154,23 +178,12 @@
154 178 /* guarantee free sections for fsync */
155 179 f2fs_balance_fs(sbi);
156 180  
157   - down_read(&fi->i_sem);
158   -
159 181 /*
160 182 * Both of fdatasync() and fsync() are able to be recovered from
161 183 * sudden-power-off.
162 184 */
163   - if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
164   - need_cp = true;
165   - else if (file_wrong_pino(inode))
166   - need_cp = true;
167   - else if (!space_for_roll_forward(sbi))
168   - need_cp = true;
169   - else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
170   - need_cp = true;
171   - else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
172   - need_cp = true;
173   -
  185 + down_read(&fi->i_sem);
  186 + need_cp = need_do_checkpoint(inode);
174 187 up_read(&fi->i_sem);
175 188  
176 189 if (need_cp) {
... ... @@ -288,7 +301,7 @@
288 301 if (err && err != -ENOENT) {
289 302 goto fail;
290 303 } else if (err == -ENOENT) {
291   - /* direct node is not exist */
  304 + /* direct node does not exist */
292 305 if (whence == SEEK_DATA) {
293 306 pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
294 307 F2FS_I(inode));
... ... @@ -417,7 +430,7 @@
417 430 f2fs_put_page(page, 1);
418 431 }
419 432  
420   -int truncate_blocks(struct inode *inode, u64 from)
  433 +int truncate_blocks(struct inode *inode, u64 from, bool lock)
421 434 {
422 435 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
423 436 unsigned int blocksize = inode->i_sb->s_blocksize;
424 437  
... ... @@ -433,14 +446,16 @@
433 446 free_from = (pgoff_t)
434 447 ((from + blocksize - 1) >> (sbi->log_blocksize));
435 448  
436   - f2fs_lock_op(sbi);
  449 + if (lock)
  450 + f2fs_lock_op(sbi);
437 451  
438 452 set_new_dnode(&dn, inode, NULL, NULL, 0);
439 453 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
440 454 if (err) {
441 455 if (err == -ENOENT)
442 456 goto free_next;
443   - f2fs_unlock_op(sbi);
  457 + if (lock)
  458 + f2fs_unlock_op(sbi);
444 459 trace_f2fs_truncate_blocks_exit(inode, err);
445 460 return err;
446 461 }
... ... @@ -458,7 +473,8 @@
458 473 f2fs_put_dnode(&dn);
459 474 free_next:
460 475 err = truncate_inode_blocks(inode, free_from);
461   - f2fs_unlock_op(sbi);
  476 + if (lock)
  477 + f2fs_unlock_op(sbi);
462 478 done:
463 479 /* lastly zero out the first data page */
464 480 truncate_partial_data_page(inode, from);
... ... @@ -475,7 +491,7 @@
475 491  
476 492 trace_f2fs_truncate(inode);
477 493  
478   - if (!truncate_blocks(inode, i_size_read(inode))) {
  494 + if (!truncate_blocks(inode, i_size_read(inode), true)) {
479 495 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
480 496 mark_inode_dirty(inode);
481 497 }
... ... @@ -533,7 +549,7 @@
533 549  
534 550 if ((attr->ia_valid & ATTR_SIZE) &&
535 551 attr->ia_size != i_size_read(inode)) {
536   - err = f2fs_convert_inline_data(inode, attr->ia_size);
  552 + err = f2fs_convert_inline_data(inode, attr->ia_size, NULL);
537 553 if (err)
538 554 return err;
539 555  
... ... @@ -622,7 +638,7 @@
622 638 loff_t off_start, off_end;
623 639 int ret = 0;
624 640  
625   - ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1);
  641 + ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
626 642 if (ret)
627 643 return ret;
628 644  
... ... @@ -678,7 +694,7 @@
678 694 if (ret)
679 695 return ret;
680 696  
681   - ret = f2fs_convert_inline_data(inode, offset + len);
  697 + ret = f2fs_convert_inline_data(inode, offset + len, NULL);
682 698 if (ret)
683 699 return ret;
684 700  
... ... @@ -58,7 +58,7 @@
58 58 * 3. IO subsystem is idle by checking the # of requests in
59 59 * bdev's request list.
60 60 *
61   - * Note) We have to avoid triggering GCs too much frequently.
  61 + * Note) We have to avoid triggering GCs frequently.
62 62 * Because it is possible that some segments can be
63 63 * invalidated soon after by user update or deletion.
64 64 * So, I'd like to wait some time to collect dirty segments.
... ... @@ -222,7 +222,7 @@
222 222  
223 223 u = (vblocks * 100) >> sbi->log_blocks_per_seg;
224 224  
225   - /* Handle if the system time is changed by user */
  225 + /* Handle if the system time has been changed by the user */
226 226 if (mtime < sit_i->min_mtime)
227 227 sit_i->min_mtime = mtime;
228 228 if (mtime > sit_i->max_mtime)
... ... @@ -593,7 +593,7 @@
593 593  
594 594 if (phase == 2) {
595 595 inode = f2fs_iget(sb, dni.ino);
596   - if (IS_ERR(inode))
  596 + if (IS_ERR(inode) || is_bad_inode(inode))
597 597 continue;
598 598  
599 599 start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
... ... @@ -693,7 +693,7 @@
693 693 gc_more:
694 694 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
695 695 goto stop;
696   - if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
  696 + if (unlikely(f2fs_cp_error(sbi)))
697 697 goto stop;
698 698  
699 699 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
... ... @@ -91,7 +91,7 @@
91 91 block_t invalid_user_blocks = sbi->user_block_count -
92 92 written_block_count(sbi);
93 93 /*
94   - * Background GC is triggered with the following condition.
  94 + * Background GC is triggered with the following conditions.
95 95 * 1. There are a number of invalid blocks.
96 96 * 2. There is not enough free space.
97 97 */
... ... @@ -42,7 +42,8 @@
42 42 buf[1] += b1;
43 43 }
44 44  
45   -static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
  45 +static void str2hashbuf(const unsigned char *msg, size_t len,
  46 + unsigned int *buf, int num)
46 47 {
47 48 unsigned pad, val;
48 49 int i;
49 50  
... ... @@ -73,9 +74,9 @@
73 74 {
74 75 __u32 hash;
75 76 f2fs_hash_t f2fs_hash;
76   - const char *p;
  77 + const unsigned char *p;
77 78 __u32 in[8], buf[4];
78   - const char *name = name_info->name;
  79 + const unsigned char *name = name_info->name;
79 80 size_t len = name_info->len;
80 81  
81 82 if ((len <= 2) && (name[0] == '.') &&
... ... @@ -68,7 +68,7 @@
68 68  
69 69 static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
70 70 {
71   - int err;
  71 + int err = 0;
72 72 struct page *ipage;
73 73 struct dnode_of_data dn;
74 74 void *src_addr, *dst_addr;
... ... @@ -86,6 +86,10 @@
86 86 goto out;
87 87 }
88 88  
  89 + /* someone else converted inline_data already */
  90 + if (!f2fs_has_inline_data(inode))
  91 + goto out;
  92 +
89 93 /*
90 94 * i_addr[0] is not used for inline data,
91 95 * so reserving new block will not destroy inline data
92 96  
... ... @@ -124,9 +128,10 @@
124 128 return err;
125 129 }
126 130  
127   -int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size)
  131 +int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size,
  132 + struct page *page)
128 133 {
129   - struct page *page;
  134 + struct page *new_page = page;
130 135 int err;
131 136  
132 137 if (!f2fs_has_inline_data(inode))
133 138  
134 139  
... ... @@ -134,17 +139,20 @@
134 139 else if (to_size <= MAX_INLINE_DATA)
135 140 return 0;
136 141  
137   - page = grab_cache_page(inode->i_mapping, 0);
138   - if (!page)
139   - return -ENOMEM;
  142 + if (!page || page->index != 0) {
  143 + new_page = grab_cache_page(inode->i_mapping, 0);
  144 + if (!new_page)
  145 + return -ENOMEM;
  146 + }
140 147  
141   - err = __f2fs_convert_inline_data(inode, page);
142   - f2fs_put_page(page, 1);
  148 + err = __f2fs_convert_inline_data(inode, new_page);
  149 + if (!page || page->index != 0)
  150 + f2fs_put_page(new_page, 1);
143 151 return err;
144 152 }
145 153  
146 154 int f2fs_write_inline_data(struct inode *inode,
147   - struct page *page, unsigned size)
  155 + struct page *page, unsigned size)
148 156 {
149 157 void *src_addr, *dst_addr;
150 158 struct page *ipage;
... ... @@ -199,7 +207,7 @@
199 207 f2fs_put_page(ipage, 1);
200 208 }
201 209  
202   -int recover_inline_data(struct inode *inode, struct page *npage)
  210 +bool recover_inline_data(struct inode *inode, struct page *npage)
203 211 {
204 212 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
205 213 struct f2fs_inode *ri = NULL;
... ... @@ -218,7 +226,7 @@
218 226 ri = F2FS_INODE(npage);
219 227  
220 228 if (f2fs_has_inline_data(inode) &&
221   - ri && ri->i_inline & F2FS_INLINE_DATA) {
  229 + ri && (ri->i_inline & F2FS_INLINE_DATA)) {
222 230 process_inline:
223 231 ipage = get_node_page(sbi, inode->i_ino);
224 232 f2fs_bug_on(IS_ERR(ipage));
... ... @@ -230,7 +238,7 @@
230 238 memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
231 239 update_inode(inode, ipage);
232 240 f2fs_put_page(ipage, 1);
233   - return -1;
  241 + return true;
234 242 }
235 243  
236 244 if (f2fs_has_inline_data(inode)) {
237 245  
... ... @@ -242,11 +250,11 @@
242 250 clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
243 251 update_inode(inode, ipage);
244 252 f2fs_put_page(ipage, 1);
245   - } else if (ri && ri->i_inline & F2FS_INLINE_DATA) {
246   - truncate_blocks(inode, 0);
  253 + } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
  254 + truncate_blocks(inode, 0, false);
247 255 set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
248 256 goto process_inline;
249 257 }
250   - return 0;
  258 + return false;
251 259 }
... ... @@ -134,9 +134,7 @@
134 134 return 0;
135 135 out:
136 136 clear_nlink(inode);
137   - unlock_new_inode(inode);
138   - make_bad_inode(inode);
139   - iput(inode);
  137 + iget_failed(inode);
140 138 alloc_nid_failed(sbi, ino);
141 139 return err;
142 140 }
... ... @@ -229,7 +227,7 @@
229 227 f2fs_delete_entry(de, page, inode);
230 228 f2fs_unlock_op(sbi);
231 229  
232   - /* In order to evict this inode, we set it dirty */
  230 + /* In order to evict this inode, we set it dirty */
233 231 mark_inode_dirty(inode);
234 232 fail:
235 233 trace_f2fs_unlink_exit(inode, err);
... ... @@ -267,9 +265,7 @@
267 265 return err;
268 266 out:
269 267 clear_nlink(inode);
270   - unlock_new_inode(inode);
271   - make_bad_inode(inode);
272   - iput(inode);
  268 + iget_failed(inode);
273 269 alloc_nid_failed(sbi, inode->i_ino);
274 270 return err;
275 271 }
... ... @@ -308,9 +304,7 @@
308 304 out_fail:
309 305 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
310 306 clear_nlink(inode);
311   - unlock_new_inode(inode);
312   - make_bad_inode(inode);
313   - iput(inode);
  307 + iget_failed(inode);
314 308 alloc_nid_failed(sbi, inode->i_ino);
315 309 return err;
316 310 }
... ... @@ -354,9 +348,7 @@
354 348 return 0;
355 349 out:
356 350 clear_nlink(inode);
357   - unlock_new_inode(inode);
358   - make_bad_inode(inode);
359   - iput(inode);
  351 + iget_failed(inode);
360 352 alloc_nid_failed(sbi, inode->i_ino);
361 353 return err;
362 354 }
... ... @@ -688,9 +680,7 @@
688 680 out:
689 681 f2fs_unlock_op(sbi);
690 682 clear_nlink(inode);
691   - unlock_new_inode(inode);
692   - make_bad_inode(inode);
693   - iput(inode);
  683 + iget_failed(inode);
694 684 alloc_nid_failed(sbi, inode->i_ino);
695 685 return err;
696 686 }
... ... @@ -704,7 +694,6 @@
704 694 .mkdir = f2fs_mkdir,
705 695 .rmdir = f2fs_rmdir,
706 696 .mknod = f2fs_mknod,
707   - .rename = f2fs_rename,
708 697 .rename2 = f2fs_rename2,
709 698 .tmpfile = f2fs_tmpfile,
710 699 .getattr = f2fs_getattr,
... ... @@ -237,7 +237,7 @@
237 237 nat_get_blkaddr(e) != NULL_ADDR &&
238 238 new_blkaddr == NEW_ADDR);
239 239  
240   - /* increament version no as node is removed */
  240 + /* increment version no as node is removed */
241 241 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
242 242 unsigned char version = nat_get_version(e);
243 243 nat_set_version(e, inc_node_version(version));
... ... @@ -274,7 +274,7 @@
274 274 }
275 275  
276 276 /*
277   - * This function returns always success
  277 + * This function always returns success
278 278 */
279 279 void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
280 280 {
... ... @@ -650,7 +650,7 @@
650 650  
651 651 /* get indirect nodes in the path */
652 652 for (i = 0; i < idx + 1; i++) {
653   - /* refernece count'll be increased */
  653 + /* reference count'll be increased */
654 654 pages[i] = get_node_page(sbi, nid[i]);
655 655 if (IS_ERR(pages[i])) {
656 656 err = PTR_ERR(pages[i]);
657 657  
658 658  
659 659  
660 660  
... ... @@ -823,22 +823,26 @@
823 823 */
824 824 void remove_inode_page(struct inode *inode)
825 825 {
826   - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
827   - struct page *page;
828   - nid_t ino = inode->i_ino;
829 826 struct dnode_of_data dn;
830 827  
831   - page = get_node_page(sbi, ino);
832   - if (IS_ERR(page))
  828 + set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
  829 + if (get_dnode_of_data(&dn, 0, LOOKUP_NODE))
833 830 return;
834 831  
835   - if (truncate_xattr_node(inode, page)) {
836   - f2fs_put_page(page, 1);
  832 + if (truncate_xattr_node(inode, dn.inode_page)) {
  833 + f2fs_put_dnode(&dn);
837 834 return;
838 835 }
839   - /* 0 is possible, after f2fs_new_inode() is failed */
  836 +
  837 + /* remove potential inline_data blocks */
  838 + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
  839 + S_ISLNK(inode->i_mode))
  840 + truncate_data_blocks_range(&dn, 1);
  841 +
  842 + /* 0 is possible, after f2fs_new_inode() has failed */
840 843 f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
841   - set_new_dnode(&dn, inode, page, page, ino);
  844 +
  845 + /* will put inode & node pages */
842 846 truncate_node(&dn);
843 847 }
844 848  
845 849  
... ... @@ -1129,9 +1133,12 @@
1129 1133 set_fsync_mark(page, 0);
1130 1134 set_dentry_mark(page, 0);
1131 1135 }
1132   - NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
1133   - wrote++;
1134 1136  
  1137 + if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
  1138 + unlock_page(page);
  1139 + else
  1140 + wrote++;
  1141 +
1135 1142 if (--wbc->nr_to_write == 0)
1136 1143 break;
1137 1144 }
... ... @@ -1212,6 +1219,8 @@
1212 1219  
1213 1220 if (unlikely(sbi->por_doing))
1214 1221 goto redirty_out;
  1222 + if (unlikely(f2fs_cp_error(sbi)))
  1223 + goto redirty_out;
1215 1224  
1216 1225 f2fs_wait_on_page_writeback(page, NODE);
1217 1226  
... ... @@ -1540,15 +1549,6 @@
1540 1549 kmem_cache_free(free_nid_slab, i);
1541 1550 }
1542 1551  
1543   -void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1544   - struct f2fs_summary *sum, struct node_info *ni,
1545   - block_t new_blkaddr)
1546   -{
1547   - rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
1548   - set_node_addr(sbi, ni, new_blkaddr, false);
1549   - clear_node_page_dirty(page);
1550   -}
1551   -
1552 1552 void recover_inline_xattr(struct inode *inode, struct page *page)
1553 1553 {
1554 1554 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1555 1555  
1556 1556  
1557 1557  
1558 1558  
... ... @@ -1557,40 +1557,33 @@
1557 1557 struct page *ipage;
1558 1558 struct f2fs_inode *ri;
1559 1559  
1560   - if (!f2fs_has_inline_xattr(inode))
1561   - return;
1562   -
1563   - if (!IS_INODE(page))
1564   - return;
1565   -
1566   - ri = F2FS_INODE(page);
1567   - if (!(ri->i_inline & F2FS_INLINE_XATTR))
1568   - return;
1569   -
1570 1560 ipage = get_node_page(sbi, inode->i_ino);
1571 1561 f2fs_bug_on(IS_ERR(ipage));
1572 1562  
  1563 + ri = F2FS_INODE(page);
  1564 + if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
  1565 + clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR);
  1566 + goto update_inode;
  1567 + }
  1568 +
1573 1569 dst_addr = inline_xattr_addr(ipage);
1574 1570 src_addr = inline_xattr_addr(page);
1575 1571 inline_size = inline_xattr_size(inode);
1576 1572  
1577 1573 f2fs_wait_on_page_writeback(ipage, NODE);
1578 1574 memcpy(dst_addr, src_addr, inline_size);
1579   -
  1575 +update_inode:
1580 1576 update_inode(inode, ipage);
1581 1577 f2fs_put_page(ipage, 1);
1582 1578 }
1583 1579  
1584   -bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
  1580 +void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1585 1581 {
1586 1582 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1587 1583 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1588 1584 nid_t new_xnid = nid_of_node(page);
1589 1585 struct node_info ni;
1590 1586  
1591   - if (!f2fs_has_xattr_block(ofs_of_node(page)))
1592   - return false;
1593   -
1594 1587 /* 1: invalidate the previous xattr nid */
1595 1588 if (!prev_xnid)
1596 1589 goto recover_xnid;
... ... @@ -1618,7 +1611,6 @@
1618 1611 set_node_addr(sbi, &ni, blkaddr, false);
1619 1612  
1620 1613 update_inode_page(inode);
1621   - return true;
1622 1614 }
1623 1615  
1624 1616 int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
... ... @@ -1637,7 +1629,7 @@
1637 1629 if (!ipage)
1638 1630 return -ENOMEM;
1639 1631  
1640   - /* Should not use this inode from free nid list */
  1632 + /* Should not use this inode from free nid list */
1641 1633 remove_free_nid(NM_I(sbi), ino);
1642 1634  
1643 1635 SetPageUptodate(ipage);
... ... @@ -1651,6 +1643,7 @@
1651 1643 dst->i_blocks = cpu_to_le64(1);
1652 1644 dst->i_links = cpu_to_le32(1);
1653 1645 dst->i_xattr_nid = 0;
  1646 + dst->i_inline = src->i_inline & F2FS_INLINE_XATTR;
1654 1647  
1655 1648 new_ni = old_ni;
1656 1649 new_ni.ino = ino;
1657 1650  
... ... @@ -1659,13 +1652,14 @@
1659 1652 WARN_ON(1);
1660 1653 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1661 1654 inc_valid_inode_count(sbi);
  1655 + set_page_dirty(ipage);
1662 1656 f2fs_put_page(ipage, 1);
1663 1657 return 0;
1664 1658 }
1665 1659  
1666 1660 /*
1667 1661 * ra_sum_pages() merge contiguous pages into one bio and submit.
1668   - * these pre-readed pages are alloced in bd_inode's mapping tree.
  1662 + * these pre-read pages are allocated in bd_inode's mapping tree.
1669 1663 */
1670 1664 static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
1671 1665 int start, int nrpages)
... ... @@ -1709,7 +1703,7 @@
1709 1703 for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
1710 1704 nrpages = min(last_offset - i, bio_blocks);
1711 1705  
1712   - /* read ahead node pages */
  1706 + /* readahead node pages */
1713 1707 nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
1714 1708 if (!nrpages)
1715 1709 return -ENOMEM;
... ... @@ -1967,7 +1961,7 @@
1967 1961 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
1968 1962  
1969 1963 /* not used nids: 0, node, meta, (and root counted as valid node) */
1970   - nm_i->available_nids = nm_i->max_nid - 3;
  1964 + nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
1971 1965 nm_i->fcnt = 0;
1972 1966 nm_i->nat_cnt = 0;
1973 1967 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
... ... @@ -62,8 +62,10 @@
62 62 }
63 63 retry:
64 64 de = f2fs_find_entry(dir, &name, &page);
65   - if (de && inode->i_ino == le32_to_cpu(de->ino))
  65 + if (de && inode->i_ino == le32_to_cpu(de->ino)) {
  66 + clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
66 67 goto out_unmap_put;
  68 + }
67 69 if (de) {
68 70 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
69 71 if (IS_ERR(einode)) {
70 72  
71 73  
... ... @@ -300,14 +302,19 @@
300 302 struct node_info ni;
301 303 int err = 0, recovered = 0;
302 304  
303   - recover_inline_xattr(inode, page);
  305 + /* step 1: recover xattr */
  306 + if (IS_INODE(page)) {
  307 + recover_inline_xattr(inode, page);
  308 + } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
  309 + recover_xattr_data(inode, page, blkaddr);
  310 + goto out;
  311 + }
304 312  
  313 + /* step 2: recover inline data */
305 314 if (recover_inline_data(inode, page))
306 315 goto out;
307 316  
308   - if (recover_xattr_data(inode, page, blkaddr))
309   - goto out;
310   -
  317 + /* step 3: recover data indices */
311 318 start = start_bidx_of_node(ofs_of_node(page), fi);
312 319 end = start + ADDRS_PER_PAGE(page, fi);
313 320  
... ... @@ -364,8 +371,6 @@
364 371 fill_node_footer(dn.node_page, dn.nid, ni.ino,
365 372 ofs_of_node(page), false);
366 373 set_page_dirty(dn.node_page);
367   -
368   - recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
369 374 err:
370 375 f2fs_put_dnode(&dn);
371 376 f2fs_unlock_op(sbi);
... ... @@ -452,6 +457,9 @@
452 457 /* step #1: find fsynced inode numbers */
453 458 sbi->por_doing = true;
454 459  
  460 + /* prevent checkpoint */
  461 + mutex_lock(&sbi->cp_mutex);
  462 +
455 463 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
456 464  
457 465 err = find_fsync_dnodes(sbi, &inode_list);
... ... @@ -465,7 +473,8 @@
465 473  
466 474 /* step #2: recover data */
467 475 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
468   - f2fs_bug_on(!list_empty(&inode_list));
  476 + if (!err)
  477 + f2fs_bug_on(!list_empty(&inode_list));
469 478 out:
470 479 destroy_fsync_dnodes(&inode_list);
471 480 kmem_cache_destroy(fsync_entry_slab);
472 481  
473 482  
... ... @@ -482,8 +491,13 @@
482 491 /* Flush all the NAT/SIT pages */
483 492 while (get_pages(sbi, F2FS_DIRTY_META))
484 493 sync_meta_pages(sbi, META, LONG_MAX);
  494 + set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
  495 + mutex_unlock(&sbi->cp_mutex);
485 496 } else if (need_writecp) {
  497 + mutex_unlock(&sbi->cp_mutex);
486 498 write_checkpoint(sbi, false);
  499 + } else {
  500 + mutex_unlock(&sbi->cp_mutex);
487 501 }
488 502 return err;
489 503 }
... ... @@ -62,7 +62,7 @@
62 62 }
63 63  
64 64 /*
65   - * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue
  65 + * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
66 66 * f2fs_set_bit makes MSB and LSB reversed in a byte.
67 67 * Example:
68 68 * LSB <--> MSB
... ... @@ -808,7 +808,7 @@
808 808 }
809 809  
810 810 /*
811   - * This function always allocates a used segment (from dirty seglist) by SSR
  811 + * This function always allocates a used segment(from dirty seglist) by SSR
812 812 * manner, so it should recover the existing segment information of valid blocks
813 813 */
814 814 static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
... ... @@ -1096,55 +1096,6 @@
1096 1096 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1097 1097 __add_sum_entry(sbi, type, sum);
1098 1098  
1099   - refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1100   - locate_dirty_segment(sbi, old_cursegno);
1101   -
1102   - mutex_unlock(&sit_i->sentry_lock);
1103   - mutex_unlock(&curseg->curseg_mutex);
1104   -}
1105   -
1106   -void rewrite_node_page(struct f2fs_sb_info *sbi,
1107   - struct page *page, struct f2fs_summary *sum,
1108   - block_t old_blkaddr, block_t new_blkaddr)
1109   -{
1110   - struct sit_info *sit_i = SIT_I(sbi);
1111   - int type = CURSEG_WARM_NODE;
1112   - struct curseg_info *curseg;
1113   - unsigned int segno, old_cursegno;
1114   - block_t next_blkaddr = next_blkaddr_of_node(page);
1115   - unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
1116   - struct f2fs_io_info fio = {
1117   - .type = NODE,
1118   - .rw = WRITE_SYNC,
1119   - };
1120   -
1121   - curseg = CURSEG_I(sbi, type);
1122   -
1123   - mutex_lock(&curseg->curseg_mutex);
1124   - mutex_lock(&sit_i->sentry_lock);
1125   -
1126   - segno = GET_SEGNO(sbi, new_blkaddr);
1127   - old_cursegno = curseg->segno;
1128   -
1129   - /* change the current segment */
1130   - if (segno != curseg->segno) {
1131   - curseg->next_segno = segno;
1132   - change_curseg(sbi, type, true);
1133   - }
1134   - curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1135   - __add_sum_entry(sbi, type, sum);
1136   -
1137   - /* change the current log to the next block addr in advance */
1138   - if (next_segno != segno) {
1139   - curseg->next_segno = next_segno;
1140   - change_curseg(sbi, type, true);
1141   - }
1142   - curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
1143   -
1144   - /* rewrite node page */
1145   - set_page_writeback(page);
1146   - f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
1147   - f2fs_submit_merged_bio(sbi, NODE, WRITE);
1148 1099 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1149 1100 locate_dirty_segment(sbi, old_cursegno);
1150 1101  
... ... @@ -549,7 +549,7 @@
549 549 }
550 550  
551 551 /*
552   - * Summary block is always treated as invalid block
  552 + * Summary block is always treated as an invalid block
553 553 */
554 554 static inline void check_block_count(struct f2fs_sb_info *sbi,
555 555 int segno, struct f2fs_sit_entry *raw_sit)
... ... @@ -432,9 +432,15 @@
432 432 stop_gc_thread(sbi);
433 433  
434 434 /* We don't need to do checkpoint when it's clean */
435   - if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES))
  435 + if (sbi->s_dirty)
436 436 write_checkpoint(sbi, true);
437 437  
  438 + /*
  439 + * normally the superblock is clean, so we need to release dirty inodes here.
  440 + * In addition, EIO will skip the checkpoint, so we need this as well.
  441 + */
  442 + release_dirty_inode(sbi);
  443 +
438 444 iput(sbi->node_inode);
439 445 iput(sbi->meta_inode);
440 446  
... ... @@ -457,9 +463,6 @@
457 463  
458 464 trace_f2fs_sync_fs(sb, sync);
459 465  
460   - if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
461   - return 0;
462   -
463 466 if (sync) {
464 467 mutex_lock(&sbi->gc_mutex);
465 468 write_checkpoint(sbi, false);
... ... @@ -505,8 +508,8 @@
505 508 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
506 509 buf->f_bavail = user_block_count - valid_user_blocks(sbi);
507 510  
508   - buf->f_files = sbi->total_node_count;
509   - buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi);
  511 + buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
  512 + buf->f_ffree = buf->f_files - valid_inode_count(sbi);
510 513  
511 514 buf->f_namelen = F2FS_NAME_LEN;
512 515 buf->f_fsid.val[0] = (u32)id;
... ... @@ -663,7 +666,7 @@
663 666 if (need_restart_gc) {
664 667 if (start_gc_thread(sbi))
665 668 f2fs_msg(sbi->sb, KERN_WARNING,
666   - "background gc thread is stop");
  669 + "background gc thread has stopped");
667 670 } else if (need_stop_gc) {
668 671 stop_gc_thread(sbi);
669 672 }
... ... @@ -812,7 +815,7 @@
812 815 if (unlikely(fsmeta >= total))
813 816 return 1;
814 817  
815   - if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
  818 + if (unlikely(f2fs_cp_error(sbi))) {
816 819 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
817 820 return 1;
818 821 }
819 822  
... ... @@ -899,8 +902,10 @@
899 902 struct buffer_head *raw_super_buf;
900 903 struct inode *root;
901 904 long err = -EINVAL;
  905 + bool retry = true;
902 906 int i;
903 907  
  908 +try_onemore:
904 909 /* allocate memory for f2fs-specific super block info */
905 910 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
906 911 if (!sbi)
907 912  
... ... @@ -1080,9 +1085,11 @@
1080 1085 /* recover fsynced data */
1081 1086 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1082 1087 err = recover_fsync_data(sbi);
1083   - if (err)
  1088 + if (err) {
1084 1089 f2fs_msg(sb, KERN_ERR,
1085 1090 "Cannot recover all fsync data errno=%ld", err);
  1091 + goto free_kobj;
  1092 + }
1086 1093 }
1087 1094  
1088 1095 /*
... ... @@ -1123,6 +1130,13 @@
1123 1130 brelse(raw_super_buf);
1124 1131 free_sbi:
1125 1132 kfree(sbi);
  1133 +
  1134 + /* give only one more chance */
  1135 + if (retry) {
  1136 + retry = 0;
  1137 + shrink_dcache_sb(sb);
  1138 + goto try_onemore;
  1139 + }
1126 1140 return err;
1127 1141 }
1128 1142  
... ... @@ -528,7 +528,7 @@
528 528 int free;
529 529 /*
530 530 * If value is NULL, it is remove operation.
531   - * In case of update operation, we caculate free.
  531 + * In case of update operation, we calculate free.
532 532 */
533 533 free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
534 534 if (found)
include/linux/f2fs_fs.h
... ... @@ -24,6 +24,9 @@
24 24 #define NULL_ADDR ((block_t)0) /* used as block_t addresses */
25 25 #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */
26 26  
  27 +/* 0, 1(node nid), 2(meta nid) are reserved node ids */
  28 +#define F2FS_RESERVED_NODE_NUM 3
  29 +
27 30 #define F2FS_ROOT_INO(sbi) (sbi->root_ino_num)
28 31 #define F2FS_NODE_INO(sbi) (sbi->node_ino_num)
29 32 #define F2FS_META_INO(sbi) (sbi->meta_ino_num)
... ... @@ -87,6 +90,8 @@
87 90 #define CP_ORPHAN_PRESENT_FLAG 0x00000002
88 91 #define CP_UMOUNT_FLAG 0x00000001
89 92  
  93 +#define F2FS_CP_PACKS 2 /* # of checkpoint packs */
  94 +
90 95 struct f2fs_checkpoint {
91 96 __le64 checkpoint_ver; /* checkpoint block version number */
92 97 __le64 user_block_count; /* # of user blocks */
... ... @@ -123,6 +128,9 @@
123 128 */
124 129 #define F2FS_ORPHANS_PER_BLOCK 1020
125 130  
  131 +#define GET_ORPHAN_BLOCKS(n) ((n + F2FS_ORPHANS_PER_BLOCK - 1) / \
  132 + F2FS_ORPHANS_PER_BLOCK)
  133 +
126 134 struct f2fs_orphan_block {
127 135 __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */
128 136 __le32 reserved; /* reserved */
... ... @@ -144,6 +152,7 @@
144 152 #define F2FS_NAME_LEN 255
145 153 #define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */
146 154 #define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */
  155 +#define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */
147 156 #define ADDRS_PER_INODE(fi) addrs_per_inode(fi)
148 157 #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */
149 158 #define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */
... ... @@ -163,8 +172,9 @@
163 172 #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \
164 173 F2FS_INLINE_XATTR_ADDRS - 1))
165 174  
166   -#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) \
167   - - sizeof(__le32) * (DEF_ADDRS_PER_INODE + 5 - 1))
  175 +#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) -\
  176 + sizeof(__le32) * (DEF_ADDRS_PER_INODE + \
  177 + DEF_NIDS_PER_INODE - 1))
168 178  
169 179 struct f2fs_inode {
170 180 __le16 i_mode; /* file mode */
... ... @@ -194,7 +204,7 @@
194 204  
195 205 __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */
196 206  
197   - __le32 i_nid[5]; /* direct(2), indirect(2),
  207 + __le32 i_nid[DEF_NIDS_PER_INODE]; /* direct(2), indirect(2),
198 208 double_indirect(1) node id */
199 209 } __packed;
200 210