Commit 88b88a66797159949cec32eaab12b4968f6fae2d
1 parent
120c2cba1d
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
f2fs: support atomic writes
This patch introduces a very limited functionality for atomic write support.
In order to support atomic write, this patch adds two ioctls:
 o F2FS_IOC_START_ATOMIC_WRITE
 o F2FS_IOC_COMMIT_ATOMIC_WRITE

The database engine should be aware of the following sequence.
1. open
 -> ioctl(F2FS_IOC_START_ATOMIC_WRITE);
2. writes
  : all the written data will be treated as atomic pages.
3. commit
 -> ioctl(F2FS_IOC_COMMIT_ATOMIC_WRITE);
  : this flushes all the data blocks to the disk, which will be shown all or
    nothing by f2fs recovery procedure.
4. repeat to #2.

The IO patterns should be:

  ,- START_ATOMIC_WRITE                  ,- COMMIT_ATOMIC_WRITE
 CP | D D D D D D | FSYNC | D D D D | FSYNC ...
                      `- COMMIT_ATOMIC_WRITE

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Showing 8 changed files with 139 additions and 5 deletions Side-by-side Diff
fs/f2fs/data.c
... | ... | @@ -1052,7 +1052,10 @@ |
1052 | 1052 | |
1053 | 1053 | trace_f2fs_write_end(inode, pos, len, copied); |
1054 | 1054 | |
1055 | - set_page_dirty(page); | |
1055 | + if (f2fs_is_atomic_file(inode)) | |
1056 | + register_inmem_page(inode, page); | |
1057 | + else | |
1058 | + set_page_dirty(page); | |
1056 | 1059 | |
1057 | 1060 | if (pos + copied > i_size_read(inode)) { |
1058 | 1061 | i_size_write(inode, pos + copied); |
fs/f2fs/f2fs.h
... | ... | @@ -192,9 +192,13 @@ |
192 | 192 | /* |
193 | 193 | * ioctl commands |
194 | 194 | */ |
195 | -#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS | |
196 | -#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS | |
195 | +#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS | |
196 | +#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS | |
197 | 197 | |
198 | +#define F2FS_IOCTL_MAGIC 0xf5 | |
199 | +#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) | |
200 | +#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) | |
201 | + | |
198 | 202 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
199 | 203 | /* |
200 | 204 | * ioctl commands in 32 bit emulation |
... | ... | @@ -263,6 +267,9 @@ |
263 | 267 | unsigned long long xattr_ver; /* cp version of xattr modification */ |
264 | 268 | struct extent_info ext; /* in-memory extent cache entry */ |
265 | 269 | struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ |
270 | + | |
271 | + struct list_head inmem_pages; /* inmemory pages managed by f2fs */ | |
272 | + struct mutex inmem_lock; /* lock for inmemory pages */ | |
266 | 273 | }; |
267 | 274 | |
268 | 275 | static inline void get_extent_info(struct extent_info *ext, |
... | ... | @@ -1051,7 +1058,8 @@ |
1051 | 1058 | FI_INLINE_DATA, /* used for inline data*/ |
1052 | 1059 | FI_APPEND_WRITE, /* inode has appended data */ |
1053 | 1060 | FI_UPDATE_WRITE, /* inode has in-place-update data */ |
1054 | - FI_NEED_IPU, /* used fo ipu for fdatasync */ | |
1061 | + FI_NEED_IPU, /* used for ipu per file */ | |
1062 | + FI_ATOMIC_FILE, /* indicate atomic file */ | |
1055 | 1063 | }; |
1056 | 1064 | |
1057 | 1065 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) |
... | ... | @@ -1138,6 +1146,11 @@ |
1138 | 1146 | return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); |
1139 | 1147 | } |
1140 | 1148 | |
1149 | +static inline bool f2fs_is_atomic_file(struct inode *inode) | |
1150 | +{ | |
1151 | + return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); | |
1152 | +} | |
1153 | + | |
1141 | 1154 | static inline void *inline_data_addr(struct page *page) |
1142 | 1155 | { |
1143 | 1156 | struct f2fs_inode *ri = F2FS_INODE(page); |
... | ... | @@ -1275,6 +1288,8 @@ |
1275 | 1288 | /* |
1276 | 1289 | * segment.c |
1277 | 1290 | */ |
1291 | +void register_inmem_page(struct inode *, struct page *); | |
1292 | +void commit_inmem_pages(struct inode *, bool); | |
1278 | 1293 | void f2fs_balance_fs(struct f2fs_sb_info *); |
1279 | 1294 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); |
1280 | 1295 | int f2fs_issue_flush(struct f2fs_sb_info *); |
fs/f2fs/file.c
... | ... | @@ -862,6 +862,41 @@ |
862 | 862 | return ret; |
863 | 863 | } |
864 | 864 | |
865 | +static int f2fs_ioc_start_atomic_write(struct file *filp) | |
866 | +{ | |
867 | + struct inode *inode = file_inode(filp); | |
868 | + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
869 | + | |
870 | + if (!inode_owner_or_capable(inode)) | |
871 | + return -EACCES; | |
872 | + | |
873 | + f2fs_balance_fs(sbi); | |
874 | + | |
875 | + set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); | |
876 | + | |
877 | + return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); | |
878 | +} | |
879 | + | |
880 | +static int f2fs_ioc_commit_atomic_write(struct file *filp) | |
881 | +{ | |
882 | + struct inode *inode = file_inode(filp); | |
883 | + int ret; | |
884 | + | |
885 | + if (!inode_owner_or_capable(inode)) | |
886 | + return -EACCES; | |
887 | + | |
888 | + ret = mnt_want_write_file(filp); | |
889 | + if (ret) | |
890 | + return ret; | |
891 | + | |
892 | + if (f2fs_is_atomic_file(inode)) | |
893 | + commit_inmem_pages(inode, false); | |
894 | + | |
895 | + ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); | |
896 | + mnt_drop_write_file(filp); | |
897 | + return ret; | |
898 | +} | |
899 | + | |
865 | 900 | static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) |
866 | 901 | { |
867 | 902 | struct inode *inode = file_inode(filp); |
... | ... | @@ -899,6 +934,10 @@ |
899 | 934 | return f2fs_ioc_getflags(filp, arg); |
900 | 935 | case F2FS_IOC_SETFLAGS: |
901 | 936 | return f2fs_ioc_setflags(filp, arg); |
937 | + case F2FS_IOC_START_ATOMIC_WRITE: | |
938 | + return f2fs_ioc_start_atomic_write(filp); | |
939 | + case F2FS_IOC_COMMIT_ATOMIC_WRITE: | |
940 | + return f2fs_ioc_commit_atomic_write(filp); | |
902 | 941 | case FITRIM: |
903 | 942 | return f2fs_ioc_fitrim(filp, arg); |
904 | 943 | default: |
fs/f2fs/inline.c
fs/f2fs/inode.c
... | ... | @@ -269,6 +269,10 @@ |
269 | 269 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
270 | 270 | nid_t xnid = F2FS_I(inode)->i_xattr_nid; |
271 | 271 | |
272 | + /* some remained atomic pages should discarded */ | |
273 | + if (f2fs_is_atomic_file(inode)) | |
274 | + commit_inmem_pages(inode, true); | |
275 | + | |
272 | 276 | trace_f2fs_evict_inode(inode); |
273 | 277 | truncate_inode_pages_final(&inode->i_data); |
274 | 278 |
fs/f2fs/segment.c
... | ... | @@ -26,6 +26,7 @@ |
26 | 26 | |
27 | 27 | static struct kmem_cache *discard_entry_slab; |
28 | 28 | static struct kmem_cache *sit_entry_set_slab; |
29 | +static struct kmem_cache *inmem_entry_slab; | |
29 | 30 | |
30 | 31 | /* |
31 | 32 | * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since |
... | ... | @@ -173,6 +174,60 @@ |
173 | 174 | return result + __reverse_ffz(tmp); |
174 | 175 | } |
175 | 176 | |
177 | +void register_inmem_page(struct inode *inode, struct page *page) | |
178 | +{ | |
179 | + struct f2fs_inode_info *fi = F2FS_I(inode); | |
180 | + struct inmem_pages *new; | |
181 | + | |
182 | + new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); | |
183 | + | |
184 | + /* add atomic page indices to the list */ | |
185 | + new->page = page; | |
186 | + INIT_LIST_HEAD(&new->list); | |
187 | + | |
188 | + /* increase reference count with clean state */ | |
189 | + mutex_lock(&fi->inmem_lock); | |
190 | + get_page(page); | |
191 | + list_add_tail(&new->list, &fi->inmem_pages); | |
192 | + mutex_unlock(&fi->inmem_lock); | |
193 | +} | |
194 | + | |
195 | +void commit_inmem_pages(struct inode *inode, bool abort) | |
196 | +{ | |
197 | + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
198 | + struct f2fs_inode_info *fi = F2FS_I(inode); | |
199 | + struct inmem_pages *cur, *tmp; | |
200 | + bool submit_bio = false; | |
201 | + struct f2fs_io_info fio = { | |
202 | + .type = DATA, | |
203 | + .rw = WRITE_SYNC, | |
204 | + }; | |
205 | + | |
206 | + f2fs_balance_fs(sbi); | |
207 | + f2fs_lock_op(sbi); | |
208 | + | |
209 | + mutex_lock(&fi->inmem_lock); | |
210 | + list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { | |
211 | + lock_page(cur->page); | |
212 | + if (!abort && cur->page->mapping == inode->i_mapping) { | |
213 | + f2fs_wait_on_page_writeback(cur->page, DATA); | |
214 | + if (clear_page_dirty_for_io(cur->page)) | |
215 | + inode_dec_dirty_pages(inode); | |
216 | + do_write_data_page(cur->page, &fio); | |
217 | + submit_bio = true; | |
218 | + } | |
219 | + f2fs_put_page(cur->page, 1); | |
220 | + list_del(&cur->list); | |
221 | + kmem_cache_free(inmem_entry_slab, cur); | |
222 | + } | |
223 | + if (submit_bio) | |
224 | + f2fs_submit_merged_bio(sbi, DATA, WRITE); | |
225 | + mutex_unlock(&fi->inmem_lock); | |
226 | + | |
227 | + filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX); | |
228 | + f2fs_unlock_op(sbi); | |
229 | +} | |
230 | + | |
176 | 231 | /* |
177 | 232 | * This function balances dirty node and dentry pages. |
178 | 233 | * In addition, it controls garbage collection. |
179 | 234 | |
... | ... | @@ -2148,8 +2203,15 @@ |
2148 | 2203 | sizeof(struct nat_entry_set)); |
2149 | 2204 | if (!sit_entry_set_slab) |
2150 | 2205 | goto destory_discard_entry; |
2206 | + | |
2207 | + inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", | |
2208 | + sizeof(struct inmem_pages)); | |
2209 | + if (!inmem_entry_slab) | |
2210 | + goto destroy_sit_entry_set; | |
2151 | 2211 | return 0; |
2152 | 2212 | |
2213 | +destroy_sit_entry_set: | |
2214 | + kmem_cache_destroy(sit_entry_set_slab); | |
2153 | 2215 | destory_discard_entry: |
2154 | 2216 | kmem_cache_destroy(discard_entry_slab); |
2155 | 2217 | fail: |
... | ... | @@ -2160,5 +2222,6 @@ |
2160 | 2222 | { |
2161 | 2223 | kmem_cache_destroy(sit_entry_set_slab); |
2162 | 2224 | kmem_cache_destroy(discard_entry_slab); |
2225 | + kmem_cache_destroy(inmem_entry_slab); | |
2163 | 2226 | } |
fs/f2fs/segment.h
... | ... | @@ -175,6 +175,11 @@ |
175 | 175 | void (*allocate_segment)(struct f2fs_sb_info *, int, bool); |
176 | 176 | }; |
177 | 177 | |
178 | +struct inmem_pages { | |
179 | + struct list_head list; | |
180 | + struct page *page; | |
181 | +}; | |
182 | + | |
178 | 183 | struct sit_info { |
179 | 184 | const struct segment_allocation *s_ops; |
180 | 185 | |
... | ... | @@ -504,7 +509,7 @@ |
504 | 509 | unsigned int policy = SM_I(sbi)->ipu_policy; |
505 | 510 | |
506 | 511 | /* IPU can be done only for the user data */ |
507 | - if (S_ISDIR(inode->i_mode)) | |
512 | + if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) | |
508 | 513 | return false; |
509 | 514 | |
510 | 515 | if (policy & (0x1 << F2FS_IPU_FORCE)) |
fs/f2fs/super.c