Commit e0e851cf30f1a9bd2e2a7624e9810378d6a2b072
Committed by
Linus Torvalds
1 parent
fc5cd582e9
Exists in
master
and in
20 other branches
[PATCH] reiserfs: reiserfs hang and performance fix for data=journal mode
In data=journal mode, reiserfs writepage needs to make sure not to trigger transactions while being run under PF_MEMALLOC. This patch makes sure to redirty the page instead of forcing a transaction start in this case. Also, calling filemap_fdata* in order to trigger io on the block device can cause lock inversions on the page lock. Instead, do simple batching from flush_commit_list. Signed-off-by: Chris Mason <mason@suse.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 2 changed files with 21 additions and 5 deletions Inline Diff
fs/reiserfs/inode.c
1 | /* | 1 | /* |
2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README | 2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include <linux/config.h> | 5 | #include <linux/config.h> |
6 | #include <linux/time.h> | 6 | #include <linux/time.h> |
7 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
8 | #include <linux/reiserfs_fs.h> | 8 | #include <linux/reiserfs_fs.h> |
9 | #include <linux/reiserfs_acl.h> | 9 | #include <linux/reiserfs_acl.h> |
10 | #include <linux/reiserfs_xattr.h> | 10 | #include <linux/reiserfs_xattr.h> |
11 | #include <linux/smp_lock.h> | 11 | #include <linux/smp_lock.h> |
12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
13 | #include <linux/highmem.h> | 13 | #include <linux/highmem.h> |
14 | #include <asm/uaccess.h> | 14 | #include <asm/uaccess.h> |
15 | #include <asm/unaligned.h> | 15 | #include <asm/unaligned.h> |
16 | #include <linux/buffer_head.h> | 16 | #include <linux/buffer_head.h> |
17 | #include <linux/mpage.h> | 17 | #include <linux/mpage.h> |
18 | #include <linux/writeback.h> | 18 | #include <linux/writeback.h> |
19 | #include <linux/quotaops.h> | 19 | #include <linux/quotaops.h> |
20 | 20 | ||
21 | extern int reiserfs_default_io_size; /* default io size defined in super.c */ | 21 | extern int reiserfs_default_io_size; /* default io size defined in super.c */ |
22 | 22 | ||
23 | static int reiserfs_commit_write(struct file *f, struct page *page, | 23 | static int reiserfs_commit_write(struct file *f, struct page *page, |
24 | unsigned from, unsigned to); | 24 | unsigned from, unsigned to); |
25 | static int reiserfs_prepare_write(struct file *f, struct page *page, | 25 | static int reiserfs_prepare_write(struct file *f, struct page *page, |
26 | unsigned from, unsigned to); | 26 | unsigned from, unsigned to); |
27 | 27 | ||
28 | void reiserfs_delete_inode(struct inode *inode) | 28 | void reiserfs_delete_inode(struct inode *inode) |
29 | { | 29 | { |
30 | /* We need blocks for transaction + (user+group) quota update (possibly delete) */ | 30 | /* We need blocks for transaction + (user+group) quota update (possibly delete) */ |
31 | int jbegin_count = | 31 | int jbegin_count = |
32 | JOURNAL_PER_BALANCE_CNT * 2 + | 32 | JOURNAL_PER_BALANCE_CNT * 2 + |
33 | 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); | 33 | 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); |
34 | struct reiserfs_transaction_handle th; | 34 | struct reiserfs_transaction_handle th; |
35 | int err; | 35 | int err; |
36 | 36 | ||
37 | truncate_inode_pages(&inode->i_data, 0); | 37 | truncate_inode_pages(&inode->i_data, 0); |
38 | 38 | ||
39 | reiserfs_write_lock(inode->i_sb); | 39 | reiserfs_write_lock(inode->i_sb); |
40 | 40 | ||
41 | /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ | 41 | /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ |
42 | if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ | 42 | if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ |
43 | mutex_lock(&inode->i_mutex); | 43 | mutex_lock(&inode->i_mutex); |
44 | 44 | ||
45 | reiserfs_delete_xattrs(inode); | 45 | reiserfs_delete_xattrs(inode); |
46 | 46 | ||
47 | if (journal_begin(&th, inode->i_sb, jbegin_count)) { | 47 | if (journal_begin(&th, inode->i_sb, jbegin_count)) { |
48 | mutex_unlock(&inode->i_mutex); | 48 | mutex_unlock(&inode->i_mutex); |
49 | goto out; | 49 | goto out; |
50 | } | 50 | } |
51 | reiserfs_update_inode_transaction(inode); | 51 | reiserfs_update_inode_transaction(inode); |
52 | 52 | ||
53 | err = reiserfs_delete_object(&th, inode); | 53 | err = reiserfs_delete_object(&th, inode); |
54 | 54 | ||
55 | /* Do quota update inside a transaction for journaled quotas. We must do that | 55 | /* Do quota update inside a transaction for journaled quotas. We must do that |
56 | * after delete_object so that quota updates go into the same transaction as | 56 | * after delete_object so that quota updates go into the same transaction as |
57 | * stat data deletion */ | 57 | * stat data deletion */ |
58 | if (!err) | 58 | if (!err) |
59 | DQUOT_FREE_INODE(inode); | 59 | DQUOT_FREE_INODE(inode); |
60 | 60 | ||
61 | if (journal_end(&th, inode->i_sb, jbegin_count)) { | 61 | if (journal_end(&th, inode->i_sb, jbegin_count)) { |
62 | mutex_unlock(&inode->i_mutex); | 62 | mutex_unlock(&inode->i_mutex); |
63 | goto out; | 63 | goto out; |
64 | } | 64 | } |
65 | 65 | ||
66 | mutex_unlock(&inode->i_mutex); | 66 | mutex_unlock(&inode->i_mutex); |
67 | 67 | ||
68 | /* check return value from reiserfs_delete_object after | 68 | /* check return value from reiserfs_delete_object after |
69 | * ending the transaction | 69 | * ending the transaction |
70 | */ | 70 | */ |
71 | if (err) | 71 | if (err) |
72 | goto out; | 72 | goto out; |
73 | 73 | ||
74 | /* all items of file are deleted, so we can remove "save" link */ | 74 | /* all items of file are deleted, so we can remove "save" link */ |
75 | remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything | 75 | remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything |
76 | * about an error here */ | 76 | * about an error here */ |
77 | } else { | 77 | } else { |
78 | /* no object items are in the tree */ | 78 | /* no object items are in the tree */ |
79 | ; | 79 | ; |
80 | } | 80 | } |
81 | out: | 81 | out: |
82 | clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ | 82 | clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ |
83 | inode->i_blocks = 0; | 83 | inode->i_blocks = 0; |
84 | reiserfs_write_unlock(inode->i_sb); | 84 | reiserfs_write_unlock(inode->i_sb); |
85 | } | 85 | } |
86 | 86 | ||
87 | static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, | 87 | static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, |
88 | __u32 objectid, loff_t offset, int type, int length) | 88 | __u32 objectid, loff_t offset, int type, int length) |
89 | { | 89 | { |
90 | key->version = version; | 90 | key->version = version; |
91 | 91 | ||
92 | key->on_disk_key.k_dir_id = dirid; | 92 | key->on_disk_key.k_dir_id = dirid; |
93 | key->on_disk_key.k_objectid = objectid; | 93 | key->on_disk_key.k_objectid = objectid; |
94 | set_cpu_key_k_offset(key, offset); | 94 | set_cpu_key_k_offset(key, offset); |
95 | set_cpu_key_k_type(key, type); | 95 | set_cpu_key_k_type(key, type); |
96 | key->key_length = length; | 96 | key->key_length = length; |
97 | } | 97 | } |
98 | 98 | ||
99 | /* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set | 99 | /* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set |
100 | offset and type of key */ | 100 | offset and type of key */ |
101 | void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, | 101 | void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, |
102 | int type, int length) | 102 | int type, int length) |
103 | { | 103 | { |
104 | _make_cpu_key(key, get_inode_item_key_version(inode), | 104 | _make_cpu_key(key, get_inode_item_key_version(inode), |
105 | le32_to_cpu(INODE_PKEY(inode)->k_dir_id), | 105 | le32_to_cpu(INODE_PKEY(inode)->k_dir_id), |
106 | le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type, | 106 | le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type, |
107 | length); | 107 | length); |
108 | } | 108 | } |
109 | 109 | ||
110 | // | 110 | // |
111 | // when key is 0, do not set version and short key | 111 | // when key is 0, do not set version and short key |
112 | // | 112 | // |
113 | inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, | 113 | inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, |
114 | int version, | 114 | int version, |
115 | loff_t offset, int type, int length, | 115 | loff_t offset, int type, int length, |
116 | int entry_count /*or ih_free_space */ ) | 116 | int entry_count /*or ih_free_space */ ) |
117 | { | 117 | { |
118 | if (key) { | 118 | if (key) { |
119 | ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id); | 119 | ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id); |
120 | ih->ih_key.k_objectid = | 120 | ih->ih_key.k_objectid = |
121 | cpu_to_le32(key->on_disk_key.k_objectid); | 121 | cpu_to_le32(key->on_disk_key.k_objectid); |
122 | } | 122 | } |
123 | put_ih_version(ih, version); | 123 | put_ih_version(ih, version); |
124 | set_le_ih_k_offset(ih, offset); | 124 | set_le_ih_k_offset(ih, offset); |
125 | set_le_ih_k_type(ih, type); | 125 | set_le_ih_k_type(ih, type); |
126 | put_ih_item_len(ih, length); | 126 | put_ih_item_len(ih, length); |
127 | /* set_ih_free_space (ih, 0); */ | 127 | /* set_ih_free_space (ih, 0); */ |
128 | // for directory items it is entry count, for directs and stat | 128 | // for directory items it is entry count, for directs and stat |
129 | // datas - 0xffff, for indirects - 0 | 129 | // datas - 0xffff, for indirects - 0 |
130 | put_ih_entry_count(ih, entry_count); | 130 | put_ih_entry_count(ih, entry_count); |
131 | } | 131 | } |
132 | 132 | ||
133 | // | 133 | // |
134 | // FIXME: we might cache recently accessed indirect item | 134 | // FIXME: we might cache recently accessed indirect item |
135 | 135 | ||
136 | // Ugh. Not too eager for that.... | 136 | // Ugh. Not too eager for that.... |
137 | // I cut the code until such time as I see a convincing argument (benchmark). | 137 | // I cut the code until such time as I see a convincing argument (benchmark). |
138 | // I don't want a bloated inode struct..., and I don't like code complexity.... | 138 | // I don't want a bloated inode struct..., and I don't like code complexity.... |
139 | 139 | ||
140 | /* cutting the code is fine, since it really isn't in use yet and is easy | 140 | /* cutting the code is fine, since it really isn't in use yet and is easy |
141 | ** to add back in. But, Vladimir has a really good idea here. Think | 141 | ** to add back in. But, Vladimir has a really good idea here. Think |
142 | ** about what happens for reading a file. For each page, | 142 | ** about what happens for reading a file. For each page, |
143 | ** The VFS layer calls reiserfs_readpage, who searches the tree to find | 143 | ** The VFS layer calls reiserfs_readpage, who searches the tree to find |
144 | ** an indirect item. This indirect item has X number of pointers, where | 144 | ** an indirect item. This indirect item has X number of pointers, where |
145 | ** X is a big number if we've done the block allocation right. But, | 145 | ** X is a big number if we've done the block allocation right. But, |
146 | ** we only use one or two of these pointers during each call to readpage, | 146 | ** we only use one or two of these pointers during each call to readpage, |
147 | ** needlessly researching again later on. | 147 | ** needlessly researching again later on. |
148 | ** | 148 | ** |
149 | ** The size of the cache could be dynamic based on the size of the file. | 149 | ** The size of the cache could be dynamic based on the size of the file. |
150 | ** | 150 | ** |
151 | ** I'd also like to see us cache the location the stat data item, since | 151 | ** I'd also like to see us cache the location the stat data item, since |
152 | ** we are needlessly researching for that frequently. | 152 | ** we are needlessly researching for that frequently. |
153 | ** | 153 | ** |
154 | ** --chris | 154 | ** --chris |
155 | */ | 155 | */ |
156 | 156 | ||
157 | /* If this page has a file tail in it, and | 157 | /* If this page has a file tail in it, and |
158 | ** it was read in by get_block_create_0, the page data is valid, | 158 | ** it was read in by get_block_create_0, the page data is valid, |
159 | ** but tail is still sitting in a direct item, and we can't write to | 159 | ** but tail is still sitting in a direct item, and we can't write to |
160 | ** it. So, look through this page, and check all the mapped buffers | 160 | ** it. So, look through this page, and check all the mapped buffers |
161 | ** to make sure they have valid block numbers. Any that don't need | 161 | ** to make sure they have valid block numbers. Any that don't need |
162 | ** to be unmapped, so that block_prepare_write will correctly call | 162 | ** to be unmapped, so that block_prepare_write will correctly call |
163 | ** reiserfs_get_block to convert the tail into an unformatted node | 163 | ** reiserfs_get_block to convert the tail into an unformatted node |
164 | */ | 164 | */ |
165 | static inline void fix_tail_page_for_writing(struct page *page) | 165 | static inline void fix_tail_page_for_writing(struct page *page) |
166 | { | 166 | { |
167 | struct buffer_head *head, *next, *bh; | 167 | struct buffer_head *head, *next, *bh; |
168 | 168 | ||
169 | if (page && page_has_buffers(page)) { | 169 | if (page && page_has_buffers(page)) { |
170 | head = page_buffers(page); | 170 | head = page_buffers(page); |
171 | bh = head; | 171 | bh = head; |
172 | do { | 172 | do { |
173 | next = bh->b_this_page; | 173 | next = bh->b_this_page; |
174 | if (buffer_mapped(bh) && bh->b_blocknr == 0) { | 174 | if (buffer_mapped(bh) && bh->b_blocknr == 0) { |
175 | reiserfs_unmap_buffer(bh); | 175 | reiserfs_unmap_buffer(bh); |
176 | } | 176 | } |
177 | bh = next; | 177 | bh = next; |
178 | } while (bh != head); | 178 | } while (bh != head); |
179 | } | 179 | } |
180 | } | 180 | } |
181 | 181 | ||
182 | /* reiserfs_get_block does not need to allocate a block only if it has been | 182 | /* reiserfs_get_block does not need to allocate a block only if it has been |
183 | done already or non-hole position has been found in the indirect item */ | 183 | done already or non-hole position has been found in the indirect item */ |
184 | static inline int allocation_needed(int retval, b_blocknr_t allocated, | 184 | static inline int allocation_needed(int retval, b_blocknr_t allocated, |
185 | struct item_head *ih, | 185 | struct item_head *ih, |
186 | __le32 * item, int pos_in_item) | 186 | __le32 * item, int pos_in_item) |
187 | { | 187 | { |
188 | if (allocated) | 188 | if (allocated) |
189 | return 0; | 189 | return 0; |
190 | if (retval == POSITION_FOUND && is_indirect_le_ih(ih) && | 190 | if (retval == POSITION_FOUND && is_indirect_le_ih(ih) && |
191 | get_block_num(item, pos_in_item)) | 191 | get_block_num(item, pos_in_item)) |
192 | return 0; | 192 | return 0; |
193 | return 1; | 193 | return 1; |
194 | } | 194 | } |
195 | 195 | ||
196 | static inline int indirect_item_found(int retval, struct item_head *ih) | 196 | static inline int indirect_item_found(int retval, struct item_head *ih) |
197 | { | 197 | { |
198 | return (retval == POSITION_FOUND) && is_indirect_le_ih(ih); | 198 | return (retval == POSITION_FOUND) && is_indirect_le_ih(ih); |
199 | } | 199 | } |
200 | 200 | ||
201 | static inline void set_block_dev_mapped(struct buffer_head *bh, | 201 | static inline void set_block_dev_mapped(struct buffer_head *bh, |
202 | b_blocknr_t block, struct inode *inode) | 202 | b_blocknr_t block, struct inode *inode) |
203 | { | 203 | { |
204 | map_bh(bh, inode->i_sb, block); | 204 | map_bh(bh, inode->i_sb, block); |
205 | } | 205 | } |
206 | 206 | ||
207 | // | 207 | // |
208 | // files which were created in the earlier version can not be longer, | 208 | // files which were created in the earlier version can not be longer, |
209 | // than 2 gb | 209 | // than 2 gb |
210 | // | 210 | // |
211 | static int file_capable(struct inode *inode, long block) | 211 | static int file_capable(struct inode *inode, long block) |
212 | { | 212 | { |
213 | if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. | 213 | if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. |
214 | block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb | 214 | block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb |
215 | return 1; | 215 | return 1; |
216 | 216 | ||
217 | return 0; | 217 | return 0; |
218 | } | 218 | } |
219 | 219 | ||
220 | /*static*/ int restart_transaction(struct reiserfs_transaction_handle *th, | 220 | /*static*/ int restart_transaction(struct reiserfs_transaction_handle *th, |
221 | struct inode *inode, struct path *path) | 221 | struct inode *inode, struct path *path) |
222 | { | 222 | { |
223 | struct super_block *s = th->t_super; | 223 | struct super_block *s = th->t_super; |
224 | int len = th->t_blocks_allocated; | 224 | int len = th->t_blocks_allocated; |
225 | int err; | 225 | int err; |
226 | 226 | ||
227 | BUG_ON(!th->t_trans_id); | 227 | BUG_ON(!th->t_trans_id); |
228 | BUG_ON(!th->t_refcount); | 228 | BUG_ON(!th->t_refcount); |
229 | 229 | ||
230 | /* we cannot restart while nested */ | 230 | /* we cannot restart while nested */ |
231 | if (th->t_refcount > 1) { | 231 | if (th->t_refcount > 1) { |
232 | return 0; | 232 | return 0; |
233 | } | 233 | } |
234 | pathrelse(path); | 234 | pathrelse(path); |
235 | reiserfs_update_sd(th, inode); | 235 | reiserfs_update_sd(th, inode); |
236 | err = journal_end(th, s, len); | 236 | err = journal_end(th, s, len); |
237 | if (!err) { | 237 | if (!err) { |
238 | err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6); | 238 | err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6); |
239 | if (!err) | 239 | if (!err) |
240 | reiserfs_update_inode_transaction(inode); | 240 | reiserfs_update_inode_transaction(inode); |
241 | } | 241 | } |
242 | return err; | 242 | return err; |
243 | } | 243 | } |
244 | 244 | ||
245 | // it is called by get_block when create == 0. Returns block number | 245 | // it is called by get_block when create == 0. Returns block number |
246 | // for 'block'-th logical block of file. When it hits direct item it | 246 | // for 'block'-th logical block of file. When it hits direct item it |
247 | // returns 0 (being called from bmap) or read direct item into piece | 247 | // returns 0 (being called from bmap) or read direct item into piece |
248 | // of page (bh_result) | 248 | // of page (bh_result) |
249 | 249 | ||
250 | // Please improve the english/clarity in the comment above, as it is | 250 | // Please improve the english/clarity in the comment above, as it is |
251 | // hard to understand. | 251 | // hard to understand. |
252 | 252 | ||
253 | static int _get_block_create_0(struct inode *inode, long block, | 253 | static int _get_block_create_0(struct inode *inode, long block, |
254 | struct buffer_head *bh_result, int args) | 254 | struct buffer_head *bh_result, int args) |
255 | { | 255 | { |
256 | INITIALIZE_PATH(path); | 256 | INITIALIZE_PATH(path); |
257 | struct cpu_key key; | 257 | struct cpu_key key; |
258 | struct buffer_head *bh; | 258 | struct buffer_head *bh; |
259 | struct item_head *ih, tmp_ih; | 259 | struct item_head *ih, tmp_ih; |
260 | int fs_gen; | 260 | int fs_gen; |
261 | int blocknr; | 261 | int blocknr; |
262 | char *p = NULL; | 262 | char *p = NULL; |
263 | int chars; | 263 | int chars; |
264 | int ret; | 264 | int ret; |
265 | int result; | 265 | int result; |
266 | int done = 0; | 266 | int done = 0; |
267 | unsigned long offset; | 267 | unsigned long offset; |
268 | 268 | ||
269 | // prepare the key to look for the 'block'-th block of file | 269 | // prepare the key to look for the 'block'-th block of file |
270 | make_cpu_key(&key, inode, | 270 | make_cpu_key(&key, inode, |
271 | (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, | 271 | (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, |
272 | 3); | 272 | 3); |
273 | 273 | ||
274 | research: | 274 | research: |
275 | result = search_for_position_by_key(inode->i_sb, &key, &path); | 275 | result = search_for_position_by_key(inode->i_sb, &key, &path); |
276 | if (result != POSITION_FOUND) { | 276 | if (result != POSITION_FOUND) { |
277 | pathrelse(&path); | 277 | pathrelse(&path); |
278 | if (p) | 278 | if (p) |
279 | kunmap(bh_result->b_page); | 279 | kunmap(bh_result->b_page); |
280 | if (result == IO_ERROR) | 280 | if (result == IO_ERROR) |
281 | return -EIO; | 281 | return -EIO; |
282 | // We do not return -ENOENT if there is a hole but page is uptodate, because it means | 282 | // We do not return -ENOENT if there is a hole but page is uptodate, because it means |
283 | // That there is some MMAPED data associated with it that is yet to be written to disk. | 283 | // That there is some MMAPED data associated with it that is yet to be written to disk. |
284 | if ((args & GET_BLOCK_NO_HOLE) | 284 | if ((args & GET_BLOCK_NO_HOLE) |
285 | && !PageUptodate(bh_result->b_page)) { | 285 | && !PageUptodate(bh_result->b_page)) { |
286 | return -ENOENT; | 286 | return -ENOENT; |
287 | } | 287 | } |
288 | return 0; | 288 | return 0; |
289 | } | 289 | } |
290 | // | 290 | // |
291 | bh = get_last_bh(&path); | 291 | bh = get_last_bh(&path); |
292 | ih = get_ih(&path); | 292 | ih = get_ih(&path); |
293 | if (is_indirect_le_ih(ih)) { | 293 | if (is_indirect_le_ih(ih)) { |
294 | __le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih); | 294 | __le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih); |
295 | 295 | ||
296 | /* FIXME: here we could cache indirect item or part of it in | 296 | /* FIXME: here we could cache indirect item or part of it in |
297 | the inode to avoid search_by_key in case of subsequent | 297 | the inode to avoid search_by_key in case of subsequent |
298 | access to file */ | 298 | access to file */ |
299 | blocknr = get_block_num(ind_item, path.pos_in_item); | 299 | blocknr = get_block_num(ind_item, path.pos_in_item); |
300 | ret = 0; | 300 | ret = 0; |
301 | if (blocknr) { | 301 | if (blocknr) { |
302 | map_bh(bh_result, inode->i_sb, blocknr); | 302 | map_bh(bh_result, inode->i_sb, blocknr); |
303 | if (path.pos_in_item == | 303 | if (path.pos_in_item == |
304 | ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) { | 304 | ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) { |
305 | set_buffer_boundary(bh_result); | 305 | set_buffer_boundary(bh_result); |
306 | } | 306 | } |
307 | } else | 307 | } else |
308 | // We do not return -ENOENT if there is a hole but page is uptodate, because it means | 308 | // We do not return -ENOENT if there is a hole but page is uptodate, because it means |
309 | // That there is some MMAPED data associated with it that is yet to be written to disk. | 309 | // That there is some MMAPED data associated with it that is yet to be written to disk. |
310 | if ((args & GET_BLOCK_NO_HOLE) | 310 | if ((args & GET_BLOCK_NO_HOLE) |
311 | && !PageUptodate(bh_result->b_page)) { | 311 | && !PageUptodate(bh_result->b_page)) { |
312 | ret = -ENOENT; | 312 | ret = -ENOENT; |
313 | } | 313 | } |
314 | 314 | ||
315 | pathrelse(&path); | 315 | pathrelse(&path); |
316 | if (p) | 316 | if (p) |
317 | kunmap(bh_result->b_page); | 317 | kunmap(bh_result->b_page); |
318 | return ret; | 318 | return ret; |
319 | } | 319 | } |
320 | // requested data are in direct item(s) | 320 | // requested data are in direct item(s) |
321 | if (!(args & GET_BLOCK_READ_DIRECT)) { | 321 | if (!(args & GET_BLOCK_READ_DIRECT)) { |
322 | // we are called by bmap. FIXME: we can not map block of file | 322 | // we are called by bmap. FIXME: we can not map block of file |
323 | // when it is stored in direct item(s) | 323 | // when it is stored in direct item(s) |
324 | pathrelse(&path); | 324 | pathrelse(&path); |
325 | if (p) | 325 | if (p) |
326 | kunmap(bh_result->b_page); | 326 | kunmap(bh_result->b_page); |
327 | return -ENOENT; | 327 | return -ENOENT; |
328 | } | 328 | } |
329 | 329 | ||
330 | /* if we've got a direct item, and the buffer or page was uptodate, | 330 | /* if we've got a direct item, and the buffer or page was uptodate, |
331 | ** we don't want to pull data off disk again. skip to the | 331 | ** we don't want to pull data off disk again. skip to the |
332 | ** end, where we map the buffer and return | 332 | ** end, where we map the buffer and return |
333 | */ | 333 | */ |
334 | if (buffer_uptodate(bh_result)) { | 334 | if (buffer_uptodate(bh_result)) { |
335 | goto finished; | 335 | goto finished; |
336 | } else | 336 | } else |
337 | /* | 337 | /* |
338 | ** grab_tail_page can trigger calls to reiserfs_get_block on up to date | 338 | ** grab_tail_page can trigger calls to reiserfs_get_block on up to date |
339 | ** pages without any buffers. If the page is up to date, we don't want | 339 | ** pages without any buffers. If the page is up to date, we don't want |
340 | ** read old data off disk. Set the up to date bit on the buffer instead | 340 | ** read old data off disk. Set the up to date bit on the buffer instead |
341 | ** and jump to the end | 341 | ** and jump to the end |
342 | */ | 342 | */ |
343 | if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { | 343 | if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { |
344 | set_buffer_uptodate(bh_result); | 344 | set_buffer_uptodate(bh_result); |
345 | goto finished; | 345 | goto finished; |
346 | } | 346 | } |
347 | // read file tail into part of page | 347 | // read file tail into part of page |
348 | offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); | 348 | offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); |
349 | fs_gen = get_generation(inode->i_sb); | 349 | fs_gen = get_generation(inode->i_sb); |
350 | copy_item_head(&tmp_ih, ih); | 350 | copy_item_head(&tmp_ih, ih); |
351 | 351 | ||
352 | /* we only want to kmap if we are reading the tail into the page. | 352 | /* we only want to kmap if we are reading the tail into the page. |
353 | ** this is not the common case, so we don't kmap until we are | 353 | ** this is not the common case, so we don't kmap until we are |
354 | ** sure we need to. But, this means the item might move if | 354 | ** sure we need to. But, this means the item might move if |
355 | ** kmap schedules | 355 | ** kmap schedules |
356 | */ | 356 | */ |
357 | if (!p) { | 357 | if (!p) { |
358 | p = (char *)kmap(bh_result->b_page); | 358 | p = (char *)kmap(bh_result->b_page); |
359 | if (fs_changed(fs_gen, inode->i_sb) | 359 | if (fs_changed(fs_gen, inode->i_sb) |
360 | && item_moved(&tmp_ih, &path)) { | 360 | && item_moved(&tmp_ih, &path)) { |
361 | goto research; | 361 | goto research; |
362 | } | 362 | } |
363 | } | 363 | } |
364 | p += offset; | 364 | p += offset; |
365 | memset(p, 0, inode->i_sb->s_blocksize); | 365 | memset(p, 0, inode->i_sb->s_blocksize); |
366 | do { | 366 | do { |
367 | if (!is_direct_le_ih(ih)) { | 367 | if (!is_direct_le_ih(ih)) { |
368 | BUG(); | 368 | BUG(); |
369 | } | 369 | } |
370 | /* make sure we don't read more bytes than actually exist in | 370 | /* make sure we don't read more bytes than actually exist in |
371 | ** the file. This can happen in odd cases where i_size isn't | 371 | ** the file. This can happen in odd cases where i_size isn't |
372 | ** correct, and when direct item padding results in a few | 372 | ** correct, and when direct item padding results in a few |
373 | ** extra bytes at the end of the direct item | 373 | ** extra bytes at the end of the direct item |
374 | */ | 374 | */ |
375 | if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) | 375 | if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) |
376 | break; | 376 | break; |
377 | if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) { | 377 | if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) { |
378 | chars = | 378 | chars = |
379 | inode->i_size - (le_ih_k_offset(ih) - 1) - | 379 | inode->i_size - (le_ih_k_offset(ih) - 1) - |
380 | path.pos_in_item; | 380 | path.pos_in_item; |
381 | done = 1; | 381 | done = 1; |
382 | } else { | 382 | } else { |
383 | chars = ih_item_len(ih) - path.pos_in_item; | 383 | chars = ih_item_len(ih) - path.pos_in_item; |
384 | } | 384 | } |
385 | memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars); | 385 | memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars); |
386 | 386 | ||
387 | if (done) | 387 | if (done) |
388 | break; | 388 | break; |
389 | 389 | ||
390 | p += chars; | 390 | p += chars; |
391 | 391 | ||
392 | if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1)) | 392 | if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1)) |
393 | // we done, if read direct item is not the last item of | 393 | // we done, if read direct item is not the last item of |
394 | // node FIXME: we could try to check right delimiting key | 394 | // node FIXME: we could try to check right delimiting key |
395 | // to see whether direct item continues in the right | 395 | // to see whether direct item continues in the right |
396 | // neighbor or rely on i_size | 396 | // neighbor or rely on i_size |
397 | break; | 397 | break; |
398 | 398 | ||
399 | // update key to look for the next piece | 399 | // update key to look for the next piece |
400 | set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars); | 400 | set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars); |
401 | result = search_for_position_by_key(inode->i_sb, &key, &path); | 401 | result = search_for_position_by_key(inode->i_sb, &key, &path); |
402 | if (result != POSITION_FOUND) | 402 | if (result != POSITION_FOUND) |
403 | // i/o error most likely | 403 | // i/o error most likely |
404 | break; | 404 | break; |
405 | bh = get_last_bh(&path); | 405 | bh = get_last_bh(&path); |
406 | ih = get_ih(&path); | 406 | ih = get_ih(&path); |
407 | } while (1); | 407 | } while (1); |
408 | 408 | ||
409 | flush_dcache_page(bh_result->b_page); | 409 | flush_dcache_page(bh_result->b_page); |
410 | kunmap(bh_result->b_page); | 410 | kunmap(bh_result->b_page); |
411 | 411 | ||
412 | finished: | 412 | finished: |
413 | pathrelse(&path); | 413 | pathrelse(&path); |
414 | 414 | ||
415 | if (result == IO_ERROR) | 415 | if (result == IO_ERROR) |
416 | return -EIO; | 416 | return -EIO; |
417 | 417 | ||
418 | /* this buffer has valid data, but isn't valid for io. mapping it to | 418 | /* this buffer has valid data, but isn't valid for io. mapping it to |
419 | * block #0 tells the rest of reiserfs it just has a tail in it | 419 | * block #0 tells the rest of reiserfs it just has a tail in it |
420 | */ | 420 | */ |
421 | map_bh(bh_result, inode->i_sb, 0); | 421 | map_bh(bh_result, inode->i_sb, 0); |
422 | set_buffer_uptodate(bh_result); | 422 | set_buffer_uptodate(bh_result); |
423 | return 0; | 423 | return 0; |
424 | } | 424 | } |
425 | 425 | ||
// Map a logical file block to a device block for bmap()-style callers.
// This is called to create a file map, so _get_block_create_0 is told NOT
// to read direct items (a tail stored inline in the tree has no device
// block to report).
static int reiserfs_bmap(struct inode *inode, sector_t block,
			 struct buffer_head *bh_result, int create)
{
	/* refuse blocks beyond what this inode's item format can address */
	if (!file_capable(inode, block))
		return -EFBIG;

	reiserfs_write_lock(inode->i_sb);
	/* do not read the direct item; 'create' is deliberately ignored —
	 * bmap must never allocate */
	_get_block_create_0(inode, block, bh_result, 0);
	reiserfs_write_unlock(inode->i_sb);
	return 0;
}
440 | 440 | ||
/* special version of get_block that is only used by grab_tail_page right
** now. It is sent to block_prepare_write, and when you try to get a
** block past the end of the file (or a block from a hole) it returns
** -ENOENT instead of a valid buffer. block_prepare_write expects to
** be able to do i/o on the buffers returned, unless an error value
** is also returned.
**
** So, this allows block_prepare_write to be used for reading a single block
** in a page. Where it does not produce a valid page for holes, or past the
** end of the file. This turns out to be exactly what we need for reading
** tails for conversion.
**
** The point of the wrapper is forcing a certain value for create, even
** though the VFS layer is calling this function with create==1. If you
** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
** don't use this function.
*/
static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
				       struct buffer_head *bh_result,
				       int create)
{
	/* force GET_BLOCK_NO_HOLE regardless of the caller's create flag */
	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
}
464 | 464 | ||
465 | /* This is special helper for reiserfs_get_block in case we are executing | 465 | /* This is special helper for reiserfs_get_block in case we are executing |
466 | direct_IO request. */ | 466 | direct_IO request. */ |
467 | static int reiserfs_get_blocks_direct_io(struct inode *inode, | 467 | static int reiserfs_get_blocks_direct_io(struct inode *inode, |
468 | sector_t iblock, | 468 | sector_t iblock, |
469 | unsigned long max_blocks, | 469 | unsigned long max_blocks, |
470 | struct buffer_head *bh_result, | 470 | struct buffer_head *bh_result, |
471 | int create) | 471 | int create) |
472 | { | 472 | { |
473 | int ret; | 473 | int ret; |
474 | 474 | ||
475 | bh_result->b_page = NULL; | 475 | bh_result->b_page = NULL; |
476 | 476 | ||
477 | /* We set the b_size before reiserfs_get_block call since it is | 477 | /* We set the b_size before reiserfs_get_block call since it is |
478 | referenced in convert_tail_for_hole() that may be called from | 478 | referenced in convert_tail_for_hole() that may be called from |
479 | reiserfs_get_block() */ | 479 | reiserfs_get_block() */ |
480 | bh_result->b_size = (1 << inode->i_blkbits); | 480 | bh_result->b_size = (1 << inode->i_blkbits); |
481 | 481 | ||
482 | ret = reiserfs_get_block(inode, iblock, bh_result, | 482 | ret = reiserfs_get_block(inode, iblock, bh_result, |
483 | create | GET_BLOCK_NO_DANGLE); | 483 | create | GET_BLOCK_NO_DANGLE); |
484 | if (ret) | 484 | if (ret) |
485 | goto out; | 485 | goto out; |
486 | 486 | ||
487 | /* don't allow direct io onto tail pages */ | 487 | /* don't allow direct io onto tail pages */ |
488 | if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { | 488 | if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { |
489 | /* make sure future calls to the direct io funcs for this offset | 489 | /* make sure future calls to the direct io funcs for this offset |
490 | ** in the file fail by unmapping the buffer | 490 | ** in the file fail by unmapping the buffer |
491 | */ | 491 | */ |
492 | clear_buffer_mapped(bh_result); | 492 | clear_buffer_mapped(bh_result); |
493 | ret = -EINVAL; | 493 | ret = -EINVAL; |
494 | } | 494 | } |
495 | /* Possible unpacked tail. Flush the data before pages have | 495 | /* Possible unpacked tail. Flush the data before pages have |
496 | disappeared */ | 496 | disappeared */ |
497 | if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { | 497 | if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { |
498 | int err; | 498 | int err; |
499 | lock_kernel(); | 499 | lock_kernel(); |
500 | err = reiserfs_commit_for_inode(inode); | 500 | err = reiserfs_commit_for_inode(inode); |
501 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; | 501 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; |
502 | unlock_kernel(); | 502 | unlock_kernel(); |
503 | if (err < 0) | 503 | if (err < 0) |
504 | ret = err; | 504 | ret = err; |
505 | } | 505 | } |
506 | out: | 506 | out: |
507 | return ret; | 507 | return ret; |
508 | } | 508 | } |
509 | 509 | ||
/*
** helper function for when reiserfs_get_block is called for a hole
** but the file tail is still in a direct item
** bh_result is the buffer head for the hole
** tail_offset is the offset of the start of the tail in the file
**
** This calls prepare_write, which will start a new transaction
** you should not be in a transaction, or have any paths held when you
** call this.
**
** Returns 0 on success, -EIO for a misaligned tail_offset, -ENOMEM if
** the tail page cannot be grabbed, or the error from prepare/commit_write.
*/
static int convert_tail_for_hole(struct inode *inode,
				 struct buffer_head *bh_result,
				 loff_t tail_offset)
{
	unsigned long index;
	unsigned long tail_end;
	unsigned long tail_start;
	struct page *tail_page;
	struct page *hole_page = bh_result->b_page;
	int retval = 0;

	/* reiserfs file offsets are 1-based, so a valid tail start must sit
	** exactly one past a block boundary */
	if ((tail_offset & (bh_result->b_size - 1)) != 1)
		return -EIO;

	/* always try to read until the end of the block */
	tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
	tail_end = (tail_start | (bh_result->b_size - 1)) + 1;

	index = tail_offset >> PAGE_CACHE_SHIFT;
	/* hole_page can be zero in case of direct_io, we are sure
	   that we cannot get here if we write with O_DIRECT into
	   tail page */
	if (!hole_page || index != hole_page->index) {
		/* the tail lives on a different page than the hole; grab
		** (and lock) it from the page cache */
		tail_page = grab_cache_page(inode->i_mapping, index);
		retval = -ENOMEM;
		if (!tail_page) {
			goto out;
		}
	} else {
		/* hole and tail share a page, which the caller already holds */
		tail_page = hole_page;
	}

	/* we don't have to make sure the conversion did not happen while
	** we were locking the page because anyone that could convert
	** must first take i_mutex.
	**
	** We must fix the tail page for writing because it might have buffers
	** that are mapped, but have a block number of 0. This indicates tail
	** data that has been read directly into the page, and block_prepare_write
	** won't trigger a get_block in this case.
	*/
	fix_tail_page_for_writing(tail_page);
	retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
	if (retval)
		goto unlock;

	/* tail conversion might change the data in the page */
	flush_dcache_page(tail_page);

	retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);

      unlock:
	/* only release the page if we grabbed it ourselves above; the
	** caller owns hole_page's lock and reference */
	if (tail_page != hole_page) {
		unlock_page(tail_page);
		page_cache_release(tail_page);
	}
      out:
	return retval;
}
579 | 579 | ||
580 | static inline int _allocate_block(struct reiserfs_transaction_handle *th, | 580 | static inline int _allocate_block(struct reiserfs_transaction_handle *th, |
581 | long block, | 581 | long block, |
582 | struct inode *inode, | 582 | struct inode *inode, |
583 | b_blocknr_t * allocated_block_nr, | 583 | b_blocknr_t * allocated_block_nr, |
584 | struct path *path, int flags) | 584 | struct path *path, int flags) |
585 | { | 585 | { |
586 | BUG_ON(!th->t_trans_id); | 586 | BUG_ON(!th->t_trans_id); |
587 | 587 | ||
588 | #ifdef REISERFS_PREALLOCATE | 588 | #ifdef REISERFS_PREALLOCATE |
589 | if (!(flags & GET_BLOCK_NO_IMUX)) { | 589 | if (!(flags & GET_BLOCK_NO_IMUX)) { |
590 | return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, | 590 | return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, |
591 | path, block); | 591 | path, block); |
592 | } | 592 | } |
593 | #endif | 593 | #endif |
594 | return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path, | 594 | return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path, |
595 | block); | 595 | block); |
596 | } | 596 | } |
597 | 597 | ||
598 | int reiserfs_get_block(struct inode *inode, sector_t block, | 598 | int reiserfs_get_block(struct inode *inode, sector_t block, |
599 | struct buffer_head *bh_result, int create) | 599 | struct buffer_head *bh_result, int create) |
600 | { | 600 | { |
601 | int repeat, retval = 0; | 601 | int repeat, retval = 0; |
602 | b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int | 602 | b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int |
603 | INITIALIZE_PATH(path); | 603 | INITIALIZE_PATH(path); |
604 | int pos_in_item; | 604 | int pos_in_item; |
605 | struct cpu_key key; | 605 | struct cpu_key key; |
606 | struct buffer_head *bh, *unbh = NULL; | 606 | struct buffer_head *bh, *unbh = NULL; |
607 | struct item_head *ih, tmp_ih; | 607 | struct item_head *ih, tmp_ih; |
608 | __le32 *item; | 608 | __le32 *item; |
609 | int done; | 609 | int done; |
610 | int fs_gen; | 610 | int fs_gen; |
611 | struct reiserfs_transaction_handle *th = NULL; | 611 | struct reiserfs_transaction_handle *th = NULL; |
612 | /* space reserved in transaction batch: | 612 | /* space reserved in transaction batch: |
613 | . 3 balancings in direct->indirect conversion | 613 | . 3 balancings in direct->indirect conversion |
614 | . 1 block involved into reiserfs_update_sd() | 614 | . 1 block involved into reiserfs_update_sd() |
615 | XXX in practically impossible worst case direct2indirect() | 615 | XXX in practically impossible worst case direct2indirect() |
616 | can incur (much) more than 3 balancings. | 616 | can incur (much) more than 3 balancings. |
617 | quota update for user, group */ | 617 | quota update for user, group */ |
618 | int jbegin_count = | 618 | int jbegin_count = |
619 | JOURNAL_PER_BALANCE_CNT * 3 + 1 + | 619 | JOURNAL_PER_BALANCE_CNT * 3 + 1 + |
620 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); | 620 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); |
621 | int version; | 621 | int version; |
622 | int dangle = 1; | 622 | int dangle = 1; |
623 | loff_t new_offset = | 623 | loff_t new_offset = |
624 | (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; | 624 | (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; |
625 | 625 | ||
626 | /* bad.... */ | 626 | /* bad.... */ |
627 | reiserfs_write_lock(inode->i_sb); | 627 | reiserfs_write_lock(inode->i_sb); |
628 | version = get_inode_item_key_version(inode); | 628 | version = get_inode_item_key_version(inode); |
629 | 629 | ||
630 | if (block < 0) { | 630 | if (block < 0) { |
631 | reiserfs_write_unlock(inode->i_sb); | 631 | reiserfs_write_unlock(inode->i_sb); |
632 | return -EIO; | 632 | return -EIO; |
633 | } | 633 | } |
634 | 634 | ||
635 | if (!file_capable(inode, block)) { | 635 | if (!file_capable(inode, block)) { |
636 | reiserfs_write_unlock(inode->i_sb); | 636 | reiserfs_write_unlock(inode->i_sb); |
637 | return -EFBIG; | 637 | return -EFBIG; |
638 | } | 638 | } |
639 | 639 | ||
640 | /* if !create, we aren't changing the FS, so we don't need to | 640 | /* if !create, we aren't changing the FS, so we don't need to |
641 | ** log anything, so we don't need to start a transaction | 641 | ** log anything, so we don't need to start a transaction |
642 | */ | 642 | */ |
643 | if (!(create & GET_BLOCK_CREATE)) { | 643 | if (!(create & GET_BLOCK_CREATE)) { |
644 | int ret; | 644 | int ret; |
645 | /* find number of block-th logical block of the file */ | 645 | /* find number of block-th logical block of the file */ |
646 | ret = _get_block_create_0(inode, block, bh_result, | 646 | ret = _get_block_create_0(inode, block, bh_result, |
647 | create | GET_BLOCK_READ_DIRECT); | 647 | create | GET_BLOCK_READ_DIRECT); |
648 | reiserfs_write_unlock(inode->i_sb); | 648 | reiserfs_write_unlock(inode->i_sb); |
649 | return ret; | 649 | return ret; |
650 | } | 650 | } |
651 | /* | 651 | /* |
652 | * if we're already in a transaction, make sure to close | 652 | * if we're already in a transaction, make sure to close |
653 | * any new transactions we start in this func | 653 | * any new transactions we start in this func |
654 | */ | 654 | */ |
655 | if ((create & GET_BLOCK_NO_DANGLE) || | 655 | if ((create & GET_BLOCK_NO_DANGLE) || |
656 | reiserfs_transaction_running(inode->i_sb)) | 656 | reiserfs_transaction_running(inode->i_sb)) |
657 | dangle = 0; | 657 | dangle = 0; |
658 | 658 | ||
659 | /* If file is of such a size, that it might have a tail and tails are enabled | 659 | /* If file is of such a size, that it might have a tail and tails are enabled |
660 | ** we should mark it as possibly needing tail packing on close | 660 | ** we should mark it as possibly needing tail packing on close |
661 | */ | 661 | */ |
662 | if ((have_large_tails(inode->i_sb) | 662 | if ((have_large_tails(inode->i_sb) |
663 | && inode->i_size < i_block_size(inode) * 4) | 663 | && inode->i_size < i_block_size(inode) * 4) |
664 | || (have_small_tails(inode->i_sb) | 664 | || (have_small_tails(inode->i_sb) |
665 | && inode->i_size < i_block_size(inode))) | 665 | && inode->i_size < i_block_size(inode))) |
666 | REISERFS_I(inode)->i_flags |= i_pack_on_close_mask; | 666 | REISERFS_I(inode)->i_flags |= i_pack_on_close_mask; |
667 | 667 | ||
668 | /* set the key of the first byte in the 'block'-th block of file */ | 668 | /* set the key of the first byte in the 'block'-th block of file */ |
669 | make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ ); | 669 | make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ ); |
670 | if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { | 670 | if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { |
671 | start_trans: | 671 | start_trans: |
672 | th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); | 672 | th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); |
673 | if (!th) { | 673 | if (!th) { |
674 | retval = -ENOMEM; | 674 | retval = -ENOMEM; |
675 | goto failure; | 675 | goto failure; |
676 | } | 676 | } |
677 | reiserfs_update_inode_transaction(inode); | 677 | reiserfs_update_inode_transaction(inode); |
678 | } | 678 | } |
679 | research: | 679 | research: |
680 | 680 | ||
681 | retval = search_for_position_by_key(inode->i_sb, &key, &path); | 681 | retval = search_for_position_by_key(inode->i_sb, &key, &path); |
682 | if (retval == IO_ERROR) { | 682 | if (retval == IO_ERROR) { |
683 | retval = -EIO; | 683 | retval = -EIO; |
684 | goto failure; | 684 | goto failure; |
685 | } | 685 | } |
686 | 686 | ||
687 | bh = get_last_bh(&path); | 687 | bh = get_last_bh(&path); |
688 | ih = get_ih(&path); | 688 | ih = get_ih(&path); |
689 | item = get_item(&path); | 689 | item = get_item(&path); |
690 | pos_in_item = path.pos_in_item; | 690 | pos_in_item = path.pos_in_item; |
691 | 691 | ||
692 | fs_gen = get_generation(inode->i_sb); | 692 | fs_gen = get_generation(inode->i_sb); |
693 | copy_item_head(&tmp_ih, ih); | 693 | copy_item_head(&tmp_ih, ih); |
694 | 694 | ||
695 | if (allocation_needed | 695 | if (allocation_needed |
696 | (retval, allocated_block_nr, ih, item, pos_in_item)) { | 696 | (retval, allocated_block_nr, ih, item, pos_in_item)) { |
697 | /* we have to allocate block for the unformatted node */ | 697 | /* we have to allocate block for the unformatted node */ |
698 | if (!th) { | 698 | if (!th) { |
699 | pathrelse(&path); | 699 | pathrelse(&path); |
700 | goto start_trans; | 700 | goto start_trans; |
701 | } | 701 | } |
702 | 702 | ||
703 | repeat = | 703 | repeat = |
704 | _allocate_block(th, block, inode, &allocated_block_nr, | 704 | _allocate_block(th, block, inode, &allocated_block_nr, |
705 | &path, create); | 705 | &path, create); |
706 | 706 | ||
707 | if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { | 707 | if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { |
708 | /* restart the transaction to give the journal a chance to free | 708 | /* restart the transaction to give the journal a chance to free |
709 | ** some blocks. releases the path, so we have to go back to | 709 | ** some blocks. releases the path, so we have to go back to |
710 | ** research if we succeed on the second try | 710 | ** research if we succeed on the second try |
711 | */ | 711 | */ |
712 | SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; | 712 | SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; |
713 | retval = restart_transaction(th, inode, &path); | 713 | retval = restart_transaction(th, inode, &path); |
714 | if (retval) | 714 | if (retval) |
715 | goto failure; | 715 | goto failure; |
716 | repeat = | 716 | repeat = |
717 | _allocate_block(th, block, inode, | 717 | _allocate_block(th, block, inode, |
718 | &allocated_block_nr, NULL, create); | 718 | &allocated_block_nr, NULL, create); |
719 | 719 | ||
720 | if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) { | 720 | if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) { |
721 | goto research; | 721 | goto research; |
722 | } | 722 | } |
723 | if (repeat == QUOTA_EXCEEDED) | 723 | if (repeat == QUOTA_EXCEEDED) |
724 | retval = -EDQUOT; | 724 | retval = -EDQUOT; |
725 | else | 725 | else |
726 | retval = -ENOSPC; | 726 | retval = -ENOSPC; |
727 | goto failure; | 727 | goto failure; |
728 | } | 728 | } |
729 | 729 | ||
730 | if (fs_changed(fs_gen, inode->i_sb) | 730 | if (fs_changed(fs_gen, inode->i_sb) |
731 | && item_moved(&tmp_ih, &path)) { | 731 | && item_moved(&tmp_ih, &path)) { |
732 | goto research; | 732 | goto research; |
733 | } | 733 | } |
734 | } | 734 | } |
735 | 735 | ||
736 | if (indirect_item_found(retval, ih)) { | 736 | if (indirect_item_found(retval, ih)) { |
737 | b_blocknr_t unfm_ptr; | 737 | b_blocknr_t unfm_ptr; |
738 | /* 'block'-th block is in the file already (there is | 738 | /* 'block'-th block is in the file already (there is |
739 | corresponding cell in some indirect item). But it may be | 739 | corresponding cell in some indirect item). But it may be |
740 | zero unformatted node pointer (hole) */ | 740 | zero unformatted node pointer (hole) */ |
741 | unfm_ptr = get_block_num(item, pos_in_item); | 741 | unfm_ptr = get_block_num(item, pos_in_item); |
742 | if (unfm_ptr == 0) { | 742 | if (unfm_ptr == 0) { |
743 | /* use allocated block to plug the hole */ | 743 | /* use allocated block to plug the hole */ |
744 | reiserfs_prepare_for_journal(inode->i_sb, bh, 1); | 744 | reiserfs_prepare_for_journal(inode->i_sb, bh, 1); |
745 | if (fs_changed(fs_gen, inode->i_sb) | 745 | if (fs_changed(fs_gen, inode->i_sb) |
746 | && item_moved(&tmp_ih, &path)) { | 746 | && item_moved(&tmp_ih, &path)) { |
747 | reiserfs_restore_prepared_buffer(inode->i_sb, | 747 | reiserfs_restore_prepared_buffer(inode->i_sb, |
748 | bh); | 748 | bh); |
749 | goto research; | 749 | goto research; |
750 | } | 750 | } |
751 | set_buffer_new(bh_result); | 751 | set_buffer_new(bh_result); |
752 | if (buffer_dirty(bh_result) | 752 | if (buffer_dirty(bh_result) |
753 | && reiserfs_data_ordered(inode->i_sb)) | 753 | && reiserfs_data_ordered(inode->i_sb)) |
754 | reiserfs_add_ordered_list(inode, bh_result); | 754 | reiserfs_add_ordered_list(inode, bh_result); |
755 | put_block_num(item, pos_in_item, allocated_block_nr); | 755 | put_block_num(item, pos_in_item, allocated_block_nr); |
756 | unfm_ptr = allocated_block_nr; | 756 | unfm_ptr = allocated_block_nr; |
757 | journal_mark_dirty(th, inode->i_sb, bh); | 757 | journal_mark_dirty(th, inode->i_sb, bh); |
758 | reiserfs_update_sd(th, inode); | 758 | reiserfs_update_sd(th, inode); |
759 | } | 759 | } |
760 | set_block_dev_mapped(bh_result, unfm_ptr, inode); | 760 | set_block_dev_mapped(bh_result, unfm_ptr, inode); |
761 | pathrelse(&path); | 761 | pathrelse(&path); |
762 | retval = 0; | 762 | retval = 0; |
763 | if (!dangle && th) | 763 | if (!dangle && th) |
764 | retval = reiserfs_end_persistent_transaction(th); | 764 | retval = reiserfs_end_persistent_transaction(th); |
765 | 765 | ||
766 | reiserfs_write_unlock(inode->i_sb); | 766 | reiserfs_write_unlock(inode->i_sb); |
767 | 767 | ||
768 | /* the item was found, so new blocks were not added to the file | 768 | /* the item was found, so new blocks were not added to the file |
769 | ** there is no need to make sure the inode is updated with this | 769 | ** there is no need to make sure the inode is updated with this |
770 | ** transaction | 770 | ** transaction |
771 | */ | 771 | */ |
772 | return retval; | 772 | return retval; |
773 | } | 773 | } |
774 | 774 | ||
775 | if (!th) { | 775 | if (!th) { |
776 | pathrelse(&path); | 776 | pathrelse(&path); |
777 | goto start_trans; | 777 | goto start_trans; |
778 | } | 778 | } |
779 | 779 | ||
780 | /* desired position is not found or is in the direct item. We have | 780 | /* desired position is not found or is in the direct item. We have |
781 | to append file with holes up to 'block'-th block converting | 781 | to append file with holes up to 'block'-th block converting |
782 | direct items to indirect one if necessary */ | 782 | direct items to indirect one if necessary */ |
783 | done = 0; | 783 | done = 0; |
784 | do { | 784 | do { |
785 | if (is_statdata_le_ih(ih)) { | 785 | if (is_statdata_le_ih(ih)) { |
786 | __le32 unp = 0; | 786 | __le32 unp = 0; |
787 | struct cpu_key tmp_key; | 787 | struct cpu_key tmp_key; |
788 | 788 | ||
789 | /* indirect item has to be inserted */ | 789 | /* indirect item has to be inserted */ |
790 | make_le_item_head(&tmp_ih, &key, version, 1, | 790 | make_le_item_head(&tmp_ih, &key, version, 1, |
791 | TYPE_INDIRECT, UNFM_P_SIZE, | 791 | TYPE_INDIRECT, UNFM_P_SIZE, |
792 | 0 /* free_space */ ); | 792 | 0 /* free_space */ ); |
793 | 793 | ||
794 | if (cpu_key_k_offset(&key) == 1) { | 794 | if (cpu_key_k_offset(&key) == 1) { |
795 | /* we are going to add 'block'-th block to the file. Use | 795 | /* we are going to add 'block'-th block to the file. Use |
796 | allocated block for that */ | 796 | allocated block for that */ |
797 | unp = cpu_to_le32(allocated_block_nr); | 797 | unp = cpu_to_le32(allocated_block_nr); |
798 | set_block_dev_mapped(bh_result, | 798 | set_block_dev_mapped(bh_result, |
799 | allocated_block_nr, inode); | 799 | allocated_block_nr, inode); |
800 | set_buffer_new(bh_result); | 800 | set_buffer_new(bh_result); |
801 | done = 1; | 801 | done = 1; |
802 | } | 802 | } |
803 | tmp_key = key; // ;) | 803 | tmp_key = key; // ;) |
804 | set_cpu_key_k_offset(&tmp_key, 1); | 804 | set_cpu_key_k_offset(&tmp_key, 1); |
805 | PATH_LAST_POSITION(&path)++; | 805 | PATH_LAST_POSITION(&path)++; |
806 | 806 | ||
807 | retval = | 807 | retval = |
808 | reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih, | 808 | reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih, |
809 | inode, (char *)&unp); | 809 | inode, (char *)&unp); |
810 | if (retval) { | 810 | if (retval) { |
811 | reiserfs_free_block(th, inode, | 811 | reiserfs_free_block(th, inode, |
812 | allocated_block_nr, 1); | 812 | allocated_block_nr, 1); |
813 | goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST | 813 | goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST |
814 | } | 814 | } |
815 | //mark_tail_converted (inode); | 815 | //mark_tail_converted (inode); |
816 | } else if (is_direct_le_ih(ih)) { | 816 | } else if (is_direct_le_ih(ih)) { |
817 | /* direct item has to be converted */ | 817 | /* direct item has to be converted */ |
818 | loff_t tail_offset; | 818 | loff_t tail_offset; |
819 | 819 | ||
820 | tail_offset = | 820 | tail_offset = |
821 | ((le_ih_k_offset(ih) - | 821 | ((le_ih_k_offset(ih) - |
822 | 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; | 822 | 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; |
823 | if (tail_offset == cpu_key_k_offset(&key)) { | 823 | if (tail_offset == cpu_key_k_offset(&key)) { |
824 | /* direct item we just found fits into block we have | 824 | /* direct item we just found fits into block we have |
825 | to map. Convert it into unformatted node: use | 825 | to map. Convert it into unformatted node: use |
826 | bh_result for the conversion */ | 826 | bh_result for the conversion */ |
827 | set_block_dev_mapped(bh_result, | 827 | set_block_dev_mapped(bh_result, |
828 | allocated_block_nr, inode); | 828 | allocated_block_nr, inode); |
829 | unbh = bh_result; | 829 | unbh = bh_result; |
830 | done = 1; | 830 | done = 1; |
831 | } else { | 831 | } else { |
832 | /* we have to padd file tail stored in direct item(s) | 832 | /* we have to padd file tail stored in direct item(s) |
833 | up to block size and convert it to unformatted | 833 | up to block size and convert it to unformatted |
834 | node. FIXME: this should also get into page cache */ | 834 | node. FIXME: this should also get into page cache */ |
835 | 835 | ||
836 | pathrelse(&path); | 836 | pathrelse(&path); |
837 | /* | 837 | /* |
838 | * ugly, but we can only end the transaction if | 838 | * ugly, but we can only end the transaction if |
839 | * we aren't nested | 839 | * we aren't nested |
840 | */ | 840 | */ |
841 | BUG_ON(!th->t_refcount); | 841 | BUG_ON(!th->t_refcount); |
842 | if (th->t_refcount == 1) { | 842 | if (th->t_refcount == 1) { |
843 | retval = | 843 | retval = |
844 | reiserfs_end_persistent_transaction | 844 | reiserfs_end_persistent_transaction |
845 | (th); | 845 | (th); |
846 | th = NULL; | 846 | th = NULL; |
847 | if (retval) | 847 | if (retval) |
848 | goto failure; | 848 | goto failure; |
849 | } | 849 | } |
850 | 850 | ||
851 | retval = | 851 | retval = |
852 | convert_tail_for_hole(inode, bh_result, | 852 | convert_tail_for_hole(inode, bh_result, |
853 | tail_offset); | 853 | tail_offset); |
854 | if (retval) { | 854 | if (retval) { |
855 | if (retval != -ENOSPC) | 855 | if (retval != -ENOSPC) |
856 | reiserfs_warning(inode->i_sb, | 856 | reiserfs_warning(inode->i_sb, |
857 | "clm-6004: convert tail failed inode %lu, error %d", | 857 | "clm-6004: convert tail failed inode %lu, error %d", |
858 | inode->i_ino, | 858 | inode->i_ino, |
859 | retval); | 859 | retval); |
860 | if (allocated_block_nr) { | 860 | if (allocated_block_nr) { |
861 | /* the bitmap, the super, and the stat data == 3 */ | 861 | /* the bitmap, the super, and the stat data == 3 */ |
862 | if (!th) | 862 | if (!th) |
863 | th = reiserfs_persistent_transaction(inode->i_sb, 3); | 863 | th = reiserfs_persistent_transaction(inode->i_sb, 3); |
864 | if (th) | 864 | if (th) |
865 | reiserfs_free_block(th, | 865 | reiserfs_free_block(th, |
866 | inode, | 866 | inode, |
867 | allocated_block_nr, | 867 | allocated_block_nr, |
868 | 1); | 868 | 1); |
869 | } | 869 | } |
870 | goto failure; | 870 | goto failure; |
871 | } | 871 | } |
872 | goto research; | 872 | goto research; |
873 | } | 873 | } |
874 | retval = | 874 | retval = |
875 | direct2indirect(th, inode, &path, unbh, | 875 | direct2indirect(th, inode, &path, unbh, |
876 | tail_offset); | 876 | tail_offset); |
877 | if (retval) { | 877 | if (retval) { |
878 | reiserfs_unmap_buffer(unbh); | 878 | reiserfs_unmap_buffer(unbh); |
879 | reiserfs_free_block(th, inode, | 879 | reiserfs_free_block(th, inode, |
880 | allocated_block_nr, 1); | 880 | allocated_block_nr, 1); |
881 | goto failure; | 881 | goto failure; |
882 | } | 882 | } |
883 | /* it is important the set_buffer_uptodate is done after | 883 | /* it is important the set_buffer_uptodate is done after |
884 | ** the direct2indirect. The buffer might contain valid | 884 | ** the direct2indirect. The buffer might contain valid |
885 | ** data newer than the data on disk (read by readpage, changed, | 885 | ** data newer than the data on disk (read by readpage, changed, |
886 | ** and then sent here by writepage). direct2indirect needs | 886 | ** and then sent here by writepage). direct2indirect needs |
887 | ** to know if unbh was already up to date, so it can decide | 887 | ** to know if unbh was already up to date, so it can decide |
888 | ** if the data in unbh needs to be replaced with data from | 888 | ** if the data in unbh needs to be replaced with data from |
889 | ** the disk | 889 | ** the disk |
890 | */ | 890 | */ |
891 | set_buffer_uptodate(unbh); | 891 | set_buffer_uptodate(unbh); |
892 | 892 | ||
893 | /* unbh->b_page == NULL in case of DIRECT_IO request, this means | 893 | /* unbh->b_page == NULL in case of DIRECT_IO request, this means |
894 | buffer will disappear shortly, so it should not be added to | 894 | buffer will disappear shortly, so it should not be added to |
895 | */ | 895 | */ |
896 | if (unbh->b_page) { | 896 | if (unbh->b_page) { |
897 | /* we've converted the tail, so we must | 897 | /* we've converted the tail, so we must |
898 | ** flush unbh before the transaction commits | 898 | ** flush unbh before the transaction commits |
899 | */ | 899 | */ |
900 | reiserfs_add_tail_list(inode, unbh); | 900 | reiserfs_add_tail_list(inode, unbh); |
901 | 901 | ||
902 | /* mark it dirty now to prevent commit_write from adding | 902 | /* mark it dirty now to prevent commit_write from adding |
903 | ** this buffer to the inode's dirty buffer list | 903 | ** this buffer to the inode's dirty buffer list |
904 | */ | 904 | */ |
905 | /* | 905 | /* |
906 | * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). | 906 | * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). |
907 | * It's still atomic, but it sets the page dirty too, | 907 | * It's still atomic, but it sets the page dirty too, |
908 | * which makes it eligible for writeback at any time by the | 908 | * which makes it eligible for writeback at any time by the |
909 | * VM (which was also the case with __mark_buffer_dirty()) | 909 | * VM (which was also the case with __mark_buffer_dirty()) |
910 | */ | 910 | */ |
911 | mark_buffer_dirty(unbh); | 911 | mark_buffer_dirty(unbh); |
912 | } | 912 | } |
913 | } else { | 913 | } else { |
914 | /* append indirect item with holes if needed, when appending | 914 | /* append indirect item with holes if needed, when appending |
915 | pointer to 'block'-th block use block, which is already | 915 | pointer to 'block'-th block use block, which is already |
916 | allocated */ | 916 | allocated */ |
917 | struct cpu_key tmp_key; | 917 | struct cpu_key tmp_key; |
918 | unp_t unf_single = 0; // We use this in case we need to allocate only | 918 | unp_t unf_single = 0; // We use this in case we need to allocate only |
919 | // one block which is a fastpath | 919 | // one block which is a fastpath |
920 | unp_t *un; | 920 | unp_t *un; |
921 | __u64 max_to_insert = | 921 | __u64 max_to_insert = |
922 | MAX_ITEM_LEN(inode->i_sb->s_blocksize) / | 922 | MAX_ITEM_LEN(inode->i_sb->s_blocksize) / |
923 | UNFM_P_SIZE; | 923 | UNFM_P_SIZE; |
924 | __u64 blocks_needed; | 924 | __u64 blocks_needed; |
925 | 925 | ||
926 | RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, | 926 | RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, |
927 | "vs-804: invalid position for append"); | 927 | "vs-804: invalid position for append"); |
928 | /* indirect item has to be appended, set up key of that position */ | 928 | /* indirect item has to be appended, set up key of that position */ |
929 | make_cpu_key(&tmp_key, inode, | 929 | make_cpu_key(&tmp_key, inode, |
930 | le_key_k_offset(version, | 930 | le_key_k_offset(version, |
931 | &(ih->ih_key)) + | 931 | &(ih->ih_key)) + |
932 | op_bytes_number(ih, | 932 | op_bytes_number(ih, |
933 | inode->i_sb->s_blocksize), | 933 | inode->i_sb->s_blocksize), |
934 | //pos_in_item * inode->i_sb->s_blocksize, | 934 | //pos_in_item * inode->i_sb->s_blocksize, |
935 | TYPE_INDIRECT, 3); // key type is unimportant | 935 | TYPE_INDIRECT, 3); // key type is unimportant |
936 | 936 | ||
937 | blocks_needed = | 937 | blocks_needed = |
938 | 1 + | 938 | 1 + |
939 | ((cpu_key_k_offset(&key) - | 939 | ((cpu_key_k_offset(&key) - |
940 | cpu_key_k_offset(&tmp_key)) >> inode->i_sb-> | 940 | cpu_key_k_offset(&tmp_key)) >> inode->i_sb-> |
941 | s_blocksize_bits); | 941 | s_blocksize_bits); |
942 | RFALSE(blocks_needed < 0, "green-805: invalid offset"); | 942 | RFALSE(blocks_needed < 0, "green-805: invalid offset"); |
943 | 943 | ||
944 | if (blocks_needed == 1) { | 944 | if (blocks_needed == 1) { |
945 | un = &unf_single; | 945 | un = &unf_single; |
946 | } else { | 946 | } else { |
947 | un = kmalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling. | 947 | un = kmalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling. |
948 | if (!un) { | 948 | if (!un) { |
949 | un = &unf_single; | 949 | un = &unf_single; |
950 | blocks_needed = 1; | 950 | blocks_needed = 1; |
951 | max_to_insert = 0; | 951 | max_to_insert = 0; |
952 | } else | 952 | } else |
953 | memset(un, 0, | 953 | memset(un, 0, |
954 | UNFM_P_SIZE * min(blocks_needed, | 954 | UNFM_P_SIZE * min(blocks_needed, |
955 | max_to_insert)); | 955 | max_to_insert)); |
956 | } | 956 | } |
957 | if (blocks_needed <= max_to_insert) { | 957 | if (blocks_needed <= max_to_insert) { |
958 | /* we are going to add target block to the file. Use allocated | 958 | /* we are going to add target block to the file. Use allocated |
959 | block for that */ | 959 | block for that */ |
960 | un[blocks_needed - 1] = | 960 | un[blocks_needed - 1] = |
961 | cpu_to_le32(allocated_block_nr); | 961 | cpu_to_le32(allocated_block_nr); |
962 | set_block_dev_mapped(bh_result, | 962 | set_block_dev_mapped(bh_result, |
963 | allocated_block_nr, inode); | 963 | allocated_block_nr, inode); |
964 | set_buffer_new(bh_result); | 964 | set_buffer_new(bh_result); |
965 | done = 1; | 965 | done = 1; |
966 | } else { | 966 | } else { |
967 | /* paste hole to the indirect item */ | 967 | /* paste hole to the indirect item */ |
968 | /* If kmalloc failed, max_to_insert becomes zero and it means we | 968 | /* If kmalloc failed, max_to_insert becomes zero and it means we |
969 | only have space for one block */ | 969 | only have space for one block */ |
970 | blocks_needed = | 970 | blocks_needed = |
971 | max_to_insert ? max_to_insert : 1; | 971 | max_to_insert ? max_to_insert : 1; |
972 | } | 972 | } |
973 | retval = | 973 | retval = |
974 | reiserfs_paste_into_item(th, &path, &tmp_key, inode, | 974 | reiserfs_paste_into_item(th, &path, &tmp_key, inode, |
975 | (char *)un, | 975 | (char *)un, |
976 | UNFM_P_SIZE * | 976 | UNFM_P_SIZE * |
977 | blocks_needed); | 977 | blocks_needed); |
978 | 978 | ||
979 | if (blocks_needed != 1) | 979 | if (blocks_needed != 1) |
980 | kfree(un); | 980 | kfree(un); |
981 | 981 | ||
982 | if (retval) { | 982 | if (retval) { |
983 | reiserfs_free_block(th, inode, | 983 | reiserfs_free_block(th, inode, |
984 | allocated_block_nr, 1); | 984 | allocated_block_nr, 1); |
985 | goto failure; | 985 | goto failure; |
986 | } | 986 | } |
987 | if (!done) { | 987 | if (!done) { |
988 | /* We need to mark new file size in case this function will be | 988 | /* We need to mark new file size in case this function will be |
989 | interrupted/aborted later on. And we may do this only for | 989 | interrupted/aborted later on. And we may do this only for |
990 | holes. */ | 990 | holes. */ |
991 | inode->i_size += | 991 | inode->i_size += |
992 | inode->i_sb->s_blocksize * blocks_needed; | 992 | inode->i_sb->s_blocksize * blocks_needed; |
993 | } | 993 | } |
994 | } | 994 | } |
995 | 995 | ||
996 | if (done == 1) | 996 | if (done == 1) |
997 | break; | 997 | break; |
998 | 998 | ||
999 | /* this loop could log more blocks than we had originally asked | 999 | /* this loop could log more blocks than we had originally asked |
1000 | ** for. So, we have to allow the transaction to end if it is | 1000 | ** for. So, we have to allow the transaction to end if it is |
1001 | ** too big or too full. Update the inode so things are | 1001 | ** too big or too full. Update the inode so things are |
1002 | ** consistent if we crash before the function returns | 1002 | ** consistent if we crash before the function returns |
1003 | ** | 1003 | ** |
1004 | ** release the path so that anybody waiting on the path before | 1004 | ** release the path so that anybody waiting on the path before |
1005 | ** ending their transaction will be able to continue. | 1005 | ** ending their transaction will be able to continue. |
1006 | */ | 1006 | */ |
1007 | if (journal_transaction_should_end(th, th->t_blocks_allocated)) { | 1007 | if (journal_transaction_should_end(th, th->t_blocks_allocated)) { |
1008 | retval = restart_transaction(th, inode, &path); | 1008 | retval = restart_transaction(th, inode, &path); |
1009 | if (retval) | 1009 | if (retval) |
1010 | goto failure; | 1010 | goto failure; |
1011 | } | 1011 | } |
1012 | /* inserting indirect pointers for a hole can take a | 1012 | /* inserting indirect pointers for a hole can take a |
1013 | ** long time. reschedule if needed | 1013 | ** long time. reschedule if needed |
1014 | */ | 1014 | */ |
1015 | cond_resched(); | 1015 | cond_resched(); |
1016 | 1016 | ||
1017 | retval = search_for_position_by_key(inode->i_sb, &key, &path); | 1017 | retval = search_for_position_by_key(inode->i_sb, &key, &path); |
1018 | if (retval == IO_ERROR) { | 1018 | if (retval == IO_ERROR) { |
1019 | retval = -EIO; | 1019 | retval = -EIO; |
1020 | goto failure; | 1020 | goto failure; |
1021 | } | 1021 | } |
1022 | if (retval == POSITION_FOUND) { | 1022 | if (retval == POSITION_FOUND) { |
1023 | reiserfs_warning(inode->i_sb, | 1023 | reiserfs_warning(inode->i_sb, |
1024 | "vs-825: reiserfs_get_block: " | 1024 | "vs-825: reiserfs_get_block: " |
1025 | "%K should not be found", &key); | 1025 | "%K should not be found", &key); |
1026 | retval = -EEXIST; | 1026 | retval = -EEXIST; |
1027 | if (allocated_block_nr) | 1027 | if (allocated_block_nr) |
1028 | reiserfs_free_block(th, inode, | 1028 | reiserfs_free_block(th, inode, |
1029 | allocated_block_nr, 1); | 1029 | allocated_block_nr, 1); |
1030 | pathrelse(&path); | 1030 | pathrelse(&path); |
1031 | goto failure; | 1031 | goto failure; |
1032 | } | 1032 | } |
1033 | bh = get_last_bh(&path); | 1033 | bh = get_last_bh(&path); |
1034 | ih = get_ih(&path); | 1034 | ih = get_ih(&path); |
1035 | item = get_item(&path); | 1035 | item = get_item(&path); |
1036 | pos_in_item = path.pos_in_item; | 1036 | pos_in_item = path.pos_in_item; |
1037 | } while (1); | 1037 | } while (1); |
1038 | 1038 | ||
1039 | retval = 0; | 1039 | retval = 0; |
1040 | 1040 | ||
1041 | failure: | 1041 | failure: |
1042 | if (th && (!dangle || (retval && !th->t_trans_id))) { | 1042 | if (th && (!dangle || (retval && !th->t_trans_id))) { |
1043 | int err; | 1043 | int err; |
1044 | if (th->t_trans_id) | 1044 | if (th->t_trans_id) |
1045 | reiserfs_update_sd(th, inode); | 1045 | reiserfs_update_sd(th, inode); |
1046 | err = reiserfs_end_persistent_transaction(th); | 1046 | err = reiserfs_end_persistent_transaction(th); |
1047 | if (err) | 1047 | if (err) |
1048 | retval = err; | 1048 | retval = err; |
1049 | } | 1049 | } |
1050 | 1050 | ||
1051 | reiserfs_write_unlock(inode->i_sb); | 1051 | reiserfs_write_unlock(inode->i_sb); |
1052 | reiserfs_check_path(&path); | 1052 | reiserfs_check_path(&path); |
1053 | return retval; | 1053 | return retval; |
1054 | } | 1054 | } |
1055 | 1055 | ||
1056 | static int | 1056 | static int |
1057 | reiserfs_readpages(struct file *file, struct address_space *mapping, | 1057 | reiserfs_readpages(struct file *file, struct address_space *mapping, |
1058 | struct list_head *pages, unsigned nr_pages) | 1058 | struct list_head *pages, unsigned nr_pages) |
1059 | { | 1059 | { |
1060 | return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); | 1060 | return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); |
1061 | } | 1061 | } |
1062 | 1062 | ||
1063 | /* Compute real number of used bytes by file | 1063 | /* Compute real number of used bytes by file |
1064 | * Following three functions can go away when we'll have enough space in stat item | 1064 | * Following three functions can go away when we'll have enough space in stat item |
1065 | */ | 1065 | */ |
1066 | static int real_space_diff(struct inode *inode, int sd_size) | 1066 | static int real_space_diff(struct inode *inode, int sd_size) |
1067 | { | 1067 | { |
1068 | int bytes; | 1068 | int bytes; |
1069 | loff_t blocksize = inode->i_sb->s_blocksize; | 1069 | loff_t blocksize = inode->i_sb->s_blocksize; |
1070 | 1070 | ||
1071 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) | 1071 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) |
1072 | return sd_size; | 1072 | return sd_size; |
1073 | 1073 | ||
1074 | /* End of file is also in full block with indirect reference, so round | 1074 | /* End of file is also in full block with indirect reference, so round |
1075 | ** up to the next block. | 1075 | ** up to the next block. |
1076 | ** | 1076 | ** |
1077 | ** there is just no way to know if the tail is actually packed | 1077 | ** there is just no way to know if the tail is actually packed |
1078 | ** on the file, so we have to assume it isn't. When we pack the | 1078 | ** on the file, so we have to assume it isn't. When we pack the |
1079 | ** tail, we add 4 bytes to pretend there really is an unformatted | 1079 | ** tail, we add 4 bytes to pretend there really is an unformatted |
1080 | ** node pointer | 1080 | ** node pointer |
1081 | */ | 1081 | */ |
1082 | bytes = | 1082 | bytes = |
1083 | ((inode->i_size + | 1083 | ((inode->i_size + |
1084 | (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + | 1084 | (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + |
1085 | sd_size; | 1085 | sd_size; |
1086 | return bytes; | 1086 | return bytes; |
1087 | } | 1087 | } |
1088 | 1088 | ||
1089 | static inline loff_t to_real_used_space(struct inode *inode, ulong blocks, | 1089 | static inline loff_t to_real_used_space(struct inode *inode, ulong blocks, |
1090 | int sd_size) | 1090 | int sd_size) |
1091 | { | 1091 | { |
1092 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { | 1092 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { |
1093 | return inode->i_size + | 1093 | return inode->i_size + |
1094 | (loff_t) (real_space_diff(inode, sd_size)); | 1094 | (loff_t) (real_space_diff(inode, sd_size)); |
1095 | } | 1095 | } |
1096 | return ((loff_t) real_space_diff(inode, sd_size)) + | 1096 | return ((loff_t) real_space_diff(inode, sd_size)) + |
1097 | (((loff_t) blocks) << 9); | 1097 | (((loff_t) blocks) << 9); |
1098 | } | 1098 | } |
1099 | 1099 | ||
1100 | /* Compute number of blocks used by file in ReiserFS counting */ | 1100 | /* Compute number of blocks used by file in ReiserFS counting */ |
1101 | static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) | 1101 | static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) |
1102 | { | 1102 | { |
1103 | loff_t bytes = inode_get_bytes(inode); | 1103 | loff_t bytes = inode_get_bytes(inode); |
1104 | loff_t real_space = real_space_diff(inode, sd_size); | 1104 | loff_t real_space = real_space_diff(inode, sd_size); |
1105 | 1105 | ||
1106 | /* keeps fsck and non-quota versions of reiserfs happy */ | 1106 | /* keeps fsck and non-quota versions of reiserfs happy */ |
1107 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { | 1107 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { |
1108 | bytes += (loff_t) 511; | 1108 | bytes += (loff_t) 511; |
1109 | } | 1109 | } |
1110 | 1110 | ||
1111 | /* files from before the quota patch might i_blocks such that | 1111 | /* files from before the quota patch might i_blocks such that |
1112 | ** bytes < real_space. Deal with that here to prevent it from | 1112 | ** bytes < real_space. Deal with that here to prevent it from |
1113 | ** going negative. | 1113 | ** going negative. |
1114 | */ | 1114 | */ |
1115 | if (bytes < real_space) | 1115 | if (bytes < real_space) |
1116 | return 0; | 1116 | return 0; |
1117 | return (bytes - real_space) >> 9; | 1117 | return (bytes - real_space) >> 9; |
1118 | } | 1118 | } |
1119 | 1119 | ||
1120 | // | 1120 | // |
1121 | // BAD: new directories have stat data of new type and all other items | 1121 | // BAD: new directories have stat data of new type and all other items |
1122 | // of old type. Version stored in the inode says about body items, so | 1122 | // of old type. Version stored in the inode says about body items, so |
1123 | // in update_stat_data we can not rely on inode, but have to check | 1123 | // in update_stat_data we can not rely on inode, but have to check |
1124 | // item version directly | 1124 | // item version directly |
1125 | // | 1125 | // |
1126 | 1126 | ||
// called by read_locked_inode
//
// Populate a freshly-read in-core inode from the stat data item that
// `path` points at, handling both the old (v1) and new (v2) on-disk
// stat data formats, then releases the path and wires up the
// inode/file/address-space operations by file type.
static void init_inode(struct inode *inode, struct path *path)
{
	struct buffer_head *bh;
	struct item_head *ih;
	__u32 rdev;
	//int version = ITEM_VERSION_1;

	bh = PATH_PLAST_BUFFER(path);
	ih = PATH_PITEM_HEAD(path);

	/* the item's key becomes the inode's on-disk identity */
	copy_key(INODE_PKEY(inode), &(ih->ih_key));
	inode->i_blksize = reiserfs_default_io_size;

	/* reset all reiserfs-private per-inode state */
	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
	REISERFS_I(inode)->i_flags = 0;
	REISERFS_I(inode)->i_prealloc_block = 0;
	REISERFS_I(inode)->i_prealloc_count = 0;
	REISERFS_I(inode)->i_trans_id = 0;
	REISERFS_I(inode)->i_jl = NULL;
	REISERFS_I(inode)->i_acl_access = NULL;
	REISERFS_I(inode)->i_acl_default = NULL;
	init_rwsem(&REISERFS_I(inode)->xattr_sem);

	if (stat_data_v1(ih)) {
		/* old-format (3.5) stat data */
		struct stat_data_v1 *sd =
		    (struct stat_data_v1 *)B_I_PITEM(bh, ih);
		unsigned long blocks;

		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		set_inode_sd_version(inode, STAT_DATA_V1);
		inode->i_mode = sd_v1_mode(sd);
		inode->i_nlink = sd_v1_nlink(sd);
		inode->i_uid = sd_v1_uid(sd);
		inode->i_gid = sd_v1_gid(sd);
		inode->i_size = sd_v1_size(sd);
		inode->i_atime.tv_sec = sd_v1_atime(sd);
		inode->i_mtime.tv_sec = sd_v1_mtime(sd);
		inode->i_ctime.tv_sec = sd_v1_ctime(sd);
		/* v1 stat data has only second resolution timestamps */
		inode->i_atime.tv_nsec = 0;
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;

		inode->i_blocks = sd_v1_blocks(sd);
		inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		/* upper bound on sane block count, derived from i_size */
		blocks = (inode->i_size + 511) >> 9;
		blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
		if (inode->i_blocks > blocks) {
			// there was a bug in <=3.5.23 when i_blocks could take negative
			// values. Starting from 3.5.17 this value could even be stored in
			// stat data. For such files we set i_blocks based on file
			// size. Just 2 notes: this can be wrong for sparce files. On-disk value will be
			// only updated if file's inode will ever change
			inode->i_blocks = blocks;
		}

		rdev = sd_v1_rdev(sd);
		REISERFS_I(inode)->i_first_direct_byte =
		    sd_v1_first_direct_byte(sd);
		/* an early bug in the quota code can give us an odd number for the
		 ** block count. This is incorrect, fix it here.
		 */
		if (inode->i_blocks & 1) {
			inode->i_blocks++;
		}
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V1_SIZE));
		/* nopack is initially zero for v1 objects. For v2 objects,
		   nopack is initialised from sd_attrs */
		REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
	} else {
		// new stat data found, but object may have old items
		// (directories and symlinks)
		struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);

		inode->i_mode = sd_v2_mode(sd);
		inode->i_nlink = sd_v2_nlink(sd);
		inode->i_uid = sd_v2_uid(sd);
		inode->i_size = sd_v2_size(sd);
		inode->i_gid = sd_v2_gid(sd);
		inode->i_mtime.tv_sec = sd_v2_mtime(sd);
		inode->i_atime.tv_sec = sd_v2_atime(sd);
		inode->i_ctime.tv_sec = sd_v2_ctime(sd);
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;
		inode->i_atime.tv_nsec = 0;
		inode->i_blocks = sd_v2_blocks(sd);
		rdev = sd_v2_rdev(sd);
		/* device nodes store rdev where regular files store the
		 * generation, so derive generation from the key instead */
		if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
			inode->i_generation =
			    le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		else
			inode->i_generation = sd_v2_generation(sd);

		/* directories and symlinks keep old-format body items even
		 * with new stat data (see comment above this function) */
		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
			set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		else
			set_inode_item_key_version(inode, KEY_FORMAT_3_6);
		REISERFS_I(inode)->i_first_direct_byte = 0;
		set_inode_sd_version(inode, STAT_DATA_V2);
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V2_SIZE));
		/* read persistent inode attributes from sd and initalise
		   generic inode flags from them */
		REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
		sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
	}

	pathrelse(path);
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &reiserfs_file_inode_operations;
		inode->i_fop = &reiserfs_file_operations;
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &reiserfs_dir_inode_operations;
		inode->i_fop = &reiserfs_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &reiserfs_symlink_inode_operations;
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else {
		inode->i_blocks = 0;
		inode->i_op = &reiserfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
	}
}
1254 | 1254 | ||
1255 | // update new stat data with inode fields | 1255 | // update new stat data with inode fields |
1256 | static void inode2sd(void *sd, struct inode *inode, loff_t size) | 1256 | static void inode2sd(void *sd, struct inode *inode, loff_t size) |
1257 | { | 1257 | { |
1258 | struct stat_data *sd_v2 = (struct stat_data *)sd; | 1258 | struct stat_data *sd_v2 = (struct stat_data *)sd; |
1259 | __u16 flags; | 1259 | __u16 flags; |
1260 | 1260 | ||
1261 | set_sd_v2_mode(sd_v2, inode->i_mode); | 1261 | set_sd_v2_mode(sd_v2, inode->i_mode); |
1262 | set_sd_v2_nlink(sd_v2, inode->i_nlink); | 1262 | set_sd_v2_nlink(sd_v2, inode->i_nlink); |
1263 | set_sd_v2_uid(sd_v2, inode->i_uid); | 1263 | set_sd_v2_uid(sd_v2, inode->i_uid); |
1264 | set_sd_v2_size(sd_v2, size); | 1264 | set_sd_v2_size(sd_v2, size); |
1265 | set_sd_v2_gid(sd_v2, inode->i_gid); | 1265 | set_sd_v2_gid(sd_v2, inode->i_gid); |
1266 | set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); | 1266 | set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); |
1267 | set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); | 1267 | set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); |
1268 | set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); | 1268 | set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); |
1269 | set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); | 1269 | set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); |
1270 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) | 1270 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) |
1271 | set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); | 1271 | set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); |
1272 | else | 1272 | else |
1273 | set_sd_v2_generation(sd_v2, inode->i_generation); | 1273 | set_sd_v2_generation(sd_v2, inode->i_generation); |
1274 | flags = REISERFS_I(inode)->i_attrs; | 1274 | flags = REISERFS_I(inode)->i_attrs; |
1275 | i_attrs_to_sd_attrs(inode, &flags); | 1275 | i_attrs_to_sd_attrs(inode, &flags); |
1276 | set_sd_v2_attrs(sd_v2, flags); | 1276 | set_sd_v2_attrs(sd_v2, flags); |
1277 | } | 1277 | } |
1278 | 1278 | ||
1279 | // used to copy inode's fields to old stat data | 1279 | // used to copy inode's fields to old stat data |
1280 | static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) | 1280 | static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) |
1281 | { | 1281 | { |
1282 | struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; | 1282 | struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; |
1283 | 1283 | ||
1284 | set_sd_v1_mode(sd_v1, inode->i_mode); | 1284 | set_sd_v1_mode(sd_v1, inode->i_mode); |
1285 | set_sd_v1_uid(sd_v1, inode->i_uid); | 1285 | set_sd_v1_uid(sd_v1, inode->i_uid); |
1286 | set_sd_v1_gid(sd_v1, inode->i_gid); | 1286 | set_sd_v1_gid(sd_v1, inode->i_gid); |
1287 | set_sd_v1_nlink(sd_v1, inode->i_nlink); | 1287 | set_sd_v1_nlink(sd_v1, inode->i_nlink); |
1288 | set_sd_v1_size(sd_v1, size); | 1288 | set_sd_v1_size(sd_v1, size); |
1289 | set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec); | 1289 | set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec); |
1290 | set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec); | 1290 | set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec); |
1291 | set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec); | 1291 | set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec); |
1292 | 1292 | ||
1293 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) | 1293 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) |
1294 | set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev)); | 1294 | set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev)); |
1295 | else | 1295 | else |
1296 | set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); | 1296 | set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); |
1297 | 1297 | ||
1298 | // Sigh. i_first_direct_byte is back | 1298 | // Sigh. i_first_direct_byte is back |
1299 | set_sd_v1_first_direct_byte(sd_v1, | 1299 | set_sd_v1_first_direct_byte(sd_v1, |
1300 | REISERFS_I(inode)->i_first_direct_byte); | 1300 | REISERFS_I(inode)->i_first_direct_byte); |
1301 | } | 1301 | } |
1302 | 1302 | ||
1303 | /* NOTE, you must prepare the buffer head before sending it here, | 1303 | /* NOTE, you must prepare the buffer head before sending it here, |
1304 | ** and then log it after the call | 1304 | ** and then log it after the call |
1305 | */ | 1305 | */ |
1306 | static void update_stat_data(struct path *path, struct inode *inode, | 1306 | static void update_stat_data(struct path *path, struct inode *inode, |
1307 | loff_t size) | 1307 | loff_t size) |
1308 | { | 1308 | { |
1309 | struct buffer_head *bh; | 1309 | struct buffer_head *bh; |
1310 | struct item_head *ih; | 1310 | struct item_head *ih; |
1311 | 1311 | ||
1312 | bh = PATH_PLAST_BUFFER(path); | 1312 | bh = PATH_PLAST_BUFFER(path); |
1313 | ih = PATH_PITEM_HEAD(path); | 1313 | ih = PATH_PITEM_HEAD(path); |
1314 | 1314 | ||
1315 | if (!is_statdata_le_ih(ih)) | 1315 | if (!is_statdata_le_ih(ih)) |
1316 | reiserfs_panic(inode->i_sb, | 1316 | reiserfs_panic(inode->i_sb, |
1317 | "vs-13065: update_stat_data: key %k, found item %h", | 1317 | "vs-13065: update_stat_data: key %k, found item %h", |
1318 | INODE_PKEY(inode), ih); | 1318 | INODE_PKEY(inode), ih); |
1319 | 1319 | ||
1320 | if (stat_data_v1(ih)) { | 1320 | if (stat_data_v1(ih)) { |
1321 | // path points to old stat data | 1321 | // path points to old stat data |
1322 | inode2sd_v1(B_I_PITEM(bh, ih), inode, size); | 1322 | inode2sd_v1(B_I_PITEM(bh, ih), inode, size); |
1323 | } else { | 1323 | } else { |
1324 | inode2sd(B_I_PITEM(bh, ih), inode, size); | 1324 | inode2sd(B_I_PITEM(bh, ih), inode, size); |
1325 | } | 1325 | } |
1326 | 1326 | ||
1327 | return; | 1327 | return; |
1328 | } | 1328 | } |
1329 | 1329 | ||
/* Write the in-core inode's stat data (with the given size) back into the
** tree and mark the containing buffer dirty in the running transaction.
** Must be called with a transaction open (th->t_trans_id != 0).  Failures
** are logged and swallowed; the function returns void.
*/
void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
			     struct inode *inode, loff_t size)
{
	struct cpu_key key;
	INITIALIZE_PATH(path);
	struct buffer_head *bh;
	int fs_gen;
	struct item_head *ih, tmp_ih;
	int retval;

	BUG_ON(!th->t_trans_id);

	make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);	//key type is unimportant

	for (;;) {
		int pos;
		/* look for the object's stat data */
		retval = search_item(inode->i_sb, &key, &path);
		if (retval == IO_ERROR) {
			reiserfs_warning(inode->i_sb,
					 "vs-13050: reiserfs_update_sd: "
					 "i/o failure occurred trying to update %K stat data",
					 &key);
			return;
		}
		if (retval == ITEM_NOT_FOUND) {
			pos = PATH_LAST_POSITION(&path);
			pathrelse(&path);
			/* missing stat data is expected for an object being
			 * deleted; warn only if the inode is still linked */
			if (inode->i_nlink == 0) {
				/*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
				return;
			}
			reiserfs_warning(inode->i_sb,
					 "vs-13060: reiserfs_update_sd: "
					 "stat data of object %k (nlink == %d) not found (pos %d)",
					 INODE_PKEY(inode), inode->i_nlink,
					 pos);
			reiserfs_check_path(&path);
			return;
		}

		/* sigh, prepare_for_journal might schedule.  When it schedules the
		 ** FS might change.  We have to detect that, and loop back to the
		 ** search if the stat data item has moved
		 */
		bh = get_last_bh(&path);
		ih = get_ih(&path);
		copy_item_head(&tmp_ih, ih);
		fs_gen = get_generation(inode->i_sb);
		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
			continue;	/* Stat_data item has been moved after scheduling. */
		}
		break;
	}
	/* bh is still journal-prepared here; update the item and log it */
	update_stat_data(&path, inode, size);
	journal_mark_dirty(th, th->t_super, bh);
	pathrelse(&path);
	return;
}
1392 | 1392 | ||
/* reiserfs_read_locked_inode is called to read the inode off disk, and it
** does a make_bad_inode when things go wrong.  But, we need to make sure
** and clear the key in the private portion of the inode, otherwise a
** corresponding iput might try to delete whatever object the inode last
** represented.
*/
static void reiserfs_make_bad_inode(struct inode *inode)
{
	/* zero the on-disk key first so iput can't act on the old object */
	memset(INODE_PKEY(inode), 0, KEY_SIZE);
	make_bad_inode(inode);
}
1404 | 1404 | ||
1405 | // | 1405 | // |
1406 | // initially this function was derived from minix or ext2's analog and | 1406 | // initially this function was derived from minix or ext2's analog and |
1407 | // evolved as the prototype did | 1407 | // evolved as the prototype did |
1408 | // | 1408 | // |
1409 | 1409 | ||
1410 | int reiserfs_init_locked_inode(struct inode *inode, void *p) | 1410 | int reiserfs_init_locked_inode(struct inode *inode, void *p) |
1411 | { | 1411 | { |
1412 | struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; | 1412 | struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; |
1413 | inode->i_ino = args->objectid; | 1413 | inode->i_ino = args->objectid; |
1414 | INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); | 1414 | INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); |
1415 | return 0; | 1415 | return 0; |
1416 | } | 1416 | } |
1417 | 1417 | ||
/* looks for stat data in the tree, and fills up the fields of in-core
   inode stat data fields.  On lookup failure the inode is turned into a
   bad inode (key cleared) rather than returning an error code. */
void reiserfs_read_locked_inode(struct inode *inode,
				struct reiserfs_iget_args *args)
{
	INITIALIZE_PATH(path_to_sd);
	struct cpu_key key;
	unsigned long dirino;
	int retval;

	dirino = args->dirid;

	/* set version 1, version 2 could be used too, because stat data
	   key is the same in both versions */
	key.version = KEY_FORMAT_3_5;
	key.on_disk_key.k_dir_id = dirino;
	key.on_disk_key.k_objectid = inode->i_ino;
	key.on_disk_key.k_offset = 0;
	key.on_disk_key.k_type = 0;

	/* look for the object's stat data */
	retval = search_item(inode->i_sb, &key, &path_to_sd);
	if (retval == IO_ERROR) {
		reiserfs_warning(inode->i_sb,
				 "vs-13070: reiserfs_read_locked_inode: "
				 "i/o failure occurred trying to find stat data of %K",
				 &key);
		reiserfs_make_bad_inode(inode);
		return;
	}
	if (retval != ITEM_FOUND) {
		/* a stale NFS handle can trigger this without it being an error */
		pathrelse(&path_to_sd);
		reiserfs_make_bad_inode(inode);
		/* nlink = 0 so callers treat the object as already gone */
		inode->i_nlink = 0;
		return;
	}

	init_inode(inode, &path_to_sd);

	/* It is possible that knfsd is trying to access inode of a file
	   that is being removed from the disk by some other thread. As we
	   update sd on unlink all that is required is to check for nlink
	   here. This bug was first found by Sizif when debugging
	   SquidNG/Butterfly, forgotten, and found again after Philippe
	   Gramoulle <philippe.gramoulle@mmania.com> reproduced it.

	   More logical fix would require changes in fs/inode.c:iput() to
	   remove inode from hash-table _after_ fs cleaned disk stuff up and
	   in iget() to return NULL if I_FREEING inode is found in
	   hash-table. */
	/* Currently there is one place where it's ok to meet inode with
	   nlink==0: processing of open-unlinked and half-truncated files
	   during mount (fs/reiserfs/super.c:finish_unfinished()). */
	if ((inode->i_nlink == 0) &&
	    !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
		reiserfs_warning(inode->i_sb,
				 "vs-13075: reiserfs_read_locked_inode: "
				 "dead inode read from disk %K. "
				 "This is likely to be race with knfsd. Ignore",
				 &key);
		reiserfs_make_bad_inode(inode);
	}

	reiserfs_check_path(&path_to_sd);	/* init inode should be relsing */

}
1485 | 1485 | ||
1486 | /** | 1486 | /** |
1487 | * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). | 1487 | * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). |
1488 | * | 1488 | * |
1489 | * @inode: inode from hash table to check | 1489 | * @inode: inode from hash table to check |
1490 | * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args. | 1490 | * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args. |
1491 | * | 1491 | * |
1492 | * This function is called by iget5_locked() to distinguish reiserfs inodes | 1492 | * This function is called by iget5_locked() to distinguish reiserfs inodes |
1493 | * having the same inode numbers. Such inodes can only exist due to some | 1493 | * having the same inode numbers. Such inodes can only exist due to some |
1494 | * error condition. One of them should be bad. Inodes with identical | 1494 | * error condition. One of them should be bad. Inodes with identical |
1495 | * inode numbers (objectids) are distinguished by parent directory ids. | 1495 | * inode numbers (objectids) are distinguished by parent directory ids. |
1496 | * | 1496 | * |
1497 | */ | 1497 | */ |
1498 | int reiserfs_find_actor(struct inode *inode, void *opaque) | 1498 | int reiserfs_find_actor(struct inode *inode, void *opaque) |
1499 | { | 1499 | { |
1500 | struct reiserfs_iget_args *args; | 1500 | struct reiserfs_iget_args *args; |
1501 | 1501 | ||
1502 | args = opaque; | 1502 | args = opaque; |
1503 | /* args is already in CPU order */ | 1503 | /* args is already in CPU order */ |
1504 | return (inode->i_ino == args->objectid) && | 1504 | return (inode->i_ino == args->objectid) && |
1505 | (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); | 1505 | (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); |
1506 | } | 1506 | } |
1507 | 1507 | ||
1508 | struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key) | 1508 | struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key) |
1509 | { | 1509 | { |
1510 | struct inode *inode; | 1510 | struct inode *inode; |
1511 | struct reiserfs_iget_args args; | 1511 | struct reiserfs_iget_args args; |
1512 | 1512 | ||
1513 | args.objectid = key->on_disk_key.k_objectid; | 1513 | args.objectid = key->on_disk_key.k_objectid; |
1514 | args.dirid = key->on_disk_key.k_dir_id; | 1514 | args.dirid = key->on_disk_key.k_dir_id; |
1515 | inode = iget5_locked(s, key->on_disk_key.k_objectid, | 1515 | inode = iget5_locked(s, key->on_disk_key.k_objectid, |
1516 | reiserfs_find_actor, reiserfs_init_locked_inode, | 1516 | reiserfs_find_actor, reiserfs_init_locked_inode, |
1517 | (void *)(&args)); | 1517 | (void *)(&args)); |
1518 | if (!inode) | 1518 | if (!inode) |
1519 | return ERR_PTR(-ENOMEM); | 1519 | return ERR_PTR(-ENOMEM); |
1520 | 1520 | ||
1521 | if (inode->i_state & I_NEW) { | 1521 | if (inode->i_state & I_NEW) { |
1522 | reiserfs_read_locked_inode(inode, &args); | 1522 | reiserfs_read_locked_inode(inode, &args); |
1523 | unlock_new_inode(inode); | 1523 | unlock_new_inode(inode); |
1524 | } | 1524 | } |
1525 | 1525 | ||
1526 | if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) { | 1526 | if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) { |
1527 | /* either due to i/o error or a stale NFS handle */ | 1527 | /* either due to i/o error or a stale NFS handle */ |
1528 | iput(inode); | 1528 | iput(inode); |
1529 | inode = NULL; | 1529 | inode = NULL; |
1530 | } | 1530 | } |
1531 | return inode; | 1531 | return inode; |
1532 | } | 1532 | } |
1533 | 1533 | ||
1534 | struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) | 1534 | struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) |
1535 | { | 1535 | { |
1536 | __u32 *data = vobjp; | 1536 | __u32 *data = vobjp; |
1537 | struct cpu_key key; | 1537 | struct cpu_key key; |
1538 | struct dentry *result; | 1538 | struct dentry *result; |
1539 | struct inode *inode; | 1539 | struct inode *inode; |
1540 | 1540 | ||
1541 | key.on_disk_key.k_objectid = data[0]; | 1541 | key.on_disk_key.k_objectid = data[0]; |
1542 | key.on_disk_key.k_dir_id = data[1]; | 1542 | key.on_disk_key.k_dir_id = data[1]; |
1543 | reiserfs_write_lock(sb); | 1543 | reiserfs_write_lock(sb); |
1544 | inode = reiserfs_iget(sb, &key); | 1544 | inode = reiserfs_iget(sb, &key); |
1545 | if (inode && !IS_ERR(inode) && data[2] != 0 && | 1545 | if (inode && !IS_ERR(inode) && data[2] != 0 && |
1546 | data[2] != inode->i_generation) { | 1546 | data[2] != inode->i_generation) { |
1547 | iput(inode); | 1547 | iput(inode); |
1548 | inode = NULL; | 1548 | inode = NULL; |
1549 | } | 1549 | } |
1550 | reiserfs_write_unlock(sb); | 1550 | reiserfs_write_unlock(sb); |
1551 | if (!inode) | 1551 | if (!inode) |
1552 | inode = ERR_PTR(-ESTALE); | 1552 | inode = ERR_PTR(-ESTALE); |
1553 | if (IS_ERR(inode)) | 1553 | if (IS_ERR(inode)) |
1554 | return ERR_PTR(PTR_ERR(inode)); | 1554 | return ERR_PTR(PTR_ERR(inode)); |
1555 | result = d_alloc_anon(inode); | 1555 | result = d_alloc_anon(inode); |
1556 | if (!result) { | 1556 | if (!result) { |
1557 | iput(inode); | 1557 | iput(inode); |
1558 | return ERR_PTR(-ENOMEM); | 1558 | return ERR_PTR(-ENOMEM); |
1559 | } | 1559 | } |
1560 | return result; | 1560 | return result; |
1561 | } | 1561 | } |
1562 | 1562 | ||
1563 | struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, | 1563 | struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, |
1564 | int len, int fhtype, | 1564 | int len, int fhtype, |
1565 | int (*acceptable) (void *contect, | 1565 | int (*acceptable) (void *contect, |
1566 | struct dentry * de), | 1566 | struct dentry * de), |
1567 | void *context) | 1567 | void *context) |
1568 | { | 1568 | { |
1569 | __u32 obj[3], parent[3]; | 1569 | __u32 obj[3], parent[3]; |
1570 | 1570 | ||
1571 | /* fhtype happens to reflect the number of u32s encoded. | 1571 | /* fhtype happens to reflect the number of u32s encoded. |
1572 | * due to a bug in earlier code, fhtype might indicate there | 1572 | * due to a bug in earlier code, fhtype might indicate there |
1573 | * are more u32s then actually fitted. | 1573 | * are more u32s then actually fitted. |
1574 | * so if fhtype seems to be more than len, reduce fhtype. | 1574 | * so if fhtype seems to be more than len, reduce fhtype. |
1575 | * Valid types are: | 1575 | * Valid types are: |
1576 | * 2 - objectid + dir_id - legacy support | 1576 | * 2 - objectid + dir_id - legacy support |
1577 | * 3 - objectid + dir_id + generation | 1577 | * 3 - objectid + dir_id + generation |
1578 | * 4 - objectid + dir_id + objectid and dirid of parent - legacy | 1578 | * 4 - objectid + dir_id + objectid and dirid of parent - legacy |
1579 | * 5 - objectid + dir_id + generation + objectid and dirid of parent | 1579 | * 5 - objectid + dir_id + generation + objectid and dirid of parent |
1580 | * 6 - as above plus generation of directory | 1580 | * 6 - as above plus generation of directory |
1581 | * 6 does not fit in NFSv2 handles | 1581 | * 6 does not fit in NFSv2 handles |
1582 | */ | 1582 | */ |
1583 | if (fhtype > len) { | 1583 | if (fhtype > len) { |
1584 | if (fhtype != 6 || len != 5) | 1584 | if (fhtype != 6 || len != 5) |
1585 | reiserfs_warning(sb, | 1585 | reiserfs_warning(sb, |
1586 | "nfsd/reiserfs, fhtype=%d, len=%d - odd", | 1586 | "nfsd/reiserfs, fhtype=%d, len=%d - odd", |
1587 | fhtype, len); | 1587 | fhtype, len); |
1588 | fhtype = 5; | 1588 | fhtype = 5; |
1589 | } | 1589 | } |
1590 | 1590 | ||
1591 | obj[0] = data[0]; | 1591 | obj[0] = data[0]; |
1592 | obj[1] = data[1]; | 1592 | obj[1] = data[1]; |
1593 | if (fhtype == 3 || fhtype >= 5) | 1593 | if (fhtype == 3 || fhtype >= 5) |
1594 | obj[2] = data[2]; | 1594 | obj[2] = data[2]; |
1595 | else | 1595 | else |
1596 | obj[2] = 0; /* generation number */ | 1596 | obj[2] = 0; /* generation number */ |
1597 | 1597 | ||
1598 | if (fhtype >= 4) { | 1598 | if (fhtype >= 4) { |
1599 | parent[0] = data[fhtype >= 5 ? 3 : 2]; | 1599 | parent[0] = data[fhtype >= 5 ? 3 : 2]; |
1600 | parent[1] = data[fhtype >= 5 ? 4 : 3]; | 1600 | parent[1] = data[fhtype >= 5 ? 4 : 3]; |
1601 | if (fhtype == 6) | 1601 | if (fhtype == 6) |
1602 | parent[2] = data[5]; | 1602 | parent[2] = data[5]; |
1603 | else | 1603 | else |
1604 | parent[2] = 0; | 1604 | parent[2] = 0; |
1605 | } | 1605 | } |
1606 | return sb->s_export_op->find_exported_dentry(sb, obj, | 1606 | return sb->s_export_op->find_exported_dentry(sb, obj, |
1607 | fhtype < 4 ? NULL : parent, | 1607 | fhtype < 4 ? NULL : parent, |
1608 | acceptable, context); | 1608 | acceptable, context); |
1609 | } | 1609 | } |
1610 | 1610 | ||
1611 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, | 1611 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, |
1612 | int need_parent) | 1612 | int need_parent) |
1613 | { | 1613 | { |
1614 | struct inode *inode = dentry->d_inode; | 1614 | struct inode *inode = dentry->d_inode; |
1615 | int maxlen = *lenp; | 1615 | int maxlen = *lenp; |
1616 | 1616 | ||
1617 | if (maxlen < 3) | 1617 | if (maxlen < 3) |
1618 | return 255; | 1618 | return 255; |
1619 | 1619 | ||
1620 | data[0] = inode->i_ino; | 1620 | data[0] = inode->i_ino; |
1621 | data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); | 1621 | data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); |
1622 | data[2] = inode->i_generation; | 1622 | data[2] = inode->i_generation; |
1623 | *lenp = 3; | 1623 | *lenp = 3; |
1624 | /* no room for directory info? return what we've stored so far */ | 1624 | /* no room for directory info? return what we've stored so far */ |
1625 | if (maxlen < 5 || !need_parent) | 1625 | if (maxlen < 5 || !need_parent) |
1626 | return 3; | 1626 | return 3; |
1627 | 1627 | ||
1628 | spin_lock(&dentry->d_lock); | 1628 | spin_lock(&dentry->d_lock); |
1629 | inode = dentry->d_parent->d_inode; | 1629 | inode = dentry->d_parent->d_inode; |
1630 | data[3] = inode->i_ino; | 1630 | data[3] = inode->i_ino; |
1631 | data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); | 1631 | data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); |
1632 | *lenp = 5; | 1632 | *lenp = 5; |
1633 | if (maxlen >= 6) { | 1633 | if (maxlen >= 6) { |
1634 | data[5] = inode->i_generation; | 1634 | data[5] = inode->i_generation; |
1635 | *lenp = 6; | 1635 | *lenp = 6; |
1636 | } | 1636 | } |
1637 | spin_unlock(&dentry->d_lock); | 1637 | spin_unlock(&dentry->d_lock); |
1638 | return *lenp; | 1638 | return *lenp; |
1639 | } | 1639 | } |
1640 | 1640 | ||
1641 | /* looks for stat data, then copies fields to it, marks the buffer | 1641 | /* looks for stat data, then copies fields to it, marks the buffer |
1642 | containing stat data as dirty */ | 1642 | containing stat data as dirty */ |
1643 | /* reiserfs inodes are never really dirty, since the dirty inode call | 1643 | /* reiserfs inodes are never really dirty, since the dirty inode call |
1644 | ** always logs them. This call allows the VFS inode marking routines | 1644 | ** always logs them. This call allows the VFS inode marking routines |
1645 | ** to properly mark inodes for datasync and such, but only actually | 1645 | ** to properly mark inodes for datasync and such, but only actually |
1646 | ** does something when called for a synchronous update. | 1646 | ** does something when called for a synchronous update. |
1647 | */ | 1647 | */ |
1648 | int reiserfs_write_inode(struct inode *inode, int do_sync) | 1648 | int reiserfs_write_inode(struct inode *inode, int do_sync) |
1649 | { | 1649 | { |
1650 | struct reiserfs_transaction_handle th; | 1650 | struct reiserfs_transaction_handle th; |
1651 | int jbegin_count = 1; | 1651 | int jbegin_count = 1; |
1652 | 1652 | ||
1653 | if (inode->i_sb->s_flags & MS_RDONLY) | 1653 | if (inode->i_sb->s_flags & MS_RDONLY) |
1654 | return -EROFS; | 1654 | return -EROFS; |
1655 | /* memory pressure can sometimes initiate write_inode calls with sync == 1, | 1655 | /* memory pressure can sometimes initiate write_inode calls with sync == 1, |
1656 | ** these cases are just when the system needs ram, not when the | 1656 | ** these cases are just when the system needs ram, not when the |
1657 | ** inode needs to reach disk for safety, and they can safely be | 1657 | ** inode needs to reach disk for safety, and they can safely be |
1658 | ** ignored because the altered inode has already been logged. | 1658 | ** ignored because the altered inode has already been logged. |
1659 | */ | 1659 | */ |
1660 | if (do_sync && !(current->flags & PF_MEMALLOC)) { | 1660 | if (do_sync && !(current->flags & PF_MEMALLOC)) { |
1661 | reiserfs_write_lock(inode->i_sb); | 1661 | reiserfs_write_lock(inode->i_sb); |
1662 | if (!journal_begin(&th, inode->i_sb, jbegin_count)) { | 1662 | if (!journal_begin(&th, inode->i_sb, jbegin_count)) { |
1663 | reiserfs_update_sd(&th, inode); | 1663 | reiserfs_update_sd(&th, inode); |
1664 | journal_end_sync(&th, inode->i_sb, jbegin_count); | 1664 | journal_end_sync(&th, inode->i_sb, jbegin_count); |
1665 | } | 1665 | } |
1666 | reiserfs_write_unlock(inode->i_sb); | 1666 | reiserfs_write_unlock(inode->i_sb); |
1667 | } | 1667 | } |
1668 | return 0; | 1668 | return 0; |
1669 | } | 1669 | } |
1670 | 1670 | ||
1671 | /* stat data of new object is inserted already, this inserts the item | 1671 | /* stat data of new object is inserted already, this inserts the item |
1672 | containing "." and ".." entries */ | 1672 | containing "." and ".." entries */ |
1673 | static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, | 1673 | static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, |
1674 | struct inode *inode, | 1674 | struct inode *inode, |
1675 | struct item_head *ih, struct path *path, | 1675 | struct item_head *ih, struct path *path, |
1676 | struct inode *dir) | 1676 | struct inode *dir) |
1677 | { | 1677 | { |
1678 | struct super_block *sb = th->t_super; | 1678 | struct super_block *sb = th->t_super; |
1679 | char empty_dir[EMPTY_DIR_SIZE]; | 1679 | char empty_dir[EMPTY_DIR_SIZE]; |
1680 | char *body = empty_dir; | 1680 | char *body = empty_dir; |
1681 | struct cpu_key key; | 1681 | struct cpu_key key; |
1682 | int retval; | 1682 | int retval; |
1683 | 1683 | ||
1684 | BUG_ON(!th->t_trans_id); | 1684 | BUG_ON(!th->t_trans_id); |
1685 | 1685 | ||
1686 | _make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id), | 1686 | _make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id), |
1687 | le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, | 1687 | le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, |
1688 | TYPE_DIRENTRY, 3 /*key length */ ); | 1688 | TYPE_DIRENTRY, 3 /*key length */ ); |
1689 | 1689 | ||
1690 | /* compose item head for new item. Directories consist of items of | 1690 | /* compose item head for new item. Directories consist of items of |
1691 | old type (ITEM_VERSION_1). Do not set key (second arg is 0), it | 1691 | old type (ITEM_VERSION_1). Do not set key (second arg is 0), it |
1692 | is done by reiserfs_new_inode */ | 1692 | is done by reiserfs_new_inode */ |
1693 | if (old_format_only(sb)) { | 1693 | if (old_format_only(sb)) { |
1694 | make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, | 1694 | make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, |
1695 | TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); | 1695 | TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); |
1696 | 1696 | ||
1697 | make_empty_dir_item_v1(body, ih->ih_key.k_dir_id, | 1697 | make_empty_dir_item_v1(body, ih->ih_key.k_dir_id, |
1698 | ih->ih_key.k_objectid, | 1698 | ih->ih_key.k_objectid, |
1699 | INODE_PKEY(dir)->k_dir_id, | 1699 | INODE_PKEY(dir)->k_dir_id, |
1700 | INODE_PKEY(dir)->k_objectid); | 1700 | INODE_PKEY(dir)->k_objectid); |
1701 | } else { | 1701 | } else { |
1702 | make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, | 1702 | make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, |
1703 | TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2); | 1703 | TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2); |
1704 | 1704 | ||
1705 | make_empty_dir_item(body, ih->ih_key.k_dir_id, | 1705 | make_empty_dir_item(body, ih->ih_key.k_dir_id, |
1706 | ih->ih_key.k_objectid, | 1706 | ih->ih_key.k_objectid, |
1707 | INODE_PKEY(dir)->k_dir_id, | 1707 | INODE_PKEY(dir)->k_dir_id, |
1708 | INODE_PKEY(dir)->k_objectid); | 1708 | INODE_PKEY(dir)->k_objectid); |
1709 | } | 1709 | } |
1710 | 1710 | ||
1711 | /* look for place in the tree for new item */ | 1711 | /* look for place in the tree for new item */ |
1712 | retval = search_item(sb, &key, path); | 1712 | retval = search_item(sb, &key, path); |
1713 | if (retval == IO_ERROR) { | 1713 | if (retval == IO_ERROR) { |
1714 | reiserfs_warning(sb, "vs-13080: reiserfs_new_directory: " | 1714 | reiserfs_warning(sb, "vs-13080: reiserfs_new_directory: " |
1715 | "i/o failure occurred creating new directory"); | 1715 | "i/o failure occurred creating new directory"); |
1716 | return -EIO; | 1716 | return -EIO; |
1717 | } | 1717 | } |
1718 | if (retval == ITEM_FOUND) { | 1718 | if (retval == ITEM_FOUND) { |
1719 | pathrelse(path); | 1719 | pathrelse(path); |
1720 | reiserfs_warning(sb, "vs-13070: reiserfs_new_directory: " | 1720 | reiserfs_warning(sb, "vs-13070: reiserfs_new_directory: " |
1721 | "object with this key exists (%k)", | 1721 | "object with this key exists (%k)", |
1722 | &(ih->ih_key)); | 1722 | &(ih->ih_key)); |
1723 | return -EEXIST; | 1723 | return -EEXIST; |
1724 | } | 1724 | } |
1725 | 1725 | ||
1726 | /* insert item, that is empty directory item */ | 1726 | /* insert item, that is empty directory item */ |
1727 | return reiserfs_insert_item(th, path, &key, ih, inode, body); | 1727 | return reiserfs_insert_item(th, path, &key, ih, inode, body); |
1728 | } | 1728 | } |
1729 | 1729 | ||
1730 | /* stat data of object has been inserted, this inserts the item | 1730 | /* stat data of object has been inserted, this inserts the item |
1731 | containing the body of symlink */ | 1731 | containing the body of symlink */ |
1732 | static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */ | 1732 | static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */ |
1733 | struct item_head *ih, | 1733 | struct item_head *ih, |
1734 | struct path *path, const char *symname, | 1734 | struct path *path, const char *symname, |
1735 | int item_len) | 1735 | int item_len) |
1736 | { | 1736 | { |
1737 | struct super_block *sb = th->t_super; | 1737 | struct super_block *sb = th->t_super; |
1738 | struct cpu_key key; | 1738 | struct cpu_key key; |
1739 | int retval; | 1739 | int retval; |
1740 | 1740 | ||
1741 | BUG_ON(!th->t_trans_id); | 1741 | BUG_ON(!th->t_trans_id); |
1742 | 1742 | ||
1743 | _make_cpu_key(&key, KEY_FORMAT_3_5, | 1743 | _make_cpu_key(&key, KEY_FORMAT_3_5, |
1744 | le32_to_cpu(ih->ih_key.k_dir_id), | 1744 | le32_to_cpu(ih->ih_key.k_dir_id), |
1745 | le32_to_cpu(ih->ih_key.k_objectid), | 1745 | le32_to_cpu(ih->ih_key.k_objectid), |
1746 | 1, TYPE_DIRECT, 3 /*key length */ ); | 1746 | 1, TYPE_DIRECT, 3 /*key length */ ); |
1747 | 1747 | ||
1748 | make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, | 1748 | make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, |
1749 | 0 /*free_space */ ); | 1749 | 0 /*free_space */ ); |
1750 | 1750 | ||
1751 | /* look for place in the tree for new item */ | 1751 | /* look for place in the tree for new item */ |
1752 | retval = search_item(sb, &key, path); | 1752 | retval = search_item(sb, &key, path); |
1753 | if (retval == IO_ERROR) { | 1753 | if (retval == IO_ERROR) { |
1754 | reiserfs_warning(sb, "vs-13080: reiserfs_new_symlinik: " | 1754 | reiserfs_warning(sb, "vs-13080: reiserfs_new_symlinik: " |
1755 | "i/o failure occurred creating new symlink"); | 1755 | "i/o failure occurred creating new symlink"); |
1756 | return -EIO; | 1756 | return -EIO; |
1757 | } | 1757 | } |
1758 | if (retval == ITEM_FOUND) { | 1758 | if (retval == ITEM_FOUND) { |
1759 | pathrelse(path); | 1759 | pathrelse(path); |
1760 | reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: " | 1760 | reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: " |
1761 | "object with this key exists (%k)", | 1761 | "object with this key exists (%k)", |
1762 | &(ih->ih_key)); | 1762 | &(ih->ih_key)); |
1763 | return -EEXIST; | 1763 | return -EEXIST; |
1764 | } | 1764 | } |
1765 | 1765 | ||
1766 | /* insert item, that is body of symlink */ | 1766 | /* insert item, that is body of symlink */ |
1767 | return reiserfs_insert_item(th, path, &key, ih, inode, symname); | 1767 | return reiserfs_insert_item(th, path, &key, ih, inode, symname); |
1768 | } | 1768 | } |
1769 | 1769 | ||
/* Inserts the stat data of a freshly created object into the tree, and
   then calls reiserfs_new_directory (to insert ".", ".." items if the new
   object is a directory) or reiserfs_new_symlink (to insert the symlink
   body if the new object is a symlink) or nothing (if the new object is a
   regular file).

   th      - active transaction handle; on any failure the transaction is
             ended here and th->t_trans_id is zeroed so the caller cannot
             reuse the handle
   dir     - parent directory (used for packing locality, generation,
             inherited attrs/ACLs); must be non-NULL with i_nlink > 0
   mode    - file type + permission bits of the new object
   symname - symlink target (only used when S_ISLNK(mode))
   i_size  - 0 for regular files, EMPTY_DIR_SIZE for dirs,
             strlen(symname) for symlinks
   dentry  - dentry of the new object (passed through to ACL inheritance)
   inode   - partially set up in-core inode; uid/gid must already be set
             by the caller (quota init depends on them)

   NOTE! uid and gid must already be set in the inode.  If we return
   non-zero due to an error, we have to drop the quota previously allocated
   for the fresh inode.  This can only be done outside a transaction, so
   if we return non-zero, we also end the transaction.

   Returns 0 on success or a negative errno; on failure the inode has been
   made bad and iput() already called on it. */
int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
		       struct inode *dir, int mode, const char *symname,
		       /* 0 for regular, EMTRY_DIR_SIZE for dirs,
		          strlen (symname) for symlinks) */
		       loff_t i_size, struct dentry *dentry,
		       struct inode *inode)
{
	struct super_block *sb;
	INITIALIZE_PATH(path_to_key);
	struct cpu_key key;
	struct item_head ih;
	struct stat_data sd;
	int retval;
	int err;

	BUG_ON(!th->t_trans_id);

	/* charge the new inode against the owner's quota first; on failure
	 * nothing has been allocated yet, so only the transaction needs
	 * ending (out_end_trans, not out_bad_inode) */
	if (DQUOT_ALLOC_INODE(inode)) {
		err = -EDQUOT;
		goto out_end_trans;
	}
	if (!dir || !dir->i_nlink) {
		/* refuse to create anything in an unlinked directory */
		err = -EPERM;
		goto out_bad_inode;
	}

	sb = dir->i_sb;

	/* item head of new item */
	ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
	ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
	if (!ih.ih_key.k_objectid) {
		/* objectid map exhausted / could not be grown */
		err = -ENOMEM;
		goto out_bad_inode;
	}
	if (old_format_only(sb))
		/* not a perfect generation count, as object ids can be reused, but
		 ** this is as good as reiserfs can do right now.
		 ** note that the private part of inode isn't filled in yet, we have
		 ** to use the directory.
		 */
		inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
	else
#if defined( USE_INODE_GENERATION_COUNTER )
		inode->i_generation =
		    le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
#else
		inode->i_generation = ++event;
#endif

	/* fill stat data */
	/* directories start with 2 links ("." plus the parent's entry) */
	inode->i_nlink = (S_ISDIR(mode) ? 2 : 1);

	/* uid and gid must already be set by the caller for quota init */

	/* symlink cannot be immutable or append only, right? */
	if (S_ISLNK(inode->i_mode))
		inode->i_flags &= ~(S_IMMUTABLE | S_APPEND);

	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
	inode->i_size = i_size;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	/* symlinks are stored as direct items from byte 1; everything else
	 * starts with no direct (tail) bytes at all */
	REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
	    U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;

	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
	REISERFS_I(inode)->i_flags = 0;
	REISERFS_I(inode)->i_prealloc_block = 0;
	REISERFS_I(inode)->i_prealloc_count = 0;
	REISERFS_I(inode)->i_trans_id = 0;
	REISERFS_I(inode)->i_jl = NULL;
	/* inherit the inheritable attribute bits from the parent dir */
	REISERFS_I(inode)->i_attrs =
	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
	REISERFS_I(inode)->i_acl_access = NULL;
	REISERFS_I(inode)->i_acl_default = NULL;
	init_rwsem(&REISERFS_I(inode)->xattr_sem);

	/* old (3.5) filesystems get v1 stat data, new ones get v2 */
	if (old_format_only(sb))
		make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
				  TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
	else
		make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
				  TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);

	/* key to search for correct place for new stat data */
	_make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
		      le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
		      TYPE_STAT_DATA, 3 /*key length */ );

	/* find proper place for inserting of stat data */
	retval = search_item(sb, &key, &path_to_key);
	if (retval == IO_ERROR) {
		err = -EIO;
		goto out_bad_inode;
	}
	if (retval == ITEM_FOUND) {
		pathrelse(&path_to_key);
		err = -EEXIST;
		goto out_bad_inode;
	}
	if (old_format_only(sb)) {
		if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
			pathrelse(&path_to_key);
			/* i_uid or i_gid is too big to be stored in stat data v3.5 */
			err = -EINVAL;
			goto out_bad_inode;
		}
		inode2sd_v1(&sd, inode, inode->i_size);
	} else {
		inode2sd(&sd, inode, inode->i_size);
	}
	// these do not go to on-disk stat data
	inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
	inode->i_blksize = reiserfs_default_io_size;

	// store in in-core inode the key of stat data and version all
	// object items will have (directory items will have old offset
	// format, other new objects will consist of new items)
	memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
	if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
	else
		set_inode_item_key_version(inode, KEY_FORMAT_3_6);
	if (old_format_only(sb))
		set_inode_sd_version(inode, STAT_DATA_V1);
	else
		set_inode_sd_version(inode, STAT_DATA_V2);

	/* insert the stat data into the tree */
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
	if (REISERFS_I(dir)->new_packing_locality)
		th->displace_new_blocks = 1;
#endif
	retval =
	    reiserfs_insert_item(th, &path_to_key, &key, &ih, inode,
				 (char *)(&sd));
	if (retval) {
		err = retval;
		reiserfs_check_path(&path_to_key);
		goto out_bad_inode;
	}
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
	if (!th->displace_new_blocks)
		REISERFS_I(dir)->new_packing_locality = 0;
#endif
	if (S_ISDIR(mode)) {
		/* insert item with "." and ".." */
		retval =
		    reiserfs_new_directory(th, inode, &ih, &path_to_key, dir);
	}

	if (S_ISLNK(mode)) {
		/* insert body of symlink */
		if (!old_format_only(sb))
			i_size = ROUND_UP(i_size);
		retval =
		    reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname,
					 i_size);
	}
	if (retval) {
		/* stat data is on disk now, so the full teardown path
		 * (out_inserted_sd) is needed, and the transaction must be
		 * ended before iput can run */
		err = retval;
		reiserfs_check_path(&path_to_key);
		journal_end(th, th->t_super, th->t_blocks_allocated);
		goto out_inserted_sd;
	}

	/* XXX CHECK THIS */
	if (reiserfs_posixacl(inode->i_sb)) {
		retval = reiserfs_inherit_default_acl(dir, dentry, inode);
		if (retval) {
			err = retval;
			reiserfs_check_path(&path_to_key);
			journal_end(th, th->t_super, th->t_blocks_allocated);
			goto out_inserted_sd;
		}
	} else if (inode->i_sb->s_flags & MS_POSIXACL) {
		/* mount flags and compiled-in feature set disagree */
		reiserfs_warning(inode->i_sb, "ACLs aren't enabled in the fs, "
				 "but vfs thinks they are!");
	} else if (is_reiserfs_priv_object(dir)) {
		/* objects under the xattr root are internal to reiserfs */
		reiserfs_mark_inode_private(inode);
	}

	insert_inode_hash(inode);
	reiserfs_update_sd(th, inode);
	reiserfs_check_path(&path_to_key);

	return 0;

/* it looks like you can easily compress these two goto targets into
 * one.  Keeping it like this doesn't actually hurt anything, and they
 * are place holders for what the quota code actually needs.
 */
      out_bad_inode:
	/* Invalidate the object, nothing was inserted yet */
	INODE_PKEY(inode)->k_objectid = 0;

	/* Quota change must be inside a transaction for journaling */
	DQUOT_FREE_INODE(inode);

      out_end_trans:
	journal_end(th, th->t_super, th->t_blocks_allocated);
	/* Drop can be outside and it needs more credits so it's better to have it outside */
	DQUOT_DROP(inode);
	inode->i_flags |= S_NOQUOTA;
	make_bad_inode(inode);

      out_inserted_sd:
	inode->i_nlink = 0;
	th->t_trans_id = 0;	/* so the caller can't use this handle later */

	/* If we were inheriting an ACL, we need to release the lock so that
	 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
	 * code really needs to be reworked, but this will take care of it
	 * for now. -jeffm */
	if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) {
		reiserfs_write_unlock_xattrs(dir->i_sb);
		iput(inode);
		reiserfs_write_lock_xattrs(dir->i_sb);
	} else
		iput(inode);
	return err;
}
2003 | 2003 | ||
/*
** finds the tail page in the page cache,
** reads the last block in.
**
** On success, page_result is set to a locked, pinned page, and bh_result
** is set to an up to date buffer for the last block in the file. returns 0.
** The caller owns the page lock and reference and must unlock_page() +
** page_cache_release() when done.
**
** tail conversion is not done, so bh_result might not be valid for writing
** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
** trying to write the block.
**
** on failure, nonzero is returned, page_result and bh_result are untouched.
*/
static int grab_tail_page(struct inode *p_s_inode,
			  struct page **page_result,
			  struct buffer_head **bh_result)
{

	/* we want the page with the last byte in the file,
	 ** not the page that will hold the next byte for appending
	 */
	unsigned long index = (p_s_inode->i_size - 1) >> PAGE_CACHE_SHIFT;
	unsigned long pos = 0;
	unsigned long start = 0;
	unsigned long blocksize = p_s_inode->i_sb->s_blocksize;
	unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1);
	struct buffer_head *bh;
	struct buffer_head *head;
	struct page *page;
	int error;

	/* we know that we are only called with inode->i_size > 0.
	 ** we also know that a file tail can never be as big as a block
	 ** If i_size % blocksize == 0, our file is currently block aligned
	 ** and it won't need converting or zeroing after a truncate.
	 */
	if ((offset & (blocksize - 1)) == 0) {
		return -ENOENT;
	}
	/* returns the page locked with an elevated refcount, or NULL */
	page = grab_cache_page(p_s_inode->i_mapping, index);
	error = -ENOMEM;
	if (!page) {
		goto out;
	}
	/* start within the page of the last block in the file */
	start = (offset / blocksize) * blocksize;

	/* maps (and reads in, if needed) the buffers covering the tail;
	 * create_0 means no new blocks are allocated here */
	error = block_prepare_write(page, start, offset,
				    reiserfs_get_block_create_0);
	if (error)
		goto unlock;

	/* walk the circular buffer list to the buffer at offset 'start' */
	head = page_buffers(page);
	bh = head;
	do {
		if (pos >= start) {
			break;
		}
		bh = bh->b_this_page;
		pos += blocksize;
	} while (bh != head);

	if (!buffer_uptodate(bh)) {
		/* note, this should never happen, prepare_write should
		 ** be taking care of this for us. If the buffer isn't up to date,
		 ** I've screwed up the code to find the buffer, or the code to
		 ** call prepare_write
		 */
		reiserfs_warning(p_s_inode->i_sb,
				 "clm-6000: error reading block %lu on dev %s",
				 bh->b_blocknr,
				 reiserfs_bdevname(p_s_inode->i_sb));
		error = -EIO;
		goto unlock;
	}
	/* success: page stays locked and referenced for the caller */
	*bh_result = bh;
	*page_result = page;

      out:
	return error;

      unlock:
	unlock_page(page);
	page_cache_release(page);
	return error;
}
2090 | 2090 | ||
/*
** vfs version of truncate file.  Must NOT be called with
** a transaction already started.
**
** p_s_inode         - inode being truncated; i_size already holds the
**                     new size when we get here
** update_timestamps - nonzero for a real truncate (adds a "save" link so
**                     a crash mid-truncate can be recovered on reboot);
**                     zero when called from inode eviction paths
**
** Returns 0 on success or a negative errno.
**
** some code taken from block_truncate_page
*/
int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
{
	struct reiserfs_transaction_handle th;
	/* we want the offset for the first byte after the end of the file */
	unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1);
	unsigned blocksize = p_s_inode->i_sb->s_blocksize;
	unsigned length;
	struct page *page = NULL;
	int error;
	struct buffer_head *bh = NULL;
	int err2;

	reiserfs_write_lock(p_s_inode->i_sb);

	if (p_s_inode->i_size > 0) {
		if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
			// -ENOENT means we truncated past the end of the file, 
			// and get_block_create_0 could not find a block to read in,
			// which is ok.
			if (error != -ENOENT)
				reiserfs_warning(p_s_inode->i_sb,
						 "clm-6001: grab_tail_page failed %d",
						 error);
			/* proceed without a tail page; zeroing is skipped */
			page = NULL;
			bh = NULL;
		}
	}

	/* so, if page != NULL, we have a buffer head for the offset at 
	 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, 
	 ** then we have an unformatted node.  Otherwise, we have a direct item, 
	 ** and no zeroing is required on disk.  We zero after the truncate, 
	 ** because the truncate might pack the item anyway 
	 ** (it will unmap bh if it packs).
	 */
	/* it is enough to reserve space in transaction for 2 balancings:
	   one for "save" link adding and another for the first
	   cut_from_item. 1 is for update_sd */
	error = journal_begin(&th, p_s_inode->i_sb,
			      JOURNAL_PER_BALANCE_CNT * 2 + 1);
	if (error)
		goto out;
	reiserfs_update_inode_transaction(p_s_inode);
	if (update_timestamps)
		/* we are doing real truncate: if the system crashes before the last
		   transaction of truncating gets committed - on reboot the file
		   either appears truncated properly or not truncated at all */
		add_save_link(&th, p_s_inode, 1);
	err2 = reiserfs_do_truncate(&th, p_s_inode, page, update_timestamps);
	/* always end the transaction, even if the truncate itself failed */
	error =
	    journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
	if (error)
		goto out;

	/* check reiserfs_do_truncate after ending the transaction */
	if (err2) {
		error = err2;
		goto out;
	}
	
	if (update_timestamps) {
		/* truncate committed; the crash-recovery link can go away */
		error = remove_save_link(p_s_inode, 1 /* truncate */ );
		if (error)
			goto out;
	}

	if (page) {
		length = offset & (blocksize - 1);
		/* if we are not on a block boundary */
		if (length) {
			char *kaddr;

			/* zero the part of the last block past EOF so stale
			 * data never leaks back in via mmap/read */
			length = blocksize - length;
			kaddr = kmap_atomic(page, KM_USER0);
			memset(kaddr + offset, 0, length);
			flush_dcache_page(page);
			kunmap_atomic(kaddr, KM_USER0);
			if (buffer_mapped(bh) && bh->b_blocknr != 0) {
				/* unformatted node survived the truncate:
				 * push the zeroes to disk */
				mark_buffer_dirty(bh);
			}
		}
		/* release the lock/ref grab_tail_page handed us */
		unlock_page(page);
		page_cache_release(page);
	}

	reiserfs_write_unlock(p_s_inode->i_sb);
	return 0;
      out:
	if (page) {
		unlock_page(page);
		page_cache_release(page);
	}
	reiserfs_write_unlock(p_s_inode->i_sb);
	return error;
}
2192 | 2192 | ||
2193 | static int map_block_for_writepage(struct inode *inode, | 2193 | static int map_block_for_writepage(struct inode *inode, |
2194 | struct buffer_head *bh_result, | 2194 | struct buffer_head *bh_result, |
2195 | unsigned long block) | 2195 | unsigned long block) |
2196 | { | 2196 | { |
2197 | struct reiserfs_transaction_handle th; | 2197 | struct reiserfs_transaction_handle th; |
2198 | int fs_gen; | 2198 | int fs_gen; |
2199 | struct item_head tmp_ih; | 2199 | struct item_head tmp_ih; |
2200 | struct item_head *ih; | 2200 | struct item_head *ih; |
2201 | struct buffer_head *bh; | 2201 | struct buffer_head *bh; |
2202 | __le32 *item; | 2202 | __le32 *item; |
2203 | struct cpu_key key; | 2203 | struct cpu_key key; |
2204 | INITIALIZE_PATH(path); | 2204 | INITIALIZE_PATH(path); |
2205 | int pos_in_item; | 2205 | int pos_in_item; |
2206 | int jbegin_count = JOURNAL_PER_BALANCE_CNT; | 2206 | int jbegin_count = JOURNAL_PER_BALANCE_CNT; |
2207 | loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1; | 2207 | loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1; |
2208 | int retval; | 2208 | int retval; |
2209 | int use_get_block = 0; | 2209 | int use_get_block = 0; |
2210 | int bytes_copied = 0; | 2210 | int bytes_copied = 0; |
2211 | int copy_size; | 2211 | int copy_size; |
2212 | int trans_running = 0; | 2212 | int trans_running = 0; |
2213 | 2213 | ||
2214 | /* catch places below that try to log something without starting a trans */ | 2214 | /* catch places below that try to log something without starting a trans */ |
2215 | th.t_trans_id = 0; | 2215 | th.t_trans_id = 0; |
2216 | 2216 | ||
2217 | if (!buffer_uptodate(bh_result)) { | 2217 | if (!buffer_uptodate(bh_result)) { |
2218 | return -EIO; | 2218 | return -EIO; |
2219 | } | 2219 | } |
2220 | 2220 | ||
2221 | kmap(bh_result->b_page); | 2221 | kmap(bh_result->b_page); |
2222 | start_over: | 2222 | start_over: |
2223 | reiserfs_write_lock(inode->i_sb); | 2223 | reiserfs_write_lock(inode->i_sb); |
2224 | make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3); | 2224 | make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3); |
2225 | 2225 | ||
2226 | research: | 2226 | research: |
2227 | retval = search_for_position_by_key(inode->i_sb, &key, &path); | 2227 | retval = search_for_position_by_key(inode->i_sb, &key, &path); |
2228 | if (retval != POSITION_FOUND) { | 2228 | if (retval != POSITION_FOUND) { |
2229 | use_get_block = 1; | 2229 | use_get_block = 1; |
2230 | goto out; | 2230 | goto out; |
2231 | } | 2231 | } |
2232 | 2232 | ||
2233 | bh = get_last_bh(&path); | 2233 | bh = get_last_bh(&path); |
2234 | ih = get_ih(&path); | 2234 | ih = get_ih(&path); |
2235 | item = get_item(&path); | 2235 | item = get_item(&path); |
2236 | pos_in_item = path.pos_in_item; | 2236 | pos_in_item = path.pos_in_item; |
2237 | 2237 | ||
2238 | /* we've found an unformatted node */ | 2238 | /* we've found an unformatted node */ |
2239 | if (indirect_item_found(retval, ih)) { | 2239 | if (indirect_item_found(retval, ih)) { |
2240 | if (bytes_copied > 0) { | 2240 | if (bytes_copied > 0) { |
2241 | reiserfs_warning(inode->i_sb, | 2241 | reiserfs_warning(inode->i_sb, |
2242 | "clm-6002: bytes_copied %d", | 2242 | "clm-6002: bytes_copied %d", |
2243 | bytes_copied); | 2243 | bytes_copied); |
2244 | } | 2244 | } |
2245 | if (!get_block_num(item, pos_in_item)) { | 2245 | if (!get_block_num(item, pos_in_item)) { |
2246 | /* crap, we are writing to a hole */ | 2246 | /* crap, we are writing to a hole */ |
2247 | use_get_block = 1; | 2247 | use_get_block = 1; |
2248 | goto out; | 2248 | goto out; |
2249 | } | 2249 | } |
2250 | set_block_dev_mapped(bh_result, | 2250 | set_block_dev_mapped(bh_result, |
2251 | get_block_num(item, pos_in_item), inode); | 2251 | get_block_num(item, pos_in_item), inode); |
2252 | } else if (is_direct_le_ih(ih)) { | 2252 | } else if (is_direct_le_ih(ih)) { |
2253 | char *p; | 2253 | char *p; |
2254 | p = page_address(bh_result->b_page); | 2254 | p = page_address(bh_result->b_page); |
2255 | p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1); | 2255 | p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1); |
2256 | copy_size = ih_item_len(ih) - pos_in_item; | 2256 | copy_size = ih_item_len(ih) - pos_in_item; |
2257 | 2257 | ||
2258 | fs_gen = get_generation(inode->i_sb); | 2258 | fs_gen = get_generation(inode->i_sb); |
2259 | copy_item_head(&tmp_ih, ih); | 2259 | copy_item_head(&tmp_ih, ih); |
2260 | 2260 | ||
2261 | if (!trans_running) { | 2261 | if (!trans_running) { |
2262 | /* vs-3050 is gone, no need to drop the path */ | 2262 | /* vs-3050 is gone, no need to drop the path */ |
2263 | retval = journal_begin(&th, inode->i_sb, jbegin_count); | 2263 | retval = journal_begin(&th, inode->i_sb, jbegin_count); |
2264 | if (retval) | 2264 | if (retval) |
2265 | goto out; | 2265 | goto out; |
2266 | reiserfs_update_inode_transaction(inode); | 2266 | reiserfs_update_inode_transaction(inode); |
2267 | trans_running = 1; | 2267 | trans_running = 1; |
2268 | if (fs_changed(fs_gen, inode->i_sb) | 2268 | if (fs_changed(fs_gen, inode->i_sb) |
2269 | && item_moved(&tmp_ih, &path)) { | 2269 | && item_moved(&tmp_ih, &path)) { |
2270 | reiserfs_restore_prepared_buffer(inode->i_sb, | 2270 | reiserfs_restore_prepared_buffer(inode->i_sb, |
2271 | bh); | 2271 | bh); |
2272 | goto research; | 2272 | goto research; |
2273 | } | 2273 | } |
2274 | } | 2274 | } |
2275 | 2275 | ||
2276 | reiserfs_prepare_for_journal(inode->i_sb, bh, 1); | 2276 | reiserfs_prepare_for_journal(inode->i_sb, bh, 1); |
2277 | 2277 | ||
2278 | if (fs_changed(fs_gen, inode->i_sb) | 2278 | if (fs_changed(fs_gen, inode->i_sb) |
2279 | && item_moved(&tmp_ih, &path)) { | 2279 | && item_moved(&tmp_ih, &path)) { |
2280 | reiserfs_restore_prepared_buffer(inode->i_sb, bh); | 2280 | reiserfs_restore_prepared_buffer(inode->i_sb, bh); |
2281 | goto research; | 2281 | goto research; |
2282 | } | 2282 | } |
2283 | 2283 | ||
2284 | memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, | 2284 | memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, |
2285 | copy_size); | 2285 | copy_size); |
2286 | 2286 | ||
2287 | journal_mark_dirty(&th, inode->i_sb, bh); | 2287 | journal_mark_dirty(&th, inode->i_sb, bh); |
2288 | bytes_copied += copy_size; | 2288 | bytes_copied += copy_size; |
2289 | set_block_dev_mapped(bh_result, 0, inode); | 2289 | set_block_dev_mapped(bh_result, 0, inode); |
2290 | 2290 | ||
2291 | /* are there still bytes left? */ | 2291 | /* are there still bytes left? */ |
2292 | if (bytes_copied < bh_result->b_size && | 2292 | if (bytes_copied < bh_result->b_size && |
2293 | (byte_offset + bytes_copied) < inode->i_size) { | 2293 | (byte_offset + bytes_copied) < inode->i_size) { |
2294 | set_cpu_key_k_offset(&key, | 2294 | set_cpu_key_k_offset(&key, |
2295 | cpu_key_k_offset(&key) + | 2295 | cpu_key_k_offset(&key) + |
2296 | copy_size); | 2296 | copy_size); |
2297 | goto research; | 2297 | goto research; |
2298 | } | 2298 | } |
2299 | } else { | 2299 | } else { |
2300 | reiserfs_warning(inode->i_sb, | 2300 | reiserfs_warning(inode->i_sb, |
2301 | "clm-6003: bad item inode %lu, device %s", | 2301 | "clm-6003: bad item inode %lu, device %s", |
2302 | inode->i_ino, reiserfs_bdevname(inode->i_sb)); | 2302 | inode->i_ino, reiserfs_bdevname(inode->i_sb)); |
2303 | retval = -EIO; | 2303 | retval = -EIO; |
2304 | goto out; | 2304 | goto out; |
2305 | } | 2305 | } |
2306 | retval = 0; | 2306 | retval = 0; |
2307 | 2307 | ||
2308 | out: | 2308 | out: |
2309 | pathrelse(&path); | 2309 | pathrelse(&path); |
2310 | if (trans_running) { | 2310 | if (trans_running) { |
2311 | int err = journal_end(&th, inode->i_sb, jbegin_count); | 2311 | int err = journal_end(&th, inode->i_sb, jbegin_count); |
2312 | if (err) | 2312 | if (err) |
2313 | retval = err; | 2313 | retval = err; |
2314 | trans_running = 0; | 2314 | trans_running = 0; |
2315 | } | 2315 | } |
2316 | reiserfs_write_unlock(inode->i_sb); | 2316 | reiserfs_write_unlock(inode->i_sb); |
2317 | 2317 | ||
2318 | /* this is where we fill in holes in the file. */ | 2318 | /* this is where we fill in holes in the file. */ |
2319 | if (use_get_block) { | 2319 | if (use_get_block) { |
2320 | retval = reiserfs_get_block(inode, block, bh_result, | 2320 | retval = reiserfs_get_block(inode, block, bh_result, |
2321 | GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX | 2321 | GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX |
2322 | | GET_BLOCK_NO_DANGLE); | 2322 | | GET_BLOCK_NO_DANGLE); |
2323 | if (!retval) { | 2323 | if (!retval) { |
2324 | if (!buffer_mapped(bh_result) | 2324 | if (!buffer_mapped(bh_result) |
2325 | || bh_result->b_blocknr == 0) { | 2325 | || bh_result->b_blocknr == 0) { |
2326 | /* get_block failed to find a mapped unformatted node. */ | 2326 | /* get_block failed to find a mapped unformatted node. */ |
2327 | use_get_block = 0; | 2327 | use_get_block = 0; |
2328 | goto start_over; | 2328 | goto start_over; |
2329 | } | 2329 | } |
2330 | } | 2330 | } |
2331 | } | 2331 | } |
2332 | kunmap(bh_result->b_page); | 2332 | kunmap(bh_result->b_page); |
2333 | 2333 | ||
2334 | if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { | 2334 | if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { |
2335 | /* we've copied data from the page into the direct item, so the | 2335 | /* we've copied data from the page into the direct item, so the |
2336 | * buffer in the page is now clean, mark it to reflect that. | 2336 | * buffer in the page is now clean, mark it to reflect that. |
2337 | */ | 2337 | */ |
2338 | lock_buffer(bh_result); | 2338 | lock_buffer(bh_result); |
2339 | clear_buffer_dirty(bh_result); | 2339 | clear_buffer_dirty(bh_result); |
2340 | unlock_buffer(bh_result); | 2340 | unlock_buffer(bh_result); |
2341 | } | 2341 | } |
2342 | return retval; | 2342 | return retval; |
2343 | } | 2343 | } |
2344 | 2344 | ||
2345 | /* | 2345 | /* |
2346 | * mason@suse.com: updated in 2.5.54 to follow the same general io | 2346 | * mason@suse.com: updated in 2.5.54 to follow the same general io |
2347 | * start/recovery path as __block_write_full_page, along with special | 2347 | * start/recovery path as __block_write_full_page, along with special |
2348 | * code to handle reiserfs tails. | 2348 | * code to handle reiserfs tails. |
2349 | */ | 2349 | */ |
2350 | static int reiserfs_write_full_page(struct page *page, | 2350 | static int reiserfs_write_full_page(struct page *page, |
2351 | struct writeback_control *wbc) | 2351 | struct writeback_control *wbc) |
2352 | { | 2352 | { |
2353 | struct inode *inode = page->mapping->host; | 2353 | struct inode *inode = page->mapping->host; |
2354 | unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; | 2354 | unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; |
2355 | int error = 0; | 2355 | int error = 0; |
2356 | unsigned long block; | 2356 | unsigned long block; |
2357 | struct buffer_head *head, *bh; | 2357 | struct buffer_head *head, *bh; |
2358 | int partial = 0; | 2358 | int partial = 0; |
2359 | int nr = 0; | 2359 | int nr = 0; |
2360 | int checked = PageChecked(page); | 2360 | int checked = PageChecked(page); |
2361 | struct reiserfs_transaction_handle th; | 2361 | struct reiserfs_transaction_handle th; |
2362 | struct super_block *s = inode->i_sb; | 2362 | struct super_block *s = inode->i_sb; |
2363 | int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; | 2363 | int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; |
2364 | th.t_trans_id = 0; | 2364 | th.t_trans_id = 0; |
2365 | 2365 | ||
2366 | /* no logging allowed when nonblocking or from PF_MEMALLOC */ | ||
2367 | if (checked && (current->flags & PF_MEMALLOC)) { | ||
2368 | redirty_page_for_writepage(wbc, page); | ||
2369 | unlock_page(page); | ||
2370 | return 0; | ||
2371 | } | ||
2372 | |||
2366 | /* The page dirty bit is cleared before writepage is called, which | 2373 | /* The page dirty bit is cleared before writepage is called, which |
2367 | * means we have to tell create_empty_buffers to make dirty buffers | 2374 | * means we have to tell create_empty_buffers to make dirty buffers |
2368 | * The page really should be up to date at this point, so tossing | 2375 | * The page really should be up to date at this point, so tossing |
2369 | * in the BH_Uptodate is just a sanity check. | 2376 | * in the BH_Uptodate is just a sanity check. |
2370 | */ | 2377 | */ |
2371 | if (!page_has_buffers(page)) { | 2378 | if (!page_has_buffers(page)) { |
2372 | create_empty_buffers(page, s->s_blocksize, | 2379 | create_empty_buffers(page, s->s_blocksize, |
2373 | (1 << BH_Dirty) | (1 << BH_Uptodate)); | 2380 | (1 << BH_Dirty) | (1 << BH_Uptodate)); |
2374 | } | 2381 | } |
2375 | head = page_buffers(page); | 2382 | head = page_buffers(page); |
2376 | 2383 | ||
2377 | /* last page in the file, zero out any contents past the | 2384 | /* last page in the file, zero out any contents past the |
2378 | ** last byte in the file | 2385 | ** last byte in the file |
2379 | */ | 2386 | */ |
2380 | if (page->index >= end_index) { | 2387 | if (page->index >= end_index) { |
2381 | char *kaddr; | 2388 | char *kaddr; |
2382 | unsigned last_offset; | 2389 | unsigned last_offset; |
2383 | 2390 | ||
2384 | last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); | 2391 | last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); |
2385 | /* no file contents in this page */ | 2392 | /* no file contents in this page */ |
2386 | if (page->index >= end_index + 1 || !last_offset) { | 2393 | if (page->index >= end_index + 1 || !last_offset) { |
2387 | unlock_page(page); | 2394 | unlock_page(page); |
2388 | return 0; | 2395 | return 0; |
2389 | } | 2396 | } |
2390 | kaddr = kmap_atomic(page, KM_USER0); | 2397 | kaddr = kmap_atomic(page, KM_USER0); |
2391 | memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE - last_offset); | 2398 | memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE - last_offset); |
2392 | flush_dcache_page(page); | 2399 | flush_dcache_page(page); |
2393 | kunmap_atomic(kaddr, KM_USER0); | 2400 | kunmap_atomic(kaddr, KM_USER0); |
2394 | } | 2401 | } |
2395 | bh = head; | 2402 | bh = head; |
2396 | block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); | 2403 | block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); |
2397 | /* first map all the buffers, logging any direct items we find */ | 2404 | /* first map all the buffers, logging any direct items we find */ |
2398 | do { | 2405 | do { |
2399 | if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) || | 2406 | if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) || |
2400 | (buffer_mapped(bh) | 2407 | (buffer_mapped(bh) |
2401 | && bh->b_blocknr == | 2408 | && bh->b_blocknr == |
2402 | 0))) { | 2409 | 0))) { |
2403 | /* not mapped yet, or it points to a direct item, search | 2410 | /* not mapped yet, or it points to a direct item, search |
2404 | * the btree for the mapping info, and log any direct | 2411 | * the btree for the mapping info, and log any direct |
2405 | * items found | 2412 | * items found |
2406 | */ | 2413 | */ |
2407 | if ((error = map_block_for_writepage(inode, bh, block))) { | 2414 | if ((error = map_block_for_writepage(inode, bh, block))) { |
2408 | goto fail; | 2415 | goto fail; |
2409 | } | 2416 | } |
2410 | } | 2417 | } |
2411 | bh = bh->b_this_page; | 2418 | bh = bh->b_this_page; |
2412 | block++; | 2419 | block++; |
2413 | } while (bh != head); | 2420 | } while (bh != head); |
2414 | 2421 | ||
2415 | /* | 2422 | /* |
2416 | * we start the transaction after map_block_for_writepage, | 2423 | * we start the transaction after map_block_for_writepage, |
2417 | * because it can create holes in the file (an unbounded operation). | 2424 | * because it can create holes in the file (an unbounded operation). |
2418 | * starting it here, we can make a reliable estimate for how many | 2425 | * starting it here, we can make a reliable estimate for how many |
2419 | * blocks we're going to log | 2426 | * blocks we're going to log |
2420 | */ | 2427 | */ |
2421 | if (checked) { | 2428 | if (checked) { |
2422 | ClearPageChecked(page); | 2429 | ClearPageChecked(page); |
2423 | reiserfs_write_lock(s); | 2430 | reiserfs_write_lock(s); |
2424 | error = journal_begin(&th, s, bh_per_page + 1); | 2431 | error = journal_begin(&th, s, bh_per_page + 1); |
2425 | if (error) { | 2432 | if (error) { |
2426 | reiserfs_write_unlock(s); | 2433 | reiserfs_write_unlock(s); |
2427 | goto fail; | 2434 | goto fail; |
2428 | } | 2435 | } |
2429 | reiserfs_update_inode_transaction(inode); | 2436 | reiserfs_update_inode_transaction(inode); |
2430 | } | 2437 | } |
2431 | /* now go through and lock any dirty buffers on the page */ | 2438 | /* now go through and lock any dirty buffers on the page */ |
2432 | do { | 2439 | do { |
2433 | get_bh(bh); | 2440 | get_bh(bh); |
2434 | if (!buffer_mapped(bh)) | 2441 | if (!buffer_mapped(bh)) |
2435 | continue; | 2442 | continue; |
2436 | if (buffer_mapped(bh) && bh->b_blocknr == 0) | 2443 | if (buffer_mapped(bh) && bh->b_blocknr == 0) |
2437 | continue; | 2444 | continue; |
2438 | 2445 | ||
2439 | if (checked) { | 2446 | if (checked) { |
2440 | reiserfs_prepare_for_journal(s, bh, 1); | 2447 | reiserfs_prepare_for_journal(s, bh, 1); |
2441 | journal_mark_dirty(&th, s, bh); | 2448 | journal_mark_dirty(&th, s, bh); |
2442 | continue; | 2449 | continue; |
2443 | } | 2450 | } |
2444 | /* from this point on, we know the buffer is mapped to a | 2451 | /* from this point on, we know the buffer is mapped to a |
2445 | * real block and not a direct item | 2452 | * real block and not a direct item |
2446 | */ | 2453 | */ |
2447 | if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { | 2454 | if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { |
2448 | lock_buffer(bh); | 2455 | lock_buffer(bh); |
2449 | } else { | 2456 | } else { |
2450 | if (test_set_buffer_locked(bh)) { | 2457 | if (test_set_buffer_locked(bh)) { |
2451 | redirty_page_for_writepage(wbc, page); | 2458 | redirty_page_for_writepage(wbc, page); |
2452 | continue; | 2459 | continue; |
2453 | } | 2460 | } |
2454 | } | 2461 | } |
2455 | if (test_clear_buffer_dirty(bh)) { | 2462 | if (test_clear_buffer_dirty(bh)) { |
2456 | mark_buffer_async_write(bh); | 2463 | mark_buffer_async_write(bh); |
2457 | } else { | 2464 | } else { |
2458 | unlock_buffer(bh); | 2465 | unlock_buffer(bh); |
2459 | } | 2466 | } |
2460 | } while ((bh = bh->b_this_page) != head); | 2467 | } while ((bh = bh->b_this_page) != head); |
2461 | 2468 | ||
2462 | if (checked) { | 2469 | if (checked) { |
2463 | error = journal_end(&th, s, bh_per_page + 1); | 2470 | error = journal_end(&th, s, bh_per_page + 1); |
2464 | reiserfs_write_unlock(s); | 2471 | reiserfs_write_unlock(s); |
2465 | if (error) | 2472 | if (error) |
2466 | goto fail; | 2473 | goto fail; |
2467 | } | 2474 | } |
2468 | BUG_ON(PageWriteback(page)); | 2475 | BUG_ON(PageWriteback(page)); |
2469 | set_page_writeback(page); | 2476 | set_page_writeback(page); |
2470 | unlock_page(page); | 2477 | unlock_page(page); |
2471 | 2478 | ||
2472 | /* | 2479 | /* |
2473 | * since any buffer might be the only dirty buffer on the page, | 2480 | * since any buffer might be the only dirty buffer on the page, |
2474 | * the first submit_bh can bring the page out of writeback. | 2481 | * the first submit_bh can bring the page out of writeback. |
2475 | * be careful with the buffers. | 2482 | * be careful with the buffers. |
2476 | */ | 2483 | */ |
2477 | do { | 2484 | do { |
2478 | struct buffer_head *next = bh->b_this_page; | 2485 | struct buffer_head *next = bh->b_this_page; |
2479 | if (buffer_async_write(bh)) { | 2486 | if (buffer_async_write(bh)) { |
2480 | submit_bh(WRITE, bh); | 2487 | submit_bh(WRITE, bh); |
2481 | nr++; | 2488 | nr++; |
2482 | } | 2489 | } |
2483 | put_bh(bh); | 2490 | put_bh(bh); |
2484 | bh = next; | 2491 | bh = next; |
2485 | } while (bh != head); | 2492 | } while (bh != head); |
2486 | 2493 | ||
2487 | error = 0; | 2494 | error = 0; |
2488 | done: | 2495 | done: |
2489 | if (nr == 0) { | 2496 | if (nr == 0) { |
2490 | /* | 2497 | /* |
2491 | * if this page only had a direct item, it is very possible for | 2498 | * if this page only had a direct item, it is very possible for |
2492 | * no io to be required without there being an error. Or, | 2499 | * no io to be required without there being an error. Or, |
2493 | * someone else could have locked them and sent them down the | 2500 | * someone else could have locked them and sent them down the |
2494 | * pipe without locking the page | 2501 | * pipe without locking the page |
2495 | */ | 2502 | */ |
2496 | bh = head; | 2503 | bh = head; |
2497 | do { | 2504 | do { |
2498 | if (!buffer_uptodate(bh)) { | 2505 | if (!buffer_uptodate(bh)) { |
2499 | partial = 1; | 2506 | partial = 1; |
2500 | break; | 2507 | break; |
2501 | } | 2508 | } |
2502 | bh = bh->b_this_page; | 2509 | bh = bh->b_this_page; |
2503 | } while (bh != head); | 2510 | } while (bh != head); |
2504 | if (!partial) | 2511 | if (!partial) |
2505 | SetPageUptodate(page); | 2512 | SetPageUptodate(page); |
2506 | end_page_writeback(page); | 2513 | end_page_writeback(page); |
2507 | } | 2514 | } |
2508 | return error; | 2515 | return error; |
2509 | 2516 | ||
2510 | fail: | 2517 | fail: |
2511 | /* catches various errors, we need to make sure any valid dirty blocks | 2518 | /* catches various errors, we need to make sure any valid dirty blocks |
2512 | * get to the media. The page is currently locked and not marked for | 2519 | * get to the media. The page is currently locked and not marked for |
2513 | * writeback | 2520 | * writeback |
2514 | */ | 2521 | */ |
2515 | ClearPageUptodate(page); | 2522 | ClearPageUptodate(page); |
2516 | bh = head; | 2523 | bh = head; |
2517 | do { | 2524 | do { |
2518 | get_bh(bh); | 2525 | get_bh(bh); |
2519 | if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) { | 2526 | if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) { |
2520 | lock_buffer(bh); | 2527 | lock_buffer(bh); |
2521 | mark_buffer_async_write(bh); | 2528 | mark_buffer_async_write(bh); |
2522 | } else { | 2529 | } else { |
2523 | /* | 2530 | /* |
2524 | * clear any dirty bits that might have come from getting | 2531 | * clear any dirty bits that might have come from getting |
2525 | * attached to a dirty page | 2532 | * attached to a dirty page |
2526 | */ | 2533 | */ |
2527 | clear_buffer_dirty(bh); | 2534 | clear_buffer_dirty(bh); |
2528 | } | 2535 | } |
2529 | bh = bh->b_this_page; | 2536 | bh = bh->b_this_page; |
2530 | } while (bh != head); | 2537 | } while (bh != head); |
2531 | SetPageError(page); | 2538 | SetPageError(page); |
2532 | BUG_ON(PageWriteback(page)); | 2539 | BUG_ON(PageWriteback(page)); |
2533 | set_page_writeback(page); | 2540 | set_page_writeback(page); |
2534 | unlock_page(page); | 2541 | unlock_page(page); |
2535 | do { | 2542 | do { |
2536 | struct buffer_head *next = bh->b_this_page; | 2543 | struct buffer_head *next = bh->b_this_page; |
2537 | if (buffer_async_write(bh)) { | 2544 | if (buffer_async_write(bh)) { |
2538 | clear_buffer_dirty(bh); | 2545 | clear_buffer_dirty(bh); |
2539 | submit_bh(WRITE, bh); | 2546 | submit_bh(WRITE, bh); |
2540 | nr++; | 2547 | nr++; |
2541 | } | 2548 | } |
2542 | put_bh(bh); | 2549 | put_bh(bh); |
2543 | bh = next; | 2550 | bh = next; |
2544 | } while (bh != head); | 2551 | } while (bh != head); |
2545 | goto done; | 2552 | goto done; |
2546 | } | 2553 | } |
2547 | 2554 | ||
2548 | static int reiserfs_readpage(struct file *f, struct page *page) | 2555 | static int reiserfs_readpage(struct file *f, struct page *page) |
2549 | { | 2556 | { |
2550 | return block_read_full_page(page, reiserfs_get_block); | 2557 | return block_read_full_page(page, reiserfs_get_block); |
2551 | } | 2558 | } |
2552 | 2559 | ||
2553 | static int reiserfs_writepage(struct page *page, struct writeback_control *wbc) | 2560 | static int reiserfs_writepage(struct page *page, struct writeback_control *wbc) |
2554 | { | 2561 | { |
2555 | struct inode *inode = page->mapping->host; | 2562 | struct inode *inode = page->mapping->host; |
2556 | reiserfs_wait_on_write_block(inode->i_sb); | 2563 | reiserfs_wait_on_write_block(inode->i_sb); |
2557 | return reiserfs_write_full_page(page, wbc); | 2564 | return reiserfs_write_full_page(page, wbc); |
2558 | } | 2565 | } |
2559 | 2566 | ||
2560 | static int reiserfs_prepare_write(struct file *f, struct page *page, | 2567 | static int reiserfs_prepare_write(struct file *f, struct page *page, |
2561 | unsigned from, unsigned to) | 2568 | unsigned from, unsigned to) |
2562 | { | 2569 | { |
2563 | struct inode *inode = page->mapping->host; | 2570 | struct inode *inode = page->mapping->host; |
2564 | int ret; | 2571 | int ret; |
2565 | int old_ref = 0; | 2572 | int old_ref = 0; |
2566 | 2573 | ||
2567 | reiserfs_wait_on_write_block(inode->i_sb); | 2574 | reiserfs_wait_on_write_block(inode->i_sb); |
2568 | fix_tail_page_for_writing(page); | 2575 | fix_tail_page_for_writing(page); |
2569 | if (reiserfs_transaction_running(inode->i_sb)) { | 2576 | if (reiserfs_transaction_running(inode->i_sb)) { |
2570 | struct reiserfs_transaction_handle *th; | 2577 | struct reiserfs_transaction_handle *th; |
2571 | th = (struct reiserfs_transaction_handle *)current-> | 2578 | th = (struct reiserfs_transaction_handle *)current-> |
2572 | journal_info; | 2579 | journal_info; |
2573 | BUG_ON(!th->t_refcount); | 2580 | BUG_ON(!th->t_refcount); |
2574 | BUG_ON(!th->t_trans_id); | 2581 | BUG_ON(!th->t_trans_id); |
2575 | old_ref = th->t_refcount; | 2582 | old_ref = th->t_refcount; |
2576 | th->t_refcount++; | 2583 | th->t_refcount++; |
2577 | } | 2584 | } |
2578 | 2585 | ||
2579 | ret = block_prepare_write(page, from, to, reiserfs_get_block); | 2586 | ret = block_prepare_write(page, from, to, reiserfs_get_block); |
2580 | if (ret && reiserfs_transaction_running(inode->i_sb)) { | 2587 | if (ret && reiserfs_transaction_running(inode->i_sb)) { |
2581 | struct reiserfs_transaction_handle *th = current->journal_info; | 2588 | struct reiserfs_transaction_handle *th = current->journal_info; |
2582 | /* this gets a little ugly. If reiserfs_get_block returned an | 2589 | /* this gets a little ugly. If reiserfs_get_block returned an |
2583 | * error and left a transacstion running, we've got to close it, | 2590 | * error and left a transacstion running, we've got to close it, |
2584 | * and we've got to free handle if it was a persistent transaction. | 2591 | * and we've got to free handle if it was a persistent transaction. |
2585 | * | 2592 | * |
2586 | * But, if we had nested into an existing transaction, we need | 2593 | * But, if we had nested into an existing transaction, we need |
2587 | * to just drop the ref count on the handle. | 2594 | * to just drop the ref count on the handle. |
2588 | * | 2595 | * |
2589 | * If old_ref == 0, the transaction is from reiserfs_get_block, | 2596 | * If old_ref == 0, the transaction is from reiserfs_get_block, |
2590 | * and it was a persistent trans. Otherwise, it was nested above. | 2597 | * and it was a persistent trans. Otherwise, it was nested above. |
2591 | */ | 2598 | */ |
2592 | if (th->t_refcount > old_ref) { | 2599 | if (th->t_refcount > old_ref) { |
2593 | if (old_ref) | 2600 | if (old_ref) |
2594 | th->t_refcount--; | 2601 | th->t_refcount--; |
2595 | else { | 2602 | else { |
2596 | int err; | 2603 | int err; |
2597 | reiserfs_write_lock(inode->i_sb); | 2604 | reiserfs_write_lock(inode->i_sb); |
2598 | err = reiserfs_end_persistent_transaction(th); | 2605 | err = reiserfs_end_persistent_transaction(th); |
2599 | reiserfs_write_unlock(inode->i_sb); | 2606 | reiserfs_write_unlock(inode->i_sb); |
2600 | if (err) | 2607 | if (err) |
2601 | ret = err; | 2608 | ret = err; |
2602 | } | 2609 | } |
2603 | } | 2610 | } |
2604 | } | 2611 | } |
2605 | return ret; | 2612 | return ret; |
2606 | 2613 | ||
2607 | } | 2614 | } |
2608 | 2615 | ||
2609 | static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) | 2616 | static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) |
2610 | { | 2617 | { |
2611 | return generic_block_bmap(as, block, reiserfs_bmap); | 2618 | return generic_block_bmap(as, block, reiserfs_bmap); |
2612 | } | 2619 | } |
2613 | 2620 | ||
2614 | static int reiserfs_commit_write(struct file *f, struct page *page, | 2621 | static int reiserfs_commit_write(struct file *f, struct page *page, |
2615 | unsigned from, unsigned to) | 2622 | unsigned from, unsigned to) |
2616 | { | 2623 | { |
2617 | struct inode *inode = page->mapping->host; | 2624 | struct inode *inode = page->mapping->host; |
2618 | loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; | 2625 | loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; |
2619 | int ret = 0; | 2626 | int ret = 0; |
2620 | int update_sd = 0; | 2627 | int update_sd = 0; |
2621 | struct reiserfs_transaction_handle *th = NULL; | 2628 | struct reiserfs_transaction_handle *th = NULL; |
2622 | 2629 | ||
2623 | reiserfs_wait_on_write_block(inode->i_sb); | 2630 | reiserfs_wait_on_write_block(inode->i_sb); |
2624 | if (reiserfs_transaction_running(inode->i_sb)) { | 2631 | if (reiserfs_transaction_running(inode->i_sb)) { |
2625 | th = current->journal_info; | 2632 | th = current->journal_info; |
2626 | } | 2633 | } |
2627 | reiserfs_commit_page(inode, page, from, to); | 2634 | reiserfs_commit_page(inode, page, from, to); |
2628 | 2635 | ||
2629 | /* generic_commit_write does this for us, but does not update the | 2636 | /* generic_commit_write does this for us, but does not update the |
2630 | ** transaction tracking stuff when the size changes. So, we have | 2637 | ** transaction tracking stuff when the size changes. So, we have |
2631 | ** to do the i_size updates here. | 2638 | ** to do the i_size updates here. |
2632 | */ | 2639 | */ |
2633 | if (pos > inode->i_size) { | 2640 | if (pos > inode->i_size) { |
2634 | struct reiserfs_transaction_handle myth; | 2641 | struct reiserfs_transaction_handle myth; |
2635 | reiserfs_write_lock(inode->i_sb); | 2642 | reiserfs_write_lock(inode->i_sb); |
2636 | /* If the file have grown beyond the border where it | 2643 | /* If the file have grown beyond the border where it |
2637 | can have a tail, unmark it as needing a tail | 2644 | can have a tail, unmark it as needing a tail |
2638 | packing */ | 2645 | packing */ |
2639 | if ((have_large_tails(inode->i_sb) | 2646 | if ((have_large_tails(inode->i_sb) |
2640 | && inode->i_size > i_block_size(inode) * 4) | 2647 | && inode->i_size > i_block_size(inode) * 4) |
2641 | || (have_small_tails(inode->i_sb) | 2648 | || (have_small_tails(inode->i_sb) |
2642 | && inode->i_size > i_block_size(inode))) | 2649 | && inode->i_size > i_block_size(inode))) |
2643 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; | 2650 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; |
2644 | 2651 | ||
2645 | ret = journal_begin(&myth, inode->i_sb, 1); | 2652 | ret = journal_begin(&myth, inode->i_sb, 1); |
2646 | if (ret) { | 2653 | if (ret) { |
2647 | reiserfs_write_unlock(inode->i_sb); | 2654 | reiserfs_write_unlock(inode->i_sb); |
2648 | goto journal_error; | 2655 | goto journal_error; |
2649 | } | 2656 | } |
2650 | reiserfs_update_inode_transaction(inode); | 2657 | reiserfs_update_inode_transaction(inode); |
2651 | inode->i_size = pos; | 2658 | inode->i_size = pos; |
2652 | /* | 2659 | /* |
2653 | * this will just nest into our transaction. It's important | 2660 | * this will just nest into our transaction. It's important |
2654 | * to use mark_inode_dirty so the inode gets pushed around on the | 2661 | * to use mark_inode_dirty so the inode gets pushed around on the |
2655 | * dirty lists, and so that O_SYNC works as expected | 2662 | * dirty lists, and so that O_SYNC works as expected |
2656 | */ | 2663 | */ |
2657 | mark_inode_dirty(inode); | 2664 | mark_inode_dirty(inode); |
2658 | reiserfs_update_sd(&myth, inode); | 2665 | reiserfs_update_sd(&myth, inode); |
2659 | update_sd = 1; | 2666 | update_sd = 1; |
2660 | ret = journal_end(&myth, inode->i_sb, 1); | 2667 | ret = journal_end(&myth, inode->i_sb, 1); |
2661 | reiserfs_write_unlock(inode->i_sb); | 2668 | reiserfs_write_unlock(inode->i_sb); |
2662 | if (ret) | 2669 | if (ret) |
2663 | goto journal_error; | 2670 | goto journal_error; |
2664 | } | 2671 | } |
2665 | if (th) { | 2672 | if (th) { |
2666 | reiserfs_write_lock(inode->i_sb); | 2673 | reiserfs_write_lock(inode->i_sb); |
2667 | if (!update_sd) | 2674 | if (!update_sd) |
2668 | mark_inode_dirty(inode); | 2675 | mark_inode_dirty(inode); |
2669 | ret = reiserfs_end_persistent_transaction(th); | 2676 | ret = reiserfs_end_persistent_transaction(th); |
2670 | reiserfs_write_unlock(inode->i_sb); | 2677 | reiserfs_write_unlock(inode->i_sb); |
2671 | if (ret) | 2678 | if (ret) |
2672 | goto out; | 2679 | goto out; |
2673 | } | 2680 | } |
2674 | 2681 | ||
2675 | out: | 2682 | out: |
2676 | return ret; | 2683 | return ret; |
2677 | 2684 | ||
2678 | journal_error: | 2685 | journal_error: |
2679 | if (th) { | 2686 | if (th) { |
2680 | reiserfs_write_lock(inode->i_sb); | 2687 | reiserfs_write_lock(inode->i_sb); |
2681 | if (!update_sd) | 2688 | if (!update_sd) |
2682 | reiserfs_update_sd(th, inode); | 2689 | reiserfs_update_sd(th, inode); |
2683 | ret = reiserfs_end_persistent_transaction(th); | 2690 | ret = reiserfs_end_persistent_transaction(th); |
2684 | reiserfs_write_unlock(inode->i_sb); | 2691 | reiserfs_write_unlock(inode->i_sb); |
2685 | } | 2692 | } |
2686 | 2693 | ||
2687 | return ret; | 2694 | return ret; |
2688 | } | 2695 | } |
2689 | 2696 | ||
2690 | void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) | 2697 | void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) |
2691 | { | 2698 | { |
2692 | if (reiserfs_attrs(inode->i_sb)) { | 2699 | if (reiserfs_attrs(inode->i_sb)) { |
2693 | if (sd_attrs & REISERFS_SYNC_FL) | 2700 | if (sd_attrs & REISERFS_SYNC_FL) |
2694 | inode->i_flags |= S_SYNC; | 2701 | inode->i_flags |= S_SYNC; |
2695 | else | 2702 | else |
2696 | inode->i_flags &= ~S_SYNC; | 2703 | inode->i_flags &= ~S_SYNC; |
2697 | if (sd_attrs & REISERFS_IMMUTABLE_FL) | 2704 | if (sd_attrs & REISERFS_IMMUTABLE_FL) |
2698 | inode->i_flags |= S_IMMUTABLE; | 2705 | inode->i_flags |= S_IMMUTABLE; |
2699 | else | 2706 | else |
2700 | inode->i_flags &= ~S_IMMUTABLE; | 2707 | inode->i_flags &= ~S_IMMUTABLE; |
2701 | if (sd_attrs & REISERFS_APPEND_FL) | 2708 | if (sd_attrs & REISERFS_APPEND_FL) |
2702 | inode->i_flags |= S_APPEND; | 2709 | inode->i_flags |= S_APPEND; |
2703 | else | 2710 | else |
2704 | inode->i_flags &= ~S_APPEND; | 2711 | inode->i_flags &= ~S_APPEND; |
2705 | if (sd_attrs & REISERFS_NOATIME_FL) | 2712 | if (sd_attrs & REISERFS_NOATIME_FL) |
2706 | inode->i_flags |= S_NOATIME; | 2713 | inode->i_flags |= S_NOATIME; |
2707 | else | 2714 | else |
2708 | inode->i_flags &= ~S_NOATIME; | 2715 | inode->i_flags &= ~S_NOATIME; |
2709 | if (sd_attrs & REISERFS_NOTAIL_FL) | 2716 | if (sd_attrs & REISERFS_NOTAIL_FL) |
2710 | REISERFS_I(inode)->i_flags |= i_nopack_mask; | 2717 | REISERFS_I(inode)->i_flags |= i_nopack_mask; |
2711 | else | 2718 | else |
2712 | REISERFS_I(inode)->i_flags &= ~i_nopack_mask; | 2719 | REISERFS_I(inode)->i_flags &= ~i_nopack_mask; |
2713 | } | 2720 | } |
2714 | } | 2721 | } |
2715 | 2722 | ||
2716 | void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs) | 2723 | void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs) |
2717 | { | 2724 | { |
2718 | if (reiserfs_attrs(inode->i_sb)) { | 2725 | if (reiserfs_attrs(inode->i_sb)) { |
2719 | if (inode->i_flags & S_IMMUTABLE) | 2726 | if (inode->i_flags & S_IMMUTABLE) |
2720 | *sd_attrs |= REISERFS_IMMUTABLE_FL; | 2727 | *sd_attrs |= REISERFS_IMMUTABLE_FL; |
2721 | else | 2728 | else |
2722 | *sd_attrs &= ~REISERFS_IMMUTABLE_FL; | 2729 | *sd_attrs &= ~REISERFS_IMMUTABLE_FL; |
2723 | if (inode->i_flags & S_SYNC) | 2730 | if (inode->i_flags & S_SYNC) |
2724 | *sd_attrs |= REISERFS_SYNC_FL; | 2731 | *sd_attrs |= REISERFS_SYNC_FL; |
2725 | else | 2732 | else |
2726 | *sd_attrs &= ~REISERFS_SYNC_FL; | 2733 | *sd_attrs &= ~REISERFS_SYNC_FL; |
2727 | if (inode->i_flags & S_NOATIME) | 2734 | if (inode->i_flags & S_NOATIME) |
2728 | *sd_attrs |= REISERFS_NOATIME_FL; | 2735 | *sd_attrs |= REISERFS_NOATIME_FL; |
2729 | else | 2736 | else |
2730 | *sd_attrs &= ~REISERFS_NOATIME_FL; | 2737 | *sd_attrs &= ~REISERFS_NOATIME_FL; |
2731 | if (REISERFS_I(inode)->i_flags & i_nopack_mask) | 2738 | if (REISERFS_I(inode)->i_flags & i_nopack_mask) |
2732 | *sd_attrs |= REISERFS_NOTAIL_FL; | 2739 | *sd_attrs |= REISERFS_NOTAIL_FL; |
2733 | else | 2740 | else |
2734 | *sd_attrs &= ~REISERFS_NOTAIL_FL; | 2741 | *sd_attrs &= ~REISERFS_NOTAIL_FL; |
2735 | } | 2742 | } |
2736 | } | 2743 | } |
2737 | 2744 | ||
2738 | /* decide if this buffer needs to stay around for data logging or ordered | 2745 | /* decide if this buffer needs to stay around for data logging or ordered |
2739 | ** write purposes | 2746 | ** write purposes |
2740 | */ | 2747 | */ |
2741 | static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) | 2748 | static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) |
2742 | { | 2749 | { |
2743 | int ret = 1; | 2750 | int ret = 1; |
2744 | struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); | 2751 | struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); |
2745 | 2752 | ||
2746 | lock_buffer(bh); | 2753 | lock_buffer(bh); |
2747 | spin_lock(&j->j_dirty_buffers_lock); | 2754 | spin_lock(&j->j_dirty_buffers_lock); |
2748 | if (!buffer_mapped(bh)) { | 2755 | if (!buffer_mapped(bh)) { |
2749 | goto free_jh; | 2756 | goto free_jh; |
2750 | } | 2757 | } |
2751 | /* the page is locked, and the only places that log a data buffer | 2758 | /* the page is locked, and the only places that log a data buffer |
2752 | * also lock the page. | 2759 | * also lock the page. |
2753 | */ | 2760 | */ |
2754 | if (reiserfs_file_data_log(inode)) { | 2761 | if (reiserfs_file_data_log(inode)) { |
2755 | /* | 2762 | /* |
2756 | * very conservative, leave the buffer pinned if | 2763 | * very conservative, leave the buffer pinned if |
2757 | * anyone might need it. | 2764 | * anyone might need it. |
2758 | */ | 2765 | */ |
2759 | if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { | 2766 | if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { |
2760 | ret = 0; | 2767 | ret = 0; |
2761 | } | 2768 | } |
2762 | } else if (buffer_dirty(bh)) { | 2769 | } else if (buffer_dirty(bh)) { |
2763 | struct reiserfs_journal_list *jl; | 2770 | struct reiserfs_journal_list *jl; |
2764 | struct reiserfs_jh *jh = bh->b_private; | 2771 | struct reiserfs_jh *jh = bh->b_private; |
2765 | 2772 | ||
2766 | /* why is this safe? | 2773 | /* why is this safe? |
2767 | * reiserfs_setattr updates i_size in the on disk | 2774 | * reiserfs_setattr updates i_size in the on disk |
2768 | * stat data before allowing vmtruncate to be called. | 2775 | * stat data before allowing vmtruncate to be called. |
2769 | * | 2776 | * |
2770 | * If buffer was put onto the ordered list for this | 2777 | * If buffer was put onto the ordered list for this |
2771 | * transaction, we know for sure either this transaction | 2778 | * transaction, we know for sure either this transaction |
2772 | * or an older one already has updated i_size on disk, | 2779 | * or an older one already has updated i_size on disk, |
2773 | * and this ordered data won't be referenced in the file | 2780 | * and this ordered data won't be referenced in the file |
2774 | * if we crash. | 2781 | * if we crash. |
2775 | * | 2782 | * |
2776 | * if the buffer was put onto the ordered list for an older | 2783 | * if the buffer was put onto the ordered list for an older |
2777 | * transaction, we need to leave it around | 2784 | * transaction, we need to leave it around |
2778 | */ | 2785 | */ |
2779 | if (jh && (jl = jh->jl) | 2786 | if (jh && (jl = jh->jl) |
2780 | && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) | 2787 | && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) |
2781 | ret = 0; | 2788 | ret = 0; |
2782 | } | 2789 | } |
2783 | free_jh: | 2790 | free_jh: |
2784 | if (ret && bh->b_private) { | 2791 | if (ret && bh->b_private) { |
2785 | reiserfs_free_jh(bh); | 2792 | reiserfs_free_jh(bh); |
2786 | } | 2793 | } |
2787 | spin_unlock(&j->j_dirty_buffers_lock); | 2794 | spin_unlock(&j->j_dirty_buffers_lock); |
2788 | unlock_buffer(bh); | 2795 | unlock_buffer(bh); |
2789 | return ret; | 2796 | return ret; |
2790 | } | 2797 | } |
2791 | 2798 | ||
2792 | /* clm -- taken from fs/buffer.c:block_invalidate_page */ | 2799 | /* clm -- taken from fs/buffer.c:block_invalidate_page */ |
2793 | static int reiserfs_invalidatepage(struct page *page, unsigned long offset) | 2800 | static int reiserfs_invalidatepage(struct page *page, unsigned long offset) |
2794 | { | 2801 | { |
2795 | struct buffer_head *head, *bh, *next; | 2802 | struct buffer_head *head, *bh, *next; |
2796 | struct inode *inode = page->mapping->host; | 2803 | struct inode *inode = page->mapping->host; |
2797 | unsigned int curr_off = 0; | 2804 | unsigned int curr_off = 0; |
2798 | int ret = 1; | 2805 | int ret = 1; |
2799 | 2806 | ||
2800 | BUG_ON(!PageLocked(page)); | 2807 | BUG_ON(!PageLocked(page)); |
2801 | 2808 | ||
2802 | if (offset == 0) | 2809 | if (offset == 0) |
2803 | ClearPageChecked(page); | 2810 | ClearPageChecked(page); |
2804 | 2811 | ||
2805 | if (!page_has_buffers(page)) | 2812 | if (!page_has_buffers(page)) |
2806 | goto out; | 2813 | goto out; |
2807 | 2814 | ||
2808 | head = page_buffers(page); | 2815 | head = page_buffers(page); |
2809 | bh = head; | 2816 | bh = head; |
2810 | do { | 2817 | do { |
2811 | unsigned int next_off = curr_off + bh->b_size; | 2818 | unsigned int next_off = curr_off + bh->b_size; |
2812 | next = bh->b_this_page; | 2819 | next = bh->b_this_page; |
2813 | 2820 | ||
2814 | /* | 2821 | /* |
2815 | * is this block fully invalidated? | 2822 | * is this block fully invalidated? |
2816 | */ | 2823 | */ |
2817 | if (offset <= curr_off) { | 2824 | if (offset <= curr_off) { |
2818 | if (invalidatepage_can_drop(inode, bh)) | 2825 | if (invalidatepage_can_drop(inode, bh)) |
2819 | reiserfs_unmap_buffer(bh); | 2826 | reiserfs_unmap_buffer(bh); |
2820 | else | 2827 | else |
2821 | ret = 0; | 2828 | ret = 0; |
2822 | } | 2829 | } |
2823 | curr_off = next_off; | 2830 | curr_off = next_off; |
2824 | bh = next; | 2831 | bh = next; |
2825 | } while (bh != head); | 2832 | } while (bh != head); |
2826 | 2833 | ||
2827 | /* | 2834 | /* |
2828 | * We release buffers only if the entire page is being invalidated. | 2835 | * We release buffers only if the entire page is being invalidated. |
2829 | * The get_block cached value has been unconditionally invalidated, | 2836 | * The get_block cached value has been unconditionally invalidated, |
2830 | * so real IO is not possible anymore. | 2837 | * so real IO is not possible anymore. |
2831 | */ | 2838 | */ |
2832 | if (!offset && ret) | 2839 | if (!offset && ret) |
2833 | ret = try_to_release_page(page, 0); | 2840 | ret = try_to_release_page(page, 0); |
2834 | out: | 2841 | out: |
2835 | return ret; | 2842 | return ret; |
2836 | } | 2843 | } |
2837 | 2844 | ||
2838 | static int reiserfs_set_page_dirty(struct page *page) | 2845 | static int reiserfs_set_page_dirty(struct page *page) |
2839 | { | 2846 | { |
2840 | struct inode *inode = page->mapping->host; | 2847 | struct inode *inode = page->mapping->host; |
2841 | if (reiserfs_file_data_log(inode)) { | 2848 | if (reiserfs_file_data_log(inode)) { |
2842 | SetPageChecked(page); | 2849 | SetPageChecked(page); |
2843 | return __set_page_dirty_nobuffers(page); | 2850 | return __set_page_dirty_nobuffers(page); |
2844 | } | 2851 | } |
2845 | return __set_page_dirty_buffers(page); | 2852 | return __set_page_dirty_buffers(page); |
2846 | } | 2853 | } |
2847 | 2854 | ||
2848 | /* | 2855 | /* |
2849 | * Returns 1 if the page's buffers were dropped. The page is locked. | 2856 | * Returns 1 if the page's buffers were dropped. The page is locked. |
2850 | * | 2857 | * |
2851 | * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads | 2858 | * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads |
2852 | * in the buffers at page_buffers(page). | 2859 | * in the buffers at page_buffers(page). |
2853 | * | 2860 | * |
2854 | * even in -o notail mode, we can't be sure an old mount without -o notail | 2861 | * even in -o notail mode, we can't be sure an old mount without -o notail |
2855 | * didn't create files with tails. | 2862 | * didn't create files with tails. |
2856 | */ | 2863 | */ |
2857 | static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) | 2864 | static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) |
2858 | { | 2865 | { |
2859 | struct inode *inode = page->mapping->host; | 2866 | struct inode *inode = page->mapping->host; |
2860 | struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); | 2867 | struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); |
2861 | struct buffer_head *head; | 2868 | struct buffer_head *head; |
2862 | struct buffer_head *bh; | 2869 | struct buffer_head *bh; |
2863 | int ret = 1; | 2870 | int ret = 1; |
2864 | 2871 | ||
2865 | WARN_ON(PageChecked(page)); | 2872 | WARN_ON(PageChecked(page)); |
2866 | spin_lock(&j->j_dirty_buffers_lock); | 2873 | spin_lock(&j->j_dirty_buffers_lock); |
2867 | head = page_buffers(page); | 2874 | head = page_buffers(page); |
2868 | bh = head; | 2875 | bh = head; |
2869 | do { | 2876 | do { |
2870 | if (bh->b_private) { | 2877 | if (bh->b_private) { |
2871 | if (!buffer_dirty(bh) && !buffer_locked(bh)) { | 2878 | if (!buffer_dirty(bh) && !buffer_locked(bh)) { |
2872 | reiserfs_free_jh(bh); | 2879 | reiserfs_free_jh(bh); |
2873 | } else { | 2880 | } else { |
2874 | ret = 0; | 2881 | ret = 0; |
2875 | break; | 2882 | break; |
2876 | } | 2883 | } |
2877 | } | 2884 | } |
2878 | bh = bh->b_this_page; | 2885 | bh = bh->b_this_page; |
2879 | } while (bh != head); | 2886 | } while (bh != head); |
2880 | if (ret) | 2887 | if (ret) |
2881 | ret = try_to_free_buffers(page); | 2888 | ret = try_to_free_buffers(page); |
2882 | spin_unlock(&j->j_dirty_buffers_lock); | 2889 | spin_unlock(&j->j_dirty_buffers_lock); |
2883 | return ret; | 2890 | return ret; |
2884 | } | 2891 | } |
2885 | 2892 | ||
2886 | /* We thank Mingming Cao for helping us understand in great detail what | 2893 | /* We thank Mingming Cao for helping us understand in great detail what |
2887 | to do in this section of the code. */ | 2894 | to do in this section of the code. */ |
2888 | static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, | 2895 | static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, |
2889 | const struct iovec *iov, loff_t offset, | 2896 | const struct iovec *iov, loff_t offset, |
2890 | unsigned long nr_segs) | 2897 | unsigned long nr_segs) |
2891 | { | 2898 | { |
2892 | struct file *file = iocb->ki_filp; | 2899 | struct file *file = iocb->ki_filp; |
2893 | struct inode *inode = file->f_mapping->host; | 2900 | struct inode *inode = file->f_mapping->host; |
2894 | 2901 | ||
2895 | return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 2902 | return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
2896 | offset, nr_segs, | 2903 | offset, nr_segs, |
2897 | reiserfs_get_blocks_direct_io, NULL); | 2904 | reiserfs_get_blocks_direct_io, NULL); |
2898 | } | 2905 | } |
2899 | 2906 | ||
2900 | int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | 2907 | int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) |
2901 | { | 2908 | { |
2902 | struct inode *inode = dentry->d_inode; | 2909 | struct inode *inode = dentry->d_inode; |
2903 | int error; | 2910 | int error; |
2904 | unsigned int ia_valid = attr->ia_valid; | 2911 | unsigned int ia_valid = attr->ia_valid; |
2905 | reiserfs_write_lock(inode->i_sb); | 2912 | reiserfs_write_lock(inode->i_sb); |
2906 | if (attr->ia_valid & ATTR_SIZE) { | 2913 | if (attr->ia_valid & ATTR_SIZE) { |
2907 | /* version 2 items will be caught by the s_maxbytes check | 2914 | /* version 2 items will be caught by the s_maxbytes check |
2908 | ** done for us in vmtruncate | 2915 | ** done for us in vmtruncate |
2909 | */ | 2916 | */ |
2910 | if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && | 2917 | if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && |
2911 | attr->ia_size > MAX_NON_LFS) { | 2918 | attr->ia_size > MAX_NON_LFS) { |
2912 | error = -EFBIG; | 2919 | error = -EFBIG; |
2913 | goto out; | 2920 | goto out; |
2914 | } | 2921 | } |
2915 | /* fill in hole pointers in the expanding truncate case. */ | 2922 | /* fill in hole pointers in the expanding truncate case. */ |
2916 | if (attr->ia_size > inode->i_size) { | 2923 | if (attr->ia_size > inode->i_size) { |
2917 | error = generic_cont_expand(inode, attr->ia_size); | 2924 | error = generic_cont_expand(inode, attr->ia_size); |
2918 | if (REISERFS_I(inode)->i_prealloc_count > 0) { | 2925 | if (REISERFS_I(inode)->i_prealloc_count > 0) { |
2919 | int err; | 2926 | int err; |
2920 | struct reiserfs_transaction_handle th; | 2927 | struct reiserfs_transaction_handle th; |
2921 | /* we're changing at most 2 bitmaps, inode + super */ | 2928 | /* we're changing at most 2 bitmaps, inode + super */ |
2922 | err = journal_begin(&th, inode->i_sb, 4); | 2929 | err = journal_begin(&th, inode->i_sb, 4); |
2923 | if (!err) { | 2930 | if (!err) { |
2924 | reiserfs_discard_prealloc(&th, inode); | 2931 | reiserfs_discard_prealloc(&th, inode); |
2925 | err = journal_end(&th, inode->i_sb, 4); | 2932 | err = journal_end(&th, inode->i_sb, 4); |
2926 | } | 2933 | } |
2927 | if (err) | 2934 | if (err) |
2928 | error = err; | 2935 | error = err; |
2929 | } | 2936 | } |
2930 | if (error) | 2937 | if (error) |
2931 | goto out; | 2938 | goto out; |
2932 | } | 2939 | } |
2933 | } | 2940 | } |
2934 | 2941 | ||
2935 | if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || | 2942 | if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || |
2936 | ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && | 2943 | ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && |
2937 | (get_inode_sd_version(inode) == STAT_DATA_V1)) { | 2944 | (get_inode_sd_version(inode) == STAT_DATA_V1)) { |
2938 | /* stat data of format v3.5 has 16 bit uid and gid */ | 2945 | /* stat data of format v3.5 has 16 bit uid and gid */ |
2939 | error = -EINVAL; | 2946 | error = -EINVAL; |
2940 | goto out; | 2947 | goto out; |
2941 | } | 2948 | } |
2942 | 2949 | ||
2943 | error = inode_change_ok(inode, attr); | 2950 | error = inode_change_ok(inode, attr); |
2944 | if (!error) { | 2951 | if (!error) { |
2945 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 2952 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || |
2946 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 2953 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { |
2947 | error = reiserfs_chown_xattrs(inode, attr); | 2954 | error = reiserfs_chown_xattrs(inode, attr); |
2948 | 2955 | ||
2949 | if (!error) { | 2956 | if (!error) { |
2950 | struct reiserfs_transaction_handle th; | 2957 | struct reiserfs_transaction_handle th; |
2951 | int jbegin_count = | 2958 | int jbegin_count = |
2952 | 2 * | 2959 | 2 * |
2953 | (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + | 2960 | (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + |
2954 | REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + | 2961 | REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + |
2955 | 2; | 2962 | 2; |
2956 | 2963 | ||
2957 | /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ | 2964 | /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ |
2958 | error = | 2965 | error = |
2959 | journal_begin(&th, inode->i_sb, | 2966 | journal_begin(&th, inode->i_sb, |
2960 | jbegin_count); | 2967 | jbegin_count); |
2961 | if (error) | 2968 | if (error) |
2962 | goto out; | 2969 | goto out; |
2963 | error = | 2970 | error = |
2964 | DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; | 2971 | DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; |
2965 | if (error) { | 2972 | if (error) { |
2966 | journal_end(&th, inode->i_sb, | 2973 | journal_end(&th, inode->i_sb, |
2967 | jbegin_count); | 2974 | jbegin_count); |
2968 | goto out; | 2975 | goto out; |
2969 | } | 2976 | } |
2970 | /* Update corresponding info in inode so that everything is in | 2977 | /* Update corresponding info in inode so that everything is in |
2971 | * one transaction */ | 2978 | * one transaction */ |
2972 | if (attr->ia_valid & ATTR_UID) | 2979 | if (attr->ia_valid & ATTR_UID) |
2973 | inode->i_uid = attr->ia_uid; | 2980 | inode->i_uid = attr->ia_uid; |
2974 | if (attr->ia_valid & ATTR_GID) | 2981 | if (attr->ia_valid & ATTR_GID) |
2975 | inode->i_gid = attr->ia_gid; | 2982 | inode->i_gid = attr->ia_gid; |
2976 | mark_inode_dirty(inode); | 2983 | mark_inode_dirty(inode); |
2977 | error = | 2984 | error = |
2978 | journal_end(&th, inode->i_sb, jbegin_count); | 2985 | journal_end(&th, inode->i_sb, jbegin_count); |
2979 | } | 2986 | } |
2980 | } | 2987 | } |
2981 | if (!error) | 2988 | if (!error) |
2982 | error = inode_setattr(inode, attr); | 2989 | error = inode_setattr(inode, attr); |
2983 | } | 2990 | } |
2984 | 2991 | ||
2985 | if (!error && reiserfs_posixacl(inode->i_sb)) { | 2992 | if (!error && reiserfs_posixacl(inode->i_sb)) { |
2986 | if (attr->ia_valid & ATTR_MODE) | 2993 | if (attr->ia_valid & ATTR_MODE) |
2987 | error = reiserfs_acl_chmod(inode); | 2994 | error = reiserfs_acl_chmod(inode); |
2988 | } | 2995 | } |
2989 | 2996 | ||
2990 | out: | 2997 | out: |
2991 | reiserfs_write_unlock(inode->i_sb); | 2998 | reiserfs_write_unlock(inode->i_sb); |
2992 | return error; | 2999 | return error; |
2993 | } | 3000 | } |
2994 | 3001 | ||
2995 | struct address_space_operations reiserfs_address_space_operations = { | 3002 | struct address_space_operations reiserfs_address_space_operations = { |
2996 | .writepage = reiserfs_writepage, | 3003 | .writepage = reiserfs_writepage, |
2997 | .readpage = reiserfs_readpage, | 3004 | .readpage = reiserfs_readpage, |
2998 | .readpages = reiserfs_readpages, | 3005 | .readpages = reiserfs_readpages, |
2999 | .releasepage = reiserfs_releasepage, | 3006 | .releasepage = reiserfs_releasepage, |
3000 | .invalidatepage = reiserfs_invalidatepage, | 3007 | .invalidatepage = reiserfs_invalidatepage, |
3001 | .sync_page = block_sync_page, | 3008 | .sync_page = block_sync_page, |
3002 | .prepare_write = reiserfs_prepare_write, | 3009 | .prepare_write = reiserfs_prepare_write, |
3003 | .commit_write = reiserfs_commit_write, | 3010 | .commit_write = reiserfs_commit_write, |
3004 | .bmap = reiserfs_aop_bmap, | 3011 | .bmap = reiserfs_aop_bmap, |
3005 | .direct_IO = reiserfs_direct_IO, | 3012 | .direct_IO = reiserfs_direct_IO, |
3006 | .set_page_dirty = reiserfs_set_page_dirty, | 3013 | .set_page_dirty = reiserfs_set_page_dirty, |
3007 | }; | 3014 | }; |
3008 | 3015 |
fs/reiserfs/journal.c
1 | /* | 1 | /* |
2 | ** Write ahead logging implementation copyright Chris Mason 2000 | 2 | ** Write ahead logging implementation copyright Chris Mason 2000 |
3 | ** | 3 | ** |
4 | ** The background commits make this code very interelated, and | 4 | ** The background commits make this code very interelated, and |
5 | ** overly complex. I need to rethink things a bit....The major players: | 5 | ** overly complex. I need to rethink things a bit....The major players: |
6 | ** | 6 | ** |
7 | ** journal_begin -- call with the number of blocks you expect to log. | 7 | ** journal_begin -- call with the number of blocks you expect to log. |
8 | ** If the current transaction is too | 8 | ** If the current transaction is too |
9 | ** old, it will block until the current transaction is | 9 | ** old, it will block until the current transaction is |
10 | ** finished, and then start a new one. | 10 | ** finished, and then start a new one. |
11 | ** Usually, your transaction will get joined in with | 11 | ** Usually, your transaction will get joined in with |
12 | ** previous ones for speed. | 12 | ** previous ones for speed. |
13 | ** | 13 | ** |
14 | ** journal_join -- same as journal_begin, but won't block on the current | 14 | ** journal_join -- same as journal_begin, but won't block on the current |
15 | ** transaction regardless of age. Don't ever call | 15 | ** transaction regardless of age. Don't ever call |
16 | ** this. Ever. There are only two places it should be | 16 | ** this. Ever. There are only two places it should be |
17 | ** called from, and they are both inside this file. | 17 | ** called from, and they are both inside this file. |
18 | ** | 18 | ** |
19 | ** journal_mark_dirty -- adds blocks into this transaction. clears any flags | 19 | ** journal_mark_dirty -- adds blocks into this transaction. clears any flags |
20 | ** that might make them get sent to disk | 20 | ** that might make them get sent to disk |
21 | ** and then marks them BH_JDirty. Puts the buffer head | 21 | ** and then marks them BH_JDirty. Puts the buffer head |
22 | ** into the current transaction hash. | 22 | ** into the current transaction hash. |
23 | ** | 23 | ** |
24 | ** journal_end -- if the current transaction is batchable, it does nothing | 24 | ** journal_end -- if the current transaction is batchable, it does nothing |
25 | ** otherwise, it could do an async/synchronous commit, or | 25 | ** otherwise, it could do an async/synchronous commit, or |
26 | ** a full flush of all log and real blocks in the | 26 | ** a full flush of all log and real blocks in the |
27 | ** transaction. | 27 | ** transaction. |
28 | ** | 28 | ** |
29 | ** flush_old_commits -- if the current transaction is too old, it is ended and | 29 | ** flush_old_commits -- if the current transaction is too old, it is ended and |
30 | ** commit blocks are sent to disk. Forces commit blocks | 30 | ** commit blocks are sent to disk. Forces commit blocks |
31 | ** to disk for all backgrounded commits that have been | 31 | ** to disk for all backgrounded commits that have been |
32 | ** around too long. | 32 | ** around too long. |
33 | ** -- Note, if you call this as an immediate flush from | 33 | ** -- Note, if you call this as an immediate flush from |
34 | ** from within kupdate, it will ignore the immediate flag | 34 | ** from within kupdate, it will ignore the immediate flag |
35 | */ | 35 | */ |
36 | 36 | ||
37 | #include <linux/config.h> | 37 | #include <linux/config.h> |
38 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
39 | #include <asm/system.h> | 39 | #include <asm/system.h> |
40 | 40 | ||
41 | #include <linux/time.h> | 41 | #include <linux/time.h> |
42 | #include <asm/semaphore.h> | 42 | #include <asm/semaphore.h> |
43 | 43 | ||
44 | #include <linux/vmalloc.h> | 44 | #include <linux/vmalloc.h> |
45 | #include <linux/reiserfs_fs.h> | 45 | #include <linux/reiserfs_fs.h> |
46 | 46 | ||
47 | #include <linux/kernel.h> | 47 | #include <linux/kernel.h> |
48 | #include <linux/errno.h> | 48 | #include <linux/errno.h> |
49 | #include <linux/fcntl.h> | 49 | #include <linux/fcntl.h> |
50 | #include <linux/stat.h> | 50 | #include <linux/stat.h> |
51 | #include <linux/string.h> | 51 | #include <linux/string.h> |
52 | #include <linux/smp_lock.h> | 52 | #include <linux/smp_lock.h> |
53 | #include <linux/buffer_head.h> | 53 | #include <linux/buffer_head.h> |
54 | #include <linux/workqueue.h> | 54 | #include <linux/workqueue.h> |
55 | #include <linux/writeback.h> | 55 | #include <linux/writeback.h> |
56 | #include <linux/blkdev.h> | 56 | #include <linux/blkdev.h> |
57 | 57 | ||
58 | /* gets a struct reiserfs_journal_list * from a list head */ | 58 | /* gets a struct reiserfs_journal_list * from a list head */ |
59 | #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ | 59 | #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ |
60 | j_list)) | 60 | j_list)) |
61 | #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ | 61 | #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ |
62 | j_working_list)) | 62 | j_working_list)) |
63 | 63 | ||
64 | /* the number of mounted filesystems. This is used to decide when to | 64 | /* the number of mounted filesystems. This is used to decide when to |
65 | ** start and kill the commit workqueue | 65 | ** start and kill the commit workqueue |
66 | */ | 66 | */ |
67 | static int reiserfs_mounted_fs_count; | 67 | static int reiserfs_mounted_fs_count; |
68 | 68 | ||
69 | static struct workqueue_struct *commit_wq; | 69 | static struct workqueue_struct *commit_wq; |
70 | 70 | ||
71 | #define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit | 71 | #define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit |
72 | structs at 4k */ | 72 | structs at 4k */ |
73 | #define BUFNR 64 /*read ahead */ | 73 | #define BUFNR 64 /*read ahead */ |
74 | 74 | ||
75 | /* cnode stat bits. Move these into reiserfs_fs.h */ | 75 | /* cnode stat bits. Move these into reiserfs_fs.h */ |
76 | 76 | ||
77 | #define BLOCK_FREED 2 /* this block was freed, and can't be written. */ | 77 | #define BLOCK_FREED 2 /* this block was freed, and can't be written. */ |
78 | #define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ | 78 | #define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ |
79 | 79 | ||
80 | #define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ | 80 | #define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ |
81 | #define BLOCK_DIRTIED 5 | 81 | #define BLOCK_DIRTIED 5 |
82 | 82 | ||
83 | /* journal list state bits */ | 83 | /* journal list state bits */ |
84 | #define LIST_TOUCHED 1 | 84 | #define LIST_TOUCHED 1 |
85 | #define LIST_DIRTY 2 | 85 | #define LIST_DIRTY 2 |
86 | #define LIST_COMMIT_PENDING 4 /* someone will commit this list */ | 86 | #define LIST_COMMIT_PENDING 4 /* someone will commit this list */ |
87 | 87 | ||
88 | /* flags for do_journal_end */ | 88 | /* flags for do_journal_end */ |
89 | #define FLUSH_ALL 1 /* flush commit and real blocks */ | 89 | #define FLUSH_ALL 1 /* flush commit and real blocks */ |
90 | #define COMMIT_NOW 2 /* end and commit this transaction */ | 90 | #define COMMIT_NOW 2 /* end and commit this transaction */ |
91 | #define WAIT 4 /* wait for the log blocks to hit the disk */ | 91 | #define WAIT 4 /* wait for the log blocks to hit the disk */ |
92 | 92 | ||
93 | static int do_journal_end(struct reiserfs_transaction_handle *, | 93 | static int do_journal_end(struct reiserfs_transaction_handle *, |
94 | struct super_block *, unsigned long nblocks, | 94 | struct super_block *, unsigned long nblocks, |
95 | int flags); | 95 | int flags); |
96 | static int flush_journal_list(struct super_block *s, | 96 | static int flush_journal_list(struct super_block *s, |
97 | struct reiserfs_journal_list *jl, int flushall); | 97 | struct reiserfs_journal_list *jl, int flushall); |
98 | static int flush_commit_list(struct super_block *s, | 98 | static int flush_commit_list(struct super_block *s, |
99 | struct reiserfs_journal_list *jl, int flushall); | 99 | struct reiserfs_journal_list *jl, int flushall); |
100 | static int can_dirty(struct reiserfs_journal_cnode *cn); | 100 | static int can_dirty(struct reiserfs_journal_cnode *cn); |
101 | static int journal_join(struct reiserfs_transaction_handle *th, | 101 | static int journal_join(struct reiserfs_transaction_handle *th, |
102 | struct super_block *p_s_sb, unsigned long nblocks); | 102 | struct super_block *p_s_sb, unsigned long nblocks); |
103 | static int release_journal_dev(struct super_block *super, | 103 | static int release_journal_dev(struct super_block *super, |
104 | struct reiserfs_journal *journal); | 104 | struct reiserfs_journal *journal); |
105 | static int dirty_one_transaction(struct super_block *s, | 105 | static int dirty_one_transaction(struct super_block *s, |
106 | struct reiserfs_journal_list *jl); | 106 | struct reiserfs_journal_list *jl); |
107 | static void flush_async_commits(void *p); | 107 | static void flush_async_commits(void *p); |
108 | static void queue_log_writer(struct super_block *s); | 108 | static void queue_log_writer(struct super_block *s); |
109 | 109 | ||
110 | /* values for join in do_journal_begin_r */ | 110 | /* values for join in do_journal_begin_r */ |
111 | enum { | 111 | enum { |
112 | JBEGIN_REG = 0, /* regular journal begin */ | 112 | JBEGIN_REG = 0, /* regular journal begin */ |
113 | JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ | 113 | JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ |
114 | JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ | 114 | JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ |
115 | }; | 115 | }; |
116 | 116 | ||
117 | static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | 117 | static int do_journal_begin_r(struct reiserfs_transaction_handle *th, |
118 | struct super_block *p_s_sb, | 118 | struct super_block *p_s_sb, |
119 | unsigned long nblocks, int join); | 119 | unsigned long nblocks, int join); |
120 | 120 | ||
/* Zero the running transaction's cnode hash table (JOURNAL_HASH_SIZE
** buckets of cnode pointers) so a fresh transaction starts with no
** blocks hashed.
*/
static void init_journal_hash(struct super_block *p_s_sb)
{
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
	memset(journal->j_hash_table, 0,
	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
}
127 | 127 | ||
128 | /* | 128 | /* |
129 | ** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to | 129 | ** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to |
130 | ** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for | 130 | ** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for |
131 | ** more details. | 131 | ** more details. |
132 | */ | 132 | */ |
133 | static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) | 133 | static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) |
134 | { | 134 | { |
135 | if (bh) { | 135 | if (bh) { |
136 | clear_buffer_dirty(bh); | 136 | clear_buffer_dirty(bh); |
137 | clear_buffer_journal_test(bh); | 137 | clear_buffer_journal_test(bh); |
138 | } | 138 | } |
139 | return 0; | 139 | return 0; |
140 | } | 140 | } |
141 | 141 | ||
142 | static void disable_barrier(struct super_block *s) | 142 | static void disable_barrier(struct super_block *s) |
143 | { | 143 | { |
144 | REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); | 144 | REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); |
145 | printk("reiserfs: disabling flush barriers on %s\n", | 145 | printk("reiserfs: disabling flush barriers on %s\n", |
146 | reiserfs_bdevname(s)); | 146 | reiserfs_bdevname(s)); |
147 | } | 147 | } |
148 | 148 | ||
149 | static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block | 149 | static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block |
150 | *p_s_sb) | 150 | *p_s_sb) |
151 | { | 151 | { |
152 | struct reiserfs_bitmap_node *bn; | 152 | struct reiserfs_bitmap_node *bn; |
153 | static int id; | 153 | static int id; |
154 | 154 | ||
155 | bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS); | 155 | bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS); |
156 | if (!bn) { | 156 | if (!bn) { |
157 | return NULL; | 157 | return NULL; |
158 | } | 158 | } |
159 | bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS); | 159 | bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS); |
160 | if (!bn->data) { | 160 | if (!bn->data) { |
161 | kfree(bn); | 161 | kfree(bn); |
162 | return NULL; | 162 | return NULL; |
163 | } | 163 | } |
164 | bn->id = id++; | 164 | bn->id = id++; |
165 | INIT_LIST_HEAD(&bn->list); | 165 | INIT_LIST_HEAD(&bn->list); |
166 | return bn; | 166 | return bn; |
167 | } | 167 | } |
168 | 168 | ||
/*
** Hand out a zeroed bitmap node.  Nodes are recycled through
** journal->j_bitmap_nodes; when that cache is empty we allocate a
** fresh one, yielding and retrying forever — this path is not allowed
** to fail.
*/
static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb)
{
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
	struct reiserfs_bitmap_node *bn = NULL;
	struct list_head *entry = journal->j_bitmap_nodes.next;

	journal->j_used_bitmap_nodes++;
      repeat:

	if (entry != &journal->j_bitmap_nodes) {
		/* reuse a cached node; its data block may be stale and
		** must be re-zeroed before use */
		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
		list_del(entry);
		memset(bn->data, 0, p_s_sb->s_blocksize);
		journal->j_free_bitmap_nodes--;
		return bn;
	}
	/* cache was empty: allocate, looping until it succeeds.
	** NOTE(review): 'entry' is not re-read after the retry, so the
	** loop never re-checks the free list — confirm this is safe
	** under the locking in effect here (BKL era). */
	bn = allocate_bitmap_node(p_s_sb);
	if (!bn) {
		yield();
		goto repeat;
	}
	return bn;
}
192 | static inline void free_bitmap_node(struct super_block *p_s_sb, | 192 | static inline void free_bitmap_node(struct super_block *p_s_sb, |
193 | struct reiserfs_bitmap_node *bn) | 193 | struct reiserfs_bitmap_node *bn) |
194 | { | 194 | { |
195 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); | 195 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); |
196 | journal->j_used_bitmap_nodes--; | 196 | journal->j_used_bitmap_nodes--; |
197 | if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { | 197 | if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { |
198 | kfree(bn->data); | 198 | kfree(bn->data); |
199 | kfree(bn); | 199 | kfree(bn); |
200 | } else { | 200 | } else { |
201 | list_add(&bn->list, &journal->j_bitmap_nodes); | 201 | list_add(&bn->list, &journal->j_bitmap_nodes); |
202 | journal->j_free_bitmap_nodes++; | 202 | journal->j_free_bitmap_nodes++; |
203 | } | 203 | } |
204 | } | 204 | } |
205 | 205 | ||
206 | static void allocate_bitmap_nodes(struct super_block *p_s_sb) | 206 | static void allocate_bitmap_nodes(struct super_block *p_s_sb) |
207 | { | 207 | { |
208 | int i; | 208 | int i; |
209 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); | 209 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); |
210 | struct reiserfs_bitmap_node *bn = NULL; | 210 | struct reiserfs_bitmap_node *bn = NULL; |
211 | for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) { | 211 | for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) { |
212 | bn = allocate_bitmap_node(p_s_sb); | 212 | bn = allocate_bitmap_node(p_s_sb); |
213 | if (bn) { | 213 | if (bn) { |
214 | list_add(&bn->list, &journal->j_bitmap_nodes); | 214 | list_add(&bn->list, &journal->j_bitmap_nodes); |
215 | journal->j_free_bitmap_nodes++; | 215 | journal->j_free_bitmap_nodes++; |
216 | } else { | 216 | } else { |
217 | break; // this is ok, we'll try again when more are needed | 217 | break; // this is ok, we'll try again when more are needed |
218 | } | 218 | } |
219 | } | 219 | } |
220 | } | 220 | } |
221 | 221 | ||
222 | static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block, | 222 | static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block, |
223 | struct reiserfs_list_bitmap *jb) | 223 | struct reiserfs_list_bitmap *jb) |
224 | { | 224 | { |
225 | int bmap_nr = block / (p_s_sb->s_blocksize << 3); | 225 | int bmap_nr = block / (p_s_sb->s_blocksize << 3); |
226 | int bit_nr = block % (p_s_sb->s_blocksize << 3); | 226 | int bit_nr = block % (p_s_sb->s_blocksize << 3); |
227 | 227 | ||
228 | if (!jb->bitmaps[bmap_nr]) { | 228 | if (!jb->bitmaps[bmap_nr]) { |
229 | jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb); | 229 | jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb); |
230 | } | 230 | } |
231 | set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data); | 231 | set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data); |
232 | return 0; | 232 | return 0; |
233 | } | 233 | } |
234 | 234 | ||
235 | static void cleanup_bitmap_list(struct super_block *p_s_sb, | 235 | static void cleanup_bitmap_list(struct super_block *p_s_sb, |
236 | struct reiserfs_list_bitmap *jb) | 236 | struct reiserfs_list_bitmap *jb) |
237 | { | 237 | { |
238 | int i; | 238 | int i; |
239 | if (jb->bitmaps == NULL) | 239 | if (jb->bitmaps == NULL) |
240 | return; | 240 | return; |
241 | 241 | ||
242 | for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) { | 242 | for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) { |
243 | if (jb->bitmaps[i]) { | 243 | if (jb->bitmaps[i]) { |
244 | free_bitmap_node(p_s_sb, jb->bitmaps[i]); | 244 | free_bitmap_node(p_s_sb, jb->bitmaps[i]); |
245 | jb->bitmaps[i] = NULL; | 245 | jb->bitmaps[i] = NULL; |
246 | } | 246 | } |
247 | } | 247 | } |
248 | } | 248 | } |
249 | 249 | ||
250 | /* | 250 | /* |
251 | ** only call this on FS unmount. | 251 | ** only call this on FS unmount. |
252 | */ | 252 | */ |
253 | static int free_list_bitmaps(struct super_block *p_s_sb, | 253 | static int free_list_bitmaps(struct super_block *p_s_sb, |
254 | struct reiserfs_list_bitmap *jb_array) | 254 | struct reiserfs_list_bitmap *jb_array) |
255 | { | 255 | { |
256 | int i; | 256 | int i; |
257 | struct reiserfs_list_bitmap *jb; | 257 | struct reiserfs_list_bitmap *jb; |
258 | for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { | 258 | for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { |
259 | jb = jb_array + i; | 259 | jb = jb_array + i; |
260 | jb->journal_list = NULL; | 260 | jb->journal_list = NULL; |
261 | cleanup_bitmap_list(p_s_sb, jb); | 261 | cleanup_bitmap_list(p_s_sb, jb); |
262 | vfree(jb->bitmaps); | 262 | vfree(jb->bitmaps); |
263 | jb->bitmaps = NULL; | 263 | jb->bitmaps = NULL; |
264 | } | 264 | } |
265 | return 0; | 265 | return 0; |
266 | } | 266 | } |
267 | 267 | ||
268 | static int free_bitmap_nodes(struct super_block *p_s_sb) | 268 | static int free_bitmap_nodes(struct super_block *p_s_sb) |
269 | { | 269 | { |
270 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); | 270 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); |
271 | struct list_head *next = journal->j_bitmap_nodes.next; | 271 | struct list_head *next = journal->j_bitmap_nodes.next; |
272 | struct reiserfs_bitmap_node *bn; | 272 | struct reiserfs_bitmap_node *bn; |
273 | 273 | ||
274 | while (next != &journal->j_bitmap_nodes) { | 274 | while (next != &journal->j_bitmap_nodes) { |
275 | bn = list_entry(next, struct reiserfs_bitmap_node, list); | 275 | bn = list_entry(next, struct reiserfs_bitmap_node, list); |
276 | list_del(next); | 276 | list_del(next); |
277 | kfree(bn->data); | 277 | kfree(bn->data); |
278 | kfree(bn); | 278 | kfree(bn); |
279 | next = journal->j_bitmap_nodes.next; | 279 | next = journal->j_bitmap_nodes.next; |
280 | journal->j_free_bitmap_nodes--; | 280 | journal->j_free_bitmap_nodes--; |
281 | } | 281 | } |
282 | 282 | ||
283 | return 0; | 283 | return 0; |
284 | } | 284 | } |
285 | 285 | ||
286 | /* | 286 | /* |
287 | ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. | 287 | ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. |
288 | ** jb_array is the array to be filled in. | 288 | ** jb_array is the array to be filled in. |
289 | */ | 289 | */ |
290 | int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, | 290 | int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, |
291 | struct reiserfs_list_bitmap *jb_array, | 291 | struct reiserfs_list_bitmap *jb_array, |
292 | int bmap_nr) | 292 | int bmap_nr) |
293 | { | 293 | { |
294 | int i; | 294 | int i; |
295 | int failed = 0; | 295 | int failed = 0; |
296 | struct reiserfs_list_bitmap *jb; | 296 | struct reiserfs_list_bitmap *jb; |
297 | int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *); | 297 | int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *); |
298 | 298 | ||
299 | for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { | 299 | for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { |
300 | jb = jb_array + i; | 300 | jb = jb_array + i; |
301 | jb->journal_list = NULL; | 301 | jb->journal_list = NULL; |
302 | jb->bitmaps = vmalloc(mem); | 302 | jb->bitmaps = vmalloc(mem); |
303 | if (!jb->bitmaps) { | 303 | if (!jb->bitmaps) { |
304 | reiserfs_warning(p_s_sb, | 304 | reiserfs_warning(p_s_sb, |
305 | "clm-2000, unable to allocate bitmaps for journal lists"); | 305 | "clm-2000, unable to allocate bitmaps for journal lists"); |
306 | failed = 1; | 306 | failed = 1; |
307 | break; | 307 | break; |
308 | } | 308 | } |
309 | memset(jb->bitmaps, 0, mem); | 309 | memset(jb->bitmaps, 0, mem); |
310 | } | 310 | } |
311 | if (failed) { | 311 | if (failed) { |
312 | free_list_bitmaps(p_s_sb, jb_array); | 312 | free_list_bitmaps(p_s_sb, jb_array); |
313 | return -1; | 313 | return -1; |
314 | } | 314 | } |
315 | return 0; | 315 | return 0; |
316 | } | 316 | } |
317 | 317 | ||
318 | /* | 318 | /* |
319 | ** find an available list bitmap. If you can't find one, flush a commit list | 319 | ** find an available list bitmap. If you can't find one, flush a commit list |
320 | ** and try again | 320 | ** and try again |
321 | */ | 321 | */ |
/*
** Find an available list bitmap for journal list 'jl'.  Cycles through
** the JOURNAL_NUM_BITMAPS slots (up to three full passes, advancing
** j_list_bitmap_index round-robin).  A slot still owned by another
** journal list has that list's commit flushed, which should release
** the bitmap; if even that fails, NULL is returned.
*/
static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb,
						    struct reiserfs_journal_list
						    *jl)
{
	int i, j;
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
	struct reiserfs_list_bitmap *jb = NULL;

	for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
		i = journal->j_list_bitmap_index;
		journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
		jb = journal->j_list_bitmap + i;
		if (journal->j_list_bitmap[i].journal_list) {
			/* busy slot: flushing the owning list's commit
			** clears journal_list when it completes */
			flush_commit_list(p_s_sb,
					  journal->j_list_bitmap[i].
					  journal_list, 1);
			if (!journal->j_list_bitmap[i].journal_list) {
				break;
			}
		} else {
			break;
		}
	}
	if (jb->journal_list) {	/* double check to make sure if flushed correctly */
		return NULL;
	}
	jb->journal_list = jl;
	return jb;
}
351 | 351 | ||
352 | /* | 352 | /* |
353 | ** allocates a new chunk of X nodes, and links them all together as a list. | 353 | ** allocates a new chunk of X nodes, and links them all together as a list. |
354 | ** Uses the cnode->next and cnode->prev pointers | 354 | ** Uses the cnode->next and cnode->prev pointers |
355 | ** returns NULL on failure | 355 | ** returns NULL on failure |
356 | */ | 356 | */ |
357 | static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) | 357 | static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) |
358 | { | 358 | { |
359 | struct reiserfs_journal_cnode *head; | 359 | struct reiserfs_journal_cnode *head; |
360 | int i; | 360 | int i; |
361 | if (num_cnodes <= 0) { | 361 | if (num_cnodes <= 0) { |
362 | return NULL; | 362 | return NULL; |
363 | } | 363 | } |
364 | head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); | 364 | head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); |
365 | if (!head) { | 365 | if (!head) { |
366 | return NULL; | 366 | return NULL; |
367 | } | 367 | } |
368 | memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)); | 368 | memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)); |
369 | head[0].prev = NULL; | 369 | head[0].prev = NULL; |
370 | head[0].next = head + 1; | 370 | head[0].next = head + 1; |
371 | for (i = 1; i < num_cnodes; i++) { | 371 | for (i = 1; i < num_cnodes; i++) { |
372 | head[i].prev = head + (i - 1); | 372 | head[i].prev = head + (i - 1); |
373 | head[i].next = head + (i + 1); /* if last one, overwrite it after the if */ | 373 | head[i].next = head + (i + 1); /* if last one, overwrite it after the if */ |
374 | } | 374 | } |
375 | head[num_cnodes - 1].next = NULL; | 375 | head[num_cnodes - 1].next = NULL; |
376 | return head; | 376 | return head; |
377 | } | 377 | } |
378 | 378 | ||
379 | /* | 379 | /* |
380 | ** pulls a cnode off the free list, or returns NULL on failure | 380 | ** pulls a cnode off the free list, or returns NULL on failure |
381 | */ | 381 | */ |
382 | static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) | 382 | static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) |
383 | { | 383 | { |
384 | struct reiserfs_journal_cnode *cn; | 384 | struct reiserfs_journal_cnode *cn; |
385 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); | 385 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); |
386 | 386 | ||
387 | reiserfs_check_lock_depth(p_s_sb, "get_cnode"); | 387 | reiserfs_check_lock_depth(p_s_sb, "get_cnode"); |
388 | 388 | ||
389 | if (journal->j_cnode_free <= 0) { | 389 | if (journal->j_cnode_free <= 0) { |
390 | return NULL; | 390 | return NULL; |
391 | } | 391 | } |
392 | journal->j_cnode_used++; | 392 | journal->j_cnode_used++; |
393 | journal->j_cnode_free--; | 393 | journal->j_cnode_free--; |
394 | cn = journal->j_cnode_free_list; | 394 | cn = journal->j_cnode_free_list; |
395 | if (!cn) { | 395 | if (!cn) { |
396 | return cn; | 396 | return cn; |
397 | } | 397 | } |
398 | if (cn->next) { | 398 | if (cn->next) { |
399 | cn->next->prev = NULL; | 399 | cn->next->prev = NULL; |
400 | } | 400 | } |
401 | journal->j_cnode_free_list = cn->next; | 401 | journal->j_cnode_free_list = cn->next; |
402 | memset(cn, 0, sizeof(struct reiserfs_journal_cnode)); | 402 | memset(cn, 0, sizeof(struct reiserfs_journal_cnode)); |
403 | return cn; | 403 | return cn; |
404 | } | 404 | } |
405 | 405 | ||
406 | /* | 406 | /* |
407 | ** returns a cnode to the free list | 407 | ** returns a cnode to the free list |
408 | */ | 408 | */ |
409 | static void free_cnode(struct super_block *p_s_sb, | 409 | static void free_cnode(struct super_block *p_s_sb, |
410 | struct reiserfs_journal_cnode *cn) | 410 | struct reiserfs_journal_cnode *cn) |
411 | { | 411 | { |
412 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); | 412 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); |
413 | 413 | ||
414 | reiserfs_check_lock_depth(p_s_sb, "free_cnode"); | 414 | reiserfs_check_lock_depth(p_s_sb, "free_cnode"); |
415 | 415 | ||
416 | journal->j_cnode_used--; | 416 | journal->j_cnode_used--; |
417 | journal->j_cnode_free++; | 417 | journal->j_cnode_free++; |
418 | /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ | 418 | /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ |
419 | cn->next = journal->j_cnode_free_list; | 419 | cn->next = journal->j_cnode_free_list; |
420 | if (journal->j_cnode_free_list) { | 420 | if (journal->j_cnode_free_list) { |
421 | journal->j_cnode_free_list->prev = cn; | 421 | journal->j_cnode_free_list->prev = cn; |
422 | } | 422 | } |
423 | cn->prev = NULL; /* not needed with the memset, but I might kill the memset, and forget to do this */ | 423 | cn->prev = NULL; /* not needed with the memset, but I might kill the memset, and forget to do this */ |
424 | journal->j_cnode_free_list = cn; | 424 | journal->j_cnode_free_list = cn; |
425 | } | 425 | } |
426 | 426 | ||
/* Clear both journal-prepare state bits on a buffer: it is no longer
** prepared for logging and needs no dirty-bit restore.
*/
static void clear_prepared_bits(struct buffer_head *bh)
{
	clear_buffer_journal_prepared(bh);
	clear_buffer_journal_restore_dirty(bh);
}
432 | 432 | ||
433 | /* utility function to force a BUG if it is called without the big | 433 | /* utility function to force a BUG if it is called without the big |
434 | ** kernel lock held. caller is the string printed just before calling BUG() | 434 | ** kernel lock held. caller is the string printed just before calling BUG() |
435 | */ | 435 | */ |
/* utility function to force a BUG if it is called without the big
** kernel lock held.  caller is the string printed just before calling BUG()
** On SMP, current->lock_depth < 0 means the BKL is not held by this
** task; the check is compiled out on UP builds.
*/
void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
{
#ifdef CONFIG_SMP
	if (current->lock_depth < 0) {
		reiserfs_panic(sb, "%s called without kernel lock held",
			       caller);
	}
#else
	;
#endif
}
447 | 447 | ||
448 | /* return a cnode with same dev, block number and size in table, or null if not found */ | 448 | /* return a cnode with same dev, block number and size in table, or null if not found */ |
449 | static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct | 449 | static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct |
450 | super_block | 450 | super_block |
451 | *sb, | 451 | *sb, |
452 | struct | 452 | struct |
453 | reiserfs_journal_cnode | 453 | reiserfs_journal_cnode |
454 | **table, | 454 | **table, |
455 | long bl) | 455 | long bl) |
456 | { | 456 | { |
457 | struct reiserfs_journal_cnode *cn; | 457 | struct reiserfs_journal_cnode *cn; |
458 | cn = journal_hash(table, sb, bl); | 458 | cn = journal_hash(table, sb, bl); |
459 | while (cn) { | 459 | while (cn) { |
460 | if (cn->blocknr == bl && cn->sb == sb) | 460 | if (cn->blocknr == bl && cn->sb == sb) |
461 | return cn; | 461 | return cn; |
462 | cn = cn->hnext; | 462 | cn = cn->hnext; |
463 | } | 463 | } |
464 | return (struct reiserfs_journal_cnode *)0; | 464 | return (struct reiserfs_journal_cnode *)0; |
465 | } | 465 | } |
466 | 466 | ||
467 | /* | 467 | /* |
468 | ** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated | 468 | ** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated |
469 | ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever | 469 | ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever |
470 | ** being overwritten by a replay after crashing. | 470 | ** being overwritten by a replay after crashing. |
471 | ** | 471 | ** |
472 | ** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting | 472 | ** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting |
473 | ** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make | 473 | ** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make |
474 | ** sure you never write the block without logging it. | 474 | ** sure you never write the block without logging it. |
475 | ** | 475 | ** |
476 | ** next_zero_bit is a suggestion about the next block to try for find_forward. | 476 | ** next_zero_bit is a suggestion about the next block to try for find_forward. |
477 | ** when bl is rejected because it is set in a journal list bitmap, we search | 477 | ** when bl is rejected because it is set in a journal list bitmap, we search |
478 | ** for the next zero bit in the bitmap that rejected bl. Then, we return that | 478 | ** for the next zero bit in the bitmap that rejected bl. Then, we return that |
479 | ** through next_zero_bit for find_forward to try. | 479 | ** through next_zero_bit for find_forward to try. |
480 | ** | 480 | ** |
481 | ** Just because we return something in next_zero_bit does not mean we won't | 481 | ** Just because we return something in next_zero_bit does not mean we won't |
482 | ** reject it on the next call to reiserfs_in_journal | 482 | ** reject it on the next call to reiserfs_in_journal |
483 | ** | 483 | ** |
484 | */ | 484 | */ |
/* Return 1 if block (bmap_nr, bit_nr) is still owned by the journal
** and must not be reallocated yet; 0 if it is safe for reuse.  With
** search_all set, every journal list bitmap and the old-transaction
** hash are consulted; without it only the current-transaction hash is
** checked (and a hit there is a BUG — freeing removes blocks from the
** running transaction).  On a bitmap rejection, *next_zero_bit is set
** to a suggested next block for find_forward to try; the suggestion
** may itself be rejected on the next call.
*/
int reiserfs_in_journal(struct super_block *p_s_sb,
			int bmap_nr, int bit_nr, int search_all,
			b_blocknr_t * next_zero_bit)
{
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
	struct reiserfs_journal_cnode *cn;
	struct reiserfs_list_bitmap *jb;
	int i;
	unsigned long bl;

	*next_zero_bit = 0;	/* always start this at zero. */

	PROC_INFO_INC(p_s_sb, journal.in_journal);
	/* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
	** if we crash before the transaction that freed it commits, this transaction won't
	** have committed either, and the block will never be written
	*/
	if (search_all) {
		for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
			PROC_INFO_INC(p_s_sb, journal.in_journal_bitmap);
			jb = journal->j_list_bitmap + i;
			if (jb->journal_list && jb->bitmaps[bmap_nr] &&
			    test_bit(bit_nr,
				     (unsigned long *)jb->bitmaps[bmap_nr]->
				     data)) {
				/* rejected: hand find_forward the next clear
				** bit in the bitmap that rejected us */
				*next_zero_bit =
				    find_next_zero_bit((unsigned long *)
						       (jb->bitmaps[bmap_nr]->
							data),
						       p_s_sb->s_blocksize << 3,
						       bit_nr + 1);
				return 1;
			}
		}
	}

	bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr;
	/* is it in any old transactions? */
	if (search_all
	    && (cn =
		get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) {
		return 1;
	}

	/* is it in the current transaction.  This should never happen */
	if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) {
		BUG();
		return 1;
	}

	PROC_INFO_INC(p_s_sb, journal.in_journal_reusable);
	/* safe for reuse */
	return 0;
}
539 | 539 | ||
540 | /* insert cn into table | 540 | /* insert cn into table |
541 | */ | 541 | */ |
542 | static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, | 542 | static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, |
543 | struct reiserfs_journal_cnode *cn) | 543 | struct reiserfs_journal_cnode *cn) |
544 | { | 544 | { |
545 | struct reiserfs_journal_cnode *cn_orig; | 545 | struct reiserfs_journal_cnode *cn_orig; |
546 | 546 | ||
547 | cn_orig = journal_hash(table, cn->sb, cn->blocknr); | 547 | cn_orig = journal_hash(table, cn->sb, cn->blocknr); |
548 | cn->hnext = cn_orig; | 548 | cn->hnext = cn_orig; |
549 | cn->hprev = NULL; | 549 | cn->hprev = NULL; |
550 | if (cn_orig) { | 550 | if (cn_orig) { |
551 | cn_orig->hprev = cn; | 551 | cn_orig->hprev = cn; |
552 | } | 552 | } |
553 | journal_hash(table, cn->sb, cn->blocknr) = cn; | 553 | journal_hash(table, cn->sb, cn->blocknr) = cn; |
554 | } | 554 | } |
555 | 555 | ||
556 | /* lock the current transaction */ | 556 | /* lock the current transaction */ |
557 | static inline void lock_journal(struct super_block *p_s_sb) | 557 | static inline void lock_journal(struct super_block *p_s_sb) |
558 | { | 558 | { |
559 | PROC_INFO_INC(p_s_sb, journal.lock_journal); | 559 | PROC_INFO_INC(p_s_sb, journal.lock_journal); |
560 | down(&SB_JOURNAL(p_s_sb)->j_lock); | 560 | down(&SB_JOURNAL(p_s_sb)->j_lock); |
561 | } | 561 | } |
562 | 562 | ||
563 | /* unlock the current transaction */ | 563 | /* unlock the current transaction */ |
564 | static inline void unlock_journal(struct super_block *p_s_sb) | 564 | static inline void unlock_journal(struct super_block *p_s_sb) |
565 | { | 565 | { |
566 | up(&SB_JOURNAL(p_s_sb)->j_lock); | 566 | up(&SB_JOURNAL(p_s_sb)->j_lock); |
567 | } | 567 | } |
568 | 568 | ||
569 | static inline void get_journal_list(struct reiserfs_journal_list *jl) | 569 | static inline void get_journal_list(struct reiserfs_journal_list *jl) |
570 | { | 570 | { |
571 | jl->j_refcount++; | 571 | jl->j_refcount++; |
572 | } | 572 | } |
573 | 573 | ||
574 | static inline void put_journal_list(struct super_block *s, | 574 | static inline void put_journal_list(struct super_block *s, |
575 | struct reiserfs_journal_list *jl) | 575 | struct reiserfs_journal_list *jl) |
576 | { | 576 | { |
577 | if (jl->j_refcount < 1) { | 577 | if (jl->j_refcount < 1) { |
578 | reiserfs_panic(s, "trans id %lu, refcount at %d", | 578 | reiserfs_panic(s, "trans id %lu, refcount at %d", |
579 | jl->j_trans_id, jl->j_refcount); | 579 | jl->j_trans_id, jl->j_refcount); |
580 | } | 580 | } |
581 | if (--jl->j_refcount == 0) | 581 | if (--jl->j_refcount == 0) |
582 | kfree(jl); | 582 | kfree(jl); |
583 | } | 583 | } |
584 | 584 | ||
585 | /* | 585 | /* |
586 | ** this used to be much more involved, and I'm keeping it just in case things get ugly again. | 586 | ** this used to be much more involved, and I'm keeping it just in case things get ugly again. |
587 | ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a | 587 | ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a |
588 | ** transaction. | 588 | ** transaction. |
589 | */ | 589 | */ |
590 | static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, | 590 | static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, |
591 | struct reiserfs_journal_list *jl) | 591 | struct reiserfs_journal_list *jl) |
592 | { | 592 | { |
593 | 593 | ||
594 | struct reiserfs_list_bitmap *jb = jl->j_list_bitmap; | 594 | struct reiserfs_list_bitmap *jb = jl->j_list_bitmap; |
595 | if (jb) { | 595 | if (jb) { |
596 | cleanup_bitmap_list(p_s_sb, jb); | 596 | cleanup_bitmap_list(p_s_sb, jb); |
597 | } | 597 | } |
598 | jl->j_list_bitmap->journal_list = NULL; | 598 | jl->j_list_bitmap->journal_list = NULL; |
599 | jl->j_list_bitmap = NULL; | 599 | jl->j_list_bitmap = NULL; |
600 | } | 600 | } |
601 | 601 | ||
602 | static int journal_list_still_alive(struct super_block *s, | 602 | static int journal_list_still_alive(struct super_block *s, |
603 | unsigned long trans_id) | 603 | unsigned long trans_id) |
604 | { | 604 | { |
605 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 605 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
606 | struct list_head *entry = &journal->j_journal_list; | 606 | struct list_head *entry = &journal->j_journal_list; |
607 | struct reiserfs_journal_list *jl; | 607 | struct reiserfs_journal_list *jl; |
608 | 608 | ||
609 | if (!list_empty(entry)) { | 609 | if (!list_empty(entry)) { |
610 | jl = JOURNAL_LIST_ENTRY(entry->next); | 610 | jl = JOURNAL_LIST_ENTRY(entry->next); |
611 | if (jl->j_trans_id <= trans_id) { | 611 | if (jl->j_trans_id <= trans_id) { |
612 | return 1; | 612 | return 1; |
613 | } | 613 | } |
614 | } | 614 | } |
615 | return 0; | 615 | return 0; |
616 | } | 616 | } |
617 | 617 | ||
618 | static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) | 618 | static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) |
619 | { | 619 | { |
620 | char b[BDEVNAME_SIZE]; | 620 | char b[BDEVNAME_SIZE]; |
621 | 621 | ||
622 | if (buffer_journaled(bh)) { | 622 | if (buffer_journaled(bh)) { |
623 | reiserfs_warning(NULL, | 623 | reiserfs_warning(NULL, |
624 | "clm-2084: pinned buffer %lu:%s sent to disk", | 624 | "clm-2084: pinned buffer %lu:%s sent to disk", |
625 | bh->b_blocknr, bdevname(bh->b_bdev, b)); | 625 | bh->b_blocknr, bdevname(bh->b_bdev, b)); |
626 | } | 626 | } |
627 | if (uptodate) | 627 | if (uptodate) |
628 | set_buffer_uptodate(bh); | 628 | set_buffer_uptodate(bh); |
629 | else | 629 | else |
630 | clear_buffer_uptodate(bh); | 630 | clear_buffer_uptodate(bh); |
631 | unlock_buffer(bh); | 631 | unlock_buffer(bh); |
632 | put_bh(bh); | 632 | put_bh(bh); |
633 | } | 633 | } |
634 | 634 | ||
635 | static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) | 635 | static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) |
636 | { | 636 | { |
637 | if (uptodate) | 637 | if (uptodate) |
638 | set_buffer_uptodate(bh); | 638 | set_buffer_uptodate(bh); |
639 | else | 639 | else |
640 | clear_buffer_uptodate(bh); | 640 | clear_buffer_uptodate(bh); |
641 | unlock_buffer(bh); | 641 | unlock_buffer(bh); |
642 | put_bh(bh); | 642 | put_bh(bh); |
643 | } | 643 | } |
644 | 644 | ||
645 | static void submit_logged_buffer(struct buffer_head *bh) | 645 | static void submit_logged_buffer(struct buffer_head *bh) |
646 | { | 646 | { |
647 | get_bh(bh); | 647 | get_bh(bh); |
648 | bh->b_end_io = reiserfs_end_buffer_io_sync; | 648 | bh->b_end_io = reiserfs_end_buffer_io_sync; |
649 | clear_buffer_journal_new(bh); | 649 | clear_buffer_journal_new(bh); |
650 | clear_buffer_dirty(bh); | 650 | clear_buffer_dirty(bh); |
651 | if (!test_clear_buffer_journal_test(bh)) | 651 | if (!test_clear_buffer_journal_test(bh)) |
652 | BUG(); | 652 | BUG(); |
653 | if (!buffer_uptodate(bh)) | 653 | if (!buffer_uptodate(bh)) |
654 | BUG(); | 654 | BUG(); |
655 | submit_bh(WRITE, bh); | 655 | submit_bh(WRITE, bh); |
656 | } | 656 | } |
657 | 657 | ||
658 | static void submit_ordered_buffer(struct buffer_head *bh) | 658 | static void submit_ordered_buffer(struct buffer_head *bh) |
659 | { | 659 | { |
660 | get_bh(bh); | 660 | get_bh(bh); |
661 | bh->b_end_io = reiserfs_end_ordered_io; | 661 | bh->b_end_io = reiserfs_end_ordered_io; |
662 | clear_buffer_dirty(bh); | 662 | clear_buffer_dirty(bh); |
663 | if (!buffer_uptodate(bh)) | 663 | if (!buffer_uptodate(bh)) |
664 | BUG(); | 664 | BUG(); |
665 | submit_bh(WRITE, bh); | 665 | submit_bh(WRITE, bh); |
666 | } | 666 | } |
667 | 667 | ||
668 | static int submit_barrier_buffer(struct buffer_head *bh) | 668 | static int submit_barrier_buffer(struct buffer_head *bh) |
669 | { | 669 | { |
670 | get_bh(bh); | 670 | get_bh(bh); |
671 | bh->b_end_io = reiserfs_end_ordered_io; | 671 | bh->b_end_io = reiserfs_end_ordered_io; |
672 | clear_buffer_dirty(bh); | 672 | clear_buffer_dirty(bh); |
673 | if (!buffer_uptodate(bh)) | 673 | if (!buffer_uptodate(bh)) |
674 | BUG(); | 674 | BUG(); |
675 | return submit_bh(WRITE_BARRIER, bh); | 675 | return submit_bh(WRITE_BARRIER, bh); |
676 | } | 676 | } |
677 | 677 | ||
678 | static void check_barrier_completion(struct super_block *s, | 678 | static void check_barrier_completion(struct super_block *s, |
679 | struct buffer_head *bh) | 679 | struct buffer_head *bh) |
680 | { | 680 | { |
681 | if (buffer_eopnotsupp(bh)) { | 681 | if (buffer_eopnotsupp(bh)) { |
682 | clear_buffer_eopnotsupp(bh); | 682 | clear_buffer_eopnotsupp(bh); |
683 | disable_barrier(s); | 683 | disable_barrier(s); |
684 | set_buffer_uptodate(bh); | 684 | set_buffer_uptodate(bh); |
685 | set_buffer_dirty(bh); | 685 | set_buffer_dirty(bh); |
686 | sync_dirty_buffer(bh); | 686 | sync_dirty_buffer(bh); |
687 | } | 687 | } |
688 | } | 688 | } |
689 | 689 | ||
690 | #define CHUNK_SIZE 32 | 690 | #define CHUNK_SIZE 32 |
691 | struct buffer_chunk { | 691 | struct buffer_chunk { |
692 | struct buffer_head *bh[CHUNK_SIZE]; | 692 | struct buffer_head *bh[CHUNK_SIZE]; |
693 | int nr; | 693 | int nr; |
694 | }; | 694 | }; |
695 | 695 | ||
696 | static void write_chunk(struct buffer_chunk *chunk) | 696 | static void write_chunk(struct buffer_chunk *chunk) |
697 | { | 697 | { |
698 | int i; | 698 | int i; |
699 | get_fs_excl(); | 699 | get_fs_excl(); |
700 | for (i = 0; i < chunk->nr; i++) { | 700 | for (i = 0; i < chunk->nr; i++) { |
701 | submit_logged_buffer(chunk->bh[i]); | 701 | submit_logged_buffer(chunk->bh[i]); |
702 | } | 702 | } |
703 | chunk->nr = 0; | 703 | chunk->nr = 0; |
704 | put_fs_excl(); | 704 | put_fs_excl(); |
705 | } | 705 | } |
706 | 706 | ||
707 | static void write_ordered_chunk(struct buffer_chunk *chunk) | 707 | static void write_ordered_chunk(struct buffer_chunk *chunk) |
708 | { | 708 | { |
709 | int i; | 709 | int i; |
710 | get_fs_excl(); | 710 | get_fs_excl(); |
711 | for (i = 0; i < chunk->nr; i++) { | 711 | for (i = 0; i < chunk->nr; i++) { |
712 | submit_ordered_buffer(chunk->bh[i]); | 712 | submit_ordered_buffer(chunk->bh[i]); |
713 | } | 713 | } |
714 | chunk->nr = 0; | 714 | chunk->nr = 0; |
715 | put_fs_excl(); | 715 | put_fs_excl(); |
716 | } | 716 | } |
717 | 717 | ||
718 | static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, | 718 | static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, |
719 | spinlock_t * lock, void (fn) (struct buffer_chunk *)) | 719 | spinlock_t * lock, void (fn) (struct buffer_chunk *)) |
720 | { | 720 | { |
721 | int ret = 0; | 721 | int ret = 0; |
722 | if (chunk->nr >= CHUNK_SIZE) | 722 | if (chunk->nr >= CHUNK_SIZE) |
723 | BUG(); | 723 | BUG(); |
724 | chunk->bh[chunk->nr++] = bh; | 724 | chunk->bh[chunk->nr++] = bh; |
725 | if (chunk->nr >= CHUNK_SIZE) { | 725 | if (chunk->nr >= CHUNK_SIZE) { |
726 | ret = 1; | 726 | ret = 1; |
727 | if (lock) | 727 | if (lock) |
728 | spin_unlock(lock); | 728 | spin_unlock(lock); |
729 | fn(chunk); | 729 | fn(chunk); |
730 | if (lock) | 730 | if (lock) |
731 | spin_lock(lock); | 731 | spin_lock(lock); |
732 | } | 732 | } |
733 | return ret; | 733 | return ret; |
734 | } | 734 | } |
735 | 735 | ||
736 | static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0); | 736 | static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0); |
737 | static struct reiserfs_jh *alloc_jh(void) | 737 | static struct reiserfs_jh *alloc_jh(void) |
738 | { | 738 | { |
739 | struct reiserfs_jh *jh; | 739 | struct reiserfs_jh *jh; |
740 | while (1) { | 740 | while (1) { |
741 | jh = kmalloc(sizeof(*jh), GFP_NOFS); | 741 | jh = kmalloc(sizeof(*jh), GFP_NOFS); |
742 | if (jh) { | 742 | if (jh) { |
743 | atomic_inc(&nr_reiserfs_jh); | 743 | atomic_inc(&nr_reiserfs_jh); |
744 | return jh; | 744 | return jh; |
745 | } | 745 | } |
746 | yield(); | 746 | yield(); |
747 | } | 747 | } |
748 | } | 748 | } |
749 | 749 | ||
750 | /* | 750 | /* |
751 | * we want to free the jh when the buffer has been written | 751 | * we want to free the jh when the buffer has been written |
752 | * and waited on | 752 | * and waited on |
753 | */ | 753 | */ |
754 | void reiserfs_free_jh(struct buffer_head *bh) | 754 | void reiserfs_free_jh(struct buffer_head *bh) |
755 | { | 755 | { |
756 | struct reiserfs_jh *jh; | 756 | struct reiserfs_jh *jh; |
757 | 757 | ||
758 | jh = bh->b_private; | 758 | jh = bh->b_private; |
759 | if (jh) { | 759 | if (jh) { |
760 | bh->b_private = NULL; | 760 | bh->b_private = NULL; |
761 | jh->bh = NULL; | 761 | jh->bh = NULL; |
762 | list_del_init(&jh->list); | 762 | list_del_init(&jh->list); |
763 | kfree(jh); | 763 | kfree(jh); |
764 | if (atomic_read(&nr_reiserfs_jh) <= 0) | 764 | if (atomic_read(&nr_reiserfs_jh) <= 0) |
765 | BUG(); | 765 | BUG(); |
766 | atomic_dec(&nr_reiserfs_jh); | 766 | atomic_dec(&nr_reiserfs_jh); |
767 | put_bh(bh); | 767 | put_bh(bh); |
768 | } | 768 | } |
769 | } | 769 | } |
770 | 770 | ||
771 | static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, | 771 | static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, |
772 | int tail) | 772 | int tail) |
773 | { | 773 | { |
774 | struct reiserfs_jh *jh; | 774 | struct reiserfs_jh *jh; |
775 | 775 | ||
776 | if (bh->b_private) { | 776 | if (bh->b_private) { |
777 | spin_lock(&j->j_dirty_buffers_lock); | 777 | spin_lock(&j->j_dirty_buffers_lock); |
778 | if (!bh->b_private) { | 778 | if (!bh->b_private) { |
779 | spin_unlock(&j->j_dirty_buffers_lock); | 779 | spin_unlock(&j->j_dirty_buffers_lock); |
780 | goto no_jh; | 780 | goto no_jh; |
781 | } | 781 | } |
782 | jh = bh->b_private; | 782 | jh = bh->b_private; |
783 | list_del_init(&jh->list); | 783 | list_del_init(&jh->list); |
784 | } else { | 784 | } else { |
785 | no_jh: | 785 | no_jh: |
786 | get_bh(bh); | 786 | get_bh(bh); |
787 | jh = alloc_jh(); | 787 | jh = alloc_jh(); |
788 | spin_lock(&j->j_dirty_buffers_lock); | 788 | spin_lock(&j->j_dirty_buffers_lock); |
789 | /* buffer must be locked for __add_jh, should be able to have | 789 | /* buffer must be locked for __add_jh, should be able to have |
790 | * two adds at the same time | 790 | * two adds at the same time |
791 | */ | 791 | */ |
792 | if (bh->b_private) | 792 | if (bh->b_private) |
793 | BUG(); | 793 | BUG(); |
794 | jh->bh = bh; | 794 | jh->bh = bh; |
795 | bh->b_private = jh; | 795 | bh->b_private = jh; |
796 | } | 796 | } |
797 | jh->jl = j->j_current_jl; | 797 | jh->jl = j->j_current_jl; |
798 | if (tail) | 798 | if (tail) |
799 | list_add_tail(&jh->list, &jh->jl->j_tail_bh_list); | 799 | list_add_tail(&jh->list, &jh->jl->j_tail_bh_list); |
800 | else { | 800 | else { |
801 | list_add_tail(&jh->list, &jh->jl->j_bh_list); | 801 | list_add_tail(&jh->list, &jh->jl->j_bh_list); |
802 | } | 802 | } |
803 | spin_unlock(&j->j_dirty_buffers_lock); | 803 | spin_unlock(&j->j_dirty_buffers_lock); |
804 | return 0; | 804 | return 0; |
805 | } | 805 | } |
806 | 806 | ||
807 | int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) | 807 | int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) |
808 | { | 808 | { |
809 | return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1); | 809 | return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1); |
810 | } | 810 | } |
811 | int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) | 811 | int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) |
812 | { | 812 | { |
813 | return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0); | 813 | return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0); |
814 | } | 814 | } |
815 | 815 | ||
816 | #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list) | 816 | #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list) |
817 | static int write_ordered_buffers(spinlock_t * lock, | 817 | static int write_ordered_buffers(spinlock_t * lock, |
818 | struct reiserfs_journal *j, | 818 | struct reiserfs_journal *j, |
819 | struct reiserfs_journal_list *jl, | 819 | struct reiserfs_journal_list *jl, |
820 | struct list_head *list) | 820 | struct list_head *list) |
821 | { | 821 | { |
822 | struct buffer_head *bh; | 822 | struct buffer_head *bh; |
823 | struct reiserfs_jh *jh; | 823 | struct reiserfs_jh *jh; |
824 | int ret = j->j_errno; | 824 | int ret = j->j_errno; |
825 | struct buffer_chunk chunk; | 825 | struct buffer_chunk chunk; |
826 | struct list_head tmp; | 826 | struct list_head tmp; |
827 | INIT_LIST_HEAD(&tmp); | 827 | INIT_LIST_HEAD(&tmp); |
828 | 828 | ||
829 | chunk.nr = 0; | 829 | chunk.nr = 0; |
830 | spin_lock(lock); | 830 | spin_lock(lock); |
831 | while (!list_empty(list)) { | 831 | while (!list_empty(list)) { |
832 | jh = JH_ENTRY(list->next); | 832 | jh = JH_ENTRY(list->next); |
833 | bh = jh->bh; | 833 | bh = jh->bh; |
834 | get_bh(bh); | 834 | get_bh(bh); |
835 | if (test_set_buffer_locked(bh)) { | 835 | if (test_set_buffer_locked(bh)) { |
836 | if (!buffer_dirty(bh)) { | 836 | if (!buffer_dirty(bh)) { |
837 | list_del_init(&jh->list); | 837 | list_del_init(&jh->list); |
838 | list_add(&jh->list, &tmp); | 838 | list_add(&jh->list, &tmp); |
839 | goto loop_next; | 839 | goto loop_next; |
840 | } | 840 | } |
841 | spin_unlock(lock); | 841 | spin_unlock(lock); |
842 | if (chunk.nr) | 842 | if (chunk.nr) |
843 | write_ordered_chunk(&chunk); | 843 | write_ordered_chunk(&chunk); |
844 | wait_on_buffer(bh); | 844 | wait_on_buffer(bh); |
845 | cond_resched(); | 845 | cond_resched(); |
846 | spin_lock(lock); | 846 | spin_lock(lock); |
847 | goto loop_next; | 847 | goto loop_next; |
848 | } | 848 | } |
849 | if (buffer_dirty(bh)) { | 849 | if (buffer_dirty(bh)) { |
850 | list_del_init(&jh->list); | 850 | list_del_init(&jh->list); |
851 | list_add(&jh->list, &tmp); | 851 | list_add(&jh->list, &tmp); |
852 | add_to_chunk(&chunk, bh, lock, write_ordered_chunk); | 852 | add_to_chunk(&chunk, bh, lock, write_ordered_chunk); |
853 | } else { | 853 | } else { |
854 | reiserfs_free_jh(bh); | 854 | reiserfs_free_jh(bh); |
855 | unlock_buffer(bh); | 855 | unlock_buffer(bh); |
856 | } | 856 | } |
857 | loop_next: | 857 | loop_next: |
858 | put_bh(bh); | 858 | put_bh(bh); |
859 | cond_resched_lock(lock); | 859 | cond_resched_lock(lock); |
860 | } | 860 | } |
861 | if (chunk.nr) { | 861 | if (chunk.nr) { |
862 | spin_unlock(lock); | 862 | spin_unlock(lock); |
863 | write_ordered_chunk(&chunk); | 863 | write_ordered_chunk(&chunk); |
864 | spin_lock(lock); | 864 | spin_lock(lock); |
865 | } | 865 | } |
866 | while (!list_empty(&tmp)) { | 866 | while (!list_empty(&tmp)) { |
867 | jh = JH_ENTRY(tmp.prev); | 867 | jh = JH_ENTRY(tmp.prev); |
868 | bh = jh->bh; | 868 | bh = jh->bh; |
869 | get_bh(bh); | 869 | get_bh(bh); |
870 | reiserfs_free_jh(bh); | 870 | reiserfs_free_jh(bh); |
871 | 871 | ||
872 | if (buffer_locked(bh)) { | 872 | if (buffer_locked(bh)) { |
873 | spin_unlock(lock); | 873 | spin_unlock(lock); |
874 | wait_on_buffer(bh); | 874 | wait_on_buffer(bh); |
875 | spin_lock(lock); | 875 | spin_lock(lock); |
876 | } | 876 | } |
877 | if (!buffer_uptodate(bh)) { | 877 | if (!buffer_uptodate(bh)) { |
878 | ret = -EIO; | 878 | ret = -EIO; |
879 | } | 879 | } |
880 | /* ugly interaction with invalidatepage here. | 880 | /* ugly interaction with invalidatepage here. |
881 | * reiserfs_invalidate_page will pin any buffer that has a valid | 881 | * reiserfs_invalidate_page will pin any buffer that has a valid |
882 | * journal head from an older transaction. If someone else sets | 882 | * journal head from an older transaction. If someone else sets |
883 | * our buffer dirty after we write it in the first loop, and | 883 | * our buffer dirty after we write it in the first loop, and |
884 | * then someone truncates the page away, nobody will ever write | 884 | * then someone truncates the page away, nobody will ever write |
885 | * the buffer. We're safe if we write the page one last time | 885 | * the buffer. We're safe if we write the page one last time |
886 | * after freeing the journal header. | 886 | * after freeing the journal header. |
887 | */ | 887 | */ |
888 | if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { | 888 | if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { |
889 | spin_unlock(lock); | 889 | spin_unlock(lock); |
890 | ll_rw_block(WRITE, 1, &bh); | 890 | ll_rw_block(WRITE, 1, &bh); |
891 | spin_lock(lock); | 891 | spin_lock(lock); |
892 | } | 892 | } |
893 | put_bh(bh); | 893 | put_bh(bh); |
894 | cond_resched_lock(lock); | 894 | cond_resched_lock(lock); |
895 | } | 895 | } |
896 | spin_unlock(lock); | 896 | spin_unlock(lock); |
897 | return ret; | 897 | return ret; |
898 | } | 898 | } |
899 | 899 | ||
900 | static int flush_older_commits(struct super_block *s, | 900 | static int flush_older_commits(struct super_block *s, |
901 | struct reiserfs_journal_list *jl) | 901 | struct reiserfs_journal_list *jl) |
902 | { | 902 | { |
903 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 903 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
904 | struct reiserfs_journal_list *other_jl; | 904 | struct reiserfs_journal_list *other_jl; |
905 | struct reiserfs_journal_list *first_jl; | 905 | struct reiserfs_journal_list *first_jl; |
906 | struct list_head *entry; | 906 | struct list_head *entry; |
907 | unsigned long trans_id = jl->j_trans_id; | 907 | unsigned long trans_id = jl->j_trans_id; |
908 | unsigned long other_trans_id; | 908 | unsigned long other_trans_id; |
909 | unsigned long first_trans_id; | 909 | unsigned long first_trans_id; |
910 | 910 | ||
911 | find_first: | 911 | find_first: |
912 | /* | 912 | /* |
913 | * first we walk backwards to find the oldest uncommitted transation | 913 | * first we walk backwards to find the oldest uncommitted transation |
914 | */ | 914 | */ |
915 | first_jl = jl; | 915 | first_jl = jl; |
916 | entry = jl->j_list.prev; | 916 | entry = jl->j_list.prev; |
917 | while (1) { | 917 | while (1) { |
918 | other_jl = JOURNAL_LIST_ENTRY(entry); | 918 | other_jl = JOURNAL_LIST_ENTRY(entry); |
919 | if (entry == &journal->j_journal_list || | 919 | if (entry == &journal->j_journal_list || |
920 | atomic_read(&other_jl->j_older_commits_done)) | 920 | atomic_read(&other_jl->j_older_commits_done)) |
921 | break; | 921 | break; |
922 | 922 | ||
923 | first_jl = other_jl; | 923 | first_jl = other_jl; |
924 | entry = other_jl->j_list.prev; | 924 | entry = other_jl->j_list.prev; |
925 | } | 925 | } |
926 | 926 | ||
927 | /* if we didn't find any older uncommitted transactions, return now */ | 927 | /* if we didn't find any older uncommitted transactions, return now */ |
928 | if (first_jl == jl) { | 928 | if (first_jl == jl) { |
929 | return 0; | 929 | return 0; |
930 | } | 930 | } |
931 | 931 | ||
932 | first_trans_id = first_jl->j_trans_id; | 932 | first_trans_id = first_jl->j_trans_id; |
933 | 933 | ||
934 | entry = &first_jl->j_list; | 934 | entry = &first_jl->j_list; |
935 | while (1) { | 935 | while (1) { |
936 | other_jl = JOURNAL_LIST_ENTRY(entry); | 936 | other_jl = JOURNAL_LIST_ENTRY(entry); |
937 | other_trans_id = other_jl->j_trans_id; | 937 | other_trans_id = other_jl->j_trans_id; |
938 | 938 | ||
939 | if (other_trans_id < trans_id) { | 939 | if (other_trans_id < trans_id) { |
940 | if (atomic_read(&other_jl->j_commit_left) != 0) { | 940 | if (atomic_read(&other_jl->j_commit_left) != 0) { |
941 | flush_commit_list(s, other_jl, 0); | 941 | flush_commit_list(s, other_jl, 0); |
942 | 942 | ||
943 | /* list we were called with is gone, return */ | 943 | /* list we were called with is gone, return */ |
944 | if (!journal_list_still_alive(s, trans_id)) | 944 | if (!journal_list_still_alive(s, trans_id)) |
945 | return 1; | 945 | return 1; |
946 | 946 | ||
947 | /* the one we just flushed is gone, this means all | 947 | /* the one we just flushed is gone, this means all |
948 | * older lists are also gone, so first_jl is no longer | 948 | * older lists are also gone, so first_jl is no longer |
949 | * valid either. Go back to the beginning. | 949 | * valid either. Go back to the beginning. |
950 | */ | 950 | */ |
951 | if (!journal_list_still_alive | 951 | if (!journal_list_still_alive |
952 | (s, other_trans_id)) { | 952 | (s, other_trans_id)) { |
953 | goto find_first; | 953 | goto find_first; |
954 | } | 954 | } |
955 | } | 955 | } |
956 | entry = entry->next; | 956 | entry = entry->next; |
957 | if (entry == &journal->j_journal_list) | 957 | if (entry == &journal->j_journal_list) |
958 | return 0; | 958 | return 0; |
959 | } else { | 959 | } else { |
960 | return 0; | 960 | return 0; |
961 | } | 961 | } |
962 | } | 962 | } |
963 | return 0; | 963 | return 0; |
964 | } | 964 | } |
965 | int reiserfs_async_progress_wait(struct super_block *s) | 965 | int reiserfs_async_progress_wait(struct super_block *s) |
966 | { | 966 | { |
967 | DEFINE_WAIT(wait); | 967 | DEFINE_WAIT(wait); |
968 | struct reiserfs_journal *j = SB_JOURNAL(s); | 968 | struct reiserfs_journal *j = SB_JOURNAL(s); |
969 | if (atomic_read(&j->j_async_throttle)) | 969 | if (atomic_read(&j->j_async_throttle)) |
970 | blk_congestion_wait(WRITE, HZ / 10); | 970 | blk_congestion_wait(WRITE, HZ / 10); |
971 | return 0; | 971 | return 0; |
972 | } | 972 | } |
973 | 973 | ||
974 | /* | 974 | /* |
975 | ** if this journal list still has commit blocks unflushed, send them to disk. | 975 | ** if this journal list still has commit blocks unflushed, send them to disk. |
976 | ** | 976 | ** |
977 | ** log areas must be flushed in order (transaction 2 can't commit before transaction 1) | 977 | ** log areas must be flushed in order (transaction 2 can't commit before transaction 1) |
978 | ** Before the commit block can by written, every other log block must be safely on disk | 978 | ** Before the commit block can by written, every other log block must be safely on disk |
979 | ** | 979 | ** |
980 | */ | 980 | */ |
981 | static int flush_commit_list(struct super_block *s, | 981 | static int flush_commit_list(struct super_block *s, |
982 | struct reiserfs_journal_list *jl, int flushall) | 982 | struct reiserfs_journal_list *jl, int flushall) |
983 | { | 983 | { |
984 | int i; | 984 | int i; |
985 | int bn; | 985 | int bn; |
986 | struct buffer_head *tbh = NULL; | 986 | struct buffer_head *tbh = NULL; |
987 | unsigned long trans_id = jl->j_trans_id; | 987 | unsigned long trans_id = jl->j_trans_id; |
988 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 988 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
989 | int barrier = 0; | 989 | int barrier = 0; |
990 | int retval = 0; | 990 | int retval = 0; |
991 | int write_len; | ||
991 | 992 | ||
992 | reiserfs_check_lock_depth(s, "flush_commit_list"); | 993 | reiserfs_check_lock_depth(s, "flush_commit_list"); |
993 | 994 | ||
994 | if (atomic_read(&jl->j_older_commits_done)) { | 995 | if (atomic_read(&jl->j_older_commits_done)) { |
995 | return 0; | 996 | return 0; |
996 | } | 997 | } |
997 | 998 | ||
998 | get_fs_excl(); | 999 | get_fs_excl(); |
999 | 1000 | ||
1000 | /* before we can put our commit blocks on disk, we have to make sure everyone older than | 1001 | /* before we can put our commit blocks on disk, we have to make sure everyone older than |
1001 | ** us is on disk too | 1002 | ** us is on disk too |
1002 | */ | 1003 | */ |
1003 | BUG_ON(jl->j_len <= 0); | 1004 | BUG_ON(jl->j_len <= 0); |
1004 | BUG_ON(trans_id == journal->j_trans_id); | 1005 | BUG_ON(trans_id == journal->j_trans_id); |
1005 | 1006 | ||
1006 | get_journal_list(jl); | 1007 | get_journal_list(jl); |
1007 | if (flushall) { | 1008 | if (flushall) { |
1008 | if (flush_older_commits(s, jl) == 1) { | 1009 | if (flush_older_commits(s, jl) == 1) { |
1009 | /* list disappeared during flush_older_commits. return */ | 1010 | /* list disappeared during flush_older_commits. return */ |
1010 | goto put_jl; | 1011 | goto put_jl; |
1011 | } | 1012 | } |
1012 | } | 1013 | } |
1013 | 1014 | ||
1014 | /* make sure nobody is trying to flush this one at the same time */ | 1015 | /* make sure nobody is trying to flush this one at the same time */ |
1015 | down(&jl->j_commit_lock); | 1016 | down(&jl->j_commit_lock); |
1016 | if (!journal_list_still_alive(s, trans_id)) { | 1017 | if (!journal_list_still_alive(s, trans_id)) { |
1017 | up(&jl->j_commit_lock); | 1018 | up(&jl->j_commit_lock); |
1018 | goto put_jl; | 1019 | goto put_jl; |
1019 | } | 1020 | } |
1020 | BUG_ON(jl->j_trans_id == 0); | 1021 | BUG_ON(jl->j_trans_id == 0); |
1021 | 1022 | ||
1022 | /* this commit is done, exit */ | 1023 | /* this commit is done, exit */ |
1023 | if (atomic_read(&(jl->j_commit_left)) <= 0) { | 1024 | if (atomic_read(&(jl->j_commit_left)) <= 0) { |
1024 | if (flushall) { | 1025 | if (flushall) { |
1025 | atomic_set(&(jl->j_older_commits_done), 1); | 1026 | atomic_set(&(jl->j_older_commits_done), 1); |
1026 | } | 1027 | } |
1027 | up(&jl->j_commit_lock); | 1028 | up(&jl->j_commit_lock); |
1028 | goto put_jl; | 1029 | goto put_jl; |
1029 | } | 1030 | } |
1030 | 1031 | ||
1031 | if (!list_empty(&jl->j_bh_list)) { | 1032 | if (!list_empty(&jl->j_bh_list)) { |
1032 | unlock_kernel(); | 1033 | unlock_kernel(); |
1033 | write_ordered_buffers(&journal->j_dirty_buffers_lock, | 1034 | write_ordered_buffers(&journal->j_dirty_buffers_lock, |
1034 | journal, jl, &jl->j_bh_list); | 1035 | journal, jl, &jl->j_bh_list); |
1035 | lock_kernel(); | 1036 | lock_kernel(); |
1036 | } | 1037 | } |
1037 | BUG_ON(!list_empty(&jl->j_bh_list)); | 1038 | BUG_ON(!list_empty(&jl->j_bh_list)); |
1038 | /* | 1039 | /* |
1039 | * for the description block and all the log blocks, submit any buffers | 1040 | * for the description block and all the log blocks, submit any buffers |
1040 | * that haven't already reached the disk | 1041 | * that haven't already reached the disk. Try to write at least 256 |
1042 | * log blocks. later on, we will only wait on blocks that correspond | ||
1043 | * to this transaction, but while we're unplugging we might as well | ||
1044 | * get a chunk of data on there. | ||
1041 | */ | 1045 | */ |
1042 | atomic_inc(&journal->j_async_throttle); | 1046 | atomic_inc(&journal->j_async_throttle); |
1043 | for (i = 0; i < (jl->j_len + 1); i++) { | 1047 | write_len = jl->j_len + 1; |
1048 | if (write_len < 256) | ||
1049 | write_len = 256; | ||
1050 | for (i = 0 ; i < write_len ; i++) { | ||
1044 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % | 1051 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % |
1045 | SB_ONDISK_JOURNAL_SIZE(s); | 1052 | SB_ONDISK_JOURNAL_SIZE(s); |
1046 | tbh = journal_find_get_block(s, bn); | 1053 | tbh = journal_find_get_block(s, bn); |
1047 | if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ | 1054 | if (tbh) { |
1048 | ll_rw_block(SWRITE, 1, &tbh); | 1055 | if (buffer_dirty(tbh)) |
1049 | put_bh(tbh); | 1056 | ll_rw_block(WRITE, 1, &tbh) ; |
1057 | put_bh(tbh) ; | ||
1058 | } | ||
1050 | } | 1059 | } |
1051 | atomic_dec(&journal->j_async_throttle); | 1060 | atomic_dec(&journal->j_async_throttle); |
1052 | 1061 | ||
1053 | /* We're skipping the commit if there's an error */ | 1062 | /* We're skipping the commit if there's an error */ |
1054 | if (retval || reiserfs_is_journal_aborted(journal)) | 1063 | if (retval || reiserfs_is_journal_aborted(journal)) |
1055 | barrier = 0; | 1064 | barrier = 0; |
1056 | 1065 | ||
1057 | /* wait on everything written so far before writing the commit | 1066 | /* wait on everything written so far before writing the commit |
1058 | * if we are in barrier mode, send the commit down now | 1067 | * if we are in barrier mode, send the commit down now |
1059 | */ | 1068 | */ |
1060 | barrier = reiserfs_barrier_flush(s); | 1069 | barrier = reiserfs_barrier_flush(s); |
1061 | if (barrier) { | 1070 | if (barrier) { |
1062 | int ret; | 1071 | int ret; |
1063 | lock_buffer(jl->j_commit_bh); | 1072 | lock_buffer(jl->j_commit_bh); |
1064 | ret = submit_barrier_buffer(jl->j_commit_bh); | 1073 | ret = submit_barrier_buffer(jl->j_commit_bh); |
1065 | if (ret == -EOPNOTSUPP) { | 1074 | if (ret == -EOPNOTSUPP) { |
1066 | set_buffer_uptodate(jl->j_commit_bh); | 1075 | set_buffer_uptodate(jl->j_commit_bh); |
1067 | disable_barrier(s); | 1076 | disable_barrier(s); |
1068 | barrier = 0; | 1077 | barrier = 0; |
1069 | } | 1078 | } |
1070 | } | 1079 | } |
1071 | for (i = 0; i < (jl->j_len + 1); i++) { | 1080 | for (i = 0; i < (jl->j_len + 1); i++) { |
1072 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + | 1081 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + |
1073 | (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); | 1082 | (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); |
1074 | tbh = journal_find_get_block(s, bn); | 1083 | tbh = journal_find_get_block(s, bn); |
1075 | wait_on_buffer(tbh); | 1084 | wait_on_buffer(tbh); |
1076 | // since we're using ll_rw_blk above, it might have skipped over | 1085 | // since we're using ll_rw_blk above, it might have skipped over |
1077 | // a locked buffer. Double check here | 1086 | // a locked buffer. Double check here |
1078 | // | 1087 | // |
1079 | if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ | 1088 | if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ |
1080 | sync_dirty_buffer(tbh); | 1089 | sync_dirty_buffer(tbh); |
1081 | if (unlikely(!buffer_uptodate(tbh))) { | 1090 | if (unlikely(!buffer_uptodate(tbh))) { |
1082 | #ifdef CONFIG_REISERFS_CHECK | 1091 | #ifdef CONFIG_REISERFS_CHECK |
1083 | reiserfs_warning(s, "journal-601, buffer write failed"); | 1092 | reiserfs_warning(s, "journal-601, buffer write failed"); |
1084 | #endif | 1093 | #endif |
1085 | retval = -EIO; | 1094 | retval = -EIO; |
1086 | } | 1095 | } |
1087 | put_bh(tbh); /* once for journal_find_get_block */ | 1096 | put_bh(tbh); /* once for journal_find_get_block */ |
1088 | put_bh(tbh); /* once due to original getblk in do_journal_end */ | 1097 | put_bh(tbh); /* once due to original getblk in do_journal_end */ |
1089 | atomic_dec(&(jl->j_commit_left)); | 1098 | atomic_dec(&(jl->j_commit_left)); |
1090 | } | 1099 | } |
1091 | 1100 | ||
1092 | BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); | 1101 | BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); |
1093 | 1102 | ||
1094 | if (!barrier) { | 1103 | if (!barrier) { |
1095 | /* If there was a write error in the journal - we can't commit | 1104 | /* If there was a write error in the journal - we can't commit |
1096 | * this transaction - it will be invalid and, if successful, | 1105 | * this transaction - it will be invalid and, if successful, |
1097 | * will just end up propogating the write error out to | 1106 | * will just end up propogating the write error out to |
1098 | * the file system. */ | 1107 | * the file system. */ |
1099 | if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { | 1108 | if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { |
1100 | if (buffer_dirty(jl->j_commit_bh)) | 1109 | if (buffer_dirty(jl->j_commit_bh)) |
1101 | BUG(); | 1110 | BUG(); |
1102 | mark_buffer_dirty(jl->j_commit_bh) ; | 1111 | mark_buffer_dirty(jl->j_commit_bh) ; |
1103 | sync_dirty_buffer(jl->j_commit_bh) ; | 1112 | sync_dirty_buffer(jl->j_commit_bh) ; |
1104 | } | 1113 | } |
1105 | } else | 1114 | } else |
1106 | wait_on_buffer(jl->j_commit_bh); | 1115 | wait_on_buffer(jl->j_commit_bh); |
1107 | 1116 | ||
1108 | check_barrier_completion(s, jl->j_commit_bh); | 1117 | check_barrier_completion(s, jl->j_commit_bh); |
1109 | 1118 | ||
1110 | /* If there was a write error in the journal - we can't commit this | 1119 | /* If there was a write error in the journal - we can't commit this |
1111 | * transaction - it will be invalid and, if successful, will just end | 1120 | * transaction - it will be invalid and, if successful, will just end |
1112 | * up propogating the write error out to the filesystem. */ | 1121 | * up propogating the write error out to the filesystem. */ |
1113 | if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { | 1122 | if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { |
1114 | #ifdef CONFIG_REISERFS_CHECK | 1123 | #ifdef CONFIG_REISERFS_CHECK |
1115 | reiserfs_warning(s, "journal-615: buffer write failed"); | 1124 | reiserfs_warning(s, "journal-615: buffer write failed"); |
1116 | #endif | 1125 | #endif |
1117 | retval = -EIO; | 1126 | retval = -EIO; |
1118 | } | 1127 | } |
1119 | bforget(jl->j_commit_bh); | 1128 | bforget(jl->j_commit_bh); |
1120 | if (journal->j_last_commit_id != 0 && | 1129 | if (journal->j_last_commit_id != 0 && |
1121 | (jl->j_trans_id - journal->j_last_commit_id) != 1) { | 1130 | (jl->j_trans_id - journal->j_last_commit_id) != 1) { |
1122 | reiserfs_warning(s, "clm-2200: last commit %lu, current %lu", | 1131 | reiserfs_warning(s, "clm-2200: last commit %lu, current %lu", |
1123 | journal->j_last_commit_id, jl->j_trans_id); | 1132 | journal->j_last_commit_id, jl->j_trans_id); |
1124 | } | 1133 | } |
1125 | journal->j_last_commit_id = jl->j_trans_id; | 1134 | journal->j_last_commit_id = jl->j_trans_id; |
1126 | 1135 | ||
1127 | /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ | 1136 | /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ |
1128 | cleanup_freed_for_journal_list(s, jl); | 1137 | cleanup_freed_for_journal_list(s, jl); |
1129 | 1138 | ||
1130 | retval = retval ? retval : journal->j_errno; | 1139 | retval = retval ? retval : journal->j_errno; |
1131 | 1140 | ||
1132 | /* mark the metadata dirty */ | 1141 | /* mark the metadata dirty */ |
1133 | if (!retval) | 1142 | if (!retval) |
1134 | dirty_one_transaction(s, jl); | 1143 | dirty_one_transaction(s, jl); |
1135 | atomic_dec(&(jl->j_commit_left)); | 1144 | atomic_dec(&(jl->j_commit_left)); |
1136 | 1145 | ||
1137 | if (flushall) { | 1146 | if (flushall) { |
1138 | atomic_set(&(jl->j_older_commits_done), 1); | 1147 | atomic_set(&(jl->j_older_commits_done), 1); |
1139 | } | 1148 | } |
1140 | up(&jl->j_commit_lock); | 1149 | up(&jl->j_commit_lock); |
1141 | put_jl: | 1150 | put_jl: |
1142 | put_journal_list(s, jl); | 1151 | put_journal_list(s, jl); |
1143 | 1152 | ||
1144 | if (retval) | 1153 | if (retval) |
1145 | reiserfs_abort(s, retval, "Journal write error in %s", | 1154 | reiserfs_abort(s, retval, "Journal write error in %s", |
1146 | __FUNCTION__); | 1155 | __FUNCTION__); |
1147 | put_fs_excl(); | 1156 | put_fs_excl(); |
1148 | return retval; | 1157 | return retval; |
1149 | } | 1158 | } |
1150 | 1159 | ||
1151 | /* | 1160 | /* |
1152 | ** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or | 1161 | ** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or |
1153 | ** returns NULL if it can't find anything | 1162 | ** returns NULL if it can't find anything |
1154 | */ | 1163 | */ |
1155 | static struct reiserfs_journal_list *find_newer_jl_for_cn(struct | 1164 | static struct reiserfs_journal_list *find_newer_jl_for_cn(struct |
1156 | reiserfs_journal_cnode | 1165 | reiserfs_journal_cnode |
1157 | *cn) | 1166 | *cn) |
1158 | { | 1167 | { |
1159 | struct super_block *sb = cn->sb; | 1168 | struct super_block *sb = cn->sb; |
1160 | b_blocknr_t blocknr = cn->blocknr; | 1169 | b_blocknr_t blocknr = cn->blocknr; |
1161 | 1170 | ||
1162 | cn = cn->hprev; | 1171 | cn = cn->hprev; |
1163 | while (cn) { | 1172 | while (cn) { |
1164 | if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { | 1173 | if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { |
1165 | return cn->jlist; | 1174 | return cn->jlist; |
1166 | } | 1175 | } |
1167 | cn = cn->hprev; | 1176 | cn = cn->hprev; |
1168 | } | 1177 | } |
1169 | return NULL; | 1178 | return NULL; |
1170 | } | 1179 | } |
1171 | 1180 | ||
1172 | static void remove_journal_hash(struct super_block *, | 1181 | static void remove_journal_hash(struct super_block *, |
1173 | struct reiserfs_journal_cnode **, | 1182 | struct reiserfs_journal_cnode **, |
1174 | struct reiserfs_journal_list *, unsigned long, | 1183 | struct reiserfs_journal_list *, unsigned long, |
1175 | int); | 1184 | int); |
1176 | 1185 | ||
/*
** once all the real blocks have been flushed, it is safe to remove them from the
** journal list for this transaction. Aside from freeing the cnode, this also allows the
** block to be reallocated for data blocks if it had been deleted.
*/
static void remove_all_from_journal_list(struct super_block *p_s_sb,
					 struct reiserfs_journal_list *jl,
					 int debug)
{
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
	struct reiserfs_journal_cnode *cn, *last;
	cn = jl->j_realblock;

	/* which is better, to lock once around the whole loop, or
	** to lock for each call to remove_journal_hash?
	*/
	while (cn) {
		/* a blocknr of 0 means this cnode is no longer in the hash */
		if (cn->blocknr != 0) {
			if (debug) {
				reiserfs_warning(p_s_sb,
						 "block %u, bh is %d, state %ld",
						 cn->blocknr, cn->bh ? 1 : 0,
						 cn->state);
			}
			cn->state = 0;
			remove_journal_hash(p_s_sb, journal->j_list_hash_table,
					    jl, cn->blocknr, 1);
		}
		/* grab ->next before freeing the node we just processed */
		last = cn;
		cn = cn->next;
		free_cnode(p_s_sb, last);
	}
	/* the whole realblock chain has been freed */
	jl->j_realblock = NULL;
}
1211 | 1220 | ||
/*
** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block.
** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start
** releasing blocks in this transaction for reuse as data blocks.
** called by flush_journal_list, before it calls remove_all_from_journal_list
**
** Returns 0 on success, -EIO if the journal is aborted or the header
** write fails.
*/
static int _update_journal_header_block(struct super_block *p_s_sb,
					unsigned long offset,
					unsigned long trans_id)
{
	struct reiserfs_journal_header *jh;
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);

	if (reiserfs_is_journal_aborted(journal))
		return -EIO;

	/* only ever move the header forward; older trans ids are ignored */
	if (trans_id >= journal->j_last_flush_trans_id) {
		/* wait for any in-flight write of the header buffer first */
		if (buffer_locked((journal->j_header_bh))) {
			wait_on_buffer((journal->j_header_bh));
			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
#ifdef CONFIG_REISERFS_CHECK
				reiserfs_warning(p_s_sb,
						 "journal-699: buffer write failed");
#endif
				return -EIO;
			}
		}
		journal->j_last_flush_trans_id = trans_id;
		journal->j_first_unflushed_offset = offset;
		/* copy the new replay start point into the on-disk header */
		jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
							b_data);
		jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
		jh->j_first_unflushed_offset = cpu_to_le32(offset);
		jh->j_mount_id = cpu_to_le32(journal->j_mount_id);

		if (reiserfs_barrier_flush(p_s_sb)) {
			int ret;
			lock_buffer(journal->j_header_bh);
			ret = submit_barrier_buffer(journal->j_header_bh);
			if (ret == -EOPNOTSUPP) {
				/* device cannot do barriers: mark the buffer
				 * uptodate again, disable barriers for this
				 * fs and fall back to a plain sync write */
				set_buffer_uptodate(journal->j_header_bh);
				disable_barrier(p_s_sb);
				goto sync;
			}
			wait_on_buffer(journal->j_header_bh);
			check_barrier_completion(p_s_sb, journal->j_header_bh);
		} else {
		      sync:
			set_buffer_dirty(journal->j_header_bh);
			sync_dirty_buffer(journal->j_header_bh);
		}
		if (!buffer_uptodate(journal->j_header_bh)) {
			reiserfs_warning(p_s_sb,
					 "journal-837: IO error during journal replay");
			return -EIO;
		}
	}
	return 0;
}
1272 | 1281 | ||
/* Thin wrapper around _update_journal_header_block(). */
static int update_journal_header_block(struct super_block *p_s_sb,
				       unsigned long offset,
				       unsigned long trans_id)
{
	return _update_journal_header_block(p_s_sb, offset, trans_id);
}
1279 | 1288 | ||
/*
** flush any and all journal lists older than you are
** can only be called from flush_journal_list
**
** Returns 0.  Restarts from the head of j_journal_list after every flush
** because flush_journal_list() removes the flushed entry from the list.
*/
static int flush_older_journal_lists(struct super_block *p_s_sb,
				     struct reiserfs_journal_list *jl)
{
	struct list_head *entry;
	struct reiserfs_journal_list *other_jl;
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
	unsigned long trans_id = jl->j_trans_id;

	/* we know we are the only ones flushing things, no extra race
	 * protection is required.
	 */
      restart:
	entry = journal->j_journal_list.next;
	/* Did we wrap? */
	if (entry == &journal->j_journal_list)
		return 0;
	other_jl = JOURNAL_LIST_ENTRY(entry);
	/* the list head is the oldest transaction; stop once we reach our
	 * own trans_id or anything newer */
	if (other_jl->j_trans_id < trans_id) {
		BUG_ON(other_jl->j_refcount <= 0);
		/* do not flush all */
		flush_journal_list(p_s_sb, other_jl, 0);

		/* other_jl is now deleted from the list */
		goto restart;
	}
	return 0;
}
1311 | 1320 | ||
1312 | static void del_from_work_list(struct super_block *s, | 1321 | static void del_from_work_list(struct super_block *s, |
1313 | struct reiserfs_journal_list *jl) | 1322 | struct reiserfs_journal_list *jl) |
1314 | { | 1323 | { |
1315 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 1324 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
1316 | if (!list_empty(&jl->j_working_list)) { | 1325 | if (!list_empty(&jl->j_working_list)) { |
1317 | list_del_init(&jl->j_working_list); | 1326 | list_del_init(&jl->j_working_list); |
1318 | journal->j_num_work_lists--; | 1327 | journal->j_num_work_lists--; |
1319 | } | 1328 | } |
1320 | } | 1329 | } |
1321 | 1330 | ||
1322 | /* flush a journal list, both commit and real blocks | 1331 | /* flush a journal list, both commit and real blocks |
1323 | ** | 1332 | ** |
1324 | ** always set flushall to 1, unless you are calling from inside | 1333 | ** always set flushall to 1, unless you are calling from inside |
1325 | ** flush_journal_list | 1334 | ** flush_journal_list |
1326 | ** | 1335 | ** |
1327 | ** IMPORTANT. This can only be called while there are no journal writers, | 1336 | ** IMPORTANT. This can only be called while there are no journal writers, |
1328 | ** and the journal is locked. That means it can only be called from | 1337 | ** and the journal is locked. That means it can only be called from |
1329 | ** do_journal_end, or by journal_release | 1338 | ** do_journal_end, or by journal_release |
1330 | */ | 1339 | */ |
1331 | static int flush_journal_list(struct super_block *s, | 1340 | static int flush_journal_list(struct super_block *s, |
1332 | struct reiserfs_journal_list *jl, int flushall) | 1341 | struct reiserfs_journal_list *jl, int flushall) |
1333 | { | 1342 | { |
1334 | struct reiserfs_journal_list *pjl; | 1343 | struct reiserfs_journal_list *pjl; |
1335 | struct reiserfs_journal_cnode *cn, *last; | 1344 | struct reiserfs_journal_cnode *cn, *last; |
1336 | int count; | 1345 | int count; |
1337 | int was_jwait = 0; | 1346 | int was_jwait = 0; |
1338 | int was_dirty = 0; | 1347 | int was_dirty = 0; |
1339 | struct buffer_head *saved_bh; | 1348 | struct buffer_head *saved_bh; |
1340 | unsigned long j_len_saved = jl->j_len; | 1349 | unsigned long j_len_saved = jl->j_len; |
1341 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 1350 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
1342 | int err = 0; | 1351 | int err = 0; |
1343 | 1352 | ||
1344 | BUG_ON(j_len_saved <= 0); | 1353 | BUG_ON(j_len_saved <= 0); |
1345 | 1354 | ||
1346 | if (atomic_read(&journal->j_wcount) != 0) { | 1355 | if (atomic_read(&journal->j_wcount) != 0) { |
1347 | reiserfs_warning(s, | 1356 | reiserfs_warning(s, |
1348 | "clm-2048: flush_journal_list called with wcount %d", | 1357 | "clm-2048: flush_journal_list called with wcount %d", |
1349 | atomic_read(&journal->j_wcount)); | 1358 | atomic_read(&journal->j_wcount)); |
1350 | } | 1359 | } |
1351 | BUG_ON(jl->j_trans_id == 0); | 1360 | BUG_ON(jl->j_trans_id == 0); |
1352 | 1361 | ||
1353 | /* if flushall == 0, the lock is already held */ | 1362 | /* if flushall == 0, the lock is already held */ |
1354 | if (flushall) { | 1363 | if (flushall) { |
1355 | down(&journal->j_flush_sem); | 1364 | down(&journal->j_flush_sem); |
1356 | } else if (!down_trylock(&journal->j_flush_sem)) { | 1365 | } else if (!down_trylock(&journal->j_flush_sem)) { |
1357 | BUG(); | 1366 | BUG(); |
1358 | } | 1367 | } |
1359 | 1368 | ||
1360 | count = 0; | 1369 | count = 0; |
1361 | if (j_len_saved > journal->j_trans_max) { | 1370 | if (j_len_saved > journal->j_trans_max) { |
1362 | reiserfs_panic(s, | 1371 | reiserfs_panic(s, |
1363 | "journal-715: flush_journal_list, length is %lu, trans id %lu\n", | 1372 | "journal-715: flush_journal_list, length is %lu, trans id %lu\n", |
1364 | j_len_saved, jl->j_trans_id); | 1373 | j_len_saved, jl->j_trans_id); |
1365 | return 0; | 1374 | return 0; |
1366 | } | 1375 | } |
1367 | 1376 | ||
1368 | get_fs_excl(); | 1377 | get_fs_excl(); |
1369 | 1378 | ||
1370 | /* if all the work is already done, get out of here */ | 1379 | /* if all the work is already done, get out of here */ |
1371 | if (atomic_read(&(jl->j_nonzerolen)) <= 0 && | 1380 | if (atomic_read(&(jl->j_nonzerolen)) <= 0 && |
1372 | atomic_read(&(jl->j_commit_left)) <= 0) { | 1381 | atomic_read(&(jl->j_commit_left)) <= 0) { |
1373 | goto flush_older_and_return; | 1382 | goto flush_older_and_return; |
1374 | } | 1383 | } |
1375 | 1384 | ||
1376 | /* start by putting the commit list on disk. This will also flush | 1385 | /* start by putting the commit list on disk. This will also flush |
1377 | ** the commit lists of any olders transactions | 1386 | ** the commit lists of any olders transactions |
1378 | */ | 1387 | */ |
1379 | flush_commit_list(s, jl, 1); | 1388 | flush_commit_list(s, jl, 1); |
1380 | 1389 | ||
1381 | if (!(jl->j_state & LIST_DIRTY) | 1390 | if (!(jl->j_state & LIST_DIRTY) |
1382 | && !reiserfs_is_journal_aborted(journal)) | 1391 | && !reiserfs_is_journal_aborted(journal)) |
1383 | BUG(); | 1392 | BUG(); |
1384 | 1393 | ||
1385 | /* are we done now? */ | 1394 | /* are we done now? */ |
1386 | if (atomic_read(&(jl->j_nonzerolen)) <= 0 && | 1395 | if (atomic_read(&(jl->j_nonzerolen)) <= 0 && |
1387 | atomic_read(&(jl->j_commit_left)) <= 0) { | 1396 | atomic_read(&(jl->j_commit_left)) <= 0) { |
1388 | goto flush_older_and_return; | 1397 | goto flush_older_and_return; |
1389 | } | 1398 | } |
1390 | 1399 | ||
1391 | /* loop through each cnode, see if we need to write it, | 1400 | /* loop through each cnode, see if we need to write it, |
1392 | ** or wait on a more recent transaction, or just ignore it | 1401 | ** or wait on a more recent transaction, or just ignore it |
1393 | */ | 1402 | */ |
1394 | if (atomic_read(&(journal->j_wcount)) != 0) { | 1403 | if (atomic_read(&(journal->j_wcount)) != 0) { |
1395 | reiserfs_panic(s, | 1404 | reiserfs_panic(s, |
1396 | "journal-844: panic journal list is flushing, wcount is not 0\n"); | 1405 | "journal-844: panic journal list is flushing, wcount is not 0\n"); |
1397 | } | 1406 | } |
1398 | cn = jl->j_realblock; | 1407 | cn = jl->j_realblock; |
1399 | while (cn) { | 1408 | while (cn) { |
1400 | was_jwait = 0; | 1409 | was_jwait = 0; |
1401 | was_dirty = 0; | 1410 | was_dirty = 0; |
1402 | saved_bh = NULL; | 1411 | saved_bh = NULL; |
1403 | /* blocknr of 0 is no longer in the hash, ignore it */ | 1412 | /* blocknr of 0 is no longer in the hash, ignore it */ |
1404 | if (cn->blocknr == 0) { | 1413 | if (cn->blocknr == 0) { |
1405 | goto free_cnode; | 1414 | goto free_cnode; |
1406 | } | 1415 | } |
1407 | 1416 | ||
1408 | /* This transaction failed commit. Don't write out to the disk */ | 1417 | /* This transaction failed commit. Don't write out to the disk */ |
1409 | if (!(jl->j_state & LIST_DIRTY)) | 1418 | if (!(jl->j_state & LIST_DIRTY)) |
1410 | goto free_cnode; | 1419 | goto free_cnode; |
1411 | 1420 | ||
1412 | pjl = find_newer_jl_for_cn(cn); | 1421 | pjl = find_newer_jl_for_cn(cn); |
1413 | /* the order is important here. We check pjl to make sure we | 1422 | /* the order is important here. We check pjl to make sure we |
1414 | ** don't clear BH_JDirty_wait if we aren't the one writing this | 1423 | ** don't clear BH_JDirty_wait if we aren't the one writing this |
1415 | ** block to disk | 1424 | ** block to disk |
1416 | */ | 1425 | */ |
1417 | if (!pjl && cn->bh) { | 1426 | if (!pjl && cn->bh) { |
1418 | saved_bh = cn->bh; | 1427 | saved_bh = cn->bh; |
1419 | 1428 | ||
1420 | /* we do this to make sure nobody releases the buffer while | 1429 | /* we do this to make sure nobody releases the buffer while |
1421 | ** we are working with it | 1430 | ** we are working with it |
1422 | */ | 1431 | */ |
1423 | get_bh(saved_bh); | 1432 | get_bh(saved_bh); |
1424 | 1433 | ||
1425 | if (buffer_journal_dirty(saved_bh)) { | 1434 | if (buffer_journal_dirty(saved_bh)) { |
1426 | BUG_ON(!can_dirty(cn)); | 1435 | BUG_ON(!can_dirty(cn)); |
1427 | was_jwait = 1; | 1436 | was_jwait = 1; |
1428 | was_dirty = 1; | 1437 | was_dirty = 1; |
1429 | } else if (can_dirty(cn)) { | 1438 | } else if (can_dirty(cn)) { |
1430 | /* everything with !pjl && jwait should be writable */ | 1439 | /* everything with !pjl && jwait should be writable */ |
1431 | BUG(); | 1440 | BUG(); |
1432 | } | 1441 | } |
1433 | } | 1442 | } |
1434 | 1443 | ||
1435 | /* if someone has this block in a newer transaction, just make | 1444 | /* if someone has this block in a newer transaction, just make |
1436 | ** sure they are commited, and don't try writing it to disk | 1445 | ** sure they are commited, and don't try writing it to disk |
1437 | */ | 1446 | */ |
1438 | if (pjl) { | 1447 | if (pjl) { |
1439 | if (atomic_read(&pjl->j_commit_left)) | 1448 | if (atomic_read(&pjl->j_commit_left)) |
1440 | flush_commit_list(s, pjl, 1); | 1449 | flush_commit_list(s, pjl, 1); |
1441 | goto free_cnode; | 1450 | goto free_cnode; |
1442 | } | 1451 | } |
1443 | 1452 | ||
1444 | /* bh == NULL when the block got to disk on its own, OR, | 1453 | /* bh == NULL when the block got to disk on its own, OR, |
1445 | ** the block got freed in a future transaction | 1454 | ** the block got freed in a future transaction |
1446 | */ | 1455 | */ |
1447 | if (saved_bh == NULL) { | 1456 | if (saved_bh == NULL) { |
1448 | goto free_cnode; | 1457 | goto free_cnode; |
1449 | } | 1458 | } |
1450 | 1459 | ||
1451 | /* this should never happen. kupdate_one_transaction has this list | 1460 | /* this should never happen. kupdate_one_transaction has this list |
1452 | ** locked while it works, so we should never see a buffer here that | 1461 | ** locked while it works, so we should never see a buffer here that |
1453 | ** is not marked JDirty_wait | 1462 | ** is not marked JDirty_wait |
1454 | */ | 1463 | */ |
1455 | if ((!was_jwait) && !buffer_locked(saved_bh)) { | 1464 | if ((!was_jwait) && !buffer_locked(saved_bh)) { |
1456 | reiserfs_warning(s, | 1465 | reiserfs_warning(s, |
1457 | "journal-813: BAD! buffer %llu %cdirty %cjwait, " | 1466 | "journal-813: BAD! buffer %llu %cdirty %cjwait, " |
1458 | "not in a newer tranasction", | 1467 | "not in a newer tranasction", |
1459 | (unsigned long long)saved_bh-> | 1468 | (unsigned long long)saved_bh-> |
1460 | b_blocknr, was_dirty ? ' ' : '!', | 1469 | b_blocknr, was_dirty ? ' ' : '!', |
1461 | was_jwait ? ' ' : '!'); | 1470 | was_jwait ? ' ' : '!'); |
1462 | } | 1471 | } |
1463 | if (was_dirty) { | 1472 | if (was_dirty) { |
1464 | /* we inc again because saved_bh gets decremented at free_cnode */ | 1473 | /* we inc again because saved_bh gets decremented at free_cnode */ |
1465 | get_bh(saved_bh); | 1474 | get_bh(saved_bh); |
1466 | set_bit(BLOCK_NEEDS_FLUSH, &cn->state); | 1475 | set_bit(BLOCK_NEEDS_FLUSH, &cn->state); |
1467 | lock_buffer(saved_bh); | 1476 | lock_buffer(saved_bh); |
1468 | BUG_ON(cn->blocknr != saved_bh->b_blocknr); | 1477 | BUG_ON(cn->blocknr != saved_bh->b_blocknr); |
1469 | if (buffer_dirty(saved_bh)) | 1478 | if (buffer_dirty(saved_bh)) |
1470 | submit_logged_buffer(saved_bh); | 1479 | submit_logged_buffer(saved_bh); |
1471 | else | 1480 | else |
1472 | unlock_buffer(saved_bh); | 1481 | unlock_buffer(saved_bh); |
1473 | count++; | 1482 | count++; |
1474 | } else { | 1483 | } else { |
1475 | reiserfs_warning(s, | 1484 | reiserfs_warning(s, |
1476 | "clm-2082: Unable to flush buffer %llu in %s", | 1485 | "clm-2082: Unable to flush buffer %llu in %s", |
1477 | (unsigned long long)saved_bh-> | 1486 | (unsigned long long)saved_bh-> |
1478 | b_blocknr, __FUNCTION__); | 1487 | b_blocknr, __FUNCTION__); |
1479 | } | 1488 | } |
1480 | free_cnode: | 1489 | free_cnode: |
1481 | last = cn; | 1490 | last = cn; |
1482 | cn = cn->next; | 1491 | cn = cn->next; |
1483 | if (saved_bh) { | 1492 | if (saved_bh) { |
1484 | /* we incremented this to keep others from taking the buffer head away */ | 1493 | /* we incremented this to keep others from taking the buffer head away */ |
1485 | put_bh(saved_bh); | 1494 | put_bh(saved_bh); |
1486 | if (atomic_read(&(saved_bh->b_count)) < 0) { | 1495 | if (atomic_read(&(saved_bh->b_count)) < 0) { |
1487 | reiserfs_warning(s, | 1496 | reiserfs_warning(s, |
1488 | "journal-945: saved_bh->b_count < 0"); | 1497 | "journal-945: saved_bh->b_count < 0"); |
1489 | } | 1498 | } |
1490 | } | 1499 | } |
1491 | } | 1500 | } |
1492 | if (count > 0) { | 1501 | if (count > 0) { |
1493 | cn = jl->j_realblock; | 1502 | cn = jl->j_realblock; |
1494 | while (cn) { | 1503 | while (cn) { |
1495 | if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { | 1504 | if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { |
1496 | if (!cn->bh) { | 1505 | if (!cn->bh) { |
1497 | reiserfs_panic(s, | 1506 | reiserfs_panic(s, |
1498 | "journal-1011: cn->bh is NULL\n"); | 1507 | "journal-1011: cn->bh is NULL\n"); |
1499 | } | 1508 | } |
1500 | wait_on_buffer(cn->bh); | 1509 | wait_on_buffer(cn->bh); |
1501 | if (!cn->bh) { | 1510 | if (!cn->bh) { |
1502 | reiserfs_panic(s, | 1511 | reiserfs_panic(s, |
1503 | "journal-1012: cn->bh is NULL\n"); | 1512 | "journal-1012: cn->bh is NULL\n"); |
1504 | } | 1513 | } |
1505 | if (unlikely(!buffer_uptodate(cn->bh))) { | 1514 | if (unlikely(!buffer_uptodate(cn->bh))) { |
1506 | #ifdef CONFIG_REISERFS_CHECK | 1515 | #ifdef CONFIG_REISERFS_CHECK |
1507 | reiserfs_warning(s, | 1516 | reiserfs_warning(s, |
1508 | "journal-949: buffer write failed\n"); | 1517 | "journal-949: buffer write failed\n"); |
1509 | #endif | 1518 | #endif |
1510 | err = -EIO; | 1519 | err = -EIO; |
1511 | } | 1520 | } |
1512 | /* note, we must clear the JDirty_wait bit after the up to date | 1521 | /* note, we must clear the JDirty_wait bit after the up to date |
1513 | ** check, otherwise we race against our flushpage routine | 1522 | ** check, otherwise we race against our flushpage routine |
1514 | */ | 1523 | */ |
1515 | BUG_ON(!test_clear_buffer_journal_dirty | 1524 | BUG_ON(!test_clear_buffer_journal_dirty |
1516 | (cn->bh)); | 1525 | (cn->bh)); |
1517 | 1526 | ||
1518 | /* undo the inc from journal_mark_dirty */ | 1527 | /* undo the inc from journal_mark_dirty */ |
1519 | put_bh(cn->bh); | 1528 | put_bh(cn->bh); |
1520 | brelse(cn->bh); | 1529 | brelse(cn->bh); |
1521 | } | 1530 | } |
1522 | cn = cn->next; | 1531 | cn = cn->next; |
1523 | } | 1532 | } |
1524 | } | 1533 | } |
1525 | 1534 | ||
1526 | if (err) | 1535 | if (err) |
1527 | reiserfs_abort(s, -EIO, | 1536 | reiserfs_abort(s, -EIO, |
1528 | "Write error while pushing transaction to disk in %s", | 1537 | "Write error while pushing transaction to disk in %s", |
1529 | __FUNCTION__); | 1538 | __FUNCTION__); |
1530 | flush_older_and_return: | 1539 | flush_older_and_return: |
1531 | 1540 | ||
1532 | /* before we can update the journal header block, we _must_ flush all | 1541 | /* before we can update the journal header block, we _must_ flush all |
1533 | ** real blocks from all older transactions to disk. This is because | 1542 | ** real blocks from all older transactions to disk. This is because |
1534 | ** once the header block is updated, this transaction will not be | 1543 | ** once the header block is updated, this transaction will not be |
1535 | ** replayed after a crash | 1544 | ** replayed after a crash |
1536 | */ | 1545 | */ |
1537 | if (flushall) { | 1546 | if (flushall) { |
1538 | flush_older_journal_lists(s, jl); | 1547 | flush_older_journal_lists(s, jl); |
1539 | } | 1548 | } |
1540 | 1549 | ||
1541 | err = journal->j_errno; | 1550 | err = journal->j_errno; |
1542 | /* before we can remove everything from the hash tables for this | 1551 | /* before we can remove everything from the hash tables for this |
1543 | ** transaction, we must make sure it can never be replayed | 1552 | ** transaction, we must make sure it can never be replayed |
1544 | ** | 1553 | ** |
1545 | ** since we are only called from do_journal_end, we know for sure there | 1554 | ** since we are only called from do_journal_end, we know for sure there |
1546 | ** are no allocations going on while we are flushing journal lists. So, | 1555 | ** are no allocations going on while we are flushing journal lists. So, |
1547 | ** we only need to update the journal header block for the last list | 1556 | ** we only need to update the journal header block for the last list |
1548 | ** being flushed | 1557 | ** being flushed |
1549 | */ | 1558 | */ |
1550 | if (!err && flushall) { | 1559 | if (!err && flushall) { |
1551 | err = | 1560 | err = |
1552 | update_journal_header_block(s, | 1561 | update_journal_header_block(s, |
1553 | (jl->j_start + jl->j_len + | 1562 | (jl->j_start + jl->j_len + |
1554 | 2) % SB_ONDISK_JOURNAL_SIZE(s), | 1563 | 2) % SB_ONDISK_JOURNAL_SIZE(s), |
1555 | jl->j_trans_id); | 1564 | jl->j_trans_id); |
1556 | if (err) | 1565 | if (err) |
1557 | reiserfs_abort(s, -EIO, | 1566 | reiserfs_abort(s, -EIO, |
1558 | "Write error while updating journal header in %s", | 1567 | "Write error while updating journal header in %s", |
1559 | __FUNCTION__); | 1568 | __FUNCTION__); |
1560 | } | 1569 | } |
1561 | remove_all_from_journal_list(s, jl, 0); | 1570 | remove_all_from_journal_list(s, jl, 0); |
1562 | list_del_init(&jl->j_list); | 1571 | list_del_init(&jl->j_list); |
1563 | journal->j_num_lists--; | 1572 | journal->j_num_lists--; |
1564 | del_from_work_list(s, jl); | 1573 | del_from_work_list(s, jl); |
1565 | 1574 | ||
1566 | if (journal->j_last_flush_id != 0 && | 1575 | if (journal->j_last_flush_id != 0 && |
1567 | (jl->j_trans_id - journal->j_last_flush_id) != 1) { | 1576 | (jl->j_trans_id - journal->j_last_flush_id) != 1) { |
1568 | reiserfs_warning(s, "clm-2201: last flush %lu, current %lu", | 1577 | reiserfs_warning(s, "clm-2201: last flush %lu, current %lu", |
1569 | journal->j_last_flush_id, jl->j_trans_id); | 1578 | journal->j_last_flush_id, jl->j_trans_id); |
1570 | } | 1579 | } |
1571 | journal->j_last_flush_id = jl->j_trans_id; | 1580 | journal->j_last_flush_id = jl->j_trans_id; |
1572 | 1581 | ||
1573 | /* not strictly required since we are freeing the list, but it should | 1582 | /* not strictly required since we are freeing the list, but it should |
1574 | * help find code using dead lists later on | 1583 | * help find code using dead lists later on |
1575 | */ | 1584 | */ |
1576 | jl->j_len = 0; | 1585 | jl->j_len = 0; |
1577 | atomic_set(&(jl->j_nonzerolen), 0); | 1586 | atomic_set(&(jl->j_nonzerolen), 0); |
1578 | jl->j_start = 0; | 1587 | jl->j_start = 0; |
1579 | jl->j_realblock = NULL; | 1588 | jl->j_realblock = NULL; |
1580 | jl->j_commit_bh = NULL; | 1589 | jl->j_commit_bh = NULL; |
1581 | jl->j_trans_id = 0; | 1590 | jl->j_trans_id = 0; |
1582 | jl->j_state = 0; | 1591 | jl->j_state = 0; |
1583 | put_journal_list(s, jl); | 1592 | put_journal_list(s, jl); |
1584 | if (flushall) | 1593 | if (flushall) |
1585 | up(&journal->j_flush_sem); | 1594 | up(&journal->j_flush_sem); |
1586 | put_fs_excl(); | 1595 | put_fs_excl(); |
1587 | return err; | 1596 | return err; |
1588 | } | 1597 | } |
1589 | 1598 | ||
/* queue every dirty, flushable real block of transaction jl into *chunk
** (add_to_chunk is handed write_chunk as its flush callback).  Returns the
** number of buffers queued; 0 if the list is empty or already written out.
*/
static int write_one_transaction(struct super_block *s,
				 struct reiserfs_journal_list *jl,
				 struct buffer_chunk *chunk)
{
	struct reiserfs_journal_cnode *cn;
	int ret = 0;		/* count of buffers added to the chunk */

	jl->j_state |= LIST_TOUCHED;
	del_from_work_list(s, jl);
	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
		return 0;
	}

	cn = jl->j_realblock;
	while (cn) {
		/* if the blocknr == 0, this has been cleared from the hash,
		** skip it
		*/
		if (cn->blocknr == 0) {
			goto next;
		}
		if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
			struct buffer_head *tmp_bh;
			/* we can race against journal_mark_freed when we try
			 * to lock_buffer(cn->bh), so we have to inc the buffer
			 * count, and recheck things after locking
			 */
			tmp_bh = cn->bh;
			get_bh(tmp_bh);
			lock_buffer(tmp_bh);
			/* recheck under the buffer lock: journal_mark_freed may
			 * have cleared cn->bh or the dirty state meanwhile
			 */
			if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
				if (!buffer_journal_dirty(tmp_bh) ||
				    buffer_journal_prepared(tmp_bh))
					BUG();
				add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
				ret++;
			} else {
				/* note, cn->bh might be null now */
				unlock_buffer(tmp_bh);
			}
			put_bh(tmp_bh);
		}
	      next:
		cn = cn->next;
		cond_resched();
	}
	return ret;
}
1638 | 1647 | ||
/* used by flush_commit_list
** walk the real blocks of transaction jl and mark the flushable ones dirty
** so normal writeback can send them to disk.
*/
static int dirty_one_transaction(struct super_block *s,
				 struct reiserfs_journal_list *jl)
{
	struct reiserfs_journal_cnode *cn;
	struct reiserfs_journal_list *pjl;
	int ret = 0;	/* always 0 in this body; kept for the caller's
			 * error-return convention */

	jl->j_state |= LIST_DIRTY;
	cn = jl->j_realblock;
	while (cn) {
		/* look for a more recent transaction that logged this
		** buffer.  Only the most recent transaction with a buffer in
		** it is allowed to send that buffer to disk
		*/
		pjl = find_newer_jl_for_cn(cn);
		if (!pjl && cn->blocknr && cn->bh
		    && buffer_journal_dirty(cn->bh)) {
			BUG_ON(!can_dirty(cn));
			/* if the buffer is prepared, it will either be logged
			 * or restored.  If restored, we need to make sure
			 * it actually gets marked dirty
			 */
			clear_buffer_journal_new(cn->bh);
			if (buffer_journal_prepared(cn->bh)) {
				set_buffer_journal_restore_dirty(cn->bh);
			} else {
				set_buffer_journal_test(cn->bh);
				mark_buffer_dirty(cn->bh);
			}
		}
		cn = cn->next;
	}
	return ret;
}
1674 | 1683 | ||
/* batch-write dirty real blocks from a run of consecutive journal lists,
** starting at jl.  Processes up to num_trans lists when num_trans is
** non-zero, otherwise keeps going until num_blocks buffers have been
** queued.  Serialized against other flushers by j_flush_sem.
**
** NOTE(review): next_jl and next_trans_id are not read or written in this
** body -- presumably reserved for callers/future use; confirm before
** relying on them.
*/
static int kupdate_transactions(struct super_block *s,
				struct reiserfs_journal_list *jl,
				struct reiserfs_journal_list **next_jl,
				unsigned long *next_trans_id,
				int num_blocks, int num_trans)
{
	int ret = 0;
	int written = 0;		/* buffers queued so far */
	int transactions_flushed = 0;	/* lists processed so far */
	unsigned long orig_trans_id = jl->j_trans_id;
	struct buffer_chunk chunk;
	struct list_head *entry;
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	chunk.nr = 0;

	down(&journal->j_flush_sem);
	/* the starting list may have been flushed and freed before we got
	 * the semaphore; bail if so
	 */
	if (!journal_list_still_alive(s, orig_trans_id)) {
		goto done;
	}

	/* we've got j_flush_sem held, nobody is going to delete any
	 * of these lists out from underneath us
	 */
	while ((num_trans && transactions_flushed < num_trans) ||
	       (!num_trans && written < num_blocks)) {

		/* skip lists that are empty, already picked up, still
		 * committing, or not yet marked dirty
		 */
		if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
		    atomic_read(&jl->j_commit_left)
		    || !(jl->j_state & LIST_DIRTY)) {
			del_from_work_list(s, jl);
			break;
		}
		ret = write_one_transaction(s, jl, &chunk);

		if (ret < 0)
			goto done;
		transactions_flushed++;
		written += ret;
		entry = jl->j_list.next;

		/* did we wrap? */
		if (entry == &journal->j_journal_list) {
			break;
		}
		jl = JOURNAL_LIST_ENTRY(entry);

		/* don't bother with older transactions */
		if (jl->j_trans_id <= orig_trans_id)
			break;
	}
	/* flush any buffers still batched in the chunk */
	if (chunk.nr) {
		write_chunk(&chunk);
	}

      done:
	up(&journal->j_flush_sem);
	return ret;
}
1733 | 1742 | ||
/* for o_sync and fsync heavy applications, they tend to use
** all the journal list slots with tiny transactions.  These
** trigger lots and lots of calls to update the header block, which
** adds seeks and slows things down.
**
** This function tries to clear out a large chunk of the journal lists
** at once, which makes everything faster since only the newest journal
** list updates the header block
*/
static int flush_used_journal_lists(struct super_block *s,
				    struct reiserfs_journal_list *jl)
{
	unsigned long len = 0;	/* total nonzero blocks spanned so far */
	unsigned long cur_len;
	int ret;
	int i;
	int limit = 256;	/* max blocks to span; raised for data=journal */
	struct reiserfs_journal_list *tjl;
	struct reiserfs_journal_list *flush_jl;
	unsigned long trans_id;
	struct reiserfs_journal *journal = SB_JOURNAL(s);

	flush_jl = tjl = jl;

	/* in data logging mode, try harder to flush a lot of blocks */
	if (reiserfs_data_log(s))
		limit = 1024;
	/* flush for 256 transactions or limit blocks, whichever comes first */
	for (i = 0; i < 256 && len < limit; i++) {
		/* stop at lists still committing, or older than jl */
		if (atomic_read(&tjl->j_commit_left) ||
		    tjl->j_trans_id < jl->j_trans_id) {
			break;
		}
		cur_len = atomic_read(&tjl->j_nonzerolen);
		if (cur_len > 0) {
			/* let kupdate_transactions pick this list up again */
			tjl->j_state &= ~LIST_TOUCHED;
		}
		len += cur_len;
		flush_jl = tjl;
		if (tjl->j_list.next == &journal->j_journal_list)
			break;
		tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
	}
	/* try to find a group of blocks we can flush across all the
	** transactions, but only bother if we've actually spanned
	** across multiple lists
	*/
	if (flush_jl != jl) {
		/* NOTE(review): ret is assigned but never checked -- this
		 * writeback appears best-effort, with errors presumably
		 * surfacing via journal abort inside flush_journal_list.
		 * Confirm before depending on error propagation here.
		 */
		ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
	}
	flush_journal_list(s, flush_jl, 1);
	return 0;
}
1787 | 1796 | ||
1788 | /* | 1797 | /* |
1789 | ** removes any nodes in table with name block and dev as bh. | 1798 | ** removes any nodes in table with name block and dev as bh. |
1790 | ** only touchs the hnext and hprev pointers. | 1799 | ** only touchs the hnext and hprev pointers. |
1791 | */ | 1800 | */ |
1792 | void remove_journal_hash(struct super_block *sb, | 1801 | void remove_journal_hash(struct super_block *sb, |
1793 | struct reiserfs_journal_cnode **table, | 1802 | struct reiserfs_journal_cnode **table, |
1794 | struct reiserfs_journal_list *jl, | 1803 | struct reiserfs_journal_list *jl, |
1795 | unsigned long block, int remove_freed) | 1804 | unsigned long block, int remove_freed) |
1796 | { | 1805 | { |
1797 | struct reiserfs_journal_cnode *cur; | 1806 | struct reiserfs_journal_cnode *cur; |
1798 | struct reiserfs_journal_cnode **head; | 1807 | struct reiserfs_journal_cnode **head; |
1799 | 1808 | ||
1800 | head = &(journal_hash(table, sb, block)); | 1809 | head = &(journal_hash(table, sb, block)); |
1801 | if (!head) { | 1810 | if (!head) { |
1802 | return; | 1811 | return; |
1803 | } | 1812 | } |
1804 | cur = *head; | 1813 | cur = *head; |
1805 | while (cur) { | 1814 | while (cur) { |
1806 | if (cur->blocknr == block && cur->sb == sb | 1815 | if (cur->blocknr == block && cur->sb == sb |
1807 | && (jl == NULL || jl == cur->jlist) | 1816 | && (jl == NULL || jl == cur->jlist) |
1808 | && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { | 1817 | && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { |
1809 | if (cur->hnext) { | 1818 | if (cur->hnext) { |
1810 | cur->hnext->hprev = cur->hprev; | 1819 | cur->hnext->hprev = cur->hprev; |
1811 | } | 1820 | } |
1812 | if (cur->hprev) { | 1821 | if (cur->hprev) { |
1813 | cur->hprev->hnext = cur->hnext; | 1822 | cur->hprev->hnext = cur->hnext; |
1814 | } else { | 1823 | } else { |
1815 | *head = cur->hnext; | 1824 | *head = cur->hnext; |
1816 | } | 1825 | } |
1817 | cur->blocknr = 0; | 1826 | cur->blocknr = 0; |
1818 | cur->sb = NULL; | 1827 | cur->sb = NULL; |
1819 | cur->state = 0; | 1828 | cur->state = 0; |
1820 | if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ | 1829 | if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ |
1821 | atomic_dec(&(cur->jlist->j_nonzerolen)); | 1830 | atomic_dec(&(cur->jlist->j_nonzerolen)); |
1822 | cur->bh = NULL; | 1831 | cur->bh = NULL; |
1823 | cur->jlist = NULL; | 1832 | cur->jlist = NULL; |
1824 | } | 1833 | } |
1825 | cur = cur->hnext; | 1834 | cur = cur->hnext; |
1826 | } | 1835 | } |
1827 | } | 1836 | } |
1828 | 1837 | ||
1829 | static void free_journal_ram(struct super_block *p_s_sb) | 1838 | static void free_journal_ram(struct super_block *p_s_sb) |
1830 | { | 1839 | { |
1831 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); | 1840 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); |
1832 | kfree(journal->j_current_jl); | 1841 | kfree(journal->j_current_jl); |
1833 | journal->j_num_lists--; | 1842 | journal->j_num_lists--; |
1834 | 1843 | ||
1835 | vfree(journal->j_cnode_free_orig); | 1844 | vfree(journal->j_cnode_free_orig); |
1836 | free_list_bitmaps(p_s_sb, journal->j_list_bitmap); | 1845 | free_list_bitmaps(p_s_sb, journal->j_list_bitmap); |
1837 | free_bitmap_nodes(p_s_sb); /* must be after free_list_bitmaps */ | 1846 | free_bitmap_nodes(p_s_sb); /* must be after free_list_bitmaps */ |
1838 | if (journal->j_header_bh) { | 1847 | if (journal->j_header_bh) { |
1839 | brelse(journal->j_header_bh); | 1848 | brelse(journal->j_header_bh); |
1840 | } | 1849 | } |
1841 | /* j_header_bh is on the journal dev, make sure not to release the journal | 1850 | /* j_header_bh is on the journal dev, make sure not to release the journal |
1842 | * dev until we brelse j_header_bh | 1851 | * dev until we brelse j_header_bh |
1843 | */ | 1852 | */ |
1844 | release_journal_dev(p_s_sb, journal); | 1853 | release_journal_dev(p_s_sb, journal); |
1845 | vfree(journal); | 1854 | vfree(journal); |
1846 | } | 1855 | } |
1847 | 1856 | ||
/*
** call on unmount.  Only set error to 1 if you haven't made your way out
** of read_super() yet.  Any other caller must keep error at 0.
**
** flushes outstanding transactions (when error == 0 and not read-only),
** drains the commit workqueue, and frees all journal ram.  Always
** returns 0.
*/
static int do_journal_release(struct reiserfs_transaction_handle *th,
			      struct super_block *p_s_sb, int error)
{
	struct reiserfs_transaction_handle myth;
	int flushed = 0;	/* NOTE(review): set but never read below --
				 * appears vestigial; confirm before removing */
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);

	/* we only want to flush out transactions if we were called with error == 0
	 */
	if (!error && !(p_s_sb->s_flags & MS_RDONLY)) {
		/* end the current trans */
		BUG_ON(!th->t_trans_id);
		do_journal_end(th, p_s_sb, 10, FLUSH_ALL);

		/* make sure something gets logged to force our way into the flush code */
		if (!journal_join(&myth, p_s_sb, 1)) {
			reiserfs_prepare_for_journal(p_s_sb,
						     SB_BUFFER_WITH_SB(p_s_sb),
						     1);
			journal_mark_dirty(&myth, p_s_sb,
					   SB_BUFFER_WITH_SB(p_s_sb));
			do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL);
			flushed = 1;
		}
	}

	/* this also catches errors during the do_journal_end above */
	if (!error && reiserfs_is_journal_aborted(journal)) {
		memset(&myth, 0, sizeof(myth));
		if (!journal_join_abort(&myth, p_s_sb, 1)) {
			reiserfs_prepare_for_journal(p_s_sb,
						     SB_BUFFER_WITH_SB(p_s_sb),
						     1);
			journal_mark_dirty(&myth, p_s_sb,
					   SB_BUFFER_WITH_SB(p_s_sb));
			do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL);
		}
	}

	reiserfs_mounted_fs_count--;
	/* wait for all commits to finish */
	cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work);
	flush_workqueue(commit_wq);
	if (!reiserfs_mounted_fs_count) {
		/* last reiserfs mount gone: tear down the shared workqueue */
		destroy_workqueue(commit_wq);
		commit_wq = NULL;
	}

	free_journal_ram(p_s_sb);

	return 0;
}
1904 | 1913 | ||
/*
** call on unmount.  flush all journal trans, release all alloc'd ram
*/
int journal_release(struct reiserfs_transaction_handle *th,
		    struct super_block *p_s_sb)
{
	/* error == 0: flush outstanding transactions before freeing */
	return do_journal_release(th, p_s_sb, 0);
}
1913 | 1922 | ||
/*
** only call from an error condition inside reiserfs_read_super!
*/
int journal_release_error(struct reiserfs_transaction_handle *th,
			  struct super_block *p_s_sb)
{
	/* error == 1: skip the transaction flush, just free journal ram */
	return do_journal_release(th, p_s_sb, 1);
}
1922 | 1931 | ||
1923 | /* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ | 1932 | /* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ |
1924 | static int journal_compare_desc_commit(struct super_block *p_s_sb, | 1933 | static int journal_compare_desc_commit(struct super_block *p_s_sb, |
1925 | struct reiserfs_journal_desc *desc, | 1934 | struct reiserfs_journal_desc *desc, |
1926 | struct reiserfs_journal_commit *commit) | 1935 | struct reiserfs_journal_commit *commit) |
1927 | { | 1936 | { |
1928 | if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || | 1937 | if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || |
1929 | get_commit_trans_len(commit) != get_desc_trans_len(desc) || | 1938 | get_commit_trans_len(commit) != get_desc_trans_len(desc) || |
1930 | get_commit_trans_len(commit) > SB_JOURNAL(p_s_sb)->j_trans_max || | 1939 | get_commit_trans_len(commit) > SB_JOURNAL(p_s_sb)->j_trans_max || |
1931 | get_commit_trans_len(commit) <= 0) { | 1940 | get_commit_trans_len(commit) <= 0) { |
1932 | return 1; | 1941 | return 1; |
1933 | } | 1942 | } |
1934 | return 0; | 1943 | return 0; |
1935 | } | 1944 | } |
1936 | 1945 | ||
1937 | /* returns 0 if it did not find a description block | 1946 | /* returns 0 if it did not find a description block |
1938 | ** returns -1 if it found a corrupt commit block | 1947 | ** returns -1 if it found a corrupt commit block |
1939 | ** returns 1 if both desc and commit were valid | 1948 | ** returns 1 if both desc and commit were valid |
1940 | */ | 1949 | */ |
1941 | static int journal_transaction_is_valid(struct super_block *p_s_sb, | 1950 | static int journal_transaction_is_valid(struct super_block *p_s_sb, |
1942 | struct buffer_head *d_bh, | 1951 | struct buffer_head *d_bh, |
1943 | unsigned long *oldest_invalid_trans_id, | 1952 | unsigned long *oldest_invalid_trans_id, |
1944 | unsigned long *newest_mount_id) | 1953 | unsigned long *newest_mount_id) |
1945 | { | 1954 | { |
1946 | struct reiserfs_journal_desc *desc; | 1955 | struct reiserfs_journal_desc *desc; |
1947 | struct reiserfs_journal_commit *commit; | 1956 | struct reiserfs_journal_commit *commit; |
1948 | struct buffer_head *c_bh; | 1957 | struct buffer_head *c_bh; |
1949 | unsigned long offset; | 1958 | unsigned long offset; |
1950 | 1959 | ||
1951 | if (!d_bh) | 1960 | if (!d_bh) |
1952 | return 0; | 1961 | return 0; |
1953 | 1962 | ||
1954 | desc = (struct reiserfs_journal_desc *)d_bh->b_data; | 1963 | desc = (struct reiserfs_journal_desc *)d_bh->b_data; |
1955 | if (get_desc_trans_len(desc) > 0 | 1964 | if (get_desc_trans_len(desc) > 0 |
1956 | && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { | 1965 | && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { |
1957 | if (oldest_invalid_trans_id && *oldest_invalid_trans_id | 1966 | if (oldest_invalid_trans_id && *oldest_invalid_trans_id |
1958 | && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { | 1967 | && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { |
1959 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, | 1968 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, |
1960 | "journal-986: transaction " | 1969 | "journal-986: transaction " |
1961 | "is valid returning because trans_id %d is greater than " | 1970 | "is valid returning because trans_id %d is greater than " |
1962 | "oldest_invalid %lu", | 1971 | "oldest_invalid %lu", |
1963 | get_desc_trans_id(desc), | 1972 | get_desc_trans_id(desc), |
1964 | *oldest_invalid_trans_id); | 1973 | *oldest_invalid_trans_id); |
1965 | return 0; | 1974 | return 0; |
1966 | } | 1975 | } |
1967 | if (newest_mount_id | 1976 | if (newest_mount_id |
1968 | && *newest_mount_id > get_desc_mount_id(desc)) { | 1977 | && *newest_mount_id > get_desc_mount_id(desc)) { |
1969 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, | 1978 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, |
1970 | "journal-1087: transaction " | 1979 | "journal-1087: transaction " |
1971 | "is valid returning because mount_id %d is less than " | 1980 | "is valid returning because mount_id %d is less than " |
1972 | "newest_mount_id %lu", | 1981 | "newest_mount_id %lu", |
1973 | get_desc_mount_id(desc), | 1982 | get_desc_mount_id(desc), |
1974 | *newest_mount_id); | 1983 | *newest_mount_id); |
1975 | return -1; | 1984 | return -1; |
1976 | } | 1985 | } |
1977 | if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) { | 1986 | if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) { |
1978 | reiserfs_warning(p_s_sb, | 1987 | reiserfs_warning(p_s_sb, |
1979 | "journal-2018: Bad transaction length %d encountered, ignoring transaction", | 1988 | "journal-2018: Bad transaction length %d encountered, ignoring transaction", |
1980 | get_desc_trans_len(desc)); | 1989 | get_desc_trans_len(desc)); |
1981 | return -1; | 1990 | return -1; |
1982 | } | 1991 | } |
1983 | offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); | 1992 | offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); |
1984 | 1993 | ||
1985 | /* ok, we have a journal description block, lets see if the transaction was valid */ | 1994 | /* ok, we have a journal description block, lets see if the transaction was valid */ |
1986 | c_bh = | 1995 | c_bh = |
1987 | journal_bread(p_s_sb, | 1996 | journal_bread(p_s_sb, |
1988 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + | 1997 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + |
1989 | ((offset + get_desc_trans_len(desc) + | 1998 | ((offset + get_desc_trans_len(desc) + |
1990 | 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); | 1999 | 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); |
1991 | if (!c_bh) | 2000 | if (!c_bh) |
1992 | return 0; | 2001 | return 0; |
1993 | commit = (struct reiserfs_journal_commit *)c_bh->b_data; | 2002 | commit = (struct reiserfs_journal_commit *)c_bh->b_data; |
1994 | if (journal_compare_desc_commit(p_s_sb, desc, commit)) { | 2003 | if (journal_compare_desc_commit(p_s_sb, desc, commit)) { |
1995 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, | 2004 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, |
1996 | "journal_transaction_is_valid, commit offset %ld had bad " | 2005 | "journal_transaction_is_valid, commit offset %ld had bad " |
1997 | "time %d or length %d", | 2006 | "time %d or length %d", |
1998 | c_bh->b_blocknr - | 2007 | c_bh->b_blocknr - |
1999 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), | 2008 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), |
2000 | get_commit_trans_id(commit), | 2009 | get_commit_trans_id(commit), |
2001 | get_commit_trans_len(commit)); | 2010 | get_commit_trans_len(commit)); |
2002 | brelse(c_bh); | 2011 | brelse(c_bh); |
2003 | if (oldest_invalid_trans_id) { | 2012 | if (oldest_invalid_trans_id) { |
2004 | *oldest_invalid_trans_id = | 2013 | *oldest_invalid_trans_id = |
2005 | get_desc_trans_id(desc); | 2014 | get_desc_trans_id(desc); |
2006 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, | 2015 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, |
2007 | "journal-1004: " | 2016 | "journal-1004: " |
2008 | "transaction_is_valid setting oldest invalid trans_id " | 2017 | "transaction_is_valid setting oldest invalid trans_id " |
2009 | "to %d", | 2018 | "to %d", |
2010 | get_desc_trans_id(desc)); | 2019 | get_desc_trans_id(desc)); |
2011 | } | 2020 | } |
2012 | return -1; | 2021 | return -1; |
2013 | } | 2022 | } |
2014 | brelse(c_bh); | 2023 | brelse(c_bh); |
2015 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, | 2024 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, |
2016 | "journal-1006: found valid " | 2025 | "journal-1006: found valid " |
2017 | "transaction start offset %llu, len %d id %d", | 2026 | "transaction start offset %llu, len %d id %d", |
2018 | d_bh->b_blocknr - | 2027 | d_bh->b_blocknr - |
2019 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), | 2028 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), |
2020 | get_desc_trans_len(desc), | 2029 | get_desc_trans_len(desc), |
2021 | get_desc_trans_id(desc)); | 2030 | get_desc_trans_id(desc)); |
2022 | return 1; | 2031 | return 1; |
2023 | } else { | 2032 | } else { |
2024 | return 0; | 2033 | return 0; |
2025 | } | 2034 | } |
2026 | } | 2035 | } |
2027 | 2036 | ||
/* Drop one reference on each buffer_head in an array of heads. */
static void brelse_array(struct buffer_head **heads, int num)
{
	int n = 0;

	while (n < num) {
		brelse(heads[n]);
		n++;
	}
}
2035 | 2044 | ||
/*
** Given the start, and values for the oldest acceptable transactions,
** this either reads in and replays a transaction, or returns because the
** transaction is invalid, or too old.
**
** Returns 0 when the transaction was replayed, 1 when it was skipped
** (unreadable descriptor, too old, wrong mount id, or no matching commit
** block), and -1 on allocation failure or a replay failure that requires
** fsck.
*/
static int journal_read_transaction(struct super_block *p_s_sb,
				    unsigned long cur_dblock,
				    unsigned long oldest_start,
				    unsigned long oldest_trans_id,
				    unsigned long newest_mount_id)
{
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
	struct reiserfs_journal_desc *desc;
	struct reiserfs_journal_commit *commit;
	unsigned long trans_id = 0;
	struct buffer_head *c_bh;	/* commit block of this transaction */
	struct buffer_head *d_bh;	/* descriptor block of this transaction */
	struct buffer_head **log_blocks = NULL;	/* copies living in the log area */
	struct buffer_head **real_blocks = NULL;	/* home locations on the fs */
	unsigned long trans_offset;
	int i;
	int trans_half;

	d_bh = journal_bread(p_s_sb, cur_dblock);
	if (!d_bh)
		return 1;
	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
	trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: "
		       "journal_read_transaction, offset %llu, len %d mount_id %d",
		       d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
		       get_desc_trans_len(desc), get_desc_mount_id(desc));
	/* skip transactions older than the oldest one the caller will replay */
	if (get_desc_trans_id(desc) < oldest_trans_id) {
		reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: "
			       "journal_read_trans skipping because %lu is too old",
			       cur_dblock -
			       SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb));
		brelse(d_bh);
		return 1;
	}
	/* skip transactions left over from an earlier mount of this fs */
	if (get_desc_mount_id(desc) != newest_mount_id) {
		reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: "
			       "journal_read_trans skipping because %d is != "
			       "newest_mount_id %lu", get_desc_mount_id(desc),
			       newest_mount_id);
		brelse(d_bh);
		return 1;
	}
	/* the commit block sits right after the trans_len logged data blocks,
	 * modulo the on-disk journal size (the log wraps) */
	c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
			     ((trans_offset + get_desc_trans_len(desc) + 1) %
			      SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
	if (!c_bh) {
		brelse(d_bh);
		return 1;
	}
	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
	/* desc/commit mismatch means the transaction was never fully committed */
	if (journal_compare_desc_commit(p_s_sb, desc, commit)) {
		reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
			       "journal_read_transaction, "
			       "commit offset %llu had bad time %d or length %d",
			       c_bh->b_blocknr -
			       SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
			       get_commit_trans_id(commit),
			       get_commit_trans_len(commit));
		brelse(c_bh);
		brelse(d_bh);
		return 1;
	}
	trans_id = get_desc_trans_id(desc);
	/* now we know we've got a good transaction, and it was inside the valid time ranges */
	log_blocks = kmalloc(get_desc_trans_len(desc) *
			     sizeof(struct buffer_head *), GFP_NOFS);
	real_blocks = kmalloc(get_desc_trans_len(desc) *
			      sizeof(struct buffer_head *), GFP_NOFS);
	if (!log_blocks || !real_blocks) {
		brelse(c_bh);
		brelse(d_bh);
		kfree(log_blocks);
		kfree(real_blocks);
		reiserfs_warning(p_s_sb,
				 "journal-1169: kmalloc failed, unable to mount FS");
		return -1;
	}
	/* get all the buffer heads */
	trans_half = journal_trans_half(p_s_sb->s_blocksize);
	for (i = 0; i < get_desc_trans_len(desc); i++) {
		log_blocks[i] =
		    journal_getblk(p_s_sb,
				   SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
				   (trans_offset + 1 +
				    i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb));
		/* real-block numbers are split across the transaction: the
		 * first trans_half live in the descriptor, the remainder in
		 * the commit block */
		if (i < trans_half) {
			real_blocks[i] =
			    sb_getblk(p_s_sb,
				      le32_to_cpu(desc->j_realblock[i]));
		} else {
			real_blocks[i] =
			    sb_getblk(p_s_sb,
				      le32_to_cpu(commit->
						  j_realblock[i - trans_half]));
		}
		if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
			reiserfs_warning(p_s_sb,
					 "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem");
			goto abort_replay;
		}
		/* make sure we don't try to replay onto log or reserved area */
		if (is_block_in_log_or_reserved_area
		    (p_s_sb, real_blocks[i]->b_blocknr)) {
			reiserfs_warning(p_s_sb,
					 "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block");
			/* note: i blocks of each array were acquired so far,
			 * the label is shared by both failure cases above */
		      abort_replay:
			brelse_array(log_blocks, i);
			brelse_array(real_blocks, i);
			brelse(c_bh);
			brelse(d_bh);
			kfree(log_blocks);
			kfree(real_blocks);
			return -1;
		}
	}
	/* read in the log blocks, memcpy to the corresponding real block */
	ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
	for (i = 0; i < get_desc_trans_len(desc); i++) {
		wait_on_buffer(log_blocks[i]);
		if (!buffer_uptodate(log_blocks[i])) {
			reiserfs_warning(p_s_sb,
					 "journal-1212: REPLAY FAILURE fsck required! buffer write failed");
			/* blocks [0, i) were already brelse'd below; release
			 * the untouched tail of log_blocks and all real_blocks */
			brelse_array(log_blocks + i,
				     get_desc_trans_len(desc) - i);
			brelse_array(real_blocks, get_desc_trans_len(desc));
			brelse(c_bh);
			brelse(d_bh);
			kfree(log_blocks);
			kfree(real_blocks);
			return -1;
		}
		memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
		       real_blocks[i]->b_size);
		set_buffer_uptodate(real_blocks[i]);
		brelse(log_blocks[i]);
	}
	/* flush out the real blocks */
	for (i = 0; i < get_desc_trans_len(desc); i++) {
		set_buffer_dirty(real_blocks[i]);
		ll_rw_block(SWRITE, 1, real_blocks + i);
	}
	for (i = 0; i < get_desc_trans_len(desc); i++) {
		wait_on_buffer(real_blocks[i]);
		if (!buffer_uptodate(real_blocks[i])) {
			reiserfs_warning(p_s_sb,
					 "journal-1226: REPLAY FAILURE, fsck required! buffer write failed");
			brelse_array(real_blocks + i,
				     get_desc_trans_len(desc) - i);
			brelse(c_bh);
			brelse(d_bh);
			kfree(log_blocks);
			kfree(real_blocks);
			return -1;
		}
		brelse(real_blocks[i]);
	}
	/* advance past this transaction: descriptor + data blocks + commit,
	 * wrapping around the on-disk journal size */
	cur_dblock =
	    SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
	    ((trans_offset + get_desc_trans_len(desc) +
	      2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb));
	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
		       "journal-1095: setting journal " "start to offset %ld",
		       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb));

	/* init starting values for the first transaction, in case this is the last transaction to be replayed. */
	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
	journal->j_last_flush_trans_id = trans_id;
	journal->j_trans_id = trans_id + 1;
	brelse(c_bh);
	brelse(d_bh);
	kfree(log_blocks);
	kfree(real_blocks);
	return 0;
}
2216 | 2225 | ||
2217 | /* This function reads blocks starting from block and to max_block of bufsize | 2226 | /* This function reads blocks starting from block and to max_block of bufsize |
2218 | size (but no more than BUFNR blocks at a time). This proved to improve | 2227 | size (but no more than BUFNR blocks at a time). This proved to improve |
2219 | mounting speed on self-rebuilding raid5 arrays at least. | 2228 | mounting speed on self-rebuilding raid5 arrays at least. |
2220 | Right now it is only used from journal code. But later we might use it | 2229 | Right now it is only used from journal code. But later we might use it |
2221 | from other places. | 2230 | from other places. |
2222 | Note: Do not use journal_getblk/sb_getblk functions here! */ | 2231 | Note: Do not use journal_getblk/sb_getblk functions here! */ |
2223 | static struct buffer_head *reiserfs_breada(struct block_device *dev, int block, | 2232 | static struct buffer_head *reiserfs_breada(struct block_device *dev, int block, |
2224 | int bufsize, unsigned int max_block) | 2233 | int bufsize, unsigned int max_block) |
2225 | { | 2234 | { |
2226 | struct buffer_head *bhlist[BUFNR]; | 2235 | struct buffer_head *bhlist[BUFNR]; |
2227 | unsigned int blocks = BUFNR; | 2236 | unsigned int blocks = BUFNR; |
2228 | struct buffer_head *bh; | 2237 | struct buffer_head *bh; |
2229 | int i, j; | 2238 | int i, j; |
2230 | 2239 | ||
2231 | bh = __getblk(dev, block, bufsize); | 2240 | bh = __getblk(dev, block, bufsize); |
2232 | if (buffer_uptodate(bh)) | 2241 | if (buffer_uptodate(bh)) |
2233 | return (bh); | 2242 | return (bh); |
2234 | 2243 | ||
2235 | if (block + BUFNR > max_block) { | 2244 | if (block + BUFNR > max_block) { |
2236 | blocks = max_block - block; | 2245 | blocks = max_block - block; |
2237 | } | 2246 | } |
2238 | bhlist[0] = bh; | 2247 | bhlist[0] = bh; |
2239 | j = 1; | 2248 | j = 1; |
2240 | for (i = 1; i < blocks; i++) { | 2249 | for (i = 1; i < blocks; i++) { |
2241 | bh = __getblk(dev, block + i, bufsize); | 2250 | bh = __getblk(dev, block + i, bufsize); |
2242 | if (buffer_uptodate(bh)) { | 2251 | if (buffer_uptodate(bh)) { |
2243 | brelse(bh); | 2252 | brelse(bh); |
2244 | break; | 2253 | break; |
2245 | } else | 2254 | } else |
2246 | bhlist[j++] = bh; | 2255 | bhlist[j++] = bh; |
2247 | } | 2256 | } |
2248 | ll_rw_block(READ, j, bhlist); | 2257 | ll_rw_block(READ, j, bhlist); |
2249 | for (i = 1; i < j; i++) | 2258 | for (i = 1; i < j; i++) |
2250 | brelse(bhlist[i]); | 2259 | brelse(bhlist[i]); |
2251 | bh = bhlist[0]; | 2260 | bh = bhlist[0]; |
2252 | wait_on_buffer(bh); | 2261 | wait_on_buffer(bh); |
2253 | if (buffer_uptodate(bh)) | 2262 | if (buffer_uptodate(bh)) |
2254 | return bh; | 2263 | return bh; |
2255 | brelse(bh); | 2264 | brelse(bh); |
2256 | return NULL; | 2265 | return NULL; |
2257 | } | 2266 | } |
2258 | 2267 | ||
2259 | /* | 2268 | /* |
2260 | ** read and replay the log | 2269 | ** read and replay the log |
2261 | ** on a clean unmount, the journal header's next unflushed pointer will be to an invalid | 2270 | ** on a clean unmount, the journal header's next unflushed pointer will be to an invalid |
2262 | ** transaction. This tests that before finding all the transactions in the log, which makes normal mount times fast. | 2271 | ** transaction. This tests that before finding all the transactions in the log, which makes normal mount times fast. |
2263 | ** | 2272 | ** |
2264 | ** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid. | 2273 | ** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid. |
2265 | ** | 2274 | ** |
2266 | ** On exit, it sets things up so the first transaction will work correctly. | 2275 | ** On exit, it sets things up so the first transaction will work correctly. |
2267 | */ | 2276 | */ |
2268 | static int journal_read(struct super_block *p_s_sb) | 2277 | static int journal_read(struct super_block *p_s_sb) |
2269 | { | 2278 | { |
2270 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); | 2279 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); |
2271 | struct reiserfs_journal_desc *desc; | 2280 | struct reiserfs_journal_desc *desc; |
2272 | unsigned long oldest_trans_id = 0; | 2281 | unsigned long oldest_trans_id = 0; |
2273 | unsigned long oldest_invalid_trans_id = 0; | 2282 | unsigned long oldest_invalid_trans_id = 0; |
2274 | time_t start; | 2283 | time_t start; |
2275 | unsigned long oldest_start = 0; | 2284 | unsigned long oldest_start = 0; |
2276 | unsigned long cur_dblock = 0; | 2285 | unsigned long cur_dblock = 0; |
2277 | unsigned long newest_mount_id = 9; | 2286 | unsigned long newest_mount_id = 9; |
2278 | struct buffer_head *d_bh; | 2287 | struct buffer_head *d_bh; |
2279 | struct reiserfs_journal_header *jh; | 2288 | struct reiserfs_journal_header *jh; |
2280 | int valid_journal_header = 0; | 2289 | int valid_journal_header = 0; |
2281 | int replay_count = 0; | 2290 | int replay_count = 0; |
2282 | int continue_replay = 1; | 2291 | int continue_replay = 1; |
2283 | int ret; | 2292 | int ret; |
2284 | char b[BDEVNAME_SIZE]; | 2293 | char b[BDEVNAME_SIZE]; |
2285 | 2294 | ||
2286 | cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); | 2295 | cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); |
2287 | reiserfs_info(p_s_sb, "checking transaction log (%s)\n", | 2296 | reiserfs_info(p_s_sb, "checking transaction log (%s)\n", |
2288 | bdevname(journal->j_dev_bd, b)); | 2297 | bdevname(journal->j_dev_bd, b)); |
2289 | start = get_seconds(); | 2298 | start = get_seconds(); |
2290 | 2299 | ||
2291 | /* step 1, read in the journal header block. Check the transaction it says | 2300 | /* step 1, read in the journal header block. Check the transaction it says |
2292 | ** is the first unflushed, and if that transaction is not valid, | 2301 | ** is the first unflushed, and if that transaction is not valid, |
2293 | ** replay is done | 2302 | ** replay is done |
2294 | */ | 2303 | */ |
2295 | journal->j_header_bh = journal_bread(p_s_sb, | 2304 | journal->j_header_bh = journal_bread(p_s_sb, |
2296 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) | 2305 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) |
2297 | + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); | 2306 | + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); |
2298 | if (!journal->j_header_bh) { | 2307 | if (!journal->j_header_bh) { |
2299 | return 1; | 2308 | return 1; |
2300 | } | 2309 | } |
2301 | jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); | 2310 | jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); |
2302 | if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 && | 2311 | if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 && |
2303 | le32_to_cpu(jh->j_first_unflushed_offset) < | 2312 | le32_to_cpu(jh->j_first_unflushed_offset) < |
2304 | SB_ONDISK_JOURNAL_SIZE(p_s_sb) | 2313 | SB_ONDISK_JOURNAL_SIZE(p_s_sb) |
2305 | && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { | 2314 | && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { |
2306 | oldest_start = | 2315 | oldest_start = |
2307 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + | 2316 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + |
2308 | le32_to_cpu(jh->j_first_unflushed_offset); | 2317 | le32_to_cpu(jh->j_first_unflushed_offset); |
2309 | oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; | 2318 | oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; |
2310 | newest_mount_id = le32_to_cpu(jh->j_mount_id); | 2319 | newest_mount_id = le32_to_cpu(jh->j_mount_id); |
2311 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, | 2320 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, |
2312 | "journal-1153: found in " | 2321 | "journal-1153: found in " |
2313 | "header: first_unflushed_offset %d, last_flushed_trans_id " | 2322 | "header: first_unflushed_offset %d, last_flushed_trans_id " |
2314 | "%lu", le32_to_cpu(jh->j_first_unflushed_offset), | 2323 | "%lu", le32_to_cpu(jh->j_first_unflushed_offset), |
2315 | le32_to_cpu(jh->j_last_flush_trans_id)); | 2324 | le32_to_cpu(jh->j_last_flush_trans_id)); |
2316 | valid_journal_header = 1; | 2325 | valid_journal_header = 1; |
2317 | 2326 | ||
2318 | /* now, we try to read the first unflushed offset. If it is not valid, | 2327 | /* now, we try to read the first unflushed offset. If it is not valid, |
2319 | ** there is nothing more we can do, and it makes no sense to read | 2328 | ** there is nothing more we can do, and it makes no sense to read |
2320 | ** through the whole log. | 2329 | ** through the whole log. |
2321 | */ | 2330 | */ |
2322 | d_bh = | 2331 | d_bh = |
2323 | journal_bread(p_s_sb, | 2332 | journal_bread(p_s_sb, |
2324 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + | 2333 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + |
2325 | le32_to_cpu(jh->j_first_unflushed_offset)); | 2334 | le32_to_cpu(jh->j_first_unflushed_offset)); |
2326 | ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL); | 2335 | ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL); |
2327 | if (!ret) { | 2336 | if (!ret) { |
2328 | continue_replay = 0; | 2337 | continue_replay = 0; |
2329 | } | 2338 | } |
2330 | brelse(d_bh); | 2339 | brelse(d_bh); |
2331 | goto start_log_replay; | 2340 | goto start_log_replay; |
2332 | } | 2341 | } |
2333 | 2342 | ||
2334 | if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) { | 2343 | if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) { |
2335 | reiserfs_warning(p_s_sb, | 2344 | reiserfs_warning(p_s_sb, |
2336 | "clm-2076: device is readonly, unable to replay log"); | 2345 | "clm-2076: device is readonly, unable to replay log"); |
2337 | return -1; | 2346 | return -1; |
2338 | } | 2347 | } |
2339 | 2348 | ||
2340 | /* ok, there are transactions that need to be replayed. start with the first log block, find | 2349 | /* ok, there are transactions that need to be replayed. start with the first log block, find |
2341 | ** all the valid transactions, and pick out the oldest. | 2350 | ** all the valid transactions, and pick out the oldest. |
2342 | */ | 2351 | */ |
2343 | while (continue_replay | 2352 | while (continue_replay |
2344 | && cur_dblock < | 2353 | && cur_dblock < |
2345 | (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + | 2354 | (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + |
2346 | SB_ONDISK_JOURNAL_SIZE(p_s_sb))) { | 2355 | SB_ONDISK_JOURNAL_SIZE(p_s_sb))) { |
2347 | /* Note that it is required for blocksize of primary fs device and journal | 2356 | /* Note that it is required for blocksize of primary fs device and journal |
2348 | device to be the same */ | 2357 | device to be the same */ |
2349 | d_bh = | 2358 | d_bh = |
2350 | reiserfs_breada(journal->j_dev_bd, cur_dblock, | 2359 | reiserfs_breada(journal->j_dev_bd, cur_dblock, |
2351 | p_s_sb->s_blocksize, | 2360 | p_s_sb->s_blocksize, |
2352 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + | 2361 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + |
2353 | SB_ONDISK_JOURNAL_SIZE(p_s_sb)); | 2362 | SB_ONDISK_JOURNAL_SIZE(p_s_sb)); |
2354 | ret = | 2363 | ret = |
2355 | journal_transaction_is_valid(p_s_sb, d_bh, | 2364 | journal_transaction_is_valid(p_s_sb, d_bh, |
2356 | &oldest_invalid_trans_id, | 2365 | &oldest_invalid_trans_id, |
2357 | &newest_mount_id); | 2366 | &newest_mount_id); |
2358 | if (ret == 1) { | 2367 | if (ret == 1) { |
2359 | desc = (struct reiserfs_journal_desc *)d_bh->b_data; | 2368 | desc = (struct reiserfs_journal_desc *)d_bh->b_data; |
2360 | if (oldest_start == 0) { /* init all oldest_ values */ | 2369 | if (oldest_start == 0) { /* init all oldest_ values */ |
2361 | oldest_trans_id = get_desc_trans_id(desc); | 2370 | oldest_trans_id = get_desc_trans_id(desc); |
2362 | oldest_start = d_bh->b_blocknr; | 2371 | oldest_start = d_bh->b_blocknr; |
2363 | newest_mount_id = get_desc_mount_id(desc); | 2372 | newest_mount_id = get_desc_mount_id(desc); |
2364 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, | 2373 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, |
2365 | "journal-1179: Setting " | 2374 | "journal-1179: Setting " |
2366 | "oldest_start to offset %llu, trans_id %lu", | 2375 | "oldest_start to offset %llu, trans_id %lu", |
2367 | oldest_start - | 2376 | oldest_start - |
2368 | SB_ONDISK_JOURNAL_1st_BLOCK | 2377 | SB_ONDISK_JOURNAL_1st_BLOCK |
2369 | (p_s_sb), oldest_trans_id); | 2378 | (p_s_sb), oldest_trans_id); |
2370 | } else if (oldest_trans_id > get_desc_trans_id(desc)) { | 2379 | } else if (oldest_trans_id > get_desc_trans_id(desc)) { |
2371 | /* one we just read was older */ | 2380 | /* one we just read was older */ |
2372 | oldest_trans_id = get_desc_trans_id(desc); | 2381 | oldest_trans_id = get_desc_trans_id(desc); |
2373 | oldest_start = d_bh->b_blocknr; | 2382 | oldest_start = d_bh->b_blocknr; |
2374 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, | 2383 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, |
2375 | "journal-1180: Resetting " | 2384 | "journal-1180: Resetting " |
2376 | "oldest_start to offset %lu, trans_id %lu", | 2385 | "oldest_start to offset %lu, trans_id %lu", |
2377 | oldest_start - | 2386 | oldest_start - |
2378 | SB_ONDISK_JOURNAL_1st_BLOCK | 2387 | SB_ONDISK_JOURNAL_1st_BLOCK |
2379 | (p_s_sb), oldest_trans_id); | 2388 | (p_s_sb), oldest_trans_id); |
2380 | } | 2389 | } |
2381 | if (newest_mount_id < get_desc_mount_id(desc)) { | 2390 | if (newest_mount_id < get_desc_mount_id(desc)) { |
2382 | newest_mount_id = get_desc_mount_id(desc); | 2391 | newest_mount_id = get_desc_mount_id(desc); |
2383 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, | 2392 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, |
2384 | "journal-1299: Setting " | 2393 | "journal-1299: Setting " |
2385 | "newest_mount_id to %d", | 2394 | "newest_mount_id to %d", |
2386 | get_desc_mount_id(desc)); | 2395 | get_desc_mount_id(desc)); |
2387 | } | 2396 | } |
2388 | cur_dblock += get_desc_trans_len(desc) + 2; | 2397 | cur_dblock += get_desc_trans_len(desc) + 2; |
2389 | } else { | 2398 | } else { |
2390 | cur_dblock++; | 2399 | cur_dblock++; |
2391 | } | 2400 | } |
2392 | brelse(d_bh); | 2401 | brelse(d_bh); |
2393 | } | 2402 | } |
2394 | 2403 | ||
2395 | start_log_replay: | 2404 | start_log_replay: |
2396 | cur_dblock = oldest_start; | 2405 | cur_dblock = oldest_start; |
2397 | if (oldest_trans_id) { | 2406 | if (oldest_trans_id) { |
2398 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, | 2407 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, |
2399 | "journal-1206: Starting replay " | 2408 | "journal-1206: Starting replay " |
2400 | "from offset %llu, trans_id %lu", | 2409 | "from offset %llu, trans_id %lu", |
2401 | cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), | 2410 | cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), |
2402 | oldest_trans_id); | 2411 | oldest_trans_id); |
2403 | 2412 | ||
2404 | } | 2413 | } |
2405 | replay_count = 0; | 2414 | replay_count = 0; |
2406 | while (continue_replay && oldest_trans_id > 0) { | 2415 | while (continue_replay && oldest_trans_id > 0) { |
2407 | ret = | 2416 | ret = |
2408 | journal_read_transaction(p_s_sb, cur_dblock, oldest_start, | 2417 | journal_read_transaction(p_s_sb, cur_dblock, oldest_start, |
2409 | oldest_trans_id, newest_mount_id); | 2418 | oldest_trans_id, newest_mount_id); |
2410 | if (ret < 0) { | 2419 | if (ret < 0) { |
2411 | return ret; | 2420 | return ret; |
2412 | } else if (ret != 0) { | 2421 | } else if (ret != 0) { |
2413 | break; | 2422 | break; |
2414 | } | 2423 | } |
2415 | cur_dblock = | 2424 | cur_dblock = |
2416 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start; | 2425 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start; |
2417 | replay_count++; | 2426 | replay_count++; |
2418 | if (cur_dblock == oldest_start) | 2427 | if (cur_dblock == oldest_start) |
2419 | break; | 2428 | break; |
2420 | } | 2429 | } |
2421 | 2430 | ||
2422 | if (oldest_trans_id == 0) { | 2431 | if (oldest_trans_id == 0) { |
2423 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, | 2432 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, |
2424 | "journal-1225: No valid " "transactions found"); | 2433 | "journal-1225: No valid " "transactions found"); |
2425 | } | 2434 | } |
2426 | /* j_start does not get set correctly if we don't replay any transactions. | 2435 | /* j_start does not get set correctly if we don't replay any transactions. |
2427 | ** if we had a valid journal_header, set j_start to the first unflushed transaction value, | 2436 | ** if we had a valid journal_header, set j_start to the first unflushed transaction value, |
2428 | ** copy the trans_id from the header | 2437 | ** copy the trans_id from the header |
2429 | */ | 2438 | */ |
2430 | if (valid_journal_header && replay_count == 0) { | 2439 | if (valid_journal_header && replay_count == 0) { |
2431 | journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); | 2440 | journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); |
2432 | journal->j_trans_id = | 2441 | journal->j_trans_id = |
2433 | le32_to_cpu(jh->j_last_flush_trans_id) + 1; | 2442 | le32_to_cpu(jh->j_last_flush_trans_id) + 1; |
2434 | journal->j_last_flush_trans_id = | 2443 | journal->j_last_flush_trans_id = |
2435 | le32_to_cpu(jh->j_last_flush_trans_id); | 2444 | le32_to_cpu(jh->j_last_flush_trans_id); |
2436 | journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; | 2445 | journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; |
2437 | } else { | 2446 | } else { |
2438 | journal->j_mount_id = newest_mount_id + 1; | 2447 | journal->j_mount_id = newest_mount_id + 1; |
2439 | } | 2448 | } |
2440 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " | 2449 | reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " |
2441 | "newest_mount_id to %lu", journal->j_mount_id); | 2450 | "newest_mount_id to %lu", journal->j_mount_id); |
2442 | journal->j_first_unflushed_offset = journal->j_start; | 2451 | journal->j_first_unflushed_offset = journal->j_start; |
2443 | if (replay_count > 0) { | 2452 | if (replay_count > 0) { |
2444 | reiserfs_info(p_s_sb, | 2453 | reiserfs_info(p_s_sb, |
2445 | "replayed %d transactions in %lu seconds\n", | 2454 | "replayed %d transactions in %lu seconds\n", |
2446 | replay_count, get_seconds() - start); | 2455 | replay_count, get_seconds() - start); |
2447 | } | 2456 | } |
2448 | if (!bdev_read_only(p_s_sb->s_bdev) && | 2457 | if (!bdev_read_only(p_s_sb->s_bdev) && |
2449 | _update_journal_header_block(p_s_sb, journal->j_start, | 2458 | _update_journal_header_block(p_s_sb, journal->j_start, |
2450 | journal->j_last_flush_trans_id)) { | 2459 | journal->j_last_flush_trans_id)) { |
2451 | /* replay failed, caller must call free_journal_ram and abort | 2460 | /* replay failed, caller must call free_journal_ram and abort |
2452 | ** the mount | 2461 | ** the mount |
2453 | */ | 2462 | */ |
2454 | return -1; | 2463 | return -1; |
2455 | } | 2464 | } |
2456 | return 0; | 2465 | return 0; |
2457 | } | 2466 | } |
2458 | 2467 | ||
2459 | static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) | 2468 | static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) |
2460 | { | 2469 | { |
2461 | struct reiserfs_journal_list *jl; | 2470 | struct reiserfs_journal_list *jl; |
2462 | jl = kzalloc(sizeof(struct reiserfs_journal_list), | 2471 | jl = kzalloc(sizeof(struct reiserfs_journal_list), |
2463 | GFP_NOFS | __GFP_NOFAIL); | 2472 | GFP_NOFS | __GFP_NOFAIL); |
2464 | INIT_LIST_HEAD(&jl->j_list); | 2473 | INIT_LIST_HEAD(&jl->j_list); |
2465 | INIT_LIST_HEAD(&jl->j_working_list); | 2474 | INIT_LIST_HEAD(&jl->j_working_list); |
2466 | INIT_LIST_HEAD(&jl->j_tail_bh_list); | 2475 | INIT_LIST_HEAD(&jl->j_tail_bh_list); |
2467 | INIT_LIST_HEAD(&jl->j_bh_list); | 2476 | INIT_LIST_HEAD(&jl->j_bh_list); |
2468 | sema_init(&jl->j_commit_lock, 1); | 2477 | sema_init(&jl->j_commit_lock, 1); |
2469 | SB_JOURNAL(s)->j_num_lists++; | 2478 | SB_JOURNAL(s)->j_num_lists++; |
2470 | get_journal_list(jl); | 2479 | get_journal_list(jl); |
2471 | return jl; | 2480 | return jl; |
2472 | } | 2481 | } |
2473 | 2482 | ||
2474 | static void journal_list_init(struct super_block *p_s_sb) | 2483 | static void journal_list_init(struct super_block *p_s_sb) |
2475 | { | 2484 | { |
2476 | SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb); | 2485 | SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb); |
2477 | } | 2486 | } |
2478 | 2487 | ||
2479 | static int release_journal_dev(struct super_block *super, | 2488 | static int release_journal_dev(struct super_block *super, |
2480 | struct reiserfs_journal *journal) | 2489 | struct reiserfs_journal *journal) |
2481 | { | 2490 | { |
2482 | int result; | 2491 | int result; |
2483 | 2492 | ||
2484 | result = 0; | 2493 | result = 0; |
2485 | 2494 | ||
2486 | if (journal->j_dev_file != NULL) { | 2495 | if (journal->j_dev_file != NULL) { |
2487 | result = filp_close(journal->j_dev_file, NULL); | 2496 | result = filp_close(journal->j_dev_file, NULL); |
2488 | journal->j_dev_file = NULL; | 2497 | journal->j_dev_file = NULL; |
2489 | journal->j_dev_bd = NULL; | 2498 | journal->j_dev_bd = NULL; |
2490 | } else if (journal->j_dev_bd != NULL) { | 2499 | } else if (journal->j_dev_bd != NULL) { |
2491 | result = blkdev_put(journal->j_dev_bd); | 2500 | result = blkdev_put(journal->j_dev_bd); |
2492 | journal->j_dev_bd = NULL; | 2501 | journal->j_dev_bd = NULL; |
2493 | } | 2502 | } |
2494 | 2503 | ||
2495 | if (result != 0) { | 2504 | if (result != 0) { |
2496 | reiserfs_warning(super, | 2505 | reiserfs_warning(super, |
2497 | "sh-457: release_journal_dev: Cannot release journal device: %i", | 2506 | "sh-457: release_journal_dev: Cannot release journal device: %i", |
2498 | result); | 2507 | result); |
2499 | } | 2508 | } |
2500 | return result; | 2509 | return result; |
2501 | } | 2510 | } |
2502 | 2511 | ||
/*
 * Open the block device holding the journal.
 *
 * Two paths:
 *  - no "jdev" mount option: open by device number (the on-disk journal
 *    device field if set, otherwise the fs device itself) and store the
 *    handle in journal->j_dev_bd;
 *  - "jdev=<path>" given: open the path as a file, verify it is a block
 *    device, and keep both journal->j_dev_file and journal->j_dev_bd.
 *
 * Returns 0 on success or a negative errno; on failure both device
 * pointers are left NULL so release_journal_dev() is safe to call.
 */
static int journal_init_dev(struct super_block *super,
			    struct reiserfs_journal *journal,
			    const char *jdev_name)
{
	int result;
	dev_t jdev;
	int blkdev_mode = FMODE_READ | FMODE_WRITE;
	char b[BDEVNAME_SIZE];

	result = 0;

	journal->j_dev_bd = NULL;
	journal->j_dev_file = NULL;
	/* journal on a separate device if the superblock records one,
	 * otherwise it lives on the fs device */
	jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
	    new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;

	/* read-only fs: don't ask for write access to the journal device */
	if (bdev_read_only(super->s_bdev))
		blkdev_mode = FMODE_READ;

	/* there is no "jdev" option and journal is on separate device */
	if ((!jdev_name || !jdev_name[0])) {
		journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
		if (IS_ERR(journal->j_dev_bd)) {
			result = PTR_ERR(journal->j_dev_bd);
			journal->j_dev_bd = NULL;
			reiserfs_warning(super, "sh-458: journal_init_dev: "
					 "cannot init journal device '%s': %i",
					 __bdevname(jdev, b), result);
			return result;
		} else if (jdev != super->s_dev)
			/* only reset the blocksize when the journal is not
			 * sharing the fs device */
			set_blocksize(journal->j_dev_bd, super->s_blocksize);
		return 0;
	}

	/* "jdev" path given: open it and make sure it really is a blkdev */
	journal->j_dev_file = filp_open(jdev_name, 0, 0);
	if (!IS_ERR(journal->j_dev_file)) {
		struct inode *jdev_inode = journal->j_dev_file->f_mapping->host;
		if (!S_ISBLK(jdev_inode->i_mode)) {
			reiserfs_warning(super, "journal_init_dev: '%s' is "
					 "not a block device", jdev_name);
			result = -ENOTBLK;
			release_journal_dev(super, journal);
		} else {
			/* ok */
			journal->j_dev_bd = I_BDEV(jdev_inode);
			set_blocksize(journal->j_dev_bd, super->s_blocksize);
			reiserfs_info(super,
				      "journal_init_dev: journal device: %s\n",
				      bdevname(journal->j_dev_bd, b));
		}
	} else {
		result = PTR_ERR(journal->j_dev_file);
		journal->j_dev_file = NULL;
		reiserfs_warning(super,
				 "journal_init_dev: Cannot open '%s': %i",
				 jdev_name, result);
	}
	return result;
}
2562 | 2571 | ||
2563 | /* | 2572 | /* |
2564 | ** must be called once on fs mount. calls journal_read for you | 2573 | ** must be called once on fs mount. calls journal_read for you |
2565 | */ | 2574 | */ |
2566 | int journal_init(struct super_block *p_s_sb, const char *j_dev_name, | 2575 | int journal_init(struct super_block *p_s_sb, const char *j_dev_name, |
2567 | int old_format, unsigned int commit_max_age) | 2576 | int old_format, unsigned int commit_max_age) |
2568 | { | 2577 | { |
2569 | int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2; | 2578 | int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2; |
2570 | struct buffer_head *bhjh; | 2579 | struct buffer_head *bhjh; |
2571 | struct reiserfs_super_block *rs; | 2580 | struct reiserfs_super_block *rs; |
2572 | struct reiserfs_journal_header *jh; | 2581 | struct reiserfs_journal_header *jh; |
2573 | struct reiserfs_journal *journal; | 2582 | struct reiserfs_journal *journal; |
2574 | struct reiserfs_journal_list *jl; | 2583 | struct reiserfs_journal_list *jl; |
2575 | char b[BDEVNAME_SIZE]; | 2584 | char b[BDEVNAME_SIZE]; |
2576 | 2585 | ||
2577 | journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal)); | 2586 | journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal)); |
2578 | if (!journal) { | 2587 | if (!journal) { |
2579 | reiserfs_warning(p_s_sb, | 2588 | reiserfs_warning(p_s_sb, |
2580 | "journal-1256: unable to get memory for journal structure"); | 2589 | "journal-1256: unable to get memory for journal structure"); |
2581 | return 1; | 2590 | return 1; |
2582 | } | 2591 | } |
2583 | memset(journal, 0, sizeof(struct reiserfs_journal)); | 2592 | memset(journal, 0, sizeof(struct reiserfs_journal)); |
2584 | INIT_LIST_HEAD(&journal->j_bitmap_nodes); | 2593 | INIT_LIST_HEAD(&journal->j_bitmap_nodes); |
2585 | INIT_LIST_HEAD(&journal->j_prealloc_list); | 2594 | INIT_LIST_HEAD(&journal->j_prealloc_list); |
2586 | INIT_LIST_HEAD(&journal->j_working_list); | 2595 | INIT_LIST_HEAD(&journal->j_working_list); |
2587 | INIT_LIST_HEAD(&journal->j_journal_list); | 2596 | INIT_LIST_HEAD(&journal->j_journal_list); |
2588 | journal->j_persistent_trans = 0; | 2597 | journal->j_persistent_trans = 0; |
2589 | if (reiserfs_allocate_list_bitmaps(p_s_sb, | 2598 | if (reiserfs_allocate_list_bitmaps(p_s_sb, |
2590 | journal->j_list_bitmap, | 2599 | journal->j_list_bitmap, |
2591 | SB_BMAP_NR(p_s_sb))) | 2600 | SB_BMAP_NR(p_s_sb))) |
2592 | goto free_and_return; | 2601 | goto free_and_return; |
2593 | allocate_bitmap_nodes(p_s_sb); | 2602 | allocate_bitmap_nodes(p_s_sb); |
2594 | 2603 | ||
2595 | /* reserved for journal area support */ | 2604 | /* reserved for journal area support */ |
2596 | SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? | 2605 | SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? |
2597 | REISERFS_OLD_DISK_OFFSET_IN_BYTES | 2606 | REISERFS_OLD_DISK_OFFSET_IN_BYTES |
2598 | / p_s_sb->s_blocksize + | 2607 | / p_s_sb->s_blocksize + |
2599 | SB_BMAP_NR(p_s_sb) + | 2608 | SB_BMAP_NR(p_s_sb) + |
2600 | 1 : | 2609 | 1 : |
2601 | REISERFS_DISK_OFFSET_IN_BYTES / | 2610 | REISERFS_DISK_OFFSET_IN_BYTES / |
2602 | p_s_sb->s_blocksize + 2); | 2611 | p_s_sb->s_blocksize + 2); |
2603 | 2612 | ||
2604 | /* Sanity check to see is the standard journal fitting withing first bitmap | 2613 | /* Sanity check to see is the standard journal fitting withing first bitmap |
2605 | (actual for small blocksizes) */ | 2614 | (actual for small blocksizes) */ |
2606 | if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) && | 2615 | if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) && |
2607 | (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + | 2616 | (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + |
2608 | SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) { | 2617 | SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) { |
2609 | reiserfs_warning(p_s_sb, | 2618 | reiserfs_warning(p_s_sb, |
2610 | "journal-1393: journal does not fit for area " | 2619 | "journal-1393: journal does not fit for area " |
2611 | "addressed by first of bitmap blocks. It starts at " | 2620 | "addressed by first of bitmap blocks. It starts at " |
2612 | "%u and its size is %u. Block size %ld", | 2621 | "%u and its size is %u. Block size %ld", |
2613 | SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb), | 2622 | SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb), |
2614 | SB_ONDISK_JOURNAL_SIZE(p_s_sb), | 2623 | SB_ONDISK_JOURNAL_SIZE(p_s_sb), |
2615 | p_s_sb->s_blocksize); | 2624 | p_s_sb->s_blocksize); |
2616 | goto free_and_return; | 2625 | goto free_and_return; |
2617 | } | 2626 | } |
2618 | 2627 | ||
2619 | if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) { | 2628 | if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) { |
2620 | reiserfs_warning(p_s_sb, | 2629 | reiserfs_warning(p_s_sb, |
2621 | "sh-462: unable to initialize jornal device"); | 2630 | "sh-462: unable to initialize jornal device"); |
2622 | goto free_and_return; | 2631 | goto free_and_return; |
2623 | } | 2632 | } |
2624 | 2633 | ||
2625 | rs = SB_DISK_SUPER_BLOCK(p_s_sb); | 2634 | rs = SB_DISK_SUPER_BLOCK(p_s_sb); |
2626 | 2635 | ||
2627 | /* read journal header */ | 2636 | /* read journal header */ |
2628 | bhjh = journal_bread(p_s_sb, | 2637 | bhjh = journal_bread(p_s_sb, |
2629 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + | 2638 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + |
2630 | SB_ONDISK_JOURNAL_SIZE(p_s_sb)); | 2639 | SB_ONDISK_JOURNAL_SIZE(p_s_sb)); |
2631 | if (!bhjh) { | 2640 | if (!bhjh) { |
2632 | reiserfs_warning(p_s_sb, | 2641 | reiserfs_warning(p_s_sb, |
2633 | "sh-459: unable to read journal header"); | 2642 | "sh-459: unable to read journal header"); |
2634 | goto free_and_return; | 2643 | goto free_and_return; |
2635 | } | 2644 | } |
2636 | jh = (struct reiserfs_journal_header *)(bhjh->b_data); | 2645 | jh = (struct reiserfs_journal_header *)(bhjh->b_data); |
2637 | 2646 | ||
2638 | /* make sure that journal matches to the super block */ | 2647 | /* make sure that journal matches to the super block */ |
2639 | if (is_reiserfs_jr(rs) | 2648 | if (is_reiserfs_jr(rs) |
2640 | && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != | 2649 | && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != |
2641 | sb_jp_journal_magic(rs))) { | 2650 | sb_jp_journal_magic(rs))) { |
2642 | reiserfs_warning(p_s_sb, | 2651 | reiserfs_warning(p_s_sb, |
2643 | "sh-460: journal header magic %x " | 2652 | "sh-460: journal header magic %x " |
2644 | "(device %s) does not match to magic found in super " | 2653 | "(device %s) does not match to magic found in super " |
2645 | "block %x", jh->jh_journal.jp_journal_magic, | 2654 | "block %x", jh->jh_journal.jp_journal_magic, |
2646 | bdevname(journal->j_dev_bd, b), | 2655 | bdevname(journal->j_dev_bd, b), |
2647 | sb_jp_journal_magic(rs)); | 2656 | sb_jp_journal_magic(rs)); |
2648 | brelse(bhjh); | 2657 | brelse(bhjh); |
2649 | goto free_and_return; | 2658 | goto free_and_return; |
2650 | } | 2659 | } |
2651 | 2660 | ||
2652 | journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max); | 2661 | journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max); |
2653 | journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch); | 2662 | journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch); |
2654 | journal->j_max_commit_age = | 2663 | journal->j_max_commit_age = |
2655 | le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); | 2664 | le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); |
2656 | journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; | 2665 | journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; |
2657 | 2666 | ||
2658 | if (journal->j_trans_max) { | 2667 | if (journal->j_trans_max) { |
2659 | /* make sure these parameters are available, assign it if they are not */ | 2668 | /* make sure these parameters are available, assign it if they are not */ |
2660 | __u32 initial = journal->j_trans_max; | 2669 | __u32 initial = journal->j_trans_max; |
2661 | __u32 ratio = 1; | 2670 | __u32 ratio = 1; |
2662 | 2671 | ||
2663 | if (p_s_sb->s_blocksize < 4096) | 2672 | if (p_s_sb->s_blocksize < 4096) |
2664 | ratio = 4096 / p_s_sb->s_blocksize; | 2673 | ratio = 4096 / p_s_sb->s_blocksize; |
2665 | 2674 | ||
2666 | if (SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max < | 2675 | if (SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max < |
2667 | JOURNAL_MIN_RATIO) | 2676 | JOURNAL_MIN_RATIO) |
2668 | journal->j_trans_max = | 2677 | journal->j_trans_max = |
2669 | SB_ONDISK_JOURNAL_SIZE(p_s_sb) / JOURNAL_MIN_RATIO; | 2678 | SB_ONDISK_JOURNAL_SIZE(p_s_sb) / JOURNAL_MIN_RATIO; |
2670 | if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio) | 2679 | if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio) |
2671 | journal->j_trans_max = | 2680 | journal->j_trans_max = |
2672 | JOURNAL_TRANS_MAX_DEFAULT / ratio; | 2681 | JOURNAL_TRANS_MAX_DEFAULT / ratio; |
2673 | if (journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio) | 2682 | if (journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio) |
2674 | journal->j_trans_max = | 2683 | journal->j_trans_max = |
2675 | JOURNAL_TRANS_MIN_DEFAULT / ratio; | 2684 | JOURNAL_TRANS_MIN_DEFAULT / ratio; |
2676 | 2685 | ||
2677 | if (journal->j_trans_max != initial) | 2686 | if (journal->j_trans_max != initial) |
2678 | reiserfs_warning(p_s_sb, | 2687 | reiserfs_warning(p_s_sb, |
2679 | "sh-461: journal_init: wrong transaction max size (%u). Changed to %u", | 2688 | "sh-461: journal_init: wrong transaction max size (%u). Changed to %u", |
2680 | initial, journal->j_trans_max); | 2689 | initial, journal->j_trans_max); |
2681 | 2690 | ||
2682 | journal->j_max_batch = journal->j_trans_max * | 2691 | journal->j_max_batch = journal->j_trans_max * |
2683 | JOURNAL_MAX_BATCH_DEFAULT / JOURNAL_TRANS_MAX_DEFAULT; | 2692 | JOURNAL_MAX_BATCH_DEFAULT / JOURNAL_TRANS_MAX_DEFAULT; |
2684 | } | 2693 | } |
2685 | 2694 | ||
2686 | if (!journal->j_trans_max) { | 2695 | if (!journal->j_trans_max) { |
2687 | /*we have the file system was created by old version of mkreiserfs | 2696 | /*we have the file system was created by old version of mkreiserfs |
2688 | so this field contains zero value */ | 2697 | so this field contains zero value */ |
2689 | journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; | 2698 | journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; |
2690 | journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; | 2699 | journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; |
2691 | journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; | 2700 | journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; |
2692 | 2701 | ||
2693 | /* for blocksize >= 4096 - max transaction size is 1024. For block size < 4096 | 2702 | /* for blocksize >= 4096 - max transaction size is 1024. For block size < 4096 |
2694 | trans max size is decreased proportionally */ | 2703 | trans max size is decreased proportionally */ |
2695 | if (p_s_sb->s_blocksize < 4096) { | 2704 | if (p_s_sb->s_blocksize < 4096) { |
2696 | journal->j_trans_max /= (4096 / p_s_sb->s_blocksize); | 2705 | journal->j_trans_max /= (4096 / p_s_sb->s_blocksize); |
2697 | journal->j_max_batch = (journal->j_trans_max) * 9 / 10; | 2706 | journal->j_max_batch = (journal->j_trans_max) * 9 / 10; |
2698 | } | 2707 | } |
2699 | } | 2708 | } |
2700 | 2709 | ||
2701 | journal->j_default_max_commit_age = journal->j_max_commit_age; | 2710 | journal->j_default_max_commit_age = journal->j_max_commit_age; |
2702 | 2711 | ||
2703 | if (commit_max_age != 0) { | 2712 | if (commit_max_age != 0) { |
2704 | journal->j_max_commit_age = commit_max_age; | 2713 | journal->j_max_commit_age = commit_max_age; |
2705 | journal->j_max_trans_age = commit_max_age; | 2714 | journal->j_max_trans_age = commit_max_age; |
2706 | } | 2715 | } |
2707 | 2716 | ||
2708 | reiserfs_info(p_s_sb, "journal params: device %s, size %u, " | 2717 | reiserfs_info(p_s_sb, "journal params: device %s, size %u, " |
2709 | "journal first block %u, max trans len %u, max batch %u, " | 2718 | "journal first block %u, max trans len %u, max batch %u, " |
2710 | "max commit age %u, max trans age %u\n", | 2719 | "max commit age %u, max trans age %u\n", |
2711 | bdevname(journal->j_dev_bd, b), | 2720 | bdevname(journal->j_dev_bd, b), |
2712 | SB_ONDISK_JOURNAL_SIZE(p_s_sb), | 2721 | SB_ONDISK_JOURNAL_SIZE(p_s_sb), |
2713 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), | 2722 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), |
2714 | journal->j_trans_max, | 2723 | journal->j_trans_max, |
2715 | journal->j_max_batch, | 2724 | journal->j_max_batch, |
2716 | journal->j_max_commit_age, journal->j_max_trans_age); | 2725 | journal->j_max_commit_age, journal->j_max_trans_age); |
2717 | 2726 | ||
2718 | brelse(bhjh); | 2727 | brelse(bhjh); |
2719 | 2728 | ||
2720 | journal->j_list_bitmap_index = 0; | 2729 | journal->j_list_bitmap_index = 0; |
2721 | journal_list_init(p_s_sb); | 2730 | journal_list_init(p_s_sb); |
2722 | 2731 | ||
2723 | memset(journal->j_list_hash_table, 0, | 2732 | memset(journal->j_list_hash_table, 0, |
2724 | JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); | 2733 | JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); |
2725 | 2734 | ||
2726 | INIT_LIST_HEAD(&journal->j_dirty_buffers); | 2735 | INIT_LIST_HEAD(&journal->j_dirty_buffers); |
2727 | spin_lock_init(&journal->j_dirty_buffers_lock); | 2736 | spin_lock_init(&journal->j_dirty_buffers_lock); |
2728 | 2737 | ||
2729 | journal->j_start = 0; | 2738 | journal->j_start = 0; |
2730 | journal->j_len = 0; | 2739 | journal->j_len = 0; |
2731 | journal->j_len_alloc = 0; | 2740 | journal->j_len_alloc = 0; |
2732 | atomic_set(&(journal->j_wcount), 0); | 2741 | atomic_set(&(journal->j_wcount), 0); |
2733 | atomic_set(&(journal->j_async_throttle), 0); | 2742 | atomic_set(&(journal->j_async_throttle), 0); |
2734 | journal->j_bcount = 0; | 2743 | journal->j_bcount = 0; |
2735 | journal->j_trans_start_time = 0; | 2744 | journal->j_trans_start_time = 0; |
2736 | journal->j_last = NULL; | 2745 | journal->j_last = NULL; |
2737 | journal->j_first = NULL; | 2746 | journal->j_first = NULL; |
2738 | init_waitqueue_head(&(journal->j_join_wait)); | 2747 | init_waitqueue_head(&(journal->j_join_wait)); |
2739 | sema_init(&journal->j_lock, 1); | 2748 | sema_init(&journal->j_lock, 1); |
2740 | sema_init(&journal->j_flush_sem, 1); | 2749 | sema_init(&journal->j_flush_sem, 1); |
2741 | 2750 | ||
2742 | journal->j_trans_id = 10; | 2751 | journal->j_trans_id = 10; |
2743 | journal->j_mount_id = 10; | 2752 | journal->j_mount_id = 10; |
2744 | journal->j_state = 0; | 2753 | journal->j_state = 0; |
2745 | atomic_set(&(journal->j_jlock), 0); | 2754 | atomic_set(&(journal->j_jlock), 0); |
2746 | journal->j_cnode_free_list = allocate_cnodes(num_cnodes); | 2755 | journal->j_cnode_free_list = allocate_cnodes(num_cnodes); |
2747 | journal->j_cnode_free_orig = journal->j_cnode_free_list; | 2756 | journal->j_cnode_free_orig = journal->j_cnode_free_list; |
2748 | journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; | 2757 | journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; |
2749 | journal->j_cnode_used = 0; | 2758 | journal->j_cnode_used = 0; |
2750 | journal->j_must_wait = 0; | 2759 | journal->j_must_wait = 0; |
2751 | 2760 | ||
2752 | if (journal->j_cnode_free == 0) { | 2761 | if (journal->j_cnode_free == 0) { |
2753 | reiserfs_warning(p_s_sb, "journal-2004: Journal cnode memory " | 2762 | reiserfs_warning(p_s_sb, "journal-2004: Journal cnode memory " |
2754 | "allocation failed (%ld bytes). Journal is " | 2763 | "allocation failed (%ld bytes). Journal is " |
2755 | "too large for available memory. Usually " | 2764 | "too large for available memory. Usually " |
2756 | "this is due to a journal that is too large.", | 2765 | "this is due to a journal that is too large.", |
2757 | sizeof (struct reiserfs_journal_cnode) * num_cnodes); | 2766 | sizeof (struct reiserfs_journal_cnode) * num_cnodes); |
2758 | goto free_and_return; | 2767 | goto free_and_return; |
2759 | } | 2768 | } |
2760 | 2769 | ||
2761 | init_journal_hash(p_s_sb); | 2770 | init_journal_hash(p_s_sb); |
2762 | jl = journal->j_current_jl; | 2771 | jl = journal->j_current_jl; |
2763 | jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl); | 2772 | jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl); |
2764 | if (!jl->j_list_bitmap) { | 2773 | if (!jl->j_list_bitmap) { |
2765 | reiserfs_warning(p_s_sb, | 2774 | reiserfs_warning(p_s_sb, |
2766 | "journal-2005, get_list_bitmap failed for journal list 0"); | 2775 | "journal-2005, get_list_bitmap failed for journal list 0"); |
2767 | goto free_and_return; | 2776 | goto free_and_return; |
2768 | } | 2777 | } |
2769 | if (journal_read(p_s_sb) < 0) { | 2778 | if (journal_read(p_s_sb) < 0) { |
2770 | reiserfs_warning(p_s_sb, "Replay Failure, unable to mount"); | 2779 | reiserfs_warning(p_s_sb, "Replay Failure, unable to mount"); |
2771 | goto free_and_return; | 2780 | goto free_and_return; |
2772 | } | 2781 | } |
2773 | 2782 | ||
2774 | reiserfs_mounted_fs_count++; | 2783 | reiserfs_mounted_fs_count++; |
2775 | if (reiserfs_mounted_fs_count <= 1) | 2784 | if (reiserfs_mounted_fs_count <= 1) |
2776 | commit_wq = create_workqueue("reiserfs"); | 2785 | commit_wq = create_workqueue("reiserfs"); |
2777 | 2786 | ||
2778 | INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb); | 2787 | INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb); |
2779 | return 0; | 2788 | return 0; |
2780 | free_and_return: | 2789 | free_and_return: |
2781 | free_journal_ram(p_s_sb); | 2790 | free_journal_ram(p_s_sb); |
2782 | return 1; | 2791 | return 1; |
2783 | } | 2792 | } |
2784 | 2793 | ||
2785 | /* | 2794 | /* |
2786 | ** test for a polite end of the current transaction. Used by file_write, and should | 2795 | ** test for a polite end of the current transaction. Used by file_write, and should |
2787 | ** be used by delete to make sure they don't write more than can fit inside a single | 2796 | ** be used by delete to make sure they don't write more than can fit inside a single |
2788 | ** transaction | 2797 | ** transaction |
2789 | */ | 2798 | */ |
2790 | int journal_transaction_should_end(struct reiserfs_transaction_handle *th, | 2799 | int journal_transaction_should_end(struct reiserfs_transaction_handle *th, |
2791 | int new_alloc) | 2800 | int new_alloc) |
2792 | { | 2801 | { |
2793 | struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); | 2802 | struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); |
2794 | time_t now = get_seconds(); | 2803 | time_t now = get_seconds(); |
2795 | /* cannot restart while nested */ | 2804 | /* cannot restart while nested */ |
2796 | BUG_ON(!th->t_trans_id); | 2805 | BUG_ON(!th->t_trans_id); |
2797 | if (th->t_refcount > 1) | 2806 | if (th->t_refcount > 1) |
2798 | return 0; | 2807 | return 0; |
2799 | if (journal->j_must_wait > 0 || | 2808 | if (journal->j_must_wait > 0 || |
2800 | (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || | 2809 | (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || |
2801 | atomic_read(&(journal->j_jlock)) || | 2810 | atomic_read(&(journal->j_jlock)) || |
2802 | (now - journal->j_trans_start_time) > journal->j_max_trans_age || | 2811 | (now - journal->j_trans_start_time) > journal->j_max_trans_age || |
2803 | journal->j_cnode_free < (journal->j_trans_max * 3)) { | 2812 | journal->j_cnode_free < (journal->j_trans_max * 3)) { |
2804 | return 1; | 2813 | return 1; |
2805 | } | 2814 | } |
2806 | return 0; | 2815 | return 0; |
2807 | } | 2816 | } |
2808 | 2817 | ||
2809 | /* this must be called inside a transaction, and requires the | 2818 | /* this must be called inside a transaction, and requires the |
2810 | ** kernel_lock to be held | 2819 | ** kernel_lock to be held |
2811 | */ | 2820 | */ |
2812 | void reiserfs_block_writes(struct reiserfs_transaction_handle *th) | 2821 | void reiserfs_block_writes(struct reiserfs_transaction_handle *th) |
2813 | { | 2822 | { |
2814 | struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); | 2823 | struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); |
2815 | BUG_ON(!th->t_trans_id); | 2824 | BUG_ON(!th->t_trans_id); |
2816 | journal->j_must_wait = 1; | 2825 | journal->j_must_wait = 1; |
2817 | set_bit(J_WRITERS_BLOCKED, &journal->j_state); | 2826 | set_bit(J_WRITERS_BLOCKED, &journal->j_state); |
2818 | return; | 2827 | return; |
2819 | } | 2828 | } |
2820 | 2829 | ||
2821 | /* this must be called without a transaction started, and does not | 2830 | /* this must be called without a transaction started, and does not |
2822 | ** require BKL | 2831 | ** require BKL |
2823 | */ | 2832 | */ |
2824 | void reiserfs_allow_writes(struct super_block *s) | 2833 | void reiserfs_allow_writes(struct super_block *s) |
2825 | { | 2834 | { |
2826 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 2835 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
2827 | clear_bit(J_WRITERS_BLOCKED, &journal->j_state); | 2836 | clear_bit(J_WRITERS_BLOCKED, &journal->j_state); |
2828 | wake_up(&journal->j_join_wait); | 2837 | wake_up(&journal->j_join_wait); |
2829 | } | 2838 | } |
2830 | 2839 | ||
2831 | /* this must be called without a transaction started, and does not | 2840 | /* this must be called without a transaction started, and does not |
2832 | ** require BKL | 2841 | ** require BKL |
2833 | */ | 2842 | */ |
2834 | void reiserfs_wait_on_write_block(struct super_block *s) | 2843 | void reiserfs_wait_on_write_block(struct super_block *s) |
2835 | { | 2844 | { |
2836 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 2845 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
2837 | wait_event(journal->j_join_wait, | 2846 | wait_event(journal->j_join_wait, |
2838 | !test_bit(J_WRITERS_BLOCKED, &journal->j_state)); | 2847 | !test_bit(J_WRITERS_BLOCKED, &journal->j_state)); |
2839 | } | 2848 | } |
2840 | 2849 | ||
/* Park the calling task on j_join_wait for at most one wakeup cycle.
 * Used to throttle writers while a commit is in progress; woken by
 * wake_queued_writers(). */
static void queue_log_writer(struct super_block *s)
{
	wait_queue_t wait;
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	set_bit(J_WRITERS_QUEUED, &journal->j_state);

	/*
	 * we don't want to use wait_event here because
	 * we only want to wait once.
	 */
	init_waitqueue_entry(&wait, current);
	add_wait_queue(&journal->j_join_wait, &wait);
	set_current_state(TASK_UNINTERRUPTIBLE);
	/* re-check the flag after we are on the queue: if a waker already
	 * cleared it between our set_bit above and here, skip the sleep so
	 * the wakeup is not lost */
	if (test_bit(J_WRITERS_QUEUED, &journal->j_state))
		schedule();
	current->state = TASK_RUNNING;
	remove_wait_queue(&journal->j_join_wait, &wait);
}
2859 | 2868 | ||
2860 | static void wake_queued_writers(struct super_block *s) | 2869 | static void wake_queued_writers(struct super_block *s) |
2861 | { | 2870 | { |
2862 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 2871 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
2863 | if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state)) | 2872 | if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state)) |
2864 | wake_up(&journal->j_join_wait); | 2873 | wake_up(&journal->j_join_wait); |
2865 | } | 2874 | } |
2866 | 2875 | ||
/* Give transaction trans_id a chance to collect more writers before it is
 * committed.  Returns once the transaction has ended, or once the join
 * count (j_bcount) stops growing between passes. */
static void let_transaction_grow(struct super_block *sb, unsigned long trans_id)
{
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	unsigned long bcount = journal->j_bcount;
	while (1) {
		schedule_timeout_uninterruptible(1);
		/* mark the running list so others know a commit is wanted */
		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
		/* sleep while writers are still active in this transaction */
		while ((atomic_read(&journal->j_wcount) > 0 ||
			atomic_read(&journal->j_jlock)) &&
		       journal->j_trans_id == trans_id) {
			queue_log_writer(sb);
		}
		/* transaction ended underneath us: nothing left to wait for */
		if (journal->j_trans_id != trans_id)
			break;
		/* no new joiners since the last pass: it has stopped growing */
		if (bcount == journal->j_bcount)
			break;
		bcount = journal->j_bcount;
	}
}
2886 | 2895 | ||
/* join == true if you must join an existing transaction.
** join == false if you can deal with waiting for others to finish
**
** this will block until the transaction is joinable. send the number of blocks you
** expect to use in nblocks.
**
** Returns 0 on success with th initialized and j_wcount bumped; on
** failure th is zeroed (t_super restored) and a negative errno or the
** journal's j_errno is returned.
*/
static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
			      struct super_block *p_s_sb, unsigned long nblocks,
			      int join)
{
	time_t now = get_seconds();
	int old_trans_id;
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
	struct reiserfs_transaction_handle myth;
	int sched_count = 0;
	int retval;

	reiserfs_check_lock_depth(p_s_sb, "journal_begin");
	if (nblocks > journal->j_trans_max)
		BUG();

	PROC_INFO_INC(p_s_sb, journal.journal_being);
	/* set here for journal_join */
	th->t_refcount = 1;
	th->t_super = p_s_sb;

      relock:
	lock_journal(p_s_sb);
	/* an aborted journal only admits JBEGIN_ABORT callers */
	if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
		unlock_journal(p_s_sb);
		retval = journal->j_errno;
		goto out_fail;
	}
	journal->j_bcount++;

	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
		unlock_journal(p_s_sb);
		reiserfs_wait_on_write_block(p_s_sb);
		PROC_INFO_INC(p_s_sb, journal.journal_relock_writers);
		goto relock;
	}
	now = get_seconds();

	/* if there is no room in the journal OR
	** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning
	** we don't sleep if there aren't other writers
	*/

	if ((!join && journal->j_must_wait > 0) ||
	    (!join
	     && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch)
	    || (!join && atomic_read(&journal->j_wcount) > 0
		&& journal->j_trans_start_time > 0
		&& (now - journal->j_trans_start_time) >
		journal->j_max_trans_age) || (!join
					      && atomic_read(&journal->j_jlock))
	    || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {

		old_trans_id = journal->j_trans_id;
		unlock_journal(p_s_sb);	/* allow others to finish this transaction */

		/* batch is full but mostly unused (j_len well below the
		 * allocation): with many writers, just wait for a commit
		 * instead of joining */
		if (!join && (journal->j_len_alloc + nblocks + 2) >=
		    journal->j_max_batch &&
		    ((journal->j_len + nblocks + 2) * 100) <
		    (journal->j_len_alloc * 75)) {
			if (atomic_read(&journal->j_wcount) > 10) {
				sched_count++;
				queue_log_writer(p_s_sb);
				goto relock;
			}
		}
		/* don't mess with joining the transaction if all we have to do is
		 * wait for someone else to do a commit
		 */
		if (atomic_read(&journal->j_jlock)) {
			while (journal->j_trans_id == old_trans_id &&
			       atomic_read(&journal->j_jlock)) {
				queue_log_writer(p_s_sb);
			}
			goto relock;
		}
		/* join with a throw-away handle just to push the current
		 * transaction to an end */
		retval = journal_join(&myth, p_s_sb, 1);
		if (retval)
			goto out_fail;

		/* someone might have ended the transaction while we joined */
		if (old_trans_id != journal->j_trans_id) {
			retval = do_journal_end(&myth, p_s_sb, 1, 0);
		} else {
			retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW);
		}

		if (retval)
			goto out_fail;

		PROC_INFO_INC(p_s_sb, journal.journal_relock_wcount);
		goto relock;
	}
	/* we are the first writer, set trans_id */
	if (journal->j_trans_start_time == 0) {
		journal->j_trans_start_time = get_seconds();
	}
	atomic_inc(&(journal->j_wcount));
	journal->j_len_alloc += nblocks;
	th->t_blocks_logged = 0;
	th->t_blocks_allocated = nblocks;
	th->t_trans_id = journal->j_trans_id;
	unlock_journal(p_s_sb);
	INIT_LIST_HEAD(&th->t_list);
	get_fs_excl();
	return 0;

      out_fail:
	memset(th, 0, sizeof(*th));
	/* Re-set th->t_super, so we can properly keep track of how many
	 * persistent transactions there are. We need to do this so if this
	 * call is part of a failed restart_transaction, we can free it later */
	th->t_super = p_s_sb;
	return retval;
}
3007 | 3016 | ||
3008 | struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct | 3017 | struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct |
3009 | super_block | 3018 | super_block |
3010 | *s, | 3019 | *s, |
3011 | int nblocks) | 3020 | int nblocks) |
3012 | { | 3021 | { |
3013 | int ret; | 3022 | int ret; |
3014 | struct reiserfs_transaction_handle *th; | 3023 | struct reiserfs_transaction_handle *th; |
3015 | 3024 | ||
3016 | /* if we're nesting into an existing transaction. It will be | 3025 | /* if we're nesting into an existing transaction. It will be |
3017 | ** persistent on its own | 3026 | ** persistent on its own |
3018 | */ | 3027 | */ |
3019 | if (reiserfs_transaction_running(s)) { | 3028 | if (reiserfs_transaction_running(s)) { |
3020 | th = current->journal_info; | 3029 | th = current->journal_info; |
3021 | th->t_refcount++; | 3030 | th->t_refcount++; |
3022 | if (th->t_refcount < 2) { | 3031 | if (th->t_refcount < 2) { |
3023 | BUG(); | 3032 | BUG(); |
3024 | } | 3033 | } |
3025 | return th; | 3034 | return th; |
3026 | } | 3035 | } |
3027 | th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); | 3036 | th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); |
3028 | if (!th) | 3037 | if (!th) |
3029 | return NULL; | 3038 | return NULL; |
3030 | ret = journal_begin(th, s, nblocks); | 3039 | ret = journal_begin(th, s, nblocks); |
3031 | if (ret) { | 3040 | if (ret) { |
3032 | kfree(th); | 3041 | kfree(th); |
3033 | return NULL; | 3042 | return NULL; |
3034 | } | 3043 | } |
3035 | 3044 | ||
3036 | SB_JOURNAL(s)->j_persistent_trans++; | 3045 | SB_JOURNAL(s)->j_persistent_trans++; |
3037 | return th; | 3046 | return th; |
3038 | } | 3047 | } |
3039 | 3048 | ||
3040 | int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) | 3049 | int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) |
3041 | { | 3050 | { |
3042 | struct super_block *s = th->t_super; | 3051 | struct super_block *s = th->t_super; |
3043 | int ret = 0; | 3052 | int ret = 0; |
3044 | if (th->t_trans_id) | 3053 | if (th->t_trans_id) |
3045 | ret = journal_end(th, th->t_super, th->t_blocks_allocated); | 3054 | ret = journal_end(th, th->t_super, th->t_blocks_allocated); |
3046 | else | 3055 | else |
3047 | ret = -EIO; | 3056 | ret = -EIO; |
3048 | if (th->t_refcount == 0) { | 3057 | if (th->t_refcount == 0) { |
3049 | SB_JOURNAL(s)->j_persistent_trans--; | 3058 | SB_JOURNAL(s)->j_persistent_trans--; |
3050 | kfree(th); | 3059 | kfree(th); |
3051 | } | 3060 | } |
3052 | return ret; | 3061 | return ret; |
3053 | } | 3062 | } |
3054 | 3063 | ||
3055 | static int journal_join(struct reiserfs_transaction_handle *th, | 3064 | static int journal_join(struct reiserfs_transaction_handle *th, |
3056 | struct super_block *p_s_sb, unsigned long nblocks) | 3065 | struct super_block *p_s_sb, unsigned long nblocks) |
3057 | { | 3066 | { |
3058 | struct reiserfs_transaction_handle *cur_th = current->journal_info; | 3067 | struct reiserfs_transaction_handle *cur_th = current->journal_info; |
3059 | 3068 | ||
3060 | /* this keeps do_journal_end from NULLing out the current->journal_info | 3069 | /* this keeps do_journal_end from NULLing out the current->journal_info |
3061 | ** pointer | 3070 | ** pointer |
3062 | */ | 3071 | */ |
3063 | th->t_handle_save = cur_th; | 3072 | th->t_handle_save = cur_th; |
3064 | if (cur_th && cur_th->t_refcount > 1) { | 3073 | if (cur_th && cur_th->t_refcount > 1) { |
3065 | BUG(); | 3074 | BUG(); |
3066 | } | 3075 | } |
3067 | return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN); | 3076 | return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN); |
3068 | } | 3077 | } |
3069 | 3078 | ||
3070 | int journal_join_abort(struct reiserfs_transaction_handle *th, | 3079 | int journal_join_abort(struct reiserfs_transaction_handle *th, |
3071 | struct super_block *p_s_sb, unsigned long nblocks) | 3080 | struct super_block *p_s_sb, unsigned long nblocks) |
3072 | { | 3081 | { |
3073 | struct reiserfs_transaction_handle *cur_th = current->journal_info; | 3082 | struct reiserfs_transaction_handle *cur_th = current->journal_info; |
3074 | 3083 | ||
3075 | /* this keeps do_journal_end from NULLing out the current->journal_info | 3084 | /* this keeps do_journal_end from NULLing out the current->journal_info |
3076 | ** pointer | 3085 | ** pointer |
3077 | */ | 3086 | */ |
3078 | th->t_handle_save = cur_th; | 3087 | th->t_handle_save = cur_th; |
3079 | if (cur_th && cur_th->t_refcount > 1) { | 3088 | if (cur_th && cur_th->t_refcount > 1) { |
3080 | BUG(); | 3089 | BUG(); |
3081 | } | 3090 | } |
3082 | return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT); | 3091 | return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT); |
3083 | } | 3092 | } |
3084 | 3093 | ||
/* Public transaction begin.  Handles nesting: if this task already has a
 * running handle on the same filesystem, just bump its refcount and copy
 * it into th; otherwise install th as current->journal_info and start a
 * regular (JBEGIN_REG) transaction.  Returns 0 or an error from
 * do_journal_begin_r. */
int journal_begin(struct reiserfs_transaction_handle *th,
		  struct super_block *p_s_sb, unsigned long nblocks)
{
	struct reiserfs_transaction_handle *cur_th = current->journal_info;
	int ret;

	th->t_handle_save = NULL;
	if (cur_th) {
		/* we are nesting into the current transaction */
		if (cur_th->t_super == p_s_sb) {
			BUG_ON(!cur_th->t_refcount);
			cur_th->t_refcount++;
			memcpy(th, cur_th, sizeof(*th));
			if (th->t_refcount <= 1)
				reiserfs_warning(p_s_sb,
						 "BAD: refcount <= 1, but journal_info != 0");
			return 0;
		} else {
			/* we've ended up with a handle from a different filesystem.
			** save it and restore on journal_end. This should never
			** really happen...
			*/
			reiserfs_warning(p_s_sb,
					 "clm-2100: nesting info a different FS");
			th->t_handle_save = current->journal_info;
			current->journal_info = th;
		}
	} else {
		current->journal_info = th;
	}
	ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG);
	if (current->journal_info != th)
		BUG();

	/* I guess this boils down to being the reciprocal of clm-2100 above.
	 * If do_journal_begin_r fails, we need to put it back, since journal_end
	 * won't be called to do it. */
	if (ret)
		current->journal_info = th->t_handle_save;
	else
		BUG_ON(!th->t_refcount);

	return ret;
}
3129 | 3138 | ||
/*
** puts bh into the current transaction. If it was already there, reorders removes the
** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order).
**
** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the
** transaction is committed.
**
** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
**
** Returns 0 on success, 1 if no writer holds the transaction open
** (j_wcount <= 0).  Panics on handle/transaction mismatch or journal
** overflow.
*/
int journal_mark_dirty(struct reiserfs_transaction_handle *th,
		       struct super_block *p_s_sb, struct buffer_head *bh)
{
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
	struct reiserfs_journal_cnode *cn = NULL;
	int count_already_incd = 0;
	int prepared = 0;
	BUG_ON(!th->t_trans_id);

	PROC_INFO_INC(p_s_sb, journal.mark_dirty);
	/* the handle must belong to the transaction that is still running */
	if (th->t_trans_id != journal->j_trans_id) {
		reiserfs_panic(th->t_super,
			       "journal-1577: handle trans id %ld != current trans id %ld\n",
			       th->t_trans_id, journal->j_trans_id);
	}

	p_s_sb->s_dirt = 1;

	prepared = test_clear_buffer_journal_prepared(bh);
	clear_buffer_journal_restore_dirty(bh);
	/* already in this transaction, we are done */
	if (buffer_journaled(bh)) {
		PROC_INFO_INC(p_s_sb, journal.mark_dirty_already);
		return 0;
	}

	/* this must be turned into a panic instead of a warning. We can't allow
	** a dirty or journal_dirty or locked buffer to be logged, as some changes
	** could get to disk too early. NOT GOOD.
	*/
	if (!prepared || buffer_dirty(bh)) {
		reiserfs_warning(p_s_sb, "journal-1777: buffer %llu bad state "
				 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
				 (unsigned long long)bh->b_blocknr,
				 prepared ? ' ' : '!',
				 buffer_locked(bh) ? ' ' : '!',
				 buffer_dirty(bh) ? ' ' : '!',
				 buffer_journal_dirty(bh) ? ' ' : '!');
	}

	if (atomic_read(&(journal->j_wcount)) <= 0) {
		reiserfs_warning(p_s_sb,
				 "journal-1409: journal_mark_dirty returning because j_wcount was %d",
				 atomic_read(&(journal->j_wcount)));
		return 1;
	}
	/* this error means I've screwed up, and we've overflowed the transaction.
	** Nothing can be done here, except make the FS readonly or panic.
	*/
	if (journal->j_len >= journal->j_trans_max) {
		reiserfs_panic(th->t_super,
			       "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n",
			       journal->j_len);
	}

	/* already journal-dirty: the bh reference was taken on a previous
	 * mark, so don't grab another one below */
	if (buffer_journal_dirty(bh)) {
		count_already_incd = 1;
		PROC_INFO_INC(p_s_sb, journal.mark_dirty_notjournal);
		clear_buffer_journal_dirty(bh);
	}

	if (journal->j_len > journal->j_len_alloc) {
		journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT;
	}

	set_buffer_journaled(bh);

	/* now put this guy on the end */
	if (!cn) {
		cn = get_cnode(p_s_sb);
		if (!cn) {
			reiserfs_panic(p_s_sb, "get_cnode failed!\n");
		}

		/* handle exhausted its reservation: extend it and the
		 * journal's allocation together */
		if (th->t_blocks_logged == th->t_blocks_allocated) {
			th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT;
			journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT;
		}
		th->t_blocks_logged++;
		journal->j_len++;

		cn->bh = bh;
		cn->blocknr = bh->b_blocknr;
		cn->sb = p_s_sb;
		cn->jlist = NULL;
		insert_journal_hash(journal->j_hash_table, cn);
		if (!count_already_incd) {
			get_bh(bh);
		}
	}
	/* link the cnode onto the tail of the running transaction's list */
	cn->next = NULL;
	cn->prev = journal->j_last;
	cn->bh = bh;
	if (journal->j_last) {
		journal->j_last->next = cn;
		journal->j_last = cn;
	} else {
		journal->j_first = cn;
		journal->j_last = cn;
	}
	return 0;
}
3241 | 3250 | ||
/* Public transaction end.  Drops one reference on th; only when the last
 * reference goes away is do_journal_end() actually called.  Returns 0 on
 * a nested close, -EIO for a handle with no live transaction, otherwise
 * the do_journal_end result. */
int journal_end(struct reiserfs_transaction_handle *th,
		struct super_block *p_s_sb, unsigned long nblocks)
{
	if (!current->journal_info && th->t_refcount > 1)
		reiserfs_warning(p_s_sb, "REISER-NESTING: th NULL, refcount %d",
				 th->t_refcount);

	if (!th->t_trans_id) {
		WARN_ON(1);
		return -EIO;
	}

	th->t_refcount--;
	if (th->t_refcount > 0) {
		struct reiserfs_transaction_handle *cur_th =
		    current->journal_info;

		/* we aren't allowed to close a nested transaction on a different
		** filesystem from the one in the task struct
		*/
		if (cur_th->t_super != th->t_super)
			BUG();

		/* copy the (possibly updated) handle state back into the
		 * task's master handle before discarding th */
		if (th != cur_th) {
			memcpy(current->journal_info, th, sizeof(*th));
			th->t_trans_id = 0;
		}
		return 0;
	} else {
		return do_journal_end(th, p_s_sb, nblocks, 0);
	}
}
3274 | 3283 | ||
3275 | /* removes from the current transaction, relsing and descrementing any counters. | 3284 | /* removes from the current transaction, relsing and descrementing any counters. |
3276 | ** also files the removed buffer directly onto the clean list | 3285 | ** also files the removed buffer directly onto the clean list |
3277 | ** | 3286 | ** |
3278 | ** called by journal_mark_freed when a block has been deleted | 3287 | ** called by journal_mark_freed when a block has been deleted |
3279 | ** | 3288 | ** |
3280 | ** returns 1 if it cleaned and relsed the buffer. 0 otherwise | 3289 | ** returns 1 if it cleaned and relsed the buffer. 0 otherwise |
3281 | */ | 3290 | */ |
3282 | static int remove_from_transaction(struct super_block *p_s_sb, | 3291 | static int remove_from_transaction(struct super_block *p_s_sb, |
3283 | b_blocknr_t blocknr, int already_cleaned) | 3292 | b_blocknr_t blocknr, int already_cleaned) |
3284 | { | 3293 | { |
3285 | struct buffer_head *bh; | 3294 | struct buffer_head *bh; |
3286 | struct reiserfs_journal_cnode *cn; | 3295 | struct reiserfs_journal_cnode *cn; |
3287 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); | 3296 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); |
3288 | int ret = 0; | 3297 | int ret = 0; |
3289 | 3298 | ||
3290 | cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); | 3299 | cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); |
3291 | if (!cn || !cn->bh) { | 3300 | if (!cn || !cn->bh) { |
3292 | return ret; | 3301 | return ret; |
3293 | } | 3302 | } |
3294 | bh = cn->bh; | 3303 | bh = cn->bh; |
3295 | if (cn->prev) { | 3304 | if (cn->prev) { |
3296 | cn->prev->next = cn->next; | 3305 | cn->prev->next = cn->next; |
3297 | } | 3306 | } |
3298 | if (cn->next) { | 3307 | if (cn->next) { |
3299 | cn->next->prev = cn->prev; | 3308 | cn->next->prev = cn->prev; |
3300 | } | 3309 | } |
3301 | if (cn == journal->j_first) { | 3310 | if (cn == journal->j_first) { |
3302 | journal->j_first = cn->next; | 3311 | journal->j_first = cn->next; |
3303 | } | 3312 | } |
3304 | if (cn == journal->j_last) { | 3313 | if (cn == journal->j_last) { |
3305 | journal->j_last = cn->prev; | 3314 | journal->j_last = cn->prev; |
3306 | } | 3315 | } |
3307 | if (bh) | 3316 | if (bh) |
3308 | remove_journal_hash(p_s_sb, journal->j_hash_table, NULL, | 3317 | remove_journal_hash(p_s_sb, journal->j_hash_table, NULL, |
3309 | bh->b_blocknr, 0); | 3318 | bh->b_blocknr, 0); |
3310 | clear_buffer_journaled(bh); /* don't log this one */ | 3319 | clear_buffer_journaled(bh); /* don't log this one */ |
3311 | 3320 | ||
3312 | if (!already_cleaned) { | 3321 | if (!already_cleaned) { |
3313 | clear_buffer_journal_dirty(bh); | 3322 | clear_buffer_journal_dirty(bh); |
3314 | clear_buffer_dirty(bh); | 3323 | clear_buffer_dirty(bh); |
3315 | clear_buffer_journal_test(bh); | 3324 | clear_buffer_journal_test(bh); |
3316 | put_bh(bh); | 3325 | put_bh(bh); |
3317 | if (atomic_read(&(bh->b_count)) < 0) { | 3326 | if (atomic_read(&(bh->b_count)) < 0) { |
3318 | reiserfs_warning(p_s_sb, | 3327 | reiserfs_warning(p_s_sb, |
3319 | "journal-1752: remove from trans, b_count < 0"); | 3328 | "journal-1752: remove from trans, b_count < 0"); |
3320 | } | 3329 | } |
3321 | ret = 1; | 3330 | ret = 1; |
3322 | } | 3331 | } |
3323 | journal->j_len--; | 3332 | journal->j_len--; |
3324 | journal->j_len_alloc--; | 3333 | journal->j_len_alloc--; |
3325 | free_cnode(p_s_sb, cn); | 3334 | free_cnode(p_s_sb, cn); |
3326 | return ret; | 3335 | return ret; |
3327 | } | 3336 | } |
3328 | 3337 | ||
3329 | /* | 3338 | /* |
3330 | ** for any cnode in a journal list, it can only be dirtied of all the | 3339 | ** for any cnode in a journal list, it can only be dirtied of all the |
3331 | ** transactions that include it are commited to disk. | 3340 | ** transactions that include it are commited to disk. |
3332 | ** this checks through each transaction, and returns 1 if you are allowed to dirty, | 3341 | ** this checks through each transaction, and returns 1 if you are allowed to dirty, |
3333 | ** and 0 if you aren't | 3342 | ** and 0 if you aren't |
3334 | ** | 3343 | ** |
3335 | ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log | 3344 | ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log |
3336 | ** blocks for a given transaction on disk | 3345 | ** blocks for a given transaction on disk |
3337 | ** | 3346 | ** |
3338 | */ | 3347 | */ |
3339 | static int can_dirty(struct reiserfs_journal_cnode *cn) | 3348 | static int can_dirty(struct reiserfs_journal_cnode *cn) |
3340 | { | 3349 | { |
3341 | struct super_block *sb = cn->sb; | 3350 | struct super_block *sb = cn->sb; |
3342 | b_blocknr_t blocknr = cn->blocknr; | 3351 | b_blocknr_t blocknr = cn->blocknr; |
3343 | struct reiserfs_journal_cnode *cur = cn->hprev; | 3352 | struct reiserfs_journal_cnode *cur = cn->hprev; |
3344 | int can_dirty = 1; | 3353 | int can_dirty = 1; |
3345 | 3354 | ||
3346 | /* first test hprev. These are all newer than cn, so any node here | 3355 | /* first test hprev. These are all newer than cn, so any node here |
3347 | ** with the same block number and dev means this node can't be sent | 3356 | ** with the same block number and dev means this node can't be sent |
3348 | ** to disk right now. | 3357 | ** to disk right now. |
3349 | */ | 3358 | */ |
3350 | while (cur && can_dirty) { | 3359 | while (cur && can_dirty) { |
3351 | if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && | 3360 | if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && |
3352 | cur->blocknr == blocknr) { | 3361 | cur->blocknr == blocknr) { |
3353 | can_dirty = 0; | 3362 | can_dirty = 0; |
3354 | } | 3363 | } |
3355 | cur = cur->hprev; | 3364 | cur = cur->hprev; |
3356 | } | 3365 | } |
3357 | /* then test hnext. These are all older than cn. As long as they | 3366 | /* then test hnext. These are all older than cn. As long as they |
3358 | ** are committed to the log, it is safe to write cn to disk | 3367 | ** are committed to the log, it is safe to write cn to disk |
3359 | */ | 3368 | */ |
3360 | cur = cn->hnext; | 3369 | cur = cn->hnext; |
3361 | while (cur && can_dirty) { | 3370 | while (cur && can_dirty) { |
3362 | if (cur->jlist && cur->jlist->j_len > 0 && | 3371 | if (cur->jlist && cur->jlist->j_len > 0 && |
3363 | atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && | 3372 | atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && |
3364 | cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { | 3373 | cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { |
3365 | can_dirty = 0; | 3374 | can_dirty = 0; |
3366 | } | 3375 | } |
3367 | cur = cur->hnext; | 3376 | cur = cur->hnext; |
3368 | } | 3377 | } |
3369 | return can_dirty; | 3378 | return can_dirty; |
3370 | } | 3379 | } |
3371 | 3380 | ||
3372 | /* syncs the commit blocks, but does not force the real buffers to disk | 3381 | /* syncs the commit blocks, but does not force the real buffers to disk |
3373 | ** will wait until the current transaction is done/commited before returning | 3382 | ** will wait until the current transaction is done/commited before returning |
3374 | */ | 3383 | */ |
3375 | int journal_end_sync(struct reiserfs_transaction_handle *th, | 3384 | int journal_end_sync(struct reiserfs_transaction_handle *th, |
3376 | struct super_block *p_s_sb, unsigned long nblocks) | 3385 | struct super_block *p_s_sb, unsigned long nblocks) |
3377 | { | 3386 | { |
3378 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); | 3387 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); |
3379 | 3388 | ||
3380 | BUG_ON(!th->t_trans_id); | 3389 | BUG_ON(!th->t_trans_id); |
3381 | /* you can sync while nested, very, very bad */ | 3390 | /* you can sync while nested, very, very bad */ |
3382 | if (th->t_refcount > 1) { | 3391 | if (th->t_refcount > 1) { |
3383 | BUG(); | 3392 | BUG(); |
3384 | } | 3393 | } |
3385 | if (journal->j_len == 0) { | 3394 | if (journal->j_len == 0) { |
3386 | reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), | 3395 | reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), |
3387 | 1); | 3396 | 1); |
3388 | journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); | 3397 | journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); |
3389 | } | 3398 | } |
3390 | return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT); | 3399 | return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT); |
3391 | } | 3400 | } |
3392 | 3401 | ||
3393 | /* | 3402 | /* |
3394 | ** writeback the pending async commits to disk | 3403 | ** writeback the pending async commits to disk |
3395 | */ | 3404 | */ |
3396 | static void flush_async_commits(void *p) | 3405 | static void flush_async_commits(void *p) |
3397 | { | 3406 | { |
3398 | struct super_block *p_s_sb = p; | 3407 | struct super_block *p_s_sb = p; |
3399 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); | 3408 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); |
3400 | struct reiserfs_journal_list *jl; | 3409 | struct reiserfs_journal_list *jl; |
3401 | struct list_head *entry; | 3410 | struct list_head *entry; |
3402 | 3411 | ||
3403 | lock_kernel(); | 3412 | lock_kernel(); |
3404 | if (!list_empty(&journal->j_journal_list)) { | 3413 | if (!list_empty(&journal->j_journal_list)) { |
3405 | /* last entry is the youngest, commit it and you get everything */ | 3414 | /* last entry is the youngest, commit it and you get everything */ |
3406 | entry = journal->j_journal_list.prev; | 3415 | entry = journal->j_journal_list.prev; |
3407 | jl = JOURNAL_LIST_ENTRY(entry); | 3416 | jl = JOURNAL_LIST_ENTRY(entry); |
3408 | flush_commit_list(p_s_sb, jl, 1); | 3417 | flush_commit_list(p_s_sb, jl, 1); |
3409 | } | 3418 | } |
3410 | unlock_kernel(); | 3419 | unlock_kernel(); |
3411 | /* | 3420 | /* |
3412 | * this is a little racey, but there's no harm in missing | 3421 | * this is a little racey, but there's no harm in missing |
3413 | * the filemap_fdata_write | 3422 | * the filemap_fdata_write |
3414 | */ | 3423 | */ |
3415 | if (!atomic_read(&journal->j_async_throttle) | 3424 | if (!atomic_read(&journal->j_async_throttle) |
3416 | && !reiserfs_is_journal_aborted(journal)) { | 3425 | && !reiserfs_is_journal_aborted(journal)) { |
3417 | atomic_inc(&journal->j_async_throttle); | 3426 | atomic_inc(&journal->j_async_throttle); |
3418 | filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping); | 3427 | filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping); |
3419 | atomic_dec(&journal->j_async_throttle); | 3428 | atomic_dec(&journal->j_async_throttle); |
3420 | } | 3429 | } |
3421 | } | 3430 | } |
3422 | 3431 | ||
/*
** flushes any old transactions to disk
** ends the current transaction if it is too old
**
** Returns p_s_sb->s_dirt so the caller (kreiserfsd) can tell whether the
** superblock is still dirty after the flush attempt.
*/
int reiserfs_flush_old_commits(struct super_block *p_s_sb)
{
	time_t now;
	struct reiserfs_transaction_handle th;
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);

	now = get_seconds();
	/* safety check so we don't flush while we are replaying the log during
	 * mount
	 */
	if (list_empty(&journal->j_journal_list)) {
		return 0;
	}

	/* check the current transaction.  If there are no writers, and it is
	 * too old, finish it, and force the commit blocks to disk
	 */
	if (atomic_read(&journal->j_wcount) <= 0 &&
	    journal->j_trans_start_time > 0 &&
	    journal->j_len > 0 &&
	    (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
		/* join the running transaction; on failure there is nothing
		 * useful to do, so the stale transaction is simply left for a
		 * later pass */
		if (!journal_join(&th, p_s_sb, 1)) {
			/* log the super block buffer so the (possibly empty)
			 * join has something to commit */
			reiserfs_prepare_for_journal(p_s_sb,
						     SB_BUFFER_WITH_SB(p_s_sb),
						     1);
			journal_mark_dirty(&th, p_s_sb,
					   SB_BUFFER_WITH_SB(p_s_sb));

			/* we're only being called from kreiserfsd, it makes no sense to do
			 ** an async commit so that kreiserfsd can do it later
			 */
			do_journal_end(&th, p_s_sb, 1, COMMIT_NOW | WAIT);
		}
	}
	return p_s_sb->s_dirt;
}
3463 | 3472 | ||
/*
** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
**
** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
** the writers are done.  By the time it wakes up, the transaction it was called has already ended, so it just
** flushes the commit list and returns 0.
**
** Won't batch when flush or commit_now is set.  Also won't batch when others are waiting on j_join_wait.
**
** Note, we can't allow the journal_end to proceed while there are still writers in the log.
*/
static int check_journal_end(struct reiserfs_transaction_handle *th,
			     struct super_block *p_s_sb, unsigned long nblocks,
			     int flags)
{

	time_t now;
	int flush = flags & FLUSH_ALL;
	int commit_now = flags & COMMIT_NOW;
	int wait_on_commit = flags & WAIT;
	struct reiserfs_journal_list *jl;
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);

	BUG_ON(!th->t_trans_id);

	/* the handle must belong to the transaction that is currently open */
	if (th->t_trans_id != journal->j_trans_id) {
		reiserfs_panic(th->t_super,
			       "journal-1577: handle trans id %ld != current trans id %ld\n",
			       th->t_trans_id, journal->j_trans_id);
	}

	/* give back the log blocks this handle reserved but never logged */
	journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
	if (atomic_read(&(journal->j_wcount)) > 0) {	/* <= 0 is allowed.  unmounting might not call begin */
		atomic_dec(&(journal->j_wcount));
	}

	/* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released
	 ** will be dealt with by next transaction that actually writes something, but should be taken
	 ** care of in this trans
	 */
	if (journal->j_len == 0) {
		BUG();
	}
	/* if wcount > 0, and we are called to with flush or commit_now,
	 ** we wait on j_join_wait.  We will wake up when the last writer has
	 ** finished the transaction, and started it on its way to the disk.
	 ** Then, we flush the commit or journal list, and just return 0
	 ** because the rest of journal end was already done for this transaction.
	 */
	if (atomic_read(&(journal->j_wcount)) > 0) {
		if (flush || commit_now) {
			unsigned trans_id;

			jl = journal->j_current_jl;
			trans_id = jl->j_trans_id;
			if (wait_on_commit)
				jl->j_state |= LIST_COMMIT_PENDING;
			/* j_jlock keeps new writers out while this transaction
			 * is pushed toward commit */
			atomic_set(&(journal->j_jlock), 1);
			if (flush) {
				journal->j_next_full_flush = 1;
			}
			unlock_journal(p_s_sb);

			/* sleep while the current transaction is still j_jlocked */
			while (journal->j_trans_id == trans_id) {
				if (atomic_read(&journal->j_jlock)) {
					queue_log_writer(p_s_sb);
				} else {
					/* someone dropped j_jlock; retake it
					 * (under the journal lock) if our
					 * transaction is still the open one */
					lock_journal(p_s_sb);
					if (journal->j_trans_id == trans_id) {
						atomic_set(&(journal->j_jlock),
							   1);
					}
					unlock_journal(p_s_sb);
				}
			}
			/* the transaction we joined must have ended by now */
			if (journal->j_trans_id == trans_id) {
				BUG();
			}
			if (commit_now
			    && journal_list_still_alive(p_s_sb, trans_id)
			    && wait_on_commit) {
				flush_commit_list(p_s_sb, jl, 1);
			}
			return 0;
		}
		unlock_journal(p_s_sb);
		return 0;
	}

	/* deal with old transactions where we are the last writers */
	now = get_seconds();
	if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
		commit_now = 1;
		journal->j_next_async_flush = 1;
	}
	/* don't batch when someone is waiting on j_join_wait */
	/* don't batch when syncing the commit or flushing the whole trans */
	if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
	    && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
	    && journal->j_len_alloc < journal->j_max_batch
	    && journal->j_cnode_free > (journal->j_trans_max * 3)) {
		/* batching: leave the transaction open for more writers */
		journal->j_bcount++;
		unlock_journal(p_s_sb);
		return 0;
	}

	if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
		reiserfs_panic(p_s_sb,
			       "journal-003: journal_end: j_start (%ld) is too high\n",
			       journal->j_start);
	}
	return 1;
}
3578 | 3587 | ||
/*
** Does all the work that makes deleting blocks safe.
** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on.
**
** otherwise:
** set a bit for the block in the journal bitmap.  That will prevent it from being allocated for unformatted nodes
** before this transaction has finished.
**
** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.  That will prevent any old transactions with
** this block from trying to flush to the real location.  Since we aren't removing the cnode from the journal_list_hash,
** the block can't be reallocated yet.
**
** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
*/
int journal_mark_freed(struct reiserfs_transaction_handle *th,
		       struct super_block *p_s_sb, b_blocknr_t blocknr)
{
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
	struct reiserfs_journal_cnode *cn = NULL;
	struct buffer_head *bh = NULL;
	struct reiserfs_list_bitmap *jb = NULL;
	int cleaned = 0;
	BUG_ON(!th->t_trans_id);

	cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr);
	if (cn && cn->bh) {
		bh = cn->bh;
		get_bh(bh);	/* balanced by the put_bh at the bottom */
	}
	/* if it is journal new, we just remove it from this transaction */
	if (bh && buffer_journal_new(bh)) {
		clear_buffer_journal_new(bh);
		clear_prepared_bits(bh);
		reiserfs_clean_and_file_buffer(bh);
		cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned);
	} else {
		/* set the bit for this block in the journal bitmap for this transaction */
		jb = journal->j_current_jl->j_list_bitmap;
		if (!jb) {
			reiserfs_panic(p_s_sb,
				       "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n");
		}
		set_bit_in_list_bitmap(p_s_sb, blocknr, jb);

		/* Note, the entire while loop is not allowed to schedule.  */

		if (bh) {
			clear_prepared_bits(bh);
			reiserfs_clean_and_file_buffer(bh);
		}
		cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned);

		/* find all older transactions with this block, make sure they don't try to write it out */
		cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table,
					  blocknr);
		while (cn) {
			if (p_s_sb == cn->sb && blocknr == cn->blocknr) {
				set_bit(BLOCK_FREED, &cn->state);
				if (cn->bh) {
					/* only drop the buffer's dirty state
					 * and reference once across all the
					 * matching cnodes */
					if (!cleaned) {
						/* remove_from_transaction will brelse the buffer if it was
						 ** in the current trans
						 */
						clear_buffer_journal_dirty(cn->
									   bh);
						clear_buffer_dirty(cn->bh);
						clear_buffer_journal_test(cn->
									  bh);
						cleaned = 1;
						put_bh(cn->bh);
						if (atomic_read
						    (&(cn->bh->b_count)) < 0) {
							reiserfs_warning(p_s_sb,
									 "journal-2138: cn->bh->b_count < 0");
						}
					}
					if (cn->jlist) {	/* since we are clearing the bh, we MUST dec nonzerolen */
						atomic_dec(&
							   (cn->jlist->
							    j_nonzerolen));
					}
					cn->bh = NULL;
				}
			}
			cn = cn->hnext;
		}
	}

	if (bh) {
		put_bh(bh);	/* get_hash grabs the buffer */
		if (atomic_read(&(bh->b_count)) < 0) {
			reiserfs_warning(p_s_sb,
					 "journal-2165: bh->b_count < 0");
		}
	}
	return 0;
}
3676 | 3685 | ||
3677 | void reiserfs_update_inode_transaction(struct inode *inode) | 3686 | void reiserfs_update_inode_transaction(struct inode *inode) |
3678 | { | 3687 | { |
3679 | struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb); | 3688 | struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb); |
3680 | REISERFS_I(inode)->i_jl = journal->j_current_jl; | 3689 | REISERFS_I(inode)->i_jl = journal->j_current_jl; |
3681 | REISERFS_I(inode)->i_trans_id = journal->j_trans_id; | 3690 | REISERFS_I(inode)->i_trans_id = journal->j_trans_id; |
3682 | } | 3691 | } |
3683 | 3692 | ||
/*
 * returns -1 on error, 0 if no commits/barriers were done and 1
 * if a transaction was actually committed and the barrier was done
 */
static int __commit_trans_jl(struct inode *inode, unsigned long id,
			     struct reiserfs_journal_list *jl)
{
	struct reiserfs_transaction_handle th;
	struct super_block *sb = inode->i_sb;
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	int ret = 0;

	/* is it from the current transaction, or from an unknown transaction? */
	if (id == journal->j_trans_id) {
		jl = journal->j_current_jl;
		/* try to let other writers come in and grow this transaction */
		let_transaction_grow(sb, id);
		if (journal->j_trans_id != id) {
			/* the transaction we cared about already closed while
			 * we let it grow; fall through to the commit-only path.
			 * NOTE: this goto jumps into the else branch below. */
			goto flush_commit_only;
		}

		ret = journal_begin(&th, sb, 1);
		if (ret)
			return ret;

		/* someone might have ended this transaction while we joined */
		if (journal->j_trans_id != id) {
			/* dirty the superblock buffer so the (now stale) handle
			 * still has something to log before we end it */
			reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
						     1);
			journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb));
			ret = journal_end(&th, sb, 1);
			goto flush_commit_only;
		}

		/* still the same transaction: force a synchronous commit */
		ret = journal_end_sync(&th, sb, 1);
		if (!ret)
			ret = 1;

	} else {
		/* this gets tricky, we have to make sure the journal list in
		 * the inode still exists. We know the list is still around
		 * if we've got a larger transaction id than the oldest list
		 */
	      flush_commit_only:
		if (journal_list_still_alive(inode->i_sb, id)) {
			/*
			 * we only set ret to 1 when we know for sure
			 * the barrier hasn't been started yet on the commit
			 * block.
			 */
			if (atomic_read(&jl->j_commit_left) > 1)
				ret = 1;
			flush_commit_list(sb, jl, 1);
			if (journal->j_errno)
				ret = journal->j_errno;
		}
	}
	/* otherwise the list is gone, and long since committed */
	return ret;
}
3744 | 3753 | ||
3745 | int reiserfs_commit_for_inode(struct inode *inode) | 3754 | int reiserfs_commit_for_inode(struct inode *inode) |
3746 | { | 3755 | { |
3747 | unsigned long id = REISERFS_I(inode)->i_trans_id; | 3756 | unsigned long id = REISERFS_I(inode)->i_trans_id; |
3748 | struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; | 3757 | struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; |
3749 | 3758 | ||
3750 | /* for the whole inode, assume unset id means it was | 3759 | /* for the whole inode, assume unset id means it was |
3751 | * changed in the current transaction. More conservative | 3760 | * changed in the current transaction. More conservative |
3752 | */ | 3761 | */ |
3753 | if (!id || !jl) { | 3762 | if (!id || !jl) { |
3754 | reiserfs_update_inode_transaction(inode); | 3763 | reiserfs_update_inode_transaction(inode); |
3755 | id = REISERFS_I(inode)->i_trans_id; | 3764 | id = REISERFS_I(inode)->i_trans_id; |
3756 | /* jl will be updated in __commit_trans_jl */ | 3765 | /* jl will be updated in __commit_trans_jl */ |
3757 | } | 3766 | } |
3758 | 3767 | ||
3759 | return __commit_trans_jl(inode, id, jl); | 3768 | return __commit_trans_jl(inode, id, jl); |
3760 | } | 3769 | } |
3761 | 3770 | ||
3762 | void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, | 3771 | void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, |
3763 | struct buffer_head *bh) | 3772 | struct buffer_head *bh) |
3764 | { | 3773 | { |
3765 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); | 3774 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); |
3766 | PROC_INFO_INC(p_s_sb, journal.restore_prepared); | 3775 | PROC_INFO_INC(p_s_sb, journal.restore_prepared); |
3767 | if (!bh) { | 3776 | if (!bh) { |
3768 | return; | 3777 | return; |
3769 | } | 3778 | } |
3770 | if (test_clear_buffer_journal_restore_dirty(bh) && | 3779 | if (test_clear_buffer_journal_restore_dirty(bh) && |
3771 | buffer_journal_dirty(bh)) { | 3780 | buffer_journal_dirty(bh)) { |
3772 | struct reiserfs_journal_cnode *cn; | 3781 | struct reiserfs_journal_cnode *cn; |
3773 | cn = get_journal_hash_dev(p_s_sb, | 3782 | cn = get_journal_hash_dev(p_s_sb, |
3774 | journal->j_list_hash_table, | 3783 | journal->j_list_hash_table, |
3775 | bh->b_blocknr); | 3784 | bh->b_blocknr); |
3776 | if (cn && can_dirty(cn)) { | 3785 | if (cn && can_dirty(cn)) { |
3777 | set_buffer_journal_test(bh); | 3786 | set_buffer_journal_test(bh); |
3778 | mark_buffer_dirty(bh); | 3787 | mark_buffer_dirty(bh); |
3779 | } | 3788 | } |
3780 | } | 3789 | } |
3781 | clear_buffer_journal_prepared(bh); | 3790 | clear_buffer_journal_prepared(bh); |
3782 | } | 3791 | } |
3783 | 3792 | ||
3784 | extern struct tree_balance *cur_tb; | 3793 | extern struct tree_balance *cur_tb; |
3785 | /* | 3794 | /* |
3786 | ** before we can change a metadata block, we have to make sure it won't | 3795 | ** before we can change a metadata block, we have to make sure it won't |
3787 | ** be written to disk while we are altering it. So, we must: | 3796 | ** be written to disk while we are altering it. So, we must: |
3788 | ** clean it | 3797 | ** clean it |
3789 | ** wait on it. | 3798 | ** wait on it. |
3790 | ** | 3799 | ** |
3791 | */ | 3800 | */ |
int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
				 struct buffer_head *bh, int wait)
{
	PROC_INFO_INC(p_s_sb, journal.prepare);

	/* try to grab the buffer lock without sleeping first */
	if (test_set_buffer_locked(bh)) {
		if (!wait)
			return 0;	/* busy and caller won't block */
		lock_buffer(bh);
	}
	set_buffer_journal_prepared(bh);
	/* clear the dirty bit while we hold the lock; if the buffer was
	 * journal-dirty, remember that so the dirty state can be restored
	 * later by reiserfs_restore_prepared_buffer() */
	if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) {
		clear_buffer_journal_test(bh);
		set_buffer_journal_restore_dirty(bh);
	}
	unlock_buffer(bh);
	return 1;
}
3810 | 3819 | ||
3811 | static void flush_old_journal_lists(struct super_block *s) | 3820 | static void flush_old_journal_lists(struct super_block *s) |
3812 | { | 3821 | { |
3813 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 3822 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
3814 | struct reiserfs_journal_list *jl; | 3823 | struct reiserfs_journal_list *jl; |
3815 | struct list_head *entry; | 3824 | struct list_head *entry; |
3816 | time_t now = get_seconds(); | 3825 | time_t now = get_seconds(); |
3817 | 3826 | ||
3818 | while (!list_empty(&journal->j_journal_list)) { | 3827 | while (!list_empty(&journal->j_journal_list)) { |
3819 | entry = journal->j_journal_list.next; | 3828 | entry = journal->j_journal_list.next; |
3820 | jl = JOURNAL_LIST_ENTRY(entry); | 3829 | jl = JOURNAL_LIST_ENTRY(entry); |
3821 | /* this check should always be run, to send old lists to disk */ | 3830 | /* this check should always be run, to send old lists to disk */ |
3822 | if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { | 3831 | if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { |
3823 | flush_used_journal_lists(s, jl); | 3832 | flush_used_journal_lists(s, jl); |
3824 | } else { | 3833 | } else { |
3825 | break; | 3834 | break; |
3826 | } | 3835 | } |
3827 | } | 3836 | } |
3828 | } | 3837 | } |
3829 | 3838 | ||
3830 | /* | 3839 | /* |
3831 | ** long and ugly. If flush, will not return until all commit | 3840 | ** long and ugly. If flush, will not return until all commit |
3832 | ** blocks and all real buffers in the trans are on disk. | 3841 | ** blocks and all real buffers in the trans are on disk. |
3833 | ** If no_async, won't return until all commit blocks are on disk. | 3842 | ** If no_async, won't return until all commit blocks are on disk. |
3834 | ** | 3843 | ** |
3835 | ** keep reading, there are comments as you go along | 3844 | ** keep reading, there are comments as you go along |
3836 | ** | 3845 | ** |
3837 | ** If the journal is aborted, we just clean up. Things like flushing | 3846 | ** If the journal is aborted, we just clean up. Things like flushing |
3838 | ** journal lists, etc just won't happen. | 3847 | ** journal lists, etc just won't happen. |
3839 | */ | 3848 | */ |
3840 | static int do_journal_end(struct reiserfs_transaction_handle *th, | 3849 | static int do_journal_end(struct reiserfs_transaction_handle *th, |
3841 | struct super_block *p_s_sb, unsigned long nblocks, | 3850 | struct super_block *p_s_sb, unsigned long nblocks, |
3842 | int flags) | 3851 | int flags) |
3843 | { | 3852 | { |
3844 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); | 3853 | struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); |
3845 | struct reiserfs_journal_cnode *cn, *next, *jl_cn; | 3854 | struct reiserfs_journal_cnode *cn, *next, *jl_cn; |
3846 | struct reiserfs_journal_cnode *last_cn = NULL; | 3855 | struct reiserfs_journal_cnode *last_cn = NULL; |
3847 | struct reiserfs_journal_desc *desc; | 3856 | struct reiserfs_journal_desc *desc; |
3848 | struct reiserfs_journal_commit *commit; | 3857 | struct reiserfs_journal_commit *commit; |
3849 | struct buffer_head *c_bh; /* commit bh */ | 3858 | struct buffer_head *c_bh; /* commit bh */ |
3850 | struct buffer_head *d_bh; /* desc bh */ | 3859 | struct buffer_head *d_bh; /* desc bh */ |
3851 | int cur_write_start = 0; /* start index of current log write */ | 3860 | int cur_write_start = 0; /* start index of current log write */ |
3852 | int old_start; | 3861 | int old_start; |
3853 | int i; | 3862 | int i; |
3854 | int flush = flags & FLUSH_ALL; | 3863 | int flush = flags & FLUSH_ALL; |
3855 | int wait_on_commit = flags & WAIT; | 3864 | int wait_on_commit = flags & WAIT; |
3856 | struct reiserfs_journal_list *jl, *temp_jl; | 3865 | struct reiserfs_journal_list *jl, *temp_jl; |
3857 | struct list_head *entry, *safe; | 3866 | struct list_head *entry, *safe; |
3858 | unsigned long jindex; | 3867 | unsigned long jindex; |
3859 | unsigned long commit_trans_id; | 3868 | unsigned long commit_trans_id; |
3860 | int trans_half; | 3869 | int trans_half; |
3861 | 3870 | ||
3862 | BUG_ON(th->t_refcount > 1); | 3871 | BUG_ON(th->t_refcount > 1); |
3863 | BUG_ON(!th->t_trans_id); | 3872 | BUG_ON(!th->t_trans_id); |
3864 | 3873 | ||
3865 | put_fs_excl(); | 3874 | put_fs_excl(); |
3866 | current->journal_info = th->t_handle_save; | 3875 | current->journal_info = th->t_handle_save; |
3867 | reiserfs_check_lock_depth(p_s_sb, "journal end"); | 3876 | reiserfs_check_lock_depth(p_s_sb, "journal end"); |
3868 | if (journal->j_len == 0) { | 3877 | if (journal->j_len == 0) { |
3869 | reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), | 3878 | reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), |
3870 | 1); | 3879 | 1); |
3871 | journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); | 3880 | journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); |
3872 | } | 3881 | } |
3873 | 3882 | ||
3874 | lock_journal(p_s_sb); | 3883 | lock_journal(p_s_sb); |
3875 | if (journal->j_next_full_flush) { | 3884 | if (journal->j_next_full_flush) { |
3876 | flags |= FLUSH_ALL; | 3885 | flags |= FLUSH_ALL; |
3877 | flush = 1; | 3886 | flush = 1; |
3878 | } | 3887 | } |
3879 | if (journal->j_next_async_flush) { | 3888 | if (journal->j_next_async_flush) { |
3880 | flags |= COMMIT_NOW | WAIT; | 3889 | flags |= COMMIT_NOW | WAIT; |
3881 | wait_on_commit = 1; | 3890 | wait_on_commit = 1; |
3882 | } | 3891 | } |
3883 | 3892 | ||
3884 | /* check_journal_end locks the journal, and unlocks if it does not return 1 | 3893 | /* check_journal_end locks the journal, and unlocks if it does not return 1 |
3885 | ** it tells us if we should continue with the journal_end, or just return | 3894 | ** it tells us if we should continue with the journal_end, or just return |
3886 | */ | 3895 | */ |
3887 | if (!check_journal_end(th, p_s_sb, nblocks, flags)) { | 3896 | if (!check_journal_end(th, p_s_sb, nblocks, flags)) { |
3888 | p_s_sb->s_dirt = 1; | 3897 | p_s_sb->s_dirt = 1; |
3889 | wake_queued_writers(p_s_sb); | 3898 | wake_queued_writers(p_s_sb); |
3890 | reiserfs_async_progress_wait(p_s_sb); | 3899 | reiserfs_async_progress_wait(p_s_sb); |
3891 | goto out; | 3900 | goto out; |
3892 | } | 3901 | } |
3893 | 3902 | ||
3894 | /* check_journal_end might set these, check again */ | 3903 | /* check_journal_end might set these, check again */ |
3895 | if (journal->j_next_full_flush) { | 3904 | if (journal->j_next_full_flush) { |
3896 | flush = 1; | 3905 | flush = 1; |
3897 | } | 3906 | } |
3898 | 3907 | ||
3899 | /* | 3908 | /* |
3900 | ** j must wait means we have to flush the log blocks, and the real blocks for | 3909 | ** j must wait means we have to flush the log blocks, and the real blocks for |
3901 | ** this transaction | 3910 | ** this transaction |
3902 | */ | 3911 | */ |
3903 | if (journal->j_must_wait > 0) { | 3912 | if (journal->j_must_wait > 0) { |
3904 | flush = 1; | 3913 | flush = 1; |
3905 | } | 3914 | } |
3906 | #ifdef REISERFS_PREALLOCATE | 3915 | #ifdef REISERFS_PREALLOCATE |
3907 | /* quota ops might need to nest, setup the journal_info pointer for them | 3916 | /* quota ops might need to nest, setup the journal_info pointer for them |
3908 | * and raise the refcount so that it is > 0. */ | 3917 | * and raise the refcount so that it is > 0. */ |
3909 | current->journal_info = th; | 3918 | current->journal_info = th; |
3910 | th->t_refcount++; | 3919 | th->t_refcount++; |
3911 | reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into | 3920 | reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into |
3912 | * the transaction */ | 3921 | * the transaction */ |
3913 | th->t_refcount--; | 3922 | th->t_refcount--; |
3914 | current->journal_info = th->t_handle_save; | 3923 | current->journal_info = th->t_handle_save; |
3915 | #endif | 3924 | #endif |
3916 | 3925 | ||
3917 | /* setup description block */ | 3926 | /* setup description block */ |
3918 | d_bh = | 3927 | d_bh = |
3919 | journal_getblk(p_s_sb, | 3928 | journal_getblk(p_s_sb, |
3920 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + | 3929 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + |
3921 | journal->j_start); | 3930 | journal->j_start); |
3922 | set_buffer_uptodate(d_bh); | 3931 | set_buffer_uptodate(d_bh); |
3923 | desc = (struct reiserfs_journal_desc *)(d_bh)->b_data; | 3932 | desc = (struct reiserfs_journal_desc *)(d_bh)->b_data; |
3924 | memset(d_bh->b_data, 0, d_bh->b_size); | 3933 | memset(d_bh->b_data, 0, d_bh->b_size); |
3925 | memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); | 3934 | memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); |
3926 | set_desc_trans_id(desc, journal->j_trans_id); | 3935 | set_desc_trans_id(desc, journal->j_trans_id); |
3927 | 3936 | ||
3928 | /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */ | 3937 | /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */ |
3929 | c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + | 3938 | c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + |
3930 | ((journal->j_start + journal->j_len + | 3939 | ((journal->j_start + journal->j_len + |
3931 | 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); | 3940 | 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); |
3932 | commit = (struct reiserfs_journal_commit *)c_bh->b_data; | 3941 | commit = (struct reiserfs_journal_commit *)c_bh->b_data; |
3933 | memset(c_bh->b_data, 0, c_bh->b_size); | 3942 | memset(c_bh->b_data, 0, c_bh->b_size); |
3934 | set_commit_trans_id(commit, journal->j_trans_id); | 3943 | set_commit_trans_id(commit, journal->j_trans_id); |
3935 | set_buffer_uptodate(c_bh); | 3944 | set_buffer_uptodate(c_bh); |
3936 | 3945 | ||
3937 | /* init this journal list */ | 3946 | /* init this journal list */ |
3938 | jl = journal->j_current_jl; | 3947 | jl = journal->j_current_jl; |
3939 | 3948 | ||
3940 | /* we lock the commit before doing anything because | 3949 | /* we lock the commit before doing anything because |
3941 | * we want to make sure nobody tries to run flush_commit_list until | 3950 | * we want to make sure nobody tries to run flush_commit_list until |
3942 | * the new transaction is fully setup, and we've already flushed the | 3951 | * the new transaction is fully setup, and we've already flushed the |
3943 | * ordered bh list | 3952 | * ordered bh list |
3944 | */ | 3953 | */ |
3945 | down(&jl->j_commit_lock); | 3954 | down(&jl->j_commit_lock); |
3946 | 3955 | ||
3947 | /* save the transaction id in case we need to commit it later */ | 3956 | /* save the transaction id in case we need to commit it later */ |
3948 | commit_trans_id = jl->j_trans_id; | 3957 | commit_trans_id = jl->j_trans_id; |
3949 | 3958 | ||
3950 | atomic_set(&jl->j_older_commits_done, 0); | 3959 | atomic_set(&jl->j_older_commits_done, 0); |
3951 | jl->j_trans_id = journal->j_trans_id; | 3960 | jl->j_trans_id = journal->j_trans_id; |
3952 | jl->j_timestamp = journal->j_trans_start_time; | 3961 | jl->j_timestamp = journal->j_trans_start_time; |
3953 | jl->j_commit_bh = c_bh; | 3962 | jl->j_commit_bh = c_bh; |
3954 | jl->j_start = journal->j_start; | 3963 | jl->j_start = journal->j_start; |
3955 | jl->j_len = journal->j_len; | 3964 | jl->j_len = journal->j_len; |
3956 | atomic_set(&jl->j_nonzerolen, journal->j_len); | 3965 | atomic_set(&jl->j_nonzerolen, journal->j_len); |
3957 | atomic_set(&jl->j_commit_left, journal->j_len + 2); | 3966 | atomic_set(&jl->j_commit_left, journal->j_len + 2); |
3958 | jl->j_realblock = NULL; | 3967 | jl->j_realblock = NULL; |
3959 | 3968 | ||
3960 | /* The ENTIRE FOR LOOP MUST not cause schedule to occur. | 3969 | /* The ENTIRE FOR LOOP MUST not cause schedule to occur. |
3961 | ** for each real block, add it to the journal list hash, | 3970 | ** for each real block, add it to the journal list hash, |
3962 | ** copy into real block index array in the commit or desc block | 3971 | ** copy into real block index array in the commit or desc block |
3963 | */ | 3972 | */ |
3964 | trans_half = journal_trans_half(p_s_sb->s_blocksize); | 3973 | trans_half = journal_trans_half(p_s_sb->s_blocksize); |
3965 | for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { | 3974 | for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { |
3966 | if (buffer_journaled(cn->bh)) { | 3975 | if (buffer_journaled(cn->bh)) { |
3967 | jl_cn = get_cnode(p_s_sb); | 3976 | jl_cn = get_cnode(p_s_sb); |
3968 | if (!jl_cn) { | 3977 | if (!jl_cn) { |
3969 | reiserfs_panic(p_s_sb, | 3978 | reiserfs_panic(p_s_sb, |
3970 | "journal-1676, get_cnode returned NULL\n"); | 3979 | "journal-1676, get_cnode returned NULL\n"); |
3971 | } | 3980 | } |
3972 | if (i == 0) { | 3981 | if (i == 0) { |
3973 | jl->j_realblock = jl_cn; | 3982 | jl->j_realblock = jl_cn; |
3974 | } | 3983 | } |
3975 | jl_cn->prev = last_cn; | 3984 | jl_cn->prev = last_cn; |
3976 | jl_cn->next = NULL; | 3985 | jl_cn->next = NULL; |
3977 | if (last_cn) { | 3986 | if (last_cn) { |
3978 | last_cn->next = jl_cn; | 3987 | last_cn->next = jl_cn; |
3979 | } | 3988 | } |
3980 | last_cn = jl_cn; | 3989 | last_cn = jl_cn; |
3981 | /* make sure the block we are trying to log is not a block | 3990 | /* make sure the block we are trying to log is not a block |
3982 | of journal or reserved area */ | 3991 | of journal or reserved area */ |
3983 | 3992 | ||
3984 | if (is_block_in_log_or_reserved_area | 3993 | if (is_block_in_log_or_reserved_area |
3985 | (p_s_sb, cn->bh->b_blocknr)) { | 3994 | (p_s_sb, cn->bh->b_blocknr)) { |
3986 | reiserfs_panic(p_s_sb, | 3995 | reiserfs_panic(p_s_sb, |
3987 | "journal-2332: Trying to log block %lu, which is a log block\n", | 3996 | "journal-2332: Trying to log block %lu, which is a log block\n", |
3988 | cn->bh->b_blocknr); | 3997 | cn->bh->b_blocknr); |
3989 | } | 3998 | } |
3990 | jl_cn->blocknr = cn->bh->b_blocknr; | 3999 | jl_cn->blocknr = cn->bh->b_blocknr; |
3991 | jl_cn->state = 0; | 4000 | jl_cn->state = 0; |
3992 | jl_cn->sb = p_s_sb; | 4001 | jl_cn->sb = p_s_sb; |
3993 | jl_cn->bh = cn->bh; | 4002 | jl_cn->bh = cn->bh; |
3994 | jl_cn->jlist = jl; | 4003 | jl_cn->jlist = jl; |
3995 | insert_journal_hash(journal->j_list_hash_table, jl_cn); | 4004 | insert_journal_hash(journal->j_list_hash_table, jl_cn); |
3996 | if (i < trans_half) { | 4005 | if (i < trans_half) { |
3997 | desc->j_realblock[i] = | 4006 | desc->j_realblock[i] = |
3998 | cpu_to_le32(cn->bh->b_blocknr); | 4007 | cpu_to_le32(cn->bh->b_blocknr); |
3999 | } else { | 4008 | } else { |
4000 | commit->j_realblock[i - trans_half] = | 4009 | commit->j_realblock[i - trans_half] = |
4001 | cpu_to_le32(cn->bh->b_blocknr); | 4010 | cpu_to_le32(cn->bh->b_blocknr); |
4002 | } | 4011 | } |
4003 | } else { | 4012 | } else { |
4004 | i--; | 4013 | i--; |
4005 | } | 4014 | } |
4006 | } | 4015 | } |
4007 | set_desc_trans_len(desc, journal->j_len); | 4016 | set_desc_trans_len(desc, journal->j_len); |
4008 | set_desc_mount_id(desc, journal->j_mount_id); | 4017 | set_desc_mount_id(desc, journal->j_mount_id); |
4009 | set_desc_trans_id(desc, journal->j_trans_id); | 4018 | set_desc_trans_id(desc, journal->j_trans_id); |
4010 | set_commit_trans_len(commit, journal->j_len); | 4019 | set_commit_trans_len(commit, journal->j_len); |
4011 | 4020 | ||
4012 | /* special check in case all buffers in the journal were marked for not logging */ | 4021 | /* special check in case all buffers in the journal were marked for not logging */ |
4013 | if (journal->j_len == 0) { | 4022 | if (journal->j_len == 0) { |
4014 | BUG(); | 4023 | BUG(); |
4015 | } | 4024 | } |
4016 | 4025 | ||
4017 | /* we're about to dirty all the log blocks, mark the description block | 4026 | /* we're about to dirty all the log blocks, mark the description block |
4018 | * dirty now too. Don't mark the commit block dirty until all the | 4027 | * dirty now too. Don't mark the commit block dirty until all the |
4019 | * others are on disk | 4028 | * others are on disk |
4020 | */ | 4029 | */ |
4021 | mark_buffer_dirty(d_bh); | 4030 | mark_buffer_dirty(d_bh); |
4022 | 4031 | ||
4023 | /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ | 4032 | /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ |
4024 | cur_write_start = journal->j_start; | 4033 | cur_write_start = journal->j_start; |
4025 | cn = journal->j_first; | 4034 | cn = journal->j_first; |
4026 | jindex = 1; /* start at one so we don't get the desc again */ | 4035 | jindex = 1; /* start at one so we don't get the desc again */ |
4027 | while (cn) { | 4036 | while (cn) { |
4028 | clear_buffer_journal_new(cn->bh); | 4037 | clear_buffer_journal_new(cn->bh); |
4029 | /* copy all the real blocks into log area. dirty log blocks */ | 4038 | /* copy all the real blocks into log area. dirty log blocks */ |
4030 | if (buffer_journaled(cn->bh)) { | 4039 | if (buffer_journaled(cn->bh)) { |
4031 | struct buffer_head *tmp_bh; | 4040 | struct buffer_head *tmp_bh; |
4032 | char *addr; | 4041 | char *addr; |
4033 | struct page *page; | 4042 | struct page *page; |
4034 | tmp_bh = | 4043 | tmp_bh = |
4035 | journal_getblk(p_s_sb, | 4044 | journal_getblk(p_s_sb, |
4036 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + | 4045 | SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + |
4037 | ((cur_write_start + | 4046 | ((cur_write_start + |
4038 | jindex) % | 4047 | jindex) % |
4039 | SB_ONDISK_JOURNAL_SIZE(p_s_sb))); | 4048 | SB_ONDISK_JOURNAL_SIZE(p_s_sb))); |
4040 | set_buffer_uptodate(tmp_bh); | 4049 | set_buffer_uptodate(tmp_bh); |
4041 | page = cn->bh->b_page; | 4050 | page = cn->bh->b_page; |
4042 | addr = kmap(page); | 4051 | addr = kmap(page); |
4043 | memcpy(tmp_bh->b_data, | 4052 | memcpy(tmp_bh->b_data, |
4044 | addr + offset_in_page(cn->bh->b_data), | 4053 | addr + offset_in_page(cn->bh->b_data), |
4045 | cn->bh->b_size); | 4054 | cn->bh->b_size); |
4046 | kunmap(page); | 4055 | kunmap(page); |
4047 | mark_buffer_dirty(tmp_bh); | 4056 | mark_buffer_dirty(tmp_bh); |
4048 | jindex++; | 4057 | jindex++; |
4049 | set_buffer_journal_dirty(cn->bh); | 4058 | set_buffer_journal_dirty(cn->bh); |
4050 | clear_buffer_journaled(cn->bh); | 4059 | clear_buffer_journaled(cn->bh); |
4051 | } else { | 4060 | } else { |
4052 | /* JDirty cleared sometime during transaction. don't log this one */ | 4061 | /* JDirty cleared sometime during transaction. don't log this one */ |
4053 | reiserfs_warning(p_s_sb, | 4062 | reiserfs_warning(p_s_sb, |
4054 | "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!"); | 4063 | "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!"); |
4055 | brelse(cn->bh); | 4064 | brelse(cn->bh); |
4056 | } | 4065 | } |
4057 | next = cn->next; | 4066 | next = cn->next; |
4058 | free_cnode(p_s_sb, cn); | 4067 | free_cnode(p_s_sb, cn); |
4059 | cn = next; | 4068 | cn = next; |
4060 | cond_resched(); | 4069 | cond_resched(); |
4061 | } | 4070 | } |
4062 | 4071 | ||
4063 | /* we are done with both the c_bh and d_bh, but | 4072 | /* we are done with both the c_bh and d_bh, but |
4064 | ** c_bh must be written after all other commit blocks, | 4073 | ** c_bh must be written after all other commit blocks, |
4065 | ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. | 4074 | ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. |
4066 | */ | 4075 | */ |
4067 | 4076 | ||
4068 | journal->j_current_jl = alloc_journal_list(p_s_sb); | 4077 | journal->j_current_jl = alloc_journal_list(p_s_sb); |
4069 | 4078 | ||
4070 | /* now it is safe to insert this transaction on the main list */ | 4079 | /* now it is safe to insert this transaction on the main list */ |
4071 | list_add_tail(&jl->j_list, &journal->j_journal_list); | 4080 | list_add_tail(&jl->j_list, &journal->j_journal_list); |
4072 | list_add_tail(&jl->j_working_list, &journal->j_working_list); | 4081 | list_add_tail(&jl->j_working_list, &journal->j_working_list); |
4073 | journal->j_num_work_lists++; | 4082 | journal->j_num_work_lists++; |
4074 | 4083 | ||
4075 | /* reset journal values for the next transaction */ | 4084 | /* reset journal values for the next transaction */ |
4076 | old_start = journal->j_start; | 4085 | old_start = journal->j_start; |
4077 | journal->j_start = | 4086 | journal->j_start = |
4078 | (journal->j_start + journal->j_len + | 4087 | (journal->j_start + journal->j_len + |
4079 | 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb); | 4088 | 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb); |
4080 | atomic_set(&(journal->j_wcount), 0); | 4089 | atomic_set(&(journal->j_wcount), 0); |
4081 | journal->j_bcount = 0; | 4090 | journal->j_bcount = 0; |
4082 | journal->j_last = NULL; | 4091 | journal->j_last = NULL; |
4083 | journal->j_first = NULL; | 4092 | journal->j_first = NULL; |
4084 | journal->j_len = 0; | 4093 | journal->j_len = 0; |
4085 | journal->j_trans_start_time = 0; | 4094 | journal->j_trans_start_time = 0; |
4086 | journal->j_trans_id++; | 4095 | journal->j_trans_id++; |
4087 | journal->j_current_jl->j_trans_id = journal->j_trans_id; | 4096 | journal->j_current_jl->j_trans_id = journal->j_trans_id; |
4088 | journal->j_must_wait = 0; | 4097 | journal->j_must_wait = 0; |
4089 | journal->j_len_alloc = 0; | 4098 | journal->j_len_alloc = 0; |
4090 | journal->j_next_full_flush = 0; | 4099 | journal->j_next_full_flush = 0; |
4091 | journal->j_next_async_flush = 0; | 4100 | journal->j_next_async_flush = 0; |
4092 | init_journal_hash(p_s_sb); | 4101 | init_journal_hash(p_s_sb); |
4093 | 4102 | ||
4094 | // make sure reiserfs_add_jh sees the new current_jl before we | 4103 | // make sure reiserfs_add_jh sees the new current_jl before we |
4095 | // write out the tails | 4104 | // write out the tails |
4096 | smp_mb(); | 4105 | smp_mb(); |
4097 | 4106 | ||
4098 | /* tail conversion targets have to hit the disk before we end the | 4107 | /* tail conversion targets have to hit the disk before we end the |
4099 | * transaction. Otherwise a later transaction might repack the tail | 4108 | * transaction. Otherwise a later transaction might repack the tail |
4100 | * before this transaction commits, leaving the data block unflushed and | 4109 | * before this transaction commits, leaving the data block unflushed and |
4101 | * clean, if we crash before the later transaction commits, the data block | 4110 | * clean, if we crash before the later transaction commits, the data block |
4102 | * is lost. | 4111 | * is lost. |
4103 | */ | 4112 | */ |
4104 | if (!list_empty(&jl->j_tail_bh_list)) { | 4113 | if (!list_empty(&jl->j_tail_bh_list)) { |
4105 | unlock_kernel(); | 4114 | unlock_kernel(); |
4106 | write_ordered_buffers(&journal->j_dirty_buffers_lock, | 4115 | write_ordered_buffers(&journal->j_dirty_buffers_lock, |
4107 | journal, jl, &jl->j_tail_bh_list); | 4116 | journal, jl, &jl->j_tail_bh_list); |
4108 | lock_kernel(); | 4117 | lock_kernel(); |
4109 | } | 4118 | } |
4110 | if (!list_empty(&jl->j_tail_bh_list)) | 4119 | if (!list_empty(&jl->j_tail_bh_list)) |
4111 | BUG(); | 4120 | BUG(); |
4112 | up(&jl->j_commit_lock); | 4121 | up(&jl->j_commit_lock); |
4113 | 4122 | ||
4114 | /* honor the flush wishes from the caller, simple commits can | 4123 | /* honor the flush wishes from the caller, simple commits can |
4115 | ** be done outside the journal lock, they are done below | 4124 | ** be done outside the journal lock, they are done below |
4116 | ** | 4125 | ** |
4117 | ** if we don't flush the commit list right now, we put it into | 4126 | ** if we don't flush the commit list right now, we put it into |
4118 | ** the work queue so the people waiting on the async progress work | 4127 | ** the work queue so the people waiting on the async progress work |
4119 | ** queue don't wait for this proc to flush journal lists and such. | 4128 | ** queue don't wait for this proc to flush journal lists and such. |
4120 | */ | 4129 | */ |
4121 | if (flush) { | 4130 | if (flush) { |
4122 | flush_commit_list(p_s_sb, jl, 1); | 4131 | flush_commit_list(p_s_sb, jl, 1); |
4123 | flush_journal_list(p_s_sb, jl, 1); | 4132 | flush_journal_list(p_s_sb, jl, 1); |
4124 | } else if (!(jl->j_state & LIST_COMMIT_PENDING)) | 4133 | } else if (!(jl->j_state & LIST_COMMIT_PENDING)) |
4125 | queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); | 4134 | queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); |
4126 | 4135 | ||
4127 | /* if the next transaction has any chance of wrapping, flush | 4136 | /* if the next transaction has any chance of wrapping, flush |
4128 | ** transactions that might get overwritten. If any journal lists are very | 4137 | ** transactions that might get overwritten. If any journal lists are very |
4129 | ** old flush them as well. | 4138 | ** old flush them as well. |
4130 | */ | 4139 | */ |
4131 | first_jl: | 4140 | first_jl: |
4132 | list_for_each_safe(entry, safe, &journal->j_journal_list) { | 4141 | list_for_each_safe(entry, safe, &journal->j_journal_list) { |
4133 | temp_jl = JOURNAL_LIST_ENTRY(entry); | 4142 | temp_jl = JOURNAL_LIST_ENTRY(entry); |
4134 | if (journal->j_start <= temp_jl->j_start) { | 4143 | if (journal->j_start <= temp_jl->j_start) { |
4135 | if ((journal->j_start + journal->j_trans_max + 1) >= | 4144 | if ((journal->j_start + journal->j_trans_max + 1) >= |
4136 | temp_jl->j_start) { | 4145 | temp_jl->j_start) { |
4137 | flush_used_journal_lists(p_s_sb, temp_jl); | 4146 | flush_used_journal_lists(p_s_sb, temp_jl); |
4138 | goto first_jl; | 4147 | goto first_jl; |
4139 | } else if ((journal->j_start + | 4148 | } else if ((journal->j_start + |
4140 | journal->j_trans_max + 1) < | 4149 | journal->j_trans_max + 1) < |
4141 | SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { | 4150 | SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { |
4142 | /* if we don't cross into the next transaction and we don't | 4151 | /* if we don't cross into the next transaction and we don't |
4143 | * wrap, there is no way we can overlap any later transactions | 4152 | * wrap, there is no way we can overlap any later transactions |
4144 | * break now | 4153 | * break now |
4145 | */ | 4154 | */ |
4146 | break; | 4155 | break; |
4147 | } | 4156 | } |
4148 | } else if ((journal->j_start + | 4157 | } else if ((journal->j_start + |
4149 | journal->j_trans_max + 1) > | 4158 | journal->j_trans_max + 1) > |
4150 | SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { | 4159 | SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { |
4151 | if (((journal->j_start + journal->j_trans_max + 1) % | 4160 | if (((journal->j_start + journal->j_trans_max + 1) % |
4152 | SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= | 4161 | SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= |
4153 | temp_jl->j_start) { | 4162 | temp_jl->j_start) { |
4154 | flush_used_journal_lists(p_s_sb, temp_jl); | 4163 | flush_used_journal_lists(p_s_sb, temp_jl); |
4155 | goto first_jl; | 4164 | goto first_jl; |
4156 | } else { | 4165 | } else { |
4157 | /* we don't overlap anything from out start to the end of the | 4166 | /* we don't overlap anything from out start to the end of the |
4158 | * log, and our wrapped portion doesn't overlap anything at | 4167 | * log, and our wrapped portion doesn't overlap anything at |
4159 | * the start of the log. We can break | 4168 | * the start of the log. We can break |
4160 | */ | 4169 | */ |
4161 | break; | 4170 | break; |
4162 | } | 4171 | } |
4163 | } | 4172 | } |
4164 | } | 4173 | } |
4165 | flush_old_journal_lists(p_s_sb); | 4174 | flush_old_journal_lists(p_s_sb); |
4166 | 4175 | ||
4167 | journal->j_current_jl->j_list_bitmap = | 4176 | journal->j_current_jl->j_list_bitmap = |
4168 | get_list_bitmap(p_s_sb, journal->j_current_jl); | 4177 | get_list_bitmap(p_s_sb, journal->j_current_jl); |
4169 | 4178 | ||
4170 | if (!(journal->j_current_jl->j_list_bitmap)) { | 4179 | if (!(journal->j_current_jl->j_list_bitmap)) { |
4171 | reiserfs_panic(p_s_sb, | 4180 | reiserfs_panic(p_s_sb, |
4172 | "journal-1996: do_journal_end, could not get a list bitmap\n"); | 4181 | "journal-1996: do_journal_end, could not get a list bitmap\n"); |
4173 | } | 4182 | } |
4174 | 4183 | ||
4175 | atomic_set(&(journal->j_jlock), 0); | 4184 | atomic_set(&(journal->j_jlock), 0); |
4176 | unlock_journal(p_s_sb); | 4185 | unlock_journal(p_s_sb); |
4177 | /* wake up any body waiting to join. */ | 4186 | /* wake up any body waiting to join. */ |
4178 | clear_bit(J_WRITERS_QUEUED, &journal->j_state); | 4187 | clear_bit(J_WRITERS_QUEUED, &journal->j_state); |
4179 | wake_up(&(journal->j_join_wait)); | 4188 | wake_up(&(journal->j_join_wait)); |
4180 | 4189 | ||
4181 | if (!flush && wait_on_commit && | 4190 | if (!flush && wait_on_commit && |
4182 | journal_list_still_alive(p_s_sb, commit_trans_id)) { | 4191 | journal_list_still_alive(p_s_sb, commit_trans_id)) { |
4183 | flush_commit_list(p_s_sb, jl, 1); | 4192 | flush_commit_list(p_s_sb, jl, 1); |
4184 | } | 4193 | } |
4185 | out: | 4194 | out: |
4186 | reiserfs_check_lock_depth(p_s_sb, "journal end2"); | 4195 | reiserfs_check_lock_depth(p_s_sb, "journal end2"); |
4187 | 4196 | ||
4188 | memset(th, 0, sizeof(*th)); | 4197 | memset(th, 0, sizeof(*th)); |
4189 | /* Re-set th->t_super, so we can properly keep track of how many | 4198 | /* Re-set th->t_super, so we can properly keep track of how many |
4190 | * persistent transactions there are. We need to do this so if this | 4199 | * persistent transactions there are. We need to do this so if this |
4191 | * call is part of a failed restart_transaction, we can free it later */ | 4200 | * call is part of a failed restart_transaction, we can free it later */ |
4192 | th->t_super = p_s_sb; | 4201 | th->t_super = p_s_sb; |
4193 | 4202 | ||
4194 | return journal->j_errno; | 4203 | return journal->j_errno; |
4195 | } | 4204 | } |
4196 | 4205 | ||
4197 | static void __reiserfs_journal_abort_hard(struct super_block *sb) | 4206 | static void __reiserfs_journal_abort_hard(struct super_block *sb) |
4198 | { | 4207 | { |
4199 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 4208 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
4200 | if (test_bit(J_ABORTED, &journal->j_state)) | 4209 | if (test_bit(J_ABORTED, &journal->j_state)) |
4201 | return; | 4210 | return; |
4202 | 4211 | ||
4203 | printk(KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n", | 4212 | printk(KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n", |
4204 | reiserfs_bdevname(sb)); | 4213 | reiserfs_bdevname(sb)); |
4205 | 4214 | ||
4206 | sb->s_flags |= MS_RDONLY; | 4215 | sb->s_flags |= MS_RDONLY; |
4207 | set_bit(J_ABORTED, &journal->j_state); | 4216 | set_bit(J_ABORTED, &journal->j_state); |
4208 | 4217 | ||
4209 | #ifdef CONFIG_REISERFS_CHECK | 4218 | #ifdef CONFIG_REISERFS_CHECK |
4210 | dump_stack(); | 4219 | dump_stack(); |
4211 | #endif | 4220 | #endif |
4212 | } | 4221 | } |
4213 | 4222 | ||
4214 | static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno) | 4223 | static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno) |
4215 | { | 4224 | { |
4216 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 4225 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
4217 | if (test_bit(J_ABORTED, &journal->j_state)) | 4226 | if (test_bit(J_ABORTED, &journal->j_state)) |
4218 | return; | 4227 | return; |
4219 | 4228 | ||
4220 | if (!journal->j_errno) | 4229 | if (!journal->j_errno) |
4221 | journal->j_errno = errno; | 4230 | journal->j_errno = errno; |
4222 | 4231 | ||
4223 | __reiserfs_journal_abort_hard(sb); | 4232 | __reiserfs_journal_abort_hard(sb); |
4224 | } | 4233 | } |
4225 | 4234 | ||
/*
 * Public entry point for aborting the reiserfs journal.  Records @errno
 * as the journal's error (if none is set yet) and forces the filesystem
 * read-only via the soft-abort path.
 *
 * Fix: the original wrote "return __reiserfs_journal_abort_soft(...);",
 * returning an expression from a void function — a C constraint
 * violation (C11 6.8.6.4) that only compiles as a GCC extension.
 * The call and the (implicit) return are now separate.
 */
void reiserfs_journal_abort(struct super_block *sb, int errno)
{
	__reiserfs_journal_abort_soft(sb, errno);
}
4230 | 4239 |