Commit 4f902c37727bbedbc0508a1477874c58ddcc9af8
1 parent
49cb8d2d49
Exists in
master
and in
20 other branches
ocfs2: Fix extent lookup to return true size of holes
Initially, we had wired things to always report holes as having a size of '1'. Cook up a small amount of code to find the next extent and calculate the number of clusters between the virtual offset and the next allocated extent. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Showing 5 changed files with 109 additions and 12 deletions Inline Diff
fs/ocfs2/aops.c
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | 4 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU General Public | 7 | * modify it under the terms of the GNU General Public |
8 | * License as published by the Free Software Foundation; either | 8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. | 9 | * version 2 of the License, or (at your option) any later version. |
10 | * | 10 | * |
11 | * This program is distributed in the hope that it will be useful, | 11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * General Public License for more details. | 14 | * General Public License for more details. |
15 | * | 15 | * |
16 | * You should have received a copy of the GNU General Public | 16 | * You should have received a copy of the GNU General Public |
17 | * License along with this program; if not, write to the | 17 | * License along with this program; if not, write to the |
18 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 18 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
19 | * Boston, MA 021110-1307, USA. | 19 | * Boston, MA 021110-1307, USA. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/highmem.h> | 24 | #include <linux/highmem.h> |
25 | #include <linux/pagemap.h> | 25 | #include <linux/pagemap.h> |
26 | #include <asm/byteorder.h> | 26 | #include <asm/byteorder.h> |
27 | #include <linux/swap.h> | 27 | #include <linux/swap.h> |
28 | #include <linux/pipe_fs_i.h> | 28 | #include <linux/pipe_fs_i.h> |
29 | 29 | ||
30 | #define MLOG_MASK_PREFIX ML_FILE_IO | 30 | #define MLOG_MASK_PREFIX ML_FILE_IO |
31 | #include <cluster/masklog.h> | 31 | #include <cluster/masklog.h> |
32 | 32 | ||
33 | #include "ocfs2.h" | 33 | #include "ocfs2.h" |
34 | 34 | ||
35 | #include "alloc.h" | 35 | #include "alloc.h" |
36 | #include "aops.h" | 36 | #include "aops.h" |
37 | #include "dlmglue.h" | 37 | #include "dlmglue.h" |
38 | #include "extent_map.h" | 38 | #include "extent_map.h" |
39 | #include "file.h" | 39 | #include "file.h" |
40 | #include "inode.h" | 40 | #include "inode.h" |
41 | #include "journal.h" | 41 | #include "journal.h" |
42 | #include "suballoc.h" | 42 | #include "suballoc.h" |
43 | #include "super.h" | 43 | #include "super.h" |
44 | #include "symlink.h" | 44 | #include "symlink.h" |
45 | 45 | ||
46 | #include "buffer_head_io.h" | 46 | #include "buffer_head_io.h" |
47 | 47 | ||
48 | static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, | 48 | static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, |
49 | struct buffer_head *bh_result, int create) | 49 | struct buffer_head *bh_result, int create) |
50 | { | 50 | { |
51 | int err = -EIO; | 51 | int err = -EIO; |
52 | int status; | 52 | int status; |
53 | struct ocfs2_dinode *fe = NULL; | 53 | struct ocfs2_dinode *fe = NULL; |
54 | struct buffer_head *bh = NULL; | 54 | struct buffer_head *bh = NULL; |
55 | struct buffer_head *buffer_cache_bh = NULL; | 55 | struct buffer_head *buffer_cache_bh = NULL; |
56 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 56 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
57 | void *kaddr; | 57 | void *kaddr; |
58 | 58 | ||
59 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, | 59 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, |
60 | (unsigned long long)iblock, bh_result, create); | 60 | (unsigned long long)iblock, bh_result, create); |
61 | 61 | ||
62 | BUG_ON(ocfs2_inode_is_fast_symlink(inode)); | 62 | BUG_ON(ocfs2_inode_is_fast_symlink(inode)); |
63 | 63 | ||
64 | if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) { | 64 | if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) { |
65 | mlog(ML_ERROR, "block offset > PATH_MAX: %llu", | 65 | mlog(ML_ERROR, "block offset > PATH_MAX: %llu", |
66 | (unsigned long long)iblock); | 66 | (unsigned long long)iblock); |
67 | goto bail; | 67 | goto bail; |
68 | } | 68 | } |
69 | 69 | ||
70 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 70 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), |
71 | OCFS2_I(inode)->ip_blkno, | 71 | OCFS2_I(inode)->ip_blkno, |
72 | &bh, OCFS2_BH_CACHED, inode); | 72 | &bh, OCFS2_BH_CACHED, inode); |
73 | if (status < 0) { | 73 | if (status < 0) { |
74 | mlog_errno(status); | 74 | mlog_errno(status); |
75 | goto bail; | 75 | goto bail; |
76 | } | 76 | } |
77 | fe = (struct ocfs2_dinode *) bh->b_data; | 77 | fe = (struct ocfs2_dinode *) bh->b_data; |
78 | 78 | ||
79 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 79 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
80 | mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", | 80 | mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", |
81 | (unsigned long long)fe->i_blkno, 7, fe->i_signature); | 81 | (unsigned long long)fe->i_blkno, 7, fe->i_signature); |
82 | goto bail; | 82 | goto bail; |
83 | } | 83 | } |
84 | 84 | ||
85 | if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, | 85 | if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, |
86 | le32_to_cpu(fe->i_clusters))) { | 86 | le32_to_cpu(fe->i_clusters))) { |
87 | mlog(ML_ERROR, "block offset is outside the allocated size: " | 87 | mlog(ML_ERROR, "block offset is outside the allocated size: " |
88 | "%llu\n", (unsigned long long)iblock); | 88 | "%llu\n", (unsigned long long)iblock); |
89 | goto bail; | 89 | goto bail; |
90 | } | 90 | } |
91 | 91 | ||
92 | /* We don't use the page cache to create symlink data, so if | 92 | /* We don't use the page cache to create symlink data, so if |
93 | * need be, copy it over from the buffer cache. */ | 93 | * need be, copy it over from the buffer cache. */ |
94 | if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) { | 94 | if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) { |
95 | u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + | 95 | u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + |
96 | iblock; | 96 | iblock; |
97 | buffer_cache_bh = sb_getblk(osb->sb, blkno); | 97 | buffer_cache_bh = sb_getblk(osb->sb, blkno); |
98 | if (!buffer_cache_bh) { | 98 | if (!buffer_cache_bh) { |
99 | mlog(ML_ERROR, "couldn't getblock for symlink!\n"); | 99 | mlog(ML_ERROR, "couldn't getblock for symlink!\n"); |
100 | goto bail; | 100 | goto bail; |
101 | } | 101 | } |
102 | 102 | ||
103 | /* we haven't locked out transactions, so a commit | 103 | /* we haven't locked out transactions, so a commit |
104 | * could've happened. Since we've got a reference on | 104 | * could've happened. Since we've got a reference on |
105 | * the bh, even if it commits while we're doing the | 105 | * the bh, even if it commits while we're doing the |
106 | * copy, the data is still good. */ | 106 | * copy, the data is still good. */ |
107 | if (buffer_jbd(buffer_cache_bh) | 107 | if (buffer_jbd(buffer_cache_bh) |
108 | && ocfs2_inode_is_new(inode)) { | 108 | && ocfs2_inode_is_new(inode)) { |
109 | kaddr = kmap_atomic(bh_result->b_page, KM_USER0); | 109 | kaddr = kmap_atomic(bh_result->b_page, KM_USER0); |
110 | if (!kaddr) { | 110 | if (!kaddr) { |
111 | mlog(ML_ERROR, "couldn't kmap!\n"); | 111 | mlog(ML_ERROR, "couldn't kmap!\n"); |
112 | goto bail; | 112 | goto bail; |
113 | } | 113 | } |
114 | memcpy(kaddr + (bh_result->b_size * iblock), | 114 | memcpy(kaddr + (bh_result->b_size * iblock), |
115 | buffer_cache_bh->b_data, | 115 | buffer_cache_bh->b_data, |
116 | bh_result->b_size); | 116 | bh_result->b_size); |
117 | kunmap_atomic(kaddr, KM_USER0); | 117 | kunmap_atomic(kaddr, KM_USER0); |
118 | set_buffer_uptodate(bh_result); | 118 | set_buffer_uptodate(bh_result); |
119 | } | 119 | } |
120 | brelse(buffer_cache_bh); | 120 | brelse(buffer_cache_bh); |
121 | } | 121 | } |
122 | 122 | ||
123 | map_bh(bh_result, inode->i_sb, | 123 | map_bh(bh_result, inode->i_sb, |
124 | le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock); | 124 | le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock); |
125 | 125 | ||
126 | err = 0; | 126 | err = 0; |
127 | 127 | ||
128 | bail: | 128 | bail: |
129 | if (bh) | 129 | if (bh) |
130 | brelse(bh); | 130 | brelse(bh); |
131 | 131 | ||
132 | mlog_exit(err); | 132 | mlog_exit(err); |
133 | return err; | 133 | return err; |
134 | } | 134 | } |
135 | 135 | ||
136 | static int ocfs2_get_block(struct inode *inode, sector_t iblock, | 136 | static int ocfs2_get_block(struct inode *inode, sector_t iblock, |
137 | struct buffer_head *bh_result, int create) | 137 | struct buffer_head *bh_result, int create) |
138 | { | 138 | { |
139 | int err = 0; | 139 | int err = 0; |
140 | unsigned int ext_flags; | 140 | unsigned int ext_flags; |
141 | u64 p_blkno, past_eof; | 141 | u64 p_blkno, past_eof; |
142 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 142 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
143 | 143 | ||
144 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, | 144 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, |
145 | (unsigned long long)iblock, bh_result, create); | 145 | (unsigned long long)iblock, bh_result, create); |
146 | 146 | ||
147 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) | 147 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) |
148 | mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n", | 148 | mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n", |
149 | inode, inode->i_ino); | 149 | inode, inode->i_ino); |
150 | 150 | ||
151 | if (S_ISLNK(inode->i_mode)) { | 151 | if (S_ISLNK(inode->i_mode)) { |
152 | /* this always does I/O for some reason. */ | 152 | /* this always does I/O for some reason. */ |
153 | err = ocfs2_symlink_get_block(inode, iblock, bh_result, create); | 153 | err = ocfs2_symlink_get_block(inode, iblock, bh_result, create); |
154 | goto bail; | 154 | goto bail; |
155 | } | 155 | } |
156 | 156 | ||
157 | err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL, | 157 | err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL, |
158 | &ext_flags); | 158 | &ext_flags); |
159 | if (err) { | 159 | if (err) { |
160 | mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " | 160 | mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " |
161 | "%llu, NULL)\n", err, inode, (unsigned long long)iblock, | 161 | "%llu, NULL)\n", err, inode, (unsigned long long)iblock, |
162 | (unsigned long long)p_blkno); | 162 | (unsigned long long)p_blkno); |
163 | goto bail; | 163 | goto bail; |
164 | } | 164 | } |
165 | 165 | ||
166 | /* | 166 | /* |
167 | * ocfs2 never allocates in this function - the only time we | 167 | * ocfs2 never allocates in this function - the only time we |
168 | * need to use BH_New is when we're extending i_size on a file | 168 | * need to use BH_New is when we're extending i_size on a file |
169 | * system which doesn't support holes, in which case BH_New | 169 | * system which doesn't support holes, in which case BH_New |
170 | * allows block_prepare_write() to zero. | 170 | * allows block_prepare_write() to zero. |
171 | */ | 171 | */ |
172 | mlog_bug_on_msg(create && p_blkno == 0 && ocfs2_sparse_alloc(osb), | 172 | mlog_bug_on_msg(create && p_blkno == 0 && ocfs2_sparse_alloc(osb), |
173 | "ino %lu, iblock %llu\n", inode->i_ino, | 173 | "ino %lu, iblock %llu\n", inode->i_ino, |
174 | (unsigned long long)iblock); | 174 | (unsigned long long)iblock); |
175 | 175 | ||
176 | /* Treat the unwritten extent as a hole for zeroing purposes. */ | 176 | /* Treat the unwritten extent as a hole for zeroing purposes. */ |
177 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | 177 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) |
178 | map_bh(bh_result, inode->i_sb, p_blkno); | 178 | map_bh(bh_result, inode->i_sb, p_blkno); |
179 | 179 | ||
180 | if (!ocfs2_sparse_alloc(osb)) { | 180 | if (!ocfs2_sparse_alloc(osb)) { |
181 | if (p_blkno == 0) { | 181 | if (p_blkno == 0) { |
182 | err = -EIO; | 182 | err = -EIO; |
183 | mlog(ML_ERROR, | 183 | mlog(ML_ERROR, |
184 | "iblock = %llu p_blkno = %llu blkno=(%llu)\n", | 184 | "iblock = %llu p_blkno = %llu blkno=(%llu)\n", |
185 | (unsigned long long)iblock, | 185 | (unsigned long long)iblock, |
186 | (unsigned long long)p_blkno, | 186 | (unsigned long long)p_blkno, |
187 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 187 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
188 | mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); | 188 | mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); |
189 | dump_stack(); | 189 | dump_stack(); |
190 | } | 190 | } |
191 | 191 | ||
192 | past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); | 192 | past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); |
193 | mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, | 193 | mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, |
194 | (unsigned long long)past_eof); | 194 | (unsigned long long)past_eof); |
195 | 195 | ||
196 | if (create && (iblock >= past_eof)) | 196 | if (create && (iblock >= past_eof)) |
197 | set_buffer_new(bh_result); | 197 | set_buffer_new(bh_result); |
198 | } | 198 | } |
199 | 199 | ||
200 | bail: | 200 | bail: |
201 | if (err < 0) | 201 | if (err < 0) |
202 | err = -EIO; | 202 | err = -EIO; |
203 | 203 | ||
204 | mlog_exit(err); | 204 | mlog_exit(err); |
205 | return err; | 205 | return err; |
206 | } | 206 | } |
207 | 207 | ||
208 | static int ocfs2_readpage(struct file *file, struct page *page) | 208 | static int ocfs2_readpage(struct file *file, struct page *page) |
209 | { | 209 | { |
210 | struct inode *inode = page->mapping->host; | 210 | struct inode *inode = page->mapping->host; |
211 | loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT; | 211 | loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT; |
212 | int ret, unlock = 1; | 212 | int ret, unlock = 1; |
213 | 213 | ||
214 | mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); | 214 | mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); |
215 | 215 | ||
216 | ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); | 216 | ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); |
217 | if (ret != 0) { | 217 | if (ret != 0) { |
218 | if (ret == AOP_TRUNCATED_PAGE) | 218 | if (ret == AOP_TRUNCATED_PAGE) |
219 | unlock = 0; | 219 | unlock = 0; |
220 | mlog_errno(ret); | 220 | mlog_errno(ret); |
221 | goto out; | 221 | goto out; |
222 | } | 222 | } |
223 | 223 | ||
224 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | 224 | down_read(&OCFS2_I(inode)->ip_alloc_sem); |
225 | 225 | ||
226 | /* | 226 | /* |
227 | * i_size might have just been updated as we grabed the meta lock. We | 227 | * i_size might have just been updated as we grabed the meta lock. We |
228 | * might now be discovering a truncate that hit on another node. | 228 | * might now be discovering a truncate that hit on another node. |
229 | * block_read_full_page->get_block freaks out if it is asked to read | 229 | * block_read_full_page->get_block freaks out if it is asked to read |
230 | * beyond the end of a file, so we check here. Callers | 230 | * beyond the end of a file, so we check here. Callers |
231 | * (generic_file_read, fault->nopage) are clever enough to check i_size | 231 | * (generic_file_read, fault->nopage) are clever enough to check i_size |
232 | * and notice that the page they just read isn't needed. | 232 | * and notice that the page they just read isn't needed. |
233 | * | 233 | * |
234 | * XXX sys_readahead() seems to get that wrong? | 234 | * XXX sys_readahead() seems to get that wrong? |
235 | */ | 235 | */ |
236 | if (start >= i_size_read(inode)) { | 236 | if (start >= i_size_read(inode)) { |
237 | char *addr = kmap(page); | 237 | char *addr = kmap(page); |
238 | memset(addr, 0, PAGE_SIZE); | 238 | memset(addr, 0, PAGE_SIZE); |
239 | flush_dcache_page(page); | 239 | flush_dcache_page(page); |
240 | kunmap(page); | 240 | kunmap(page); |
241 | SetPageUptodate(page); | 241 | SetPageUptodate(page); |
242 | ret = 0; | 242 | ret = 0; |
243 | goto out_alloc; | 243 | goto out_alloc; |
244 | } | 244 | } |
245 | 245 | ||
246 | ret = ocfs2_data_lock_with_page(inode, 0, page); | 246 | ret = ocfs2_data_lock_with_page(inode, 0, page); |
247 | if (ret != 0) { | 247 | if (ret != 0) { |
248 | if (ret == AOP_TRUNCATED_PAGE) | 248 | if (ret == AOP_TRUNCATED_PAGE) |
249 | unlock = 0; | 249 | unlock = 0; |
250 | mlog_errno(ret); | 250 | mlog_errno(ret); |
251 | goto out_alloc; | 251 | goto out_alloc; |
252 | } | 252 | } |
253 | 253 | ||
254 | ret = block_read_full_page(page, ocfs2_get_block); | 254 | ret = block_read_full_page(page, ocfs2_get_block); |
255 | unlock = 0; | 255 | unlock = 0; |
256 | 256 | ||
257 | ocfs2_data_unlock(inode, 0); | 257 | ocfs2_data_unlock(inode, 0); |
258 | out_alloc: | 258 | out_alloc: |
259 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 259 | up_read(&OCFS2_I(inode)->ip_alloc_sem); |
260 | ocfs2_meta_unlock(inode, 0); | 260 | ocfs2_meta_unlock(inode, 0); |
261 | out: | 261 | out: |
262 | if (unlock) | 262 | if (unlock) |
263 | unlock_page(page); | 263 | unlock_page(page); |
264 | mlog_exit(ret); | 264 | mlog_exit(ret); |
265 | return ret; | 265 | return ret; |
266 | } | 266 | } |
267 | 267 | ||
268 | /* Note: Because we don't support holes, our allocation has | 268 | /* Note: Because we don't support holes, our allocation has |
269 | * already happened (allocation writes zeros to the file data) | 269 | * already happened (allocation writes zeros to the file data) |
270 | * so we don't have to worry about ordered writes in | 270 | * so we don't have to worry about ordered writes in |
271 | * ocfs2_writepage. | 271 | * ocfs2_writepage. |
272 | * | 272 | * |
273 | * ->writepage is called during the process of invalidating the page cache | 273 | * ->writepage is called during the process of invalidating the page cache |
274 | * during blocked lock processing. It can't block on any cluster locks | 274 | * during blocked lock processing. It can't block on any cluster locks |
275 | * to during block mapping. It's relying on the fact that the block | 275 | * to during block mapping. It's relying on the fact that the block |
276 | * mapping can't have disappeared under the dirty pages that it is | 276 | * mapping can't have disappeared under the dirty pages that it is |
277 | * being asked to write back. | 277 | * being asked to write back. |
278 | */ | 278 | */ |
279 | static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) | 279 | static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) |
280 | { | 280 | { |
281 | int ret; | 281 | int ret; |
282 | 282 | ||
283 | mlog_entry("(0x%p)\n", page); | 283 | mlog_entry("(0x%p)\n", page); |
284 | 284 | ||
285 | ret = block_write_full_page(page, ocfs2_get_block, wbc); | 285 | ret = block_write_full_page(page, ocfs2_get_block, wbc); |
286 | 286 | ||
287 | mlog_exit(ret); | 287 | mlog_exit(ret); |
288 | 288 | ||
289 | return ret; | 289 | return ret; |
290 | } | 290 | } |
291 | 291 | ||
292 | /* | 292 | /* |
293 | * This is called from ocfs2_write_zero_page() which has handled it's | 293 | * This is called from ocfs2_write_zero_page() which has handled it's |
294 | * own cluster locking and has ensured allocation exists for those | 294 | * own cluster locking and has ensured allocation exists for those |
295 | * blocks to be written. | 295 | * blocks to be written. |
296 | */ | 296 | */ |
297 | int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, | 297 | int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, |
298 | unsigned from, unsigned to) | 298 | unsigned from, unsigned to) |
299 | { | 299 | { |
300 | int ret; | 300 | int ret; |
301 | 301 | ||
302 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | 302 | down_read(&OCFS2_I(inode)->ip_alloc_sem); |
303 | 303 | ||
304 | ret = block_prepare_write(page, from, to, ocfs2_get_block); | 304 | ret = block_prepare_write(page, from, to, ocfs2_get_block); |
305 | 305 | ||
306 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 306 | up_read(&OCFS2_I(inode)->ip_alloc_sem); |
307 | 307 | ||
308 | return ret; | 308 | return ret; |
309 | } | 309 | } |
310 | 310 | ||
311 | /* Taken from ext3. We don't necessarily need the full blown | 311 | /* Taken from ext3. We don't necessarily need the full blown |
312 | * functionality yet, but IMHO it's better to cut and paste the whole | 312 | * functionality yet, but IMHO it's better to cut and paste the whole |
313 | * thing so we can avoid introducing our own bugs (and easily pick up | 313 | * thing so we can avoid introducing our own bugs (and easily pick up |
314 | * their fixes when they happen) --Mark */ | 314 | * their fixes when they happen) --Mark */ |
315 | int walk_page_buffers( handle_t *handle, | 315 | int walk_page_buffers( handle_t *handle, |
316 | struct buffer_head *head, | 316 | struct buffer_head *head, |
317 | unsigned from, | 317 | unsigned from, |
318 | unsigned to, | 318 | unsigned to, |
319 | int *partial, | 319 | int *partial, |
320 | int (*fn)( handle_t *handle, | 320 | int (*fn)( handle_t *handle, |
321 | struct buffer_head *bh)) | 321 | struct buffer_head *bh)) |
322 | { | 322 | { |
323 | struct buffer_head *bh; | 323 | struct buffer_head *bh; |
324 | unsigned block_start, block_end; | 324 | unsigned block_start, block_end; |
325 | unsigned blocksize = head->b_size; | 325 | unsigned blocksize = head->b_size; |
326 | int err, ret = 0; | 326 | int err, ret = 0; |
327 | struct buffer_head *next; | 327 | struct buffer_head *next; |
328 | 328 | ||
329 | for ( bh = head, block_start = 0; | 329 | for ( bh = head, block_start = 0; |
330 | ret == 0 && (bh != head || !block_start); | 330 | ret == 0 && (bh != head || !block_start); |
331 | block_start = block_end, bh = next) | 331 | block_start = block_end, bh = next) |
332 | { | 332 | { |
333 | next = bh->b_this_page; | 333 | next = bh->b_this_page; |
334 | block_end = block_start + blocksize; | 334 | block_end = block_start + blocksize; |
335 | if (block_end <= from || block_start >= to) { | 335 | if (block_end <= from || block_start >= to) { |
336 | if (partial && !buffer_uptodate(bh)) | 336 | if (partial && !buffer_uptodate(bh)) |
337 | *partial = 1; | 337 | *partial = 1; |
338 | continue; | 338 | continue; |
339 | } | 339 | } |
340 | err = (*fn)(handle, bh); | 340 | err = (*fn)(handle, bh); |
341 | if (!ret) | 341 | if (!ret) |
342 | ret = err; | 342 | ret = err; |
343 | } | 343 | } |
344 | return ret; | 344 | return ret; |
345 | } | 345 | } |
346 | 346 | ||
347 | handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | 347 | handle_t *ocfs2_start_walk_page_trans(struct inode *inode, |
348 | struct page *page, | 348 | struct page *page, |
349 | unsigned from, | 349 | unsigned from, |
350 | unsigned to) | 350 | unsigned to) |
351 | { | 351 | { |
352 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 352 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
353 | handle_t *handle = NULL; | 353 | handle_t *handle = NULL; |
354 | int ret = 0; | 354 | int ret = 0; |
355 | 355 | ||
356 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 356 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
357 | if (!handle) { | 357 | if (!handle) { |
358 | ret = -ENOMEM; | 358 | ret = -ENOMEM; |
359 | mlog_errno(ret); | 359 | mlog_errno(ret); |
360 | goto out; | 360 | goto out; |
361 | } | 361 | } |
362 | 362 | ||
363 | if (ocfs2_should_order_data(inode)) { | 363 | if (ocfs2_should_order_data(inode)) { |
364 | ret = walk_page_buffers(handle, | 364 | ret = walk_page_buffers(handle, |
365 | page_buffers(page), | 365 | page_buffers(page), |
366 | from, to, NULL, | 366 | from, to, NULL, |
367 | ocfs2_journal_dirty_data); | 367 | ocfs2_journal_dirty_data); |
368 | if (ret < 0) | 368 | if (ret < 0) |
369 | mlog_errno(ret); | 369 | mlog_errno(ret); |
370 | } | 370 | } |
371 | out: | 371 | out: |
372 | if (ret) { | 372 | if (ret) { |
373 | if (handle) | 373 | if (handle) |
374 | ocfs2_commit_trans(osb, handle); | 374 | ocfs2_commit_trans(osb, handle); |
375 | handle = ERR_PTR(ret); | 375 | handle = ERR_PTR(ret); |
376 | } | 376 | } |
377 | return handle; | 377 | return handle; |
378 | } | 378 | } |
379 | 379 | ||
380 | static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) | 380 | static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) |
381 | { | 381 | { |
382 | sector_t status; | 382 | sector_t status; |
383 | u64 p_blkno = 0; | 383 | u64 p_blkno = 0; |
384 | int err = 0; | 384 | int err = 0; |
385 | struct inode *inode = mapping->host; | 385 | struct inode *inode = mapping->host; |
386 | 386 | ||
387 | mlog_entry("(block = %llu)\n", (unsigned long long)block); | 387 | mlog_entry("(block = %llu)\n", (unsigned long long)block); |
388 | 388 | ||
389 | /* We don't need to lock journal system files, since they aren't | 389 | /* We don't need to lock journal system files, since they aren't |
390 | * accessed concurrently from multiple nodes. | 390 | * accessed concurrently from multiple nodes. |
391 | */ | 391 | */ |
392 | if (!INODE_JOURNAL(inode)) { | 392 | if (!INODE_JOURNAL(inode)) { |
393 | err = ocfs2_meta_lock(inode, NULL, 0); | 393 | err = ocfs2_meta_lock(inode, NULL, 0); |
394 | if (err) { | 394 | if (err) { |
395 | if (err != -ENOENT) | 395 | if (err != -ENOENT) |
396 | mlog_errno(err); | 396 | mlog_errno(err); |
397 | goto bail; | 397 | goto bail; |
398 | } | 398 | } |
399 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | 399 | down_read(&OCFS2_I(inode)->ip_alloc_sem); |
400 | } | 400 | } |
401 | 401 | ||
402 | err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, NULL); | 402 | err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, NULL); |
403 | 403 | ||
404 | if (!INODE_JOURNAL(inode)) { | 404 | if (!INODE_JOURNAL(inode)) { |
405 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 405 | up_read(&OCFS2_I(inode)->ip_alloc_sem); |
406 | ocfs2_meta_unlock(inode, 0); | 406 | ocfs2_meta_unlock(inode, 0); |
407 | } | 407 | } |
408 | 408 | ||
409 | if (err) { | 409 | if (err) { |
410 | mlog(ML_ERROR, "get_blocks() failed, block = %llu\n", | 410 | mlog(ML_ERROR, "get_blocks() failed, block = %llu\n", |
411 | (unsigned long long)block); | 411 | (unsigned long long)block); |
412 | mlog_errno(err); | 412 | mlog_errno(err); |
413 | goto bail; | 413 | goto bail; |
414 | } | 414 | } |
415 | 415 | ||
416 | 416 | ||
417 | bail: | 417 | bail: |
418 | status = err ? 0 : p_blkno; | 418 | status = err ? 0 : p_blkno; |
419 | 419 | ||
420 | mlog_exit((int)status); | 420 | mlog_exit((int)status); |
421 | 421 | ||
422 | return status; | 422 | return status; |
423 | } | 423 | } |
424 | 424 | ||
425 | /* | 425 | /* |
426 | * TODO: Make this into a generic get_blocks function. | 426 | * TODO: Make this into a generic get_blocks function. |
427 | * | 427 | * |
428 | * From do_direct_io in direct-io.c: | 428 | * From do_direct_io in direct-io.c: |
429 | * "So what we do is to permit the ->get_blocks function to populate | 429 | * "So what we do is to permit the ->get_blocks function to populate |
430 | * bh.b_size with the size of IO which is permitted at this offset and | 430 | * bh.b_size with the size of IO which is permitted at this offset and |
431 | * this i_blkbits." | 431 | * this i_blkbits." |
432 | * | 432 | * |
433 | * This function is called directly from get_more_blocks in direct-io.c. | 433 | * This function is called directly from get_more_blocks in direct-io.c. |
434 | * | 434 | * |
435 | * called like this: dio->get_blocks(dio->inode, fs_startblk, | 435 | * called like this: dio->get_blocks(dio->inode, fs_startblk, |
436 | * fs_count, map_bh, dio->rw == WRITE); | 436 | * fs_count, map_bh, dio->rw == WRITE); |
437 | */ | 437 | */ |
438 | static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | 438 | static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, |
439 | struct buffer_head *bh_result, int create) | 439 | struct buffer_head *bh_result, int create) |
440 | { | 440 | { |
441 | int ret; | 441 | int ret; |
442 | u64 p_blkno, inode_blocks; | 442 | u64 p_blkno, inode_blocks, contig_blocks; |
443 | int contig_blocks; | ||
444 | unsigned int ext_flags; | 443 | unsigned int ext_flags; |
445 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; | 444 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; |
446 | unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; | 445 | unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; |
447 | 446 | ||
448 | /* This function won't even be called if the request isn't all | 447 | /* This function won't even be called if the request isn't all |
449 | * nicely aligned and of the right size, so there's no need | 448 | * nicely aligned and of the right size, so there's no need |
450 | * for us to check any of that. */ | 449 | * for us to check any of that. */ |
451 | 450 | ||
452 | inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); | 451 | inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); |
453 | 452 | ||
454 | /* | 453 | /* |
455 | * Any write past EOF is not allowed because we'd be extending. | 454 | * Any write past EOF is not allowed because we'd be extending. |
456 | */ | 455 | */ |
457 | if (create && (iblock + max_blocks) > inode_blocks) { | 456 | if (create && (iblock + max_blocks) > inode_blocks) { |
458 | ret = -EIO; | 457 | ret = -EIO; |
459 | goto bail; | 458 | goto bail; |
460 | } | 459 | } |
461 | 460 | ||
462 | /* This figures out the size of the next contiguous block, and | 461 | /* This figures out the size of the next contiguous block, and |
463 | * our logical offset */ | 462 | * our logical offset */ |
464 | ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, | 463 | ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, |
465 | &contig_blocks, &ext_flags); | 464 | &contig_blocks, &ext_flags); |
466 | if (ret) { | 465 | if (ret) { |
467 | mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n", | 466 | mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n", |
468 | (unsigned long long)iblock); | 467 | (unsigned long long)iblock); |
469 | ret = -EIO; | 468 | ret = -EIO; |
470 | goto bail; | 469 | goto bail; |
471 | } | 470 | } |
472 | 471 | ||
473 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) { | 472 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) { |
474 | ocfs2_error(inode->i_sb, | 473 | ocfs2_error(inode->i_sb, |
475 | "Inode %llu has a hole at block %llu\n", | 474 | "Inode %llu has a hole at block %llu\n", |
476 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 475 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
477 | (unsigned long long)iblock); | 476 | (unsigned long long)iblock); |
478 | ret = -EROFS; | 477 | ret = -EROFS; |
479 | goto bail; | 478 | goto bail; |
480 | } | 479 | } |
481 | 480 | ||
482 | /* | 481 | /* |
483 | * get_more_blocks() expects us to describe a hole by clearing | 482 | * get_more_blocks() expects us to describe a hole by clearing |
484 | * the mapped bit on bh_result(). | 483 | * the mapped bit on bh_result(). |
485 | * | 484 | * |
486 | * Consider an unwritten extent as a hole. | 485 | * Consider an unwritten extent as a hole. |
487 | */ | 486 | */ |
488 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | 487 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) |
489 | map_bh(bh_result, inode->i_sb, p_blkno); | 488 | map_bh(bh_result, inode->i_sb, p_blkno); |
490 | else { | 489 | else { |
491 | /* | 490 | /* |
492 | * ocfs2_prepare_inode_for_write() should have caught | 491 | * ocfs2_prepare_inode_for_write() should have caught |
493 | * the case where we'd be filling a hole and triggered | 492 | * the case where we'd be filling a hole and triggered |
494 | * a buffered write instead. | 493 | * a buffered write instead. |
495 | */ | 494 | */ |
496 | if (create) { | 495 | if (create) { |
497 | ret = -EIO; | 496 | ret = -EIO; |
498 | mlog_errno(ret); | 497 | mlog_errno(ret); |
499 | goto bail; | 498 | goto bail; |
500 | } | 499 | } |
501 | 500 | ||
502 | clear_buffer_mapped(bh_result); | 501 | clear_buffer_mapped(bh_result); |
503 | } | 502 | } |
504 | 503 | ||
505 | /* make sure we don't map more than max_blocks blocks here as | 504 | /* make sure we don't map more than max_blocks blocks here as |
506 | that's all the kernel will handle at this point. */ | 505 | that's all the kernel will handle at this point. */ |
507 | if (max_blocks < contig_blocks) | 506 | if (max_blocks < contig_blocks) |
508 | contig_blocks = max_blocks; | 507 | contig_blocks = max_blocks; |
509 | bh_result->b_size = contig_blocks << blocksize_bits; | 508 | bh_result->b_size = contig_blocks << blocksize_bits; |
510 | bail: | 509 | bail: |
511 | return ret; | 510 | return ret; |
512 | } | 511 | } |
513 | 512 | ||
514 | /* | 513 | /* |
515 | * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're | 514 | * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're |
516 | * particularly interested in the aio/dio case. Like the core uses | 515 | * particularly interested in the aio/dio case. Like the core uses |
517 | * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from | 516 | * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from |
518 | * truncation on another. | 517 | * truncation on another. |
519 | */ | 518 | */ |
520 | static void ocfs2_dio_end_io(struct kiocb *iocb, | 519 | static void ocfs2_dio_end_io(struct kiocb *iocb, |
521 | loff_t offset, | 520 | loff_t offset, |
522 | ssize_t bytes, | 521 | ssize_t bytes, |
523 | void *private) | 522 | void *private) |
524 | { | 523 | { |
525 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; | 524 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
526 | 525 | ||
527 | /* this io's submitter should not have unlocked this before we could */ | 526 | /* this io's submitter should not have unlocked this before we could */ |
528 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); | 527 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); |
529 | ocfs2_iocb_clear_rw_locked(iocb); | 528 | ocfs2_iocb_clear_rw_locked(iocb); |
530 | up_read(&inode->i_alloc_sem); | 529 | up_read(&inode->i_alloc_sem); |
531 | ocfs2_rw_unlock(inode, 0); | 530 | ocfs2_rw_unlock(inode, 0); |
532 | } | 531 | } |
533 | 532 | ||
534 | /* | 533 | /* |
535 | * ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen | 534 | * ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen |
536 | * from ext3. PageChecked() bits have been removed as OCFS2 does not | 535 | * from ext3. PageChecked() bits have been removed as OCFS2 does not |
537 | * do journalled data. | 536 | * do journalled data. |
538 | */ | 537 | */ |
539 | static void ocfs2_invalidatepage(struct page *page, unsigned long offset) | 538 | static void ocfs2_invalidatepage(struct page *page, unsigned long offset) |
540 | { | 539 | { |
541 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; | 540 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; |
542 | 541 | ||
543 | journal_invalidatepage(journal, page, offset); | 542 | journal_invalidatepage(journal, page, offset); |
544 | } | 543 | } |
545 | 544 | ||
546 | static int ocfs2_releasepage(struct page *page, gfp_t wait) | 545 | static int ocfs2_releasepage(struct page *page, gfp_t wait) |
547 | { | 546 | { |
548 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; | 547 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; |
549 | 548 | ||
550 | if (!page_has_buffers(page)) | 549 | if (!page_has_buffers(page)) |
551 | return 0; | 550 | return 0; |
552 | return journal_try_to_free_buffers(journal, page, wait); | 551 | return journal_try_to_free_buffers(journal, page, wait); |
553 | } | 552 | } |
554 | 553 | ||
555 | static ssize_t ocfs2_direct_IO(int rw, | 554 | static ssize_t ocfs2_direct_IO(int rw, |
556 | struct kiocb *iocb, | 555 | struct kiocb *iocb, |
557 | const struct iovec *iov, | 556 | const struct iovec *iov, |
558 | loff_t offset, | 557 | loff_t offset, |
559 | unsigned long nr_segs) | 558 | unsigned long nr_segs) |
560 | { | 559 | { |
561 | struct file *file = iocb->ki_filp; | 560 | struct file *file = iocb->ki_filp; |
562 | struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; | 561 | struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; |
563 | int ret; | 562 | int ret; |
564 | 563 | ||
565 | mlog_entry_void(); | 564 | mlog_entry_void(); |
566 | 565 | ||
567 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { | 566 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { |
568 | /* | 567 | /* |
569 | * We get PR data locks even for O_DIRECT. This | 568 | * We get PR data locks even for O_DIRECT. This |
570 | * allows concurrent O_DIRECT I/O but doesn't let | 569 | * allows concurrent O_DIRECT I/O but doesn't let |
571 | * O_DIRECT with extending and buffered zeroing writes | 570 | * O_DIRECT with extending and buffered zeroing writes |
572 | * race. If they did race then the buffered zeroing | 571 | * race. If they did race then the buffered zeroing |
573 | * could be written back after the O_DIRECT I/O. It's | 572 | * could be written back after the O_DIRECT I/O. It's |
574 | * one thing to tell people not to mix buffered and | 573 | * one thing to tell people not to mix buffered and |
575 | * O_DIRECT writes, but expecting them to understand | 574 | * O_DIRECT writes, but expecting them to understand |
576 | * that file extension is also an implicit buffered | 575 | * that file extension is also an implicit buffered |
577 | * write is too much. By getting the PR we force | 576 | * write is too much. By getting the PR we force |
578 | * writeback of the buffered zeroing before | 577 | * writeback of the buffered zeroing before |
579 | * proceeding. | 578 | * proceeding. |
580 | */ | 579 | */ |
581 | ret = ocfs2_data_lock(inode, 0); | 580 | ret = ocfs2_data_lock(inode, 0); |
582 | if (ret < 0) { | 581 | if (ret < 0) { |
583 | mlog_errno(ret); | 582 | mlog_errno(ret); |
584 | goto out; | 583 | goto out; |
585 | } | 584 | } |
586 | ocfs2_data_unlock(inode, 0); | 585 | ocfs2_data_unlock(inode, 0); |
587 | } | 586 | } |
588 | 587 | ||
589 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 588 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, |
590 | inode->i_sb->s_bdev, iov, offset, | 589 | inode->i_sb->s_bdev, iov, offset, |
591 | nr_segs, | 590 | nr_segs, |
592 | ocfs2_direct_IO_get_blocks, | 591 | ocfs2_direct_IO_get_blocks, |
593 | ocfs2_dio_end_io); | 592 | ocfs2_dio_end_io); |
594 | out: | 593 | out: |
595 | mlog_exit(ret); | 594 | mlog_exit(ret); |
596 | return ret; | 595 | return ret; |
597 | } | 596 | } |
598 | 597 | ||
599 | static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, | 598 | static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, |
600 | u32 cpos, | 599 | u32 cpos, |
601 | unsigned int *start, | 600 | unsigned int *start, |
602 | unsigned int *end) | 601 | unsigned int *end) |
603 | { | 602 | { |
604 | unsigned int cluster_start = 0, cluster_end = PAGE_CACHE_SIZE; | 603 | unsigned int cluster_start = 0, cluster_end = PAGE_CACHE_SIZE; |
605 | 604 | ||
606 | if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) { | 605 | if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) { |
607 | unsigned int cpp; | 606 | unsigned int cpp; |
608 | 607 | ||
609 | cpp = 1 << (PAGE_CACHE_SHIFT - osb->s_clustersize_bits); | 608 | cpp = 1 << (PAGE_CACHE_SHIFT - osb->s_clustersize_bits); |
610 | 609 | ||
611 | cluster_start = cpos % cpp; | 610 | cluster_start = cpos % cpp; |
612 | cluster_start = cluster_start << osb->s_clustersize_bits; | 611 | cluster_start = cluster_start << osb->s_clustersize_bits; |
613 | 612 | ||
614 | cluster_end = cluster_start + osb->s_clustersize; | 613 | cluster_end = cluster_start + osb->s_clustersize; |
615 | } | 614 | } |
616 | 615 | ||
617 | BUG_ON(cluster_start > PAGE_SIZE); | 616 | BUG_ON(cluster_start > PAGE_SIZE); |
618 | BUG_ON(cluster_end > PAGE_SIZE); | 617 | BUG_ON(cluster_end > PAGE_SIZE); |
619 | 618 | ||
620 | if (start) | 619 | if (start) |
621 | *start = cluster_start; | 620 | *start = cluster_start; |
622 | if (end) | 621 | if (end) |
623 | *end = cluster_end; | 622 | *end = cluster_end; |
624 | } | 623 | } |
625 | 624 | ||
626 | /* | 625 | /* |
627 | * 'from' and 'to' are the region in the page to avoid zeroing. | 626 | * 'from' and 'to' are the region in the page to avoid zeroing. |
628 | * | 627 | * |
629 | * If pagesize > clustersize, this function will avoid zeroing outside | 628 | * If pagesize > clustersize, this function will avoid zeroing outside |
630 | * of the cluster boundary. | 629 | * of the cluster boundary. |
631 | * | 630 | * |
632 | * from == to == 0 is code for "zero the entire cluster region" | 631 | * from == to == 0 is code for "zero the entire cluster region" |
633 | */ | 632 | */ |
634 | static void ocfs2_clear_page_regions(struct page *page, | 633 | static void ocfs2_clear_page_regions(struct page *page, |
635 | struct ocfs2_super *osb, u32 cpos, | 634 | struct ocfs2_super *osb, u32 cpos, |
636 | unsigned from, unsigned to) | 635 | unsigned from, unsigned to) |
637 | { | 636 | { |
638 | void *kaddr; | 637 | void *kaddr; |
639 | unsigned int cluster_start, cluster_end; | 638 | unsigned int cluster_start, cluster_end; |
640 | 639 | ||
641 | ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end); | 640 | ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end); |
642 | 641 | ||
643 | kaddr = kmap_atomic(page, KM_USER0); | 642 | kaddr = kmap_atomic(page, KM_USER0); |
644 | 643 | ||
645 | if (from || to) { | 644 | if (from || to) { |
646 | if (from > cluster_start) | 645 | if (from > cluster_start) |
647 | memset(kaddr + cluster_start, 0, from - cluster_start); | 646 | memset(kaddr + cluster_start, 0, from - cluster_start); |
648 | if (to < cluster_end) | 647 | if (to < cluster_end) |
649 | memset(kaddr + to, 0, cluster_end - to); | 648 | memset(kaddr + to, 0, cluster_end - to); |
650 | } else { | 649 | } else { |
651 | memset(kaddr + cluster_start, 0, cluster_end - cluster_start); | 650 | memset(kaddr + cluster_start, 0, cluster_end - cluster_start); |
652 | } | 651 | } |
653 | 652 | ||
654 | kunmap_atomic(kaddr, KM_USER0); | 653 | kunmap_atomic(kaddr, KM_USER0); |
655 | } | 654 | } |
656 | 655 | ||
657 | /* | 656 | /* |
658 | * Some of this taken from block_prepare_write(). We already have our | 657 | * Some of this taken from block_prepare_write(). We already have our |
659 | * mapping by now though, and the entire write will be allocating or | 658 | * mapping by now though, and the entire write will be allocating or |
660 | * it won't, so not much need to use BH_New. | 659 | * it won't, so not much need to use BH_New. |
661 | * | 660 | * |
662 | * This will also skip zeroing, which is handled externally. | 661 | * This will also skip zeroing, which is handled externally. |
663 | */ | 662 | */ |
664 | int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, | 663 | int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, |
665 | struct inode *inode, unsigned int from, | 664 | struct inode *inode, unsigned int from, |
666 | unsigned int to, int new) | 665 | unsigned int to, int new) |
667 | { | 666 | { |
668 | int ret = 0; | 667 | int ret = 0; |
669 | struct buffer_head *head, *bh, *wait[2], **wait_bh = wait; | 668 | struct buffer_head *head, *bh, *wait[2], **wait_bh = wait; |
670 | unsigned int block_end, block_start; | 669 | unsigned int block_end, block_start; |
671 | unsigned int bsize = 1 << inode->i_blkbits; | 670 | unsigned int bsize = 1 << inode->i_blkbits; |
672 | 671 | ||
673 | if (!page_has_buffers(page)) | 672 | if (!page_has_buffers(page)) |
674 | create_empty_buffers(page, bsize, 0); | 673 | create_empty_buffers(page, bsize, 0); |
675 | 674 | ||
676 | head = page_buffers(page); | 675 | head = page_buffers(page); |
677 | for (bh = head, block_start = 0; bh != head || !block_start; | 676 | for (bh = head, block_start = 0; bh != head || !block_start; |
678 | bh = bh->b_this_page, block_start += bsize) { | 677 | bh = bh->b_this_page, block_start += bsize) { |
679 | block_end = block_start + bsize; | 678 | block_end = block_start + bsize; |
680 | 679 | ||
681 | /* | 680 | /* |
682 | * Ignore blocks outside of our i/o range - | 681 | * Ignore blocks outside of our i/o range - |
683 | * they may belong to unallocated clusters. | 682 | * they may belong to unallocated clusters. |
684 | */ | 683 | */ |
685 | if (block_start >= to || block_end <= from) { | 684 | if (block_start >= to || block_end <= from) { |
686 | if (PageUptodate(page)) | 685 | if (PageUptodate(page)) |
687 | set_buffer_uptodate(bh); | 686 | set_buffer_uptodate(bh); |
688 | continue; | 687 | continue; |
689 | } | 688 | } |
690 | 689 | ||
691 | /* | 690 | /* |
692 | * For an allocating write with cluster size >= page | 691 | * For an allocating write with cluster size >= page |
693 | * size, we always write the entire page. | 692 | * size, we always write the entire page. |
694 | */ | 693 | */ |
695 | 694 | ||
696 | if (buffer_new(bh)) | 695 | if (buffer_new(bh)) |
697 | clear_buffer_new(bh); | 696 | clear_buffer_new(bh); |
698 | 697 | ||
699 | if (!buffer_mapped(bh)) { | 698 | if (!buffer_mapped(bh)) { |
700 | map_bh(bh, inode->i_sb, *p_blkno); | 699 | map_bh(bh, inode->i_sb, *p_blkno); |
701 | unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); | 700 | unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); |
702 | } | 701 | } |
703 | 702 | ||
704 | if (PageUptodate(page)) { | 703 | if (PageUptodate(page)) { |
705 | if (!buffer_uptodate(bh)) | 704 | if (!buffer_uptodate(bh)) |
706 | set_buffer_uptodate(bh); | 705 | set_buffer_uptodate(bh); |
707 | } else if (!buffer_uptodate(bh) && !buffer_delay(bh) && | 706 | } else if (!buffer_uptodate(bh) && !buffer_delay(bh) && |
708 | (block_start < from || block_end > to)) { | 707 | (block_start < from || block_end > to)) { |
709 | ll_rw_block(READ, 1, &bh); | 708 | ll_rw_block(READ, 1, &bh); |
710 | *wait_bh++=bh; | 709 | *wait_bh++=bh; |
711 | } | 710 | } |
712 | 711 | ||
713 | *p_blkno = *p_blkno + 1; | 712 | *p_blkno = *p_blkno + 1; |
714 | } | 713 | } |
715 | 714 | ||
716 | /* | 715 | /* |
717 | * If we issued read requests - let them complete. | 716 | * If we issued read requests - let them complete. |
718 | */ | 717 | */ |
719 | while(wait_bh > wait) { | 718 | while(wait_bh > wait) { |
720 | wait_on_buffer(*--wait_bh); | 719 | wait_on_buffer(*--wait_bh); |
721 | if (!buffer_uptodate(*wait_bh)) | 720 | if (!buffer_uptodate(*wait_bh)) |
722 | ret = -EIO; | 721 | ret = -EIO; |
723 | } | 722 | } |
724 | 723 | ||
725 | if (ret == 0 || !new) | 724 | if (ret == 0 || !new) |
726 | return ret; | 725 | return ret; |
727 | 726 | ||
728 | /* | 727 | /* |
729 | * If we get -EIO above, zero out any newly allocated blocks | 728 | * If we get -EIO above, zero out any newly allocated blocks |
730 | * to avoid exposing stale data. | 729 | * to avoid exposing stale data. |
731 | */ | 730 | */ |
732 | bh = head; | 731 | bh = head; |
733 | block_start = 0; | 732 | block_start = 0; |
734 | do { | 733 | do { |
735 | void *kaddr; | 734 | void *kaddr; |
736 | 735 | ||
737 | block_end = block_start + bsize; | 736 | block_end = block_start + bsize; |
738 | if (block_end <= from) | 737 | if (block_end <= from) |
739 | goto next_bh; | 738 | goto next_bh; |
740 | if (block_start >= to) | 739 | if (block_start >= to) |
741 | break; | 740 | break; |
742 | 741 | ||
743 | kaddr = kmap_atomic(page, KM_USER0); | 742 | kaddr = kmap_atomic(page, KM_USER0); |
744 | memset(kaddr+block_start, 0, bh->b_size); | 743 | memset(kaddr+block_start, 0, bh->b_size); |
745 | flush_dcache_page(page); | 744 | flush_dcache_page(page); |
746 | kunmap_atomic(kaddr, KM_USER0); | 745 | kunmap_atomic(kaddr, KM_USER0); |
747 | set_buffer_uptodate(bh); | 746 | set_buffer_uptodate(bh); |
748 | mark_buffer_dirty(bh); | 747 | mark_buffer_dirty(bh); |
749 | 748 | ||
750 | next_bh: | 749 | next_bh: |
751 | block_start = block_end; | 750 | block_start = block_end; |
752 | bh = bh->b_this_page; | 751 | bh = bh->b_this_page; |
753 | } while (bh != head); | 752 | } while (bh != head); |
754 | 753 | ||
755 | return ret; | 754 | return ret; |
756 | } | 755 | } |
757 | 756 | ||
758 | /* | 757 | /* |
759 | * This will copy user data from the buffer page in the splice | 758 | * This will copy user data from the buffer page in the splice |
760 | * context. | 759 | * context. |
761 | * | 760 | * |
762 | * For now, we ignore SPLICE_F_MOVE as that would require some extra | 761 | * For now, we ignore SPLICE_F_MOVE as that would require some extra |
763 | * communication out all the way to ocfs2_write(). | 762 | * communication out all the way to ocfs2_write(). |
764 | */ | 763 | */ |
765 | int ocfs2_map_and_write_splice_data(struct inode *inode, | 764 | int ocfs2_map_and_write_splice_data(struct inode *inode, |
766 | struct ocfs2_write_ctxt *wc, u64 *p_blkno, | 765 | struct ocfs2_write_ctxt *wc, u64 *p_blkno, |
767 | unsigned int *ret_from, unsigned int *ret_to) | 766 | unsigned int *ret_from, unsigned int *ret_to) |
768 | { | 767 | { |
769 | int ret; | 768 | int ret; |
770 | unsigned int to, from, cluster_start, cluster_end; | 769 | unsigned int to, from, cluster_start, cluster_end; |
771 | char *src, *dst; | 770 | char *src, *dst; |
772 | struct ocfs2_splice_write_priv *sp = wc->w_private; | 771 | struct ocfs2_splice_write_priv *sp = wc->w_private; |
773 | struct pipe_buffer *buf = sp->s_buf; | 772 | struct pipe_buffer *buf = sp->s_buf; |
774 | unsigned long bytes, src_from; | 773 | unsigned long bytes, src_from; |
775 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 774 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
776 | 775 | ||
777 | ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, | 776 | ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, |
778 | &cluster_end); | 777 | &cluster_end); |
779 | 778 | ||
780 | from = sp->s_offset; | 779 | from = sp->s_offset; |
781 | src_from = sp->s_buf_offset; | 780 | src_from = sp->s_buf_offset; |
782 | bytes = wc->w_count; | 781 | bytes = wc->w_count; |
783 | 782 | ||
784 | if (wc->w_large_pages) { | 783 | if (wc->w_large_pages) { |
785 | /* | 784 | /* |
786 | * For cluster size < page size, we have to | 785 | * For cluster size < page size, we have to |
787 | * calculate pos within the cluster and obey | 786 | * calculate pos within the cluster and obey |
788 | * the rightmost boundary. | 787 | * the rightmost boundary. |
789 | */ | 788 | */ |
790 | bytes = min(bytes, (unsigned long)(osb->s_clustersize | 789 | bytes = min(bytes, (unsigned long)(osb->s_clustersize |
791 | - (wc->w_pos & (osb->s_clustersize - 1)))); | 790 | - (wc->w_pos & (osb->s_clustersize - 1)))); |
792 | } | 791 | } |
793 | to = from + bytes; | 792 | to = from + bytes; |
794 | 793 | ||
795 | if (wc->w_this_page_new) | 794 | if (wc->w_this_page_new) |
796 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, | 795 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, |
797 | cluster_start, cluster_end, 1); | 796 | cluster_start, cluster_end, 1); |
798 | else | 797 | else |
799 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, | 798 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, |
800 | from, to, 0); | 799 | from, to, 0); |
801 | if (ret) { | 800 | if (ret) { |
802 | mlog_errno(ret); | 801 | mlog_errno(ret); |
803 | goto out; | 802 | goto out; |
804 | } | 803 | } |
805 | 804 | ||
806 | BUG_ON(from > PAGE_CACHE_SIZE); | 805 | BUG_ON(from > PAGE_CACHE_SIZE); |
807 | BUG_ON(to > PAGE_CACHE_SIZE); | 806 | BUG_ON(to > PAGE_CACHE_SIZE); |
808 | BUG_ON(from > osb->s_clustersize); | 807 | BUG_ON(from > osb->s_clustersize); |
809 | BUG_ON(to > osb->s_clustersize); | 808 | BUG_ON(to > osb->s_clustersize); |
810 | 809 | ||
811 | src = buf->ops->map(sp->s_pipe, buf, 1); | 810 | src = buf->ops->map(sp->s_pipe, buf, 1); |
812 | dst = kmap_atomic(wc->w_this_page, KM_USER1); | 811 | dst = kmap_atomic(wc->w_this_page, KM_USER1); |
813 | memcpy(dst + from, src + src_from, bytes); | 812 | memcpy(dst + from, src + src_from, bytes); |
814 | kunmap_atomic(wc->w_this_page, KM_USER1); | 813 | kunmap_atomic(wc->w_this_page, KM_USER1); |
815 | buf->ops->unmap(sp->s_pipe, buf, src); | 814 | buf->ops->unmap(sp->s_pipe, buf, src); |
816 | 815 | ||
817 | wc->w_finished_copy = 1; | 816 | wc->w_finished_copy = 1; |
818 | 817 | ||
819 | *ret_from = from; | 818 | *ret_from = from; |
820 | *ret_to = to; | 819 | *ret_to = to; |
821 | out: | 820 | out: |
822 | 821 | ||
823 | return bytes ? (unsigned int)bytes : ret; | 822 | return bytes ? (unsigned int)bytes : ret; |
824 | } | 823 | } |
825 | 824 | ||
826 | /* | 825 | /* |
827 | * This will copy user data from the iovec in the buffered write | 826 | * This will copy user data from the iovec in the buffered write |
828 | * context. | 827 | * context. |
829 | */ | 828 | */ |
830 | int ocfs2_map_and_write_user_data(struct inode *inode, | 829 | int ocfs2_map_and_write_user_data(struct inode *inode, |
831 | struct ocfs2_write_ctxt *wc, u64 *p_blkno, | 830 | struct ocfs2_write_ctxt *wc, u64 *p_blkno, |
832 | unsigned int *ret_from, unsigned int *ret_to) | 831 | unsigned int *ret_from, unsigned int *ret_to) |
833 | { | 832 | { |
834 | int ret; | 833 | int ret; |
835 | unsigned int to, from, cluster_start, cluster_end; | 834 | unsigned int to, from, cluster_start, cluster_end; |
836 | unsigned long bytes, src_from; | 835 | unsigned long bytes, src_from; |
837 | char *dst; | 836 | char *dst; |
838 | struct ocfs2_buffered_write_priv *bp = wc->w_private; | 837 | struct ocfs2_buffered_write_priv *bp = wc->w_private; |
839 | const struct iovec *cur_iov = bp->b_cur_iov; | 838 | const struct iovec *cur_iov = bp->b_cur_iov; |
840 | char __user *buf; | 839 | char __user *buf; |
841 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 840 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
842 | 841 | ||
843 | ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, | 842 | ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, |
844 | &cluster_end); | 843 | &cluster_end); |
845 | 844 | ||
846 | buf = cur_iov->iov_base + bp->b_cur_off; | 845 | buf = cur_iov->iov_base + bp->b_cur_off; |
847 | src_from = (unsigned long)buf & ~PAGE_CACHE_MASK; | 846 | src_from = (unsigned long)buf & ~PAGE_CACHE_MASK; |
848 | 847 | ||
849 | from = wc->w_pos & (PAGE_CACHE_SIZE - 1); | 848 | from = wc->w_pos & (PAGE_CACHE_SIZE - 1); |
850 | 849 | ||
851 | /* | 850 | /* |
852 | * This is a lot of comparisons, but it reads quite | 851 | * This is a lot of comparisons, but it reads quite |
853 | * easily, which is important here. | 852 | * easily, which is important here. |
854 | */ | 853 | */ |
855 | /* Stay within the src page */ | 854 | /* Stay within the src page */ |
856 | bytes = PAGE_SIZE - src_from; | 855 | bytes = PAGE_SIZE - src_from; |
857 | /* Stay within the vector */ | 856 | /* Stay within the vector */ |
858 | bytes = min(bytes, | 857 | bytes = min(bytes, |
859 | (unsigned long)(cur_iov->iov_len - bp->b_cur_off)); | 858 | (unsigned long)(cur_iov->iov_len - bp->b_cur_off)); |
860 | /* Stay within count */ | 859 | /* Stay within count */ |
861 | bytes = min(bytes, (unsigned long)wc->w_count); | 860 | bytes = min(bytes, (unsigned long)wc->w_count); |
862 | /* | 861 | /* |
863 | * For clustersize > page size, just stay within | 862 | * For clustersize > page size, just stay within |
864 | * target page, otherwise we have to calculate pos | 863 | * target page, otherwise we have to calculate pos |
865 | * within the cluster and obey the rightmost | 864 | * within the cluster and obey the rightmost |
866 | * boundary. | 865 | * boundary. |
867 | */ | 866 | */ |
868 | if (wc->w_large_pages) { | 867 | if (wc->w_large_pages) { |
869 | /* | 868 | /* |
870 | * For cluster size < page size, we have to | 869 | * For cluster size < page size, we have to |
871 | * calculate pos within the cluster and obey | 870 | * calculate pos within the cluster and obey |
872 | * the rightmost boundary. | 871 | * the rightmost boundary. |
873 | */ | 872 | */ |
874 | bytes = min(bytes, (unsigned long)(osb->s_clustersize | 873 | bytes = min(bytes, (unsigned long)(osb->s_clustersize |
875 | - (wc->w_pos & (osb->s_clustersize - 1)))); | 874 | - (wc->w_pos & (osb->s_clustersize - 1)))); |
876 | } else { | 875 | } else { |
877 | /* | 876 | /* |
878 | * cluster size > page size is the most common | 877 | * cluster size > page size is the most common |
879 | * case - we just stay within the target page | 878 | * case - we just stay within the target page |
880 | * boundary. | 879 | * boundary. |
881 | */ | 880 | */ |
882 | bytes = min(bytes, PAGE_CACHE_SIZE - from); | 881 | bytes = min(bytes, PAGE_CACHE_SIZE - from); |
883 | } | 882 | } |
884 | 883 | ||
885 | to = from + bytes; | 884 | to = from + bytes; |
886 | 885 | ||
887 | if (wc->w_this_page_new) | 886 | if (wc->w_this_page_new) |
888 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, | 887 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, |
889 | cluster_start, cluster_end, 1); | 888 | cluster_start, cluster_end, 1); |
890 | else | 889 | else |
891 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, | 890 | ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, |
892 | from, to, 0); | 891 | from, to, 0); |
893 | if (ret) { | 892 | if (ret) { |
894 | mlog_errno(ret); | 893 | mlog_errno(ret); |
895 | goto out; | 894 | goto out; |
896 | } | 895 | } |
897 | 896 | ||
898 | BUG_ON(from > PAGE_CACHE_SIZE); | 897 | BUG_ON(from > PAGE_CACHE_SIZE); |
899 | BUG_ON(to > PAGE_CACHE_SIZE); | 898 | BUG_ON(to > PAGE_CACHE_SIZE); |
900 | BUG_ON(from > osb->s_clustersize); | 899 | BUG_ON(from > osb->s_clustersize); |
901 | BUG_ON(to > osb->s_clustersize); | 900 | BUG_ON(to > osb->s_clustersize); |
902 | 901 | ||
903 | dst = kmap(wc->w_this_page); | 902 | dst = kmap(wc->w_this_page); |
904 | memcpy(dst + from, bp->b_src_buf + src_from, bytes); | 903 | memcpy(dst + from, bp->b_src_buf + src_from, bytes); |
905 | kunmap(wc->w_this_page); | 904 | kunmap(wc->w_this_page); |
906 | 905 | ||
907 | /* | 906 | /* |
908 | * XXX: This is slow, but simple. The caller of | 907 | * XXX: This is slow, but simple. The caller of |
909 | * ocfs2_buffered_write_cluster() is responsible for | 908 | * ocfs2_buffered_write_cluster() is responsible for |
910 | * passing through the iovecs, so it's difficult to | 909 | * passing through the iovecs, so it's difficult to |
911 | * predict what our next step is in here after our | 910 | * predict what our next step is in here after our |
912 | * initial write. A future version should be pushing | 911 | * initial write. A future version should be pushing |
913 | * that iovec manipulation further down. | 912 | * that iovec manipulation further down. |
914 | * | 913 | * |
915 | * By setting this, we indicate that a copy from user | 914 | * By setting this, we indicate that a copy from user |
916 | * data was done, and subsequent calls for this | 915 | * data was done, and subsequent calls for this |
917 | * cluster will skip copying more data. | 916 | * cluster will skip copying more data. |
918 | */ | 917 | */ |
919 | wc->w_finished_copy = 1; | 918 | wc->w_finished_copy = 1; |
920 | 919 | ||
921 | *ret_from = from; | 920 | *ret_from = from; |
922 | *ret_to = to; | 921 | *ret_to = to; |
923 | out: | 922 | out: |
924 | 923 | ||
925 | return bytes ? (unsigned int)bytes : ret; | 924 | return bytes ? (unsigned int)bytes : ret; |
926 | } | 925 | } |
927 | 926 | ||
928 | /* | 927 | /* |
929 | * Map, fill and write a page to disk. | 928 | * Map, fill and write a page to disk. |
930 | * | 929 | * |
931 | * The work of copying data is done via callback. Newly allocated | 930 | * The work of copying data is done via callback. Newly allocated |
932 | * pages which don't take user data will be zero'd (set 'new' to | 931 | * pages which don't take user data will be zero'd (set 'new' to |
933 | * indicate an allocating write) | 932 | * indicate an allocating write) |
934 | * | 933 | * |
935 | * Returns a negative error code or the number of bytes copied into | 934 | * Returns a negative error code or the number of bytes copied into |
936 | * the page. | 935 | * the page. |
937 | */ | 936 | */ |
938 | int ocfs2_write_data_page(struct inode *inode, handle_t *handle, | 937 | int ocfs2_write_data_page(struct inode *inode, handle_t *handle, |
939 | u64 *p_blkno, struct page *page, | 938 | u64 *p_blkno, struct page *page, |
940 | struct ocfs2_write_ctxt *wc, int new) | 939 | struct ocfs2_write_ctxt *wc, int new) |
941 | { | 940 | { |
942 | int ret, copied = 0; | 941 | int ret, copied = 0; |
943 | unsigned int from = 0, to = 0; | 942 | unsigned int from = 0, to = 0; |
944 | unsigned int cluster_start, cluster_end; | 943 | unsigned int cluster_start, cluster_end; |
945 | unsigned int zero_from = 0, zero_to = 0; | 944 | unsigned int zero_from = 0, zero_to = 0; |
946 | 945 | ||
947 | ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), wc->w_cpos, | 946 | ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), wc->w_cpos, |
948 | &cluster_start, &cluster_end); | 947 | &cluster_start, &cluster_end); |
949 | 948 | ||
950 | if ((wc->w_pos >> PAGE_CACHE_SHIFT) == page->index | 949 | if ((wc->w_pos >> PAGE_CACHE_SHIFT) == page->index |
951 | && !wc->w_finished_copy) { | 950 | && !wc->w_finished_copy) { |
952 | 951 | ||
953 | wc->w_this_page = page; | 952 | wc->w_this_page = page; |
954 | wc->w_this_page_new = new; | 953 | wc->w_this_page_new = new; |
955 | ret = wc->w_write_data_page(inode, wc, p_blkno, &from, &to); | 954 | ret = wc->w_write_data_page(inode, wc, p_blkno, &from, &to); |
956 | if (ret < 0) { | 955 | if (ret < 0) { |
957 | mlog_errno(ret); | 956 | mlog_errno(ret); |
958 | goto out; | 957 | goto out; |
959 | } | 958 | } |
960 | 959 | ||
961 | copied = ret; | 960 | copied = ret; |
962 | 961 | ||
963 | zero_from = from; | 962 | zero_from = from; |
964 | zero_to = to; | 963 | zero_to = to; |
965 | if (new) { | 964 | if (new) { |
966 | from = cluster_start; | 965 | from = cluster_start; |
967 | to = cluster_end; | 966 | to = cluster_end; |
968 | } | 967 | } |
969 | } else { | 968 | } else { |
970 | /* | 969 | /* |
971 | * If we haven't allocated the new page yet, we | 970 | * If we haven't allocated the new page yet, we |
972 | * shouldn't be writing it out without copying user | 971 | * shouldn't be writing it out without copying user |
973 | * data. This is likely a math error from the caller. | 972 | * data. This is likely a math error from the caller. |
974 | */ | 973 | */ |
975 | BUG_ON(!new); | 974 | BUG_ON(!new); |
976 | 975 | ||
977 | from = cluster_start; | 976 | from = cluster_start; |
978 | to = cluster_end; | 977 | to = cluster_end; |
979 | 978 | ||
980 | ret = ocfs2_map_page_blocks(page, p_blkno, inode, | 979 | ret = ocfs2_map_page_blocks(page, p_blkno, inode, |
981 | cluster_start, cluster_end, 1); | 980 | cluster_start, cluster_end, 1); |
982 | if (ret) { | 981 | if (ret) { |
983 | mlog_errno(ret); | 982 | mlog_errno(ret); |
984 | goto out; | 983 | goto out; |
985 | } | 984 | } |
986 | } | 985 | } |
987 | 986 | ||
988 | /* | 987 | /* |
989 | * Parts of newly allocated pages need to be zero'd. | 988 | * Parts of newly allocated pages need to be zero'd. |
990 | * | 989 | * |
991 | * Above, we have also rewritten 'to' and 'from' - as far as | 990 | * Above, we have also rewritten 'to' and 'from' - as far as |
992 | * the rest of the function is concerned, the entire cluster | 991 | * the rest of the function is concerned, the entire cluster |
993 | * range inside of a page needs to be written. | 992 | * range inside of a page needs to be written. |
994 | * | 993 | * |
995 | * We can skip this if the page is up to date - it's already | 994 | * We can skip this if the page is up to date - it's already |
996 | * been zero'd from being read in as a hole. | 995 | * been zero'd from being read in as a hole. |
997 | */ | 996 | */ |
998 | if (new && !PageUptodate(page)) | 997 | if (new && !PageUptodate(page)) |
999 | ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb), | 998 | ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb), |
1000 | wc->w_cpos, zero_from, zero_to); | 999 | wc->w_cpos, zero_from, zero_to); |
1001 | 1000 | ||
1002 | flush_dcache_page(page); | 1001 | flush_dcache_page(page); |
1003 | 1002 | ||
1004 | if (ocfs2_should_order_data(inode)) { | 1003 | if (ocfs2_should_order_data(inode)) { |
1005 | ret = walk_page_buffers(handle, | 1004 | ret = walk_page_buffers(handle, |
1006 | page_buffers(page), | 1005 | page_buffers(page), |
1007 | from, to, NULL, | 1006 | from, to, NULL, |
1008 | ocfs2_journal_dirty_data); | 1007 | ocfs2_journal_dirty_data); |
1009 | if (ret < 0) | 1008 | if (ret < 0) |
1010 | mlog_errno(ret); | 1009 | mlog_errno(ret); |
1011 | } | 1010 | } |
1012 | 1011 | ||
1013 | /* | 1012 | /* |
1014 | * We don't use generic_commit_write() because we need to | 1013 | * We don't use generic_commit_write() because we need to |
1015 | * handle our own i_size update. | 1014 | * handle our own i_size update. |
1016 | */ | 1015 | */ |
1017 | ret = block_commit_write(page, from, to); | 1016 | ret = block_commit_write(page, from, to); |
1018 | if (ret) | 1017 | if (ret) |
1019 | mlog_errno(ret); | 1018 | mlog_errno(ret); |
1020 | out: | 1019 | out: |
1021 | 1020 | ||
1022 | return copied ? copied : ret; | 1021 | return copied ? copied : ret; |
1023 | } | 1022 | } |
1024 | 1023 | ||
1025 | /* | 1024 | /* |
1026 | * Do the actual write of some data into an inode. Optionally allocate | 1025 | * Do the actual write of some data into an inode. Optionally allocate |
1027 | * in order to fulfill the write. | 1026 | * in order to fulfill the write. |
1028 | * | 1027 | * |
1029 | * cpos is the logical cluster offset within the file to write at | 1028 | * cpos is the logical cluster offset within the file to write at |
1030 | * | 1029 | * |
1031 | * 'phys' is the physical mapping of that offset. a 'phys' value of | 1030 | * 'phys' is the physical mapping of that offset. a 'phys' value of |
1032 | * zero indicates that allocation is required. In this case, data_ac | 1031 | * zero indicates that allocation is required. In this case, data_ac |
1033 | * and meta_ac should be valid (meta_ac can be null if metadata | 1032 | * and meta_ac should be valid (meta_ac can be null if metadata |
1034 | * allocation isn't required). | 1033 | * allocation isn't required). |
1035 | */ | 1034 | */ |
1036 | static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle, | 1035 | static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle, |
1037 | struct buffer_head *di_bh, | 1036 | struct buffer_head *di_bh, |
1038 | struct ocfs2_alloc_context *data_ac, | 1037 | struct ocfs2_alloc_context *data_ac, |
1039 | struct ocfs2_alloc_context *meta_ac, | 1038 | struct ocfs2_alloc_context *meta_ac, |
1040 | struct ocfs2_write_ctxt *wc) | 1039 | struct ocfs2_write_ctxt *wc) |
1041 | { | 1040 | { |
1042 | int ret, i, numpages = 1, new; | 1041 | int ret, i, numpages = 1, new; |
1043 | unsigned int copied = 0; | 1042 | unsigned int copied = 0; |
1044 | u32 tmp_pos; | 1043 | u32 tmp_pos; |
1045 | u64 v_blkno, p_blkno; | 1044 | u64 v_blkno, p_blkno; |
1046 | struct address_space *mapping = file->f_mapping; | 1045 | struct address_space *mapping = file->f_mapping; |
1047 | struct inode *inode = mapping->host; | 1046 | struct inode *inode = mapping->host; |
1048 | unsigned long index, start; | 1047 | unsigned long index, start; |
1049 | struct page **cpages; | 1048 | struct page **cpages; |
1050 | 1049 | ||
1051 | new = phys == 0 ? 1 : 0; | 1050 | new = phys == 0 ? 1 : 0; |
1052 | 1051 | ||
1053 | /* | 1052 | /* |
1054 | * Figure out how many pages we'll be manipulating here. For | 1053 | * Figure out how many pages we'll be manipulating here. For |
1055 | * non allocating write, we just change the one | 1054 | * non allocating write, we just change the one |
1056 | * page. Otherwise, we'll need a whole clusters worth. | 1055 | * page. Otherwise, we'll need a whole clusters worth. |
1057 | */ | 1056 | */ |
1058 | if (new) | 1057 | if (new) |
1059 | numpages = ocfs2_pages_per_cluster(inode->i_sb); | 1058 | numpages = ocfs2_pages_per_cluster(inode->i_sb); |
1060 | 1059 | ||
1061 | cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS); | 1060 | cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS); |
1062 | if (!cpages) { | 1061 | if (!cpages) { |
1063 | ret = -ENOMEM; | 1062 | ret = -ENOMEM; |
1064 | mlog_errno(ret); | 1063 | mlog_errno(ret); |
1065 | return ret; | 1064 | return ret; |
1066 | } | 1065 | } |
1067 | 1066 | ||
1068 | /* | 1067 | /* |
1069 | * Fill our page array first. That way we've grabbed enough so | 1068 | * Fill our page array first. That way we've grabbed enough so |
1070 | * that we can zero and flush if we error after adding the | 1069 | * that we can zero and flush if we error after adding the |
1071 | * extent. | 1070 | * extent. |
1072 | */ | 1071 | */ |
1073 | if (new) { | 1072 | if (new) { |
1074 | start = ocfs2_align_clusters_to_page_index(inode->i_sb, | 1073 | start = ocfs2_align_clusters_to_page_index(inode->i_sb, |
1075 | wc->w_cpos); | 1074 | wc->w_cpos); |
1076 | v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, wc->w_cpos); | 1075 | v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, wc->w_cpos); |
1077 | } else { | 1076 | } else { |
1078 | start = wc->w_pos >> PAGE_CACHE_SHIFT; | 1077 | start = wc->w_pos >> PAGE_CACHE_SHIFT; |
1079 | v_blkno = wc->w_pos >> inode->i_sb->s_blocksize_bits; | 1078 | v_blkno = wc->w_pos >> inode->i_sb->s_blocksize_bits; |
1080 | } | 1079 | } |
1081 | 1080 | ||
1082 | for(i = 0; i < numpages; i++) { | 1081 | for(i = 0; i < numpages; i++) { |
1083 | index = start + i; | 1082 | index = start + i; |
1084 | 1083 | ||
1085 | cpages[i] = grab_cache_page(mapping, index); | 1084 | cpages[i] = grab_cache_page(mapping, index); |
1086 | if (!cpages[i]) { | 1085 | if (!cpages[i]) { |
1087 | ret = -ENOMEM; | 1086 | ret = -ENOMEM; |
1088 | mlog_errno(ret); | 1087 | mlog_errno(ret); |
1089 | goto out; | 1088 | goto out; |
1090 | } | 1089 | } |
1091 | } | 1090 | } |
1092 | 1091 | ||
1093 | if (new) { | 1092 | if (new) { |
1094 | /* | 1093 | /* |
1095 | * This is safe to call with the page locks - it won't take | 1094 | * This is safe to call with the page locks - it won't take |
1096 | * any additional semaphores or cluster locks. | 1095 | * any additional semaphores or cluster locks. |
1097 | */ | 1096 | */ |
1098 | tmp_pos = wc->w_cpos; | 1097 | tmp_pos = wc->w_cpos; |
1099 | ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, | 1098 | ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, |
1100 | &tmp_pos, 1, di_bh, handle, | 1099 | &tmp_pos, 1, di_bh, handle, |
1101 | data_ac, meta_ac, NULL); | 1100 | data_ac, meta_ac, NULL); |
1102 | /* | 1101 | /* |
1103 | * This shouldn't happen because we must have already | 1102 | * This shouldn't happen because we must have already |
1104 | * calculated the correct meta data allocation required. The | 1103 | * calculated the correct meta data allocation required. The |
1105 | * internal tree allocation code should know how to increase | 1104 | * internal tree allocation code should know how to increase |
1106 | * transaction credits itself. | 1105 | * transaction credits itself. |
1107 | * | 1106 | * |
1108 | * If need be, we could handle -EAGAIN for a | 1107 | * If need be, we could handle -EAGAIN for a |
1109 | * RESTART_TRANS here. | 1108 | * RESTART_TRANS here. |
1110 | */ | 1109 | */ |
1111 | mlog_bug_on_msg(ret == -EAGAIN, | 1110 | mlog_bug_on_msg(ret == -EAGAIN, |
1112 | "Inode %llu: EAGAIN return during allocation.\n", | 1111 | "Inode %llu: EAGAIN return during allocation.\n", |
1113 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 1112 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
1114 | if (ret < 0) { | 1113 | if (ret < 0) { |
1115 | mlog_errno(ret); | 1114 | mlog_errno(ret); |
1116 | goto out; | 1115 | goto out; |
1117 | } | 1116 | } |
1118 | } | 1117 | } |
1119 | 1118 | ||
1120 | ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL, | 1119 | ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL, |
1121 | NULL); | 1120 | NULL); |
1122 | if (ret < 0) { | 1121 | if (ret < 0) { |
1123 | 1122 | ||
1124 | /* | 1123 | /* |
1125 | * XXX: Should we go readonly here? | 1124 | * XXX: Should we go readonly here? |
1126 | */ | 1125 | */ |
1127 | 1126 | ||
1128 | mlog_errno(ret); | 1127 | mlog_errno(ret); |
1129 | goto out; | 1128 | goto out; |
1130 | } | 1129 | } |
1131 | 1130 | ||
1132 | BUG_ON(p_blkno == 0); | 1131 | BUG_ON(p_blkno == 0); |
1133 | 1132 | ||
1134 | for(i = 0; i < numpages; i++) { | 1133 | for(i = 0; i < numpages; i++) { |
1135 | ret = ocfs2_write_data_page(inode, handle, &p_blkno, cpages[i], | 1134 | ret = ocfs2_write_data_page(inode, handle, &p_blkno, cpages[i], |
1136 | wc, new); | 1135 | wc, new); |
1137 | if (ret < 0) { | 1136 | if (ret < 0) { |
1138 | mlog_errno(ret); | 1137 | mlog_errno(ret); |
1139 | goto out; | 1138 | goto out; |
1140 | } | 1139 | } |
1141 | 1140 | ||
1142 | copied += ret; | 1141 | copied += ret; |
1143 | } | 1142 | } |
1144 | 1143 | ||
1145 | out: | 1144 | out: |
1146 | for(i = 0; i < numpages; i++) { | 1145 | for(i = 0; i < numpages; i++) { |
1147 | unlock_page(cpages[i]); | 1146 | unlock_page(cpages[i]); |
1148 | mark_page_accessed(cpages[i]); | 1147 | mark_page_accessed(cpages[i]); |
1149 | page_cache_release(cpages[i]); | 1148 | page_cache_release(cpages[i]); |
1150 | } | 1149 | } |
1151 | kfree(cpages); | 1150 | kfree(cpages); |
1152 | 1151 | ||
1153 | return copied ? copied : ret; | 1152 | return copied ? copied : ret; |
1154 | } | 1153 | } |
1155 | 1154 | ||
1156 | static void ocfs2_write_ctxt_init(struct ocfs2_write_ctxt *wc, | 1155 | static void ocfs2_write_ctxt_init(struct ocfs2_write_ctxt *wc, |
1157 | struct ocfs2_super *osb, loff_t pos, | 1156 | struct ocfs2_super *osb, loff_t pos, |
1158 | size_t count, ocfs2_page_writer *cb, | 1157 | size_t count, ocfs2_page_writer *cb, |
1159 | void *cb_priv) | 1158 | void *cb_priv) |
1160 | { | 1159 | { |
1161 | wc->w_count = count; | 1160 | wc->w_count = count; |
1162 | wc->w_pos = pos; | 1161 | wc->w_pos = pos; |
1163 | wc->w_cpos = wc->w_pos >> osb->s_clustersize_bits; | 1162 | wc->w_cpos = wc->w_pos >> osb->s_clustersize_bits; |
1164 | wc->w_finished_copy = 0; | 1163 | wc->w_finished_copy = 0; |
1165 | 1164 | ||
1166 | if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) | 1165 | if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) |
1167 | wc->w_large_pages = 1; | 1166 | wc->w_large_pages = 1; |
1168 | else | 1167 | else |
1169 | wc->w_large_pages = 0; | 1168 | wc->w_large_pages = 0; |
1170 | 1169 | ||
1171 | wc->w_write_data_page = cb; | 1170 | wc->w_write_data_page = cb; |
1172 | wc->w_private = cb_priv; | 1171 | wc->w_private = cb_priv; |
1173 | } | 1172 | } |
1174 | 1173 | ||
1175 | /* | 1174 | /* |
1176 | * Write a cluster to an inode. The cluster may not be allocated yet, | 1175 | * Write a cluster to an inode. The cluster may not be allocated yet, |
1177 | * in which case it will be. This only exists for buffered writes - | 1176 | * in which case it will be. This only exists for buffered writes - |
1178 | * O_DIRECT takes a more "traditional" path through the kernel. | 1177 | * O_DIRECT takes a more "traditional" path through the kernel. |
1179 | * | 1178 | * |
1180 | * The caller is responsible for incrementing pos, written counts, etc | 1179 | * The caller is responsible for incrementing pos, written counts, etc |
1181 | * | 1180 | * |
1182 | * For file systems that don't support sparse files, pre-allocation | 1181 | * For file systems that don't support sparse files, pre-allocation |
1183 | * and page zeroing up until cpos should be done prior to this | 1182 | * and page zeroing up until cpos should be done prior to this |
1184 | * function call. | 1183 | * function call. |
1185 | * | 1184 | * |
1186 | * Callers should be holding i_sem, and the rw cluster lock. | 1185 | * Callers should be holding i_sem, and the rw cluster lock. |
1187 | * | 1186 | * |
1188 | * Returns the number of user bytes written, or less than zero for | 1187 | * Returns the number of user bytes written, or less than zero for |
1189 | * error. | 1188 | * error. |
1190 | */ | 1189 | */ |
1191 | ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, | 1190 | ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, |
1192 | size_t count, ocfs2_page_writer *actor, | 1191 | size_t count, ocfs2_page_writer *actor, |
1193 | void *priv) | 1192 | void *priv) |
1194 | { | 1193 | { |
1195 | int ret, credits = OCFS2_INODE_UPDATE_CREDITS; | 1194 | int ret, credits = OCFS2_INODE_UPDATE_CREDITS; |
1196 | ssize_t written = 0; | 1195 | ssize_t written = 0; |
1197 | u32 phys; | 1196 | u32 phys; |
1198 | struct inode *inode = file->f_mapping->host; | 1197 | struct inode *inode = file->f_mapping->host; |
1199 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1198 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1200 | struct buffer_head *di_bh = NULL; | 1199 | struct buffer_head *di_bh = NULL; |
1201 | struct ocfs2_dinode *di; | 1200 | struct ocfs2_dinode *di; |
1202 | struct ocfs2_alloc_context *data_ac = NULL; | 1201 | struct ocfs2_alloc_context *data_ac = NULL; |
1203 | struct ocfs2_alloc_context *meta_ac = NULL; | 1202 | struct ocfs2_alloc_context *meta_ac = NULL; |
1204 | handle_t *handle; | 1203 | handle_t *handle; |
1205 | struct ocfs2_write_ctxt wc; | 1204 | struct ocfs2_write_ctxt wc; |
1206 | 1205 | ||
1207 | ocfs2_write_ctxt_init(&wc, osb, pos, count, actor, priv); | 1206 | ocfs2_write_ctxt_init(&wc, osb, pos, count, actor, priv); |
1208 | 1207 | ||
1209 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | 1208 | ret = ocfs2_meta_lock(inode, &di_bh, 1); |
1210 | if (ret) { | 1209 | if (ret) { |
1211 | mlog_errno(ret); | 1210 | mlog_errno(ret); |
1212 | goto out; | 1211 | goto out; |
1213 | } | 1212 | } |
1214 | di = (struct ocfs2_dinode *)di_bh->b_data; | 1213 | di = (struct ocfs2_dinode *)di_bh->b_data; |
1215 | 1214 | ||
1216 | /* | 1215 | /* |
1217 | * Take alloc sem here to prevent concurrent lookups. That way | 1216 | * Take alloc sem here to prevent concurrent lookups. That way |
1218 | * the mapping, zeroing and tree manipulation within | 1217 | * the mapping, zeroing and tree manipulation within |
1219 | * ocfs2_write() will be safe against ->readpage(). This | 1218 | * ocfs2_write() will be safe against ->readpage(). This |
1220 | * should also serve to lock out allocation from a shared | 1219 | * should also serve to lock out allocation from a shared |
1221 | * writeable region. | 1220 | * writeable region. |
1222 | */ | 1221 | */ |
1223 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 1222 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
1224 | 1223 | ||
1225 | ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL, NULL); | 1224 | ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL, NULL); |
1226 | if (ret) { | 1225 | if (ret) { |
1227 | mlog_errno(ret); | 1226 | mlog_errno(ret); |
1228 | goto out_meta; | 1227 | goto out_meta; |
1229 | } | 1228 | } |
1230 | 1229 | ||
1231 | /* phys == 0 means that allocation is required. */ | 1230 | /* phys == 0 means that allocation is required. */ |
1232 | if (phys == 0) { | 1231 | if (phys == 0) { |
1233 | ret = ocfs2_lock_allocators(inode, di, 1, &data_ac, &meta_ac); | 1232 | ret = ocfs2_lock_allocators(inode, di, 1, &data_ac, &meta_ac); |
1234 | if (ret) { | 1233 | if (ret) { |
1235 | mlog_errno(ret); | 1234 | mlog_errno(ret); |
1236 | goto out_meta; | 1235 | goto out_meta; |
1237 | } | 1236 | } |
1238 | 1237 | ||
1239 | credits = ocfs2_calc_extend_credits(inode->i_sb, di, 1); | 1238 | credits = ocfs2_calc_extend_credits(inode->i_sb, di, 1); |
1240 | } | 1239 | } |
1241 | 1240 | ||
1242 | ret = ocfs2_data_lock(inode, 1); | 1241 | ret = ocfs2_data_lock(inode, 1); |
1243 | if (ret) { | 1242 | if (ret) { |
1244 | mlog_errno(ret); | 1243 | mlog_errno(ret); |
1245 | goto out_meta; | 1244 | goto out_meta; |
1246 | } | 1245 | } |
1247 | 1246 | ||
1248 | handle = ocfs2_start_trans(osb, credits); | 1247 | handle = ocfs2_start_trans(osb, credits); |
1249 | if (IS_ERR(handle)) { | 1248 | if (IS_ERR(handle)) { |
1250 | ret = PTR_ERR(handle); | 1249 | ret = PTR_ERR(handle); |
1251 | mlog_errno(ret); | 1250 | mlog_errno(ret); |
1252 | goto out_data; | 1251 | goto out_data; |
1253 | } | 1252 | } |
1254 | 1253 | ||
1255 | written = ocfs2_write(file, phys, handle, di_bh, data_ac, | 1254 | written = ocfs2_write(file, phys, handle, di_bh, data_ac, |
1256 | meta_ac, &wc); | 1255 | meta_ac, &wc); |
1257 | if (written < 0) { | 1256 | if (written < 0) { |
1258 | ret = written; | 1257 | ret = written; |
1259 | mlog_errno(ret); | 1258 | mlog_errno(ret); |
1260 | goto out_commit; | 1259 | goto out_commit; |
1261 | } | 1260 | } |
1262 | 1261 | ||
1263 | ret = ocfs2_journal_access(handle, inode, di_bh, | 1262 | ret = ocfs2_journal_access(handle, inode, di_bh, |
1264 | OCFS2_JOURNAL_ACCESS_WRITE); | 1263 | OCFS2_JOURNAL_ACCESS_WRITE); |
1265 | if (ret) { | 1264 | if (ret) { |
1266 | mlog_errno(ret); | 1265 | mlog_errno(ret); |
1267 | goto out_commit; | 1266 | goto out_commit; |
1268 | } | 1267 | } |
1269 | 1268 | ||
1270 | pos += written; | 1269 | pos += written; |
1271 | if (pos > inode->i_size) { | 1270 | if (pos > inode->i_size) { |
1272 | i_size_write(inode, pos); | 1271 | i_size_write(inode, pos); |
1273 | mark_inode_dirty(inode); | 1272 | mark_inode_dirty(inode); |
1274 | } | 1273 | } |
1275 | inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode))); | 1274 | inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode))); |
1276 | di->i_size = cpu_to_le64((u64)i_size_read(inode)); | 1275 | di->i_size = cpu_to_le64((u64)i_size_read(inode)); |
1277 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 1276 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
1278 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); | 1277 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); |
1279 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 1278 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
1280 | 1279 | ||
1281 | ret = ocfs2_journal_dirty(handle, di_bh); | 1280 | ret = ocfs2_journal_dirty(handle, di_bh); |
1282 | if (ret) | 1281 | if (ret) |
1283 | mlog_errno(ret); | 1282 | mlog_errno(ret); |
1284 | 1283 | ||
1285 | out_commit: | 1284 | out_commit: |
1286 | ocfs2_commit_trans(osb, handle); | 1285 | ocfs2_commit_trans(osb, handle); |
1287 | 1286 | ||
1288 | out_data: | 1287 | out_data: |
1289 | ocfs2_data_unlock(inode, 1); | 1288 | ocfs2_data_unlock(inode, 1); |
1290 | 1289 | ||
1291 | out_meta: | 1290 | out_meta: |
1292 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 1291 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
1293 | ocfs2_meta_unlock(inode, 1); | 1292 | ocfs2_meta_unlock(inode, 1); |
1294 | 1293 | ||
1295 | out: | 1294 | out: |
1296 | brelse(di_bh); | 1295 | brelse(di_bh); |
1297 | if (data_ac) | 1296 | if (data_ac) |
1298 | ocfs2_free_alloc_context(data_ac); | 1297 | ocfs2_free_alloc_context(data_ac); |
1299 | if (meta_ac) | 1298 | if (meta_ac) |
1300 | ocfs2_free_alloc_context(meta_ac); | 1299 | ocfs2_free_alloc_context(meta_ac); |
1301 | 1300 | ||
1302 | return written ? written : ret; | 1301 | return written ? written : ret; |
1303 | } | 1302 | } |
1304 | 1303 | ||
1305 | const struct address_space_operations ocfs2_aops = { | 1304 | const struct address_space_operations ocfs2_aops = { |
1306 | .readpage = ocfs2_readpage, | 1305 | .readpage = ocfs2_readpage, |
1307 | .writepage = ocfs2_writepage, | 1306 | .writepage = ocfs2_writepage, |
1308 | .bmap = ocfs2_bmap, | 1307 | .bmap = ocfs2_bmap, |
1309 | .sync_page = block_sync_page, | 1308 | .sync_page = block_sync_page, |
1310 | .direct_IO = ocfs2_direct_IO, | 1309 | .direct_IO = ocfs2_direct_IO, |
1311 | .invalidatepage = ocfs2_invalidatepage, | 1310 | .invalidatepage = ocfs2_invalidatepage, |
1312 | .releasepage = ocfs2_releasepage, | 1311 | .releasepage = ocfs2_releasepage, |
1313 | .migratepage = buffer_migrate_page, | 1312 | .migratepage = buffer_migrate_page, |
1314 | }; | 1313 | }; |
1315 | 1314 |
fs/ocfs2/extent_map.c
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * extent_map.c | 4 | * extent_map.c |
5 | * | 5 | * |
6 | * Block/Cluster mapping functions | 6 | * Block/Cluster mapping functions |
7 | * | 7 | * |
8 | * Copyright (C) 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2004 Oracle. All rights reserved. |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public | 11 | * modify it under the terms of the GNU General Public |
12 | * License, version 2, as published by the Free Software Foundation. | 12 | * License, version 2, as published by the Free Software Foundation. |
13 | * | 13 | * |
14 | * This program is distributed in the hope that it will be useful, | 14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | * General Public License for more details. | 17 | * General Public License for more details. |
18 | * | 18 | * |
19 | * You should have received a copy of the GNU General Public | 19 | * You should have received a copy of the GNU General Public |
20 | * License along with this program; if not, write to the | 20 | * License along with this program; if not, write to the |
21 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 21 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
22 | * Boston, MA 02111-1307, USA. | 22 | * Boston, MA 02111-1307, USA. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | 28 | ||
29 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP | 29 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP |
30 | #include <cluster/masklog.h> | 30 | #include <cluster/masklog.h> |
31 | 31 | ||
32 | #include "ocfs2.h" | 32 | #include "ocfs2.h" |
33 | 33 | ||
34 | #include "alloc.h" | 34 | #include "alloc.h" |
35 | #include "extent_map.h" | 35 | #include "extent_map.h" |
36 | #include "inode.h" | 36 | #include "inode.h" |
37 | #include "super.h" | 37 | #include "super.h" |
38 | 38 | ||
39 | #include "buffer_head_io.h" | 39 | #include "buffer_head_io.h" |
40 | 40 | ||
41 | /* | 41 | /* |
42 | * Return the 1st index within el which contains an extent start | ||
43 | * larger than v_cluster. | ||
44 | */ | ||
45 | static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el, | ||
46 | u32 v_cluster) | ||
47 | { | ||
48 | int i; | ||
49 | struct ocfs2_extent_rec *rec; | ||
50 | |||
51 | for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { | ||
52 | rec = &el->l_recs[i]; | ||
53 | |||
54 | if (v_cluster < le32_to_cpu(rec->e_cpos)) | ||
55 | break; | ||
56 | } | ||
57 | |||
58 | return i; | ||
59 | } | ||
60 | |||
61 | /* | ||
62 | * Figure out the size of a hole which starts at v_cluster within the given | ||
63 | * extent list. | ||
64 | * | ||
65 | * If there is no more allocation past v_cluster, we return the maximum | ||
66 | * cluster size minus v_cluster. | ||
67 | * | ||
68 | * If we have in-inode extents, then el points to the dinode list and | ||
69 | * eb_bh is NULL. Otherwise, eb_bh should point to the extent block | ||
70 | * containing el. | ||
71 | */ | ||
72 | static int ocfs2_figure_hole_clusters(struct inode *inode, | ||
73 | struct ocfs2_extent_list *el, | ||
74 | struct buffer_head *eb_bh, | ||
75 | u32 v_cluster, | ||
76 | u32 *num_clusters) | ||
77 | { | ||
78 | int ret, i; | ||
79 | struct buffer_head *next_eb_bh = NULL; | ||
80 | struct ocfs2_extent_block *eb, *next_eb; | ||
81 | |||
82 | i = ocfs2_search_for_hole_index(el, v_cluster); | ||
83 | |||
84 | if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) { | ||
85 | eb = (struct ocfs2_extent_block *)eb_bh->b_data; | ||
86 | |||
87 | /* | ||
88 | * Check the next leaf for any extents. | ||
89 | */ | ||
90 | |||
91 | if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) | ||
92 | goto no_more_extents; | ||
93 | |||
94 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | ||
95 | le64_to_cpu(eb->h_next_leaf_blk), | ||
96 | &next_eb_bh, OCFS2_BH_CACHED, inode); | ||
97 | if (ret) { | ||
98 | mlog_errno(ret); | ||
99 | goto out; | ||
100 | } | ||
101 | next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data; | ||
102 | |||
103 | if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) { | ||
104 | ret = -EROFS; | ||
105 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, next_eb); | ||
106 | goto out; | ||
107 | } | ||
108 | |||
109 | el = &next_eb->h_list; | ||
110 | |||
111 | i = ocfs2_search_for_hole_index(el, v_cluster); | ||
112 | } | ||
113 | |||
114 | no_more_extents: | ||
115 | if (i == le16_to_cpu(el->l_next_free_rec)) { | ||
116 | /* | ||
117 | * We're at the end of our existing allocation. Just | ||
118 | * return the maximum number of clusters we could | ||
119 | * possibly allocate. | ||
120 | */ | ||
121 | *num_clusters = UINT_MAX - v_cluster; | ||
122 | } else { | ||
123 | *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster; | ||
124 | } | ||
125 | |||
126 | ret = 0; | ||
127 | out: | ||
128 | brelse(next_eb_bh); | ||
129 | return ret; | ||
130 | } | ||
131 | |||
132 | /* | ||
42 | * Return the index of the extent record which contains cluster #v_cluster. | 133 | * Return the index of the extent record which contains cluster #v_cluster. |
43 | * -1 is returned if it was not found. | 134 | * -1 is returned if it was not found. |
44 | * | 135 | * |
45 | * Should work fine on interior and exterior nodes. | 136 | * Should work fine on interior and exterior nodes. |
46 | */ | 137 | */ |
47 | static int ocfs2_search_extent_list(struct ocfs2_extent_list *el, | 138 | static int ocfs2_search_extent_list(struct ocfs2_extent_list *el, |
48 | u32 v_cluster) | 139 | u32 v_cluster) |
49 | { | 140 | { |
50 | int ret = -1; | 141 | int ret = -1; |
51 | int i; | 142 | int i; |
52 | struct ocfs2_extent_rec *rec; | 143 | struct ocfs2_extent_rec *rec; |
53 | u32 rec_end, rec_start, clusters; | 144 | u32 rec_end, rec_start, clusters; |
54 | 145 | ||
55 | for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { | 146 | for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { |
56 | rec = &el->l_recs[i]; | 147 | rec = &el->l_recs[i]; |
57 | 148 | ||
58 | rec_start = le32_to_cpu(rec->e_cpos); | 149 | rec_start = le32_to_cpu(rec->e_cpos); |
59 | clusters = ocfs2_rec_clusters(el, rec); | 150 | clusters = ocfs2_rec_clusters(el, rec); |
60 | 151 | ||
61 | rec_end = rec_start + clusters; | 152 | rec_end = rec_start + clusters; |
62 | 153 | ||
63 | if (v_cluster >= rec_start && v_cluster < rec_end) { | 154 | if (v_cluster >= rec_start && v_cluster < rec_end) { |
64 | ret = i; | 155 | ret = i; |
65 | break; | 156 | break; |
66 | } | 157 | } |
67 | } | 158 | } |
68 | 159 | ||
69 | return ret; | 160 | return ret; |
70 | } | 161 | } |
71 | 162 | ||
72 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | 163 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, |
73 | u32 *p_cluster, u32 *num_clusters, | 164 | u32 *p_cluster, u32 *num_clusters, |
74 | unsigned int *extent_flags) | 165 | unsigned int *extent_flags) |
75 | { | 166 | { |
76 | int ret, i; | 167 | int ret, i; |
77 | unsigned int flags = 0; | 168 | unsigned int flags = 0; |
78 | struct buffer_head *di_bh = NULL; | 169 | struct buffer_head *di_bh = NULL; |
79 | struct buffer_head *eb_bh = NULL; | 170 | struct buffer_head *eb_bh = NULL; |
80 | struct ocfs2_dinode *di; | 171 | struct ocfs2_dinode *di; |
81 | struct ocfs2_extent_block *eb; | 172 | struct ocfs2_extent_block *eb; |
82 | struct ocfs2_extent_list *el; | 173 | struct ocfs2_extent_list *el; |
83 | struct ocfs2_extent_rec *rec; | 174 | struct ocfs2_extent_rec *rec; |
84 | u32 coff; | 175 | u32 coff; |
85 | 176 | ||
86 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, | 177 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, |
87 | &di_bh, OCFS2_BH_CACHED, inode); | 178 | &di_bh, OCFS2_BH_CACHED, inode); |
88 | if (ret) { | 179 | if (ret) { |
89 | mlog_errno(ret); | 180 | mlog_errno(ret); |
90 | goto out; | 181 | goto out; |
91 | } | 182 | } |
92 | 183 | ||
93 | di = (struct ocfs2_dinode *) di_bh->b_data; | 184 | di = (struct ocfs2_dinode *) di_bh->b_data; |
94 | el = &di->id2.i_list; | 185 | el = &di->id2.i_list; |
95 | 186 | ||
96 | if (el->l_tree_depth) { | 187 | if (el->l_tree_depth) { |
97 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); | 188 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); |
98 | if (ret) { | 189 | if (ret) { |
99 | mlog_errno(ret); | 190 | mlog_errno(ret); |
100 | goto out; | 191 | goto out; |
101 | } | 192 | } |
102 | 193 | ||
103 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | 194 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; |
104 | el = &eb->h_list; | 195 | el = &eb->h_list; |
105 | 196 | ||
106 | if (el->l_tree_depth) { | 197 | if (el->l_tree_depth) { |
107 | ocfs2_error(inode->i_sb, | 198 | ocfs2_error(inode->i_sb, |
108 | "Inode %lu has non zero tree depth in " | 199 | "Inode %lu has non zero tree depth in " |
109 | "leaf block %llu\n", inode->i_ino, | 200 | "leaf block %llu\n", inode->i_ino, |
110 | (unsigned long long)eb_bh->b_blocknr); | 201 | (unsigned long long)eb_bh->b_blocknr); |
111 | ret = -EROFS; | 202 | ret = -EROFS; |
112 | goto out; | 203 | goto out; |
113 | } | 204 | } |
114 | } | 205 | } |
115 | 206 | ||
116 | i = ocfs2_search_extent_list(el, v_cluster); | 207 | i = ocfs2_search_extent_list(el, v_cluster); |
117 | if (i == -1) { | 208 | if (i == -1) { |
118 | /* | 209 | /* |
119 | * A hole was found. Return some canned values that | 210 | * A hole was found. Return some canned values that |
120 | * callers can key on. | 211 | * callers can key on. If asked for, num_clusters will |
212 | * be populated with the size of the hole. | ||
121 | */ | 213 | */ |
122 | *p_cluster = 0; | 214 | *p_cluster = 0; |
123 | if (num_clusters) | 215 | if (num_clusters) { |
124 | *num_clusters = 1; | 216 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, |
217 | v_cluster, | ||
218 | num_clusters); | ||
219 | if (ret) { | ||
220 | mlog_errno(ret); | ||
221 | goto out; | ||
222 | } | ||
223 | } | ||
125 | } else { | 224 | } else { |
126 | rec = &el->l_recs[i]; | 225 | rec = &el->l_recs[i]; |
127 | 226 | ||
128 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); | 227 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); |
129 | 228 | ||
130 | if (!rec->e_blkno) { | 229 | if (!rec->e_blkno) { |
131 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | 230 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " |
132 | "record (%u, %u, 0)", inode->i_ino, | 231 | "record (%u, %u, 0)", inode->i_ino, |
133 | le32_to_cpu(rec->e_cpos), | 232 | le32_to_cpu(rec->e_cpos), |
134 | ocfs2_rec_clusters(el, rec)); | 233 | ocfs2_rec_clusters(el, rec)); |
135 | ret = -EROFS; | 234 | ret = -EROFS; |
136 | goto out; | 235 | goto out; |
137 | } | 236 | } |
138 | 237 | ||
139 | coff = v_cluster - le32_to_cpu(rec->e_cpos); | 238 | coff = v_cluster - le32_to_cpu(rec->e_cpos); |
140 | 239 | ||
141 | *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, | 240 | *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, |
142 | le64_to_cpu(rec->e_blkno)); | 241 | le64_to_cpu(rec->e_blkno)); |
143 | *p_cluster = *p_cluster + coff; | 242 | *p_cluster = *p_cluster + coff; |
144 | 243 | ||
145 | if (num_clusters) | 244 | if (num_clusters) |
146 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; | 245 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; |
147 | 246 | ||
148 | flags = rec->e_flags; | 247 | flags = rec->e_flags; |
149 | } | 248 | } |
150 | 249 | ||
151 | if (extent_flags) | 250 | if (extent_flags) |
152 | *extent_flags = flags; | 251 | *extent_flags = flags; |
153 | 252 | ||
154 | out: | 253 | out: |
155 | brelse(di_bh); | 254 | brelse(di_bh); |
156 | brelse(eb_bh); | 255 | brelse(eb_bh); |
157 | return ret; | 256 | return ret; |
158 | } | 257 | } |
159 | 258 | ||
160 | /* | 259 | /* |
161 | * This expects alloc_sem to be held. The allocation cannot change at | 260 | * This expects alloc_sem to be held. The allocation cannot change at |
162 | * all while the map is in the process of being updated. | 261 | * all while the map is in the process of being updated. |
163 | */ | 262 | */ |
164 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | 263 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, |
165 | int *ret_count, unsigned int *extent_flags) | 264 | u64 *ret_count, unsigned int *extent_flags) |
166 | { | 265 | { |
167 | int ret; | 266 | int ret; |
168 | int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | 267 | int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); |
169 | u32 cpos, num_clusters, p_cluster; | 268 | u32 cpos, num_clusters, p_cluster; |
170 | u64 boff = 0; | 269 | u64 boff = 0; |
171 | 270 | ||
172 | cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno); | 271 | cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno); |
173 | 272 | ||
174 | ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters, | 273 | ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters, |
175 | extent_flags); | 274 | extent_flags); |
176 | if (ret) { | 275 | if (ret) { |
177 | mlog_errno(ret); | 276 | mlog_errno(ret); |
178 | goto out; | 277 | goto out; |
179 | } | 278 | } |
180 | 279 | ||
181 | /* | 280 | /* |
182 | * p_cluster == 0 indicates a hole. | 281 | * p_cluster == 0 indicates a hole. |
183 | */ | 282 | */ |
184 | if (p_cluster) { | 283 | if (p_cluster) { |
185 | boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); | 284 | boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); |
186 | boff += (v_blkno & (u64)(bpc - 1)); | 285 | boff += (v_blkno & (u64)(bpc - 1)); |
187 | } | 286 | } |
188 | 287 | ||
189 | *p_blkno = boff; | 288 | *p_blkno = boff; |
190 | 289 | ||
191 | if (ret_count) { | 290 | if (ret_count) { |
192 | *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); | 291 | *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); |
193 | *ret_count -= v_blkno & (u64)(bpc - 1); | 292 | *ret_count -= v_blkno & (u64)(bpc - 1); |
194 | } | 293 | } |
195 | 294 | ||
196 | out: | 295 | out: |
197 | return ret; | 296 | return ret; |
198 | } | 297 | } |
199 | 298 |
fs/ocfs2/extent_map.h
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * extent_map.h | 4 | * extent_map.h |
5 | * | 5 | * |
6 | * In-memory file extent mappings for OCFS2. | 6 | * In-memory file extent mappings for OCFS2. |
7 | * | 7 | * |
8 | * Copyright (C) 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2004 Oracle. All rights reserved. |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public | 11 | * modify it under the terms of the GNU General Public |
12 | * License, version 2, as published by the Free Software Foundation. | 12 | * License, version 2, as published by the Free Software Foundation. |
13 | * | 13 | * |
14 | * This program is distributed in the hope that it will be useful, | 14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | * General Public License for more details. | 17 | * General Public License for more details. |
18 | * | 18 | * |
19 | * You should have received a copy of the GNU General Public | 19 | * You should have received a copy of the GNU General Public |
20 | * License along with this program; if not, write to the | 20 | * License along with this program; if not, write to the |
21 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 21 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
22 | * Boston, MA 02111-1307, USA. | 22 | * Boston, MA 02111-1307, USA. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #ifndef _EXTENT_MAP_H | 25 | #ifndef _EXTENT_MAP_H |
26 | #define _EXTENT_MAP_H | 26 | #define _EXTENT_MAP_H |
27 | 27 | ||
28 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, | 28 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, |
29 | u32 *num_clusters, unsigned int *extent_flags); | 29 | u32 *num_clusters, unsigned int *extent_flags); |
30 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | 30 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, |
31 | int *ret_count, unsigned int *extent_flags); | 31 | u64 *ret_count, unsigned int *extent_flags); |
32 | 32 | ||
33 | #endif /* _EXTENT_MAP_H */ | 33 | #endif /* _EXTENT_MAP_H */ |
34 | 34 |
fs/ocfs2/journal.c
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * journal.c | 4 | * journal.c |
5 | * | 5 | * |
6 | * Defines functions of the journalling API | 6 | * Defines functions of the journalling API |
7 | * | 7 | * |
8 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public | 11 | * modify it under the terms of the GNU General Public |
12 | * License as published by the Free Software Foundation; either | 12 | * License as published by the Free Software Foundation; either |
13 | * version 2 of the License, or (at your option) any later version. | 13 | * version 2 of the License, or (at your option) any later version. |
14 | * | 14 | * |
15 | * This program is distributed in the hope that it will be useful, | 15 | * This program is distributed in the hope that it will be useful, |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 | * General Public License for more details. | 18 | * General Public License for more details. |
19 | * | 19 | * |
20 | * You should have received a copy of the GNU General Public | 20 | * You should have received a copy of the GNU General Public |
21 | * License along with this program; if not, write to the | 21 | * License along with this program; if not, write to the |
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
22 | * Boston, MA 02111-1307, USA. | 22 | * Boston, MA 02111-1307, USA. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
30 | #include <linux/kthread.h> | 30 | #include <linux/kthread.h> |
31 | 31 | ||
32 | #define MLOG_MASK_PREFIX ML_JOURNAL | 32 | #define MLOG_MASK_PREFIX ML_JOURNAL |
33 | #include <cluster/masklog.h> | 33 | #include <cluster/masklog.h> |
34 | 34 | ||
35 | #include "ocfs2.h" | 35 | #include "ocfs2.h" |
36 | 36 | ||
37 | #include "alloc.h" | 37 | #include "alloc.h" |
38 | #include "dlmglue.h" | 38 | #include "dlmglue.h" |
39 | #include "extent_map.h" | 39 | #include "extent_map.h" |
40 | #include "heartbeat.h" | 40 | #include "heartbeat.h" |
41 | #include "inode.h" | 41 | #include "inode.h" |
42 | #include "journal.h" | 42 | #include "journal.h" |
43 | #include "localalloc.h" | 43 | #include "localalloc.h" |
44 | #include "namei.h" | 44 | #include "namei.h" |
45 | #include "slot_map.h" | 45 | #include "slot_map.h" |
46 | #include "super.h" | 46 | #include "super.h" |
47 | #include "vote.h" | 47 | #include "vote.h" |
48 | #include "sysfile.h" | 48 | #include "sysfile.h" |
49 | 49 | ||
50 | #include "buffer_head_io.h" | 50 | #include "buffer_head_io.h" |
51 | 51 | ||
52 | DEFINE_SPINLOCK(trans_inc_lock); | 52 | DEFINE_SPINLOCK(trans_inc_lock); |
53 | 53 | ||
54 | static int ocfs2_force_read_journal(struct inode *inode); | 54 | static int ocfs2_force_read_journal(struct inode *inode); |
55 | static int ocfs2_recover_node(struct ocfs2_super *osb, | 55 | static int ocfs2_recover_node(struct ocfs2_super *osb, |
56 | int node_num); | 56 | int node_num); |
57 | static int __ocfs2_recovery_thread(void *arg); | 57 | static int __ocfs2_recovery_thread(void *arg); |
58 | static int ocfs2_commit_cache(struct ocfs2_super *osb); | 58 | static int ocfs2_commit_cache(struct ocfs2_super *osb); |
59 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb); | 59 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb); |
60 | static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, | 60 | static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, |
61 | int dirty); | 61 | int dirty); |
62 | static int ocfs2_trylock_journal(struct ocfs2_super *osb, | 62 | static int ocfs2_trylock_journal(struct ocfs2_super *osb, |
63 | int slot_num); | 63 | int slot_num); |
64 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, | 64 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, |
65 | int slot); | 65 | int slot); |
66 | static int ocfs2_commit_thread(void *arg); | 66 | static int ocfs2_commit_thread(void *arg); |
67 | 67 | ||
68 | static int ocfs2_commit_cache(struct ocfs2_super *osb) | 68 | static int ocfs2_commit_cache(struct ocfs2_super *osb) |
69 | { | 69 | { |
70 | int status = 0; | 70 | int status = 0; |
71 | unsigned int flushed; | 71 | unsigned int flushed; |
72 | unsigned long old_id; | 72 | unsigned long old_id; |
73 | struct ocfs2_journal *journal = NULL; | 73 | struct ocfs2_journal *journal = NULL; |
74 | 74 | ||
75 | mlog_entry_void(); | 75 | mlog_entry_void(); |
76 | 76 | ||
77 | journal = osb->journal; | 77 | journal = osb->journal; |
78 | 78 | ||
79 | /* Flush all pending commits and checkpoint the journal. */ | 79 | /* Flush all pending commits and checkpoint the journal. */ |
80 | down_write(&journal->j_trans_barrier); | 80 | down_write(&journal->j_trans_barrier); |
81 | 81 | ||
82 | if (atomic_read(&journal->j_num_trans) == 0) { | 82 | if (atomic_read(&journal->j_num_trans) == 0) { |
83 | up_write(&journal->j_trans_barrier); | 83 | up_write(&journal->j_trans_barrier); |
84 | mlog(0, "No transactions for me to flush!\n"); | 84 | mlog(0, "No transactions for me to flush!\n"); |
85 | goto finally; | 85 | goto finally; |
86 | } | 86 | } |
87 | 87 | ||
88 | journal_lock_updates(journal->j_journal); | 88 | journal_lock_updates(journal->j_journal); |
89 | status = journal_flush(journal->j_journal); | 89 | status = journal_flush(journal->j_journal); |
90 | journal_unlock_updates(journal->j_journal); | 90 | journal_unlock_updates(journal->j_journal); |
91 | if (status < 0) { | 91 | if (status < 0) { |
92 | up_write(&journal->j_trans_barrier); | 92 | up_write(&journal->j_trans_barrier); |
93 | mlog_errno(status); | 93 | mlog_errno(status); |
94 | goto finally; | 94 | goto finally; |
95 | } | 95 | } |
96 | 96 | ||
97 | old_id = ocfs2_inc_trans_id(journal); | 97 | old_id = ocfs2_inc_trans_id(journal); |
98 | 98 | ||
99 | flushed = atomic_read(&journal->j_num_trans); | 99 | flushed = atomic_read(&journal->j_num_trans); |
100 | atomic_set(&journal->j_num_trans, 0); | 100 | atomic_set(&journal->j_num_trans, 0); |
101 | up_write(&journal->j_trans_barrier); | 101 | up_write(&journal->j_trans_barrier); |
102 | 102 | ||
103 | mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", | 103 | mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", |
104 | journal->j_trans_id, flushed); | 104 | journal->j_trans_id, flushed); |
105 | 105 | ||
106 | ocfs2_kick_vote_thread(osb); | 106 | ocfs2_kick_vote_thread(osb); |
107 | wake_up(&journal->j_checkpointed); | 107 | wake_up(&journal->j_checkpointed); |
108 | finally: | 108 | finally: |
109 | mlog_exit(status); | 109 | mlog_exit(status); |
110 | return status; | 110 | return status; |
111 | } | 111 | } |
112 | 112 | ||
113 | /* pass it NULL and it will allocate a new handle object for you. If | 113 | /* pass it NULL and it will allocate a new handle object for you. If |
114 | * you pass it a handle however, it may still return error, in which | 114 | * you pass it a handle however, it may still return error, in which |
115 | * case it has free'd the passed handle for you. */ | 115 | * case it has free'd the passed handle for you. */ |
116 | handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) | 116 | handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) |
117 | { | 117 | { |
118 | journal_t *journal = osb->journal->j_journal; | 118 | journal_t *journal = osb->journal->j_journal; |
119 | handle_t *handle; | 119 | handle_t *handle; |
120 | 120 | ||
121 | BUG_ON(!osb || !osb->journal->j_journal); | 121 | BUG_ON(!osb || !osb->journal->j_journal); |
122 | 122 | ||
123 | if (ocfs2_is_hard_readonly(osb)) | 123 | if (ocfs2_is_hard_readonly(osb)) |
124 | return ERR_PTR(-EROFS); | 124 | return ERR_PTR(-EROFS); |
125 | 125 | ||
126 | BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); | 126 | BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); |
127 | BUG_ON(max_buffs <= 0); | 127 | BUG_ON(max_buffs <= 0); |
128 | 128 | ||
129 | /* JBD might support this, but our journalling code doesn't yet. */ | 129 | /* JBD might support this, but our journalling code doesn't yet. */ |
130 | if (journal_current_handle()) { | 130 | if (journal_current_handle()) { |
131 | mlog(ML_ERROR, "Recursive transaction attempted!\n"); | 131 | mlog(ML_ERROR, "Recursive transaction attempted!\n"); |
132 | BUG(); | 132 | BUG(); |
133 | } | 133 | } |
134 | 134 | ||
135 | down_read(&osb->journal->j_trans_barrier); | 135 | down_read(&osb->journal->j_trans_barrier); |
136 | 136 | ||
137 | handle = journal_start(journal, max_buffs); | 137 | handle = journal_start(journal, max_buffs); |
138 | if (IS_ERR(handle)) { | 138 | if (IS_ERR(handle)) { |
139 | up_read(&osb->journal->j_trans_barrier); | 139 | up_read(&osb->journal->j_trans_barrier); |
140 | 140 | ||
141 | mlog_errno(PTR_ERR(handle)); | 141 | mlog_errno(PTR_ERR(handle)); |
142 | 142 | ||
143 | if (is_journal_aborted(journal)) { | 143 | if (is_journal_aborted(journal)) { |
144 | ocfs2_abort(osb->sb, "Detected aborted journal"); | 144 | ocfs2_abort(osb->sb, "Detected aborted journal"); |
145 | handle = ERR_PTR(-EROFS); | 145 | handle = ERR_PTR(-EROFS); |
146 | } | 146 | } |
147 | } else { | 147 | } else { |
148 | if (!ocfs2_mount_local(osb)) | 148 | if (!ocfs2_mount_local(osb)) |
149 | atomic_inc(&(osb->journal->j_num_trans)); | 149 | atomic_inc(&(osb->journal->j_num_trans)); |
150 | } | 150 | } |
151 | 151 | ||
152 | return handle; | 152 | return handle; |
153 | } | 153 | } |
154 | 154 | ||
155 | int ocfs2_commit_trans(struct ocfs2_super *osb, | 155 | int ocfs2_commit_trans(struct ocfs2_super *osb, |
156 | handle_t *handle) | 156 | handle_t *handle) |
157 | { | 157 | { |
158 | int ret; | 158 | int ret; |
159 | struct ocfs2_journal *journal = osb->journal; | 159 | struct ocfs2_journal *journal = osb->journal; |
160 | 160 | ||
161 | BUG_ON(!handle); | 161 | BUG_ON(!handle); |
162 | 162 | ||
163 | ret = journal_stop(handle); | 163 | ret = journal_stop(handle); |
164 | if (ret < 0) | 164 | if (ret < 0) |
165 | mlog_errno(ret); | 165 | mlog_errno(ret); |
166 | 166 | ||
167 | up_read(&journal->j_trans_barrier); | 167 | up_read(&journal->j_trans_barrier); |
168 | 168 | ||
169 | return ret; | 169 | return ret; |
170 | } | 170 | } |
171 | 171 | ||
172 | /* | 172 | /* |
173 | * 'nblocks' is what you want to add to the current | 173 | * 'nblocks' is what you want to add to the current |
174 | * transaction. extend_trans will either extend the current handle by | 174 | * transaction. extend_trans will either extend the current handle by |
175 | * nblocks, or commit it and start a new one with nblocks credits. | 175 | * nblocks, or commit it and start a new one with nblocks credits. |
176 | * | 176 | * |
177 | * WARNING: This will not release any semaphores or disk locks taken | 177 | * WARNING: This will not release any semaphores or disk locks taken |
178 | * during the transaction, so make sure they were taken *before* | 178 | * during the transaction, so make sure they were taken *before* |
179 | * start_trans or we'll have ordering deadlocks. | 179 | * start_trans or we'll have ordering deadlocks. |
180 | * | 180 | * |
181 | * WARNING2: Note that we do *not* drop j_trans_barrier here. This is | 181 | * WARNING2: Note that we do *not* drop j_trans_barrier here. This is |
182 | * good because transaction ids haven't yet been recorded on the | 182 | * good because transaction ids haven't yet been recorded on the |
183 | * cluster locks associated with this handle. | 183 | * cluster locks associated with this handle. |
184 | */ | 184 | */ |
185 | int ocfs2_extend_trans(handle_t *handle, int nblocks) | 185 | int ocfs2_extend_trans(handle_t *handle, int nblocks) |
186 | { | 186 | { |
187 | int status; | 187 | int status; |
188 | 188 | ||
189 | BUG_ON(!handle); | 189 | BUG_ON(!handle); |
190 | BUG_ON(!nblocks); | 190 | BUG_ON(!nblocks); |
191 | 191 | ||
192 | mlog_entry_void(); | 192 | mlog_entry_void(); |
193 | 193 | ||
194 | mlog(0, "Trying to extend transaction by %d blocks\n", nblocks); | 194 | mlog(0, "Trying to extend transaction by %d blocks\n", nblocks); |
195 | 195 | ||
196 | status = journal_extend(handle, nblocks); | 196 | status = journal_extend(handle, nblocks); |
197 | if (status < 0) { | 197 | if (status < 0) { |
198 | mlog_errno(status); | 198 | mlog_errno(status); |
199 | goto bail; | 199 | goto bail; |
200 | } | 200 | } |
201 | 201 | ||
202 | if (status > 0) { | 202 | if (status > 0) { |
203 | mlog(0, "journal_extend failed, trying journal_restart\n"); | 203 | mlog(0, "journal_extend failed, trying journal_restart\n"); |
204 | status = journal_restart(handle, nblocks); | 204 | status = journal_restart(handle, nblocks); |
205 | if (status < 0) { | 205 | if (status < 0) { |
206 | mlog_errno(status); | 206 | mlog_errno(status); |
207 | goto bail; | 207 | goto bail; |
208 | } | 208 | } |
209 | } | 209 | } |
210 | 210 | ||
211 | status = 0; | 211 | status = 0; |
212 | bail: | 212 | bail: |
213 | 213 | ||
214 | mlog_exit(status); | 214 | mlog_exit(status); |
215 | return status; | 215 | return status; |
216 | } | 216 | } |
217 | 217 | ||
218 | int ocfs2_journal_access(handle_t *handle, | 218 | int ocfs2_journal_access(handle_t *handle, |
219 | struct inode *inode, | 219 | struct inode *inode, |
220 | struct buffer_head *bh, | 220 | struct buffer_head *bh, |
221 | int type) | 221 | int type) |
222 | { | 222 | { |
223 | int status; | 223 | int status; |
224 | 224 | ||
225 | BUG_ON(!inode); | 225 | BUG_ON(!inode); |
226 | BUG_ON(!handle); | 226 | BUG_ON(!handle); |
227 | BUG_ON(!bh); | 227 | BUG_ON(!bh); |
228 | 228 | ||
229 | mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n", | 229 | mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n", |
230 | (unsigned long long)bh->b_blocknr, type, | 230 | (unsigned long long)bh->b_blocknr, type, |
231 | (type == OCFS2_JOURNAL_ACCESS_CREATE) ? | 231 | (type == OCFS2_JOURNAL_ACCESS_CREATE) ? |
232 | "OCFS2_JOURNAL_ACCESS_CREATE" : | 232 | "OCFS2_JOURNAL_ACCESS_CREATE" : |
233 | "OCFS2_JOURNAL_ACCESS_WRITE", | 233 | "OCFS2_JOURNAL_ACCESS_WRITE", |
234 | bh->b_size); | 234 | bh->b_size); |
235 | 235 | ||
236 | /* we can safely remove this assertion after testing. */ | 236 | /* we can safely remove this assertion after testing. */ |
237 | if (!buffer_uptodate(bh)) { | 237 | if (!buffer_uptodate(bh)) { |
238 | mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n"); | 238 | mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n"); |
239 | mlog(ML_ERROR, "b_blocknr=%llu\n", | 239 | mlog(ML_ERROR, "b_blocknr=%llu\n", |
240 | (unsigned long long)bh->b_blocknr); | 240 | (unsigned long long)bh->b_blocknr); |
241 | BUG(); | 241 | BUG(); |
242 | } | 242 | } |
243 | 243 | ||
244 | /* Set the current transaction information on the inode so | 244 | /* Set the current transaction information on the inode so |
245 | * that the locking code knows whether it can drop its locks | 245 | * that the locking code knows whether it can drop its locks |
246 | * on this inode or not. We're protected from the commit | 246 | * on this inode or not. We're protected from the commit |
247 | * thread updating the current transaction id until | 247 | * thread updating the current transaction id until |
248 | * ocfs2_commit_trans() because ocfs2_start_trans() took | 248 | * ocfs2_commit_trans() because ocfs2_start_trans() took |
249 | * j_trans_barrier for us. */ | 249 | * j_trans_barrier for us. */ |
250 | ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode); | 250 | ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode); |
251 | 251 | ||
252 | mutex_lock(&OCFS2_I(inode)->ip_io_mutex); | 252 | mutex_lock(&OCFS2_I(inode)->ip_io_mutex); |
253 | switch (type) { | 253 | switch (type) { |
254 | case OCFS2_JOURNAL_ACCESS_CREATE: | 254 | case OCFS2_JOURNAL_ACCESS_CREATE: |
255 | case OCFS2_JOURNAL_ACCESS_WRITE: | 255 | case OCFS2_JOURNAL_ACCESS_WRITE: |
256 | status = journal_get_write_access(handle, bh); | 256 | status = journal_get_write_access(handle, bh); |
257 | break; | 257 | break; |
258 | 258 | ||
259 | case OCFS2_JOURNAL_ACCESS_UNDO: | 259 | case OCFS2_JOURNAL_ACCESS_UNDO: |
260 | status = journal_get_undo_access(handle, bh); | 260 | status = journal_get_undo_access(handle, bh); |
261 | break; | 261 | break; |
262 | 262 | ||
263 | default: | 263 | default: |
264 | status = -EINVAL; | 264 | status = -EINVAL; |
265 | mlog(ML_ERROR, "Uknown access type!\n"); | 265 | mlog(ML_ERROR, "Uknown access type!\n"); |
266 | } | 266 | } |
267 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | 267 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); |
268 | 268 | ||
269 | if (status < 0) | 269 | if (status < 0) |
270 | mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", | 270 | mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", |
271 | status, type); | 271 | status, type); |
272 | 272 | ||
273 | mlog_exit(status); | 273 | mlog_exit(status); |
274 | return status; | 274 | return status; |
275 | } | 275 | } |
276 | 276 | ||
277 | int ocfs2_journal_dirty(handle_t *handle, | 277 | int ocfs2_journal_dirty(handle_t *handle, |
278 | struct buffer_head *bh) | 278 | struct buffer_head *bh) |
279 | { | 279 | { |
280 | int status; | 280 | int status; |
281 | 281 | ||
282 | mlog_entry("(bh->b_blocknr=%llu)\n", | 282 | mlog_entry("(bh->b_blocknr=%llu)\n", |
283 | (unsigned long long)bh->b_blocknr); | 283 | (unsigned long long)bh->b_blocknr); |
284 | 284 | ||
285 | status = journal_dirty_metadata(handle, bh); | 285 | status = journal_dirty_metadata(handle, bh); |
286 | if (status < 0) | 286 | if (status < 0) |
287 | mlog(ML_ERROR, "Could not dirty metadata buffer. " | 287 | mlog(ML_ERROR, "Could not dirty metadata buffer. " |
288 | "(bh->b_blocknr=%llu)\n", | 288 | "(bh->b_blocknr=%llu)\n", |
289 | (unsigned long long)bh->b_blocknr); | 289 | (unsigned long long)bh->b_blocknr); |
290 | 290 | ||
291 | mlog_exit(status); | 291 | mlog_exit(status); |
292 | return status; | 292 | return status; |
293 | } | 293 | } |
294 | 294 | ||
295 | int ocfs2_journal_dirty_data(handle_t *handle, | 295 | int ocfs2_journal_dirty_data(handle_t *handle, |
296 | struct buffer_head *bh) | 296 | struct buffer_head *bh) |
297 | { | 297 | { |
298 | int err = journal_dirty_data(handle, bh); | 298 | int err = journal_dirty_data(handle, bh); |
299 | if (err) | 299 | if (err) |
300 | mlog_errno(err); | 300 | mlog_errno(err); |
301 | /* TODO: When we can handle it, abort the handle and go RO on | 301 | /* TODO: When we can handle it, abort the handle and go RO on |
302 | * error here. */ | 302 | * error here. */ |
303 | 303 | ||
304 | return err; | 304 | return err; |
305 | } | 305 | } |
306 | 306 | ||
307 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * 5) | 307 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * 5) |
308 | 308 | ||
309 | void ocfs2_set_journal_params(struct ocfs2_super *osb) | 309 | void ocfs2_set_journal_params(struct ocfs2_super *osb) |
310 | { | 310 | { |
311 | journal_t *journal = osb->journal->j_journal; | 311 | journal_t *journal = osb->journal->j_journal; |
312 | 312 | ||
313 | spin_lock(&journal->j_state_lock); | 313 | spin_lock(&journal->j_state_lock); |
314 | journal->j_commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL; | 314 | journal->j_commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL; |
315 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | 315 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) |
316 | journal->j_flags |= JFS_BARRIER; | 316 | journal->j_flags |= JFS_BARRIER; |
317 | else | 317 | else |
318 | journal->j_flags &= ~JFS_BARRIER; | 318 | journal->j_flags &= ~JFS_BARRIER; |
319 | spin_unlock(&journal->j_state_lock); | 319 | spin_unlock(&journal->j_state_lock); |
320 | } | 320 | } |
321 | 321 | ||
322 | int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | 322 | int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) |
323 | { | 323 | { |
324 | int status = -1; | 324 | int status = -1; |
325 | struct inode *inode = NULL; /* the journal inode */ | 325 | struct inode *inode = NULL; /* the journal inode */ |
326 | journal_t *j_journal = NULL; | 326 | journal_t *j_journal = NULL; |
327 | struct ocfs2_dinode *di = NULL; | 327 | struct ocfs2_dinode *di = NULL; |
328 | struct buffer_head *bh = NULL; | 328 | struct buffer_head *bh = NULL; |
329 | struct ocfs2_super *osb; | 329 | struct ocfs2_super *osb; |
330 | int meta_lock = 0; | 330 | int meta_lock = 0; |
331 | 331 | ||
332 | mlog_entry_void(); | 332 | mlog_entry_void(); |
333 | 333 | ||
334 | BUG_ON(!journal); | 334 | BUG_ON(!journal); |
335 | 335 | ||
336 | osb = journal->j_osb; | 336 | osb = journal->j_osb; |
337 | 337 | ||
338 | /* already have the inode for our journal */ | 338 | /* already have the inode for our journal */ |
339 | inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, | 339 | inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, |
340 | osb->slot_num); | 340 | osb->slot_num); |
341 | if (inode == NULL) { | 341 | if (inode == NULL) { |
342 | status = -EACCES; | 342 | status = -EACCES; |
343 | mlog_errno(status); | 343 | mlog_errno(status); |
344 | goto done; | 344 | goto done; |
345 | } | 345 | } |
346 | if (is_bad_inode(inode)) { | 346 | if (is_bad_inode(inode)) { |
347 | mlog(ML_ERROR, "access error (bad inode)\n"); | 347 | mlog(ML_ERROR, "access error (bad inode)\n"); |
348 | iput(inode); | 348 | iput(inode); |
349 | inode = NULL; | 349 | inode = NULL; |
350 | status = -EACCES; | 350 | status = -EACCES; |
351 | goto done; | 351 | goto done; |
352 | } | 352 | } |
353 | 353 | ||
354 | SET_INODE_JOURNAL(inode); | 354 | SET_INODE_JOURNAL(inode); |
355 | OCFS2_I(inode)->ip_open_count++; | 355 | OCFS2_I(inode)->ip_open_count++; |
356 | 356 | ||
357 | /* Skip recovery waits here - journal inode metadata never | 357 | /* Skip recovery waits here - journal inode metadata never |
358 | * changes in a live cluster so it can be considered an | 358 | * changes in a live cluster so it can be considered an |
359 | * exception to the rule. */ | 359 | * exception to the rule. */ |
360 | status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); | 360 | status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); |
361 | if (status < 0) { | 361 | if (status < 0) { |
362 | if (status != -ERESTARTSYS) | 362 | if (status != -ERESTARTSYS) |
363 | mlog(ML_ERROR, "Could not get lock on journal!\n"); | 363 | mlog(ML_ERROR, "Could not get lock on journal!\n"); |
364 | goto done; | 364 | goto done; |
365 | } | 365 | } |
366 | 366 | ||
367 | meta_lock = 1; | 367 | meta_lock = 1; |
368 | di = (struct ocfs2_dinode *)bh->b_data; | 368 | di = (struct ocfs2_dinode *)bh->b_data; |
369 | 369 | ||
370 | if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { | 370 | if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { |
371 | mlog(ML_ERROR, "Journal file size (%lld) is too small!\n", | 371 | mlog(ML_ERROR, "Journal file size (%lld) is too small!\n", |
372 | inode->i_size); | 372 | inode->i_size); |
373 | status = -EINVAL; | 373 | status = -EINVAL; |
374 | goto done; | 374 | goto done; |
375 | } | 375 | } |
376 | 376 | ||
377 | mlog(0, "inode->i_size = %lld\n", inode->i_size); | 377 | mlog(0, "inode->i_size = %lld\n", inode->i_size); |
378 | mlog(0, "inode->i_blocks = %llu\n", | 378 | mlog(0, "inode->i_blocks = %llu\n", |
379 | (unsigned long long)inode->i_blocks); | 379 | (unsigned long long)inode->i_blocks); |
380 | mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); | 380 | mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); |
381 | 381 | ||
382 | /* call the kernel's journal init function now */ | 382 | /* call the kernel's journal init function now */ |
383 | j_journal = journal_init_inode(inode); | 383 | j_journal = journal_init_inode(inode); |
384 | if (j_journal == NULL) { | 384 | if (j_journal == NULL) { |
385 | mlog(ML_ERROR, "Linux journal layer error\n"); | 385 | mlog(ML_ERROR, "Linux journal layer error\n"); |
386 | status = -EINVAL; | 386 | status = -EINVAL; |
387 | goto done; | 387 | goto done; |
388 | } | 388 | } |
389 | 389 | ||
390 | mlog(0, "Returned from journal_init_inode\n"); | 390 | mlog(0, "Returned from journal_init_inode\n"); |
391 | mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); | 391 | mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); |
392 | 392 | ||
393 | *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & | 393 | *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & |
394 | OCFS2_JOURNAL_DIRTY_FL); | 394 | OCFS2_JOURNAL_DIRTY_FL); |
395 | 395 | ||
396 | journal->j_journal = j_journal; | 396 | journal->j_journal = j_journal; |
397 | journal->j_inode = inode; | 397 | journal->j_inode = inode; |
398 | journal->j_bh = bh; | 398 | journal->j_bh = bh; |
399 | 399 | ||
400 | ocfs2_set_journal_params(osb); | 400 | ocfs2_set_journal_params(osb); |
401 | 401 | ||
402 | journal->j_state = OCFS2_JOURNAL_LOADED; | 402 | journal->j_state = OCFS2_JOURNAL_LOADED; |
403 | 403 | ||
404 | status = 0; | 404 | status = 0; |
405 | done: | 405 | done: |
406 | if (status < 0) { | 406 | if (status < 0) { |
407 | if (meta_lock) | 407 | if (meta_lock) |
408 | ocfs2_meta_unlock(inode, 1); | 408 | ocfs2_meta_unlock(inode, 1); |
409 | if (bh != NULL) | 409 | if (bh != NULL) |
410 | brelse(bh); | 410 | brelse(bh); |
411 | if (inode) { | 411 | if (inode) { |
412 | OCFS2_I(inode)->ip_open_count--; | 412 | OCFS2_I(inode)->ip_open_count--; |
413 | iput(inode); | 413 | iput(inode); |
414 | } | 414 | } |
415 | } | 415 | } |
416 | 416 | ||
417 | mlog_exit(status); | 417 | mlog_exit(status); |
418 | return status; | 418 | return status; |
419 | } | 419 | } |
420 | 420 | ||
421 | static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, | 421 | static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, |
422 | int dirty) | 422 | int dirty) |
423 | { | 423 | { |
424 | int status; | 424 | int status; |
425 | unsigned int flags; | 425 | unsigned int flags; |
426 | struct ocfs2_journal *journal = osb->journal; | 426 | struct ocfs2_journal *journal = osb->journal; |
427 | struct buffer_head *bh = journal->j_bh; | 427 | struct buffer_head *bh = journal->j_bh; |
428 | struct ocfs2_dinode *fe; | 428 | struct ocfs2_dinode *fe; |
429 | 429 | ||
430 | mlog_entry_void(); | 430 | mlog_entry_void(); |
431 | 431 | ||
432 | fe = (struct ocfs2_dinode *)bh->b_data; | 432 | fe = (struct ocfs2_dinode *)bh->b_data; |
433 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 433 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
434 | /* This is called from startup/shutdown which will | 434 | /* This is called from startup/shutdown which will |
435 | * handle the errors in a specific manner, so no need | 435 | * handle the errors in a specific manner, so no need |
436 | * to call ocfs2_error() here. */ | 436 | * to call ocfs2_error() here. */ |
437 | mlog(ML_ERROR, "Journal dinode %llu has invalid " | 437 | mlog(ML_ERROR, "Journal dinode %llu has invalid " |
438 | "signature: %.*s", (unsigned long long)fe->i_blkno, 7, | 438 | "signature: %.*s", (unsigned long long)fe->i_blkno, 7, |
439 | fe->i_signature); | 439 | fe->i_signature); |
440 | status = -EIO; | 440 | status = -EIO; |
441 | goto out; | 441 | goto out; |
442 | } | 442 | } |
443 | 443 | ||
444 | flags = le32_to_cpu(fe->id1.journal1.ij_flags); | 444 | flags = le32_to_cpu(fe->id1.journal1.ij_flags); |
445 | if (dirty) | 445 | if (dirty) |
446 | flags |= OCFS2_JOURNAL_DIRTY_FL; | 446 | flags |= OCFS2_JOURNAL_DIRTY_FL; |
447 | else | 447 | else |
448 | flags &= ~OCFS2_JOURNAL_DIRTY_FL; | 448 | flags &= ~OCFS2_JOURNAL_DIRTY_FL; |
449 | fe->id1.journal1.ij_flags = cpu_to_le32(flags); | 449 | fe->id1.journal1.ij_flags = cpu_to_le32(flags); |
450 | 450 | ||
451 | status = ocfs2_write_block(osb, bh, journal->j_inode); | 451 | status = ocfs2_write_block(osb, bh, journal->j_inode); |
452 | if (status < 0) | 452 | if (status < 0) |
453 | mlog_errno(status); | 453 | mlog_errno(status); |
454 | 454 | ||
455 | out: | 455 | out: |
456 | mlog_exit(status); | 456 | mlog_exit(status); |
457 | return status; | 457 | return status; |
458 | } | 458 | } |
459 | 459 | ||
460 | /* | 460 | /* |
461 | * If the journal has been kmalloc'd it needs to be freed after this | 461 | * If the journal has been kmalloc'd it needs to be freed after this |
462 | * call. | 462 | * call. |
463 | */ | 463 | */ |
464 | void ocfs2_journal_shutdown(struct ocfs2_super *osb) | 464 | void ocfs2_journal_shutdown(struct ocfs2_super *osb) |
465 | { | 465 | { |
466 | struct ocfs2_journal *journal = NULL; | 466 | struct ocfs2_journal *journal = NULL; |
467 | int status = 0; | 467 | int status = 0; |
468 | struct inode *inode = NULL; | 468 | struct inode *inode = NULL; |
469 | int num_running_trans = 0; | 469 | int num_running_trans = 0; |
470 | 470 | ||
471 | mlog_entry_void(); | 471 | mlog_entry_void(); |
472 | 472 | ||
473 | BUG_ON(!osb); | 473 | BUG_ON(!osb); |
474 | 474 | ||
475 | journal = osb->journal; | 475 | journal = osb->journal; |
476 | if (!journal) | 476 | if (!journal) |
477 | goto done; | 477 | goto done; |
478 | 478 | ||
479 | inode = journal->j_inode; | 479 | inode = journal->j_inode; |
480 | 480 | ||
481 | if (journal->j_state != OCFS2_JOURNAL_LOADED) | 481 | if (journal->j_state != OCFS2_JOURNAL_LOADED) |
482 | goto done; | 482 | goto done; |
483 | 483 | ||
484 | /* need to inc inode use count as journal_destroy will iput. */ | 484 | /* need to inc inode use count as journal_destroy will iput. */ |
485 | if (!igrab(inode)) | 485 | if (!igrab(inode)) |
486 | BUG(); | 486 | BUG(); |
487 | 487 | ||
488 | num_running_trans = atomic_read(&(osb->journal->j_num_trans)); | 488 | num_running_trans = atomic_read(&(osb->journal->j_num_trans)); |
489 | if (num_running_trans > 0) | 489 | if (num_running_trans > 0) |
490 | mlog(0, "Shutting down journal: must wait on %d " | 490 | mlog(0, "Shutting down journal: must wait on %d " |
491 | "running transactions!\n", | 491 | "running transactions!\n", |
492 | num_running_trans); | 492 | num_running_trans); |
493 | 493 | ||
494 | /* Do a commit_cache here. It will flush our journal, *and* | 494 | /* Do a commit_cache here. It will flush our journal, *and* |
495 | * release any locks that are still held. | 495 | * release any locks that are still held. |
496 | * set the SHUTDOWN flag and release the trans lock. | 496 | * set the SHUTDOWN flag and release the trans lock. |
497 | * the commit thread will take the trans lock for us below. */ | 497 | * the commit thread will take the trans lock for us below. */ |
498 | journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN; | 498 | journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN; |
499 | 499 | ||
500 | /* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not | 500 | /* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not |
501 | * drop the trans_lock (which we want to hold until we | 501 | * drop the trans_lock (which we want to hold until we |
502 | * completely destroy the journal. */ | 502 | * completely destroy the journal. */ |
503 | if (osb->commit_task) { | 503 | if (osb->commit_task) { |
504 | /* Wait for the commit thread */ | 504 | /* Wait for the commit thread */ |
505 | mlog(0, "Waiting for ocfs2commit to exit....\n"); | 505 | mlog(0, "Waiting for ocfs2commit to exit....\n"); |
506 | kthread_stop(osb->commit_task); | 506 | kthread_stop(osb->commit_task); |
507 | osb->commit_task = NULL; | 507 | osb->commit_task = NULL; |
508 | } | 508 | } |
509 | 509 | ||
510 | BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); | 510 | BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); |
511 | 511 | ||
512 | if (ocfs2_mount_local(osb)) { | 512 | if (ocfs2_mount_local(osb)) { |
513 | journal_lock_updates(journal->j_journal); | 513 | journal_lock_updates(journal->j_journal); |
514 | status = journal_flush(journal->j_journal); | 514 | status = journal_flush(journal->j_journal); |
515 | journal_unlock_updates(journal->j_journal); | 515 | journal_unlock_updates(journal->j_journal); |
516 | if (status < 0) | 516 | if (status < 0) |
517 | mlog_errno(status); | 517 | mlog_errno(status); |
518 | } | 518 | } |
519 | 519 | ||
520 | if (status == 0) { | 520 | if (status == 0) { |
521 | /* | 521 | /* |
522 | * Do not toggle if flush was unsuccessful otherwise | 522 | * Do not toggle if flush was unsuccessful otherwise |
523 | * will leave dirty metadata in a "clean" journal | 523 | * will leave dirty metadata in a "clean" journal |
524 | */ | 524 | */ |
525 | status = ocfs2_journal_toggle_dirty(osb, 0); | 525 | status = ocfs2_journal_toggle_dirty(osb, 0); |
526 | if (status < 0) | 526 | if (status < 0) |
527 | mlog_errno(status); | 527 | mlog_errno(status); |
528 | } | 528 | } |
529 | 529 | ||
530 | /* Shutdown the kernel journal system */ | 530 | /* Shutdown the kernel journal system */ |
531 | journal_destroy(journal->j_journal); | 531 | journal_destroy(journal->j_journal); |
532 | 532 | ||
533 | OCFS2_I(inode)->ip_open_count--; | 533 | OCFS2_I(inode)->ip_open_count--; |
534 | 534 | ||
535 | /* unlock our journal */ | 535 | /* unlock our journal */ |
536 | ocfs2_meta_unlock(inode, 1); | 536 | ocfs2_meta_unlock(inode, 1); |
537 | 537 | ||
538 | brelse(journal->j_bh); | 538 | brelse(journal->j_bh); |
539 | journal->j_bh = NULL; | 539 | journal->j_bh = NULL; |
540 | 540 | ||
541 | journal->j_state = OCFS2_JOURNAL_FREE; | 541 | journal->j_state = OCFS2_JOURNAL_FREE; |
542 | 542 | ||
543 | // up_write(&journal->j_trans_barrier); | 543 | // up_write(&journal->j_trans_barrier); |
544 | done: | 544 | done: |
545 | if (inode) | 545 | if (inode) |
546 | iput(inode); | 546 | iput(inode); |
547 | mlog_exit_void(); | 547 | mlog_exit_void(); |
548 | } | 548 | } |
549 | 549 | ||
550 | static void ocfs2_clear_journal_error(struct super_block *sb, | 550 | static void ocfs2_clear_journal_error(struct super_block *sb, |
551 | journal_t *journal, | 551 | journal_t *journal, |
552 | int slot) | 552 | int slot) |
553 | { | 553 | { |
554 | int olderr; | 554 | int olderr; |
555 | 555 | ||
556 | olderr = journal_errno(journal); | 556 | olderr = journal_errno(journal); |
557 | if (olderr) { | 557 | if (olderr) { |
558 | mlog(ML_ERROR, "File system error %d recorded in " | 558 | mlog(ML_ERROR, "File system error %d recorded in " |
559 | "journal %u.\n", olderr, slot); | 559 | "journal %u.\n", olderr, slot); |
560 | mlog(ML_ERROR, "File system on device %s needs checking.\n", | 560 | mlog(ML_ERROR, "File system on device %s needs checking.\n", |
561 | sb->s_id); | 561 | sb->s_id); |
562 | 562 | ||
563 | journal_ack_err(journal); | 563 | journal_ack_err(journal); |
564 | journal_clear_err(journal); | 564 | journal_clear_err(journal); |
565 | } | 565 | } |
566 | } | 566 | } |
567 | 567 | ||
568 | int ocfs2_journal_load(struct ocfs2_journal *journal, int local) | 568 | int ocfs2_journal_load(struct ocfs2_journal *journal, int local) |
569 | { | 569 | { |
570 | int status = 0; | 570 | int status = 0; |
571 | struct ocfs2_super *osb; | 571 | struct ocfs2_super *osb; |
572 | 572 | ||
573 | mlog_entry_void(); | 573 | mlog_entry_void(); |
574 | 574 | ||
575 | if (!journal) | 575 | if (!journal) |
576 | BUG(); | 576 | BUG(); |
577 | 577 | ||
578 | osb = journal->j_osb; | 578 | osb = journal->j_osb; |
579 | 579 | ||
580 | status = journal_load(journal->j_journal); | 580 | status = journal_load(journal->j_journal); |
581 | if (status < 0) { | 581 | if (status < 0) { |
582 | mlog(ML_ERROR, "Failed to load journal!\n"); | 582 | mlog(ML_ERROR, "Failed to load journal!\n"); |
583 | goto done; | 583 | goto done; |
584 | } | 584 | } |
585 | 585 | ||
586 | ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); | 586 | ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); |
587 | 587 | ||
588 | status = ocfs2_journal_toggle_dirty(osb, 1); | 588 | status = ocfs2_journal_toggle_dirty(osb, 1); |
589 | if (status < 0) { | 589 | if (status < 0) { |
590 | mlog_errno(status); | 590 | mlog_errno(status); |
591 | goto done; | 591 | goto done; |
592 | } | 592 | } |
593 | 593 | ||
594 | /* Launch the commit thread */ | 594 | /* Launch the commit thread */ |
595 | if (!local) { | 595 | if (!local) { |
596 | osb->commit_task = kthread_run(ocfs2_commit_thread, osb, | 596 | osb->commit_task = kthread_run(ocfs2_commit_thread, osb, |
597 | "ocfs2cmt"); | 597 | "ocfs2cmt"); |
598 | if (IS_ERR(osb->commit_task)) { | 598 | if (IS_ERR(osb->commit_task)) { |
599 | status = PTR_ERR(osb->commit_task); | 599 | status = PTR_ERR(osb->commit_task); |
600 | osb->commit_task = NULL; | 600 | osb->commit_task = NULL; |
601 | mlog(ML_ERROR, "unable to launch ocfs2commit thread, " | 601 | mlog(ML_ERROR, "unable to launch ocfs2commit thread, " |
602 | "error=%d", status); | 602 | "error=%d", status); |
603 | goto done; | 603 | goto done; |
604 | } | 604 | } |
605 | } else | 605 | } else |
606 | osb->commit_task = NULL; | 606 | osb->commit_task = NULL; |
607 | 607 | ||
608 | done: | 608 | done: |
609 | mlog_exit(status); | 609 | mlog_exit(status); |
610 | return status; | 610 | return status; |
611 | } | 611 | } |
612 | 612 | ||
613 | 613 | ||
614 | /* 'full' flag tells us whether we clear out all blocks or if we just | 614 | /* 'full' flag tells us whether we clear out all blocks or if we just |
615 | * mark the journal clean */ | 615 | * mark the journal clean */ |
616 | int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) | 616 | int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) |
617 | { | 617 | { |
618 | int status; | 618 | int status; |
619 | 619 | ||
620 | mlog_entry_void(); | 620 | mlog_entry_void(); |
621 | 621 | ||
622 | BUG_ON(!journal); | 622 | BUG_ON(!journal); |
623 | 623 | ||
624 | status = journal_wipe(journal->j_journal, full); | 624 | status = journal_wipe(journal->j_journal, full); |
625 | if (status < 0) { | 625 | if (status < 0) { |
626 | mlog_errno(status); | 626 | mlog_errno(status); |
627 | goto bail; | 627 | goto bail; |
628 | } | 628 | } |
629 | 629 | ||
630 | status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); | 630 | status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); |
631 | if (status < 0) | 631 | if (status < 0) |
632 | mlog_errno(status); | 632 | mlog_errno(status); |
633 | 633 | ||
634 | bail: | 634 | bail: |
635 | mlog_exit(status); | 635 | mlog_exit(status); |
636 | return status; | 636 | return status; |
637 | } | 637 | } |
638 | 638 | ||
639 | /* | 639 | /* |
640 | * JBD Might read a cached version of another nodes journal file. We | 640 | * JBD Might read a cached version of another nodes journal file. We |
641 | * don't want this as this file changes often and we get no | 641 | * don't want this as this file changes often and we get no |
642 | * notification on those changes. The only way to be sure that we've | 642 | * notification on those changes. The only way to be sure that we've |
643 | * got the most up to date version of those blocks then is to force | 643 | * got the most up to date version of those blocks then is to force |
644 | * read them off disk. Just searching through the buffer cache won't | 644 | * read them off disk. Just searching through the buffer cache won't |
645 | * work as there may be pages backing this file which are still marked | 645 | * work as there may be pages backing this file which are still marked |
646 | * up to date. We know things can't change on this file underneath us | 646 | * up to date. We know things can't change on this file underneath us |
647 | * as we have the lock by now :) | 647 | * as we have the lock by now :) |
648 | */ | 648 | */ |
649 | static int ocfs2_force_read_journal(struct inode *inode) | 649 | static int ocfs2_force_read_journal(struct inode *inode) |
650 | { | 650 | { |
651 | int status = 0; | 651 | int status = 0; |
652 | int i, p_blocks; | 652 | int i; |
653 | u64 v_blkno, p_blkno; | 653 | u64 v_blkno, p_blkno, p_blocks; |
654 | #define CONCURRENT_JOURNAL_FILL 32 | 654 | #define CONCURRENT_JOURNAL_FILL 32ULL |
655 | struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL]; | 655 | struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL]; |
656 | 656 | ||
657 | mlog_entry_void(); | 657 | mlog_entry_void(); |
658 | 658 | ||
659 | BUG_ON(inode->i_blocks != | 659 | BUG_ON(inode->i_blocks != |
660 | ocfs2_align_bytes_to_sectors(i_size_read(inode))); | 660 | ocfs2_align_bytes_to_sectors(i_size_read(inode))); |
661 | 661 | ||
662 | memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); | 662 | memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); |
663 | 663 | ||
664 | mlog(0, "Force reading %llu blocks\n", | 664 | mlog(0, "Force reading %llu blocks\n", |
665 | (unsigned long long)(inode->i_blocks >> | 665 | (unsigned long long)(inode->i_blocks >> |
666 | (inode->i_sb->s_blocksize_bits - 9))); | 666 | (inode->i_sb->s_blocksize_bits - 9))); |
667 | 667 | ||
668 | v_blkno = 0; | 668 | v_blkno = 0; |
669 | while (v_blkno < | 669 | while (v_blkno < |
670 | (inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9))) { | 670 | (inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9))) { |
671 | 671 | ||
672 | status = ocfs2_extent_map_get_blocks(inode, v_blkno, | 672 | status = ocfs2_extent_map_get_blocks(inode, v_blkno, |
673 | &p_blkno, &p_blocks, NULL); | 673 | &p_blkno, &p_blocks, NULL); |
674 | if (status < 0) { | 674 | if (status < 0) { |
675 | mlog_errno(status); | 675 | mlog_errno(status); |
676 | goto bail; | 676 | goto bail; |
677 | } | 677 | } |
678 | 678 | ||
679 | if (p_blocks > CONCURRENT_JOURNAL_FILL) | 679 | if (p_blocks > CONCURRENT_JOURNAL_FILL) |
680 | p_blocks = CONCURRENT_JOURNAL_FILL; | 680 | p_blocks = CONCURRENT_JOURNAL_FILL; |
681 | 681 | ||
682 | /* We are reading journal data which should not | 682 | /* We are reading journal data which should not |
683 | * be put in the uptodate cache */ | 683 | * be put in the uptodate cache */ |
684 | status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), | 684 | status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), |
685 | p_blkno, p_blocks, bhs, 0, | 685 | p_blkno, p_blocks, bhs, 0, |
686 | NULL); | 686 | NULL); |
687 | if (status < 0) { | 687 | if (status < 0) { |
688 | mlog_errno(status); | 688 | mlog_errno(status); |
689 | goto bail; | 689 | goto bail; |
690 | } | 690 | } |
691 | 691 | ||
692 | for(i = 0; i < p_blocks; i++) { | 692 | for(i = 0; i < p_blocks; i++) { |
693 | brelse(bhs[i]); | 693 | brelse(bhs[i]); |
694 | bhs[i] = NULL; | 694 | bhs[i] = NULL; |
695 | } | 695 | } |
696 | 696 | ||
697 | v_blkno += p_blocks; | 697 | v_blkno += p_blocks; |
698 | } | 698 | } |
699 | 699 | ||
700 | bail: | 700 | bail: |
701 | for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) | 701 | for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) |
702 | if (bhs[i]) | 702 | if (bhs[i]) |
703 | brelse(bhs[i]); | 703 | brelse(bhs[i]); |
704 | mlog_exit(status); | 704 | mlog_exit(status); |
705 | return status; | 705 | return status; |
706 | } | 706 | } |
707 | 707 | ||
708 | struct ocfs2_la_recovery_item { | 708 | struct ocfs2_la_recovery_item { |
709 | struct list_head lri_list; | 709 | struct list_head lri_list; |
710 | int lri_slot; | 710 | int lri_slot; |
711 | struct ocfs2_dinode *lri_la_dinode; | 711 | struct ocfs2_dinode *lri_la_dinode; |
712 | struct ocfs2_dinode *lri_tl_dinode; | 712 | struct ocfs2_dinode *lri_tl_dinode; |
713 | }; | 713 | }; |
714 | 714 | ||
715 | /* Does the second half of the recovery process. By this point, the | 715 | /* Does the second half of the recovery process. By this point, the |
716 | * node is marked clean and can actually be considered recovered, | 716 | * node is marked clean and can actually be considered recovered, |
717 | * hence it's no longer in the recovery map, but there's still some | 717 | * hence it's no longer in the recovery map, but there's still some |
718 | * cleanup we can do which shouldn't happen within the recovery thread | 718 | * cleanup we can do which shouldn't happen within the recovery thread |
719 | * as locking in that context becomes very difficult if we are to take | 719 | * as locking in that context becomes very difficult if we are to take |
720 | * recovering nodes into account. | 720 | * recovering nodes into account. |
721 | * | 721 | * |
722 | * NOTE: This function can and will sleep on recovery of other nodes | 722 | * NOTE: This function can and will sleep on recovery of other nodes |
723 | * during cluster locking, just like any other ocfs2 process. | 723 | * during cluster locking, just like any other ocfs2 process. |
724 | */ | 724 | */ |
725 | void ocfs2_complete_recovery(struct work_struct *work) | 725 | void ocfs2_complete_recovery(struct work_struct *work) |
726 | { | 726 | { |
727 | int ret; | 727 | int ret; |
728 | struct ocfs2_journal *journal = | 728 | struct ocfs2_journal *journal = |
729 | container_of(work, struct ocfs2_journal, j_recovery_work); | 729 | container_of(work, struct ocfs2_journal, j_recovery_work); |
730 | struct ocfs2_super *osb = journal->j_osb; | 730 | struct ocfs2_super *osb = journal->j_osb; |
731 | struct ocfs2_dinode *la_dinode, *tl_dinode; | 731 | struct ocfs2_dinode *la_dinode, *tl_dinode; |
732 | struct ocfs2_la_recovery_item *item; | 732 | struct ocfs2_la_recovery_item *item; |
733 | struct list_head *p, *n; | 733 | struct list_head *p, *n; |
734 | LIST_HEAD(tmp_la_list); | 734 | LIST_HEAD(tmp_la_list); |
735 | 735 | ||
736 | mlog_entry_void(); | 736 | mlog_entry_void(); |
737 | 737 | ||
738 | mlog(0, "completing recovery from keventd\n"); | 738 | mlog(0, "completing recovery from keventd\n"); |
739 | 739 | ||
740 | spin_lock(&journal->j_lock); | 740 | spin_lock(&journal->j_lock); |
741 | list_splice_init(&journal->j_la_cleanups, &tmp_la_list); | 741 | list_splice_init(&journal->j_la_cleanups, &tmp_la_list); |
742 | spin_unlock(&journal->j_lock); | 742 | spin_unlock(&journal->j_lock); |
743 | 743 | ||
744 | list_for_each_safe(p, n, &tmp_la_list) { | 744 | list_for_each_safe(p, n, &tmp_la_list) { |
745 | item = list_entry(p, struct ocfs2_la_recovery_item, lri_list); | 745 | item = list_entry(p, struct ocfs2_la_recovery_item, lri_list); |
746 | list_del_init(&item->lri_list); | 746 | list_del_init(&item->lri_list); |
747 | 747 | ||
748 | mlog(0, "Complete recovery for slot %d\n", item->lri_slot); | 748 | mlog(0, "Complete recovery for slot %d\n", item->lri_slot); |
749 | 749 | ||
750 | la_dinode = item->lri_la_dinode; | 750 | la_dinode = item->lri_la_dinode; |
751 | if (la_dinode) { | 751 | if (la_dinode) { |
752 | mlog(0, "Clean up local alloc %llu\n", | 752 | mlog(0, "Clean up local alloc %llu\n", |
753 | (unsigned long long)la_dinode->i_blkno); | 753 | (unsigned long long)la_dinode->i_blkno); |
754 | 754 | ||
755 | ret = ocfs2_complete_local_alloc_recovery(osb, | 755 | ret = ocfs2_complete_local_alloc_recovery(osb, |
756 | la_dinode); | 756 | la_dinode); |
757 | if (ret < 0) | 757 | if (ret < 0) |
758 | mlog_errno(ret); | 758 | mlog_errno(ret); |
759 | 759 | ||
760 | kfree(la_dinode); | 760 | kfree(la_dinode); |
761 | } | 761 | } |
762 | 762 | ||
763 | tl_dinode = item->lri_tl_dinode; | 763 | tl_dinode = item->lri_tl_dinode; |
764 | if (tl_dinode) { | 764 | if (tl_dinode) { |
765 | mlog(0, "Clean up truncate log %llu\n", | 765 | mlog(0, "Clean up truncate log %llu\n", |
766 | (unsigned long long)tl_dinode->i_blkno); | 766 | (unsigned long long)tl_dinode->i_blkno); |
767 | 767 | ||
768 | ret = ocfs2_complete_truncate_log_recovery(osb, | 768 | ret = ocfs2_complete_truncate_log_recovery(osb, |
769 | tl_dinode); | 769 | tl_dinode); |
770 | if (ret < 0) | 770 | if (ret < 0) |
771 | mlog_errno(ret); | 771 | mlog_errno(ret); |
772 | 772 | ||
773 | kfree(tl_dinode); | 773 | kfree(tl_dinode); |
774 | } | 774 | } |
775 | 775 | ||
776 | ret = ocfs2_recover_orphans(osb, item->lri_slot); | 776 | ret = ocfs2_recover_orphans(osb, item->lri_slot); |
777 | if (ret < 0) | 777 | if (ret < 0) |
778 | mlog_errno(ret); | 778 | mlog_errno(ret); |
779 | 779 | ||
780 | kfree(item); | 780 | kfree(item); |
781 | } | 781 | } |
782 | 782 | ||
783 | mlog(0, "Recovery completion\n"); | 783 | mlog(0, "Recovery completion\n"); |
784 | mlog_exit_void(); | 784 | mlog_exit_void(); |
785 | } | 785 | } |
786 | 786 | ||
787 | /* NOTE: This function always eats your references to la_dinode and | 787 | /* NOTE: This function always eats your references to la_dinode and |
788 | * tl_dinode, either manually on error, or by passing them to | 788 | * tl_dinode, either manually on error, or by passing them to |
789 | * ocfs2_complete_recovery */ | 789 | * ocfs2_complete_recovery */ |
790 | static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, | 790 | static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, |
791 | int slot_num, | 791 | int slot_num, |
792 | struct ocfs2_dinode *la_dinode, | 792 | struct ocfs2_dinode *la_dinode, |
793 | struct ocfs2_dinode *tl_dinode) | 793 | struct ocfs2_dinode *tl_dinode) |
794 | { | 794 | { |
795 | struct ocfs2_la_recovery_item *item; | 795 | struct ocfs2_la_recovery_item *item; |
796 | 796 | ||
797 | item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS); | 797 | item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS); |
798 | if (!item) { | 798 | if (!item) { |
799 | /* Though we wish to avoid it, we are in fact safe in | 799 | /* Though we wish to avoid it, we are in fact safe in |
800 | * skipping local alloc cleanup as fsck.ocfs2 is more | 800 | * skipping local alloc cleanup as fsck.ocfs2 is more |
801 | * than capable of reclaiming unused space. */ | 801 | * than capable of reclaiming unused space. */ |
802 | if (la_dinode) | 802 | if (la_dinode) |
803 | kfree(la_dinode); | 803 | kfree(la_dinode); |
804 | 804 | ||
805 | if (tl_dinode) | 805 | if (tl_dinode) |
806 | kfree(tl_dinode); | 806 | kfree(tl_dinode); |
807 | 807 | ||
808 | mlog_errno(-ENOMEM); | 808 | mlog_errno(-ENOMEM); |
809 | return; | 809 | return; |
810 | } | 810 | } |
811 | 811 | ||
812 | INIT_LIST_HEAD(&item->lri_list); | 812 | INIT_LIST_HEAD(&item->lri_list); |
813 | item->lri_la_dinode = la_dinode; | 813 | item->lri_la_dinode = la_dinode; |
814 | item->lri_slot = slot_num; | 814 | item->lri_slot = slot_num; |
815 | item->lri_tl_dinode = tl_dinode; | 815 | item->lri_tl_dinode = tl_dinode; |
816 | 816 | ||
817 | spin_lock(&journal->j_lock); | 817 | spin_lock(&journal->j_lock); |
818 | list_add_tail(&item->lri_list, &journal->j_la_cleanups); | 818 | list_add_tail(&item->lri_list, &journal->j_la_cleanups); |
819 | queue_work(ocfs2_wq, &journal->j_recovery_work); | 819 | queue_work(ocfs2_wq, &journal->j_recovery_work); |
820 | spin_unlock(&journal->j_lock); | 820 | spin_unlock(&journal->j_lock); |
821 | } | 821 | } |
822 | 822 | ||
823 | /* Called by the mount code to queue recovery the last part of | 823 | /* Called by the mount code to queue recovery the last part of |
824 | * recovery for it's own slot. */ | 824 | * recovery for it's own slot. */ |
825 | void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) | 825 | void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) |
826 | { | 826 | { |
827 | struct ocfs2_journal *journal = osb->journal; | 827 | struct ocfs2_journal *journal = osb->journal; |
828 | 828 | ||
829 | if (osb->dirty) { | 829 | if (osb->dirty) { |
830 | /* No need to queue up our truncate_log as regular | 830 | /* No need to queue up our truncate_log as regular |
831 | * cleanup will catch that. */ | 831 | * cleanup will catch that. */ |
832 | ocfs2_queue_recovery_completion(journal, | 832 | ocfs2_queue_recovery_completion(journal, |
833 | osb->slot_num, | 833 | osb->slot_num, |
834 | osb->local_alloc_copy, | 834 | osb->local_alloc_copy, |
835 | NULL); | 835 | NULL); |
836 | ocfs2_schedule_truncate_log_flush(osb, 0); | 836 | ocfs2_schedule_truncate_log_flush(osb, 0); |
837 | 837 | ||
838 | osb->local_alloc_copy = NULL; | 838 | osb->local_alloc_copy = NULL; |
839 | osb->dirty = 0; | 839 | osb->dirty = 0; |
840 | } | 840 | } |
841 | } | 841 | } |
842 | 842 | ||
843 | static int __ocfs2_recovery_thread(void *arg) | 843 | static int __ocfs2_recovery_thread(void *arg) |
844 | { | 844 | { |
845 | int status, node_num; | 845 | int status, node_num; |
846 | struct ocfs2_super *osb = arg; | 846 | struct ocfs2_super *osb = arg; |
847 | 847 | ||
848 | mlog_entry_void(); | 848 | mlog_entry_void(); |
849 | 849 | ||
850 | status = ocfs2_wait_on_mount(osb); | 850 | status = ocfs2_wait_on_mount(osb); |
851 | if (status < 0) { | 851 | if (status < 0) { |
852 | goto bail; | 852 | goto bail; |
853 | } | 853 | } |
854 | 854 | ||
855 | restart: | 855 | restart: |
856 | status = ocfs2_super_lock(osb, 1); | 856 | status = ocfs2_super_lock(osb, 1); |
857 | if (status < 0) { | 857 | if (status < 0) { |
858 | mlog_errno(status); | 858 | mlog_errno(status); |
859 | goto bail; | 859 | goto bail; |
860 | } | 860 | } |
861 | 861 | ||
862 | while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { | 862 | while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { |
863 | node_num = ocfs2_node_map_first_set_bit(osb, | 863 | node_num = ocfs2_node_map_first_set_bit(osb, |
864 | &osb->recovery_map); | 864 | &osb->recovery_map); |
865 | if (node_num == O2NM_INVALID_NODE_NUM) { | 865 | if (node_num == O2NM_INVALID_NODE_NUM) { |
866 | mlog(0, "Out of nodes to recover.\n"); | 866 | mlog(0, "Out of nodes to recover.\n"); |
867 | break; | 867 | break; |
868 | } | 868 | } |
869 | 869 | ||
870 | status = ocfs2_recover_node(osb, node_num); | 870 | status = ocfs2_recover_node(osb, node_num); |
871 | if (status < 0) { | 871 | if (status < 0) { |
872 | mlog(ML_ERROR, | 872 | mlog(ML_ERROR, |
873 | "Error %d recovering node %d on device (%u,%u)!\n", | 873 | "Error %d recovering node %d on device (%u,%u)!\n", |
874 | status, node_num, | 874 | status, node_num, |
875 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 875 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
876 | mlog(ML_ERROR, "Volume requires unmount.\n"); | 876 | mlog(ML_ERROR, "Volume requires unmount.\n"); |
877 | continue; | 877 | continue; |
878 | } | 878 | } |
879 | 879 | ||
880 | ocfs2_recovery_map_clear(osb, node_num); | 880 | ocfs2_recovery_map_clear(osb, node_num); |
881 | } | 881 | } |
882 | ocfs2_super_unlock(osb, 1); | 882 | ocfs2_super_unlock(osb, 1); |
883 | 883 | ||
884 | /* We always run recovery on our own orphan dir - the dead | 884 | /* We always run recovery on our own orphan dir - the dead |
885 | * node(s) may have voted "no" on an inode delete earlier. A | 885 | * node(s) may have voted "no" on an inode delete earlier. A |
886 | * revote is therefore required. */ | 886 | * revote is therefore required. */ |
887 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, | 887 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, |
888 | NULL); | 888 | NULL); |
889 | 889 | ||
890 | bail: | 890 | bail: |
891 | mutex_lock(&osb->recovery_lock); | 891 | mutex_lock(&osb->recovery_lock); |
892 | if (!status && | 892 | if (!status && |
893 | !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { | 893 | !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { |
894 | mutex_unlock(&osb->recovery_lock); | 894 | mutex_unlock(&osb->recovery_lock); |
895 | goto restart; | 895 | goto restart; |
896 | } | 896 | } |
897 | 897 | ||
898 | osb->recovery_thread_task = NULL; | 898 | osb->recovery_thread_task = NULL; |
899 | mb(); /* sync with ocfs2_recovery_thread_running */ | 899 | mb(); /* sync with ocfs2_recovery_thread_running */ |
900 | wake_up(&osb->recovery_event); | 900 | wake_up(&osb->recovery_event); |
901 | 901 | ||
902 | mutex_unlock(&osb->recovery_lock); | 902 | mutex_unlock(&osb->recovery_lock); |
903 | 903 | ||
904 | mlog_exit(status); | 904 | mlog_exit(status); |
905 | /* no one is callint kthread_stop() for us so the kthread() api | 905 | /* no one is callint kthread_stop() for us so the kthread() api |
906 | * requires that we call do_exit(). And it isn't exported, but | 906 | * requires that we call do_exit(). And it isn't exported, but |
907 | * complete_and_exit() seems to be a minimal wrapper around it. */ | 907 | * complete_and_exit() seems to be a minimal wrapper around it. */ |
908 | complete_and_exit(NULL, status); | 908 | complete_and_exit(NULL, status); |
909 | return status; | 909 | return status; |
910 | } | 910 | } |
911 | 911 | ||
912 | void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) | 912 | void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) |
913 | { | 913 | { |
914 | mlog_entry("(node_num=%d, osb->node_num = %d)\n", | 914 | mlog_entry("(node_num=%d, osb->node_num = %d)\n", |
915 | node_num, osb->node_num); | 915 | node_num, osb->node_num); |
916 | 916 | ||
917 | mutex_lock(&osb->recovery_lock); | 917 | mutex_lock(&osb->recovery_lock); |
918 | if (osb->disable_recovery) | 918 | if (osb->disable_recovery) |
919 | goto out; | 919 | goto out; |
920 | 920 | ||
921 | /* People waiting on recovery will wait on | 921 | /* People waiting on recovery will wait on |
922 | * the recovery map to empty. */ | 922 | * the recovery map to empty. */ |
923 | if (!ocfs2_recovery_map_set(osb, node_num)) | 923 | if (!ocfs2_recovery_map_set(osb, node_num)) |
924 | mlog(0, "node %d already be in recovery.\n", node_num); | 924 | mlog(0, "node %d already be in recovery.\n", node_num); |
925 | 925 | ||
926 | mlog(0, "starting recovery thread...\n"); | 926 | mlog(0, "starting recovery thread...\n"); |
927 | 927 | ||
928 | if (osb->recovery_thread_task) | 928 | if (osb->recovery_thread_task) |
929 | goto out; | 929 | goto out; |
930 | 930 | ||
931 | osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb, | 931 | osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb, |
932 | "ocfs2rec"); | 932 | "ocfs2rec"); |
933 | if (IS_ERR(osb->recovery_thread_task)) { | 933 | if (IS_ERR(osb->recovery_thread_task)) { |
934 | mlog_errno((int)PTR_ERR(osb->recovery_thread_task)); | 934 | mlog_errno((int)PTR_ERR(osb->recovery_thread_task)); |
935 | osb->recovery_thread_task = NULL; | 935 | osb->recovery_thread_task = NULL; |
936 | } | 936 | } |
937 | 937 | ||
938 | out: | 938 | out: |
939 | mutex_unlock(&osb->recovery_lock); | 939 | mutex_unlock(&osb->recovery_lock); |
940 | wake_up(&osb->recovery_event); | 940 | wake_up(&osb->recovery_event); |
941 | 941 | ||
942 | mlog_exit_void(); | 942 | mlog_exit_void(); |
943 | } | 943 | } |
944 | 944 | ||
945 | /* Does the actual journal replay and marks the journal inode as | 945 | /* Does the actual journal replay and marks the journal inode as |
946 | * clean. Will only replay if the journal inode is marked dirty. */ | 946 | * clean. Will only replay if the journal inode is marked dirty. */ |
947 | static int ocfs2_replay_journal(struct ocfs2_super *osb, | 947 | static int ocfs2_replay_journal(struct ocfs2_super *osb, |
948 | int node_num, | 948 | int node_num, |
949 | int slot_num) | 949 | int slot_num) |
950 | { | 950 | { |
951 | int status; | 951 | int status; |
952 | int got_lock = 0; | 952 | int got_lock = 0; |
953 | unsigned int flags; | 953 | unsigned int flags; |
954 | struct inode *inode = NULL; | 954 | struct inode *inode = NULL; |
955 | struct ocfs2_dinode *fe; | 955 | struct ocfs2_dinode *fe; |
956 | journal_t *journal = NULL; | 956 | journal_t *journal = NULL; |
957 | struct buffer_head *bh = NULL; | 957 | struct buffer_head *bh = NULL; |
958 | 958 | ||
959 | inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, | 959 | inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, |
960 | slot_num); | 960 | slot_num); |
961 | if (inode == NULL) { | 961 | if (inode == NULL) { |
962 | status = -EACCES; | 962 | status = -EACCES; |
963 | mlog_errno(status); | 963 | mlog_errno(status); |
964 | goto done; | 964 | goto done; |
965 | } | 965 | } |
966 | if (is_bad_inode(inode)) { | 966 | if (is_bad_inode(inode)) { |
967 | status = -EACCES; | 967 | status = -EACCES; |
968 | iput(inode); | 968 | iput(inode); |
969 | inode = NULL; | 969 | inode = NULL; |
970 | mlog_errno(status); | 970 | mlog_errno(status); |
971 | goto done; | 971 | goto done; |
972 | } | 972 | } |
973 | SET_INODE_JOURNAL(inode); | 973 | SET_INODE_JOURNAL(inode); |
974 | 974 | ||
975 | status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); | 975 | status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); |
976 | if (status < 0) { | 976 | if (status < 0) { |
977 | mlog(0, "status returned from ocfs2_meta_lock=%d\n", status); | 977 | mlog(0, "status returned from ocfs2_meta_lock=%d\n", status); |
978 | if (status != -ERESTARTSYS) | 978 | if (status != -ERESTARTSYS) |
979 | mlog(ML_ERROR, "Could not lock journal!\n"); | 979 | mlog(ML_ERROR, "Could not lock journal!\n"); |
980 | goto done; | 980 | goto done; |
981 | } | 981 | } |
982 | got_lock = 1; | 982 | got_lock = 1; |
983 | 983 | ||
984 | fe = (struct ocfs2_dinode *) bh->b_data; | 984 | fe = (struct ocfs2_dinode *) bh->b_data; |
985 | 985 | ||
986 | flags = le32_to_cpu(fe->id1.journal1.ij_flags); | 986 | flags = le32_to_cpu(fe->id1.journal1.ij_flags); |
987 | 987 | ||
988 | if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { | 988 | if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { |
989 | mlog(0, "No recovery required for node %d\n", node_num); | 989 | mlog(0, "No recovery required for node %d\n", node_num); |
990 | goto done; | 990 | goto done; |
991 | } | 991 | } |
992 | 992 | ||
993 | mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n", | 993 | mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n", |
994 | node_num, slot_num, | 994 | node_num, slot_num, |
995 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 995 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
996 | 996 | ||
997 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | 997 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); |
998 | 998 | ||
999 | status = ocfs2_force_read_journal(inode); | 999 | status = ocfs2_force_read_journal(inode); |
1000 | if (status < 0) { | 1000 | if (status < 0) { |
1001 | mlog_errno(status); | 1001 | mlog_errno(status); |
1002 | goto done; | 1002 | goto done; |
1003 | } | 1003 | } |
1004 | 1004 | ||
1005 | mlog(0, "calling journal_init_inode\n"); | 1005 | mlog(0, "calling journal_init_inode\n"); |
1006 | journal = journal_init_inode(inode); | 1006 | journal = journal_init_inode(inode); |
1007 | if (journal == NULL) { | 1007 | if (journal == NULL) { |
1008 | mlog(ML_ERROR, "Linux journal layer error\n"); | 1008 | mlog(ML_ERROR, "Linux journal layer error\n"); |
1009 | status = -EIO; | 1009 | status = -EIO; |
1010 | goto done; | 1010 | goto done; |
1011 | } | 1011 | } |
1012 | 1012 | ||
1013 | status = journal_load(journal); | 1013 | status = journal_load(journal); |
1014 | if (status < 0) { | 1014 | if (status < 0) { |
1015 | mlog_errno(status); | 1015 | mlog_errno(status); |
1016 | if (!igrab(inode)) | 1016 | if (!igrab(inode)) |
1017 | BUG(); | 1017 | BUG(); |
1018 | journal_destroy(journal); | 1018 | journal_destroy(journal); |
1019 | goto done; | 1019 | goto done; |
1020 | } | 1020 | } |
1021 | 1021 | ||
1022 | ocfs2_clear_journal_error(osb->sb, journal, slot_num); | 1022 | ocfs2_clear_journal_error(osb->sb, journal, slot_num); |
1023 | 1023 | ||
1024 | /* wipe the journal */ | 1024 | /* wipe the journal */ |
1025 | mlog(0, "flushing the journal.\n"); | 1025 | mlog(0, "flushing the journal.\n"); |
1026 | journal_lock_updates(journal); | 1026 | journal_lock_updates(journal); |
1027 | status = journal_flush(journal); | 1027 | status = journal_flush(journal); |
1028 | journal_unlock_updates(journal); | 1028 | journal_unlock_updates(journal); |
1029 | if (status < 0) | 1029 | if (status < 0) |
1030 | mlog_errno(status); | 1030 | mlog_errno(status); |
1031 | 1031 | ||
1032 | /* This will mark the node clean */ | 1032 | /* This will mark the node clean */ |
1033 | flags = le32_to_cpu(fe->id1.journal1.ij_flags); | 1033 | flags = le32_to_cpu(fe->id1.journal1.ij_flags); |
1034 | flags &= ~OCFS2_JOURNAL_DIRTY_FL; | 1034 | flags &= ~OCFS2_JOURNAL_DIRTY_FL; |
1035 | fe->id1.journal1.ij_flags = cpu_to_le32(flags); | 1035 | fe->id1.journal1.ij_flags = cpu_to_le32(flags); |
1036 | 1036 | ||
1037 | status = ocfs2_write_block(osb, bh, inode); | 1037 | status = ocfs2_write_block(osb, bh, inode); |
1038 | if (status < 0) | 1038 | if (status < 0) |
1039 | mlog_errno(status); | 1039 | mlog_errno(status); |
1040 | 1040 | ||
1041 | if (!igrab(inode)) | 1041 | if (!igrab(inode)) |
1042 | BUG(); | 1042 | BUG(); |
1043 | 1043 | ||
1044 | journal_destroy(journal); | 1044 | journal_destroy(journal); |
1045 | 1045 | ||
1046 | done: | 1046 | done: |
1047 | /* drop the lock on this nodes journal */ | 1047 | /* drop the lock on this nodes journal */ |
1048 | if (got_lock) | 1048 | if (got_lock) |
1049 | ocfs2_meta_unlock(inode, 1); | 1049 | ocfs2_meta_unlock(inode, 1); |
1050 | 1050 | ||
1051 | if (inode) | 1051 | if (inode) |
1052 | iput(inode); | 1052 | iput(inode); |
1053 | 1053 | ||
1054 | if (bh) | 1054 | if (bh) |
1055 | brelse(bh); | 1055 | brelse(bh); |
1056 | 1056 | ||
1057 | mlog_exit(status); | 1057 | mlog_exit(status); |
1058 | return status; | 1058 | return status; |
1059 | } | 1059 | } |
1060 | 1060 | ||
1061 | /* | 1061 | /* |
1062 | * Do the most important parts of node recovery: | 1062 | * Do the most important parts of node recovery: |
1063 | * - Replay it's journal | 1063 | * - Replay it's journal |
1064 | * - Stamp a clean local allocator file | 1064 | * - Stamp a clean local allocator file |
1065 | * - Stamp a clean truncate log | 1065 | * - Stamp a clean truncate log |
1066 | * - Mark the node clean | 1066 | * - Mark the node clean |
1067 | * | 1067 | * |
1068 | * If this function completes without error, a node in OCFS2 can be | 1068 | * If this function completes without error, a node in OCFS2 can be |
1069 | * said to have been safely recovered. As a result, failure during the | 1069 | * said to have been safely recovered. As a result, failure during the |
1070 | * second part of a nodes recovery process (local alloc recovery) is | 1070 | * second part of a nodes recovery process (local alloc recovery) is |
1071 | * far less concerning. | 1071 | * far less concerning. |
1072 | */ | 1072 | */ |
1073 | static int ocfs2_recover_node(struct ocfs2_super *osb, | 1073 | static int ocfs2_recover_node(struct ocfs2_super *osb, |
1074 | int node_num) | 1074 | int node_num) |
1075 | { | 1075 | { |
1076 | int status = 0; | 1076 | int status = 0; |
1077 | int slot_num; | 1077 | int slot_num; |
1078 | struct ocfs2_slot_info *si = osb->slot_info; | 1078 | struct ocfs2_slot_info *si = osb->slot_info; |
1079 | struct ocfs2_dinode *la_copy = NULL; | 1079 | struct ocfs2_dinode *la_copy = NULL; |
1080 | struct ocfs2_dinode *tl_copy = NULL; | 1080 | struct ocfs2_dinode *tl_copy = NULL; |
1081 | 1081 | ||
1082 | mlog_entry("(node_num=%d, osb->node_num = %d)\n", | 1082 | mlog_entry("(node_num=%d, osb->node_num = %d)\n", |
1083 | node_num, osb->node_num); | 1083 | node_num, osb->node_num); |
1084 | 1084 | ||
1085 | mlog(0, "checking node %d\n", node_num); | 1085 | mlog(0, "checking node %d\n", node_num); |
1086 | 1086 | ||
1087 | /* Should not ever be called to recover ourselves -- in that | 1087 | /* Should not ever be called to recover ourselves -- in that |
1088 | * case we should've called ocfs2_journal_load instead. */ | 1088 | * case we should've called ocfs2_journal_load instead. */ |
1089 | BUG_ON(osb->node_num == node_num); | 1089 | BUG_ON(osb->node_num == node_num); |
1090 | 1090 | ||
1091 | slot_num = ocfs2_node_num_to_slot(si, node_num); | 1091 | slot_num = ocfs2_node_num_to_slot(si, node_num); |
1092 | if (slot_num == OCFS2_INVALID_SLOT) { | 1092 | if (slot_num == OCFS2_INVALID_SLOT) { |
1093 | status = 0; | 1093 | status = 0; |
1094 | mlog(0, "no slot for this node, so no recovery required.\n"); | 1094 | mlog(0, "no slot for this node, so no recovery required.\n"); |
1095 | goto done; | 1095 | goto done; |
1096 | } | 1096 | } |
1097 | 1097 | ||
1098 | mlog(0, "node %d was using slot %d\n", node_num, slot_num); | 1098 | mlog(0, "node %d was using slot %d\n", node_num, slot_num); |
1099 | 1099 | ||
1100 | status = ocfs2_replay_journal(osb, node_num, slot_num); | 1100 | status = ocfs2_replay_journal(osb, node_num, slot_num); |
1101 | if (status < 0) { | 1101 | if (status < 0) { |
1102 | mlog_errno(status); | 1102 | mlog_errno(status); |
1103 | goto done; | 1103 | goto done; |
1104 | } | 1104 | } |
1105 | 1105 | ||
1106 | /* Stamp a clean local alloc file AFTER recovering the journal... */ | 1106 | /* Stamp a clean local alloc file AFTER recovering the journal... */ |
1107 | status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy); | 1107 | status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy); |
1108 | if (status < 0) { | 1108 | if (status < 0) { |
1109 | mlog_errno(status); | 1109 | mlog_errno(status); |
1110 | goto done; | 1110 | goto done; |
1111 | } | 1111 | } |
1112 | 1112 | ||
1113 | /* An error from begin_truncate_log_recovery is not | 1113 | /* An error from begin_truncate_log_recovery is not |
1114 | * serious enough to warrant halting the rest of | 1114 | * serious enough to warrant halting the rest of |
1115 | * recovery. */ | 1115 | * recovery. */ |
1116 | status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy); | 1116 | status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy); |
1117 | if (status < 0) | 1117 | if (status < 0) |
1118 | mlog_errno(status); | 1118 | mlog_errno(status); |
1119 | 1119 | ||
1120 | /* Likewise, this would be a strange but ultimately not so | 1120 | /* Likewise, this would be a strange but ultimately not so |
1121 | * harmful place to get an error... */ | 1121 | * harmful place to get an error... */ |
1122 | ocfs2_clear_slot(si, slot_num); | 1122 | ocfs2_clear_slot(si, slot_num); |
1123 | status = ocfs2_update_disk_slots(osb, si); | 1123 | status = ocfs2_update_disk_slots(osb, si); |
1124 | if (status < 0) | 1124 | if (status < 0) |
1125 | mlog_errno(status); | 1125 | mlog_errno(status); |
1126 | 1126 | ||
1127 | /* This will kfree the memory pointed to by la_copy and tl_copy */ | 1127 | /* This will kfree the memory pointed to by la_copy and tl_copy */ |
1128 | ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy, | 1128 | ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy, |
1129 | tl_copy); | 1129 | tl_copy); |
1130 | 1130 | ||
1131 | status = 0; | 1131 | status = 0; |
1132 | done: | 1132 | done: |
1133 | 1133 | ||
1134 | mlog_exit(status); | 1134 | mlog_exit(status); |
1135 | return status; | 1135 | return status; |
1136 | } | 1136 | } |
1137 | 1137 | ||
1138 | /* Test node liveness by trylocking his journal. If we get the lock, | 1138 | /* Test node liveness by trylocking his journal. If we get the lock, |
1139 | * we drop it here. Return 0 if we got the lock, -EAGAIN if node is | 1139 | * we drop it here. Return 0 if we got the lock, -EAGAIN if node is |
1140 | * still alive (we couldn't get the lock) and < 0 on error. */ | 1140 | * still alive (we couldn't get the lock) and < 0 on error. */ |
1141 | static int ocfs2_trylock_journal(struct ocfs2_super *osb, | 1141 | static int ocfs2_trylock_journal(struct ocfs2_super *osb, |
1142 | int slot_num) | 1142 | int slot_num) |
1143 | { | 1143 | { |
1144 | int status, flags; | 1144 | int status, flags; |
1145 | struct inode *inode = NULL; | 1145 | struct inode *inode = NULL; |
1146 | 1146 | ||
1147 | inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, | 1147 | inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, |
1148 | slot_num); | 1148 | slot_num); |
1149 | if (inode == NULL) { | 1149 | if (inode == NULL) { |
1150 | mlog(ML_ERROR, "access error\n"); | 1150 | mlog(ML_ERROR, "access error\n"); |
1151 | status = -EACCES; | 1151 | status = -EACCES; |
1152 | goto bail; | 1152 | goto bail; |
1153 | } | 1153 | } |
1154 | if (is_bad_inode(inode)) { | 1154 | if (is_bad_inode(inode)) { |
1155 | mlog(ML_ERROR, "access error (bad inode)\n"); | 1155 | mlog(ML_ERROR, "access error (bad inode)\n"); |
1156 | iput(inode); | 1156 | iput(inode); |
1157 | inode = NULL; | 1157 | inode = NULL; |
1158 | status = -EACCES; | 1158 | status = -EACCES; |
1159 | goto bail; | 1159 | goto bail; |
1160 | } | 1160 | } |
1161 | SET_INODE_JOURNAL(inode); | 1161 | SET_INODE_JOURNAL(inode); |
1162 | 1162 | ||
1163 | flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE; | 1163 | flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE; |
1164 | status = ocfs2_meta_lock_full(inode, NULL, 1, flags); | 1164 | status = ocfs2_meta_lock_full(inode, NULL, 1, flags); |
1165 | if (status < 0) { | 1165 | if (status < 0) { |
1166 | if (status != -EAGAIN) | 1166 | if (status != -EAGAIN) |
1167 | mlog_errno(status); | 1167 | mlog_errno(status); |
1168 | goto bail; | 1168 | goto bail; |
1169 | } | 1169 | } |
1170 | 1170 | ||
1171 | ocfs2_meta_unlock(inode, 1); | 1171 | ocfs2_meta_unlock(inode, 1); |
1172 | bail: | 1172 | bail: |
1173 | if (inode) | 1173 | if (inode) |
1174 | iput(inode); | 1174 | iput(inode); |
1175 | 1175 | ||
1176 | return status; | 1176 | return status; |
1177 | } | 1177 | } |
1178 | 1178 | ||
1179 | /* Call this underneath ocfs2_super_lock. It also assumes that the | 1179 | /* Call this underneath ocfs2_super_lock. It also assumes that the |
1180 | * slot info struct has been updated from disk. */ | 1180 | * slot info struct has been updated from disk. */ |
1181 | int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | 1181 | int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) |
1182 | { | 1182 | { |
1183 | int status, i, node_num; | 1183 | int status, i, node_num; |
1184 | struct ocfs2_slot_info *si = osb->slot_info; | 1184 | struct ocfs2_slot_info *si = osb->slot_info; |
1185 | 1185 | ||
1186 | /* This is called with the super block cluster lock, so we | 1186 | /* This is called with the super block cluster lock, so we |
1187 | * know that the slot map can't change underneath us. */ | 1187 | * know that the slot map can't change underneath us. */ |
1188 | 1188 | ||
1189 | spin_lock(&si->si_lock); | 1189 | spin_lock(&si->si_lock); |
1190 | for(i = 0; i < si->si_num_slots; i++) { | 1190 | for(i = 0; i < si->si_num_slots; i++) { |
1191 | if (i == osb->slot_num) | 1191 | if (i == osb->slot_num) |
1192 | continue; | 1192 | continue; |
1193 | if (ocfs2_is_empty_slot(si, i)) | 1193 | if (ocfs2_is_empty_slot(si, i)) |
1194 | continue; | 1194 | continue; |
1195 | 1195 | ||
1196 | node_num = si->si_global_node_nums[i]; | 1196 | node_num = si->si_global_node_nums[i]; |
1197 | if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num)) | 1197 | if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num)) |
1198 | continue; | 1198 | continue; |
1199 | spin_unlock(&si->si_lock); | 1199 | spin_unlock(&si->si_lock); |
1200 | 1200 | ||
1201 | /* Ok, we have a slot occupied by another node which | 1201 | /* Ok, we have a slot occupied by another node which |
1202 | * is not in the recovery map. We trylock his journal | 1202 | * is not in the recovery map. We trylock his journal |
1203 | * file here to test if he's alive. */ | 1203 | * file here to test if he's alive. */ |
1204 | status = ocfs2_trylock_journal(osb, i); | 1204 | status = ocfs2_trylock_journal(osb, i); |
1205 | if (!status) { | 1205 | if (!status) { |
1206 | /* Since we're called from mount, we know that | 1206 | /* Since we're called from mount, we know that |
1207 | * the recovery thread can't race us on | 1207 | * the recovery thread can't race us on |
1208 | * setting / checking the recovery bits. */ | 1208 | * setting / checking the recovery bits. */ |
1209 | ocfs2_recovery_thread(osb, node_num); | 1209 | ocfs2_recovery_thread(osb, node_num); |
1210 | } else if ((status < 0) && (status != -EAGAIN)) { | 1210 | } else if ((status < 0) && (status != -EAGAIN)) { |
1211 | mlog_errno(status); | 1211 | mlog_errno(status); |
1212 | goto bail; | 1212 | goto bail; |
1213 | } | 1213 | } |
1214 | 1214 | ||
1215 | spin_lock(&si->si_lock); | 1215 | spin_lock(&si->si_lock); |
1216 | } | 1216 | } |
1217 | spin_unlock(&si->si_lock); | 1217 | spin_unlock(&si->si_lock); |
1218 | 1218 | ||
1219 | status = 0; | 1219 | status = 0; |
1220 | bail: | 1220 | bail: |
1221 | mlog_exit(status); | 1221 | mlog_exit(status); |
1222 | return status; | 1222 | return status; |
1223 | } | 1223 | } |
1224 | 1224 | ||
1225 | static int ocfs2_queue_orphans(struct ocfs2_super *osb, | 1225 | static int ocfs2_queue_orphans(struct ocfs2_super *osb, |
1226 | int slot, | 1226 | int slot, |
1227 | struct inode **head) | 1227 | struct inode **head) |
1228 | { | 1228 | { |
1229 | int status; | 1229 | int status; |
1230 | struct inode *orphan_dir_inode = NULL; | 1230 | struct inode *orphan_dir_inode = NULL; |
1231 | struct inode *iter; | 1231 | struct inode *iter; |
1232 | unsigned long offset, blk, local; | 1232 | unsigned long offset, blk, local; |
1233 | struct buffer_head *bh = NULL; | 1233 | struct buffer_head *bh = NULL; |
1234 | struct ocfs2_dir_entry *de; | 1234 | struct ocfs2_dir_entry *de; |
1235 | struct super_block *sb = osb->sb; | 1235 | struct super_block *sb = osb->sb; |
1236 | 1236 | ||
1237 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 1237 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
1238 | ORPHAN_DIR_SYSTEM_INODE, | 1238 | ORPHAN_DIR_SYSTEM_INODE, |
1239 | slot); | 1239 | slot); |
1240 | if (!orphan_dir_inode) { | 1240 | if (!orphan_dir_inode) { |
1241 | status = -ENOENT; | 1241 | status = -ENOENT; |
1242 | mlog_errno(status); | 1242 | mlog_errno(status); |
1243 | return status; | 1243 | return status; |
1244 | } | 1244 | } |
1245 | 1245 | ||
1246 | mutex_lock(&orphan_dir_inode->i_mutex); | 1246 | mutex_lock(&orphan_dir_inode->i_mutex); |
1247 | status = ocfs2_meta_lock(orphan_dir_inode, NULL, 0); | 1247 | status = ocfs2_meta_lock(orphan_dir_inode, NULL, 0); |
1248 | if (status < 0) { | 1248 | if (status < 0) { |
1249 | mlog_errno(status); | 1249 | mlog_errno(status); |
1250 | goto out; | 1250 | goto out; |
1251 | } | 1251 | } |
1252 | 1252 | ||
1253 | offset = 0; | 1253 | offset = 0; |
1254 | iter = NULL; | 1254 | iter = NULL; |
1255 | while(offset < i_size_read(orphan_dir_inode)) { | 1255 | while(offset < i_size_read(orphan_dir_inode)) { |
1256 | blk = offset >> sb->s_blocksize_bits; | 1256 | blk = offset >> sb->s_blocksize_bits; |
1257 | 1257 | ||
1258 | bh = ocfs2_bread(orphan_dir_inode, blk, &status, 0); | 1258 | bh = ocfs2_bread(orphan_dir_inode, blk, &status, 0); |
1259 | if (!bh) | 1259 | if (!bh) |
1260 | status = -EINVAL; | 1260 | status = -EINVAL; |
1261 | if (status < 0) { | 1261 | if (status < 0) { |
1262 | if (bh) | 1262 | if (bh) |
1263 | brelse(bh); | 1263 | brelse(bh); |
1264 | mlog_errno(status); | 1264 | mlog_errno(status); |
1265 | goto out_unlock; | 1265 | goto out_unlock; |
1266 | } | 1266 | } |
1267 | 1267 | ||
1268 | local = 0; | 1268 | local = 0; |
1269 | while(offset < i_size_read(orphan_dir_inode) | 1269 | while(offset < i_size_read(orphan_dir_inode) |
1270 | && local < sb->s_blocksize) { | 1270 | && local < sb->s_blocksize) { |
1271 | de = (struct ocfs2_dir_entry *) (bh->b_data + local); | 1271 | de = (struct ocfs2_dir_entry *) (bh->b_data + local); |
1272 | 1272 | ||
1273 | if (!ocfs2_check_dir_entry(orphan_dir_inode, | 1273 | if (!ocfs2_check_dir_entry(orphan_dir_inode, |
1274 | de, bh, local)) { | 1274 | de, bh, local)) { |
1275 | status = -EINVAL; | 1275 | status = -EINVAL; |
1276 | mlog_errno(status); | 1276 | mlog_errno(status); |
1277 | brelse(bh); | 1277 | brelse(bh); |
1278 | goto out_unlock; | 1278 | goto out_unlock; |
1279 | } | 1279 | } |
1280 | 1280 | ||
1281 | local += le16_to_cpu(de->rec_len); | 1281 | local += le16_to_cpu(de->rec_len); |
1282 | offset += le16_to_cpu(de->rec_len); | 1282 | offset += le16_to_cpu(de->rec_len); |
1283 | 1283 | ||
1284 | /* I guess we silently fail on no inode? */ | 1284 | /* I guess we silently fail on no inode? */ |
1285 | if (!le64_to_cpu(de->inode)) | 1285 | if (!le64_to_cpu(de->inode)) |
1286 | continue; | 1286 | continue; |
1287 | if (de->file_type > OCFS2_FT_MAX) { | 1287 | if (de->file_type > OCFS2_FT_MAX) { |
1288 | mlog(ML_ERROR, | 1288 | mlog(ML_ERROR, |
1289 | "block %llu contains invalid de: " | 1289 | "block %llu contains invalid de: " |
1290 | "inode = %llu, rec_len = %u, " | 1290 | "inode = %llu, rec_len = %u, " |
1291 | "name_len = %u, file_type = %u, " | 1291 | "name_len = %u, file_type = %u, " |
1292 | "name='%.*s'\n", | 1292 | "name='%.*s'\n", |
1293 | (unsigned long long)bh->b_blocknr, | 1293 | (unsigned long long)bh->b_blocknr, |
1294 | (unsigned long long)le64_to_cpu(de->inode), | 1294 | (unsigned long long)le64_to_cpu(de->inode), |
1295 | le16_to_cpu(de->rec_len), | 1295 | le16_to_cpu(de->rec_len), |
1296 | de->name_len, | 1296 | de->name_len, |
1297 | de->file_type, | 1297 | de->file_type, |
1298 | de->name_len, | 1298 | de->name_len, |
1299 | de->name); | 1299 | de->name); |
1300 | continue; | 1300 | continue; |
1301 | } | 1301 | } |
1302 | if (de->name_len == 1 && !strncmp(".", de->name, 1)) | 1302 | if (de->name_len == 1 && !strncmp(".", de->name, 1)) |
1303 | continue; | 1303 | continue; |
1304 | if (de->name_len == 2 && !strncmp("..", de->name, 2)) | 1304 | if (de->name_len == 2 && !strncmp("..", de->name, 2)) |
1305 | continue; | 1305 | continue; |
1306 | 1306 | ||
1307 | iter = ocfs2_iget(osb, le64_to_cpu(de->inode), | 1307 | iter = ocfs2_iget(osb, le64_to_cpu(de->inode), |
1308 | OCFS2_FI_FLAG_ORPHAN_RECOVERY); | 1308 | OCFS2_FI_FLAG_ORPHAN_RECOVERY); |
1309 | if (IS_ERR(iter)) | 1309 | if (IS_ERR(iter)) |
1310 | continue; | 1310 | continue; |
1311 | 1311 | ||
1312 | mlog(0, "queue orphan %llu\n", | 1312 | mlog(0, "queue orphan %llu\n", |
1313 | (unsigned long long)OCFS2_I(iter)->ip_blkno); | 1313 | (unsigned long long)OCFS2_I(iter)->ip_blkno); |
1314 | /* No locking is required for the next_orphan | 1314 | /* No locking is required for the next_orphan |
1315 | * queue as there is only ever a single | 1315 | * queue as there is only ever a single |
1316 | * process doing orphan recovery. */ | 1316 | * process doing orphan recovery. */ |
1317 | OCFS2_I(iter)->ip_next_orphan = *head; | 1317 | OCFS2_I(iter)->ip_next_orphan = *head; |
1318 | *head = iter; | 1318 | *head = iter; |
1319 | } | 1319 | } |
1320 | brelse(bh); | 1320 | brelse(bh); |
1321 | } | 1321 | } |
1322 | 1322 | ||
1323 | out_unlock: | 1323 | out_unlock: |
1324 | ocfs2_meta_unlock(orphan_dir_inode, 0); | 1324 | ocfs2_meta_unlock(orphan_dir_inode, 0); |
1325 | out: | 1325 | out: |
1326 | mutex_unlock(&orphan_dir_inode->i_mutex); | 1326 | mutex_unlock(&orphan_dir_inode->i_mutex); |
1327 | iput(orphan_dir_inode); | 1327 | iput(orphan_dir_inode); |
1328 | return status; | 1328 | return status; |
1329 | } | 1329 | } |
1330 | 1330 | ||
1331 | static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb, | 1331 | static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb, |
1332 | int slot) | 1332 | int slot) |
1333 | { | 1333 | { |
1334 | int ret; | 1334 | int ret; |
1335 | 1335 | ||
1336 | spin_lock(&osb->osb_lock); | 1336 | spin_lock(&osb->osb_lock); |
1337 | ret = !osb->osb_orphan_wipes[slot]; | 1337 | ret = !osb->osb_orphan_wipes[slot]; |
1338 | spin_unlock(&osb->osb_lock); | 1338 | spin_unlock(&osb->osb_lock); |
1339 | return ret; | 1339 | return ret; |
1340 | } | 1340 | } |
1341 | 1341 | ||
1342 | static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb, | 1342 | static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb, |
1343 | int slot) | 1343 | int slot) |
1344 | { | 1344 | { |
1345 | spin_lock(&osb->osb_lock); | 1345 | spin_lock(&osb->osb_lock); |
1346 | /* Mark ourselves such that new processes in delete_inode() | 1346 | /* Mark ourselves such that new processes in delete_inode() |
1347 | * know to quit early. */ | 1347 | * know to quit early. */ |
1348 | ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot); | 1348 | ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot); |
1349 | while (osb->osb_orphan_wipes[slot]) { | 1349 | while (osb->osb_orphan_wipes[slot]) { |
1350 | /* If any processes are already in the middle of an | 1350 | /* If any processes are already in the middle of an |
1351 | * orphan wipe on this dir, then we need to wait for | 1351 | * orphan wipe on this dir, then we need to wait for |
1352 | * them. */ | 1352 | * them. */ |
1353 | spin_unlock(&osb->osb_lock); | 1353 | spin_unlock(&osb->osb_lock); |
1354 | wait_event_interruptible(osb->osb_wipe_event, | 1354 | wait_event_interruptible(osb->osb_wipe_event, |
1355 | ocfs2_orphan_recovery_can_continue(osb, slot)); | 1355 | ocfs2_orphan_recovery_can_continue(osb, slot)); |
1356 | spin_lock(&osb->osb_lock); | 1356 | spin_lock(&osb->osb_lock); |
1357 | } | 1357 | } |
1358 | spin_unlock(&osb->osb_lock); | 1358 | spin_unlock(&osb->osb_lock); |
1359 | } | 1359 | } |
1360 | 1360 | ||
1361 | static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb, | 1361 | static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb, |
1362 | int slot) | 1362 | int slot) |
1363 | { | 1363 | { |
1364 | ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot); | 1364 | ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot); |
1365 | } | 1365 | } |
1366 | 1366 | ||
1367 | /* | 1367 | /* |
1368 | * Orphan recovery. Each mounted node has its own orphan dir which we | 1368 | * Orphan recovery. Each mounted node has its own orphan dir which we |
1369 | * must run during recovery. Our strategy here is to build a list of | 1369 | * must run during recovery. Our strategy here is to build a list of |
1370 | * the inodes in the orphan dir and iget/iput them. The VFS does | 1370 | * the inodes in the orphan dir and iget/iput them. The VFS does |
1371 | * (most) of the rest of the work. | 1371 | * (most) of the rest of the work. |
1372 | * | 1372 | * |
1373 | * Orphan recovery can happen at any time, not just mount so we have a | 1373 | * Orphan recovery can happen at any time, not just mount so we have a |
1374 | * couple of extra considerations. | 1374 | * couple of extra considerations. |
1375 | * | 1375 | * |
1376 | * - We grab as many inodes as we can under the orphan dir lock - | 1376 | * - We grab as many inodes as we can under the orphan dir lock - |
1377 | * doing iget() outside the orphan dir risks getting a reference on | 1377 | * doing iget() outside the orphan dir risks getting a reference on |
1378 | * an invalid inode. | 1378 | * an invalid inode. |
1379 | * - We must be sure not to deadlock with other processes on the | 1379 | * - We must be sure not to deadlock with other processes on the |
1380 | * system wanting to run delete_inode(). This can happen when they go | 1380 | * system wanting to run delete_inode(). This can happen when they go |
1381 | * to lock the orphan dir and the orphan recovery process attempts to | 1381 | * to lock the orphan dir and the orphan recovery process attempts to |
1382 | * iget() inside the orphan dir lock. This can be avoided by | 1382 | * iget() inside the orphan dir lock. This can be avoided by |
1383 | * advertising our state to ocfs2_delete_inode(). | 1383 | * advertising our state to ocfs2_delete_inode(). |
1384 | */ | 1384 | */ |
1385 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, | 1385 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, |
1386 | int slot) | 1386 | int slot) |
1387 | { | 1387 | { |
1388 | int ret = 0; | 1388 | int ret = 0; |
1389 | struct inode *inode = NULL; | 1389 | struct inode *inode = NULL; |
1390 | struct inode *iter; | 1390 | struct inode *iter; |
1391 | struct ocfs2_inode_info *oi; | 1391 | struct ocfs2_inode_info *oi; |
1392 | 1392 | ||
1393 | mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); | 1393 | mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); |
1394 | 1394 | ||
1395 | ocfs2_mark_recovering_orphan_dir(osb, slot); | 1395 | ocfs2_mark_recovering_orphan_dir(osb, slot); |
1396 | ret = ocfs2_queue_orphans(osb, slot, &inode); | 1396 | ret = ocfs2_queue_orphans(osb, slot, &inode); |
1397 | ocfs2_clear_recovering_orphan_dir(osb, slot); | 1397 | ocfs2_clear_recovering_orphan_dir(osb, slot); |
1398 | 1398 | ||
1399 | /* Error here should be noted, but we want to continue with as | 1399 | /* Error here should be noted, but we want to continue with as |
1400 | * many queued inodes as we've got. */ | 1400 | * many queued inodes as we've got. */ |
1401 | if (ret) | 1401 | if (ret) |
1402 | mlog_errno(ret); | 1402 | mlog_errno(ret); |
1403 | 1403 | ||
1404 | while (inode) { | 1404 | while (inode) { |
1405 | oi = OCFS2_I(inode); | 1405 | oi = OCFS2_I(inode); |
1406 | mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno); | 1406 | mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno); |
1407 | 1407 | ||
1408 | iter = oi->ip_next_orphan; | 1408 | iter = oi->ip_next_orphan; |
1409 | 1409 | ||
1410 | spin_lock(&oi->ip_lock); | 1410 | spin_lock(&oi->ip_lock); |
1411 | /* Delete voting may have set these on the assumption | 1411 | /* Delete voting may have set these on the assumption |
1412 | * that the other node would wipe them successfully. | 1412 | * that the other node would wipe them successfully. |
1413 | * If they are still in the node's orphan dir, we need | 1413 | * If they are still in the node's orphan dir, we need |
1414 | * to reset that state. */ | 1414 | * to reset that state. */ |
1415 | oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); | 1415 | oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); |
1416 | 1416 | ||
1417 | /* Set the proper information to get us going into | 1417 | /* Set the proper information to get us going into |
1418 | * ocfs2_delete_inode. */ | 1418 | * ocfs2_delete_inode. */ |
1419 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; | 1419 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; |
1420 | spin_unlock(&oi->ip_lock); | 1420 | spin_unlock(&oi->ip_lock); |
1421 | 1421 | ||
1422 | iput(inode); | 1422 | iput(inode); |
1423 | 1423 | ||
1424 | inode = iter; | 1424 | inode = iter; |
1425 | } | 1425 | } |
1426 | 1426 | ||
1427 | return ret; | 1427 | return ret; |
1428 | } | 1428 | } |
1429 | 1429 | ||
1430 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb) | 1430 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb) |
1431 | { | 1431 | { |
1432 | /* This check is good because ocfs2 will wait on our recovery | 1432 | /* This check is good because ocfs2 will wait on our recovery |
1433 | * thread before changing it to something other than MOUNTED | 1433 | * thread before changing it to something other than MOUNTED |
1434 | * or DISABLED. */ | 1434 | * or DISABLED. */ |
1435 | wait_event(osb->osb_mount_event, | 1435 | wait_event(osb->osb_mount_event, |
1436 | atomic_read(&osb->vol_state) == VOLUME_MOUNTED || | 1436 | atomic_read(&osb->vol_state) == VOLUME_MOUNTED || |
1437 | atomic_read(&osb->vol_state) == VOLUME_DISABLED); | 1437 | atomic_read(&osb->vol_state) == VOLUME_DISABLED); |
1438 | 1438 | ||
1439 | /* If there's an error on mount, then we may never get to the | 1439 | /* If there's an error on mount, then we may never get to the |
1440 | * MOUNTED flag, but this is set right before | 1440 | * MOUNTED flag, but this is set right before |
1441 | * dismount_volume() so we can trust it. */ | 1441 | * dismount_volume() so we can trust it. */ |
1442 | if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) { | 1442 | if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) { |
1443 | mlog(0, "mount error, exiting!\n"); | 1443 | mlog(0, "mount error, exiting!\n"); |
1444 | return -EBUSY; | 1444 | return -EBUSY; |
1445 | } | 1445 | } |
1446 | 1446 | ||
1447 | return 0; | 1447 | return 0; |
1448 | } | 1448 | } |
1449 | 1449 | ||
1450 | static int ocfs2_commit_thread(void *arg) | 1450 | static int ocfs2_commit_thread(void *arg) |
1451 | { | 1451 | { |
1452 | int status; | 1452 | int status; |
1453 | struct ocfs2_super *osb = arg; | 1453 | struct ocfs2_super *osb = arg; |
1454 | struct ocfs2_journal *journal = osb->journal; | 1454 | struct ocfs2_journal *journal = osb->journal; |
1455 | 1455 | ||
1456 | /* we can trust j_num_trans here because _should_stop() is only set in | 1456 | /* we can trust j_num_trans here because _should_stop() is only set in |
1457 | * shutdown and nobody other than ourselves should be able to start | 1457 | * shutdown and nobody other than ourselves should be able to start |
1458 | * transactions. committing on shutdown might take a few iterations | 1458 | * transactions. committing on shutdown might take a few iterations |
1459 | * as final transactions put deleted inodes on the list */ | 1459 | * as final transactions put deleted inodes on the list */ |
1460 | while (!(kthread_should_stop() && | 1460 | while (!(kthread_should_stop() && |
1461 | atomic_read(&journal->j_num_trans) == 0)) { | 1461 | atomic_read(&journal->j_num_trans) == 0)) { |
1462 | 1462 | ||
1463 | wait_event_interruptible(osb->checkpoint_event, | 1463 | wait_event_interruptible(osb->checkpoint_event, |
1464 | atomic_read(&journal->j_num_trans) | 1464 | atomic_read(&journal->j_num_trans) |
1465 | || kthread_should_stop()); | 1465 | || kthread_should_stop()); |
1466 | 1466 | ||
1467 | status = ocfs2_commit_cache(osb); | 1467 | status = ocfs2_commit_cache(osb); |
1468 | if (status < 0) | 1468 | if (status < 0) |
1469 | mlog_errno(status); | 1469 | mlog_errno(status); |
1470 | 1470 | ||
1471 | if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){ | 1471 | if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){ |
1472 | mlog(ML_KTHREAD, | 1472 | mlog(ML_KTHREAD, |
1473 | "commit_thread: %u transactions pending on " | 1473 | "commit_thread: %u transactions pending on " |
1474 | "shutdown\n", | 1474 | "shutdown\n", |
1475 | atomic_read(&journal->j_num_trans)); | 1475 | atomic_read(&journal->j_num_trans)); |
1476 | } | 1476 | } |
1477 | } | 1477 | } |
1478 | 1478 | ||
1479 | return 0; | 1479 | return 0; |
1480 | } | 1480 | } |
1481 | 1481 | ||
1482 | /* Look for a dirty journal without taking any cluster locks. Used for | 1482 | /* Look for a dirty journal without taking any cluster locks. Used for |
1483 | * hard readonly access to determine whether the file system journals | 1483 | * hard readonly access to determine whether the file system journals |
1484 | * require recovery. */ | 1484 | * require recovery. */ |
1485 | int ocfs2_check_journals_nolocks(struct ocfs2_super *osb) | 1485 | int ocfs2_check_journals_nolocks(struct ocfs2_super *osb) |
1486 | { | 1486 | { |
1487 | int ret = 0; | 1487 | int ret = 0; |
1488 | unsigned int slot; | 1488 | unsigned int slot; |
1489 | struct buffer_head *di_bh; | 1489 | struct buffer_head *di_bh; |
1490 | struct ocfs2_dinode *di; | 1490 | struct ocfs2_dinode *di; |
1491 | struct inode *journal = NULL; | 1491 | struct inode *journal = NULL; |
1492 | 1492 | ||
1493 | for(slot = 0; slot < osb->max_slots; slot++) { | 1493 | for(slot = 0; slot < osb->max_slots; slot++) { |
1494 | journal = ocfs2_get_system_file_inode(osb, | 1494 | journal = ocfs2_get_system_file_inode(osb, |
1495 | JOURNAL_SYSTEM_INODE, | 1495 | JOURNAL_SYSTEM_INODE, |
1496 | slot); | 1496 | slot); |
1497 | if (!journal || is_bad_inode(journal)) { | 1497 | if (!journal || is_bad_inode(journal)) { |
1498 | ret = -EACCES; | 1498 | ret = -EACCES; |
1499 | mlog_errno(ret); | 1499 | mlog_errno(ret); |
1500 | goto out; | 1500 | goto out; |
1501 | } | 1501 | } |
1502 | 1502 | ||
1503 | di_bh = NULL; | 1503 | di_bh = NULL; |
1504 | ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh, | 1504 | ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh, |
1505 | 0, journal); | 1505 | 0, journal); |
1506 | if (ret < 0) { | 1506 | if (ret < 0) { |
1507 | mlog_errno(ret); | 1507 | mlog_errno(ret); |
1508 | goto out; | 1508 | goto out; |
1509 | } | 1509 | } |
1510 | 1510 | ||
1511 | di = (struct ocfs2_dinode *) di_bh->b_data; | 1511 | di = (struct ocfs2_dinode *) di_bh->b_data; |
1512 | 1512 | ||
1513 | if (le32_to_cpu(di->id1.journal1.ij_flags) & | 1513 | if (le32_to_cpu(di->id1.journal1.ij_flags) & |
1514 | OCFS2_JOURNAL_DIRTY_FL) | 1514 | OCFS2_JOURNAL_DIRTY_FL) |
1515 | ret = -EROFS; | 1515 | ret = -EROFS; |
1516 | 1516 | ||
1517 | brelse(di_bh); | 1517 | brelse(di_bh); |
1518 | if (ret) | 1518 | if (ret) |
1519 | break; | 1519 | break; |
1520 | } | 1520 | } |
1521 | 1521 | ||
1522 | out: | 1522 | out: |
1523 | if (journal) | 1523 | if (journal) |
1524 | iput(journal); | 1524 | iput(journal); |
1525 | 1525 | ||
1526 | return ret; | 1526 | return ret; |
1527 | } | 1527 | } |
1528 | 1528 |
fs/ocfs2/namei.c
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * namei.c | 4 | * namei.c |
5 | * | 5 | * |
6 | * Create and rename file, directory, symlinks | 6 | * Create and rename file, directory, symlinks |
7 | * | 7 | * |
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. |
9 | * | 9 | * |
10 | * Portions of this code from linux/fs/ext3/dir.c | 10 | * Portions of this code from linux/fs/ext3/dir.c |
11 | * | 11 | * |
12 | * Copyright (C) 1992, 1993, 1994, 1995 | 12 | * Copyright (C) 1992, 1993, 1994, 1995 |
13 | * Remy Card (card@masi.ibp.fr) | 13 | * Remy Card (card@masi.ibp.fr) |
14 | * Laboratoire MASI - Institut Blaise Pascal | 14 | * Laboratoire MASI - Institut Blaise Pascal |
15 | * Universite Pierre et Marie Curie (Paris VI) | 15 | * Universite Pierre et Marie Curie (Paris VI) |
16 | * | 16 | * |
17 | * from | 17 | * from |
18 | * | 18 | * |
19 | * linux/fs/minix/dir.c | 19 | * linux/fs/minix/dir.c |
20 | * | 20 | * |
21 | * Copyright (C) 1991, 1992 Linus Torvalds | 21 | * Copyright (C) 1991, 1992 Linus Torvalds |
22 | * | 22 | * |
23 | * This program is free software; you can redistribute it and/or | 23 | * This program is free software; you can redistribute it and/or |
24 | * modify it under the terms of the GNU General Public | 24 | * modify it under the terms of the GNU General Public |
25 | * License as published by the Free Software Foundation; either | 25 | * License as published by the Free Software Foundation; either |
26 | * version 2 of the License, or (at your option) any later version. | 26 | * version 2 of the License, or (at your option) any later version. |
27 | * | 27 | * |
28 | * This program is distributed in the hope that it will be useful, | 28 | * This program is distributed in the hope that it will be useful, |
29 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 29 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 30 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
31 | * General Public License for more details. | 31 | * General Public License for more details. |
32 | * | 32 | * |
33 | * You should have received a copy of the GNU General Public | 33 | * You should have received a copy of the GNU General Public |
34 | * License along with this program; if not, write to the | 34 | * License along with this program; if not, write to the |
35 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 35 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
36 | * Boston, MA 02111-1307, USA. | 36 | * Boston, MA 02111-1307, USA. |
37 | */ | 37 | */ |
38 | 38 | ||
39 | #include <linux/fs.h> | 39 | #include <linux/fs.h> |
40 | #include <linux/types.h> | 40 | #include <linux/types.h> |
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/highmem.h> | 42 | #include <linux/highmem.h> |
43 | 43 | ||
44 | #define MLOG_MASK_PREFIX ML_NAMEI | 44 | #define MLOG_MASK_PREFIX ML_NAMEI |
45 | #include <cluster/masklog.h> | 45 | #include <cluster/masklog.h> |
46 | 46 | ||
47 | #include "ocfs2.h" | 47 | #include "ocfs2.h" |
48 | 48 | ||
49 | #include "alloc.h" | 49 | #include "alloc.h" |
50 | #include "dcache.h" | 50 | #include "dcache.h" |
51 | #include "dir.h" | 51 | #include "dir.h" |
52 | #include "dlmglue.h" | 52 | #include "dlmglue.h" |
53 | #include "extent_map.h" | 53 | #include "extent_map.h" |
54 | #include "file.h" | 54 | #include "file.h" |
55 | #include "inode.h" | 55 | #include "inode.h" |
56 | #include "journal.h" | 56 | #include "journal.h" |
57 | #include "namei.h" | 57 | #include "namei.h" |
58 | #include "suballoc.h" | 58 | #include "suballoc.h" |
59 | #include "super.h" | 59 | #include "super.h" |
60 | #include "symlink.h" | 60 | #include "symlink.h" |
61 | #include "sysfile.h" | 61 | #include "sysfile.h" |
62 | #include "uptodate.h" | 62 | #include "uptodate.h" |
63 | #include "vote.h" | 63 | #include "vote.h" |
64 | 64 | ||
65 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
66 | 66 | ||
67 | #define NAMEI_RA_CHUNKS 2 | 67 | #define NAMEI_RA_CHUNKS 2 |
68 | #define NAMEI_RA_BLOCKS 4 | 68 | #define NAMEI_RA_BLOCKS 4 |
69 | #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) | 69 | #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) |
70 | #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) | 70 | #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) |
71 | 71 | ||
72 | static int inline ocfs2_search_dirblock(struct buffer_head *bh, | 72 | static int inline ocfs2_search_dirblock(struct buffer_head *bh, |
73 | struct inode *dir, | 73 | struct inode *dir, |
74 | const char *name, int namelen, | 74 | const char *name, int namelen, |
75 | unsigned long offset, | 75 | unsigned long offset, |
76 | struct ocfs2_dir_entry **res_dir); | 76 | struct ocfs2_dir_entry **res_dir); |
77 | 77 | ||
78 | static int ocfs2_delete_entry(handle_t *handle, | 78 | static int ocfs2_delete_entry(handle_t *handle, |
79 | struct inode *dir, | 79 | struct inode *dir, |
80 | struct ocfs2_dir_entry *de_del, | 80 | struct ocfs2_dir_entry *de_del, |
81 | struct buffer_head *bh); | 81 | struct buffer_head *bh); |
82 | 82 | ||
83 | static int __ocfs2_add_entry(handle_t *handle, | 83 | static int __ocfs2_add_entry(handle_t *handle, |
84 | struct inode *dir, | 84 | struct inode *dir, |
85 | const char *name, int namelen, | 85 | const char *name, int namelen, |
86 | struct inode *inode, u64 blkno, | 86 | struct inode *inode, u64 blkno, |
87 | struct buffer_head *parent_fe_bh, | 87 | struct buffer_head *parent_fe_bh, |
88 | struct buffer_head *insert_bh); | 88 | struct buffer_head *insert_bh); |
89 | 89 | ||
90 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, | 90 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, |
91 | struct inode *dir, | 91 | struct inode *dir, |
92 | struct dentry *dentry, int mode, | 92 | struct dentry *dentry, int mode, |
93 | dev_t dev, | 93 | dev_t dev, |
94 | struct buffer_head **new_fe_bh, | 94 | struct buffer_head **new_fe_bh, |
95 | struct buffer_head *parent_fe_bh, | 95 | struct buffer_head *parent_fe_bh, |
96 | handle_t *handle, | 96 | handle_t *handle, |
97 | struct inode **ret_inode, | 97 | struct inode **ret_inode, |
98 | struct ocfs2_alloc_context *inode_ac); | 98 | struct ocfs2_alloc_context *inode_ac); |
99 | 99 | ||
100 | static int ocfs2_fill_new_dir(struct ocfs2_super *osb, | 100 | static int ocfs2_fill_new_dir(struct ocfs2_super *osb, |
101 | handle_t *handle, | 101 | handle_t *handle, |
102 | struct inode *parent, | 102 | struct inode *parent, |
103 | struct inode *inode, | 103 | struct inode *inode, |
104 | struct buffer_head *fe_bh, | 104 | struct buffer_head *fe_bh, |
105 | struct ocfs2_alloc_context *data_ac); | 105 | struct ocfs2_alloc_context *data_ac); |
106 | 106 | ||
107 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | 107 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, |
108 | struct inode **ret_orphan_dir, | 108 | struct inode **ret_orphan_dir, |
109 | struct inode *inode, | 109 | struct inode *inode, |
110 | char *name, | 110 | char *name, |
111 | struct buffer_head **de_bh); | 111 | struct buffer_head **de_bh); |
112 | 112 | ||
113 | static int ocfs2_orphan_add(struct ocfs2_super *osb, | 113 | static int ocfs2_orphan_add(struct ocfs2_super *osb, |
114 | handle_t *handle, | 114 | handle_t *handle, |
115 | struct inode *inode, | 115 | struct inode *inode, |
116 | struct ocfs2_dinode *fe, | 116 | struct ocfs2_dinode *fe, |
117 | char *name, | 117 | char *name, |
118 | struct buffer_head *de_bh, | 118 | struct buffer_head *de_bh, |
119 | struct inode *orphan_dir_inode); | 119 | struct inode *orphan_dir_inode); |
120 | 120 | ||
121 | static int ocfs2_create_symlink_data(struct ocfs2_super *osb, | 121 | static int ocfs2_create_symlink_data(struct ocfs2_super *osb, |
122 | handle_t *handle, | 122 | handle_t *handle, |
123 | struct inode *inode, | 123 | struct inode *inode, |
124 | const char *symname); | 124 | const char *symname); |
125 | 125 | ||
126 | static inline int ocfs2_add_entry(handle_t *handle, | 126 | static inline int ocfs2_add_entry(handle_t *handle, |
127 | struct dentry *dentry, | 127 | struct dentry *dentry, |
128 | struct inode *inode, u64 blkno, | 128 | struct inode *inode, u64 blkno, |
129 | struct buffer_head *parent_fe_bh, | 129 | struct buffer_head *parent_fe_bh, |
130 | struct buffer_head *insert_bh) | 130 | struct buffer_head *insert_bh) |
131 | { | 131 | { |
132 | return __ocfs2_add_entry(handle, dentry->d_parent->d_inode, | 132 | return __ocfs2_add_entry(handle, dentry->d_parent->d_inode, |
133 | dentry->d_name.name, dentry->d_name.len, | 133 | dentry->d_name.name, dentry->d_name.len, |
134 | inode, blkno, parent_fe_bh, insert_bh); | 134 | inode, blkno, parent_fe_bh, insert_bh); |
135 | } | 135 | } |
136 | 136 | ||
137 | /* An orphan dir name is an 8 byte value, printed as a hex string */ | 137 | /* An orphan dir name is an 8 byte value, printed as a hex string */ |
138 | #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) | 138 | #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) |
139 | 139 | ||
140 | static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, | 140 | static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, |
141 | struct nameidata *nd) | 141 | struct nameidata *nd) |
142 | { | 142 | { |
143 | int status; | 143 | int status; |
144 | u64 blkno; | 144 | u64 blkno; |
145 | struct buffer_head *dirent_bh = NULL; | 145 | struct buffer_head *dirent_bh = NULL; |
146 | struct inode *inode = NULL; | 146 | struct inode *inode = NULL; |
147 | struct dentry *ret; | 147 | struct dentry *ret; |
148 | struct ocfs2_dir_entry *dirent; | 148 | struct ocfs2_dir_entry *dirent; |
149 | struct ocfs2_inode_info *oi; | 149 | struct ocfs2_inode_info *oi; |
150 | 150 | ||
151 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, | 151 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, |
152 | dentry->d_name.len, dentry->d_name.name); | 152 | dentry->d_name.len, dentry->d_name.name); |
153 | 153 | ||
154 | if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) { | 154 | if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) { |
155 | ret = ERR_PTR(-ENAMETOOLONG); | 155 | ret = ERR_PTR(-ENAMETOOLONG); |
156 | goto bail; | 156 | goto bail; |
157 | } | 157 | } |
158 | 158 | ||
159 | mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, | 159 | mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, |
160 | dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); | 160 | dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); |
161 | 161 | ||
162 | status = ocfs2_meta_lock(dir, NULL, 0); | 162 | status = ocfs2_meta_lock(dir, NULL, 0); |
163 | if (status < 0) { | 163 | if (status < 0) { |
164 | if (status != -ENOENT) | 164 | if (status != -ENOENT) |
165 | mlog_errno(status); | 165 | mlog_errno(status); |
166 | ret = ERR_PTR(status); | 166 | ret = ERR_PTR(status); |
167 | goto bail; | 167 | goto bail; |
168 | } | 168 | } |
169 | 169 | ||
170 | status = ocfs2_find_files_on_disk(dentry->d_name.name, | 170 | status = ocfs2_find_files_on_disk(dentry->d_name.name, |
171 | dentry->d_name.len, &blkno, | 171 | dentry->d_name.len, &blkno, |
172 | dir, &dirent_bh, &dirent); | 172 | dir, &dirent_bh, &dirent); |
173 | if (status < 0) | 173 | if (status < 0) |
174 | goto bail_add; | 174 | goto bail_add; |
175 | 175 | ||
176 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); | 176 | inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); |
177 | if (IS_ERR(inode)) { | 177 | if (IS_ERR(inode)) { |
178 | ret = ERR_PTR(-EACCES); | 178 | ret = ERR_PTR(-EACCES); |
179 | goto bail_unlock; | 179 | goto bail_unlock; |
180 | } | 180 | } |
181 | 181 | ||
182 | oi = OCFS2_I(inode); | 182 | oi = OCFS2_I(inode); |
183 | /* Clear any orphaned state... If we were able to look up the | 183 | /* Clear any orphaned state... If we were able to look up the |
184 | * inode from a directory, it certainly can't be orphaned. We | 184 | * inode from a directory, it certainly can't be orphaned. We |
185 | * might have the bad state from a node which intended to | 185 | * might have the bad state from a node which intended to |
186 | * orphan this inode but crashed before it could commit the | 186 | * orphan this inode but crashed before it could commit the |
187 | * unlink. */ | 187 | * unlink. */ |
188 | spin_lock(&oi->ip_lock); | 188 | spin_lock(&oi->ip_lock); |
189 | oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED; | 189 | oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED; |
190 | spin_unlock(&oi->ip_lock); | 190 | spin_unlock(&oi->ip_lock); |
191 | 191 | ||
192 | bail_add: | 192 | bail_add: |
193 | dentry->d_op = &ocfs2_dentry_ops; | 193 | dentry->d_op = &ocfs2_dentry_ops; |
194 | ret = d_splice_alias(inode, dentry); | 194 | ret = d_splice_alias(inode, dentry); |
195 | 195 | ||
196 | if (inode) { | 196 | if (inode) { |
197 | /* | 197 | /* |
198 | * If d_splice_alias() finds a DCACHE_DISCONNECTED | 198 | * If d_splice_alias() finds a DCACHE_DISCONNECTED |
199 | * dentry, it will d_move() it on top of ours. The | 199 | * dentry, it will d_move() it on top of ours. The |
200 | * return value will indicate this however, so in | 200 | * return value will indicate this however, so in |
201 | * those cases, we switch them around for the locking | 201 | * those cases, we switch them around for the locking |
202 | * code. | 202 | * code. |
203 | * | 203 | * |
204 | * NOTE: This dentry already has ->d_op set from | 204 | * NOTE: This dentry already has ->d_op set from |
205 | * ocfs2_get_parent() and ocfs2_get_dentry() | 205 | * ocfs2_get_parent() and ocfs2_get_dentry() |
206 | */ | 206 | */ |
207 | if (ret) | 207 | if (ret) |
208 | dentry = ret; | 208 | dentry = ret; |
209 | 209 | ||
210 | status = ocfs2_dentry_attach_lock(dentry, inode, | 210 | status = ocfs2_dentry_attach_lock(dentry, inode, |
211 | OCFS2_I(dir)->ip_blkno); | 211 | OCFS2_I(dir)->ip_blkno); |
212 | if (status) { | 212 | if (status) { |
213 | mlog_errno(status); | 213 | mlog_errno(status); |
214 | ret = ERR_PTR(status); | 214 | ret = ERR_PTR(status); |
215 | goto bail_unlock; | 215 | goto bail_unlock; |
216 | } | 216 | } |
217 | } | 217 | } |
218 | 218 | ||
219 | bail_unlock: | 219 | bail_unlock: |
220 | /* Don't drop the cluster lock until *after* the d_add -- | 220 | /* Don't drop the cluster lock until *after* the d_add -- |
221 | * unlink on another node will message us to remove that | 221 | * unlink on another node will message us to remove that |
222 | * dentry under this lock so otherwise we can race this with | 222 | * dentry under this lock so otherwise we can race this with |
223 | * the vote thread and have a stale dentry. */ | 223 | * the vote thread and have a stale dentry. */ |
224 | ocfs2_meta_unlock(dir, 0); | 224 | ocfs2_meta_unlock(dir, 0); |
225 | 225 | ||
226 | bail: | 226 | bail: |
227 | if (dirent_bh) | 227 | if (dirent_bh) |
228 | brelse(dirent_bh); | 228 | brelse(dirent_bh); |
229 | 229 | ||
230 | mlog_exit_ptr(ret); | 230 | mlog_exit_ptr(ret); |
231 | 231 | ||
232 | return ret; | 232 | return ret; |
233 | } | 233 | } |
234 | 234 | ||
235 | static int ocfs2_fill_new_dir(struct ocfs2_super *osb, | 235 | static int ocfs2_fill_new_dir(struct ocfs2_super *osb, |
236 | handle_t *handle, | 236 | handle_t *handle, |
237 | struct inode *parent, | 237 | struct inode *parent, |
238 | struct inode *inode, | 238 | struct inode *inode, |
239 | struct buffer_head *fe_bh, | 239 | struct buffer_head *fe_bh, |
240 | struct ocfs2_alloc_context *data_ac) | 240 | struct ocfs2_alloc_context *data_ac) |
241 | { | 241 | { |
242 | int status; | 242 | int status; |
243 | struct buffer_head *new_bh = NULL; | 243 | struct buffer_head *new_bh = NULL; |
244 | struct ocfs2_dir_entry *de = NULL; | 244 | struct ocfs2_dir_entry *de = NULL; |
245 | 245 | ||
246 | mlog_entry_void(); | 246 | mlog_entry_void(); |
247 | 247 | ||
248 | status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, | 248 | status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, |
249 | data_ac, NULL, &new_bh); | 249 | data_ac, NULL, &new_bh); |
250 | if (status < 0) { | 250 | if (status < 0) { |
251 | mlog_errno(status); | 251 | mlog_errno(status); |
252 | goto bail; | 252 | goto bail; |
253 | } | 253 | } |
254 | 254 | ||
255 | ocfs2_set_new_buffer_uptodate(inode, new_bh); | 255 | ocfs2_set_new_buffer_uptodate(inode, new_bh); |
256 | 256 | ||
257 | status = ocfs2_journal_access(handle, inode, new_bh, | 257 | status = ocfs2_journal_access(handle, inode, new_bh, |
258 | OCFS2_JOURNAL_ACCESS_CREATE); | 258 | OCFS2_JOURNAL_ACCESS_CREATE); |
259 | if (status < 0) { | 259 | if (status < 0) { |
260 | mlog_errno(status); | 260 | mlog_errno(status); |
261 | goto bail; | 261 | goto bail; |
262 | } | 262 | } |
263 | memset(new_bh->b_data, 0, osb->sb->s_blocksize); | 263 | memset(new_bh->b_data, 0, osb->sb->s_blocksize); |
264 | 264 | ||
265 | de = (struct ocfs2_dir_entry *) new_bh->b_data; | 265 | de = (struct ocfs2_dir_entry *) new_bh->b_data; |
266 | de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno); | 266 | de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno); |
267 | de->name_len = 1; | 267 | de->name_len = 1; |
268 | de->rec_len = | 268 | de->rec_len = |
269 | cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len)); | 269 | cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len)); |
270 | strcpy(de->name, "."); | 270 | strcpy(de->name, "."); |
271 | ocfs2_set_de_type(de, S_IFDIR); | 271 | ocfs2_set_de_type(de, S_IFDIR); |
272 | de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len)); | 272 | de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len)); |
273 | de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno); | 273 | de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno); |
274 | de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize - | 274 | de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize - |
275 | OCFS2_DIR_REC_LEN(1)); | 275 | OCFS2_DIR_REC_LEN(1)); |
276 | de->name_len = 2; | 276 | de->name_len = 2; |
277 | strcpy(de->name, ".."); | 277 | strcpy(de->name, ".."); |
278 | ocfs2_set_de_type(de, S_IFDIR); | 278 | ocfs2_set_de_type(de, S_IFDIR); |
279 | 279 | ||
280 | status = ocfs2_journal_dirty(handle, new_bh); | 280 | status = ocfs2_journal_dirty(handle, new_bh); |
281 | if (status < 0) { | 281 | if (status < 0) { |
282 | mlog_errno(status); | 282 | mlog_errno(status); |
283 | goto bail; | 283 | goto bail; |
284 | } | 284 | } |
285 | 285 | ||
286 | i_size_write(inode, inode->i_sb->s_blocksize); | 286 | i_size_write(inode, inode->i_sb->s_blocksize); |
287 | inode->i_nlink = 2; | 287 | inode->i_nlink = 2; |
288 | inode->i_blocks = ocfs2_align_bytes_to_sectors(inode->i_sb->s_blocksize); | 288 | inode->i_blocks = ocfs2_align_bytes_to_sectors(inode->i_sb->s_blocksize); |
289 | status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); | 289 | status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); |
290 | if (status < 0) { | 290 | if (status < 0) { |
291 | mlog_errno(status); | 291 | mlog_errno(status); |
292 | goto bail; | 292 | goto bail; |
293 | } | 293 | } |
294 | 294 | ||
295 | status = 0; | 295 | status = 0; |
296 | bail: | 296 | bail: |
297 | if (new_bh) | 297 | if (new_bh) |
298 | brelse(new_bh); | 298 | brelse(new_bh); |
299 | 299 | ||
300 | mlog_exit(status); | 300 | mlog_exit(status); |
301 | return status; | 301 | return status; |
302 | } | 302 | } |
303 | 303 | ||
304 | static int ocfs2_mknod(struct inode *dir, | 304 | static int ocfs2_mknod(struct inode *dir, |
305 | struct dentry *dentry, | 305 | struct dentry *dentry, |
306 | int mode, | 306 | int mode, |
307 | dev_t dev) | 307 | dev_t dev) |
308 | { | 308 | { |
309 | int status = 0; | 309 | int status = 0; |
310 | struct buffer_head *parent_fe_bh = NULL; | 310 | struct buffer_head *parent_fe_bh = NULL; |
311 | handle_t *handle = NULL; | 311 | handle_t *handle = NULL; |
312 | struct ocfs2_super *osb; | 312 | struct ocfs2_super *osb; |
313 | struct ocfs2_dinode *dirfe; | 313 | struct ocfs2_dinode *dirfe; |
314 | struct buffer_head *new_fe_bh = NULL; | 314 | struct buffer_head *new_fe_bh = NULL; |
315 | struct buffer_head *de_bh = NULL; | 315 | struct buffer_head *de_bh = NULL; |
316 | struct inode *inode = NULL; | 316 | struct inode *inode = NULL; |
317 | struct ocfs2_alloc_context *inode_ac = NULL; | 317 | struct ocfs2_alloc_context *inode_ac = NULL; |
318 | struct ocfs2_alloc_context *data_ac = NULL; | 318 | struct ocfs2_alloc_context *data_ac = NULL; |
319 | 319 | ||
320 | mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, | 320 | mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, |
321 | (unsigned long)dev, dentry->d_name.len, | 321 | (unsigned long)dev, dentry->d_name.len, |
322 | dentry->d_name.name); | 322 | dentry->d_name.name); |
323 | 323 | ||
324 | /* get our super block */ | 324 | /* get our super block */ |
325 | osb = OCFS2_SB(dir->i_sb); | 325 | osb = OCFS2_SB(dir->i_sb); |
326 | 326 | ||
327 | status = ocfs2_meta_lock(dir, &parent_fe_bh, 1); | 327 | status = ocfs2_meta_lock(dir, &parent_fe_bh, 1); |
328 | if (status < 0) { | 328 | if (status < 0) { |
329 | if (status != -ENOENT) | 329 | if (status != -ENOENT) |
330 | mlog_errno(status); | 330 | mlog_errno(status); |
331 | return status; | 331 | return status; |
332 | } | 332 | } |
333 | 333 | ||
334 | if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) { | 334 | if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) { |
335 | status = -EMLINK; | 335 | status = -EMLINK; |
336 | goto leave; | 336 | goto leave; |
337 | } | 337 | } |
338 | 338 | ||
339 | dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data; | 339 | dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data; |
340 | if (!dirfe->i_links_count) { | 340 | if (!dirfe->i_links_count) { |
341 | /* can't make a file in a deleted directory. */ | 341 | /* can't make a file in a deleted directory. */ |
342 | status = -ENOENT; | 342 | status = -ENOENT; |
343 | goto leave; | 343 | goto leave; |
344 | } | 344 | } |
345 | 345 | ||
346 | status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name, | 346 | status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name, |
347 | dentry->d_name.len); | 347 | dentry->d_name.len); |
348 | if (status) | 348 | if (status) |
349 | goto leave; | 349 | goto leave; |
350 | 350 | ||
351 | /* get a spot inside the dir. */ | 351 | /* get a spot inside the dir. */ |
352 | status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh, | 352 | status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh, |
353 | dentry->d_name.name, | 353 | dentry->d_name.name, |
354 | dentry->d_name.len, &de_bh); | 354 | dentry->d_name.len, &de_bh); |
355 | if (status < 0) { | 355 | if (status < 0) { |
356 | mlog_errno(status); | 356 | mlog_errno(status); |
357 | goto leave; | 357 | goto leave; |
358 | } | 358 | } |
359 | 359 | ||
360 | /* reserve an inode spot */ | 360 | /* reserve an inode spot */ |
361 | status = ocfs2_reserve_new_inode(osb, &inode_ac); | 361 | status = ocfs2_reserve_new_inode(osb, &inode_ac); |
362 | if (status < 0) { | 362 | if (status < 0) { |
363 | if (status != -ENOSPC) | 363 | if (status != -ENOSPC) |
364 | mlog_errno(status); | 364 | mlog_errno(status); |
365 | goto leave; | 365 | goto leave; |
366 | } | 366 | } |
367 | 367 | ||
368 | /* are we making a directory? If so, reserve a cluster for his | 368 | /* are we making a directory? If so, reserve a cluster for his |
369 | * 1st extent. */ | 369 | * 1st extent. */ |
370 | if (S_ISDIR(mode)) { | 370 | if (S_ISDIR(mode)) { |
371 | status = ocfs2_reserve_clusters(osb, 1, &data_ac); | 371 | status = ocfs2_reserve_clusters(osb, 1, &data_ac); |
372 | if (status < 0) { | 372 | if (status < 0) { |
373 | if (status != -ENOSPC) | 373 | if (status != -ENOSPC) |
374 | mlog_errno(status); | 374 | mlog_errno(status); |
375 | goto leave; | 375 | goto leave; |
376 | } | 376 | } |
377 | } | 377 | } |
378 | 378 | ||
379 | handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS); | 379 | handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS); |
380 | if (IS_ERR(handle)) { | 380 | if (IS_ERR(handle)) { |
381 | status = PTR_ERR(handle); | 381 | status = PTR_ERR(handle); |
382 | handle = NULL; | 382 | handle = NULL; |
383 | mlog_errno(status); | 383 | mlog_errno(status); |
384 | goto leave; | 384 | goto leave; |
385 | } | 385 | } |
386 | 386 | ||
387 | /* do the real work now. */ | 387 | /* do the real work now. */ |
388 | status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev, | 388 | status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev, |
389 | &new_fe_bh, parent_fe_bh, handle, | 389 | &new_fe_bh, parent_fe_bh, handle, |
390 | &inode, inode_ac); | 390 | &inode, inode_ac); |
391 | if (status < 0) { | 391 | if (status < 0) { |
392 | mlog_errno(status); | 392 | mlog_errno(status); |
393 | goto leave; | 393 | goto leave; |
394 | } | 394 | } |
395 | 395 | ||
396 | if (S_ISDIR(mode)) { | 396 | if (S_ISDIR(mode)) { |
397 | status = ocfs2_fill_new_dir(osb, handle, dir, inode, | 397 | status = ocfs2_fill_new_dir(osb, handle, dir, inode, |
398 | new_fe_bh, data_ac); | 398 | new_fe_bh, data_ac); |
399 | if (status < 0) { | 399 | if (status < 0) { |
400 | mlog_errno(status); | 400 | mlog_errno(status); |
401 | goto leave; | 401 | goto leave; |
402 | } | 402 | } |
403 | 403 | ||
404 | status = ocfs2_journal_access(handle, dir, parent_fe_bh, | 404 | status = ocfs2_journal_access(handle, dir, parent_fe_bh, |
405 | OCFS2_JOURNAL_ACCESS_WRITE); | 405 | OCFS2_JOURNAL_ACCESS_WRITE); |
406 | if (status < 0) { | 406 | if (status < 0) { |
407 | mlog_errno(status); | 407 | mlog_errno(status); |
408 | goto leave; | 408 | goto leave; |
409 | } | 409 | } |
410 | le16_add_cpu(&dirfe->i_links_count, 1); | 410 | le16_add_cpu(&dirfe->i_links_count, 1); |
411 | status = ocfs2_journal_dirty(handle, parent_fe_bh); | 411 | status = ocfs2_journal_dirty(handle, parent_fe_bh); |
412 | if (status < 0) { | 412 | if (status < 0) { |
413 | mlog_errno(status); | 413 | mlog_errno(status); |
414 | goto leave; | 414 | goto leave; |
415 | } | 415 | } |
416 | inc_nlink(dir); | 416 | inc_nlink(dir); |
417 | } | 417 | } |
418 | 418 | ||
419 | status = ocfs2_add_entry(handle, dentry, inode, | 419 | status = ocfs2_add_entry(handle, dentry, inode, |
420 | OCFS2_I(inode)->ip_blkno, parent_fe_bh, | 420 | OCFS2_I(inode)->ip_blkno, parent_fe_bh, |
421 | de_bh); | 421 | de_bh); |
422 | if (status < 0) { | 422 | if (status < 0) { |
423 | mlog_errno(status); | 423 | mlog_errno(status); |
424 | goto leave; | 424 | goto leave; |
425 | } | 425 | } |
426 | 426 | ||
427 | status = ocfs2_dentry_attach_lock(dentry, inode, | 427 | status = ocfs2_dentry_attach_lock(dentry, inode, |
428 | OCFS2_I(dir)->ip_blkno); | 428 | OCFS2_I(dir)->ip_blkno); |
429 | if (status) { | 429 | if (status) { |
430 | mlog_errno(status); | 430 | mlog_errno(status); |
431 | goto leave; | 431 | goto leave; |
432 | } | 432 | } |
433 | 433 | ||
434 | insert_inode_hash(inode); | 434 | insert_inode_hash(inode); |
435 | dentry->d_op = &ocfs2_dentry_ops; | 435 | dentry->d_op = &ocfs2_dentry_ops; |
436 | d_instantiate(dentry, inode); | 436 | d_instantiate(dentry, inode); |
437 | status = 0; | 437 | status = 0; |
438 | leave: | 438 | leave: |
439 | if (handle) | 439 | if (handle) |
440 | ocfs2_commit_trans(osb, handle); | 440 | ocfs2_commit_trans(osb, handle); |
441 | 441 | ||
442 | ocfs2_meta_unlock(dir, 1); | 442 | ocfs2_meta_unlock(dir, 1); |
443 | 443 | ||
444 | if (status == -ENOSPC) | 444 | if (status == -ENOSPC) |
445 | mlog(0, "Disk is full\n"); | 445 | mlog(0, "Disk is full\n"); |
446 | 446 | ||
447 | if (new_fe_bh) | 447 | if (new_fe_bh) |
448 | brelse(new_fe_bh); | 448 | brelse(new_fe_bh); |
449 | 449 | ||
450 | if (de_bh) | 450 | if (de_bh) |
451 | brelse(de_bh); | 451 | brelse(de_bh); |
452 | 452 | ||
453 | if (parent_fe_bh) | 453 | if (parent_fe_bh) |
454 | brelse(parent_fe_bh); | 454 | brelse(parent_fe_bh); |
455 | 455 | ||
456 | if ((status < 0) && inode) | 456 | if ((status < 0) && inode) |
457 | iput(inode); | 457 | iput(inode); |
458 | 458 | ||
459 | if (inode_ac) | 459 | if (inode_ac) |
460 | ocfs2_free_alloc_context(inode_ac); | 460 | ocfs2_free_alloc_context(inode_ac); |
461 | 461 | ||
462 | if (data_ac) | 462 | if (data_ac) |
463 | ocfs2_free_alloc_context(data_ac); | 463 | ocfs2_free_alloc_context(data_ac); |
464 | 464 | ||
465 | mlog_exit(status); | 465 | mlog_exit(status); |
466 | 466 | ||
467 | return status; | 467 | return status; |
468 | } | 468 | } |
469 | 469 | ||
470 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, | 470 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, |
471 | struct inode *dir, | 471 | struct inode *dir, |
472 | struct dentry *dentry, int mode, | 472 | struct dentry *dentry, int mode, |
473 | dev_t dev, | 473 | dev_t dev, |
474 | struct buffer_head **new_fe_bh, | 474 | struct buffer_head **new_fe_bh, |
475 | struct buffer_head *parent_fe_bh, | 475 | struct buffer_head *parent_fe_bh, |
476 | handle_t *handle, | 476 | handle_t *handle, |
477 | struct inode **ret_inode, | 477 | struct inode **ret_inode, |
478 | struct ocfs2_alloc_context *inode_ac) | 478 | struct ocfs2_alloc_context *inode_ac) |
479 | { | 479 | { |
480 | int status = 0; | 480 | int status = 0; |
481 | struct ocfs2_dinode *fe = NULL; | 481 | struct ocfs2_dinode *fe = NULL; |
482 | struct ocfs2_extent_list *fel; | 482 | struct ocfs2_extent_list *fel; |
483 | u64 fe_blkno = 0; | 483 | u64 fe_blkno = 0; |
484 | u16 suballoc_bit; | 484 | u16 suballoc_bit; |
485 | struct inode *inode = NULL; | 485 | struct inode *inode = NULL; |
486 | 486 | ||
487 | mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, | 487 | mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, |
488 | (unsigned long)dev, dentry->d_name.len, | 488 | (unsigned long)dev, dentry->d_name.len, |
489 | dentry->d_name.name); | 489 | dentry->d_name.name); |
490 | 490 | ||
491 | *new_fe_bh = NULL; | 491 | *new_fe_bh = NULL; |
492 | *ret_inode = NULL; | 492 | *ret_inode = NULL; |
493 | 493 | ||
494 | status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit, | 494 | status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit, |
495 | &fe_blkno); | 495 | &fe_blkno); |
496 | if (status < 0) { | 496 | if (status < 0) { |
497 | mlog_errno(status); | 497 | mlog_errno(status); |
498 | goto leave; | 498 | goto leave; |
499 | } | 499 | } |
500 | 500 | ||
501 | inode = new_inode(dir->i_sb); | 501 | inode = new_inode(dir->i_sb); |
502 | if (IS_ERR(inode)) { | 502 | if (IS_ERR(inode)) { |
503 | status = PTR_ERR(inode); | 503 | status = PTR_ERR(inode); |
504 | mlog(ML_ERROR, "new_inode failed!\n"); | 504 | mlog(ML_ERROR, "new_inode failed!\n"); |
505 | goto leave; | 505 | goto leave; |
506 | } | 506 | } |
507 | 507 | ||
508 | /* populate as many fields early on as possible - many of | 508 | /* populate as many fields early on as possible - many of |
509 | * these are used by the support functions here and in | 509 | * these are used by the support functions here and in |
510 | * callers. */ | 510 | * callers. */ |
511 | inode->i_ino = ino_from_blkno(osb->sb, fe_blkno); | 511 | inode->i_ino = ino_from_blkno(osb->sb, fe_blkno); |
512 | OCFS2_I(inode)->ip_blkno = fe_blkno; | 512 | OCFS2_I(inode)->ip_blkno = fe_blkno; |
513 | if (S_ISDIR(mode)) | 513 | if (S_ISDIR(mode)) |
514 | inode->i_nlink = 2; | 514 | inode->i_nlink = 2; |
515 | else | 515 | else |
516 | inode->i_nlink = 1; | 516 | inode->i_nlink = 1; |
517 | inode->i_mode = mode; | 517 | inode->i_mode = mode; |
518 | spin_lock(&osb->osb_lock); | 518 | spin_lock(&osb->osb_lock); |
519 | inode->i_generation = osb->s_next_generation++; | 519 | inode->i_generation = osb->s_next_generation++; |
520 | spin_unlock(&osb->osb_lock); | 520 | spin_unlock(&osb->osb_lock); |
521 | 521 | ||
522 | *new_fe_bh = sb_getblk(osb->sb, fe_blkno); | 522 | *new_fe_bh = sb_getblk(osb->sb, fe_blkno); |
523 | if (!*new_fe_bh) { | 523 | if (!*new_fe_bh) { |
524 | status = -EIO; | 524 | status = -EIO; |
525 | mlog_errno(status); | 525 | mlog_errno(status); |
526 | goto leave; | 526 | goto leave; |
527 | } | 527 | } |
528 | ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh); | 528 | ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh); |
529 | 529 | ||
530 | status = ocfs2_journal_access(handle, inode, *new_fe_bh, | 530 | status = ocfs2_journal_access(handle, inode, *new_fe_bh, |
531 | OCFS2_JOURNAL_ACCESS_CREATE); | 531 | OCFS2_JOURNAL_ACCESS_CREATE); |
532 | if (status < 0) { | 532 | if (status < 0) { |
533 | mlog_errno(status); | 533 | mlog_errno(status); |
534 | goto leave; | 534 | goto leave; |
535 | } | 535 | } |
536 | 536 | ||
537 | fe = (struct ocfs2_dinode *) (*new_fe_bh)->b_data; | 537 | fe = (struct ocfs2_dinode *) (*new_fe_bh)->b_data; |
538 | memset(fe, 0, osb->sb->s_blocksize); | 538 | memset(fe, 0, osb->sb->s_blocksize); |
539 | 539 | ||
540 | fe->i_generation = cpu_to_le32(inode->i_generation); | 540 | fe->i_generation = cpu_to_le32(inode->i_generation); |
541 | fe->i_fs_generation = cpu_to_le32(osb->fs_generation); | 541 | fe->i_fs_generation = cpu_to_le32(osb->fs_generation); |
542 | fe->i_blkno = cpu_to_le64(fe_blkno); | 542 | fe->i_blkno = cpu_to_le64(fe_blkno); |
543 | fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); | 543 | fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); |
544 | fe->i_suballoc_slot = cpu_to_le16(osb->slot_num); | 544 | fe->i_suballoc_slot = cpu_to_le16(osb->slot_num); |
545 | fe->i_uid = cpu_to_le32(current->fsuid); | 545 | fe->i_uid = cpu_to_le32(current->fsuid); |
546 | if (dir->i_mode & S_ISGID) { | 546 | if (dir->i_mode & S_ISGID) { |
547 | fe->i_gid = cpu_to_le32(dir->i_gid); | 547 | fe->i_gid = cpu_to_le32(dir->i_gid); |
548 | if (S_ISDIR(mode)) | 548 | if (S_ISDIR(mode)) |
549 | mode |= S_ISGID; | 549 | mode |= S_ISGID; |
550 | } else | 550 | } else |
551 | fe->i_gid = cpu_to_le32(current->fsgid); | 551 | fe->i_gid = cpu_to_le32(current->fsgid); |
552 | fe->i_mode = cpu_to_le16(mode); | 552 | fe->i_mode = cpu_to_le16(mode); |
553 | if (S_ISCHR(mode) || S_ISBLK(mode)) | 553 | if (S_ISCHR(mode) || S_ISBLK(mode)) |
554 | fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); | 554 | fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); |
555 | 555 | ||
556 | fe->i_links_count = cpu_to_le16(inode->i_nlink); | 556 | fe->i_links_count = cpu_to_le16(inode->i_nlink); |
557 | 557 | ||
558 | fe->i_last_eb_blk = 0; | 558 | fe->i_last_eb_blk = 0; |
559 | strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); | 559 | strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); |
560 | le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL); | 560 | le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL); |
561 | fe->i_atime = fe->i_ctime = fe->i_mtime = | 561 | fe->i_atime = fe->i_ctime = fe->i_mtime = |
562 | cpu_to_le64(CURRENT_TIME.tv_sec); | 562 | cpu_to_le64(CURRENT_TIME.tv_sec); |
563 | fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = | 563 | fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = |
564 | cpu_to_le32(CURRENT_TIME.tv_nsec); | 564 | cpu_to_le32(CURRENT_TIME.tv_nsec); |
565 | fe->i_dtime = 0; | 565 | fe->i_dtime = 0; |
566 | 566 | ||
567 | fel = &fe->id2.i_list; | 567 | fel = &fe->id2.i_list; |
568 | fel->l_tree_depth = 0; | 568 | fel->l_tree_depth = 0; |
569 | fel->l_next_free_rec = 0; | 569 | fel->l_next_free_rec = 0; |
570 | fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb)); | 570 | fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb)); |
571 | 571 | ||
572 | status = ocfs2_journal_dirty(handle, *new_fe_bh); | 572 | status = ocfs2_journal_dirty(handle, *new_fe_bh); |
573 | if (status < 0) { | 573 | if (status < 0) { |
574 | mlog_errno(status); | 574 | mlog_errno(status); |
575 | goto leave; | 575 | goto leave; |
576 | } | 576 | } |
577 | 577 | ||
578 | if (ocfs2_populate_inode(inode, fe, 1) < 0) { | 578 | if (ocfs2_populate_inode(inode, fe, 1) < 0) { |
579 | mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, " | 579 | mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, " |
580 | "i_blkno=%llu, i_ino=%lu\n", | 580 | "i_blkno=%llu, i_ino=%lu\n", |
581 | (unsigned long long) (*new_fe_bh)->b_blocknr, | 581 | (unsigned long long) (*new_fe_bh)->b_blocknr, |
582 | (unsigned long long)fe->i_blkno, inode->i_ino); | 582 | (unsigned long long)fe->i_blkno, inode->i_ino); |
583 | BUG(); | 583 | BUG(); |
584 | } | 584 | } |
585 | 585 | ||
586 | ocfs2_inode_set_new(osb, inode); | 586 | ocfs2_inode_set_new(osb, inode); |
587 | if (!ocfs2_mount_local(osb)) { | 587 | if (!ocfs2_mount_local(osb)) { |
588 | status = ocfs2_create_new_inode_locks(inode); | 588 | status = ocfs2_create_new_inode_locks(inode); |
589 | if (status < 0) | 589 | if (status < 0) |
590 | mlog_errno(status); | 590 | mlog_errno(status); |
591 | } | 591 | } |
592 | 592 | ||
593 | status = 0; /* error in ocfs2_create_new_inode_locks is not | 593 | status = 0; /* error in ocfs2_create_new_inode_locks is not |
594 | * critical */ | 594 | * critical */ |
595 | 595 | ||
596 | *ret_inode = inode; | 596 | *ret_inode = inode; |
597 | leave: | 597 | leave: |
598 | if (status < 0) { | 598 | if (status < 0) { |
599 | if (*new_fe_bh) { | 599 | if (*new_fe_bh) { |
600 | brelse(*new_fe_bh); | 600 | brelse(*new_fe_bh); |
601 | *new_fe_bh = NULL; | 601 | *new_fe_bh = NULL; |
602 | } | 602 | } |
603 | if (inode) | 603 | if (inode) |
604 | iput(inode); | 604 | iput(inode); |
605 | } | 605 | } |
606 | 606 | ||
607 | mlog_exit(status); | 607 | mlog_exit(status); |
608 | return status; | 608 | return status; |
609 | } | 609 | } |
610 | 610 | ||
611 | static int ocfs2_mkdir(struct inode *dir, | 611 | static int ocfs2_mkdir(struct inode *dir, |
612 | struct dentry *dentry, | 612 | struct dentry *dentry, |
613 | int mode) | 613 | int mode) |
614 | { | 614 | { |
615 | int ret; | 615 | int ret; |
616 | 616 | ||
617 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode, | 617 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode, |
618 | dentry->d_name.len, dentry->d_name.name); | 618 | dentry->d_name.len, dentry->d_name.name); |
619 | ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0); | 619 | ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0); |
620 | mlog_exit(ret); | 620 | mlog_exit(ret); |
621 | 621 | ||
622 | return ret; | 622 | return ret; |
623 | } | 623 | } |
624 | 624 | ||
625 | static int ocfs2_create(struct inode *dir, | 625 | static int ocfs2_create(struct inode *dir, |
626 | struct dentry *dentry, | 626 | struct dentry *dentry, |
627 | int mode, | 627 | int mode, |
628 | struct nameidata *nd) | 628 | struct nameidata *nd) |
629 | { | 629 | { |
630 | int ret; | 630 | int ret; |
631 | 631 | ||
632 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode, | 632 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode, |
633 | dentry->d_name.len, dentry->d_name.name); | 633 | dentry->d_name.len, dentry->d_name.name); |
634 | ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0); | 634 | ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0); |
635 | mlog_exit(ret); | 635 | mlog_exit(ret); |
636 | 636 | ||
637 | return ret; | 637 | return ret; |
638 | } | 638 | } |
639 | 639 | ||
640 | static int ocfs2_link(struct dentry *old_dentry, | 640 | static int ocfs2_link(struct dentry *old_dentry, |
641 | struct inode *dir, | 641 | struct inode *dir, |
642 | struct dentry *dentry) | 642 | struct dentry *dentry) |
643 | { | 643 | { |
644 | handle_t *handle; | 644 | handle_t *handle; |
645 | struct inode *inode = old_dentry->d_inode; | 645 | struct inode *inode = old_dentry->d_inode; |
646 | int err; | 646 | int err; |
647 | struct buffer_head *fe_bh = NULL; | 647 | struct buffer_head *fe_bh = NULL; |
648 | struct buffer_head *parent_fe_bh = NULL; | 648 | struct buffer_head *parent_fe_bh = NULL; |
649 | struct buffer_head *de_bh = NULL; | 649 | struct buffer_head *de_bh = NULL; |
650 | struct ocfs2_dinode *fe = NULL; | 650 | struct ocfs2_dinode *fe = NULL; |
651 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | 651 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
652 | 652 | ||
653 | mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino, | 653 | mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino, |
654 | old_dentry->d_name.len, old_dentry->d_name.name, | 654 | old_dentry->d_name.len, old_dentry->d_name.name, |
655 | dentry->d_name.len, dentry->d_name.name); | 655 | dentry->d_name.len, dentry->d_name.name); |
656 | 656 | ||
657 | if (S_ISDIR(inode->i_mode)) | 657 | if (S_ISDIR(inode->i_mode)) |
658 | return -EPERM; | 658 | return -EPERM; |
659 | 659 | ||
660 | err = ocfs2_meta_lock(dir, &parent_fe_bh, 1); | 660 | err = ocfs2_meta_lock(dir, &parent_fe_bh, 1); |
661 | if (err < 0) { | 661 | if (err < 0) { |
662 | if (err != -ENOENT) | 662 | if (err != -ENOENT) |
663 | mlog_errno(err); | 663 | mlog_errno(err); |
664 | return err; | 664 | return err; |
665 | } | 665 | } |
666 | 666 | ||
667 | if (!dir->i_nlink) { | 667 | if (!dir->i_nlink) { |
668 | err = -ENOENT; | 668 | err = -ENOENT; |
669 | goto out; | 669 | goto out; |
670 | } | 670 | } |
671 | 671 | ||
672 | err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name, | 672 | err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name, |
673 | dentry->d_name.len); | 673 | dentry->d_name.len); |
674 | if (err) | 674 | if (err) |
675 | goto out; | 675 | goto out; |
676 | 676 | ||
677 | err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh, | 677 | err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh, |
678 | dentry->d_name.name, | 678 | dentry->d_name.name, |
679 | dentry->d_name.len, &de_bh); | 679 | dentry->d_name.len, &de_bh); |
680 | if (err < 0) { | 680 | if (err < 0) { |
681 | mlog_errno(err); | 681 | mlog_errno(err); |
682 | goto out; | 682 | goto out; |
683 | } | 683 | } |
684 | 684 | ||
685 | err = ocfs2_meta_lock(inode, &fe_bh, 1); | 685 | err = ocfs2_meta_lock(inode, &fe_bh, 1); |
686 | if (err < 0) { | 686 | if (err < 0) { |
687 | if (err != -ENOENT) | 687 | if (err != -ENOENT) |
688 | mlog_errno(err); | 688 | mlog_errno(err); |
689 | goto out; | 689 | goto out; |
690 | } | 690 | } |
691 | 691 | ||
692 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 692 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
693 | if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) { | 693 | if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) { |
694 | err = -EMLINK; | 694 | err = -EMLINK; |
695 | goto out_unlock_inode; | 695 | goto out_unlock_inode; |
696 | } | 696 | } |
697 | 697 | ||
698 | handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS); | 698 | handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS); |
699 | if (IS_ERR(handle)) { | 699 | if (IS_ERR(handle)) { |
700 | err = PTR_ERR(handle); | 700 | err = PTR_ERR(handle); |
701 | handle = NULL; | 701 | handle = NULL; |
702 | mlog_errno(err); | 702 | mlog_errno(err); |
703 | goto out_unlock_inode; | 703 | goto out_unlock_inode; |
704 | } | 704 | } |
705 | 705 | ||
706 | err = ocfs2_journal_access(handle, inode, fe_bh, | 706 | err = ocfs2_journal_access(handle, inode, fe_bh, |
707 | OCFS2_JOURNAL_ACCESS_WRITE); | 707 | OCFS2_JOURNAL_ACCESS_WRITE); |
708 | if (err < 0) { | 708 | if (err < 0) { |
709 | mlog_errno(err); | 709 | mlog_errno(err); |
710 | goto out_commit; | 710 | goto out_commit; |
711 | } | 711 | } |
712 | 712 | ||
713 | inc_nlink(inode); | 713 | inc_nlink(inode); |
714 | inode->i_ctime = CURRENT_TIME; | 714 | inode->i_ctime = CURRENT_TIME; |
715 | fe->i_links_count = cpu_to_le16(inode->i_nlink); | 715 | fe->i_links_count = cpu_to_le16(inode->i_nlink); |
716 | fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | 716 | fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); |
717 | fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | 717 | fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
718 | 718 | ||
719 | err = ocfs2_journal_dirty(handle, fe_bh); | 719 | err = ocfs2_journal_dirty(handle, fe_bh); |
720 | if (err < 0) { | 720 | if (err < 0) { |
721 | le16_add_cpu(&fe->i_links_count, -1); | 721 | le16_add_cpu(&fe->i_links_count, -1); |
722 | drop_nlink(inode); | 722 | drop_nlink(inode); |
723 | mlog_errno(err); | 723 | mlog_errno(err); |
724 | goto out_commit; | 724 | goto out_commit; |
725 | } | 725 | } |
726 | 726 | ||
727 | err = ocfs2_add_entry(handle, dentry, inode, | 727 | err = ocfs2_add_entry(handle, dentry, inode, |
728 | OCFS2_I(inode)->ip_blkno, | 728 | OCFS2_I(inode)->ip_blkno, |
729 | parent_fe_bh, de_bh); | 729 | parent_fe_bh, de_bh); |
730 | if (err) { | 730 | if (err) { |
731 | le16_add_cpu(&fe->i_links_count, -1); | 731 | le16_add_cpu(&fe->i_links_count, -1); |
732 | drop_nlink(inode); | 732 | drop_nlink(inode); |
733 | mlog_errno(err); | 733 | mlog_errno(err); |
734 | goto out_commit; | 734 | goto out_commit; |
735 | } | 735 | } |
736 | 736 | ||
737 | err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); | 737 | err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); |
738 | if (err) { | 738 | if (err) { |
739 | mlog_errno(err); | 739 | mlog_errno(err); |
740 | goto out_commit; | 740 | goto out_commit; |
741 | } | 741 | } |
742 | 742 | ||
743 | atomic_inc(&inode->i_count); | 743 | atomic_inc(&inode->i_count); |
744 | dentry->d_op = &ocfs2_dentry_ops; | 744 | dentry->d_op = &ocfs2_dentry_ops; |
745 | d_instantiate(dentry, inode); | 745 | d_instantiate(dentry, inode); |
746 | 746 | ||
747 | out_commit: | 747 | out_commit: |
748 | ocfs2_commit_trans(osb, handle); | 748 | ocfs2_commit_trans(osb, handle); |
749 | out_unlock_inode: | 749 | out_unlock_inode: |
750 | ocfs2_meta_unlock(inode, 1); | 750 | ocfs2_meta_unlock(inode, 1); |
751 | 751 | ||
752 | out: | 752 | out: |
753 | ocfs2_meta_unlock(dir, 1); | 753 | ocfs2_meta_unlock(dir, 1); |
754 | 754 | ||
755 | if (de_bh) | 755 | if (de_bh) |
756 | brelse(de_bh); | 756 | brelse(de_bh); |
757 | if (fe_bh) | 757 | if (fe_bh) |
758 | brelse(fe_bh); | 758 | brelse(fe_bh); |
759 | if (parent_fe_bh) | 759 | if (parent_fe_bh) |
760 | brelse(parent_fe_bh); | 760 | brelse(parent_fe_bh); |
761 | 761 | ||
762 | mlog_exit(err); | 762 | mlog_exit(err); |
763 | 763 | ||
764 | return err; | 764 | return err; |
765 | } | 765 | } |
766 | 766 | ||
/*
 * Acquire the exclusive lock on @dentry and, if that worked, release
 * it again straight away.  Taking it exclusively forces other nodes
 * to drop the dentry.
 *
 * Returns 0 on success; a failure to take the lock is logged and its
 * error code returned.
 */
static int ocfs2_remote_dentry_delete(struct dentry *dentry)
{
	int status = ocfs2_dentry_lock(dentry, 1);

	if (status) {
		mlog_errno(status);
		return status;
	}

	ocfs2_dentry_unlock(dentry, 1);
	return status;
}
783 | 783 | ||
784 | static inline int inode_is_unlinkable(struct inode *inode) | 784 | static inline int inode_is_unlinkable(struct inode *inode) |
785 | { | 785 | { |
786 | if (S_ISDIR(inode->i_mode)) { | 786 | if (S_ISDIR(inode->i_mode)) { |
787 | if (inode->i_nlink == 2) | 787 | if (inode->i_nlink == 2) |
788 | return 1; | 788 | return 1; |
789 | return 0; | 789 | return 0; |
790 | } | 790 | } |
791 | 791 | ||
792 | if (inode->i_nlink == 1) | 792 | if (inode->i_nlink == 1) |
793 | return 1; | 793 | return 1; |
794 | return 0; | 794 | return 0; |
795 | } | 795 | } |
796 | 796 | ||
797 | static int ocfs2_unlink(struct inode *dir, | 797 | static int ocfs2_unlink(struct inode *dir, |
798 | struct dentry *dentry) | 798 | struct dentry *dentry) |
799 | { | 799 | { |
800 | int status; | 800 | int status; |
801 | int child_locked = 0; | 801 | int child_locked = 0; |
802 | struct inode *inode = dentry->d_inode; | 802 | struct inode *inode = dentry->d_inode; |
803 | struct inode *orphan_dir = NULL; | 803 | struct inode *orphan_dir = NULL; |
804 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | 804 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
805 | u64 blkno; | 805 | u64 blkno; |
806 | struct ocfs2_dinode *fe = NULL; | 806 | struct ocfs2_dinode *fe = NULL; |
807 | struct buffer_head *fe_bh = NULL; | 807 | struct buffer_head *fe_bh = NULL; |
808 | struct buffer_head *parent_node_bh = NULL; | 808 | struct buffer_head *parent_node_bh = NULL; |
809 | handle_t *handle = NULL; | 809 | handle_t *handle = NULL; |
810 | struct ocfs2_dir_entry *dirent = NULL; | 810 | struct ocfs2_dir_entry *dirent = NULL; |
811 | struct buffer_head *dirent_bh = NULL; | 811 | struct buffer_head *dirent_bh = NULL; |
812 | char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; | 812 | char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; |
813 | struct buffer_head *orphan_entry_bh = NULL; | 813 | struct buffer_head *orphan_entry_bh = NULL; |
814 | 814 | ||
815 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, | 815 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, |
816 | dentry->d_name.len, dentry->d_name.name); | 816 | dentry->d_name.len, dentry->d_name.name); |
817 | 817 | ||
818 | BUG_ON(dentry->d_parent->d_inode != dir); | 818 | BUG_ON(dentry->d_parent->d_inode != dir); |
819 | 819 | ||
820 | mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); | 820 | mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); |
821 | 821 | ||
822 | if (inode == osb->root_inode) { | 822 | if (inode == osb->root_inode) { |
823 | mlog(0, "Cannot delete the root directory\n"); | 823 | mlog(0, "Cannot delete the root directory\n"); |
824 | return -EPERM; | 824 | return -EPERM; |
825 | } | 825 | } |
826 | 826 | ||
827 | status = ocfs2_meta_lock(dir, &parent_node_bh, 1); | 827 | status = ocfs2_meta_lock(dir, &parent_node_bh, 1); |
828 | if (status < 0) { | 828 | if (status < 0) { |
829 | if (status != -ENOENT) | 829 | if (status != -ENOENT) |
830 | mlog_errno(status); | 830 | mlog_errno(status); |
831 | return status; | 831 | return status; |
832 | } | 832 | } |
833 | 833 | ||
834 | status = ocfs2_find_files_on_disk(dentry->d_name.name, | 834 | status = ocfs2_find_files_on_disk(dentry->d_name.name, |
835 | dentry->d_name.len, &blkno, | 835 | dentry->d_name.len, &blkno, |
836 | dir, &dirent_bh, &dirent); | 836 | dir, &dirent_bh, &dirent); |
837 | if (status < 0) { | 837 | if (status < 0) { |
838 | if (status != -ENOENT) | 838 | if (status != -ENOENT) |
839 | mlog_errno(status); | 839 | mlog_errno(status); |
840 | goto leave; | 840 | goto leave; |
841 | } | 841 | } |
842 | 842 | ||
843 | if (OCFS2_I(inode)->ip_blkno != blkno) { | 843 | if (OCFS2_I(inode)->ip_blkno != blkno) { |
844 | status = -ENOENT; | 844 | status = -ENOENT; |
845 | 845 | ||
846 | mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n", | 846 | mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n", |
847 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 847 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
848 | (unsigned long long)blkno, OCFS2_I(inode)->ip_flags); | 848 | (unsigned long long)blkno, OCFS2_I(inode)->ip_flags); |
849 | goto leave; | 849 | goto leave; |
850 | } | 850 | } |
851 | 851 | ||
852 | status = ocfs2_meta_lock(inode, &fe_bh, 1); | 852 | status = ocfs2_meta_lock(inode, &fe_bh, 1); |
853 | if (status < 0) { | 853 | if (status < 0) { |
854 | if (status != -ENOENT) | 854 | if (status != -ENOENT) |
855 | mlog_errno(status); | 855 | mlog_errno(status); |
856 | goto leave; | 856 | goto leave; |
857 | } | 857 | } |
858 | child_locked = 1; | 858 | child_locked = 1; |
859 | 859 | ||
860 | if (S_ISDIR(inode->i_mode)) { | 860 | if (S_ISDIR(inode->i_mode)) { |
861 | if (!ocfs2_empty_dir(inode)) { | 861 | if (!ocfs2_empty_dir(inode)) { |
862 | status = -ENOTEMPTY; | 862 | status = -ENOTEMPTY; |
863 | goto leave; | 863 | goto leave; |
864 | } else if (inode->i_nlink != 2) { | 864 | } else if (inode->i_nlink != 2) { |
865 | status = -ENOTEMPTY; | 865 | status = -ENOTEMPTY; |
866 | goto leave; | 866 | goto leave; |
867 | } | 867 | } |
868 | } | 868 | } |
869 | 869 | ||
870 | status = ocfs2_remote_dentry_delete(dentry); | 870 | status = ocfs2_remote_dentry_delete(dentry); |
871 | if (status < 0) { | 871 | if (status < 0) { |
872 | /* This vote should succeed under all normal | 872 | /* This vote should succeed under all normal |
873 | * circumstances. */ | 873 | * circumstances. */ |
874 | mlog_errno(status); | 874 | mlog_errno(status); |
875 | goto leave; | 875 | goto leave; |
876 | } | 876 | } |
877 | 877 | ||
878 | if (inode_is_unlinkable(inode)) { | 878 | if (inode_is_unlinkable(inode)) { |
879 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode, | 879 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode, |
880 | orphan_name, | 880 | orphan_name, |
881 | &orphan_entry_bh); | 881 | &orphan_entry_bh); |
882 | if (status < 0) { | 882 | if (status < 0) { |
883 | mlog_errno(status); | 883 | mlog_errno(status); |
884 | goto leave; | 884 | goto leave; |
885 | } | 885 | } |
886 | } | 886 | } |
887 | 887 | ||
888 | handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS); | 888 | handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS); |
889 | if (IS_ERR(handle)) { | 889 | if (IS_ERR(handle)) { |
890 | status = PTR_ERR(handle); | 890 | status = PTR_ERR(handle); |
891 | handle = NULL; | 891 | handle = NULL; |
892 | mlog_errno(status); | 892 | mlog_errno(status); |
893 | goto leave; | 893 | goto leave; |
894 | } | 894 | } |
895 | 895 | ||
896 | status = ocfs2_journal_access(handle, inode, fe_bh, | 896 | status = ocfs2_journal_access(handle, inode, fe_bh, |
897 | OCFS2_JOURNAL_ACCESS_WRITE); | 897 | OCFS2_JOURNAL_ACCESS_WRITE); |
898 | if (status < 0) { | 898 | if (status < 0) { |
899 | mlog_errno(status); | 899 | mlog_errno(status); |
900 | goto leave; | 900 | goto leave; |
901 | } | 901 | } |
902 | 902 | ||
903 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 903 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
904 | 904 | ||
905 | if (inode_is_unlinkable(inode)) { | 905 | if (inode_is_unlinkable(inode)) { |
906 | status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name, | 906 | status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name, |
907 | orphan_entry_bh, orphan_dir); | 907 | orphan_entry_bh, orphan_dir); |
908 | if (status < 0) { | 908 | if (status < 0) { |
909 | mlog_errno(status); | 909 | mlog_errno(status); |
910 | goto leave; | 910 | goto leave; |
911 | } | 911 | } |
912 | } | 912 | } |
913 | 913 | ||
914 | /* delete the name from the parent dir */ | 914 | /* delete the name from the parent dir */ |
915 | status = ocfs2_delete_entry(handle, dir, dirent, dirent_bh); | 915 | status = ocfs2_delete_entry(handle, dir, dirent, dirent_bh); |
916 | if (status < 0) { | 916 | if (status < 0) { |
917 | mlog_errno(status); | 917 | mlog_errno(status); |
918 | goto leave; | 918 | goto leave; |
919 | } | 919 | } |
920 | 920 | ||
921 | if (S_ISDIR(inode->i_mode)) | 921 | if (S_ISDIR(inode->i_mode)) |
922 | drop_nlink(inode); | 922 | drop_nlink(inode); |
923 | drop_nlink(inode); | 923 | drop_nlink(inode); |
924 | fe->i_links_count = cpu_to_le16(inode->i_nlink); | 924 | fe->i_links_count = cpu_to_le16(inode->i_nlink); |
925 | 925 | ||
926 | status = ocfs2_journal_dirty(handle, fe_bh); | 926 | status = ocfs2_journal_dirty(handle, fe_bh); |
927 | if (status < 0) { | 927 | if (status < 0) { |
928 | mlog_errno(status); | 928 | mlog_errno(status); |
929 | goto leave; | 929 | goto leave; |
930 | } | 930 | } |
931 | 931 | ||
932 | dir->i_ctime = dir->i_mtime = CURRENT_TIME; | 932 | dir->i_ctime = dir->i_mtime = CURRENT_TIME; |
933 | if (S_ISDIR(inode->i_mode)) | 933 | if (S_ISDIR(inode->i_mode)) |
934 | drop_nlink(dir); | 934 | drop_nlink(dir); |
935 | 935 | ||
936 | status = ocfs2_mark_inode_dirty(handle, dir, parent_node_bh); | 936 | status = ocfs2_mark_inode_dirty(handle, dir, parent_node_bh); |
937 | if (status < 0) { | 937 | if (status < 0) { |
938 | mlog_errno(status); | 938 | mlog_errno(status); |
939 | if (S_ISDIR(inode->i_mode)) | 939 | if (S_ISDIR(inode->i_mode)) |
940 | inc_nlink(dir); | 940 | inc_nlink(dir); |
941 | } | 941 | } |
942 | 942 | ||
943 | leave: | 943 | leave: |
944 | if (handle) | 944 | if (handle) |
945 | ocfs2_commit_trans(osb, handle); | 945 | ocfs2_commit_trans(osb, handle); |
946 | 946 | ||
947 | if (child_locked) | 947 | if (child_locked) |
948 | ocfs2_meta_unlock(inode, 1); | 948 | ocfs2_meta_unlock(inode, 1); |
949 | 949 | ||
950 | ocfs2_meta_unlock(dir, 1); | 950 | ocfs2_meta_unlock(dir, 1); |
951 | 951 | ||
952 | if (orphan_dir) { | 952 | if (orphan_dir) { |
953 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ | 953 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ |
954 | ocfs2_meta_unlock(orphan_dir, 1); | 954 | ocfs2_meta_unlock(orphan_dir, 1); |
955 | mutex_unlock(&orphan_dir->i_mutex); | 955 | mutex_unlock(&orphan_dir->i_mutex); |
956 | iput(orphan_dir); | 956 | iput(orphan_dir); |
957 | } | 957 | } |
958 | 958 | ||
959 | if (fe_bh) | 959 | if (fe_bh) |
960 | brelse(fe_bh); | 960 | brelse(fe_bh); |
961 | 961 | ||
962 | if (dirent_bh) | 962 | if (dirent_bh) |
963 | brelse(dirent_bh); | 963 | brelse(dirent_bh); |
964 | 964 | ||
965 | if (parent_node_bh) | 965 | if (parent_node_bh) |
966 | brelse(parent_node_bh); | 966 | brelse(parent_node_bh); |
967 | 967 | ||
968 | if (orphan_entry_bh) | 968 | if (orphan_entry_bh) |
969 | brelse(orphan_entry_bh); | 969 | brelse(orphan_entry_bh); |
970 | 970 | ||
971 | mlog_exit(status); | 971 | mlog_exit(status); |
972 | 972 | ||
973 | return status; | 973 | return status; |
974 | } | 974 | } |
975 | 975 | ||
976 | /* | 976 | /* |
977 | * The only place this should be used is rename! | 977 | * The only place this should be used is rename! |
978 | * if they have the same id, then the 1st one is the only one locked. | 978 | * if they have the same id, then the 1st one is the only one locked. |
979 | */ | 979 | */ |
980 | static int ocfs2_double_lock(struct ocfs2_super *osb, | 980 | static int ocfs2_double_lock(struct ocfs2_super *osb, |
981 | struct buffer_head **bh1, | 981 | struct buffer_head **bh1, |
982 | struct inode *inode1, | 982 | struct inode *inode1, |
983 | struct buffer_head **bh2, | 983 | struct buffer_head **bh2, |
984 | struct inode *inode2) | 984 | struct inode *inode2) |
985 | { | 985 | { |
986 | int status; | 986 | int status; |
987 | struct ocfs2_inode_info *oi1 = OCFS2_I(inode1); | 987 | struct ocfs2_inode_info *oi1 = OCFS2_I(inode1); |
988 | struct ocfs2_inode_info *oi2 = OCFS2_I(inode2); | 988 | struct ocfs2_inode_info *oi2 = OCFS2_I(inode2); |
989 | struct buffer_head **tmpbh; | 989 | struct buffer_head **tmpbh; |
990 | struct inode *tmpinode; | 990 | struct inode *tmpinode; |
991 | 991 | ||
992 | mlog_entry("(inode1 = %llu, inode2 = %llu)\n", | 992 | mlog_entry("(inode1 = %llu, inode2 = %llu)\n", |
993 | (unsigned long long)oi1->ip_blkno, | 993 | (unsigned long long)oi1->ip_blkno, |
994 | (unsigned long long)oi2->ip_blkno); | 994 | (unsigned long long)oi2->ip_blkno); |
995 | 995 | ||
996 | if (*bh1) | 996 | if (*bh1) |
997 | *bh1 = NULL; | 997 | *bh1 = NULL; |
998 | if (*bh2) | 998 | if (*bh2) |
999 | *bh2 = NULL; | 999 | *bh2 = NULL; |
1000 | 1000 | ||
1001 | /* we always want to lock the one with the lower lockid first. */ | 1001 | /* we always want to lock the one with the lower lockid first. */ |
1002 | if (oi1->ip_blkno != oi2->ip_blkno) { | 1002 | if (oi1->ip_blkno != oi2->ip_blkno) { |
1003 | if (oi1->ip_blkno < oi2->ip_blkno) { | 1003 | if (oi1->ip_blkno < oi2->ip_blkno) { |
1004 | /* switch id1 and id2 around */ | 1004 | /* switch id1 and id2 around */ |
1005 | mlog(0, "switching them around...\n"); | 1005 | mlog(0, "switching them around...\n"); |
1006 | tmpbh = bh2; | 1006 | tmpbh = bh2; |
1007 | bh2 = bh1; | 1007 | bh2 = bh1; |
1008 | bh1 = tmpbh; | 1008 | bh1 = tmpbh; |
1009 | 1009 | ||
1010 | tmpinode = inode2; | 1010 | tmpinode = inode2; |
1011 | inode2 = inode1; | 1011 | inode2 = inode1; |
1012 | inode1 = tmpinode; | 1012 | inode1 = tmpinode; |
1013 | } | 1013 | } |
1014 | /* lock id2 */ | 1014 | /* lock id2 */ |
1015 | status = ocfs2_meta_lock(inode2, bh2, 1); | 1015 | status = ocfs2_meta_lock(inode2, bh2, 1); |
1016 | if (status < 0) { | 1016 | if (status < 0) { |
1017 | if (status != -ENOENT) | 1017 | if (status != -ENOENT) |
1018 | mlog_errno(status); | 1018 | mlog_errno(status); |
1019 | goto bail; | 1019 | goto bail; |
1020 | } | 1020 | } |
1021 | } | 1021 | } |
1022 | 1022 | ||
1023 | /* lock id1 */ | 1023 | /* lock id1 */ |
1024 | status = ocfs2_meta_lock(inode1, bh1, 1); | 1024 | status = ocfs2_meta_lock(inode1, bh1, 1); |
1025 | if (status < 0) { | 1025 | if (status < 0) { |
1026 | /* | 1026 | /* |
1027 | * An error return must mean that no cluster locks | 1027 | * An error return must mean that no cluster locks |
1028 | * were held on function exit. | 1028 | * were held on function exit. |
1029 | */ | 1029 | */ |
1030 | if (oi1->ip_blkno != oi2->ip_blkno) | 1030 | if (oi1->ip_blkno != oi2->ip_blkno) |
1031 | ocfs2_meta_unlock(inode2, 1); | 1031 | ocfs2_meta_unlock(inode2, 1); |
1032 | 1032 | ||
1033 | if (status != -ENOENT) | 1033 | if (status != -ENOENT) |
1034 | mlog_errno(status); | 1034 | mlog_errno(status); |
1035 | } | 1035 | } |
1036 | 1036 | ||
1037 | bail: | 1037 | bail: |
1038 | mlog_exit(status); | 1038 | mlog_exit(status); |
1039 | return status; | 1039 | return status; |
1040 | } | 1040 | } |
1041 | 1041 | ||
/*
 * Drop the meta locks on a pair of inodes by calling ocfs2_meta_unlock()
 * (last argument 1) on inode1 and then, unless it is the very same
 * struct inode, on inode2.
 */
static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
{
	ocfs2_meta_unlock(inode1, 1);

	/* Same inode passed twice: nothing further to unlock. */
	if (inode2 == inode1)
		return;

	ocfs2_meta_unlock(inode2, 1);
}
1049 | 1049 | ||
/*
 * Lvalue for the 'inode' field of the directory entry that immediately
 * follows the first entry stored at 'buffer': the first entry's rec_len
 * (little-endian on disk) is used as the byte offset to the second one.
 * Presumably that second entry is "..", hence the name -- confirm against
 * the on-disk directory layout.
 *
 * 'buffer' may be any pointer expression; it is parenthesized so that an
 * argument such as "p + off" is cast as a whole. Note that 'buffer' is
 * evaluated twice, so it must not have side effects.
 */
#define PARENT_INO(buffer) \
	(((struct ocfs2_dir_entry *) \
	  ((char *)(buffer) + \
	   le16_to_cpu(((struct ocfs2_dir_entry *)(buffer))->rec_len)))->inode)
1054 | 1054 | ||
1055 | static int ocfs2_rename(struct inode *old_dir, | 1055 | static int ocfs2_rename(struct inode *old_dir, |
1056 | struct dentry *old_dentry, | 1056 | struct dentry *old_dentry, |
1057 | struct inode *new_dir, | 1057 | struct inode *new_dir, |
1058 | struct dentry *new_dentry) | 1058 | struct dentry *new_dentry) |
1059 | { | 1059 | { |
1060 | int status = 0, rename_lock = 0, parents_locked = 0; | 1060 | int status = 0, rename_lock = 0, parents_locked = 0; |
1061 | int old_child_locked = 0, new_child_locked = 0; | 1061 | int old_child_locked = 0, new_child_locked = 0; |
1062 | struct inode *old_inode = old_dentry->d_inode; | 1062 | struct inode *old_inode = old_dentry->d_inode; |
1063 | struct inode *new_inode = new_dentry->d_inode; | 1063 | struct inode *new_inode = new_dentry->d_inode; |
1064 | struct inode *orphan_dir = NULL; | 1064 | struct inode *orphan_dir = NULL; |
1065 | struct ocfs2_dinode *newfe = NULL; | 1065 | struct ocfs2_dinode *newfe = NULL; |
1066 | char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; | 1066 | char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; |
1067 | struct buffer_head *orphan_entry_bh = NULL; | 1067 | struct buffer_head *orphan_entry_bh = NULL; |
1068 | struct buffer_head *newfe_bh = NULL; | 1068 | struct buffer_head *newfe_bh = NULL; |
1069 | struct buffer_head *old_inode_bh = NULL; | 1069 | struct buffer_head *old_inode_bh = NULL; |
1070 | struct buffer_head *insert_entry_bh = NULL; | 1070 | struct buffer_head *insert_entry_bh = NULL; |
1071 | struct ocfs2_super *osb = NULL; | 1071 | struct ocfs2_super *osb = NULL; |
1072 | u64 newfe_blkno; | 1072 | u64 newfe_blkno; |
1073 | handle_t *handle = NULL; | 1073 | handle_t *handle = NULL; |
1074 | struct buffer_head *old_dir_bh = NULL; | 1074 | struct buffer_head *old_dir_bh = NULL; |
1075 | struct buffer_head *new_dir_bh = NULL; | 1075 | struct buffer_head *new_dir_bh = NULL; |
1076 | struct ocfs2_dir_entry *old_de = NULL, *new_de = NULL; // dirent for old_dentry | 1076 | struct ocfs2_dir_entry *old_de = NULL, *new_de = NULL; // dirent for old_dentry |
1077 | // and new_dentry | 1077 | // and new_dentry |
1078 | struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above | 1078 | struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above |
1079 | struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, | 1079 | struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, |
1080 | // this is the 1st dirent bh | 1080 | // this is the 1st dirent bh |
1081 | nlink_t old_dir_nlink = old_dir->i_nlink; | 1081 | nlink_t old_dir_nlink = old_dir->i_nlink; |
1082 | 1082 | ||
1083 | /* At some point it might be nice to break this function up a | 1083 | /* At some point it might be nice to break this function up a |
1084 | * bit. */ | 1084 | * bit. */ |
1085 | 1085 | ||
1086 | mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p, from='%.*s' to='%.*s')\n", | 1086 | mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p, from='%.*s' to='%.*s')\n", |
1087 | old_dir, old_dentry, new_dir, new_dentry, | 1087 | old_dir, old_dentry, new_dir, new_dentry, |
1088 | old_dentry->d_name.len, old_dentry->d_name.name, | 1088 | old_dentry->d_name.len, old_dentry->d_name.name, |
1089 | new_dentry->d_name.len, new_dentry->d_name.name); | 1089 | new_dentry->d_name.len, new_dentry->d_name.name); |
1090 | 1090 | ||
1091 | osb = OCFS2_SB(old_dir->i_sb); | 1091 | osb = OCFS2_SB(old_dir->i_sb); |
1092 | 1092 | ||
1093 | if (new_inode) { | 1093 | if (new_inode) { |
1094 | if (!igrab(new_inode)) | 1094 | if (!igrab(new_inode)) |
1095 | BUG(); | 1095 | BUG(); |
1096 | } | 1096 | } |
1097 | 1097 | ||
1098 | /* Assume a directory hierarchy thusly: | 1098 | /* Assume a directory hierarchy thusly: |
1099 | * a/b/c | 1099 | * a/b/c |
1100 | * a/d | 1100 | * a/d |
1101 | * a,b,c, and d are all directories. | 1101 | * a,b,c, and d are all directories. |
1102 | * | 1102 | * |
1103 | * from cwd of 'a' on both nodes: | 1103 | * from cwd of 'a' on both nodes: |
1104 | * node1: mv b/c d | 1104 | * node1: mv b/c d |
1105 | * node2: mv d b/c | 1105 | * node2: mv d b/c |
1106 | * | 1106 | * |
1107 | * And that's why, just like the VFS, we need a file system | 1107 | * And that's why, just like the VFS, we need a file system |
1108 | * rename lock. */ | 1108 | * rename lock. */ |
1109 | if (old_dentry != new_dentry) { | 1109 | if (old_dentry != new_dentry) { |
1110 | status = ocfs2_rename_lock(osb); | 1110 | status = ocfs2_rename_lock(osb); |
1111 | if (status < 0) { | 1111 | if (status < 0) { |
1112 | mlog_errno(status); | 1112 | mlog_errno(status); |
1113 | goto bail; | 1113 | goto bail; |
1114 | } | 1114 | } |
1115 | rename_lock = 1; | 1115 | rename_lock = 1; |
1116 | } | 1116 | } |
1117 | 1117 | ||
1118 | /* if old and new are the same, this'll just do one lock. */ | 1118 | /* if old and new are the same, this'll just do one lock. */ |
1119 | status = ocfs2_double_lock(osb, &old_dir_bh, old_dir, | 1119 | status = ocfs2_double_lock(osb, &old_dir_bh, old_dir, |
1120 | &new_dir_bh, new_dir); | 1120 | &new_dir_bh, new_dir); |
1121 | if (status < 0) { | 1121 | if (status < 0) { |
1122 | mlog_errno(status); | 1122 | mlog_errno(status); |
1123 | goto bail; | 1123 | goto bail; |
1124 | } | 1124 | } |
1125 | parents_locked = 1; | 1125 | parents_locked = 1; |
1126 | 1126 | ||
1127 | /* make sure both dirs have bhs | 1127 | /* make sure both dirs have bhs |
1128 | * get an extra ref on old_dir_bh if old==new */ | 1128 | * get an extra ref on old_dir_bh if old==new */ |
1129 | if (!new_dir_bh) { | 1129 | if (!new_dir_bh) { |
1130 | if (old_dir_bh) { | 1130 | if (old_dir_bh) { |
1131 | new_dir_bh = old_dir_bh; | 1131 | new_dir_bh = old_dir_bh; |
1132 | get_bh(new_dir_bh); | 1132 | get_bh(new_dir_bh); |
1133 | } else { | 1133 | } else { |
1134 | mlog(ML_ERROR, "no old_dir_bh!\n"); | 1134 | mlog(ML_ERROR, "no old_dir_bh!\n"); |
1135 | status = -EIO; | 1135 | status = -EIO; |
1136 | goto bail; | 1136 | goto bail; |
1137 | } | 1137 | } |
1138 | } | 1138 | } |
1139 | 1139 | ||
1140 | /* | 1140 | /* |
1141 | * Aside from allowing a meta data update, the locking here | 1141 | * Aside from allowing a meta data update, the locking here |
1142 | * also ensures that the vote thread on other nodes won't have | 1142 | * also ensures that the vote thread on other nodes won't have |
1143 | * to concurrently downconvert the inode and the dentry locks. | 1143 | * to concurrently downconvert the inode and the dentry locks. |
1144 | */ | 1144 | */ |
1145 | status = ocfs2_meta_lock(old_inode, &old_inode_bh, 1); | 1145 | status = ocfs2_meta_lock(old_inode, &old_inode_bh, 1); |
1146 | if (status < 0) { | 1146 | if (status < 0) { |
1147 | if (status != -ENOENT) | 1147 | if (status != -ENOENT) |
1148 | mlog_errno(status); | 1148 | mlog_errno(status); |
1149 | goto bail; | 1149 | goto bail; |
1150 | } | 1150 | } |
1151 | old_child_locked = 1; | 1151 | old_child_locked = 1; |
1152 | 1152 | ||
1153 | status = ocfs2_remote_dentry_delete(old_dentry); | 1153 | status = ocfs2_remote_dentry_delete(old_dentry); |
1154 | if (status < 0) { | 1154 | if (status < 0) { |
1155 | mlog_errno(status); | 1155 | mlog_errno(status); |
1156 | goto bail; | 1156 | goto bail; |
1157 | } | 1157 | } |
1158 | 1158 | ||
1159 | if (S_ISDIR(old_inode->i_mode)) { | 1159 | if (S_ISDIR(old_inode->i_mode)) { |
1160 | status = -EIO; | 1160 | status = -EIO; |
1161 | old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0); | 1161 | old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0); |
1162 | if (!old_inode_de_bh) | 1162 | if (!old_inode_de_bh) |
1163 | goto bail; | 1163 | goto bail; |
1164 | 1164 | ||
1165 | status = -EIO; | 1165 | status = -EIO; |
1166 | if (le64_to_cpu(PARENT_INO(old_inode_de_bh->b_data)) != | 1166 | if (le64_to_cpu(PARENT_INO(old_inode_de_bh->b_data)) != |
1167 | OCFS2_I(old_dir)->ip_blkno) | 1167 | OCFS2_I(old_dir)->ip_blkno) |
1168 | goto bail; | 1168 | goto bail; |
1169 | status = -EMLINK; | 1169 | status = -EMLINK; |
1170 | if (!new_inode && new_dir!=old_dir && | 1170 | if (!new_inode && new_dir!=old_dir && |
1171 | new_dir->i_nlink >= OCFS2_LINK_MAX) | 1171 | new_dir->i_nlink >= OCFS2_LINK_MAX) |
1172 | goto bail; | 1172 | goto bail; |
1173 | } | 1173 | } |
1174 | 1174 | ||
1175 | status = -ENOENT; | 1175 | status = -ENOENT; |
1176 | old_de_bh = ocfs2_find_entry(old_dentry->d_name.name, | 1176 | old_de_bh = ocfs2_find_entry(old_dentry->d_name.name, |
1177 | old_dentry->d_name.len, | 1177 | old_dentry->d_name.len, |
1178 | old_dir, &old_de); | 1178 | old_dir, &old_de); |
1179 | if (!old_de_bh) | 1179 | if (!old_de_bh) |
1180 | goto bail; | 1180 | goto bail; |
1181 | 1181 | ||
1182 | /* | 1182 | /* |
1183 | * Check for inode number is _not_ due to possible IO errors. | 1183 | * Check for inode number is _not_ due to possible IO errors. |
1184 | * We might rmdir the source, keep it as pwd of some process | 1184 | * We might rmdir the source, keep it as pwd of some process |
1185 | * and merrily kill the link to whatever was created under the | 1185 | * and merrily kill the link to whatever was created under the |
1186 | * same name. Goodbye sticky bit ;-< | 1186 | * same name. Goodbye sticky bit ;-< |
1187 | */ | 1187 | */ |
1188 | if (le64_to_cpu(old_de->inode) != OCFS2_I(old_inode)->ip_blkno) | 1188 | if (le64_to_cpu(old_de->inode) != OCFS2_I(old_inode)->ip_blkno) |
1189 | goto bail; | 1189 | goto bail; |
1190 | 1190 | ||
1191 | /* check if the target already exists (in which case we need | 1191 | /* check if the target already exists (in which case we need |
1192 | * to delete it */ | 1192 | * to delete it */ |
1193 | status = ocfs2_find_files_on_disk(new_dentry->d_name.name, | 1193 | status = ocfs2_find_files_on_disk(new_dentry->d_name.name, |
1194 | new_dentry->d_name.len, | 1194 | new_dentry->d_name.len, |
1195 | &newfe_blkno, new_dir, &new_de_bh, | 1195 | &newfe_blkno, new_dir, &new_de_bh, |
1196 | &new_de); | 1196 | &new_de); |
1197 | /* The only error we allow here is -ENOENT because the new | 1197 | /* The only error we allow here is -ENOENT because the new |
1198 | * file not existing is perfectly valid. */ | 1198 | * file not existing is perfectly valid. */ |
1199 | if ((status < 0) && (status != -ENOENT)) { | 1199 | if ((status < 0) && (status != -ENOENT)) { |
1200 | /* If we cannot find the file specified we should just */ | 1200 | /* If we cannot find the file specified we should just */ |
1201 | /* return the error... */ | 1201 | /* return the error... */ |
1202 | mlog_errno(status); | 1202 | mlog_errno(status); |
1203 | goto bail; | 1203 | goto bail; |
1204 | } | 1204 | } |
1205 | 1205 | ||
1206 | if (!new_de && new_inode) | 1206 | if (!new_de && new_inode) |
1207 | mlog(ML_ERROR, "inode %lu does not exist in it's parent " | 1207 | mlog(ML_ERROR, "inode %lu does not exist in it's parent " |
1208 | "directory!", new_inode->i_ino); | 1208 | "directory!", new_inode->i_ino); |
1209 | 1209 | ||
1210 | /* In case we need to overwrite an existing file, we blow it | 1210 | /* In case we need to overwrite an existing file, we blow it |
1211 | * away first */ | 1211 | * away first */ |
1212 | if (new_de) { | 1212 | if (new_de) { |
1213 | /* VFS didn't think there existed an inode here, but | 1213 | /* VFS didn't think there existed an inode here, but |
1214 | * someone else in the cluster must have raced our | 1214 | * someone else in the cluster must have raced our |
1215 | * rename to create one. Today we error cleanly, in | 1215 | * rename to create one. Today we error cleanly, in |
1216 | * the future we should consider calling iget to build | 1216 | * the future we should consider calling iget to build |
1217 | * a new struct inode for this entry. */ | 1217 | * a new struct inode for this entry. */ |
1218 | if (!new_inode) { | 1218 | if (!new_inode) { |
1219 | status = -EACCES; | 1219 | status = -EACCES; |
1220 | 1220 | ||
1221 | mlog(0, "We found an inode for name %.*s but VFS " | 1221 | mlog(0, "We found an inode for name %.*s but VFS " |
1222 | "didn't give us one.\n", new_dentry->d_name.len, | 1222 | "didn't give us one.\n", new_dentry->d_name.len, |
1223 | new_dentry->d_name.name); | 1223 | new_dentry->d_name.name); |
1224 | goto bail; | 1224 | goto bail; |
1225 | } | 1225 | } |
1226 | 1226 | ||
1227 | if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) { | 1227 | if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) { |
1228 | status = -EACCES; | 1228 | status = -EACCES; |
1229 | 1229 | ||
1230 | mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n", | 1230 | mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n", |
1231 | (unsigned long long)OCFS2_I(new_inode)->ip_blkno, | 1231 | (unsigned long long)OCFS2_I(new_inode)->ip_blkno, |
1232 | (unsigned long long)newfe_blkno, | 1232 | (unsigned long long)newfe_blkno, |
1233 | OCFS2_I(new_inode)->ip_flags); | 1233 | OCFS2_I(new_inode)->ip_flags); |
1234 | goto bail; | 1234 | goto bail; |
1235 | } | 1235 | } |
1236 | 1236 | ||
1237 | status = ocfs2_meta_lock(new_inode, &newfe_bh, 1); | 1237 | status = ocfs2_meta_lock(new_inode, &newfe_bh, 1); |
1238 | if (status < 0) { | 1238 | if (status < 0) { |
1239 | if (status != -ENOENT) | 1239 | if (status != -ENOENT) |
1240 | mlog_errno(status); | 1240 | mlog_errno(status); |
1241 | goto bail; | 1241 | goto bail; |
1242 | } | 1242 | } |
1243 | new_child_locked = 1; | 1243 | new_child_locked = 1; |
1244 | 1244 | ||
1245 | status = ocfs2_remote_dentry_delete(new_dentry); | 1245 | status = ocfs2_remote_dentry_delete(new_dentry); |
1246 | if (status < 0) { | 1246 | if (status < 0) { |
1247 | mlog_errno(status); | 1247 | mlog_errno(status); |
1248 | goto bail; | 1248 | goto bail; |
1249 | } | 1249 | } |
1250 | 1250 | ||
1251 | newfe = (struct ocfs2_dinode *) newfe_bh->b_data; | 1251 | newfe = (struct ocfs2_dinode *) newfe_bh->b_data; |
1252 | 1252 | ||
1253 | mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu " | 1253 | mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu " |
1254 | "newfebh=%p bhblocknr=%llu\n", new_de, | 1254 | "newfebh=%p bhblocknr=%llu\n", new_de, |
1255 | (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ? | 1255 | (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ? |
1256 | (unsigned long long)newfe_bh->b_blocknr : 0ULL); | 1256 | (unsigned long long)newfe_bh->b_blocknr : 0ULL); |
1257 | 1257 | ||
1258 | if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) { | 1258 | if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) { |
1259 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, | 1259 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, |
1260 | new_inode, | 1260 | new_inode, |
1261 | orphan_name, | 1261 | orphan_name, |
1262 | &orphan_entry_bh); | 1262 | &orphan_entry_bh); |
1263 | if (status < 0) { | 1263 | if (status < 0) { |
1264 | mlog_errno(status); | 1264 | mlog_errno(status); |
1265 | goto bail; | 1265 | goto bail; |
1266 | } | 1266 | } |
1267 | } | 1267 | } |
1268 | } else { | 1268 | } else { |
1269 | BUG_ON(new_dentry->d_parent->d_inode != new_dir); | 1269 | BUG_ON(new_dentry->d_parent->d_inode != new_dir); |
1270 | 1270 | ||
1271 | status = ocfs2_check_dir_for_entry(new_dir, | 1271 | status = ocfs2_check_dir_for_entry(new_dir, |
1272 | new_dentry->d_name.name, | 1272 | new_dentry->d_name.name, |
1273 | new_dentry->d_name.len); | 1273 | new_dentry->d_name.len); |
1274 | if (status) | 1274 | if (status) |
1275 | goto bail; | 1275 | goto bail; |
1276 | 1276 | ||
1277 | status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh, | 1277 | status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh, |
1278 | new_dentry->d_name.name, | 1278 | new_dentry->d_name.name, |
1279 | new_dentry->d_name.len, | 1279 | new_dentry->d_name.len, |
1280 | &insert_entry_bh); | 1280 | &insert_entry_bh); |
1281 | if (status < 0) { | 1281 | if (status < 0) { |
1282 | mlog_errno(status); | 1282 | mlog_errno(status); |
1283 | goto bail; | 1283 | goto bail; |
1284 | } | 1284 | } |
1285 | } | 1285 | } |
1286 | 1286 | ||
1287 | handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS); | 1287 | handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS); |
1288 | if (IS_ERR(handle)) { | 1288 | if (IS_ERR(handle)) { |
1289 | status = PTR_ERR(handle); | 1289 | status = PTR_ERR(handle); |
1290 | handle = NULL; | 1290 | handle = NULL; |
1291 | mlog_errno(status); | 1291 | mlog_errno(status); |
1292 | goto bail; | 1292 | goto bail; |
1293 | } | 1293 | } |
1294 | 1294 | ||
1295 | if (new_de) { | 1295 | if (new_de) { |
1296 | if (S_ISDIR(new_inode->i_mode)) { | 1296 | if (S_ISDIR(new_inode->i_mode)) { |
1297 | if (!ocfs2_empty_dir(new_inode) || | 1297 | if (!ocfs2_empty_dir(new_inode) || |
1298 | new_inode->i_nlink != 2) { | 1298 | new_inode->i_nlink != 2) { |
1299 | status = -ENOTEMPTY; | 1299 | status = -ENOTEMPTY; |
1300 | goto bail; | 1300 | goto bail; |
1301 | } | 1301 | } |
1302 | } | 1302 | } |
1303 | status = ocfs2_journal_access(handle, new_inode, newfe_bh, | 1303 | status = ocfs2_journal_access(handle, new_inode, newfe_bh, |
1304 | OCFS2_JOURNAL_ACCESS_WRITE); | 1304 | OCFS2_JOURNAL_ACCESS_WRITE); |
1305 | if (status < 0) { | 1305 | if (status < 0) { |
1306 | mlog_errno(status); | 1306 | mlog_errno(status); |
1307 | goto bail; | 1307 | goto bail; |
1308 | } | 1308 | } |
1309 | 1309 | ||
1310 | if (S_ISDIR(new_inode->i_mode) || | 1310 | if (S_ISDIR(new_inode->i_mode) || |
1311 | (newfe->i_links_count == cpu_to_le16(1))){ | 1311 | (newfe->i_links_count == cpu_to_le16(1))){ |
1312 | status = ocfs2_orphan_add(osb, handle, new_inode, | 1312 | status = ocfs2_orphan_add(osb, handle, new_inode, |
1313 | newfe, orphan_name, | 1313 | newfe, orphan_name, |
1314 | orphan_entry_bh, orphan_dir); | 1314 | orphan_entry_bh, orphan_dir); |
1315 | if (status < 0) { | 1315 | if (status < 0) { |
1316 | mlog_errno(status); | 1316 | mlog_errno(status); |
1317 | goto bail; | 1317 | goto bail; |
1318 | } | 1318 | } |
1319 | } | 1319 | } |
1320 | 1320 | ||
1321 | /* change the dirent to point to the correct inode */ | 1321 | /* change the dirent to point to the correct inode */ |
1322 | status = ocfs2_journal_access(handle, new_dir, new_de_bh, | 1322 | status = ocfs2_journal_access(handle, new_dir, new_de_bh, |
1323 | OCFS2_JOURNAL_ACCESS_WRITE); | 1323 | OCFS2_JOURNAL_ACCESS_WRITE); |
1324 | if (status < 0) { | 1324 | if (status < 0) { |
1325 | mlog_errno(status); | 1325 | mlog_errno(status); |
1326 | goto bail; | 1326 | goto bail; |
1327 | } | 1327 | } |
1328 | new_de->inode = cpu_to_le64(OCFS2_I(old_inode)->ip_blkno); | 1328 | new_de->inode = cpu_to_le64(OCFS2_I(old_inode)->ip_blkno); |
1329 | new_de->file_type = old_de->file_type; | 1329 | new_de->file_type = old_de->file_type; |
1330 | new_dir->i_version++; | 1330 | new_dir->i_version++; |
1331 | status = ocfs2_journal_dirty(handle, new_de_bh); | 1331 | status = ocfs2_journal_dirty(handle, new_de_bh); |
1332 | if (status < 0) { | 1332 | if (status < 0) { |
1333 | mlog_errno(status); | 1333 | mlog_errno(status); |
1334 | goto bail; | 1334 | goto bail; |
1335 | } | 1335 | } |
1336 | 1336 | ||
1337 | if (S_ISDIR(new_inode->i_mode)) | 1337 | if (S_ISDIR(new_inode->i_mode)) |
1338 | newfe->i_links_count = 0; | 1338 | newfe->i_links_count = 0; |
1339 | else | 1339 | else |
1340 | le16_add_cpu(&newfe->i_links_count, -1); | 1340 | le16_add_cpu(&newfe->i_links_count, -1); |
1341 | 1341 | ||
1342 | status = ocfs2_journal_dirty(handle, newfe_bh); | 1342 | status = ocfs2_journal_dirty(handle, newfe_bh); |
1343 | if (status < 0) { | 1343 | if (status < 0) { |
1344 | mlog_errno(status); | 1344 | mlog_errno(status); |
1345 | goto bail; | 1345 | goto bail; |
1346 | } | 1346 | } |
1347 | } else { | 1347 | } else { |
1348 | /* if the name was not found in new_dir, add it now */ | 1348 | /* if the name was not found in new_dir, add it now */ |
1349 | status = ocfs2_add_entry(handle, new_dentry, old_inode, | 1349 | status = ocfs2_add_entry(handle, new_dentry, old_inode, |
1350 | OCFS2_I(old_inode)->ip_blkno, | 1350 | OCFS2_I(old_inode)->ip_blkno, |
1351 | new_dir_bh, insert_entry_bh); | 1351 | new_dir_bh, insert_entry_bh); |
1352 | } | 1352 | } |
1353 | 1353 | ||
1354 | old_inode->i_ctime = CURRENT_TIME; | 1354 | old_inode->i_ctime = CURRENT_TIME; |
1355 | mark_inode_dirty(old_inode); | 1355 | mark_inode_dirty(old_inode); |
1356 | ocfs2_mark_inode_dirty(handle, old_inode, old_inode_bh); | 1356 | ocfs2_mark_inode_dirty(handle, old_inode, old_inode_bh); |
1357 | 1357 | ||
1358 | /* now that the name has been added to new_dir, remove the old name */ | 1358 | /* now that the name has been added to new_dir, remove the old name */ |
1359 | status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh); | 1359 | status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh); |
1360 | if (status < 0) { | 1360 | if (status < 0) { |
1361 | mlog_errno(status); | 1361 | mlog_errno(status); |
1362 | goto bail; | 1362 | goto bail; |
1363 | } | 1363 | } |
1364 | 1364 | ||
1365 | if (new_inode) { | 1365 | if (new_inode) { |
1366 | new_inode->i_nlink--; | 1366 | new_inode->i_nlink--; |
1367 | new_inode->i_ctime = CURRENT_TIME; | 1367 | new_inode->i_ctime = CURRENT_TIME; |
1368 | } | 1368 | } |
1369 | old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; | 1369 | old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; |
1370 | if (old_inode_de_bh) { | 1370 | if (old_inode_de_bh) { |
1371 | status = ocfs2_journal_access(handle, old_inode, | 1371 | status = ocfs2_journal_access(handle, old_inode, |
1372 | old_inode_de_bh, | 1372 | old_inode_de_bh, |
1373 | OCFS2_JOURNAL_ACCESS_WRITE); | 1373 | OCFS2_JOURNAL_ACCESS_WRITE); |
1374 | PARENT_INO(old_inode_de_bh->b_data) = | 1374 | PARENT_INO(old_inode_de_bh->b_data) = |
1375 | cpu_to_le64(OCFS2_I(new_dir)->ip_blkno); | 1375 | cpu_to_le64(OCFS2_I(new_dir)->ip_blkno); |
1376 | status = ocfs2_journal_dirty(handle, old_inode_de_bh); | 1376 | status = ocfs2_journal_dirty(handle, old_inode_de_bh); |
1377 | old_dir->i_nlink--; | 1377 | old_dir->i_nlink--; |
1378 | if (new_inode) { | 1378 | if (new_inode) { |
1379 | new_inode->i_nlink--; | 1379 | new_inode->i_nlink--; |
1380 | } else { | 1380 | } else { |
1381 | inc_nlink(new_dir); | 1381 | inc_nlink(new_dir); |
1382 | mark_inode_dirty(new_dir); | 1382 | mark_inode_dirty(new_dir); |
1383 | } | 1383 | } |
1384 | } | 1384 | } |
1385 | mark_inode_dirty(old_dir); | 1385 | mark_inode_dirty(old_dir); |
1386 | ocfs2_mark_inode_dirty(handle, old_dir, old_dir_bh); | 1386 | ocfs2_mark_inode_dirty(handle, old_dir, old_dir_bh); |
1387 | if (new_inode) { | 1387 | if (new_inode) { |
1388 | mark_inode_dirty(new_inode); | 1388 | mark_inode_dirty(new_inode); |
1389 | ocfs2_mark_inode_dirty(handle, new_inode, newfe_bh); | 1389 | ocfs2_mark_inode_dirty(handle, new_inode, newfe_bh); |
1390 | } | 1390 | } |
1391 | 1391 | ||
1392 | if (old_dir != new_dir) { | 1392 | if (old_dir != new_dir) { |
1393 | /* Keep the same times on both directories.*/ | 1393 | /* Keep the same times on both directories.*/ |
1394 | new_dir->i_ctime = new_dir->i_mtime = old_dir->i_ctime; | 1394 | new_dir->i_ctime = new_dir->i_mtime = old_dir->i_ctime; |
1395 | 1395 | ||
1396 | /* | 1396 | /* |
1397 | * This will also pick up the i_nlink change from the | 1397 | * This will also pick up the i_nlink change from the |
1398 | * block above. | 1398 | * block above. |
1399 | */ | 1399 | */ |
1400 | ocfs2_mark_inode_dirty(handle, new_dir, new_dir_bh); | 1400 | ocfs2_mark_inode_dirty(handle, new_dir, new_dir_bh); |
1401 | } | 1401 | } |
1402 | 1402 | ||
1403 | if (old_dir_nlink != old_dir->i_nlink) { | 1403 | if (old_dir_nlink != old_dir->i_nlink) { |
1404 | if (!old_dir_bh) { | 1404 | if (!old_dir_bh) { |
1405 | mlog(ML_ERROR, "need to change nlink for old dir " | 1405 | mlog(ML_ERROR, "need to change nlink for old dir " |
1406 | "%llu from %d to %d but bh is NULL!\n", | 1406 | "%llu from %d to %d but bh is NULL!\n", |
1407 | (unsigned long long)OCFS2_I(old_dir)->ip_blkno, | 1407 | (unsigned long long)OCFS2_I(old_dir)->ip_blkno, |
1408 | (int)old_dir_nlink, old_dir->i_nlink); | 1408 | (int)old_dir_nlink, old_dir->i_nlink); |
1409 | } else { | 1409 | } else { |
1410 | struct ocfs2_dinode *fe; | 1410 | struct ocfs2_dinode *fe; |
1411 | status = ocfs2_journal_access(handle, old_dir, | 1411 | status = ocfs2_journal_access(handle, old_dir, |
1412 | old_dir_bh, | 1412 | old_dir_bh, |
1413 | OCFS2_JOURNAL_ACCESS_WRITE); | 1413 | OCFS2_JOURNAL_ACCESS_WRITE); |
1414 | fe = (struct ocfs2_dinode *) old_dir_bh->b_data; | 1414 | fe = (struct ocfs2_dinode *) old_dir_bh->b_data; |
1415 | fe->i_links_count = cpu_to_le16(old_dir->i_nlink); | 1415 | fe->i_links_count = cpu_to_le16(old_dir->i_nlink); |
1416 | status = ocfs2_journal_dirty(handle, old_dir_bh); | 1416 | status = ocfs2_journal_dirty(handle, old_dir_bh); |
1417 | } | 1417 | } |
1418 | } | 1418 | } |
1419 | 1419 | ||
1420 | ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir); | 1420 | ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir); |
1421 | status = 0; | 1421 | status = 0; |
1422 | bail: | 1422 | bail: |
1423 | if (rename_lock) | 1423 | if (rename_lock) |
1424 | ocfs2_rename_unlock(osb); | 1424 | ocfs2_rename_unlock(osb); |
1425 | 1425 | ||
1426 | if (handle) | 1426 | if (handle) |
1427 | ocfs2_commit_trans(osb, handle); | 1427 | ocfs2_commit_trans(osb, handle); |
1428 | 1428 | ||
1429 | if (parents_locked) | 1429 | if (parents_locked) |
1430 | ocfs2_double_unlock(old_dir, new_dir); | 1430 | ocfs2_double_unlock(old_dir, new_dir); |
1431 | 1431 | ||
1432 | if (old_child_locked) | 1432 | if (old_child_locked) |
1433 | ocfs2_meta_unlock(old_inode, 1); | 1433 | ocfs2_meta_unlock(old_inode, 1); |
1434 | 1434 | ||
1435 | if (new_child_locked) | 1435 | if (new_child_locked) |
1436 | ocfs2_meta_unlock(new_inode, 1); | 1436 | ocfs2_meta_unlock(new_inode, 1); |
1437 | 1437 | ||
1438 | if (orphan_dir) { | 1438 | if (orphan_dir) { |
1439 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ | 1439 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ |
1440 | ocfs2_meta_unlock(orphan_dir, 1); | 1440 | ocfs2_meta_unlock(orphan_dir, 1); |
1441 | mutex_unlock(&orphan_dir->i_mutex); | 1441 | mutex_unlock(&orphan_dir->i_mutex); |
1442 | iput(orphan_dir); | 1442 | iput(orphan_dir); |
1443 | } | 1443 | } |
1444 | 1444 | ||
1445 | if (new_inode) | 1445 | if (new_inode) |
1446 | sync_mapping_buffers(old_inode->i_mapping); | 1446 | sync_mapping_buffers(old_inode->i_mapping); |
1447 | 1447 | ||
1448 | if (new_inode) | 1448 | if (new_inode) |
1449 | iput(new_inode); | 1449 | iput(new_inode); |
1450 | if (newfe_bh) | 1450 | if (newfe_bh) |
1451 | brelse(newfe_bh); | 1451 | brelse(newfe_bh); |
1452 | if (old_inode_bh) | 1452 | if (old_inode_bh) |
1453 | brelse(old_inode_bh); | 1453 | brelse(old_inode_bh); |
1454 | if (old_dir_bh) | 1454 | if (old_dir_bh) |
1455 | brelse(old_dir_bh); | 1455 | brelse(old_dir_bh); |
1456 | if (new_dir_bh) | 1456 | if (new_dir_bh) |
1457 | brelse(new_dir_bh); | 1457 | brelse(new_dir_bh); |
1458 | if (new_de_bh) | 1458 | if (new_de_bh) |
1459 | brelse(new_de_bh); | 1459 | brelse(new_de_bh); |
1460 | if (old_de_bh) | 1460 | if (old_de_bh) |
1461 | brelse(old_de_bh); | 1461 | brelse(old_de_bh); |
1462 | if (old_inode_de_bh) | 1462 | if (old_inode_de_bh) |
1463 | brelse(old_inode_de_bh); | 1463 | brelse(old_inode_de_bh); |
1464 | if (orphan_entry_bh) | 1464 | if (orphan_entry_bh) |
1465 | brelse(orphan_entry_bh); | 1465 | brelse(orphan_entry_bh); |
1466 | if (insert_entry_bh) | 1466 | if (insert_entry_bh) |
1467 | brelse(insert_entry_bh); | 1467 | brelse(insert_entry_bh); |
1468 | 1468 | ||
1469 | mlog_exit(status); | 1469 | mlog_exit(status); |
1470 | 1470 | ||
1471 | return status; | 1471 | return status; |
1472 | } | 1472 | } |
1473 | 1473 | ||
1474 | /* | 1474 | /* |
1475 | * we expect i_size = strlen(symname). Copy symname into the file | 1475 | * we expect i_size = strlen(symname). Copy symname into the file |
1476 | * data, including the null terminator. | 1476 | * data, including the null terminator. |
1477 | */ | 1477 | */ |
1478 | static int ocfs2_create_symlink_data(struct ocfs2_super *osb, | 1478 | static int ocfs2_create_symlink_data(struct ocfs2_super *osb, |
1479 | handle_t *handle, | 1479 | handle_t *handle, |
1480 | struct inode *inode, | 1480 | struct inode *inode, |
1481 | const char *symname) | 1481 | const char *symname) |
1482 | { | 1482 | { |
1483 | struct buffer_head **bhs = NULL; | 1483 | struct buffer_head **bhs = NULL; |
1484 | const char *c; | 1484 | const char *c; |
1485 | struct super_block *sb = osb->sb; | 1485 | struct super_block *sb = osb->sb; |
1486 | u64 p_blkno; | 1486 | u64 p_blkno, p_blocks; |
1487 | int p_blocks; | ||
1488 | int virtual, blocks, status, i, bytes_left; | 1487 | int virtual, blocks, status, i, bytes_left; |
1489 | 1488 | ||
1490 | bytes_left = i_size_read(inode) + 1; | 1489 | bytes_left = i_size_read(inode) + 1; |
1491 | /* we can't trust i_blocks because we're actually going to | 1490 | /* we can't trust i_blocks because we're actually going to |
1492 | * write i_size + 1 bytes. */ | 1491 | * write i_size + 1 bytes. */ |
1493 | blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits; | 1492 | blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits; |
1494 | 1493 | ||
1495 | mlog_entry("i_blocks = %llu, i_size = %llu, blocks = %d\n", | 1494 | mlog_entry("i_blocks = %llu, i_size = %llu, blocks = %d\n", |
1496 | (unsigned long long)inode->i_blocks, | 1495 | (unsigned long long)inode->i_blocks, |
1497 | i_size_read(inode), blocks); | 1496 | i_size_read(inode), blocks); |
1498 | 1497 | ||
1499 | /* Sanity check -- make sure we're going to fit. */ | 1498 | /* Sanity check -- make sure we're going to fit. */ |
1500 | if (bytes_left > | 1499 | if (bytes_left > |
1501 | ocfs2_clusters_to_bytes(sb, OCFS2_I(inode)->ip_clusters)) { | 1500 | ocfs2_clusters_to_bytes(sb, OCFS2_I(inode)->ip_clusters)) { |
1502 | status = -EIO; | 1501 | status = -EIO; |
1503 | mlog_errno(status); | 1502 | mlog_errno(status); |
1504 | goto bail; | 1503 | goto bail; |
1505 | } | 1504 | } |
1506 | 1505 | ||
1507 | bhs = kcalloc(blocks, sizeof(struct buffer_head *), GFP_KERNEL); | 1506 | bhs = kcalloc(blocks, sizeof(struct buffer_head *), GFP_KERNEL); |
1508 | if (!bhs) { | 1507 | if (!bhs) { |
1509 | status = -ENOMEM; | 1508 | status = -ENOMEM; |
1510 | mlog_errno(status); | 1509 | mlog_errno(status); |
1511 | goto bail; | 1510 | goto bail; |
1512 | } | 1511 | } |
1513 | 1512 | ||
1514 | status = ocfs2_extent_map_get_blocks(inode, 0, &p_blkno, &p_blocks, | 1513 | status = ocfs2_extent_map_get_blocks(inode, 0, &p_blkno, &p_blocks, |
1515 | NULL); | 1514 | NULL); |
1516 | if (status < 0) { | 1515 | if (status < 0) { |
1517 | mlog_errno(status); | 1516 | mlog_errno(status); |
1518 | goto bail; | 1517 | goto bail; |
1519 | } | 1518 | } |
1520 | 1519 | ||
1521 | /* links can never be larger than one cluster so we know this | 1520 | /* links can never be larger than one cluster so we know this |
1522 | * is all going to be contiguous, but do a sanity check | 1521 | * is all going to be contiguous, but do a sanity check |
1523 | * anyway. */ | 1522 | * anyway. */ |
1524 | if ((p_blocks << sb->s_blocksize_bits) < bytes_left) { | 1523 | if ((p_blocks << sb->s_blocksize_bits) < bytes_left) { |
1525 | status = -EIO; | 1524 | status = -EIO; |
1526 | mlog_errno(status); | 1525 | mlog_errno(status); |
1527 | goto bail; | 1526 | goto bail; |
1528 | } | 1527 | } |
1529 | 1528 | ||
1530 | virtual = 0; | 1529 | virtual = 0; |
1531 | while(bytes_left > 0) { | 1530 | while(bytes_left > 0) { |
1532 | c = &symname[virtual * sb->s_blocksize]; | 1531 | c = &symname[virtual * sb->s_blocksize]; |
1533 | 1532 | ||
1534 | bhs[virtual] = sb_getblk(sb, p_blkno); | 1533 | bhs[virtual] = sb_getblk(sb, p_blkno); |
1535 | if (!bhs[virtual]) { | 1534 | if (!bhs[virtual]) { |
1536 | status = -ENOMEM; | 1535 | status = -ENOMEM; |
1537 | mlog_errno(status); | 1536 | mlog_errno(status); |
1538 | goto bail; | 1537 | goto bail; |
1539 | } | 1538 | } |
1540 | ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]); | 1539 | ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]); |
1541 | 1540 | ||
1542 | status = ocfs2_journal_access(handle, inode, bhs[virtual], | 1541 | status = ocfs2_journal_access(handle, inode, bhs[virtual], |
1543 | OCFS2_JOURNAL_ACCESS_CREATE); | 1542 | OCFS2_JOURNAL_ACCESS_CREATE); |
1544 | if (status < 0) { | 1543 | if (status < 0) { |
1545 | mlog_errno(status); | 1544 | mlog_errno(status); |
1546 | goto bail; | 1545 | goto bail; |
1547 | } | 1546 | } |
1548 | 1547 | ||
1549 | memset(bhs[virtual]->b_data, 0, sb->s_blocksize); | 1548 | memset(bhs[virtual]->b_data, 0, sb->s_blocksize); |
1550 | 1549 | ||
1551 | memcpy(bhs[virtual]->b_data, c, | 1550 | memcpy(bhs[virtual]->b_data, c, |
1552 | (bytes_left > sb->s_blocksize) ? sb->s_blocksize : | 1551 | (bytes_left > sb->s_blocksize) ? sb->s_blocksize : |
1553 | bytes_left); | 1552 | bytes_left); |
1554 | 1553 | ||
1555 | status = ocfs2_journal_dirty(handle, bhs[virtual]); | 1554 | status = ocfs2_journal_dirty(handle, bhs[virtual]); |
1556 | if (status < 0) { | 1555 | if (status < 0) { |
1557 | mlog_errno(status); | 1556 | mlog_errno(status); |
1558 | goto bail; | 1557 | goto bail; |
1559 | } | 1558 | } |
1560 | 1559 | ||
1561 | virtual++; | 1560 | virtual++; |
1562 | p_blkno++; | 1561 | p_blkno++; |
1563 | bytes_left -= sb->s_blocksize; | 1562 | bytes_left -= sb->s_blocksize; |
1564 | } | 1563 | } |
1565 | 1564 | ||
1566 | status = 0; | 1565 | status = 0; |
1567 | bail: | 1566 | bail: |
1568 | 1567 | ||
1569 | if (bhs) { | 1568 | if (bhs) { |
1570 | for(i = 0; i < blocks; i++) | 1569 | for(i = 0; i < blocks; i++) |
1571 | if (bhs[i]) | 1570 | if (bhs[i]) |
1572 | brelse(bhs[i]); | 1571 | brelse(bhs[i]); |
1573 | kfree(bhs); | 1572 | kfree(bhs); |
1574 | } | 1573 | } |
1575 | 1574 | ||
1576 | mlog_exit(status); | 1575 | mlog_exit(status); |
1577 | return status; | 1576 | return status; |
1578 | } | 1577 | } |
1579 | 1578 | ||
1580 | static int ocfs2_symlink(struct inode *dir, | 1579 | static int ocfs2_symlink(struct inode *dir, |
1581 | struct dentry *dentry, | 1580 | struct dentry *dentry, |
1582 | const char *symname) | 1581 | const char *symname) |
1583 | { | 1582 | { |
1584 | int status, l, credits; | 1583 | int status, l, credits; |
1585 | u64 newsize; | 1584 | u64 newsize; |
1586 | struct ocfs2_super *osb = NULL; | 1585 | struct ocfs2_super *osb = NULL; |
1587 | struct inode *inode = NULL; | 1586 | struct inode *inode = NULL; |
1588 | struct super_block *sb; | 1587 | struct super_block *sb; |
1589 | struct buffer_head *new_fe_bh = NULL; | 1588 | struct buffer_head *new_fe_bh = NULL; |
1590 | struct buffer_head *de_bh = NULL; | 1589 | struct buffer_head *de_bh = NULL; |
1591 | struct buffer_head *parent_fe_bh = NULL; | 1590 | struct buffer_head *parent_fe_bh = NULL; |
1592 | struct ocfs2_dinode *fe = NULL; | 1591 | struct ocfs2_dinode *fe = NULL; |
1593 | struct ocfs2_dinode *dirfe; | 1592 | struct ocfs2_dinode *dirfe; |
1594 | handle_t *handle = NULL; | 1593 | handle_t *handle = NULL; |
1595 | struct ocfs2_alloc_context *inode_ac = NULL; | 1594 | struct ocfs2_alloc_context *inode_ac = NULL; |
1596 | struct ocfs2_alloc_context *data_ac = NULL; | 1595 | struct ocfs2_alloc_context *data_ac = NULL; |
1597 | 1596 | ||
1598 | mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, | 1597 | mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, |
1599 | dentry, symname, dentry->d_name.len, dentry->d_name.name); | 1598 | dentry, symname, dentry->d_name.len, dentry->d_name.name); |
1600 | 1599 | ||
1601 | sb = dir->i_sb; | 1600 | sb = dir->i_sb; |
1602 | osb = OCFS2_SB(sb); | 1601 | osb = OCFS2_SB(sb); |
1603 | 1602 | ||
1604 | l = strlen(symname) + 1; | 1603 | l = strlen(symname) + 1; |
1605 | 1604 | ||
1606 | credits = ocfs2_calc_symlink_credits(sb); | 1605 | credits = ocfs2_calc_symlink_credits(sb); |
1607 | 1606 | ||
1608 | /* lock the parent directory */ | 1607 | /* lock the parent directory */ |
1609 | status = ocfs2_meta_lock(dir, &parent_fe_bh, 1); | 1608 | status = ocfs2_meta_lock(dir, &parent_fe_bh, 1); |
1610 | if (status < 0) { | 1609 | if (status < 0) { |
1611 | if (status != -ENOENT) | 1610 | if (status != -ENOENT) |
1612 | mlog_errno(status); | 1611 | mlog_errno(status); |
1613 | return status; | 1612 | return status; |
1614 | } | 1613 | } |
1615 | 1614 | ||
1616 | dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data; | 1615 | dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data; |
1617 | if (!dirfe->i_links_count) { | 1616 | if (!dirfe->i_links_count) { |
1618 | /* can't make a file in a deleted directory. */ | 1617 | /* can't make a file in a deleted directory. */ |
1619 | status = -ENOENT; | 1618 | status = -ENOENT; |
1620 | goto bail; | 1619 | goto bail; |
1621 | } | 1620 | } |
1622 | 1621 | ||
1623 | status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name, | 1622 | status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name, |
1624 | dentry->d_name.len); | 1623 | dentry->d_name.len); |
1625 | if (status) | 1624 | if (status) |
1626 | goto bail; | 1625 | goto bail; |
1627 | 1626 | ||
1628 | status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh, | 1627 | status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh, |
1629 | dentry->d_name.name, | 1628 | dentry->d_name.name, |
1630 | dentry->d_name.len, &de_bh); | 1629 | dentry->d_name.len, &de_bh); |
1631 | if (status < 0) { | 1630 | if (status < 0) { |
1632 | mlog_errno(status); | 1631 | mlog_errno(status); |
1633 | goto bail; | 1632 | goto bail; |
1634 | } | 1633 | } |
1635 | 1634 | ||
1636 | status = ocfs2_reserve_new_inode(osb, &inode_ac); | 1635 | status = ocfs2_reserve_new_inode(osb, &inode_ac); |
1637 | if (status < 0) { | 1636 | if (status < 0) { |
1638 | if (status != -ENOSPC) | 1637 | if (status != -ENOSPC) |
1639 | mlog_errno(status); | 1638 | mlog_errno(status); |
1640 | goto bail; | 1639 | goto bail; |
1641 | } | 1640 | } |
1642 | 1641 | ||
1643 | /* don't reserve bitmap space for fast symlinks. */ | 1642 | /* don't reserve bitmap space for fast symlinks. */ |
1644 | if (l > ocfs2_fast_symlink_chars(sb)) { | 1643 | if (l > ocfs2_fast_symlink_chars(sb)) { |
1645 | status = ocfs2_reserve_clusters(osb, 1, &data_ac); | 1644 | status = ocfs2_reserve_clusters(osb, 1, &data_ac); |
1646 | if (status < 0) { | 1645 | if (status < 0) { |
1647 | if (status != -ENOSPC) | 1646 | if (status != -ENOSPC) |
1648 | mlog_errno(status); | 1647 | mlog_errno(status); |
1649 | goto bail; | 1648 | goto bail; |
1650 | } | 1649 | } |
1651 | } | 1650 | } |
1652 | 1651 | ||
1653 | handle = ocfs2_start_trans(osb, credits); | 1652 | handle = ocfs2_start_trans(osb, credits); |
1654 | if (IS_ERR(handle)) { | 1653 | if (IS_ERR(handle)) { |
1655 | status = PTR_ERR(handle); | 1654 | status = PTR_ERR(handle); |
1656 | handle = NULL; | 1655 | handle = NULL; |
1657 | mlog_errno(status); | 1656 | mlog_errno(status); |
1658 | goto bail; | 1657 | goto bail; |
1659 | } | 1658 | } |
1660 | 1659 | ||
1661 | status = ocfs2_mknod_locked(osb, dir, dentry, | 1660 | status = ocfs2_mknod_locked(osb, dir, dentry, |
1662 | S_IFLNK | S_IRWXUGO, 0, | 1661 | S_IFLNK | S_IRWXUGO, 0, |
1663 | &new_fe_bh, parent_fe_bh, handle, | 1662 | &new_fe_bh, parent_fe_bh, handle, |
1664 | &inode, inode_ac); | 1663 | &inode, inode_ac); |
1665 | if (status < 0) { | 1664 | if (status < 0) { |
1666 | mlog_errno(status); | 1665 | mlog_errno(status); |
1667 | goto bail; | 1666 | goto bail; |
1668 | } | 1667 | } |
1669 | 1668 | ||
1670 | fe = (struct ocfs2_dinode *) new_fe_bh->b_data; | 1669 | fe = (struct ocfs2_dinode *) new_fe_bh->b_data; |
1671 | inode->i_rdev = 0; | 1670 | inode->i_rdev = 0; |
1672 | newsize = l - 1; | 1671 | newsize = l - 1; |
1673 | if (l > ocfs2_fast_symlink_chars(sb)) { | 1672 | if (l > ocfs2_fast_symlink_chars(sb)) { |
1674 | u32 offset = 0; | 1673 | u32 offset = 0; |
1675 | 1674 | ||
1676 | inode->i_op = &ocfs2_symlink_inode_operations; | 1675 | inode->i_op = &ocfs2_symlink_inode_operations; |
1677 | status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, | 1676 | status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, |
1678 | new_fe_bh, | 1677 | new_fe_bh, |
1679 | handle, data_ac, NULL, | 1678 | handle, data_ac, NULL, |
1680 | NULL); | 1679 | NULL); |
1681 | if (status < 0) { | 1680 | if (status < 0) { |
1682 | if (status != -ENOSPC && status != -EINTR) { | 1681 | if (status != -ENOSPC && status != -EINTR) { |
1683 | mlog(ML_ERROR, | 1682 | mlog(ML_ERROR, |
1684 | "Failed to extend file to %llu\n", | 1683 | "Failed to extend file to %llu\n", |
1685 | (unsigned long long)newsize); | 1684 | (unsigned long long)newsize); |
1686 | mlog_errno(status); | 1685 | mlog_errno(status); |
1687 | status = -ENOSPC; | 1686 | status = -ENOSPC; |
1688 | } | 1687 | } |
1689 | goto bail; | 1688 | goto bail; |
1690 | } | 1689 | } |
1691 | i_size_write(inode, newsize); | 1690 | i_size_write(inode, newsize); |
1692 | inode->i_blocks = ocfs2_align_bytes_to_sectors(newsize); | 1691 | inode->i_blocks = ocfs2_align_bytes_to_sectors(newsize); |
1693 | } else { | 1692 | } else { |
1694 | inode->i_op = &ocfs2_fast_symlink_inode_operations; | 1693 | inode->i_op = &ocfs2_fast_symlink_inode_operations; |
1695 | memcpy((char *) fe->id2.i_symlink, symname, l); | 1694 | memcpy((char *) fe->id2.i_symlink, symname, l); |
1696 | i_size_write(inode, newsize); | 1695 | i_size_write(inode, newsize); |
1697 | inode->i_blocks = 0; | 1696 | inode->i_blocks = 0; |
1698 | } | 1697 | } |
1699 | 1698 | ||
1700 | status = ocfs2_mark_inode_dirty(handle, inode, new_fe_bh); | 1699 | status = ocfs2_mark_inode_dirty(handle, inode, new_fe_bh); |
1701 | if (status < 0) { | 1700 | if (status < 0) { |
1702 | mlog_errno(status); | 1701 | mlog_errno(status); |
1703 | goto bail; | 1702 | goto bail; |
1704 | } | 1703 | } |
1705 | 1704 | ||
1706 | if (!ocfs2_inode_is_fast_symlink(inode)) { | 1705 | if (!ocfs2_inode_is_fast_symlink(inode)) { |
1707 | status = ocfs2_create_symlink_data(osb, handle, inode, | 1706 | status = ocfs2_create_symlink_data(osb, handle, inode, |
1708 | symname); | 1707 | symname); |
1709 | if (status < 0) { | 1708 | if (status < 0) { |
1710 | mlog_errno(status); | 1709 | mlog_errno(status); |
1711 | goto bail; | 1710 | goto bail; |
1712 | } | 1711 | } |
1713 | } | 1712 | } |
1714 | 1713 | ||
1715 | status = ocfs2_add_entry(handle, dentry, inode, | 1714 | status = ocfs2_add_entry(handle, dentry, inode, |
1716 | le64_to_cpu(fe->i_blkno), parent_fe_bh, | 1715 | le64_to_cpu(fe->i_blkno), parent_fe_bh, |
1717 | de_bh); | 1716 | de_bh); |
1718 | if (status < 0) { | 1717 | if (status < 0) { |
1719 | mlog_errno(status); | 1718 | mlog_errno(status); |
1720 | goto bail; | 1719 | goto bail; |
1721 | } | 1720 | } |
1722 | 1721 | ||
1723 | status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); | 1722 | status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); |
1724 | if (status) { | 1723 | if (status) { |
1725 | mlog_errno(status); | 1724 | mlog_errno(status); |
1726 | goto bail; | 1725 | goto bail; |
1727 | } | 1726 | } |
1728 | 1727 | ||
1729 | insert_inode_hash(inode); | 1728 | insert_inode_hash(inode); |
1730 | dentry->d_op = &ocfs2_dentry_ops; | 1729 | dentry->d_op = &ocfs2_dentry_ops; |
1731 | d_instantiate(dentry, inode); | 1730 | d_instantiate(dentry, inode); |
1732 | bail: | 1731 | bail: |
1733 | if (handle) | 1732 | if (handle) |
1734 | ocfs2_commit_trans(osb, handle); | 1733 | ocfs2_commit_trans(osb, handle); |
1735 | 1734 | ||
1736 | ocfs2_meta_unlock(dir, 1); | 1735 | ocfs2_meta_unlock(dir, 1); |
1737 | 1736 | ||
1738 | if (new_fe_bh) | 1737 | if (new_fe_bh) |
1739 | brelse(new_fe_bh); | 1738 | brelse(new_fe_bh); |
1740 | if (parent_fe_bh) | 1739 | if (parent_fe_bh) |
1741 | brelse(parent_fe_bh); | 1740 | brelse(parent_fe_bh); |
1742 | if (de_bh) | 1741 | if (de_bh) |
1743 | brelse(de_bh); | 1742 | brelse(de_bh); |
1744 | if (inode_ac) | 1743 | if (inode_ac) |
1745 | ocfs2_free_alloc_context(inode_ac); | 1744 | ocfs2_free_alloc_context(inode_ac); |
1746 | if (data_ac) | 1745 | if (data_ac) |
1747 | ocfs2_free_alloc_context(data_ac); | 1746 | ocfs2_free_alloc_context(data_ac); |
1748 | if ((status < 0) && inode) | 1747 | if ((status < 0) && inode) |
1749 | iput(inode); | 1748 | iput(inode); |
1750 | 1749 | ||
1751 | mlog_exit(status); | 1750 | mlog_exit(status); |
1752 | 1751 | ||
1753 | return status; | 1752 | return status; |
1754 | } | 1753 | } |
1755 | 1754 | ||
1756 | int ocfs2_check_dir_entry(struct inode * dir, | 1755 | int ocfs2_check_dir_entry(struct inode * dir, |
1757 | struct ocfs2_dir_entry * de, | 1756 | struct ocfs2_dir_entry * de, |
1758 | struct buffer_head * bh, | 1757 | struct buffer_head * bh, |
1759 | unsigned long offset) | 1758 | unsigned long offset) |
1760 | { | 1759 | { |
1761 | const char *error_msg = NULL; | 1760 | const char *error_msg = NULL; |
1762 | const int rlen = le16_to_cpu(de->rec_len); | 1761 | const int rlen = le16_to_cpu(de->rec_len); |
1763 | 1762 | ||
1764 | if (rlen < OCFS2_DIR_REC_LEN(1)) | 1763 | if (rlen < OCFS2_DIR_REC_LEN(1)) |
1765 | error_msg = "rec_len is smaller than minimal"; | 1764 | error_msg = "rec_len is smaller than minimal"; |
1766 | else if (rlen % 4 != 0) | 1765 | else if (rlen % 4 != 0) |
1767 | error_msg = "rec_len % 4 != 0"; | 1766 | error_msg = "rec_len % 4 != 0"; |
1768 | else if (rlen < OCFS2_DIR_REC_LEN(de->name_len)) | 1767 | else if (rlen < OCFS2_DIR_REC_LEN(de->name_len)) |
1769 | error_msg = "rec_len is too small for name_len"; | 1768 | error_msg = "rec_len is too small for name_len"; |
1770 | else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) | 1769 | else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) |
1771 | error_msg = "directory entry across blocks"; | 1770 | error_msg = "directory entry across blocks"; |
1772 | 1771 | ||
1773 | if (error_msg != NULL) | 1772 | if (error_msg != NULL) |
1774 | mlog(ML_ERROR, "bad entry in directory #%llu: %s - " | 1773 | mlog(ML_ERROR, "bad entry in directory #%llu: %s - " |
1775 | "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n", | 1774 | "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n", |
1776 | (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg, | 1775 | (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg, |
1777 | offset, (unsigned long long)le64_to_cpu(de->inode), rlen, | 1776 | offset, (unsigned long long)le64_to_cpu(de->inode), rlen, |
1778 | de->name_len); | 1777 | de->name_len); |
1779 | return error_msg == NULL ? 1 : 0; | 1778 | return error_msg == NULL ? 1 : 0; |
1780 | } | 1779 | } |
1781 | 1780 | ||
1782 | /* we don't always have a dentry for what we want to add, so people | 1781 | /* we don't always have a dentry for what we want to add, so people |
1783 | * like orphan dir can call this instead. | 1782 | * like orphan dir can call this instead. |
1784 | * | 1783 | * |
1785 | * If you pass me insert_bh, I'll skip the search of the other dir | 1784 | * If you pass me insert_bh, I'll skip the search of the other dir |
1786 | * blocks and put the record in there. | 1785 | * blocks and put the record in there. |
1787 | */ | 1786 | */ |
1788 | static int __ocfs2_add_entry(handle_t *handle, | 1787 | static int __ocfs2_add_entry(handle_t *handle, |
1789 | struct inode *dir, | 1788 | struct inode *dir, |
1790 | const char *name, int namelen, | 1789 | const char *name, int namelen, |
1791 | struct inode *inode, u64 blkno, | 1790 | struct inode *inode, u64 blkno, |
1792 | struct buffer_head *parent_fe_bh, | 1791 | struct buffer_head *parent_fe_bh, |
1793 | struct buffer_head *insert_bh) | 1792 | struct buffer_head *insert_bh) |
1794 | { | 1793 | { |
1795 | unsigned long offset; | 1794 | unsigned long offset; |
1796 | unsigned short rec_len; | 1795 | unsigned short rec_len; |
1797 | struct ocfs2_dir_entry *de, *de1; | 1796 | struct ocfs2_dir_entry *de, *de1; |
1798 | struct super_block *sb; | 1797 | struct super_block *sb; |
1799 | int retval, status; | 1798 | int retval, status; |
1800 | 1799 | ||
1801 | mlog_entry_void(); | 1800 | mlog_entry_void(); |
1802 | 1801 | ||
1803 | sb = dir->i_sb; | 1802 | sb = dir->i_sb; |
1804 | 1803 | ||
1805 | if (!namelen) | 1804 | if (!namelen) |
1806 | return -EINVAL; | 1805 | return -EINVAL; |
1807 | 1806 | ||
1808 | rec_len = OCFS2_DIR_REC_LEN(namelen); | 1807 | rec_len = OCFS2_DIR_REC_LEN(namelen); |
1809 | offset = 0; | 1808 | offset = 0; |
1810 | de = (struct ocfs2_dir_entry *) insert_bh->b_data; | 1809 | de = (struct ocfs2_dir_entry *) insert_bh->b_data; |
1811 | while (1) { | 1810 | while (1) { |
1812 | BUG_ON((char *)de >= sb->s_blocksize + insert_bh->b_data); | 1811 | BUG_ON((char *)de >= sb->s_blocksize + insert_bh->b_data); |
1813 | /* These checks should've already been passed by the | 1812 | /* These checks should've already been passed by the |
1814 | * prepare function, but I guess we can leave them | 1813 | * prepare function, but I guess we can leave them |
1815 | * here anyway. */ | 1814 | * here anyway. */ |
1816 | if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) { | 1815 | if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) { |
1817 | retval = -ENOENT; | 1816 | retval = -ENOENT; |
1818 | goto bail; | 1817 | goto bail; |
1819 | } | 1818 | } |
1820 | if (ocfs2_match(namelen, name, de)) { | 1819 | if (ocfs2_match(namelen, name, de)) { |
1821 | retval = -EEXIST; | 1820 | retval = -EEXIST; |
1822 | goto bail; | 1821 | goto bail; |
1823 | } | 1822 | } |
1824 | if (((le64_to_cpu(de->inode) == 0) && | 1823 | if (((le64_to_cpu(de->inode) == 0) && |
1825 | (le16_to_cpu(de->rec_len) >= rec_len)) || | 1824 | (le16_to_cpu(de->rec_len) >= rec_len)) || |
1826 | (le16_to_cpu(de->rec_len) >= | 1825 | (le16_to_cpu(de->rec_len) >= |
1827 | (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) { | 1826 | (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) { |
1828 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 1827 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
1829 | retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh); | 1828 | retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh); |
1830 | if (retval < 0) { | 1829 | if (retval < 0) { |
1831 | mlog_errno(retval); | 1830 | mlog_errno(retval); |
1832 | goto bail; | 1831 | goto bail; |
1833 | } | 1832 | } |
1834 | 1833 | ||
1835 | status = ocfs2_journal_access(handle, dir, insert_bh, | 1834 | status = ocfs2_journal_access(handle, dir, insert_bh, |
1836 | OCFS2_JOURNAL_ACCESS_WRITE); | 1835 | OCFS2_JOURNAL_ACCESS_WRITE); |
1837 | /* By now the buffer is marked for journaling */ | 1836 | /* By now the buffer is marked for journaling */ |
1838 | offset += le16_to_cpu(de->rec_len); | 1837 | offset += le16_to_cpu(de->rec_len); |
1839 | if (le64_to_cpu(de->inode)) { | 1838 | if (le64_to_cpu(de->inode)) { |
1840 | de1 = (struct ocfs2_dir_entry *)((char *) de + | 1839 | de1 = (struct ocfs2_dir_entry *)((char *) de + |
1841 | OCFS2_DIR_REC_LEN(de->name_len)); | 1840 | OCFS2_DIR_REC_LEN(de->name_len)); |
1842 | de1->rec_len = | 1841 | de1->rec_len = |
1843 | cpu_to_le16(le16_to_cpu(de->rec_len) - | 1842 | cpu_to_le16(le16_to_cpu(de->rec_len) - |
1844 | OCFS2_DIR_REC_LEN(de->name_len)); | 1843 | OCFS2_DIR_REC_LEN(de->name_len)); |
1845 | de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len)); | 1844 | de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len)); |
1846 | de = de1; | 1845 | de = de1; |
1847 | } | 1846 | } |
1848 | de->file_type = OCFS2_FT_UNKNOWN; | 1847 | de->file_type = OCFS2_FT_UNKNOWN; |
1849 | if (blkno) { | 1848 | if (blkno) { |
1850 | de->inode = cpu_to_le64(blkno); | 1849 | de->inode = cpu_to_le64(blkno); |
1851 | ocfs2_set_de_type(de, inode->i_mode); | 1850 | ocfs2_set_de_type(de, inode->i_mode); |
1852 | } else | 1851 | } else |
1853 | de->inode = 0; | 1852 | de->inode = 0; |
1854 | de->name_len = namelen; | 1853 | de->name_len = namelen; |
1855 | memcpy(de->name, name, namelen); | 1854 | memcpy(de->name, name, namelen); |
1856 | 1855 | ||
1857 | dir->i_version++; | 1856 | dir->i_version++; |
1858 | status = ocfs2_journal_dirty(handle, insert_bh); | 1857 | status = ocfs2_journal_dirty(handle, insert_bh); |
1859 | retval = 0; | 1858 | retval = 0; |
1860 | goto bail; | 1859 | goto bail; |
1861 | } | 1860 | } |
1862 | offset += le16_to_cpu(de->rec_len); | 1861 | offset += le16_to_cpu(de->rec_len); |
1863 | de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len)); | 1862 | de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len)); |
1864 | } | 1863 | } |
1865 | 1864 | ||
1866 | /* when you think about it, the assert above should prevent us | 1865 | /* when you think about it, the assert above should prevent us |
1867 | * from ever getting here. */ | 1866 | * from ever getting here. */ |
1868 | retval = -ENOSPC; | 1867 | retval = -ENOSPC; |
1869 | bail: | 1868 | bail: |
1870 | 1869 | ||
1871 | mlog_exit(retval); | 1870 | mlog_exit(retval); |
1872 | return retval; | 1871 | return retval; |
1873 | } | 1872 | } |
1874 | 1873 | ||
1875 | 1874 | ||
1876 | /* | 1875 | /* |
1877 | * ocfs2_delete_entry deletes a directory entry by merging it with the | 1876 | * ocfs2_delete_entry deletes a directory entry by merging it with the |
1878 | * previous entry | 1877 | * previous entry |
1879 | */ | 1878 | */ |
1880 | static int ocfs2_delete_entry(handle_t *handle, | 1879 | static int ocfs2_delete_entry(handle_t *handle, |
1881 | struct inode *dir, | 1880 | struct inode *dir, |
1882 | struct ocfs2_dir_entry *de_del, | 1881 | struct ocfs2_dir_entry *de_del, |
1883 | struct buffer_head *bh) | 1882 | struct buffer_head *bh) |
1884 | { | 1883 | { |
1885 | struct ocfs2_dir_entry *de, *pde; | 1884 | struct ocfs2_dir_entry *de, *pde; |
1886 | int i, status = -ENOENT; | 1885 | int i, status = -ENOENT; |
1887 | 1886 | ||
1888 | mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh); | 1887 | mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh); |
1889 | 1888 | ||
1890 | i = 0; | 1889 | i = 0; |
1891 | pde = NULL; | 1890 | pde = NULL; |
1892 | de = (struct ocfs2_dir_entry *) bh->b_data; | 1891 | de = (struct ocfs2_dir_entry *) bh->b_data; |
1893 | while (i < bh->b_size) { | 1892 | while (i < bh->b_size) { |
1894 | if (!ocfs2_check_dir_entry(dir, de, bh, i)) { | 1893 | if (!ocfs2_check_dir_entry(dir, de, bh, i)) { |
1895 | status = -EIO; | 1894 | status = -EIO; |
1896 | mlog_errno(status); | 1895 | mlog_errno(status); |
1897 | goto bail; | 1896 | goto bail; |
1898 | } | 1897 | } |
1899 | if (de == de_del) { | 1898 | if (de == de_del) { |
1900 | status = ocfs2_journal_access(handle, dir, bh, | 1899 | status = ocfs2_journal_access(handle, dir, bh, |
1901 | OCFS2_JOURNAL_ACCESS_WRITE); | 1900 | OCFS2_JOURNAL_ACCESS_WRITE); |
1902 | if (status < 0) { | 1901 | if (status < 0) { |
1903 | status = -EIO; | 1902 | status = -EIO; |
1904 | mlog_errno(status); | 1903 | mlog_errno(status); |
1905 | goto bail; | 1904 | goto bail; |
1906 | } | 1905 | } |
1907 | if (pde) | 1906 | if (pde) |
1908 | pde->rec_len = | 1907 | pde->rec_len = |
1909 | cpu_to_le16(le16_to_cpu(pde->rec_len) + | 1908 | cpu_to_le16(le16_to_cpu(pde->rec_len) + |
1910 | le16_to_cpu(de->rec_len)); | 1909 | le16_to_cpu(de->rec_len)); |
1911 | else | 1910 | else |
1912 | de->inode = 0; | 1911 | de->inode = 0; |
1913 | dir->i_version++; | 1912 | dir->i_version++; |
1914 | status = ocfs2_journal_dirty(handle, bh); | 1913 | status = ocfs2_journal_dirty(handle, bh); |
1915 | goto bail; | 1914 | goto bail; |
1916 | } | 1915 | } |
1917 | i += le16_to_cpu(de->rec_len); | 1916 | i += le16_to_cpu(de->rec_len); |
1918 | pde = de; | 1917 | pde = de; |
1919 | de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len)); | 1918 | de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len)); |
1920 | } | 1919 | } |
1921 | bail: | 1920 | bail: |
1922 | mlog_exit(status); | 1921 | mlog_exit(status); |
1923 | return status; | 1922 | return status; |
1924 | } | 1923 | } |
1925 | 1924 | ||
1926 | /* | 1925 | /* |
1927 | * Returns 0 if not found, -1 on failure, and 1 on success | 1926 | * Returns 0 if not found, -1 on failure, and 1 on success |
1928 | */ | 1927 | */ |
1929 | static int inline ocfs2_search_dirblock(struct buffer_head *bh, | 1928 | static int inline ocfs2_search_dirblock(struct buffer_head *bh, |
1930 | struct inode *dir, | 1929 | struct inode *dir, |
1931 | const char *name, int namelen, | 1930 | const char *name, int namelen, |
1932 | unsigned long offset, | 1931 | unsigned long offset, |
1933 | struct ocfs2_dir_entry **res_dir) | 1932 | struct ocfs2_dir_entry **res_dir) |
1934 | { | 1933 | { |
1935 | struct ocfs2_dir_entry *de; | 1934 | struct ocfs2_dir_entry *de; |
1936 | char *dlimit, *de_buf; | 1935 | char *dlimit, *de_buf; |
1937 | int de_len; | 1936 | int de_len; |
1938 | int ret = 0; | 1937 | int ret = 0; |
1939 | 1938 | ||
1940 | mlog_entry_void(); | 1939 | mlog_entry_void(); |
1941 | 1940 | ||
1942 | de_buf = bh->b_data; | 1941 | de_buf = bh->b_data; |
1943 | dlimit = de_buf + dir->i_sb->s_blocksize; | 1942 | dlimit = de_buf + dir->i_sb->s_blocksize; |
1944 | 1943 | ||
1945 | while (de_buf < dlimit) { | 1944 | while (de_buf < dlimit) { |
1946 | /* this code is executed quadratically often */ | 1945 | /* this code is executed quadratically often */ |
1947 | /* do minimal checking `by hand' */ | 1946 | /* do minimal checking `by hand' */ |
1948 | 1947 | ||
1949 | de = (struct ocfs2_dir_entry *) de_buf; | 1948 | de = (struct ocfs2_dir_entry *) de_buf; |
1950 | 1949 | ||
1951 | if (de_buf + namelen <= dlimit && | 1950 | if (de_buf + namelen <= dlimit && |
1952 | ocfs2_match(namelen, name, de)) { | 1951 | ocfs2_match(namelen, name, de)) { |
1953 | /* found a match - just to be sure, do a full check */ | 1952 | /* found a match - just to be sure, do a full check */ |
1954 | if (!ocfs2_check_dir_entry(dir, de, bh, offset)) { | 1953 | if (!ocfs2_check_dir_entry(dir, de, bh, offset)) { |
1955 | ret = -1; | 1954 | ret = -1; |
1956 | goto bail; | 1955 | goto bail; |
1957 | } | 1956 | } |
1958 | *res_dir = de; | 1957 | *res_dir = de; |
1959 | ret = 1; | 1958 | ret = 1; |
1960 | goto bail; | 1959 | goto bail; |
1961 | } | 1960 | } |
1962 | 1961 | ||
1963 | /* prevent looping on a bad block */ | 1962 | /* prevent looping on a bad block */ |
1964 | de_len = le16_to_cpu(de->rec_len); | 1963 | de_len = le16_to_cpu(de->rec_len); |
1965 | if (de_len <= 0) { | 1964 | if (de_len <= 0) { |
1966 | ret = -1; | 1965 | ret = -1; |
1967 | goto bail; | 1966 | goto bail; |
1968 | } | 1967 | } |
1969 | 1968 | ||
1970 | de_buf += de_len; | 1969 | de_buf += de_len; |
1971 | offset += de_len; | 1970 | offset += de_len; |
1972 | } | 1971 | } |
1973 | 1972 | ||
1974 | bail: | 1973 | bail: |
1975 | mlog_exit(ret); | 1974 | mlog_exit(ret); |
1976 | return ret; | 1975 | return ret; |
1977 | } | 1976 | } |
1978 | 1977 | ||
1979 | struct buffer_head *ocfs2_find_entry(const char *name, int namelen, | 1978 | struct buffer_head *ocfs2_find_entry(const char *name, int namelen, |
1980 | struct inode *dir, | 1979 | struct inode *dir, |
1981 | struct ocfs2_dir_entry **res_dir) | 1980 | struct ocfs2_dir_entry **res_dir) |
1982 | { | 1981 | { |
1983 | struct super_block *sb; | 1982 | struct super_block *sb; |
1984 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; | 1983 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; |
1985 | struct buffer_head *bh, *ret = NULL; | 1984 | struct buffer_head *bh, *ret = NULL; |
1986 | unsigned long start, block, b; | 1985 | unsigned long start, block, b; |
1987 | int ra_max = 0; /* Number of bh's in the readahead | 1986 | int ra_max = 0; /* Number of bh's in the readahead |
1988 | buffer, bh_use[] */ | 1987 | buffer, bh_use[] */ |
1989 | int ra_ptr = 0; /* Current index into readahead | 1988 | int ra_ptr = 0; /* Current index into readahead |
1990 | buffer */ | 1989 | buffer */ |
1991 | int num = 0; | 1990 | int num = 0; |
1992 | int nblocks, i, err; | 1991 | int nblocks, i, err; |
1993 | 1992 | ||
1994 | mlog_entry_void(); | 1993 | mlog_entry_void(); |
1995 | 1994 | ||
1996 | *res_dir = NULL; | 1995 | *res_dir = NULL; |
1997 | sb = dir->i_sb; | 1996 | sb = dir->i_sb; |
1998 | 1997 | ||
1999 | nblocks = i_size_read(dir) >> sb->s_blocksize_bits; | 1998 | nblocks = i_size_read(dir) >> sb->s_blocksize_bits; |
2000 | start = OCFS2_I(dir)->ip_dir_start_lookup; | 1999 | start = OCFS2_I(dir)->ip_dir_start_lookup; |
2001 | if (start >= nblocks) | 2000 | if (start >= nblocks) |
2002 | start = 0; | 2001 | start = 0; |
2003 | block = start; | 2002 | block = start; |
2004 | 2003 | ||
2005 | restart: | 2004 | restart: |
2006 | do { | 2005 | do { |
2007 | /* | 2006 | /* |
2008 | * We deal with the read-ahead logic here. | 2007 | * We deal with the read-ahead logic here. |
2009 | */ | 2008 | */ |
2010 | if (ra_ptr >= ra_max) { | 2009 | if (ra_ptr >= ra_max) { |
2011 | /* Refill the readahead buffer */ | 2010 | /* Refill the readahead buffer */ |
2012 | ra_ptr = 0; | 2011 | ra_ptr = 0; |
2013 | b = block; | 2012 | b = block; |
2014 | for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) { | 2013 | for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) { |
2015 | /* | 2014 | /* |
2016 | * Terminate if we reach the end of the | 2015 | * Terminate if we reach the end of the |
2017 | * directory and must wrap, or if our | 2016 | * directory and must wrap, or if our |
2018 | * search has finished at this block. | 2017 | * search has finished at this block. |
2019 | */ | 2018 | */ |
2020 | if (b >= nblocks || (num && block == start)) { | 2019 | if (b >= nblocks || (num && block == start)) { |
2021 | bh_use[ra_max] = NULL; | 2020 | bh_use[ra_max] = NULL; |
2022 | break; | 2021 | break; |
2023 | } | 2022 | } |
2024 | num++; | 2023 | num++; |
2025 | 2024 | ||
2026 | bh = ocfs2_bread(dir, b++, &err, 1); | 2025 | bh = ocfs2_bread(dir, b++, &err, 1); |
2027 | bh_use[ra_max] = bh; | 2026 | bh_use[ra_max] = bh; |
2028 | } | 2027 | } |
2029 | } | 2028 | } |
2030 | if ((bh = bh_use[ra_ptr++]) == NULL) | 2029 | if ((bh = bh_use[ra_ptr++]) == NULL) |
2031 | goto next; | 2030 | goto next; |
2032 | wait_on_buffer(bh); | 2031 | wait_on_buffer(bh); |
2033 | if (!buffer_uptodate(bh)) { | 2032 | if (!buffer_uptodate(bh)) { |
2034 | /* read error, skip block & hope for the best */ | 2033 | /* read error, skip block & hope for the best */ |
2035 | ocfs2_error(dir->i_sb, "reading directory %llu, " | 2034 | ocfs2_error(dir->i_sb, "reading directory %llu, " |
2036 | "offset %lu\n", | 2035 | "offset %lu\n", |
2037 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | 2036 | (unsigned long long)OCFS2_I(dir)->ip_blkno, |
2038 | block); | 2037 | block); |
2039 | brelse(bh); | 2038 | brelse(bh); |
2040 | goto next; | 2039 | goto next; |
2041 | } | 2040 | } |
2042 | i = ocfs2_search_dirblock(bh, dir, name, namelen, | 2041 | i = ocfs2_search_dirblock(bh, dir, name, namelen, |
2043 | block << sb->s_blocksize_bits, | 2042 | block << sb->s_blocksize_bits, |
2044 | res_dir); | 2043 | res_dir); |
2045 | if (i == 1) { | 2044 | if (i == 1) { |
2046 | OCFS2_I(dir)->ip_dir_start_lookup = block; | 2045 | OCFS2_I(dir)->ip_dir_start_lookup = block; |
2047 | ret = bh; | 2046 | ret = bh; |
2048 | goto cleanup_and_exit; | 2047 | goto cleanup_and_exit; |
2049 | } else { | 2048 | } else { |
2050 | brelse(bh); | 2049 | brelse(bh); |
2051 | if (i < 0) | 2050 | if (i < 0) |
2052 | goto cleanup_and_exit; | 2051 | goto cleanup_and_exit; |
2053 | } | 2052 | } |
2054 | next: | 2053 | next: |
2055 | if (++block >= nblocks) | 2054 | if (++block >= nblocks) |
2056 | block = 0; | 2055 | block = 0; |
2057 | } while (block != start); | 2056 | } while (block != start); |
2058 | 2057 | ||
2059 | /* | 2058 | /* |
2060 | * If the directory has grown while we were searching, then | 2059 | * If the directory has grown while we were searching, then |
2061 | * search the last part of the directory before giving up. | 2060 | * search the last part of the directory before giving up. |
2062 | */ | 2061 | */ |
2063 | block = nblocks; | 2062 | block = nblocks; |
2064 | nblocks = i_size_read(dir) >> sb->s_blocksize_bits; | 2063 | nblocks = i_size_read(dir) >> sb->s_blocksize_bits; |
2065 | if (block < nblocks) { | 2064 | if (block < nblocks) { |
2066 | start = 0; | 2065 | start = 0; |
2067 | goto restart; | 2066 | goto restart; |
2068 | } | 2067 | } |
2069 | 2068 | ||
2070 | cleanup_and_exit: | 2069 | cleanup_and_exit: |
2071 | /* Clean up the read-ahead blocks */ | 2070 | /* Clean up the read-ahead blocks */ |
2072 | for (; ra_ptr < ra_max; ra_ptr++) | 2071 | for (; ra_ptr < ra_max; ra_ptr++) |
2073 | brelse(bh_use[ra_ptr]); | 2072 | brelse(bh_use[ra_ptr]); |
2074 | 2073 | ||
2075 | mlog_exit_ptr(ret); | 2074 | mlog_exit_ptr(ret); |
2076 | return ret; | 2075 | return ret; |
2077 | } | 2076 | } |
2078 | 2077 | ||
2079 | static int ocfs2_blkno_stringify(u64 blkno, char *name) | 2078 | static int ocfs2_blkno_stringify(u64 blkno, char *name) |
2080 | { | 2079 | { |
2081 | int status, namelen; | 2080 | int status, namelen; |
2082 | 2081 | ||
2083 | mlog_entry_void(); | 2082 | mlog_entry_void(); |
2084 | 2083 | ||
2085 | namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx", | 2084 | namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx", |
2086 | (long long)blkno); | 2085 | (long long)blkno); |
2087 | if (namelen <= 0) { | 2086 | if (namelen <= 0) { |
2088 | if (namelen) | 2087 | if (namelen) |
2089 | status = namelen; | 2088 | status = namelen; |
2090 | else | 2089 | else |
2091 | status = -EINVAL; | 2090 | status = -EINVAL; |
2092 | mlog_errno(status); | 2091 | mlog_errno(status); |
2093 | goto bail; | 2092 | goto bail; |
2094 | } | 2093 | } |
2095 | if (namelen != OCFS2_ORPHAN_NAMELEN) { | 2094 | if (namelen != OCFS2_ORPHAN_NAMELEN) { |
2096 | status = -EINVAL; | 2095 | status = -EINVAL; |
2097 | mlog_errno(status); | 2096 | mlog_errno(status); |
2098 | goto bail; | 2097 | goto bail; |
2099 | } | 2098 | } |
2100 | 2099 | ||
2101 | mlog(0, "built filename '%s' for orphan dir (len=%d)\n", name, | 2100 | mlog(0, "built filename '%s' for orphan dir (len=%d)\n", name, |
2102 | namelen); | 2101 | namelen); |
2103 | 2102 | ||
2104 | status = 0; | 2103 | status = 0; |
2105 | bail: | 2104 | bail: |
2106 | mlog_exit(status); | 2105 | mlog_exit(status); |
2107 | return status; | 2106 | return status; |
2108 | } | 2107 | } |
2109 | 2108 | ||
2110 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | 2109 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, |
2111 | struct inode **ret_orphan_dir, | 2110 | struct inode **ret_orphan_dir, |
2112 | struct inode *inode, | 2111 | struct inode *inode, |
2113 | char *name, | 2112 | char *name, |
2114 | struct buffer_head **de_bh) | 2113 | struct buffer_head **de_bh) |
2115 | { | 2114 | { |
2116 | struct inode *orphan_dir_inode; | 2115 | struct inode *orphan_dir_inode; |
2117 | struct buffer_head *orphan_dir_bh = NULL; | 2116 | struct buffer_head *orphan_dir_bh = NULL; |
2118 | int status = 0; | 2117 | int status = 0; |
2119 | 2118 | ||
2120 | status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name); | 2119 | status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name); |
2121 | if (status < 0) { | 2120 | if (status < 0) { |
2122 | mlog_errno(status); | 2121 | mlog_errno(status); |
2123 | return status; | 2122 | return status; |
2124 | } | 2123 | } |
2125 | 2124 | ||
2126 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 2125 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
2127 | ORPHAN_DIR_SYSTEM_INODE, | 2126 | ORPHAN_DIR_SYSTEM_INODE, |
2128 | osb->slot_num); | 2127 | osb->slot_num); |
2129 | if (!orphan_dir_inode) { | 2128 | if (!orphan_dir_inode) { |
2130 | status = -ENOENT; | 2129 | status = -ENOENT; |
2131 | mlog_errno(status); | 2130 | mlog_errno(status); |
2132 | return status; | 2131 | return status; |
2133 | } | 2132 | } |
2134 | 2133 | ||
2135 | mutex_lock(&orphan_dir_inode->i_mutex); | 2134 | mutex_lock(&orphan_dir_inode->i_mutex); |
2136 | 2135 | ||
2137 | status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 2136 | status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1); |
2138 | if (status < 0) { | 2137 | if (status < 0) { |
2139 | mlog_errno(status); | 2138 | mlog_errno(status); |
2140 | goto leave; | 2139 | goto leave; |
2141 | } | 2140 | } |
2142 | 2141 | ||
2143 | status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, | 2142 | status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, |
2144 | orphan_dir_bh, name, | 2143 | orphan_dir_bh, name, |
2145 | OCFS2_ORPHAN_NAMELEN, de_bh); | 2144 | OCFS2_ORPHAN_NAMELEN, de_bh); |
2146 | if (status < 0) { | 2145 | if (status < 0) { |
2147 | ocfs2_meta_unlock(orphan_dir_inode, 1); | 2146 | ocfs2_meta_unlock(orphan_dir_inode, 1); |
2148 | 2147 | ||
2149 | mlog_errno(status); | 2148 | mlog_errno(status); |
2150 | goto leave; | 2149 | goto leave; |
2151 | } | 2150 | } |
2152 | 2151 | ||
2153 | *ret_orphan_dir = orphan_dir_inode; | 2152 | *ret_orphan_dir = orphan_dir_inode; |
2154 | 2153 | ||
2155 | leave: | 2154 | leave: |
2156 | if (status) { | 2155 | if (status) { |
2157 | mutex_unlock(&orphan_dir_inode->i_mutex); | 2156 | mutex_unlock(&orphan_dir_inode->i_mutex); |
2158 | iput(orphan_dir_inode); | 2157 | iput(orphan_dir_inode); |
2159 | } | 2158 | } |
2160 | 2159 | ||
2161 | if (orphan_dir_bh) | 2160 | if (orphan_dir_bh) |
2162 | brelse(orphan_dir_bh); | 2161 | brelse(orphan_dir_bh); |
2163 | 2162 | ||
2164 | mlog_exit(status); | 2163 | mlog_exit(status); |
2165 | return status; | 2164 | return status; |
2166 | } | 2165 | } |
2167 | 2166 | ||
2168 | static int ocfs2_orphan_add(struct ocfs2_super *osb, | 2167 | static int ocfs2_orphan_add(struct ocfs2_super *osb, |
2169 | handle_t *handle, | 2168 | handle_t *handle, |
2170 | struct inode *inode, | 2169 | struct inode *inode, |
2171 | struct ocfs2_dinode *fe, | 2170 | struct ocfs2_dinode *fe, |
2172 | char *name, | 2171 | char *name, |
2173 | struct buffer_head *de_bh, | 2172 | struct buffer_head *de_bh, |
2174 | struct inode *orphan_dir_inode) | 2173 | struct inode *orphan_dir_inode) |
2175 | { | 2174 | { |
2176 | struct buffer_head *orphan_dir_bh = NULL; | 2175 | struct buffer_head *orphan_dir_bh = NULL; |
2177 | int status = 0; | 2176 | int status = 0; |
2178 | struct ocfs2_dinode *orphan_fe; | 2177 | struct ocfs2_dinode *orphan_fe; |
2179 | 2178 | ||
2180 | mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); | 2179 | mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); |
2181 | 2180 | ||
2182 | status = ocfs2_read_block(osb, | 2181 | status = ocfs2_read_block(osb, |
2183 | OCFS2_I(orphan_dir_inode)->ip_blkno, | 2182 | OCFS2_I(orphan_dir_inode)->ip_blkno, |
2184 | &orphan_dir_bh, OCFS2_BH_CACHED, | 2183 | &orphan_dir_bh, OCFS2_BH_CACHED, |
2185 | orphan_dir_inode); | 2184 | orphan_dir_inode); |
2186 | if (status < 0) { | 2185 | if (status < 0) { |
2187 | mlog_errno(status); | 2186 | mlog_errno(status); |
2188 | goto leave; | 2187 | goto leave; |
2189 | } | 2188 | } |
2190 | 2189 | ||
2191 | status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh, | 2190 | status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh, |
2192 | OCFS2_JOURNAL_ACCESS_WRITE); | 2191 | OCFS2_JOURNAL_ACCESS_WRITE); |
2193 | if (status < 0) { | 2192 | if (status < 0) { |
2194 | mlog_errno(status); | 2193 | mlog_errno(status); |
2195 | goto leave; | 2194 | goto leave; |
2196 | } | 2195 | } |
2197 | 2196 | ||
2198 | /* we're a cluster, and nlink can change on disk from | 2197 | /* we're a cluster, and nlink can change on disk from |
2199 | * underneath us... */ | 2198 | * underneath us... */ |
2200 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; | 2199 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; |
2201 | if (S_ISDIR(inode->i_mode)) | 2200 | if (S_ISDIR(inode->i_mode)) |
2202 | le16_add_cpu(&orphan_fe->i_links_count, 1); | 2201 | le16_add_cpu(&orphan_fe->i_links_count, 1); |
2203 | orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count); | 2202 | orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count); |
2204 | 2203 | ||
2205 | status = ocfs2_journal_dirty(handle, orphan_dir_bh); | 2204 | status = ocfs2_journal_dirty(handle, orphan_dir_bh); |
2206 | if (status < 0) { | 2205 | if (status < 0) { |
2207 | mlog_errno(status); | 2206 | mlog_errno(status); |
2208 | goto leave; | 2207 | goto leave; |
2209 | } | 2208 | } |
2210 | 2209 | ||
2211 | status = __ocfs2_add_entry(handle, orphan_dir_inode, name, | 2210 | status = __ocfs2_add_entry(handle, orphan_dir_inode, name, |
2212 | OCFS2_ORPHAN_NAMELEN, inode, | 2211 | OCFS2_ORPHAN_NAMELEN, inode, |
2213 | OCFS2_I(inode)->ip_blkno, | 2212 | OCFS2_I(inode)->ip_blkno, |
2214 | orphan_dir_bh, de_bh); | 2213 | orphan_dir_bh, de_bh); |
2215 | if (status < 0) { | 2214 | if (status < 0) { |
2216 | mlog_errno(status); | 2215 | mlog_errno(status); |
2217 | goto leave; | 2216 | goto leave; |
2218 | } | 2217 | } |
2219 | 2218 | ||
2220 | le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); | 2219 | le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); |
2221 | 2220 | ||
2222 | /* Record which orphan dir our inode now resides | 2221 | /* Record which orphan dir our inode now resides |
2223 | * in. delete_inode will use this to determine which orphan | 2222 | * in. delete_inode will use this to determine which orphan |
2224 | * dir to lock. */ | 2223 | * dir to lock. */ |
2225 | fe->i_orphaned_slot = cpu_to_le16(osb->slot_num); | 2224 | fe->i_orphaned_slot = cpu_to_le16(osb->slot_num); |
2226 | 2225 | ||
2227 | mlog(0, "Inode %llu orphaned in slot %d\n", | 2226 | mlog(0, "Inode %llu orphaned in slot %d\n", |
2228 | (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); | 2227 | (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); |
2229 | 2228 | ||
2230 | leave: | 2229 | leave: |
2231 | if (orphan_dir_bh) | 2230 | if (orphan_dir_bh) |
2232 | brelse(orphan_dir_bh); | 2231 | brelse(orphan_dir_bh); |
2233 | 2232 | ||
2234 | mlog_exit(status); | 2233 | mlog_exit(status); |
2235 | return status; | 2234 | return status; |
2236 | } | 2235 | } |
2237 | 2236 | ||
2238 | /* unlike orphan_add, we expect the orphan dir to already be locked here. */ | 2237 | /* unlike orphan_add, we expect the orphan dir to already be locked here. */ |
2239 | int ocfs2_orphan_del(struct ocfs2_super *osb, | 2238 | int ocfs2_orphan_del(struct ocfs2_super *osb, |
2240 | handle_t *handle, | 2239 | handle_t *handle, |
2241 | struct inode *orphan_dir_inode, | 2240 | struct inode *orphan_dir_inode, |
2242 | struct inode *inode, | 2241 | struct inode *inode, |
2243 | struct buffer_head *orphan_dir_bh) | 2242 | struct buffer_head *orphan_dir_bh) |
2244 | { | 2243 | { |
2245 | char name[OCFS2_ORPHAN_NAMELEN + 1]; | 2244 | char name[OCFS2_ORPHAN_NAMELEN + 1]; |
2246 | struct ocfs2_dinode *orphan_fe; | 2245 | struct ocfs2_dinode *orphan_fe; |
2247 | int status = 0; | 2246 | int status = 0; |
2248 | struct buffer_head *target_de_bh = NULL; | 2247 | struct buffer_head *target_de_bh = NULL; |
2249 | struct ocfs2_dir_entry *target_de = NULL; | 2248 | struct ocfs2_dir_entry *target_de = NULL; |
2250 | 2249 | ||
2251 | mlog_entry_void(); | 2250 | mlog_entry_void(); |
2252 | 2251 | ||
2253 | status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name); | 2252 | status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name); |
2254 | if (status < 0) { | 2253 | if (status < 0) { |
2255 | mlog_errno(status); | 2254 | mlog_errno(status); |
2256 | goto leave; | 2255 | goto leave; |
2257 | } | 2256 | } |
2258 | 2257 | ||
2259 | mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n", | 2258 | mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n", |
2260 | name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, | 2259 | name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, |
2261 | OCFS2_ORPHAN_NAMELEN); | 2260 | OCFS2_ORPHAN_NAMELEN); |
2262 | 2261 | ||
2263 | /* find it's spot in the orphan directory */ | 2262 | /* find it's spot in the orphan directory */ |
2264 | target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, | 2263 | target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, |
2265 | orphan_dir_inode, &target_de); | 2264 | orphan_dir_inode, &target_de); |
2266 | if (!target_de_bh) { | 2265 | if (!target_de_bh) { |
2267 | status = -ENOENT; | 2266 | status = -ENOENT; |
2268 | mlog_errno(status); | 2267 | mlog_errno(status); |
2269 | goto leave; | 2268 | goto leave; |
2270 | } | 2269 | } |
2271 | 2270 | ||
2272 | /* remove it from the orphan directory */ | 2271 | /* remove it from the orphan directory */ |
2273 | status = ocfs2_delete_entry(handle, orphan_dir_inode, target_de, | 2272 | status = ocfs2_delete_entry(handle, orphan_dir_inode, target_de, |
2274 | target_de_bh); | 2273 | target_de_bh); |
2275 | if (status < 0) { | 2274 | if (status < 0) { |
2276 | mlog_errno(status); | 2275 | mlog_errno(status); |
2277 | goto leave; | 2276 | goto leave; |
2278 | } | 2277 | } |
2279 | 2278 | ||
2280 | status = ocfs2_journal_access(handle,orphan_dir_inode, orphan_dir_bh, | 2279 | status = ocfs2_journal_access(handle,orphan_dir_inode, orphan_dir_bh, |
2281 | OCFS2_JOURNAL_ACCESS_WRITE); | 2280 | OCFS2_JOURNAL_ACCESS_WRITE); |
2282 | if (status < 0) { | 2281 | if (status < 0) { |
2283 | mlog_errno(status); | 2282 | mlog_errno(status); |
2284 | goto leave; | 2283 | goto leave; |
2285 | } | 2284 | } |
2286 | 2285 | ||
2287 | /* do the i_nlink dance! :) */ | 2286 | /* do the i_nlink dance! :) */ |
2288 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; | 2287 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; |
2289 | if (S_ISDIR(inode->i_mode)) | 2288 | if (S_ISDIR(inode->i_mode)) |
2290 | le16_add_cpu(&orphan_fe->i_links_count, -1); | 2289 | le16_add_cpu(&orphan_fe->i_links_count, -1); |
2291 | orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count); | 2290 | orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count); |
2292 | 2291 | ||
2293 | status = ocfs2_journal_dirty(handle, orphan_dir_bh); | 2292 | status = ocfs2_journal_dirty(handle, orphan_dir_bh); |
2294 | if (status < 0) { | 2293 | if (status < 0) { |
2295 | mlog_errno(status); | 2294 | mlog_errno(status); |
2296 | goto leave; | 2295 | goto leave; |
2297 | } | 2296 | } |
2298 | 2297 | ||
2299 | leave: | 2298 | leave: |
2300 | if (target_de_bh) | 2299 | if (target_de_bh) |
2301 | brelse(target_de_bh); | 2300 | brelse(target_de_bh); |
2302 | 2301 | ||
2303 | mlog_exit(status); | 2302 | mlog_exit(status); |
2304 | return status; | 2303 | return status; |
2305 | } | 2304 | } |
2306 | 2305 | ||
2307 | const struct inode_operations ocfs2_dir_iops = { | 2306 | const struct inode_operations ocfs2_dir_iops = { |
2308 | .create = ocfs2_create, | 2307 | .create = ocfs2_create, |
2309 | .lookup = ocfs2_lookup, | 2308 | .lookup = ocfs2_lookup, |
2310 | .link = ocfs2_link, | 2309 | .link = ocfs2_link, |
2311 | .unlink = ocfs2_unlink, | 2310 | .unlink = ocfs2_unlink, |
2312 | .rmdir = ocfs2_unlink, | 2311 | .rmdir = ocfs2_unlink, |
2313 | .symlink = ocfs2_symlink, | 2312 | .symlink = ocfs2_symlink, |
2314 | .mkdir = ocfs2_mkdir, | 2313 | .mkdir = ocfs2_mkdir, |
2315 | .mknod = ocfs2_mknod, | 2314 | .mknod = ocfs2_mknod, |
2316 | .rename = ocfs2_rename, | 2315 | .rename = ocfs2_rename, |
2317 | .setattr = ocfs2_setattr, | 2316 | .setattr = ocfs2_setattr, |
2318 | .getattr = ocfs2_getattr, | 2317 | .getattr = ocfs2_getattr, |
2319 | .permission = ocfs2_permission, | 2318 | .permission = ocfs2_permission, |
2320 | }; | 2319 | }; |
2321 | 2320 |