Commit d28c91740ae2cd1d963f9e4e3889789894cb6d52

Authored by Josef Sipek
Committed by Linus Torvalds
1 parent 6db5fc5d53

[PATCH] struct path: convert ocfs2

Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 4 changed files with 22 additions and 22 deletions Inline Diff

1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 4 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public 7 * modify it under the terms of the GNU General Public
8 * License as published by the Free Software Foundation; either 8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version. 9 * version 2 of the License, or (at your option) any later version.
10 * 10 *
11 * This program is distributed in the hope that it will be useful, 11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details. 14 * General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public 16 * You should have received a copy of the GNU General Public
17 * License along with this program; if not, write to the 17 * License along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA. 19 * Boston, MA 021110-1307, USA.
20 */ 20 */
21 21
22 #include <linux/fs.h> 22 #include <linux/fs.h>
23 #include <linux/slab.h> 23 #include <linux/slab.h>
24 #include <linux/highmem.h> 24 #include <linux/highmem.h>
25 #include <linux/pagemap.h> 25 #include <linux/pagemap.h>
26 #include <asm/byteorder.h> 26 #include <asm/byteorder.h>
27 27
28 #define MLOG_MASK_PREFIX ML_FILE_IO 28 #define MLOG_MASK_PREFIX ML_FILE_IO
29 #include <cluster/masklog.h> 29 #include <cluster/masklog.h>
30 30
31 #include "ocfs2.h" 31 #include "ocfs2.h"
32 32
33 #include "alloc.h" 33 #include "alloc.h"
34 #include "aops.h" 34 #include "aops.h"
35 #include "dlmglue.h" 35 #include "dlmglue.h"
36 #include "extent_map.h" 36 #include "extent_map.h"
37 #include "file.h" 37 #include "file.h"
38 #include "inode.h" 38 #include "inode.h"
39 #include "journal.h" 39 #include "journal.h"
40 #include "super.h" 40 #include "super.h"
41 #include "symlink.h" 41 #include "symlink.h"
42 42
43 #include "buffer_head_io.h" 43 #include "buffer_head_io.h"
44 44
45 static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, 45 static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
46 struct buffer_head *bh_result, int create) 46 struct buffer_head *bh_result, int create)
47 { 47 {
48 int err = -EIO; 48 int err = -EIO;
49 int status; 49 int status;
50 struct ocfs2_dinode *fe = NULL; 50 struct ocfs2_dinode *fe = NULL;
51 struct buffer_head *bh = NULL; 51 struct buffer_head *bh = NULL;
52 struct buffer_head *buffer_cache_bh = NULL; 52 struct buffer_head *buffer_cache_bh = NULL;
53 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 53 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
54 void *kaddr; 54 void *kaddr;
55 55
56 mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, 56 mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
57 (unsigned long long)iblock, bh_result, create); 57 (unsigned long long)iblock, bh_result, create);
58 58
59 BUG_ON(ocfs2_inode_is_fast_symlink(inode)); 59 BUG_ON(ocfs2_inode_is_fast_symlink(inode));
60 60
61 if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) { 61 if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
62 mlog(ML_ERROR, "block offset > PATH_MAX: %llu", 62 mlog(ML_ERROR, "block offset > PATH_MAX: %llu",
63 (unsigned long long)iblock); 63 (unsigned long long)iblock);
64 goto bail; 64 goto bail;
65 } 65 }
66 66
67 status = ocfs2_read_block(OCFS2_SB(inode->i_sb), 67 status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
68 OCFS2_I(inode)->ip_blkno, 68 OCFS2_I(inode)->ip_blkno,
69 &bh, OCFS2_BH_CACHED, inode); 69 &bh, OCFS2_BH_CACHED, inode);
70 if (status < 0) { 70 if (status < 0) {
71 mlog_errno(status); 71 mlog_errno(status);
72 goto bail; 72 goto bail;
73 } 73 }
74 fe = (struct ocfs2_dinode *) bh->b_data; 74 fe = (struct ocfs2_dinode *) bh->b_data;
75 75
76 if (!OCFS2_IS_VALID_DINODE(fe)) { 76 if (!OCFS2_IS_VALID_DINODE(fe)) {
77 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", 77 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
78 (unsigned long long)fe->i_blkno, 7, fe->i_signature); 78 (unsigned long long)fe->i_blkno, 7, fe->i_signature);
79 goto bail; 79 goto bail;
80 } 80 }
81 81
82 if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, 82 if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
83 le32_to_cpu(fe->i_clusters))) { 83 le32_to_cpu(fe->i_clusters))) {
84 mlog(ML_ERROR, "block offset is outside the allocated size: " 84 mlog(ML_ERROR, "block offset is outside the allocated size: "
85 "%llu\n", (unsigned long long)iblock); 85 "%llu\n", (unsigned long long)iblock);
86 goto bail; 86 goto bail;
87 } 87 }
88 88
89 /* We don't use the page cache to create symlink data, so if 89 /* We don't use the page cache to create symlink data, so if
90 * need be, copy it over from the buffer cache. */ 90 * need be, copy it over from the buffer cache. */
91 if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) { 91 if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) {
92 u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + 92 u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) +
93 iblock; 93 iblock;
94 buffer_cache_bh = sb_getblk(osb->sb, blkno); 94 buffer_cache_bh = sb_getblk(osb->sb, blkno);
95 if (!buffer_cache_bh) { 95 if (!buffer_cache_bh) {
96 mlog(ML_ERROR, "couldn't getblock for symlink!\n"); 96 mlog(ML_ERROR, "couldn't getblock for symlink!\n");
97 goto bail; 97 goto bail;
98 } 98 }
99 99
100 /* we haven't locked out transactions, so a commit 100 /* we haven't locked out transactions, so a commit
101 * could've happened. Since we've got a reference on 101 * could've happened. Since we've got a reference on
102 * the bh, even if it commits while we're doing the 102 * the bh, even if it commits while we're doing the
103 * copy, the data is still good. */ 103 * copy, the data is still good. */
104 if (buffer_jbd(buffer_cache_bh) 104 if (buffer_jbd(buffer_cache_bh)
105 && ocfs2_inode_is_new(inode)) { 105 && ocfs2_inode_is_new(inode)) {
106 kaddr = kmap_atomic(bh_result->b_page, KM_USER0); 106 kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
107 if (!kaddr) { 107 if (!kaddr) {
108 mlog(ML_ERROR, "couldn't kmap!\n"); 108 mlog(ML_ERROR, "couldn't kmap!\n");
109 goto bail; 109 goto bail;
110 } 110 }
111 memcpy(kaddr + (bh_result->b_size * iblock), 111 memcpy(kaddr + (bh_result->b_size * iblock),
112 buffer_cache_bh->b_data, 112 buffer_cache_bh->b_data,
113 bh_result->b_size); 113 bh_result->b_size);
114 kunmap_atomic(kaddr, KM_USER0); 114 kunmap_atomic(kaddr, KM_USER0);
115 set_buffer_uptodate(bh_result); 115 set_buffer_uptodate(bh_result);
116 } 116 }
117 brelse(buffer_cache_bh); 117 brelse(buffer_cache_bh);
118 } 118 }
119 119
120 map_bh(bh_result, inode->i_sb, 120 map_bh(bh_result, inode->i_sb,
121 le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock); 121 le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock);
122 122
123 err = 0; 123 err = 0;
124 124
125 bail: 125 bail:
126 if (bh) 126 if (bh)
127 brelse(bh); 127 brelse(bh);
128 128
129 mlog_exit(err); 129 mlog_exit(err);
130 return err; 130 return err;
131 } 131 }
132 132
133 static int ocfs2_get_block(struct inode *inode, sector_t iblock, 133 static int ocfs2_get_block(struct inode *inode, sector_t iblock,
134 struct buffer_head *bh_result, int create) 134 struct buffer_head *bh_result, int create)
135 { 135 {
136 int err = 0; 136 int err = 0;
137 u64 p_blkno, past_eof; 137 u64 p_blkno, past_eof;
138 138
139 mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, 139 mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
140 (unsigned long long)iblock, bh_result, create); 140 (unsigned long long)iblock, bh_result, create);
141 141
142 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) 142 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
143 mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n", 143 mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n",
144 inode, inode->i_ino); 144 inode, inode->i_ino);
145 145
146 if (S_ISLNK(inode->i_mode)) { 146 if (S_ISLNK(inode->i_mode)) {
147 /* this always does I/O for some reason. */ 147 /* this always does I/O for some reason. */
148 err = ocfs2_symlink_get_block(inode, iblock, bh_result, create); 148 err = ocfs2_symlink_get_block(inode, iblock, bh_result, create);
149 goto bail; 149 goto bail;
150 } 150 }
151 151
152 /* this can happen if another node truncs after our extend! */ 152 /* this can happen if another node truncs after our extend! */
153 spin_lock(&OCFS2_I(inode)->ip_lock); 153 spin_lock(&OCFS2_I(inode)->ip_lock);
154 if (iblock >= ocfs2_clusters_to_blocks(inode->i_sb, 154 if (iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
155 OCFS2_I(inode)->ip_clusters)) 155 OCFS2_I(inode)->ip_clusters))
156 err = -EIO; 156 err = -EIO;
157 spin_unlock(&OCFS2_I(inode)->ip_lock); 157 spin_unlock(&OCFS2_I(inode)->ip_lock);
158 if (err) 158 if (err)
159 goto bail; 159 goto bail;
160 160
161 err = ocfs2_extent_map_get_blocks(inode, iblock, 1, &p_blkno, 161 err = ocfs2_extent_map_get_blocks(inode, iblock, 1, &p_blkno,
162 NULL); 162 NULL);
163 if (err) { 163 if (err) {
164 mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " 164 mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
165 "%llu, NULL)\n", err, inode, (unsigned long long)iblock, 165 "%llu, NULL)\n", err, inode, (unsigned long long)iblock,
166 (unsigned long long)p_blkno); 166 (unsigned long long)p_blkno);
167 goto bail; 167 goto bail;
168 } 168 }
169 169
170 map_bh(bh_result, inode->i_sb, p_blkno); 170 map_bh(bh_result, inode->i_sb, p_blkno);
171 171
172 if (bh_result->b_blocknr == 0) { 172 if (bh_result->b_blocknr == 0) {
173 err = -EIO; 173 err = -EIO;
174 mlog(ML_ERROR, "iblock = %llu p_blkno = %llu blkno=(%llu)\n", 174 mlog(ML_ERROR, "iblock = %llu p_blkno = %llu blkno=(%llu)\n",
175 (unsigned long long)iblock, 175 (unsigned long long)iblock,
176 (unsigned long long)p_blkno, 176 (unsigned long long)p_blkno,
177 (unsigned long long)OCFS2_I(inode)->ip_blkno); 177 (unsigned long long)OCFS2_I(inode)->ip_blkno);
178 } 178 }
179 179
180 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 180 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
181 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, 181 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
182 (unsigned long long)past_eof); 182 (unsigned long long)past_eof);
183 183
184 if (create && (iblock >= past_eof)) 184 if (create && (iblock >= past_eof))
185 set_buffer_new(bh_result); 185 set_buffer_new(bh_result);
186 186
187 bail: 187 bail:
188 if (err < 0) 188 if (err < 0)
189 err = -EIO; 189 err = -EIO;
190 190
191 mlog_exit(err); 191 mlog_exit(err);
192 return err; 192 return err;
193 } 193 }
194 194
/*
 * ->readpage for ocfs2: take cluster meta + data locks, then hand the
 * page to block_read_full_page().
 *
 * The `unlock` flag tracks who owns the page lock: the *_with_page lock
 * helpers may return AOP_TRUNCATED_PAGE after releasing the page lock
 * themselves, and block_read_full_page() unlocks the page on its own —
 * in both cases we must not unlock it again at `out`.
 */
static int ocfs2_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;
	int ret, unlock = 1;

	mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0));

	ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page);
	if (ret != 0) {
		if (ret == AOP_TRUNCATED_PAGE)
			unlock = 0;
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/*
	 * i_size might have just been updated as we grabed the meta lock. We
	 * might now be discovering a truncate that hit on another node.
	 * block_read_full_page->get_block freaks out if it is asked to read
	 * beyond the end of a file, so we check here. Callers
	 * (generic_file_read, fault->nopage) are clever enough to check i_size
	 * and notice that the page they just read isn't needed.
	 *
	 * XXX sys_readahead() seems to get that wrong?
	 */
	if (start >= i_size_read(inode)) {
		/* Page lies entirely past EOF: return zeroes. */
		char *addr = kmap(page);
		memset(addr, 0, PAGE_SIZE);
		flush_dcache_page(page);
		kunmap(page);
		SetPageUptodate(page);
		ret = 0;
		goto out_alloc;
	}

	ret = ocfs2_data_lock_with_page(inode, 0, page);
	if (ret != 0) {
		if (ret == AOP_TRUNCATED_PAGE)
			unlock = 0;
		mlog_errno(ret);
		goto out_alloc;
	}

	/* block_read_full_page() unlocks the page itself when done. */
	ret = block_read_full_page(page, ocfs2_get_block);
	unlock = 0;

	/* Unwind in reverse acquisition order: data, alloc sem, meta. */
	ocfs2_data_unlock(inode, 0);
out_alloc:
	up_read(&OCFS2_I(inode)->ip_alloc_sem);
	ocfs2_meta_unlock(inode, 0);
out:
	if (unlock)
		unlock_page(page);
	mlog_exit(ret);
	return ret;
}
254 254
255 /* Note: Because we don't support holes, our allocation has 255 /* Note: Because we don't support holes, our allocation has
256 * already happened (allocation writes zeros to the file data) 256 * already happened (allocation writes zeros to the file data)
257 * so we don't have to worry about ordered writes in 257 * so we don't have to worry about ordered writes in
258 * ocfs2_writepage. 258 * ocfs2_writepage.
259 * 259 *
260 * ->writepage is called during the process of invalidating the page cache 260 * ->writepage is called during the process of invalidating the page cache
261 * during blocked lock processing. It can't block on any cluster locks 261 * during blocked lock processing. It can't block on any cluster locks
262 * to during block mapping. It's relying on the fact that the block 262 * to during block mapping. It's relying on the fact that the block
263 * mapping can't have disappeared under the dirty pages that it is 263 * mapping can't have disappeared under the dirty pages that it is
264 * being asked to write back. 264 * being asked to write back.
265 */ 265 */
266 static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) 266 static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
267 { 267 {
268 int ret; 268 int ret;
269 269
270 mlog_entry("(0x%p)\n", page); 270 mlog_entry("(0x%p)\n", page);
271 271
272 ret = block_write_full_page(page, ocfs2_get_block, wbc); 272 ret = block_write_full_page(page, ocfs2_get_block, wbc);
273 273
274 mlog_exit(ret); 274 mlog_exit(ret);
275 275
276 return ret; 276 return ret;
277 } 277 }
278 278
279 /* This can also be called from ocfs2_write_zero_page() which has done 279 /* This can also be called from ocfs2_write_zero_page() which has done
280 * it's own cluster locking. */ 280 * it's own cluster locking. */
281 int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, 281 int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
282 unsigned from, unsigned to) 282 unsigned from, unsigned to)
283 { 283 {
284 int ret; 284 int ret;
285 285
286 down_read(&OCFS2_I(inode)->ip_alloc_sem); 286 down_read(&OCFS2_I(inode)->ip_alloc_sem);
287 287
288 ret = block_prepare_write(page, from, to, ocfs2_get_block); 288 ret = block_prepare_write(page, from, to, ocfs2_get_block);
289 289
290 up_read(&OCFS2_I(inode)->ip_alloc_sem); 290 up_read(&OCFS2_I(inode)->ip_alloc_sem);
291 291
292 return ret; 292 return ret;
293 } 293 }
294 294
295 /* 295 /*
296 * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called 296 * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called
297 * from loopback. It must be able to perform its own locking around 297 * from loopback. It must be able to perform its own locking around
298 * ocfs2_get_block(). 298 * ocfs2_get_block().
299 */ 299 */
300 static int ocfs2_prepare_write(struct file *file, struct page *page, 300 static int ocfs2_prepare_write(struct file *file, struct page *page,
301 unsigned from, unsigned to) 301 unsigned from, unsigned to)
302 { 302 {
303 struct inode *inode = page->mapping->host; 303 struct inode *inode = page->mapping->host;
304 int ret; 304 int ret;
305 305
306 mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to); 306 mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
307 307
308 ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); 308 ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page);
309 if (ret != 0) { 309 if (ret != 0) {
310 mlog_errno(ret); 310 mlog_errno(ret);
311 goto out; 311 goto out;
312 } 312 }
313 313
314 ret = ocfs2_prepare_write_nolock(inode, page, from, to); 314 ret = ocfs2_prepare_write_nolock(inode, page, from, to);
315 315
316 ocfs2_meta_unlock(inode, 0); 316 ocfs2_meta_unlock(inode, 0);
317 out: 317 out:
318 mlog_exit(ret); 318 mlog_exit(ret);
319 return ret; 319 return ret;
320 } 320 }
321 321
322 /* Taken from ext3. We don't necessarily need the full blown 322 /* Taken from ext3. We don't necessarily need the full blown
323 * functionality yet, but IMHO it's better to cut and paste the whole 323 * functionality yet, but IMHO it's better to cut and paste the whole
324 * thing so we can avoid introducing our own bugs (and easily pick up 324 * thing so we can avoid introducing our own bugs (and easily pick up
325 * their fixes when they happen) --Mark */ 325 * their fixes when they happen) --Mark */
/*
 * Apply @fn(handle, bh) to every buffer of @head that overlaps the byte
 * range [from, to) within the page.  Buffers wholly outside the range
 * are skipped; if any skipped buffer is not uptodate and @partial is
 * non-NULL, *partial is set to 1.  Returns the first non-zero value
 * returned by @fn (iteration stops after that), or 0.
 *
 * Deliberately kept as a verbatim copy of the ext3 version (see the
 * comment above this function) — do not "clean up" independently.
 */
static int walk_page_buffers(	handle_t *handle,
				struct buffer_head *head,
				unsigned from,
				unsigned to,
				int *partial,
				int (*fn)(	handle_t *handle,
						struct buffer_head *bh))
{
	struct buffer_head *bh;
	unsigned block_start, block_end;
	unsigned blocksize = head->b_size;
	int err, ret = 0;
	struct buffer_head *next;

	/* Buffers of a page form a circular list; stop when we wrap
	 * back to head (block_start != 0 distinguishes the wrap from
	 * the first iteration). */
	for (	bh = head, block_start = 0;
		ret == 0 && (bh != head || !block_start);
		block_start = block_end, bh = next)
	{
		next = bh->b_this_page;
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (partial && !buffer_uptodate(bh))
				*partial = 1;
			continue;
		}
		err = (*fn)(handle, bh);
		if (!ret)
			ret = err;
	}
	return ret;
}
357 357
358 handle_t *ocfs2_start_walk_page_trans(struct inode *inode, 358 handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
359 struct page *page, 359 struct page *page,
360 unsigned from, 360 unsigned from,
361 unsigned to) 361 unsigned to)
362 { 362 {
363 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 363 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
364 handle_t *handle = NULL; 364 handle_t *handle = NULL;
365 int ret = 0; 365 int ret = 0;
366 366
367 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 367 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
368 if (!handle) { 368 if (!handle) {
369 ret = -ENOMEM; 369 ret = -ENOMEM;
370 mlog_errno(ret); 370 mlog_errno(ret);
371 goto out; 371 goto out;
372 } 372 }
373 373
374 if (ocfs2_should_order_data(inode)) { 374 if (ocfs2_should_order_data(inode)) {
375 ret = walk_page_buffers(handle, 375 ret = walk_page_buffers(handle,
376 page_buffers(page), 376 page_buffers(page),
377 from, to, NULL, 377 from, to, NULL,
378 ocfs2_journal_dirty_data); 378 ocfs2_journal_dirty_data);
379 if (ret < 0) 379 if (ret < 0)
380 mlog_errno(ret); 380 mlog_errno(ret);
381 } 381 }
382 out: 382 out:
383 if (ret) { 383 if (ret) {
384 if (handle) 384 if (handle)
385 ocfs2_commit_trans(osb, handle); 385 ocfs2_commit_trans(osb, handle);
386 handle = ERR_PTR(ret); 386 handle = ERR_PTR(ret);
387 } 387 }
388 return handle; 388 return handle;
389 } 389 }
390 390
/*
 * ->commit_write: finish the write started by ocfs2_prepare_write().
 * Takes meta (EX) and data (EX) cluster locks, starts a transaction,
 * lets generic_commit_write() update the page/i_size, then pushes the
 * new size and timestamps into the on-disk dinode within the same
 * transaction.  Unwind order at the bottom mirrors acquisition order:
 * transaction, data lock, meta lock.
 */
static int ocfs2_commit_write(struct file *file, struct page *page,
			      unsigned from, unsigned to)
{
	int ret;
	struct buffer_head *di_bh = NULL;
	struct inode *inode = page->mapping->host;
	handle_t *handle = NULL;
	struct ocfs2_dinode *di;

	mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);

	/* NOTE: ocfs2_file_aio_write has ensured that it's safe for
	 * us to continue here without rechecking the I/O against
	 * changed inode values.
	 *
	 * 1) We're currently holding the inode alloc lock, so no
	 *    nodes can change it underneath us.
	 *
	 * 2) We've had to take the metadata lock at least once
	 *    already to check for extending writes, suid removal, etc.
	 *    The meta data update code then ensures that we don't get a
	 *    stale inode allocation image (i_size, i_clusters, etc).
	 */

	ret = ocfs2_meta_lock_with_page(inode, &di_bh, 1, page);
	if (ret != 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_data_lock_with_page(inode, 1, page);
	if (ret != 0) {
		mlog_errno(ret);
		goto out_unlock_meta;
	}

	handle = ocfs2_start_walk_page_trans(inode, page, from, to);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_unlock_data;
	}

	/* Mark our buffer early. We'd rather catch this error up here
	 * as opposed to after a successful commit_write which would
	 * require us to set back inode->i_size. */
	ret = ocfs2_journal_access(handle, inode, di_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* might update i_size */
	ret = generic_commit_write(file, page, from, to);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_commit;
	}

	di = (struct ocfs2_dinode *)di_bh->b_data;

	/* ocfs2_mark_inode_dirty() is too heavy to use here. */
	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
	di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);

	/* Mirror the in-core size into the dinode (little-endian). */
	inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode)));
	di->i_size = cpu_to_le64((u64)i_size_read(inode));

	ret = ocfs2_journal_dirty(handle, di_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_commit;
	}

out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out_unlock_data:
	ocfs2_data_unlock(inode, 1);
out_unlock_meta:
	ocfs2_meta_unlock(inode, 1);
out:
	if (di_bh)
		brelse(di_bh);

	mlog_exit(ret);
	return ret;
}
479 479
480 static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) 480 static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
481 { 481 {
482 sector_t status; 482 sector_t status;
483 u64 p_blkno = 0; 483 u64 p_blkno = 0;
484 int err = 0; 484 int err = 0;
485 struct inode *inode = mapping->host; 485 struct inode *inode = mapping->host;
486 486
487 mlog_entry("(block = %llu)\n", (unsigned long long)block); 487 mlog_entry("(block = %llu)\n", (unsigned long long)block);
488 488
489 /* We don't need to lock journal system files, since they aren't 489 /* We don't need to lock journal system files, since they aren't
490 * accessed concurrently from multiple nodes. 490 * accessed concurrently from multiple nodes.
491 */ 491 */
492 if (!INODE_JOURNAL(inode)) { 492 if (!INODE_JOURNAL(inode)) {
493 err = ocfs2_meta_lock(inode, NULL, 0); 493 err = ocfs2_meta_lock(inode, NULL, 0);
494 if (err) { 494 if (err) {
495 if (err != -ENOENT) 495 if (err != -ENOENT)
496 mlog_errno(err); 496 mlog_errno(err);
497 goto bail; 497 goto bail;
498 } 498 }
499 down_read(&OCFS2_I(inode)->ip_alloc_sem); 499 down_read(&OCFS2_I(inode)->ip_alloc_sem);
500 } 500 }
501 501
502 err = ocfs2_extent_map_get_blocks(inode, block, 1, &p_blkno, 502 err = ocfs2_extent_map_get_blocks(inode, block, 1, &p_blkno,
503 NULL); 503 NULL);
504 504
505 if (!INODE_JOURNAL(inode)) { 505 if (!INODE_JOURNAL(inode)) {
506 up_read(&OCFS2_I(inode)->ip_alloc_sem); 506 up_read(&OCFS2_I(inode)->ip_alloc_sem);
507 ocfs2_meta_unlock(inode, 0); 507 ocfs2_meta_unlock(inode, 0);
508 } 508 }
509 509
510 if (err) { 510 if (err) {
511 mlog(ML_ERROR, "get_blocks() failed, block = %llu\n", 511 mlog(ML_ERROR, "get_blocks() failed, block = %llu\n",
512 (unsigned long long)block); 512 (unsigned long long)block);
513 mlog_errno(err); 513 mlog_errno(err);
514 goto bail; 514 goto bail;
515 } 515 }
516 516
517 517
518 bail: 518 bail:
519 status = err ? 0 : p_blkno; 519 status = err ? 0 : p_blkno;
520 520
521 mlog_exit((int)status); 521 mlog_exit((int)status);
522 522
523 return status; 523 return status;
524 } 524 }
525 525
526 /* 526 /*
527 * TODO: Make this into a generic get_blocks function. 527 * TODO: Make this into a generic get_blocks function.
528 * 528 *
529 * From do_direct_io in direct-io.c: 529 * From do_direct_io in direct-io.c:
530 * "So what we do is to permit the ->get_blocks function to populate 530 * "So what we do is to permit the ->get_blocks function to populate
531 * bh.b_size with the size of IO which is permitted at this offset and 531 * bh.b_size with the size of IO which is permitted at this offset and
532 * this i_blkbits." 532 * this i_blkbits."
533 * 533 *
534 * This function is called directly from get_more_blocks in direct-io.c. 534 * This function is called directly from get_more_blocks in direct-io.c.
535 * 535 *
536 * called like this: dio->get_blocks(dio->inode, fs_startblk, 536 * called like this: dio->get_blocks(dio->inode, fs_startblk,
537 * fs_count, map_bh, dio->rw == WRITE); 537 * fs_count, map_bh, dio->rw == WRITE);
538 */ 538 */
static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
				     struct buffer_head *bh_result, int create)
{
	int ret;
	u64 vbo_max; /* file offset, max_blocks from iblock */
	u64 p_blkno;
	int contig_blocks;
	unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;

	/* This function won't even be called if the request isn't all
	 * nicely aligned and of the right size, so there's no need
	 * for us to check any of that. */

	vbo_max = ((u64)iblock + max_blocks) << blocksize_bits;

	/* Reject requests past the current allocation — another node may
	 * have truncated the file since our caller checked.  ip_lock
	 * guards ip_clusters. */
	spin_lock(&OCFS2_I(inode)->ip_lock);
	if ((iblock + max_blocks) >
	    ocfs2_clusters_to_blocks(inode->i_sb,
				     OCFS2_I(inode)->ip_clusters)) {
		spin_unlock(&OCFS2_I(inode)->ip_lock);
		ret = -EIO;
		goto bail;
	}
	spin_unlock(&OCFS2_I(inode)->ip_lock);

	/* This figures out the size of the next contiguous block, and
	 * our logical offset */
	ret = ocfs2_extent_map_get_blocks(inode, iblock, 1, &p_blkno,
					  &contig_blocks);
	if (ret) {
		mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
		     (unsigned long long)iblock);
		ret = -EIO;
		goto bail;
	}

	map_bh(bh_result, inode->i_sb, p_blkno);

	/* make sure we don't map more than max_blocks blocks here as
	   that's all the kernel will handle at this point. */
	if (max_blocks < contig_blocks)
		contig_blocks = max_blocks;
	/* b_size reports back to the direct-io core how much contiguous
	 * I/O is permitted at this offset (see do_direct_io comment
	 * above this function). */
	bh_result->b_size = contig_blocks << blocksize_bits;
bail:
	return ret;
}
586 586
587 /* 587 /*
588 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're 588 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
589 * particularly interested in the aio/dio case. Like the core uses 589 * particularly interested in the aio/dio case. Like the core uses
590 * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from 590 * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from
591 * truncation on another. 591 * truncation on another.
592 */ 592 */
593 static void ocfs2_dio_end_io(struct kiocb *iocb, 593 static void ocfs2_dio_end_io(struct kiocb *iocb,
594 loff_t offset, 594 loff_t offset,
595 ssize_t bytes, 595 ssize_t bytes,
596 void *private) 596 void *private)
597 { 597 {
598 struct inode *inode = iocb->ki_filp->f_dentry->d_inode; 598 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
599 599
600 /* this io's submitter should not have unlocked this before we could */ 600 /* this io's submitter should not have unlocked this before we could */
601 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 601 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
602 ocfs2_iocb_clear_rw_locked(iocb); 602 ocfs2_iocb_clear_rw_locked(iocb);
603 up_read(&inode->i_alloc_sem); 603 up_read(&inode->i_alloc_sem);
604 ocfs2_rw_unlock(inode, 0); 604 ocfs2_rw_unlock(inode, 0);
605 } 605 }
606 606
607 static ssize_t ocfs2_direct_IO(int rw, 607 static ssize_t ocfs2_direct_IO(int rw,
608 struct kiocb *iocb, 608 struct kiocb *iocb,
609 const struct iovec *iov, 609 const struct iovec *iov,
610 loff_t offset, 610 loff_t offset,
611 unsigned long nr_segs) 611 unsigned long nr_segs)
612 { 612 {
613 struct file *file = iocb->ki_filp; 613 struct file *file = iocb->ki_filp;
614 struct inode *inode = file->f_dentry->d_inode->i_mapping->host; 614 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
615 int ret; 615 int ret;
616 616
617 mlog_entry_void(); 617 mlog_entry_void();
618 618
619 /* 619 /*
620 * We get PR data locks even for O_DIRECT. This allows 620 * We get PR data locks even for O_DIRECT. This allows
621 * concurrent O_DIRECT I/O but doesn't let O_DIRECT with 621 * concurrent O_DIRECT I/O but doesn't let O_DIRECT with
622 * extending and buffered zeroing writes race. If they did 622 * extending and buffered zeroing writes race. If they did
623 * race then the buffered zeroing could be written back after 623 * race then the buffered zeroing could be written back after
624 * the O_DIRECT I/O. It's one thing to tell people not to mix 624 * the O_DIRECT I/O. It's one thing to tell people not to mix
625 * buffered and O_DIRECT writes, but expecting them to 625 * buffered and O_DIRECT writes, but expecting them to
626 * understand that file extension is also an implicit buffered 626 * understand that file extension is also an implicit buffered
627 * write is too much. By getting the PR we force writeback of 627 * write is too much. By getting the PR we force writeback of
628 * the buffered zeroing before proceeding. 628 * the buffered zeroing before proceeding.
629 */ 629 */
630 ret = ocfs2_data_lock(inode, 0); 630 ret = ocfs2_data_lock(inode, 0);
631 if (ret < 0) { 631 if (ret < 0) {
632 mlog_errno(ret); 632 mlog_errno(ret);
633 goto out; 633 goto out;
634 } 634 }
635 ocfs2_data_unlock(inode, 0); 635 ocfs2_data_unlock(inode, 0);
636 636
637 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, 637 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
638 inode->i_sb->s_bdev, iov, offset, 638 inode->i_sb->s_bdev, iov, offset,
639 nr_segs, 639 nr_segs,
640 ocfs2_direct_IO_get_blocks, 640 ocfs2_direct_IO_get_blocks,
641 ocfs2_dio_end_io); 641 ocfs2_dio_end_io);
642 out: 642 out:
643 mlog_exit(ret); 643 mlog_exit(ret);
644 return ret; 644 return ret;
645 } 645 }
646 646
/*
 * Address space operations for ocfs2 inodes. Direct i/o goes through
 * ocfs2_direct_IO() above; the remaining handlers are defined elsewhere
 * in this file or (block_sync_page) by the generic block layer.
 */
const struct address_space_operations ocfs2_aops = {
	.readpage	= ocfs2_readpage,
	.writepage	= ocfs2_writepage,
	.prepare_write	= ocfs2_prepare_write,
	.commit_write	= ocfs2_commit_write,
	.bmap		= ocfs2_bmap,
	.sync_page	= block_sync_page,
	.direct_IO	= ocfs2_direct_IO
};
656 656
1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * dir.c 4 * dir.c
5 * 5 *
6 * Creates, reads, walks and deletes directory-nodes 6 * Creates, reads, walks and deletes directory-nodes
7 * 7 *
8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
9 * 9 *
10 * Portions of this code from linux/fs/ext3/dir.c 10 * Portions of this code from linux/fs/ext3/dir.c
11 * 11 *
12 * Copyright (C) 1992, 1993, 1994, 1995 12 * Copyright (C) 1992, 1993, 1994, 1995
13 * Remy Card (card@masi.ibp.fr) 13 * Remy Card (card@masi.ibp.fr)
14 * Laboratoire MASI - Institut Blaise pascal 14 * Laboratoire MASI - Institut Blaise pascal
15 * Universite Pierre et Marie Curie (Paris VI) 15 * Universite Pierre et Marie Curie (Paris VI)
16 * 16 *
17 * from 17 * from
18 * 18 *
19 * linux/fs/minix/dir.c 19 * linux/fs/minix/dir.c
20 * 20 *
21 * Copyright (C) 1991, 1992 Linux Torvalds 21 * Copyright (C) 1991, 1992 Linux Torvalds
22 * 22 *
23 * This program is free software; you can redistribute it and/or 23 * This program is free software; you can redistribute it and/or
24 * modify it under the terms of the GNU General Public 24 * modify it under the terms of the GNU General Public
25 * License as published by the Free Software Foundation; either 25 * License as published by the Free Software Foundation; either
26 * version 2 of the License, or (at your option) any later version. 26 * version 2 of the License, or (at your option) any later version.
27 * 27 *
28 * This program is distributed in the hope that it will be useful, 28 * This program is distributed in the hope that it will be useful,
29 * but WITHOUT ANY WARRANTY; without even the implied warranty of 29 * but WITHOUT ANY WARRANTY; without even the implied warranty of
30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31 * General Public License for more details. 31 * General Public License for more details.
32 * 32 *
33 * You should have received a copy of the GNU General Public 33 * You should have received a copy of the GNU General Public
34 * License along with this program; if not, write to the 34 * License along with this program; if not, write to the
35 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 35 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
36 * Boston, MA 021110-1307, USA. 36 * Boston, MA 021110-1307, USA.
37 */ 37 */
38 38
39 #include <linux/fs.h> 39 #include <linux/fs.h>
40 #include <linux/types.h> 40 #include <linux/types.h>
41 #include <linux/slab.h> 41 #include <linux/slab.h>
42 #include <linux/highmem.h> 42 #include <linux/highmem.h>
43 43
44 #define MLOG_MASK_PREFIX ML_NAMEI 44 #define MLOG_MASK_PREFIX ML_NAMEI
45 #include <cluster/masklog.h> 45 #include <cluster/masklog.h>
46 46
47 #include "ocfs2.h" 47 #include "ocfs2.h"
48 48
49 #include "alloc.h" 49 #include "alloc.h"
50 #include "dir.h" 50 #include "dir.h"
51 #include "dlmglue.h" 51 #include "dlmglue.h"
52 #include "extent_map.h" 52 #include "extent_map.h"
53 #include "file.h" 53 #include "file.h"
54 #include "inode.h" 54 #include "inode.h"
55 #include "journal.h" 55 #include "journal.h"
56 #include "namei.h" 56 #include "namei.h"
57 #include "suballoc.h" 57 #include "suballoc.h"
58 #include "uptodate.h" 58 #include "uptodate.h"
59 59
60 #include "buffer_head_io.h" 60 #include "buffer_head_io.h"
61 61
/* Maps on-disk OCFS2_FT_* file type codes (used as the index) to the
 * DT_* values readdir reports to userspace. */
static unsigned char ocfs2_filetype_table[] = {
	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};

/* Forward declaration; defined later in this file. */
static int ocfs2_extend_dir(struct ocfs2_super *osb,
			    struct inode *dir,
			    struct buffer_head *parent_fe_bh,
			    struct buffer_head **new_de_bh);
/*
 * ocfs2_readdir()
 *
 * Walks the directory behind @filp starting at filp->f_pos, handing each
 * live entry to @filldir with @dirent as its cookie. Takes the meta
 * cluster lock (downgrading EX->PR after an atime update) for the walk.
 * Returns 0 on success or a negative error code.
 */
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
	int error = 0;
	unsigned long offset, blk, last_ra_blk = 0;
	int i, stored;
	struct buffer_head * bh, * tmp;
	struct ocfs2_dir_entry * de;
	int err;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct super_block * sb = inode->i_sb;
	unsigned int ra_sectors = 16;
	int lock_level = 0;

	mlog_entry("dirino=%llu\n",
		   (unsigned long long)OCFS2_I(inode)->ip_blkno);

	stored = 0;
	bh = NULL;

	error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
	if (lock_level && error >= 0) {
		/* We release EX lock which used to update atime
		 * and get PR lock again to reduce contention
		 * on commonly accessed directories. */
		ocfs2_meta_unlock(inode, 1);
		lock_level = 0;
		error = ocfs2_meta_lock(inode, NULL, 0);
	}
	if (error < 0) {
		if (error != -ENOENT)
			mlog_errno(error);
		/* we haven't got any yet, so propagate the error. */
		stored = error;
		goto bail_nolock;
	}

	/* Byte offset within the current directory block. */
	offset = filp->f_pos & (sb->s_blocksize - 1);

	while (!error && !stored && filp->f_pos < i_size_read(inode)) {
		blk = (filp->f_pos) >> sb->s_blocksize_bits;
		bh = ocfs2_bread(inode, blk, &err, 0);
		if (!bh) {
			/* A hole in a directory is corruption; log it and
			 * skip f_pos past the missing block. */
			mlog(ML_ERROR,
			     "directory #%llu contains a hole at offset %lld\n",
			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			     filp->f_pos);
			filp->f_pos += sb->s_blocksize - offset;
			continue;
		}

		/* The idea here is to begin with 8k read-ahead and to stay
		 * 4k ahead of our current position.
		 *
		 * TODO: Use the pagecache for this. We just need to
		 * make sure it's cluster-safe... */
		if (!last_ra_blk
		    || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
			for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
			     i > 0; i--) {
				tmp = ocfs2_bread(inode, ++blk, &err, 1);
				if (tmp)
					brelse(tmp);
			}
			last_ra_blk = blk;
			ra_sectors = 8;
		}

revalidate:
		/* If the dir block has changed since the last call to
		 * readdir(2), then we might be pointing to an invalid
		 * dirent right now. Scan from the start of the block
		 * to make sure. */
		if (filp->f_version != inode->i_version) {
			for (i = 0; i < sb->s_blocksize && i < offset; ) {
				de = (struct ocfs2_dir_entry *) (bh->b_data + i);
				/* It's too expensive to do a full
				 * dirent test each time round this
				 * loop, but we do have to test at
				 * least that it is non-zero. A
				 * failure will be detected in the
				 * dirent test below. */
				if (le16_to_cpu(de->rec_len) <
				    OCFS2_DIR_REC_LEN(1))
					break;
				i += le16_to_cpu(de->rec_len);
			}
			/* Resync offset/f_pos to the rescanned position. */
			offset = i;
			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
				| offset;
			filp->f_version = inode->i_version;
		}

		while (!error && filp->f_pos < i_size_read(inode)
		       && offset < sb->s_blocksize) {
			de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
			if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
				/* On error, skip the f_pos to the
				   next block. */
				filp->f_pos = (filp->f_pos |
					       (sb->s_blocksize - 1)) + 1;
				brelse(bh);
				goto bail;
			}
			offset += le16_to_cpu(de->rec_len);
			/* A zero inode marks a deleted/unused dirent. */
			if (le64_to_cpu(de->inode)) {
				/* We might block in the next section
				 * if the data destination is
				 * currently swapped out. So, use a
				 * version stamp to detect whether or
				 * not the directory has been modified
				 * during the copy operation.
				 */
				unsigned long version = filp->f_version;
				unsigned char d_type = DT_UNKNOWN;

				if (de->file_type < OCFS2_FT_MAX)
					d_type = ocfs2_filetype_table[de->file_type];
				error = filldir(dirent, de->name,
						de->name_len,
						filp->f_pos,
						ino_from_blkno(sb, le64_to_cpu(de->inode)),
						d_type);
				if (error)
					break;
				if (version != filp->f_version)
					goto revalidate;
				stored ++;
			}
			filp->f_pos += le16_to_cpu(de->rec_len);
		}
		offset = 0;
		brelse(bh);
	}

	/* Normal completion: report success regardless of entries stored. */
	stored = 0;
bail:
	ocfs2_meta_unlock(inode, lock_level);

bail_nolock:
	mlog_exit(stored);

	return stored;
}
217 217
218 /* 218 /*
219 * NOTE: this should always be called with parent dir i_mutex taken. 219 * NOTE: this should always be called with parent dir i_mutex taken.
220 */ 220 */
221 int ocfs2_find_files_on_disk(const char *name, 221 int ocfs2_find_files_on_disk(const char *name,
222 int namelen, 222 int namelen,
223 u64 *blkno, 223 u64 *blkno,
224 struct inode *inode, 224 struct inode *inode,
225 struct buffer_head **dirent_bh, 225 struct buffer_head **dirent_bh,
226 struct ocfs2_dir_entry **dirent) 226 struct ocfs2_dir_entry **dirent)
227 { 227 {
228 int status = -ENOENT; 228 int status = -ENOENT;
229 229
230 mlog_entry("(name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p)\n", 230 mlog_entry("(name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p)\n",
231 namelen, name, blkno, inode, dirent_bh, dirent); 231 namelen, name, blkno, inode, dirent_bh, dirent);
232 232
233 *dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent); 233 *dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent);
234 if (!*dirent_bh || !*dirent) { 234 if (!*dirent_bh || !*dirent) {
235 status = -ENOENT; 235 status = -ENOENT;
236 goto leave; 236 goto leave;
237 } 237 }
238 238
239 *blkno = le64_to_cpu((*dirent)->inode); 239 *blkno = le64_to_cpu((*dirent)->inode);
240 240
241 status = 0; 241 status = 0;
242 leave: 242 leave:
243 if (status < 0) { 243 if (status < 0) {
244 *dirent = NULL; 244 *dirent = NULL;
245 if (*dirent_bh) { 245 if (*dirent_bh) {
246 brelse(*dirent_bh); 246 brelse(*dirent_bh);
247 *dirent_bh = NULL; 247 *dirent_bh = NULL;
248 } 248 }
249 } 249 }
250 250
251 mlog_exit(status); 251 mlog_exit(status);
252 return status; 252 return status;
253 } 253 }
254 254
255 /* Check for a name within a directory. 255 /* Check for a name within a directory.
256 * 256 *
257 * Return 0 if the name does not exist 257 * Return 0 if the name does not exist
258 * Return -EEXIST if the directory contains the name 258 * Return -EEXIST if the directory contains the name
259 * 259 *
260 * Callers should have i_mutex + a cluster lock on dir 260 * Callers should have i_mutex + a cluster lock on dir
261 */ 261 */
262 int ocfs2_check_dir_for_entry(struct inode *dir, 262 int ocfs2_check_dir_for_entry(struct inode *dir,
263 const char *name, 263 const char *name,
264 int namelen) 264 int namelen)
265 { 265 {
266 int ret; 266 int ret;
267 struct buffer_head *dirent_bh = NULL; 267 struct buffer_head *dirent_bh = NULL;
268 struct ocfs2_dir_entry *dirent = NULL; 268 struct ocfs2_dir_entry *dirent = NULL;
269 269
270 mlog_entry("dir %llu, name '%.*s'\n", 270 mlog_entry("dir %llu, name '%.*s'\n",
271 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); 271 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
272 272
273 ret = -EEXIST; 273 ret = -EEXIST;
274 dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent); 274 dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent);
275 if (dirent_bh) 275 if (dirent_bh)
276 goto bail; 276 goto bail;
277 277
278 ret = 0; 278 ret = 0;
279 bail: 279 bail:
280 if (dirent_bh) 280 if (dirent_bh)
281 brelse(dirent_bh); 281 brelse(dirent_bh);
282 282
283 mlog_exit(ret); 283 mlog_exit(ret);
284 return ret; 284 return ret;
285 } 285 }
286 286
/*
 * routine to check that the specified directory is empty (for rmdir)
 *
 * Returns 1 when the directory contains nothing but "." and ".." — and
 * also for the corruption cases (missing data block, bad first entries,
 * bad dirent), which are logged. Returns 0 if any other live entry is
 * found.
 */
int ocfs2_empty_dir(struct inode *inode)
{
	unsigned long offset;
	struct buffer_head * bh;
	struct ocfs2_dir_entry * de, * de1;
	struct super_block * sb;
	int err;

	sb = inode->i_sb;
	/* A well-formed directory holds at least "." and "..". */
	if ((i_size_read(inode) <
	     (OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) ||
	    !(bh = ocfs2_bread(inode, 0, &err, 0))) {
		mlog(ML_ERROR, "bad directory (dir #%llu) - no data block\n",
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
		return 1;
	}

	/* The first entry must be "." pointing back at this inode, the
	 * second a live "..".
	 * NOTE(review): strcmp() here assumes de->name is NUL-terminated
	 * for these two entries — confirm the on-disk format guarantees
	 * that. */
	de = (struct ocfs2_dir_entry *) bh->b_data;
	de1 = (struct ocfs2_dir_entry *)
			((char *)de + le16_to_cpu(de->rec_len));
	if ((le64_to_cpu(de->inode) != OCFS2_I(inode)->ip_blkno) ||
			!le64_to_cpu(de1->inode) ||
			strcmp(".", de->name) ||
			strcmp("..", de1->name)) {
		mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
		brelse(bh);
		return 1;
	}
	offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
	de = (struct ocfs2_dir_entry *)((char *)de1 + le16_to_cpu(de1->rec_len));
	/* Walk the rest of the directory; any dirent with a non-zero
	 * inode means it is not empty. */
	while (offset < i_size_read(inode) ) {
		/* Crossed a block boundary (or skipped a hole): read the
		 * block containing the current offset. */
		if (!bh || (void *)de >= (void *)(bh->b_data + sb->s_blocksize)) {
			brelse(bh);
			bh = ocfs2_bread(inode,
					 offset >> sb->s_blocksize_bits, &err, 0);
			if (!bh) {
				mlog(ML_ERROR, "dir %llu has a hole at %lu\n",
				     (unsigned long long)OCFS2_I(inode)->ip_blkno, offset);
				offset += sb->s_blocksize;
				continue;
			}
			de = (struct ocfs2_dir_entry *) bh->b_data;
		}
		if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
			/* Corrupt dirent: treat as empty (logged by the
			 * check helper). */
			brelse(bh);
			return 1;
		}
		if (le64_to_cpu(de->inode)) {
			brelse(bh);
			return 0;
		}
		offset += le16_to_cpu(de->rec_len);
		de = (struct ocfs2_dir_entry *)
			((char *)de + le16_to_cpu(de->rec_len));
	}
	brelse(bh);
	return 1;
}
349 349
/* returns a bh of the 1st new block in the allocation. */
/*
 * Grow directory @dir by one block inside the already-running
 * transaction @handle. When i_size has reached the end of the allocated
 * clusters a new cluster is allocated first (consuming @data_ac, and
 * @meta_ac if the extent tree needs metadata); otherwise the block
 * comes from existing allocation. On success *new_bh holds a getblk'd
 * (uninitialized) buffer for the new block; the caller owns the ref.
 */
int ocfs2_do_extend_dir(struct super_block *sb,
			handle_t *handle,
			struct inode *dir,
			struct buffer_head *parent_fe_bh,
			struct ocfs2_alloc_context *data_ac,
			struct ocfs2_alloc_context *meta_ac,
			struct buffer_head **new_bh)
{
	int status;
	int extend;
	u64 p_blkno;

	/* ip_clusters is read under ip_lock; dir i_size is always block
	 * aligned, so equality means the allocation is fully used. */
	spin_lock(&OCFS2_I(dir)->ip_lock);
	extend = (i_size_read(dir) == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters));
	spin_unlock(&OCFS2_I(dir)->ip_lock);

	if (extend) {
		status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, 1,
						    parent_fe_bh, handle,
						    data_ac, meta_ac, NULL);
		/* Callers reserved the allocation up front, so -EAGAIN
		 * here would be a logic error. */
		BUG_ON(status == -EAGAIN);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	/* Map the logical block just past the old end of the directory
	 * (i_blocks is in 512-byte sectors, hence the shift). */
	status = ocfs2_extent_map_get_blocks(dir, (dir->i_blocks >>
						   (sb->s_blocksize_bits - 9)),
					     1, &p_blkno, NULL);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	*new_bh = sb_getblk(sb, p_blkno);
	if (!*new_bh) {
		status = -EIO;
		mlog_errno(status);
		goto bail;
	}
	status = 0;
bail:
	mlog_exit(status);
	return status;
}
397 397
398 /* assumes you already have a cluster lock on the directory. */ 398 /* assumes you already have a cluster lock on the directory. */
399 static int ocfs2_extend_dir(struct ocfs2_super *osb, 399 static int ocfs2_extend_dir(struct ocfs2_super *osb,
400 struct inode *dir, 400 struct inode *dir,
401 struct buffer_head *parent_fe_bh, 401 struct buffer_head *parent_fe_bh,
402 struct buffer_head **new_de_bh) 402 struct buffer_head **new_de_bh)
403 { 403 {
404 int status = 0; 404 int status = 0;
405 int credits, num_free_extents; 405 int credits, num_free_extents;
406 loff_t dir_i_size; 406 loff_t dir_i_size;
407 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; 407 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
408 struct ocfs2_alloc_context *data_ac = NULL; 408 struct ocfs2_alloc_context *data_ac = NULL;
409 struct ocfs2_alloc_context *meta_ac = NULL; 409 struct ocfs2_alloc_context *meta_ac = NULL;
410 handle_t *handle = NULL; 410 handle_t *handle = NULL;
411 struct buffer_head *new_bh = NULL; 411 struct buffer_head *new_bh = NULL;
412 struct ocfs2_dir_entry * de; 412 struct ocfs2_dir_entry * de;
413 struct super_block *sb = osb->sb; 413 struct super_block *sb = osb->sb;
414 414
415 mlog_entry_void(); 415 mlog_entry_void();
416 416
417 dir_i_size = i_size_read(dir); 417 dir_i_size = i_size_read(dir);
418 mlog(0, "extending dir %llu (i_size = %lld)\n", 418 mlog(0, "extending dir %llu (i_size = %lld)\n",
419 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); 419 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);
420 420
421 /* dir->i_size is always block aligned. */ 421 /* dir->i_size is always block aligned. */
422 spin_lock(&OCFS2_I(dir)->ip_lock); 422 spin_lock(&OCFS2_I(dir)->ip_lock);
423 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { 423 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
424 spin_unlock(&OCFS2_I(dir)->ip_lock); 424 spin_unlock(&OCFS2_I(dir)->ip_lock);
425 num_free_extents = ocfs2_num_free_extents(osb, dir, fe); 425 num_free_extents = ocfs2_num_free_extents(osb, dir, fe);
426 if (num_free_extents < 0) { 426 if (num_free_extents < 0) {
427 status = num_free_extents; 427 status = num_free_extents;
428 mlog_errno(status); 428 mlog_errno(status);
429 goto bail; 429 goto bail;
430 } 430 }
431 431
432 if (!num_free_extents) { 432 if (!num_free_extents) {
433 status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac); 433 status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac);
434 if (status < 0) { 434 if (status < 0) {
435 if (status != -ENOSPC) 435 if (status != -ENOSPC)
436 mlog_errno(status); 436 mlog_errno(status);
437 goto bail; 437 goto bail;
438 } 438 }
439 } 439 }
440 440
441 status = ocfs2_reserve_clusters(osb, 1, &data_ac); 441 status = ocfs2_reserve_clusters(osb, 1, &data_ac);
442 if (status < 0) { 442 if (status < 0) {
443 if (status != -ENOSPC) 443 if (status != -ENOSPC)
444 mlog_errno(status); 444 mlog_errno(status);
445 goto bail; 445 goto bail;
446 } 446 }
447 447
448 credits = ocfs2_calc_extend_credits(sb, fe, 1); 448 credits = ocfs2_calc_extend_credits(sb, fe, 1);
449 } else { 449 } else {
450 spin_unlock(&OCFS2_I(dir)->ip_lock); 450 spin_unlock(&OCFS2_I(dir)->ip_lock);
451 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; 451 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
452 } 452 }
453 453
454 handle = ocfs2_start_trans(osb, credits); 454 handle = ocfs2_start_trans(osb, credits);
455 if (IS_ERR(handle)) { 455 if (IS_ERR(handle)) {
456 status = PTR_ERR(handle); 456 status = PTR_ERR(handle);
457 handle = NULL; 457 handle = NULL;
458 mlog_errno(status); 458 mlog_errno(status);
459 goto bail; 459 goto bail;
460 } 460 }
461 461
462 status = ocfs2_do_extend_dir(osb->sb, handle, dir, parent_fe_bh, 462 status = ocfs2_do_extend_dir(osb->sb, handle, dir, parent_fe_bh,
463 data_ac, meta_ac, &new_bh); 463 data_ac, meta_ac, &new_bh);
464 if (status < 0) { 464 if (status < 0) {
465 mlog_errno(status); 465 mlog_errno(status);
466 goto bail; 466 goto bail;
467 } 467 }
468 468
469 ocfs2_set_new_buffer_uptodate(dir, new_bh); 469 ocfs2_set_new_buffer_uptodate(dir, new_bh);
470 470
471 status = ocfs2_journal_access(handle, dir, new_bh, 471 status = ocfs2_journal_access(handle, dir, new_bh,
472 OCFS2_JOURNAL_ACCESS_CREATE); 472 OCFS2_JOURNAL_ACCESS_CREATE);
473 if (status < 0) { 473 if (status < 0) {
474 mlog_errno(status); 474 mlog_errno(status);
475 goto bail; 475 goto bail;
476 } 476 }
477 memset(new_bh->b_data, 0, sb->s_blocksize); 477 memset(new_bh->b_data, 0, sb->s_blocksize);
478 de = (struct ocfs2_dir_entry *) new_bh->b_data; 478 de = (struct ocfs2_dir_entry *) new_bh->b_data;
479 de->inode = 0; 479 de->inode = 0;
480 de->rec_len = cpu_to_le16(sb->s_blocksize); 480 de->rec_len = cpu_to_le16(sb->s_blocksize);
481 status = ocfs2_journal_dirty(handle, new_bh); 481 status = ocfs2_journal_dirty(handle, new_bh);
482 if (status < 0) { 482 if (status < 0) {
483 mlog_errno(status); 483 mlog_errno(status);
484 goto bail; 484 goto bail;
485 } 485 }
486 486
487 dir_i_size += dir->i_sb->s_blocksize; 487 dir_i_size += dir->i_sb->s_blocksize;
488 i_size_write(dir, dir_i_size); 488 i_size_write(dir, dir_i_size);
489 dir->i_blocks = ocfs2_align_bytes_to_sectors(dir_i_size); 489 dir->i_blocks = ocfs2_align_bytes_to_sectors(dir_i_size);
490 status = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh); 490 status = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
491 if (status < 0) { 491 if (status < 0) {
492 mlog_errno(status); 492 mlog_errno(status);
493 goto bail; 493 goto bail;
494 } 494 }
495 495
496 *new_de_bh = new_bh; 496 *new_de_bh = new_bh;
497 get_bh(*new_de_bh); 497 get_bh(*new_de_bh);
498 bail: 498 bail:
499 if (handle) 499 if (handle)
500 ocfs2_commit_trans(osb, handle); 500 ocfs2_commit_trans(osb, handle);
501 501
502 if (data_ac) 502 if (data_ac)
503 ocfs2_free_alloc_context(data_ac); 503 ocfs2_free_alloc_context(data_ac);
504 if (meta_ac) 504 if (meta_ac)
505 ocfs2_free_alloc_context(meta_ac); 505 ocfs2_free_alloc_context(meta_ac);
506 506
507 if (new_bh) 507 if (new_bh)
508 brelse(new_bh); 508 brelse(new_bh);
509 509
510 mlog_exit(status); 510 mlog_exit(status);
511 return status; 511 return status;
512 } 512 }
513 513
514 /* 514 /*
515 * Search the dir for a good spot, extending it if necessary. The 515 * Search the dir for a good spot, extending it if necessary. The
516 * block containing an appropriate record is returned in ret_de_bh. 516 * block containing an appropriate record is returned in ret_de_bh.
517 */ 517 */
int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
				 struct inode *dir,
				 struct buffer_head *parent_fe_bh,
				 const char *name,
				 int namelen,
				 struct buffer_head **ret_de_bh)
{
	unsigned long offset;
	struct buffer_head * bh = NULL;
	unsigned short rec_len;
	struct ocfs2_dinode *fe;
	struct ocfs2_dir_entry *de;
	struct super_block *sb;
	int status;

	mlog_entry_void();

	mlog(0, "getting ready to insert namelen %d into dir %llu\n",
	     namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno);

	/* Only directories may be searched, and the on-disk size cached
	 * in the dinode must agree with the VFS inode size. */
	BUG_ON(!S_ISDIR(dir->i_mode));
	fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
	BUG_ON(le64_to_cpu(fe->i_size) != i_size_read(dir));

	sb = dir->i_sb;

	if (!namelen) {
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	/* Start the scan at logical block 0 of the directory. */
	bh = ocfs2_bread(dir, 0, &status, 0);
	if (!bh) {
		mlog_errno(status);
		goto bail;
	}

	/* Minimum record size needed to hold an entry of this name length. */
	rec_len = OCFS2_DIR_REC_LEN(namelen);
	offset = 0;
	de = (struct ocfs2_dir_entry *) bh->b_data;
	while (1) {
		/* Walked off the end of the current block? */
		if ((char *)de >= sb->s_blocksize + bh->b_data) {
			brelse(bh);
			bh = NULL;

			/* Past i_size means no existing block has room:
			 * grow the directory and hand back the fresh
			 * (empty) block for the caller to fill in. */
			if (i_size_read(dir) <= offset) {
				status = ocfs2_extend_dir(osb,
							  dir,
							  parent_fe_bh,
							  &bh);
				if (status < 0) {
					mlog_errno(status);
					goto bail;
				}
				BUG_ON(!bh);
				*ret_de_bh = bh;
				get_bh(*ret_de_bh);
				goto bail;
			}
			/* Otherwise read the next directory block. Note
			 * that "offset" was already advanced past the end
			 * of the previous block by the loop below. */
			bh = ocfs2_bread(dir,
					 offset >> sb->s_blocksize_bits,
					 &status,
					 0);
			if (!bh) {
				mlog_errno(status);
				goto bail;
			}
			/* move to next block */
			de = (struct ocfs2_dir_entry *) bh->b_data;
		}
		/* A corrupt record aborts the scan. */
		if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
			status = -ENOENT;
			goto bail;
		}
		if (ocfs2_match(namelen, name, de)) {
			status = -EEXIST;
			goto bail;
		}
		/* A record fits here if it is unused (inode == 0) and big
		 * enough on its own, or if it is live but has enough slack
		 * after its own minimal length to be split. */
		if (((le64_to_cpu(de->inode) == 0) &&
		     (le16_to_cpu(de->rec_len) >= rec_len)) ||
		    (le16_to_cpu(de->rec_len) >=
		     (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
			/* Ok, we found a spot. Return this bh and let
			 * the caller actually fill it in. */
			*ret_de_bh = bh;
			get_bh(*ret_de_bh);
			status = 0;
			goto bail;
		}
		offset += le16_to_cpu(de->rec_len);
		de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
	}

	status = 0;
bail:
	if (bh)
		brelse(bh);

	mlog_exit(status);
	return status;
}
620 620
fs/ocfs2/dlm/dlmfs.c
1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * dlmfs.c 4 * dlmfs.c
5 * 5 *
6 * Code which implements the kernel side of a minimal userspace 6 * Code which implements the kernel side of a minimal userspace
7 * interface to our DLM. This file handles the virtual file system 7 * interface to our DLM. This file handles the virtual file system
8 * used for communication with userspace. Credit should go to ramfs, 8 * used for communication with userspace. Credit should go to ramfs,
9 * which was a template for the fs side of this module. 9 * which was a template for the fs side of this module.
10 * 10 *
11 * Copyright (C) 2003, 2004 Oracle. All rights reserved. 11 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
12 * 12 *
13 * This program is free software; you can redistribute it and/or 13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public 14 * modify it under the terms of the GNU General Public
15 * License as published by the Free Software Foundation; either 15 * License as published by the Free Software Foundation; either
16 * version 2 of the License, or (at your option) any later version. 16 * version 2 of the License, or (at your option) any later version.
17 * 17 *
18 * This program is distributed in the hope that it will be useful, 18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details. 21 * General Public License for more details.
22 * 22 *
23 * You should have received a copy of the GNU General Public 23 * You should have received a copy of the GNU General Public
24 * License along with this program; if not, write to the 24 * License along with this program; if not, write to the
25 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 25 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 * Boston, MA 021110-1307, USA. 26 * Boston, MA 021110-1307, USA.
27 */ 27 */
28 28
29 /* Simple VFS hooks based on: */ 29 /* Simple VFS hooks based on: */
30 /* 30 /*
31 * Resizable simple ram filesystem for Linux. 31 * Resizable simple ram filesystem for Linux.
32 * 32 *
33 * Copyright (C) 2000 Linus Torvalds. 33 * Copyright (C) 2000 Linus Torvalds.
34 * 2000 Transmeta Corp. 34 * 2000 Transmeta Corp.
35 */ 35 */
36 36
37 #include <linux/module.h> 37 #include <linux/module.h>
38 #include <linux/fs.h> 38 #include <linux/fs.h>
39 #include <linux/pagemap.h> 39 #include <linux/pagemap.h>
40 #include <linux/types.h> 40 #include <linux/types.h>
41 #include <linux/slab.h> 41 #include <linux/slab.h>
42 #include <linux/highmem.h> 42 #include <linux/highmem.h>
43 #include <linux/init.h> 43 #include <linux/init.h>
44 #include <linux/string.h> 44 #include <linux/string.h>
45 #include <linux/smp_lock.h> 45 #include <linux/smp_lock.h>
46 #include <linux/backing-dev.h> 46 #include <linux/backing-dev.h>
47 47
48 #include <asm/uaccess.h> 48 #include <asm/uaccess.h>
49 49
50 50
51 #include "cluster/nodemanager.h" 51 #include "cluster/nodemanager.h"
52 #include "cluster/heartbeat.h" 52 #include "cluster/heartbeat.h"
53 #include "cluster/tcp.h" 53 #include "cluster/tcp.h"
54 54
55 #include "dlmapi.h" 55 #include "dlmapi.h"
56 56
57 #include "userdlm.h" 57 #include "userdlm.h"
58 58
59 #include "dlmfsver.h" 59 #include "dlmfsver.h"
60 60
61 #define MLOG_MASK_PREFIX ML_DLMFS 61 #define MLOG_MASK_PREFIX ML_DLMFS
62 #include "cluster/masklog.h" 62 #include "cluster/masklog.h"
63 63
64 static struct super_operations dlmfs_ops; 64 static struct super_operations dlmfs_ops;
65 static struct file_operations dlmfs_file_operations; 65 static struct file_operations dlmfs_file_operations;
66 static struct inode_operations dlmfs_dir_inode_operations; 66 static struct inode_operations dlmfs_dir_inode_operations;
67 static struct inode_operations dlmfs_root_inode_operations; 67 static struct inode_operations dlmfs_root_inode_operations;
68 static struct inode_operations dlmfs_file_inode_operations; 68 static struct inode_operations dlmfs_file_inode_operations;
69 static struct kmem_cache *dlmfs_inode_cache; 69 static struct kmem_cache *dlmfs_inode_cache;
70 70
71 struct workqueue_struct *user_dlm_worker; 71 struct workqueue_struct *user_dlm_worker;
72 72
73 /* 73 /*
74 * decodes a set of open flags into a valid lock level and a set of flags. 74 * decodes a set of open flags into a valid lock level and a set of flags.
75 * returns < 0 if we have invalid flags 75 * returns < 0 if we have invalid flags
76 * flags which mean something to us: 76 * flags which mean something to us:
77 * O_RDONLY -> PRMODE level 77 * O_RDONLY -> PRMODE level
78 * O_WRONLY -> EXMODE level 78 * O_WRONLY -> EXMODE level
79 * 79 *
80 * O_NONBLOCK -> LKM_NOQUEUE 80 * O_NONBLOCK -> LKM_NOQUEUE
81 */ 81 */
82 static int dlmfs_decode_open_flags(int open_flags, 82 static int dlmfs_decode_open_flags(int open_flags,
83 int *level, 83 int *level,
84 int *flags) 84 int *flags)
85 { 85 {
86 if (open_flags & (O_WRONLY|O_RDWR)) 86 if (open_flags & (O_WRONLY|O_RDWR))
87 *level = LKM_EXMODE; 87 *level = LKM_EXMODE;
88 else 88 else
89 *level = LKM_PRMODE; 89 *level = LKM_PRMODE;
90 90
91 *flags = 0; 91 *flags = 0;
92 if (open_flags & O_NONBLOCK) 92 if (open_flags & O_NONBLOCK)
93 *flags |= LKM_NOQUEUE; 93 *flags |= LKM_NOQUEUE;
94 94
95 return 0; 95 return 0;
96 } 96 }
97 97
98 static int dlmfs_file_open(struct inode *inode, 98 static int dlmfs_file_open(struct inode *inode,
99 struct file *file) 99 struct file *file)
100 { 100 {
101 int status, level, flags; 101 int status, level, flags;
102 struct dlmfs_filp_private *fp = NULL; 102 struct dlmfs_filp_private *fp = NULL;
103 struct dlmfs_inode_private *ip; 103 struct dlmfs_inode_private *ip;
104 104
105 if (S_ISDIR(inode->i_mode)) 105 if (S_ISDIR(inode->i_mode))
106 BUG(); 106 BUG();
107 107
108 mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino, 108 mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino,
109 file->f_flags); 109 file->f_flags);
110 110
111 status = dlmfs_decode_open_flags(file->f_flags, &level, &flags); 111 status = dlmfs_decode_open_flags(file->f_flags, &level, &flags);
112 if (status < 0) 112 if (status < 0)
113 goto bail; 113 goto bail;
114 114
115 /* We don't want to honor O_APPEND at read/write time as it 115 /* We don't want to honor O_APPEND at read/write time as it
116 * doesn't make sense for LVB writes. */ 116 * doesn't make sense for LVB writes. */
117 file->f_flags &= ~O_APPEND; 117 file->f_flags &= ~O_APPEND;
118 118
119 fp = kmalloc(sizeof(*fp), GFP_NOFS); 119 fp = kmalloc(sizeof(*fp), GFP_NOFS);
120 if (!fp) { 120 if (!fp) {
121 status = -ENOMEM; 121 status = -ENOMEM;
122 goto bail; 122 goto bail;
123 } 123 }
124 fp->fp_lock_level = level; 124 fp->fp_lock_level = level;
125 125
126 ip = DLMFS_I(inode); 126 ip = DLMFS_I(inode);
127 127
128 status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags); 128 status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags);
129 if (status < 0) { 129 if (status < 0) {
130 /* this is a strange error to return here but I want 130 /* this is a strange error to return here but I want
131 * to be able userspace to be able to distinguish a 131 * to be able userspace to be able to distinguish a
132 * valid lock request from one that simply couldn't be 132 * valid lock request from one that simply couldn't be
133 * granted. */ 133 * granted. */
134 if (flags & LKM_NOQUEUE && status == -EAGAIN) 134 if (flags & LKM_NOQUEUE && status == -EAGAIN)
135 status = -ETXTBSY; 135 status = -ETXTBSY;
136 kfree(fp); 136 kfree(fp);
137 goto bail; 137 goto bail;
138 } 138 }
139 139
140 file->private_data = fp; 140 file->private_data = fp;
141 bail: 141 bail:
142 return status; 142 return status;
143 } 143 }
144 144
145 static int dlmfs_file_release(struct inode *inode, 145 static int dlmfs_file_release(struct inode *inode,
146 struct file *file) 146 struct file *file)
147 { 147 {
148 int level, status; 148 int level, status;
149 struct dlmfs_inode_private *ip = DLMFS_I(inode); 149 struct dlmfs_inode_private *ip = DLMFS_I(inode);
150 struct dlmfs_filp_private *fp = 150 struct dlmfs_filp_private *fp =
151 (struct dlmfs_filp_private *) file->private_data; 151 (struct dlmfs_filp_private *) file->private_data;
152 152
153 if (S_ISDIR(inode->i_mode)) 153 if (S_ISDIR(inode->i_mode))
154 BUG(); 154 BUG();
155 155
156 mlog(0, "close called on inode %lu\n", inode->i_ino); 156 mlog(0, "close called on inode %lu\n", inode->i_ino);
157 157
158 status = 0; 158 status = 0;
159 if (fp) { 159 if (fp) {
160 level = fp->fp_lock_level; 160 level = fp->fp_lock_level;
161 if (level != LKM_IVMODE) 161 if (level != LKM_IVMODE)
162 user_dlm_cluster_unlock(&ip->ip_lockres, level); 162 user_dlm_cluster_unlock(&ip->ip_lockres, level);
163 163
164 kfree(fp); 164 kfree(fp);
165 file->private_data = NULL; 165 file->private_data = NULL;
166 } 166 }
167 167
168 return 0; 168 return 0;
169 } 169 }
170 170
171 static ssize_t dlmfs_file_read(struct file *filp, 171 static ssize_t dlmfs_file_read(struct file *filp,
172 char __user *buf, 172 char __user *buf,
173 size_t count, 173 size_t count,
174 loff_t *ppos) 174 loff_t *ppos)
175 { 175 {
176 int bytes_left; 176 int bytes_left;
177 ssize_t readlen; 177 ssize_t readlen;
178 char *lvb_buf; 178 char *lvb_buf;
179 struct inode *inode = filp->f_dentry->d_inode; 179 struct inode *inode = filp->f_path.dentry->d_inode;
180 180
181 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", 181 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
182 inode->i_ino, count, *ppos); 182 inode->i_ino, count, *ppos);
183 183
184 if (*ppos >= i_size_read(inode)) 184 if (*ppos >= i_size_read(inode))
185 return 0; 185 return 0;
186 186
187 if (!count) 187 if (!count)
188 return 0; 188 return 0;
189 189
190 if (!access_ok(VERIFY_WRITE, buf, count)) 190 if (!access_ok(VERIFY_WRITE, buf, count))
191 return -EFAULT; 191 return -EFAULT;
192 192
193 /* don't read past the lvb */ 193 /* don't read past the lvb */
194 if ((count + *ppos) > i_size_read(inode)) 194 if ((count + *ppos) > i_size_read(inode))
195 readlen = i_size_read(inode) - *ppos; 195 readlen = i_size_read(inode) - *ppos;
196 else 196 else
197 readlen = count - *ppos; 197 readlen = count - *ppos;
198 198
199 lvb_buf = kmalloc(readlen, GFP_NOFS); 199 lvb_buf = kmalloc(readlen, GFP_NOFS);
200 if (!lvb_buf) 200 if (!lvb_buf)
201 return -ENOMEM; 201 return -ENOMEM;
202 202
203 user_dlm_read_lvb(inode, lvb_buf, readlen); 203 user_dlm_read_lvb(inode, lvb_buf, readlen);
204 bytes_left = __copy_to_user(buf, lvb_buf, readlen); 204 bytes_left = __copy_to_user(buf, lvb_buf, readlen);
205 readlen -= bytes_left; 205 readlen -= bytes_left;
206 206
207 kfree(lvb_buf); 207 kfree(lvb_buf);
208 208
209 *ppos = *ppos + readlen; 209 *ppos = *ppos + readlen;
210 210
211 mlog(0, "read %zd bytes\n", readlen); 211 mlog(0, "read %zd bytes\n", readlen);
212 return readlen; 212 return readlen;
213 } 213 }
214 214
215 static ssize_t dlmfs_file_write(struct file *filp, 215 static ssize_t dlmfs_file_write(struct file *filp,
216 const char __user *buf, 216 const char __user *buf,
217 size_t count, 217 size_t count,
218 loff_t *ppos) 218 loff_t *ppos)
219 { 219 {
220 int bytes_left; 220 int bytes_left;
221 ssize_t writelen; 221 ssize_t writelen;
222 char *lvb_buf; 222 char *lvb_buf;
223 struct inode *inode = filp->f_dentry->d_inode; 223 struct inode *inode = filp->f_path.dentry->d_inode;
224 224
225 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", 225 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
226 inode->i_ino, count, *ppos); 226 inode->i_ino, count, *ppos);
227 227
228 if (*ppos >= i_size_read(inode)) 228 if (*ppos >= i_size_read(inode))
229 return -ENOSPC; 229 return -ENOSPC;
230 230
231 if (!count) 231 if (!count)
232 return 0; 232 return 0;
233 233
234 if (!access_ok(VERIFY_READ, buf, count)) 234 if (!access_ok(VERIFY_READ, buf, count))
235 return -EFAULT; 235 return -EFAULT;
236 236
237 /* don't write past the lvb */ 237 /* don't write past the lvb */
238 if ((count + *ppos) > i_size_read(inode)) 238 if ((count + *ppos) > i_size_read(inode))
239 writelen = i_size_read(inode) - *ppos; 239 writelen = i_size_read(inode) - *ppos;
240 else 240 else
241 writelen = count - *ppos; 241 writelen = count - *ppos;
242 242
243 lvb_buf = kmalloc(writelen, GFP_NOFS); 243 lvb_buf = kmalloc(writelen, GFP_NOFS);
244 if (!lvb_buf) 244 if (!lvb_buf)
245 return -ENOMEM; 245 return -ENOMEM;
246 246
247 bytes_left = copy_from_user(lvb_buf, buf, writelen); 247 bytes_left = copy_from_user(lvb_buf, buf, writelen);
248 writelen -= bytes_left; 248 writelen -= bytes_left;
249 if (writelen) 249 if (writelen)
250 user_dlm_write_lvb(inode, lvb_buf, writelen); 250 user_dlm_write_lvb(inode, lvb_buf, writelen);
251 251
252 kfree(lvb_buf); 252 kfree(lvb_buf);
253 253
254 *ppos = *ppos + writelen; 254 *ppos = *ppos + writelen;
255 mlog(0, "wrote %zd bytes\n", writelen); 255 mlog(0, "wrote %zd bytes\n", writelen);
256 return writelen; 256 return writelen;
257 } 257 }
258 258
259 static void dlmfs_init_once(void *foo, 259 static void dlmfs_init_once(void *foo,
260 struct kmem_cache *cachep, 260 struct kmem_cache *cachep,
261 unsigned long flags) 261 unsigned long flags)
262 { 262 {
263 struct dlmfs_inode_private *ip = 263 struct dlmfs_inode_private *ip =
264 (struct dlmfs_inode_private *) foo; 264 (struct dlmfs_inode_private *) foo;
265 265
266 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 266 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
267 SLAB_CTOR_CONSTRUCTOR) { 267 SLAB_CTOR_CONSTRUCTOR) {
268 ip->ip_dlm = NULL; 268 ip->ip_dlm = NULL;
269 ip->ip_parent = NULL; 269 ip->ip_parent = NULL;
270 270
271 inode_init_once(&ip->ip_vfs_inode); 271 inode_init_once(&ip->ip_vfs_inode);
272 } 272 }
273 } 273 }
274 274
275 static struct inode *dlmfs_alloc_inode(struct super_block *sb) 275 static struct inode *dlmfs_alloc_inode(struct super_block *sb)
276 { 276 {
277 struct dlmfs_inode_private *ip; 277 struct dlmfs_inode_private *ip;
278 278
279 ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS); 279 ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS);
280 if (!ip) 280 if (!ip)
281 return NULL; 281 return NULL;
282 282
283 return &ip->ip_vfs_inode; 283 return &ip->ip_vfs_inode;
284 } 284 }
285 285
286 static void dlmfs_destroy_inode(struct inode *inode) 286 static void dlmfs_destroy_inode(struct inode *inode)
287 { 287 {
288 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); 288 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
289 } 289 }
290 290
291 static void dlmfs_clear_inode(struct inode *inode) 291 static void dlmfs_clear_inode(struct inode *inode)
292 { 292 {
293 int status; 293 int status;
294 struct dlmfs_inode_private *ip; 294 struct dlmfs_inode_private *ip;
295 295
296 if (!inode) 296 if (!inode)
297 return; 297 return;
298 298
299 mlog(0, "inode %lu\n", inode->i_ino); 299 mlog(0, "inode %lu\n", inode->i_ino);
300 300
301 ip = DLMFS_I(inode); 301 ip = DLMFS_I(inode);
302 302
303 if (S_ISREG(inode->i_mode)) { 303 if (S_ISREG(inode->i_mode)) {
304 status = user_dlm_destroy_lock(&ip->ip_lockres); 304 status = user_dlm_destroy_lock(&ip->ip_lockres);
305 if (status < 0) 305 if (status < 0)
306 mlog_errno(status); 306 mlog_errno(status);
307 iput(ip->ip_parent); 307 iput(ip->ip_parent);
308 goto clear_fields; 308 goto clear_fields;
309 } 309 }
310 310
311 mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm); 311 mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm);
312 /* we must be a directory. If required, lets unregister the 312 /* we must be a directory. If required, lets unregister the
313 * dlm context now. */ 313 * dlm context now. */
314 if (ip->ip_dlm) 314 if (ip->ip_dlm)
315 user_dlm_unregister_context(ip->ip_dlm); 315 user_dlm_unregister_context(ip->ip_dlm);
316 clear_fields: 316 clear_fields:
317 ip->ip_parent = NULL; 317 ip->ip_parent = NULL;
318 ip->ip_dlm = NULL; 318 ip->ip_dlm = NULL;
319 } 319 }
320 320
/* dlmfs pages are never read ahead or written back: readahead is
 * disabled and dirty accounting/writeback capabilities are masked off. */
static struct backing_dev_info dlmfs_backing_dev_info = {
	.ra_pages = 0,	/* No readahead */
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};
325 325
326 static struct inode *dlmfs_get_root_inode(struct super_block *sb) 326 static struct inode *dlmfs_get_root_inode(struct super_block *sb)
327 { 327 {
328 struct inode *inode = new_inode(sb); 328 struct inode *inode = new_inode(sb);
329 int mode = S_IFDIR | 0755; 329 int mode = S_IFDIR | 0755;
330 struct dlmfs_inode_private *ip; 330 struct dlmfs_inode_private *ip;
331 331
332 if (inode) { 332 if (inode) {
333 ip = DLMFS_I(inode); 333 ip = DLMFS_I(inode);
334 334
335 inode->i_mode = mode; 335 inode->i_mode = mode;
336 inode->i_uid = current->fsuid; 336 inode->i_uid = current->fsuid;
337 inode->i_gid = current->fsgid; 337 inode->i_gid = current->fsgid;
338 inode->i_blocks = 0; 338 inode->i_blocks = 0;
339 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 339 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
340 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 340 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
341 inc_nlink(inode); 341 inc_nlink(inode);
342 342
343 inode->i_fop = &simple_dir_operations; 343 inode->i_fop = &simple_dir_operations;
344 inode->i_op = &dlmfs_root_inode_operations; 344 inode->i_op = &dlmfs_root_inode_operations;
345 } 345 }
346 346
347 return inode; 347 return inode;
348 } 348 }
349 349
350 static struct inode *dlmfs_get_inode(struct inode *parent, 350 static struct inode *dlmfs_get_inode(struct inode *parent,
351 struct dentry *dentry, 351 struct dentry *dentry,
352 int mode) 352 int mode)
353 { 353 {
354 struct super_block *sb = parent->i_sb; 354 struct super_block *sb = parent->i_sb;
355 struct inode * inode = new_inode(sb); 355 struct inode * inode = new_inode(sb);
356 struct dlmfs_inode_private *ip; 356 struct dlmfs_inode_private *ip;
357 357
358 if (!inode) 358 if (!inode)
359 return NULL; 359 return NULL;
360 360
361 inode->i_mode = mode; 361 inode->i_mode = mode;
362 inode->i_uid = current->fsuid; 362 inode->i_uid = current->fsuid;
363 inode->i_gid = current->fsgid; 363 inode->i_gid = current->fsgid;
364 inode->i_blocks = 0; 364 inode->i_blocks = 0;
365 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 365 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
366 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 366 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
367 367
368 ip = DLMFS_I(inode); 368 ip = DLMFS_I(inode);
369 ip->ip_dlm = DLMFS_I(parent)->ip_dlm; 369 ip->ip_dlm = DLMFS_I(parent)->ip_dlm;
370 370
371 switch (mode & S_IFMT) { 371 switch (mode & S_IFMT) {
372 default: 372 default:
373 /* for now we don't support anything other than 373 /* for now we don't support anything other than
374 * directories and regular files. */ 374 * directories and regular files. */
375 BUG(); 375 BUG();
376 break; 376 break;
377 case S_IFREG: 377 case S_IFREG:
378 inode->i_op = &dlmfs_file_inode_operations; 378 inode->i_op = &dlmfs_file_inode_operations;
379 inode->i_fop = &dlmfs_file_operations; 379 inode->i_fop = &dlmfs_file_operations;
380 380
381 i_size_write(inode, DLM_LVB_LEN); 381 i_size_write(inode, DLM_LVB_LEN);
382 382
383 user_dlm_lock_res_init(&ip->ip_lockres, dentry); 383 user_dlm_lock_res_init(&ip->ip_lockres, dentry);
384 384
385 /* released at clear_inode time, this insures that we 385 /* released at clear_inode time, this insures that we
386 * get to drop the dlm reference on each lock *before* 386 * get to drop the dlm reference on each lock *before*
387 * we call the unregister code for releasing parent 387 * we call the unregister code for releasing parent
388 * directories. */ 388 * directories. */
389 ip->ip_parent = igrab(parent); 389 ip->ip_parent = igrab(parent);
390 BUG_ON(!ip->ip_parent); 390 BUG_ON(!ip->ip_parent);
391 break; 391 break;
392 case S_IFDIR: 392 case S_IFDIR:
393 inode->i_op = &dlmfs_dir_inode_operations; 393 inode->i_op = &dlmfs_dir_inode_operations;
394 inode->i_fop = &simple_dir_operations; 394 inode->i_fop = &simple_dir_operations;
395 395
396 /* directory inodes start off with i_nlink == 396 /* directory inodes start off with i_nlink ==
397 * 2 (for "." entry) */ 397 * 2 (for "." entry) */
398 inc_nlink(inode); 398 inc_nlink(inode);
399 break; 399 break;
400 } 400 }
401 401
402 if (parent->i_mode & S_ISGID) { 402 if (parent->i_mode & S_ISGID) {
403 inode->i_gid = parent->i_gid; 403 inode->i_gid = parent->i_gid;
404 if (S_ISDIR(mode)) 404 if (S_ISDIR(mode))
405 inode->i_mode |= S_ISGID; 405 inode->i_mode |= S_ISGID;
406 } 406 }
407 407
408 return inode; 408 return inode;
409 } 409 }
410 410
411 /* 411 /*
412 * File creation. Allocate an inode, and we're done.. 412 * File creation. Allocate an inode, and we're done..
413 */ 413 */
414 /* SMP-safe */ 414 /* SMP-safe */
static int dlmfs_mkdir(struct inode * dir,
		       struct dentry * dentry,
		       int mode)
{
	int status;
	struct inode *inode = NULL;
	struct qstr *domain = &dentry->d_name;
	struct dlmfs_inode_private *ip;
	struct dlm_ctxt *dlm;

	mlog(0, "mkdir %.*s\n", domain->len, domain->name);

	/* verify that we have a proper domain -- the directory name is
	 * used directly as the dlm domain name, so it must fit. */
	if (domain->len >= O2NM_MAX_NAME_LEN) {
		status = -EINVAL;
		mlog(ML_ERROR, "invalid domain name for directory.\n");
		goto bail;
	}

	inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR);
	if (!inode) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	ip = DLMFS_I(inode);

	/* Register a dlm context named after this directory; the
	 * directory's files will create locks within it. */
	dlm = user_dlm_register_context(domain);
	if (IS_ERR(dlm)) {
		status = PTR_ERR(dlm);
		mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n",
		     status, domain->len, domain->name);
		goto bail;
	}
	ip->ip_dlm = dlm;

	inc_nlink(dir);
	d_instantiate(dentry, inode);
	dget(dentry);	/* Extra count - pin the dentry in core */

	status = 0;
bail:
	/* On failure, drop the inode if one was allocated (iput is a
	 * no-op for NULL). */
	if (status < 0)
		iput(inode);
	return status;
}
462 462
463 static int dlmfs_create(struct inode *dir, 463 static int dlmfs_create(struct inode *dir,
464 struct dentry *dentry, 464 struct dentry *dentry,
465 int mode, 465 int mode,
466 struct nameidata *nd) 466 struct nameidata *nd)
467 { 467 {
468 int status = 0; 468 int status = 0;
469 struct inode *inode; 469 struct inode *inode;
470 struct qstr *name = &dentry->d_name; 470 struct qstr *name = &dentry->d_name;
471 471
472 mlog(0, "create %.*s\n", name->len, name->name); 472 mlog(0, "create %.*s\n", name->len, name->name);
473 473
474 /* verify name is valid and doesn't contain any dlm reserved 474 /* verify name is valid and doesn't contain any dlm reserved
475 * characters */ 475 * characters */
476 if (name->len >= USER_DLM_LOCK_ID_MAX_LEN || 476 if (name->len >= USER_DLM_LOCK_ID_MAX_LEN ||
477 name->name[0] == '$') { 477 name->name[0] == '$') {
478 status = -EINVAL; 478 status = -EINVAL;
479 mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len, 479 mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len,
480 name->name); 480 name->name);
481 goto bail; 481 goto bail;
482 } 482 }
483 483
484 inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG); 484 inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG);
485 if (!inode) { 485 if (!inode) {
486 status = -ENOMEM; 486 status = -ENOMEM;
487 mlog_errno(status); 487 mlog_errno(status);
488 goto bail; 488 goto bail;
489 } 489 }
490 490
491 d_instantiate(dentry, inode); 491 d_instantiate(dentry, inode);
492 dget(dentry); /* Extra count - pin the dentry in core */ 492 dget(dentry); /* Extra count - pin the dentry in core */
493 bail: 493 bail:
494 return status; 494 return status;
495 } 495 }
496 496
497 static int dlmfs_unlink(struct inode *dir, 497 static int dlmfs_unlink(struct inode *dir,
498 struct dentry *dentry) 498 struct dentry *dentry)
499 { 499 {
500 int status; 500 int status;
501 struct inode *inode = dentry->d_inode; 501 struct inode *inode = dentry->d_inode;
502 502
503 mlog(0, "unlink inode %lu\n", inode->i_ino); 503 mlog(0, "unlink inode %lu\n", inode->i_ino);
504 504
505 /* if there are no current holders, or none that are waiting 505 /* if there are no current holders, or none that are waiting
506 * to acquire a lock, this basically destroys our lockres. */ 506 * to acquire a lock, this basically destroys our lockres. */
507 status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres); 507 status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres);
508 if (status < 0) { 508 if (status < 0) {
509 mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n", 509 mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n",
510 dentry->d_name.len, dentry->d_name.name, status); 510 dentry->d_name.len, dentry->d_name.name, status);
511 goto bail; 511 goto bail;
512 } 512 }
513 status = simple_unlink(dir, dentry); 513 status = simple_unlink(dir, dentry);
514 bail: 514 bail:
515 return status; 515 return status;
516 } 516 }
517 517
518 static int dlmfs_fill_super(struct super_block * sb, 518 static int dlmfs_fill_super(struct super_block * sb,
519 void * data, 519 void * data,
520 int silent) 520 int silent)
521 { 521 {
522 struct inode * inode; 522 struct inode * inode;
523 struct dentry * root; 523 struct dentry * root;
524 524
525 sb->s_maxbytes = MAX_LFS_FILESIZE; 525 sb->s_maxbytes = MAX_LFS_FILESIZE;
526 sb->s_blocksize = PAGE_CACHE_SIZE; 526 sb->s_blocksize = PAGE_CACHE_SIZE;
527 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 527 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
528 sb->s_magic = DLMFS_MAGIC; 528 sb->s_magic = DLMFS_MAGIC;
529 sb->s_op = &dlmfs_ops; 529 sb->s_op = &dlmfs_ops;
530 inode = dlmfs_get_root_inode(sb); 530 inode = dlmfs_get_root_inode(sb);
531 if (!inode) 531 if (!inode)
532 return -ENOMEM; 532 return -ENOMEM;
533 533
534 root = d_alloc_root(inode); 534 root = d_alloc_root(inode);
535 if (!root) { 535 if (!root) {
536 iput(inode); 536 iput(inode);
537 return -ENOMEM; 537 return -ENOMEM;
538 } 538 }
539 sb->s_root = root; 539 sb->s_root = root;
540 return 0; 540 return 0;
541 } 541 }
542 542
/* File callbacks for regular dlmfs files (handlers defined earlier in
 * this file). */
static struct file_operations dlmfs_file_operations = {
	.open		= dlmfs_file_open,
	.release	= dlmfs_file_release,
	.read		= dlmfs_file_read,
	.write		= dlmfs_file_write,
};
549 549
/* Domain-directory inode operations: files may be created and unlinked
 * here.  No .mkdir entry — subdirectories cannot be created below a
 * domain (mkdir is only wired up on the root, see below). */
static struct inode_operations dlmfs_dir_inode_operations = {
	.create		= dlmfs_create,
	.lookup		= simple_lookup,
	.unlink		= dlmfs_unlink,
};
555 555
/* this way we can restrict mkdir to only the toplevel of the fs. */
/* Note there is also no .create here, so regular files cannot be made
 * directly in the root — only domain directories. */
static struct inode_operations dlmfs_root_inode_operations = {
	.lookup		= simple_lookup,
	.mkdir		= dlmfs_mkdir,
	.rmdir		= simple_rmdir,
};
562 562
/* Superblock operations; inode alloc/teardown is handled by the dlmfs
 * helpers, everything else by generic VFS code. */
static struct super_operations dlmfs_ops = {
	.statfs		= simple_statfs,
	.alloc_inode	= dlmfs_alloc_inode,
	.destroy_inode	= dlmfs_destroy_inode,
	.clear_inode	= dlmfs_clear_inode,
	/* Delete inodes as soon as the last reference is dropped. */
	.drop_inode	= generic_delete_inode,
};
570 570
/* Inode operations for regular dlmfs files: stat only. */
static struct inode_operations dlmfs_file_inode_operations = {
	.getattr	= simple_getattr,
};
574 574
/* Mount entry point: dlmfs has no backing device, so the superblock is
 * built with get_sb_nodev() via dlmfs_fill_super(). */
static int dlmfs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
	return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt);
}
580 580
static struct file_system_type dlmfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ocfs2_dlmfs",
	.get_sb		= dlmfs_get_sb,
	/* The extra dentry references taken by dget() in create/mkdir
	 * are released by the litter-killing unmount helper. */
	.kill_sb	= kill_litter_super,
};
587 587
588 static int __init init_dlmfs_fs(void) 588 static int __init init_dlmfs_fs(void)
589 { 589 {
590 int status; 590 int status;
591 int cleanup_inode = 0, cleanup_worker = 0; 591 int cleanup_inode = 0, cleanup_worker = 0;
592 592
593 dlmfs_print_version(); 593 dlmfs_print_version();
594 594
595 dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", 595 dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache",
596 sizeof(struct dlmfs_inode_private), 596 sizeof(struct dlmfs_inode_private),
597 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 597 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
598 SLAB_MEM_SPREAD), 598 SLAB_MEM_SPREAD),
599 dlmfs_init_once, NULL); 599 dlmfs_init_once, NULL);
600 if (!dlmfs_inode_cache) 600 if (!dlmfs_inode_cache)
601 return -ENOMEM; 601 return -ENOMEM;
602 cleanup_inode = 1; 602 cleanup_inode = 1;
603 603
604 user_dlm_worker = create_singlethread_workqueue("user_dlm"); 604 user_dlm_worker = create_singlethread_workqueue("user_dlm");
605 if (!user_dlm_worker) { 605 if (!user_dlm_worker) {
606 status = -ENOMEM; 606 status = -ENOMEM;
607 goto bail; 607 goto bail;
608 } 608 }
609 cleanup_worker = 1; 609 cleanup_worker = 1;
610 610
611 status = register_filesystem(&dlmfs_fs_type); 611 status = register_filesystem(&dlmfs_fs_type);
612 bail: 612 bail:
613 if (status) { 613 if (status) {
614 if (cleanup_inode) 614 if (cleanup_inode)
615 kmem_cache_destroy(dlmfs_inode_cache); 615 kmem_cache_destroy(dlmfs_inode_cache);
616 if (cleanup_worker) 616 if (cleanup_worker)
617 destroy_workqueue(user_dlm_worker); 617 destroy_workqueue(user_dlm_worker);
618 } else 618 } else
619 printk("OCFS2 User DLM kernel interface loaded\n"); 619 printk("OCFS2 User DLM kernel interface loaded\n");
620 return status; 620 return status;
621 } 621 }
622 622
/* Module exit: unregister the filesystem, then tear down the workqueue
 * and the inode cache created by init_dlmfs_fs(). */
static void __exit exit_dlmfs_fs(void)
{
	unregister_filesystem(&dlmfs_fs_type);

	/* Drain any queued user_dlm work before destroying the queue. */
	flush_workqueue(user_dlm_worker);
	destroy_workqueue(user_dlm_worker);

	kmem_cache_destroy(dlmfs_inode_cache);
}
632 632
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");

/* Standard module entry/exit wiring. */
module_init(init_dlmfs_fs)
module_exit(exit_dlmfs_fs)
638 638
1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * file.c 4 * file.c
5 * 5 *
6 * File open, close, extend, truncate 6 * File open, close, extend, truncate
7 * 7 *
8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
9 * 9 *
10 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either 12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version. 13 * version 2 of the License, or (at your option) any later version.
14 * 14 *
15 * This program is distributed in the hope that it will be useful, 15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 18 * General Public License for more details.
19 * 19 *
20 * You should have received a copy of the GNU General Public 20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the 21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA. 23 * Boston, MA 021110-1307, USA.
24 */ 24 */
25 25
26 #include <linux/capability.h> 26 #include <linux/capability.h>
27 #include <linux/fs.h> 27 #include <linux/fs.h>
28 #include <linux/types.h> 28 #include <linux/types.h>
29 #include <linux/slab.h> 29 #include <linux/slab.h>
30 #include <linux/highmem.h> 30 #include <linux/highmem.h>
31 #include <linux/pagemap.h> 31 #include <linux/pagemap.h>
32 #include <linux/uio.h> 32 #include <linux/uio.h>
33 #include <linux/sched.h> 33 #include <linux/sched.h>
34 #include <linux/pipe_fs_i.h> 34 #include <linux/pipe_fs_i.h>
35 #include <linux/mount.h> 35 #include <linux/mount.h>
36 36
37 #define MLOG_MASK_PREFIX ML_INODE 37 #define MLOG_MASK_PREFIX ML_INODE
38 #include <cluster/masklog.h> 38 #include <cluster/masklog.h>
39 39
40 #include "ocfs2.h" 40 #include "ocfs2.h"
41 41
42 #include "alloc.h" 42 #include "alloc.h"
43 #include "aops.h" 43 #include "aops.h"
44 #include "dir.h" 44 #include "dir.h"
45 #include "dlmglue.h" 45 #include "dlmglue.h"
46 #include "extent_map.h" 46 #include "extent_map.h"
47 #include "file.h" 47 #include "file.h"
48 #include "sysfile.h" 48 #include "sysfile.h"
49 #include "inode.h" 49 #include "inode.h"
50 #include "ioctl.h" 50 #include "ioctl.h"
51 #include "journal.h" 51 #include "journal.h"
52 #include "mmap.h" 52 #include "mmap.h"
53 #include "suballoc.h" 53 #include "suballoc.h"
54 #include "super.h" 54 #include "super.h"
55 55
56 #include "buffer_head_io.h" 56 #include "buffer_head_io.h"
57 57
/* Write back dirty pagecache pages and associated metadata buffers for
 * @inode's mapping.
 * NOTE(review): the return value of filemap_fdatawrite() is discarded;
 * only sync_mapping_buffers() errors are propagated — confirm this is
 * intentional. */
static int ocfs2_sync_inode(struct inode *inode)
{
	filemap_fdatawrite(inode->i_mapping);
	return sync_mapping_buffers(inode->i_mapping);
}
63 63
64 static int ocfs2_file_open(struct inode *inode, struct file *file) 64 static int ocfs2_file_open(struct inode *inode, struct file *file)
65 { 65 {
66 int status; 66 int status;
67 int mode = file->f_flags; 67 int mode = file->f_flags;
68 struct ocfs2_inode_info *oi = OCFS2_I(inode); 68 struct ocfs2_inode_info *oi = OCFS2_I(inode);
69 69
70 mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, 70 mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
71 file->f_dentry->d_name.len, file->f_dentry->d_name.name); 71 file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);
72 72
73 spin_lock(&oi->ip_lock); 73 spin_lock(&oi->ip_lock);
74 74
75 /* Check that the inode hasn't been wiped from disk by another 75 /* Check that the inode hasn't been wiped from disk by another
76 * node. If it hasn't then we're safe as long as we hold the 76 * node. If it hasn't then we're safe as long as we hold the
77 * spin lock until our increment of open count. */ 77 * spin lock until our increment of open count. */
78 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 78 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
79 spin_unlock(&oi->ip_lock); 79 spin_unlock(&oi->ip_lock);
80 80
81 status = -ENOENT; 81 status = -ENOENT;
82 goto leave; 82 goto leave;
83 } 83 }
84 84
85 if (mode & O_DIRECT) 85 if (mode & O_DIRECT)
86 oi->ip_flags |= OCFS2_INODE_OPEN_DIRECT; 86 oi->ip_flags |= OCFS2_INODE_OPEN_DIRECT;
87 87
88 oi->ip_open_count++; 88 oi->ip_open_count++;
89 spin_unlock(&oi->ip_lock); 89 spin_unlock(&oi->ip_lock);
90 status = 0; 90 status = 0;
91 leave: 91 leave:
92 mlog_exit(status); 92 mlog_exit(status);
93 return status; 93 return status;
94 } 94 }
95 95
96 static int ocfs2_file_release(struct inode *inode, struct file *file) 96 static int ocfs2_file_release(struct inode *inode, struct file *file)
97 { 97 {
98 struct ocfs2_inode_info *oi = OCFS2_I(inode); 98 struct ocfs2_inode_info *oi = OCFS2_I(inode);
99 99
100 mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, 100 mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
101 file->f_dentry->d_name.len, 101 file->f_path.dentry->d_name.len,
102 file->f_dentry->d_name.name); 102 file->f_path.dentry->d_name.name);
103 103
104 spin_lock(&oi->ip_lock); 104 spin_lock(&oi->ip_lock);
105 if (!--oi->ip_open_count) 105 if (!--oi->ip_open_count)
106 oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; 106 oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
107 spin_unlock(&oi->ip_lock); 107 spin_unlock(&oi->ip_lock);
108 108
109 mlog_exit(0); 109 mlog_exit(0);
110 110
111 return 0; 111 return 0;
112 } 112 }
113 113
114 static int ocfs2_sync_file(struct file *file, 114 static int ocfs2_sync_file(struct file *file,
115 struct dentry *dentry, 115 struct dentry *dentry,
116 int datasync) 116 int datasync)
117 { 117 {
118 int err = 0; 118 int err = 0;
119 journal_t *journal; 119 journal_t *journal;
120 struct inode *inode = dentry->d_inode; 120 struct inode *inode = dentry->d_inode;
121 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 121 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
122 122
123 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, 123 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync,
124 dentry->d_name.len, dentry->d_name.name); 124 dentry->d_name.len, dentry->d_name.name);
125 125
126 err = ocfs2_sync_inode(dentry->d_inode); 126 err = ocfs2_sync_inode(dentry->d_inode);
127 if (err) 127 if (err)
128 goto bail; 128 goto bail;
129 129
130 journal = osb->journal->j_journal; 130 journal = osb->journal->j_journal;
131 err = journal_force_commit(journal); 131 err = journal_force_commit(journal);
132 132
133 bail: 133 bail:
134 mlog_exit(err); 134 mlog_exit(err);
135 135
136 return (err < 0) ? -EIO : 0; 136 return (err < 0) ? -EIO : 0;
137 } 137 }
138 138
139 int ocfs2_should_update_atime(struct inode *inode, 139 int ocfs2_should_update_atime(struct inode *inode,
140 struct vfsmount *vfsmnt) 140 struct vfsmount *vfsmnt)
141 { 141 {
142 struct timespec now; 142 struct timespec now;
143 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 143 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
144 144
145 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) 145 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
146 return 0; 146 return 0;
147 147
148 if ((inode->i_flags & S_NOATIME) || 148 if ((inode->i_flags & S_NOATIME) ||
149 ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) 149 ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
150 return 0; 150 return 0;
151 151
152 if ((vfsmnt->mnt_flags & MNT_NOATIME) || 152 if ((vfsmnt->mnt_flags & MNT_NOATIME) ||
153 ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) 153 ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
154 return 0; 154 return 0;
155 155
156 now = CURRENT_TIME; 156 now = CURRENT_TIME;
157 if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum)) 157 if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum))
158 return 0; 158 return 0;
159 else 159 else
160 return 1; 160 return 1;
161 } 161 }
162 162
163 int ocfs2_update_inode_atime(struct inode *inode, 163 int ocfs2_update_inode_atime(struct inode *inode,
164 struct buffer_head *bh) 164 struct buffer_head *bh)
165 { 165 {
166 int ret; 166 int ret;
167 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 167 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
168 handle_t *handle; 168 handle_t *handle;
169 169
170 mlog_entry_void(); 170 mlog_entry_void();
171 171
172 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 172 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
173 if (handle == NULL) { 173 if (handle == NULL) {
174 ret = -ENOMEM; 174 ret = -ENOMEM;
175 mlog_errno(ret); 175 mlog_errno(ret);
176 goto out; 176 goto out;
177 } 177 }
178 178
179 inode->i_atime = CURRENT_TIME; 179 inode->i_atime = CURRENT_TIME;
180 ret = ocfs2_mark_inode_dirty(handle, inode, bh); 180 ret = ocfs2_mark_inode_dirty(handle, inode, bh);
181 if (ret < 0) 181 if (ret < 0)
182 mlog_errno(ret); 182 mlog_errno(ret);
183 183
184 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 184 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
185 out: 185 out:
186 mlog_exit(ret); 186 mlog_exit(ret);
187 return ret; 187 return ret;
188 } 188 }
189 189
190 int ocfs2_set_inode_size(handle_t *handle, 190 int ocfs2_set_inode_size(handle_t *handle,
191 struct inode *inode, 191 struct inode *inode,
192 struct buffer_head *fe_bh, 192 struct buffer_head *fe_bh,
193 u64 new_i_size) 193 u64 new_i_size)
194 { 194 {
195 int status; 195 int status;
196 196
197 mlog_entry_void(); 197 mlog_entry_void();
198 i_size_write(inode, new_i_size); 198 i_size_write(inode, new_i_size);
199 inode->i_blocks = ocfs2_align_bytes_to_sectors(new_i_size); 199 inode->i_blocks = ocfs2_align_bytes_to_sectors(new_i_size);
200 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 200 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
201 201
202 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); 202 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
203 if (status < 0) { 203 if (status < 0) {
204 mlog_errno(status); 204 mlog_errno(status);
205 goto bail; 205 goto bail;
206 } 206 }
207 207
208 bail: 208 bail:
209 mlog_exit(status); 209 mlog_exit(status);
210 return status; 210 return status;
211 } 211 }
212 212
213 static int ocfs2_simple_size_update(struct inode *inode, 213 static int ocfs2_simple_size_update(struct inode *inode,
214 struct buffer_head *di_bh, 214 struct buffer_head *di_bh,
215 u64 new_i_size) 215 u64 new_i_size)
216 { 216 {
217 int ret; 217 int ret;
218 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 218 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
219 handle_t *handle = NULL; 219 handle_t *handle = NULL;
220 220
221 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 221 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
222 if (handle == NULL) { 222 if (handle == NULL) {
223 ret = -ENOMEM; 223 ret = -ENOMEM;
224 mlog_errno(ret); 224 mlog_errno(ret);
225 goto out; 225 goto out;
226 } 226 }
227 227
228 ret = ocfs2_set_inode_size(handle, inode, di_bh, 228 ret = ocfs2_set_inode_size(handle, inode, di_bh,
229 new_i_size); 229 new_i_size);
230 if (ret < 0) 230 if (ret < 0)
231 mlog_errno(ret); 231 mlog_errno(ret);
232 232
233 ocfs2_commit_trans(osb, handle); 233 ocfs2_commit_trans(osb, handle);
234 out: 234 out:
235 return ret; 235 return ret;
236 } 236 }
237 237
238 static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, 238 static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
239 struct inode *inode, 239 struct inode *inode,
240 struct buffer_head *fe_bh, 240 struct buffer_head *fe_bh,
241 u64 new_i_size) 241 u64 new_i_size)
242 { 242 {
243 int status; 243 int status;
244 handle_t *handle; 244 handle_t *handle;
245 245
246 mlog_entry_void(); 246 mlog_entry_void();
247 247
248 /* TODO: This needs to actually orphan the inode in this 248 /* TODO: This needs to actually orphan the inode in this
249 * transaction. */ 249 * transaction. */
250 250
251 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 251 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
252 if (IS_ERR(handle)) { 252 if (IS_ERR(handle)) {
253 status = PTR_ERR(handle); 253 status = PTR_ERR(handle);
254 mlog_errno(status); 254 mlog_errno(status);
255 goto out; 255 goto out;
256 } 256 }
257 257
258 status = ocfs2_set_inode_size(handle, inode, fe_bh, new_i_size); 258 status = ocfs2_set_inode_size(handle, inode, fe_bh, new_i_size);
259 if (status < 0) 259 if (status < 0)
260 mlog_errno(status); 260 mlog_errno(status);
261 261
262 ocfs2_commit_trans(osb, handle); 262 ocfs2_commit_trans(osb, handle);
263 out: 263 out:
264 mlog_exit(status); 264 mlog_exit(status);
265 return status; 265 return status;
266 } 266 }
267 267
/*
 * Truncate @inode (dinode buffer @di_bh) down to @new_i_size.
 *
 * Growing a file is rejected with -EINVAL.  A truncate that does not
 * change the cluster count takes the fast i_size-only path; otherwise
 * the inode is "orphaned" for crash recovery and a full prepare/commit
 * truncate is run.  Returns 0 on success or a negative error code.
 */
static int ocfs2_truncate_file(struct inode *inode,
			       struct buffer_head *di_bh,
			       u64 new_i_size)
{
	int status = 0;
	struct ocfs2_dinode *fe = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_truncate_context *tc = NULL;

	mlog_entry("(inode = %llu, new_i_size = %llu\n",
		   (unsigned long long)OCFS2_I(inode)->ip_blkno,
		   (unsigned long long)new_i_size);

	/* Throw away local pagecache beyond the new size up front. */
	truncate_inode_pages(inode->i_mapping, new_i_size);

	fe = (struct ocfs2_dinode *) di_bh->b_data;
	if (!OCFS2_IS_VALID_DINODE(fe)) {
		/* Corrupt dinode: flip the fs read-only and bail. */
		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
		status = -EIO;
		goto bail;
	}

	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
			"Inode %llu, inode i_size = %lld != di "
			"i_size = %llu, i_flags = 0x%x\n",
			(unsigned long long)OCFS2_I(inode)->ip_blkno,
			i_size_read(inode),
			(unsigned long long)le64_to_cpu(fe->i_size),
			le32_to_cpu(fe->i_flags));

	/* This path only shrinks files; extending is a different op. */
	if (new_i_size > le64_to_cpu(fe->i_size)) {
		mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n",
		     (unsigned long long)le64_to_cpu(fe->i_size),
		     (unsigned long long)new_i_size);
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n",
	     (unsigned long long)le64_to_cpu(fe->i_blkno),
	     (unsigned long long)le64_to_cpu(fe->i_size),
	     (unsigned long long)new_i_size);

	/* lets handle the simple truncate cases before doing any more
	 * cluster locking. */
	if (new_i_size == le64_to_cpu(fe->i_size))
		goto bail;

	/* This forces other nodes to sync and drop their pages. Do
	 * this even if we have a truncate without allocation change -
	 * ocfs2 cluster sizes can be much greater than page size, so
	 * we have to truncate them anyway.  */
	status = ocfs2_data_lock(inode, 1);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	ocfs2_data_unlock(inode, 1);

	if (le32_to_cpu(fe->i_clusters) ==
	    ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
		mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
		     fe->i_clusters);
		/* No allocation change is required, so lets fast path
		 * this truncate. */
		status = ocfs2_simple_size_update(inode, di_bh, new_i_size);
		if (status < 0)
			mlog_errno(status);
		goto bail;
	}

	/* alright, we're going to need to do a full blown alloc size
	 * change. Orphan the inode so that recovery can complete the
	 * truncate if necessary. This does the task of marking
	 * i_size. */
	status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* TODO: orphan dir cleanup here. */
bail:

	mlog_exit(status);
	return status;
}
368 368
369 /* 369 /*
370 * extend allocation only here. 370 * extend allocation only here.
371 * we'll update all the disk stuff, and oip->alloc_size 371 * we'll update all the disk stuff, and oip->alloc_size
372 * 372 *
373 * expect stuff to be locked, a transaction started and enough data / 373 * expect stuff to be locked, a transaction started and enough data /
374 * metadata reservations in the contexts. 374 * metadata reservations in the contexts.
375 * 375 *
376 * Will return -EAGAIN, and a reason if a restart is needed. 376 * Will return -EAGAIN, and a reason if a restart is needed.
377 * If passed in, *reason will always be set, even in error. 377 * If passed in, *reason will always be set, even in error.
378 */ 378 */
379 int ocfs2_do_extend_allocation(struct ocfs2_super *osb, 379 int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
380 struct inode *inode, 380 struct inode *inode,
381 u32 clusters_to_add, 381 u32 clusters_to_add,
382 struct buffer_head *fe_bh, 382 struct buffer_head *fe_bh,
383 handle_t *handle, 383 handle_t *handle,
384 struct ocfs2_alloc_context *data_ac, 384 struct ocfs2_alloc_context *data_ac,
385 struct ocfs2_alloc_context *meta_ac, 385 struct ocfs2_alloc_context *meta_ac,
386 enum ocfs2_alloc_restarted *reason_ret) 386 enum ocfs2_alloc_restarted *reason_ret)
387 { 387 {
388 int status = 0; 388 int status = 0;
389 int free_extents; 389 int free_extents;
390 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; 390 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
391 enum ocfs2_alloc_restarted reason = RESTART_NONE; 391 enum ocfs2_alloc_restarted reason = RESTART_NONE;
392 u32 bit_off, num_bits; 392 u32 bit_off, num_bits;
393 u64 block; 393 u64 block;
394 394
395 BUG_ON(!clusters_to_add); 395 BUG_ON(!clusters_to_add);
396 396
397 free_extents = ocfs2_num_free_extents(osb, inode, fe); 397 free_extents = ocfs2_num_free_extents(osb, inode, fe);
398 if (free_extents < 0) { 398 if (free_extents < 0) {
399 status = free_extents; 399 status = free_extents;
400 mlog_errno(status); 400 mlog_errno(status);
401 goto leave; 401 goto leave;
402 } 402 }
403 403
404 /* there are two cases which could cause us to EAGAIN in the 404 /* there are two cases which could cause us to EAGAIN in the
405 * we-need-more-metadata case: 405 * we-need-more-metadata case:
406 * 1) we haven't reserved *any* 406 * 1) we haven't reserved *any*
407 * 2) we are so fragmented, we've needed to add metadata too 407 * 2) we are so fragmented, we've needed to add metadata too
408 * many times. */ 408 * many times. */
409 if (!free_extents && !meta_ac) { 409 if (!free_extents && !meta_ac) {
410 mlog(0, "we haven't reserved any metadata!\n"); 410 mlog(0, "we haven't reserved any metadata!\n");
411 status = -EAGAIN; 411 status = -EAGAIN;
412 reason = RESTART_META; 412 reason = RESTART_META;
413 goto leave; 413 goto leave;
414 } else if ((!free_extents) 414 } else if ((!free_extents)
415 && (ocfs2_alloc_context_bits_left(meta_ac) 415 && (ocfs2_alloc_context_bits_left(meta_ac)
416 < ocfs2_extend_meta_needed(fe))) { 416 < ocfs2_extend_meta_needed(fe))) {
417 mlog(0, "filesystem is really fragmented...\n"); 417 mlog(0, "filesystem is really fragmented...\n");
418 status = -EAGAIN; 418 status = -EAGAIN;
419 reason = RESTART_META; 419 reason = RESTART_META;
420 goto leave; 420 goto leave;
421 } 421 }
422 422
423 status = ocfs2_claim_clusters(osb, handle, data_ac, 1, 423 status = ocfs2_claim_clusters(osb, handle, data_ac, 1,
424 &bit_off, &num_bits); 424 &bit_off, &num_bits);
425 if (status < 0) { 425 if (status < 0) {
426 if (status != -ENOSPC) 426 if (status != -ENOSPC)
427 mlog_errno(status); 427 mlog_errno(status);
428 goto leave; 428 goto leave;
429 } 429 }
430 430
431 BUG_ON(num_bits > clusters_to_add); 431 BUG_ON(num_bits > clusters_to_add);
432 432
433 /* reserve our write early -- insert_extent may update the inode */ 433 /* reserve our write early -- insert_extent may update the inode */
434 status = ocfs2_journal_access(handle, inode, fe_bh, 434 status = ocfs2_journal_access(handle, inode, fe_bh,
435 OCFS2_JOURNAL_ACCESS_WRITE); 435 OCFS2_JOURNAL_ACCESS_WRITE);
436 if (status < 0) { 436 if (status < 0) {
437 mlog_errno(status); 437 mlog_errno(status);
438 goto leave; 438 goto leave;
439 } 439 }
440 440
441 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 441 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
442 mlog(0, "Allocating %u clusters at block %u for inode %llu\n", 442 mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
443 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); 443 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
444 status = ocfs2_insert_extent(osb, handle, inode, fe_bh, block, 444 status = ocfs2_insert_extent(osb, handle, inode, fe_bh, block,
445 num_bits, meta_ac); 445 num_bits, meta_ac);
446 if (status < 0) { 446 if (status < 0) {
447 mlog_errno(status); 447 mlog_errno(status);
448 goto leave; 448 goto leave;
449 } 449 }
450 450
451 le32_add_cpu(&fe->i_clusters, num_bits); 451 le32_add_cpu(&fe->i_clusters, num_bits);
452 spin_lock(&OCFS2_I(inode)->ip_lock); 452 spin_lock(&OCFS2_I(inode)->ip_lock);
453 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 453 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
454 spin_unlock(&OCFS2_I(inode)->ip_lock); 454 spin_unlock(&OCFS2_I(inode)->ip_lock);
455 455
456 status = ocfs2_journal_dirty(handle, fe_bh); 456 status = ocfs2_journal_dirty(handle, fe_bh);
457 if (status < 0) { 457 if (status < 0) {
458 mlog_errno(status); 458 mlog_errno(status);
459 goto leave; 459 goto leave;
460 } 460 }
461 461
462 clusters_to_add -= num_bits; 462 clusters_to_add -= num_bits;
463 463
464 if (clusters_to_add) { 464 if (clusters_to_add) {
465 mlog(0, "need to alloc once more, clusters = %u, wanted = " 465 mlog(0, "need to alloc once more, clusters = %u, wanted = "
466 "%u\n", fe->i_clusters, clusters_to_add); 466 "%u\n", fe->i_clusters, clusters_to_add);
467 status = -EAGAIN; 467 status = -EAGAIN;
468 reason = RESTART_TRANS; 468 reason = RESTART_TRANS;
469 } 469 }
470 470
471 leave: 471 leave:
472 mlog_exit(status); 472 mlog_exit(status);
473 if (reason_ret) 473 if (reason_ret)
474 *reason_ret = reason; 474 *reason_ret = reason;
475 return status; 475 return status;
476 } 476 }
477 477
/*
 * Grow the on-disk allocation of @inode by @clusters_to_add clusters.
 *
 * Drives ocfs2_do_extend_allocation() in a loop, restarting either the
 * transaction (RESTART_TRANS: extend the handle's credits and retry) or
 * the whole function (RESTART_META: drop everything, re-reserve metadata
 * and start over) until all requested clusters are allocated or an error
 * occurs.  Does not zero the new space and does not update i_size.
 *
 * Returns 0 on success, negative errno on failure (-ENOSPC is expected
 * and not logged as an error).
 */
static int ocfs2_extend_allocation(struct inode *inode,
				   u32 clusters_to_add)
{
	int status = 0;
	int restart_func = 0;
	int drop_alloc_sem = 0;
	int credits, num_free_extents;
	u32 prev_clusters;
	struct buffer_head *bh = NULL;
	struct ocfs2_dinode *fe = NULL;
	handle_t *handle = NULL;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	enum ocfs2_alloc_restarted why;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);

	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
				  OCFS2_BH_CACHED, inode);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	fe = (struct ocfs2_dinode *) bh->b_data;
	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
		status = -EIO;
		goto leave;
	}

restart_all:
	/* In-memory and on-disk cluster counts must agree before we extend. */
	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);

	mlog(0, "extend inode %llu, i_size = %lld, fe->i_clusters = %u, "
	     "clusters_to_add = %u\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
	     fe->i_clusters, clusters_to_add);

	num_free_extents = ocfs2_num_free_extents(osb,
						  inode,
						  fe);
	if (num_free_extents < 0) {
		status = num_free_extents;
		mlog_errno(status);
		goto leave;
	}

	/* No free extent records left -- reserve metadata so the insert
	 * below can grow the extent tree. */
	if (!num_free_extents) {
		status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac);
		if (status < 0) {
			if (status != -ENOSPC)
				mlog_errno(status);
			goto leave;
		}
	}

	status = ocfs2_reserve_clusters(osb, clusters_to_add, &data_ac);
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	/* blocks people in read/write from reading our allocation
	 * until we're done changing it. We depend on i_mutex to block
	 * other extend/truncate calls while we're here. Ordering wrt
	 * start_trans is important here -- always do it before! */
	down_write(&OCFS2_I(inode)->ip_alloc_sem);
	drop_alloc_sem = 1;

	credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(status);
		goto leave;
	}

restarted_transaction:
	/* reserve a write to the file entry early on - that way if we
	 * run out of credits in the allocation path, we can still
	 * update i_size. */
	status = ocfs2_journal_access(handle, inode, bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	prev_clusters = OCFS2_I(inode)->ip_clusters;

	status = ocfs2_do_extend_allocation(osb,
					    inode,
					    clusters_to_add,
					    bh,
					    handle,
					    data_ac,
					    meta_ac,
					    &why);
	/* -EAGAIN means a restart is requested via @why, not a failure. */
	if ((status < 0) && (status != -EAGAIN)) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	status = ocfs2_journal_dirty(handle, bh);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	/* Account for whatever the allocation pass actually managed to add. */
	spin_lock(&OCFS2_I(inode)->ip_lock);
	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
	spin_unlock(&OCFS2_I(inode)->ip_lock);

	if (why != RESTART_NONE && clusters_to_add) {
		if (why == RESTART_META) {
			/* Need fresh metadata reservations -- unwind fully
			 * (via leave:) and re-enter at restart_all. */
			mlog(0, "restarting function.\n");
			restart_func = 1;
		} else {
			BUG_ON(why != RESTART_TRANS);

			mlog(0, "restarting transaction.\n");
			/* TODO: This can be more intelligent. */
			credits = ocfs2_calc_extend_credits(osb->sb,
							    fe,
							    clusters_to_add);
			status = ocfs2_extend_trans(handle, credits);
			if (status < 0) {
				/* handle still has to be committed at
				 * this point. */
				status = -ENOMEM;
				mlog_errno(status);
				goto leave;
			}
			goto restarted_transaction;
		}
	}

	mlog(0, "fe: i_clusters = %u, i_size=%llu\n",
	     fe->i_clusters, (unsigned long long)fe->i_size);
	mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
	     OCFS2_I(inode)->ip_clusters, i_size_read(inode));

leave:
	/* Common unwind: release in reverse order of acquisition.  Also
	 * the staging point for the RESTART_META full restart below. */
	if (drop_alloc_sem) {
		up_write(&OCFS2_I(inode)->ip_alloc_sem);
		drop_alloc_sem = 0;
	}
	if (handle) {
		ocfs2_commit_trans(osb, handle);
		handle = NULL;
	}
	if (data_ac) {
		ocfs2_free_alloc_context(data_ac);
		data_ac = NULL;
	}
	if (meta_ac) {
		ocfs2_free_alloc_context(meta_ac);
		meta_ac = NULL;
	}
	if ((!status) && restart_func) {
		restart_func = 0;
		goto restart_all;
	}
	if (bh) {
		brelse(bh);
		bh = NULL;
	}

	mlog_exit(status);
	return status;
}
654 654
/* Some parts of this taken from generic_cont_expand, which turned out
 * to be too fragile to do exactly what we need without us having to
 * worry about recursive locking in ->prepare_write() and
 * ->commit_write(). */

/*
 * Zero the byte at offset @size by running a zero-length prepare/commit
 * write cycle over the page containing it.  Used by ocfs2_zero_extend()
 * to fault in and zero pages past the old i_size without updating i_size.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int ocfs2_write_zero_page(struct inode *inode,
				 u64 size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	unsigned long index;
	unsigned int offset;
	handle_t *handle = NULL;
	int ret;

	offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
	/* ugh.  in prepare/commit_write, if from==to==start of block, we
	** skip the prepare.  make sure we never send an offset for the start
	** of a block
	*/
	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
		offset++;
	}
	index = size >> PAGE_CACHE_SHIFT;

	page = grab_cache_page(mapping, index);
	if (!page) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

	/* Ordered-data mode needs a journal handle around the commit so
	 * the data block is tied to the transaction. */
	if (ocfs2_should_order_data(inode)) {
		handle = ocfs2_start_walk_page_trans(inode, page, offset,
						     offset);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			handle = NULL;
			goto out_unlock;
		}
	}

	/* must not update i_size! */
	ret = block_commit_write(page, offset, offset);
	if (ret < 0)
		mlog_errno(ret);
	else
		ret = 0;

	if (handle)
		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out_unlock:
	unlock_page(page);
	page_cache_release(page);
out:
	return ret;
}
717 717
/*
 * Zero every block from the block-aligned end of the current i_size up
 * to (but not including) @zero_to_size, one block per page pass.
 *
 * Returns 0 on success, negative errno from ocfs2_write_zero_page()
 * on failure.
 */
static int ocfs2_zero_extend(struct inode *inode,
			     u64 zero_to_size)
{
	int ret = 0;
	u64 start_off;
	struct super_block *sb = inode->i_sb;

	/* Start at the first block boundary at or past i_size. */
	start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
	while (start_off < zero_to_size) {
		ret = ocfs2_write_zero_page(inode, start_off);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		start_off += sb->s_blocksize;

		/*
		 * Very large extends have the potential to lock up
		 * the cpu for extended periods of time.
		 */
		cond_resched();
	}

out:
	return ret;
}
745 745
/*
 * A tail_to_skip value > 0 indicates that we're being called from
 * ocfs2_file_aio_write(). This has the following implications:
 *
 * - we don't want to update i_size
 * - di_bh will be NULL, which is fine because it's only used in the
 *   case where we want to update i_size.
 * - ocfs2_zero_extend() will then only be filling the hole created
 *   between i_size and the start of the write.
 */
static int ocfs2_extend_file(struct inode *inode,
			     struct buffer_head *di_bh,
			     u64 new_i_size,
			     size_t tail_to_skip)
{
	int ret = 0;
	u32 clusters_to_add;

	/* di_bh may only be NULL in the write path (tail_to_skip > 0). */
	BUG_ON(!tail_to_skip && !di_bh);

	/* setattr sometimes calls us like this. */
	if (new_i_size == 0)
		goto out;

	if (i_size_read(inode) == new_i_size)
		goto out;
	BUG_ON(new_i_size < i_size_read(inode));

	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) -
		OCFS2_I(inode)->ip_clusters;

	/*
	 * protect the pages that ocfs2_zero_extend is going to be
	 * pulling into the page cache.. we do this before the
	 * metadata extend so that we don't get into the situation
	 * where we've extended the metadata but can't get the data
	 * lock to zero.
	 */
	ret = ocfs2_data_lock(inode, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	if (clusters_to_add) {
		ret = ocfs2_extend_allocation(inode, clusters_to_add);
		if (ret < 0) {
			mlog_errno(ret);
			goto out_unlock;
		}
	}

	/*
	 * Call this even if we don't add any clusters to the tree. We
	 * still need to zero the area between the old i_size and the
	 * new i_size.
	 */
	ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

	if (!tail_to_skip) {
		/* We're being called from ocfs2_setattr() which wants
		 * us to update i_size */
		ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
		if (ret < 0)
			mlog_errno(ret);
	}

out_unlock:
	ocfs2_data_unlock(inode, 1);

out:
	return ret;
}
823 823
/*
 * ocfs2 ->setattr: apply @attr to @dentry's inode under the cluster
 * locks.  Size changes take the rw lock and go through truncate/extend
 * before the remaining attributes are applied and the inode is marked
 * dirty in the journal.
 *
 * Returns 0 on success, negative errno on failure.
 */
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
{
	int status = 0, size_change;
	struct inode *inode = dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	struct ocfs2_super *osb = OCFS2_SB(sb);
	struct buffer_head *bh = NULL;
	handle_t *handle = NULL;

	mlog_entry("(0x%p, '%.*s')\n", dentry,
	           dentry->d_name.len, dentry->d_name.name);

	if (attr->ia_valid & ATTR_MODE)
		mlog(0, "mode change: %d\n", attr->ia_mode);
	if (attr->ia_valid & ATTR_UID)
		mlog(0, "uid change: %d\n", attr->ia_uid);
	if (attr->ia_valid & ATTR_GID)
		mlog(0, "gid change: %d\n", attr->ia_gid);
	if (attr->ia_valid & ATTR_SIZE)
		mlog(0, "size change...\n");
	if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME))
		mlog(0, "time change...\n");

	/* Attributes we know how to apply; anything else is silently a no-op. */
#define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
			   | ATTR_GID | ATTR_UID | ATTR_MODE)
	if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) {
		mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid);
		return 0;
	}

	status = inode_change_ok(inode, attr);
	if (status)
		return status;

	/* Only regular-file size changes need the rw cluster lock. */
	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
	if (size_change) {
		status = ocfs2_rw_lock(inode, 1);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	status = ocfs2_meta_lock(inode, &bh, 1);
	if (status < 0) {
		if (status != -ENOENT)
			mlog_errno(status);
		goto bail_unlock_rw;
	}

	if (size_change && attr->ia_size != i_size_read(inode)) {
		if (i_size_read(inode) > attr->ia_size)
			status = ocfs2_truncate_file(inode, bh, attr->ia_size);
		else
			status = ocfs2_extend_file(inode, bh, attr->ia_size, 0);
		if (status < 0) {
			if (status != -ENOSPC)
				mlog_errno(status);
			/* NOTE(review): any failure here is reported to the
			 * caller as -ENOSPC, masking the original errno. */
			status = -ENOSPC;
			goto bail_unlock;
		}
	}

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		mlog_errno(status);
		goto bail_unlock;
	}

	status = inode_setattr(inode, attr);
	if (status < 0) {
		mlog_errno(status);
		goto bail_commit;
	}

	status = ocfs2_mark_inode_dirty(handle, inode, bh);
	if (status < 0)
		mlog_errno(status);

bail_commit:
	ocfs2_commit_trans(osb, handle);
bail_unlock:
	ocfs2_meta_unlock(inode, 1);
bail_unlock_rw:
	if (size_change)
		ocfs2_rw_unlock(inode, 1);
bail:
	if (bh)
		brelse(bh);

	mlog_exit(status);
	return status;
}
918 918
/*
 * ocfs2 ->getattr: revalidate the inode against the cluster, then fill
 * @stat via generic_fillattr(), overriding blksize with the cluster size.
 *
 * Returns 0 on success, negative errno from revalidation on failure.
 */
int ocfs2_getattr(struct vfsmount *mnt,
		  struct dentry *dentry,
		  struct kstat *stat)
{
	struct inode *inode = dentry->d_inode;
	struct super_block *sb = dentry->d_inode->i_sb;
	struct ocfs2_super *osb = sb->s_fs_info;
	int err;

	mlog_entry_void();

	err = ocfs2_inode_revalidate(dentry);
	if (err) {
		if (err != -ENOENT)
			mlog_errno(err);
		goto bail;
	}

	generic_fillattr(inode, stat);

	/* We set the blksize from the cluster size for performance */
	stat->blksize = osb->s_clustersize;

bail:
	mlog_exit(err);

	return err;
}
947 947
/*
 * ocfs2 ->permission: take a read-level meta cluster lock so the inode
 * fields generic_permission() inspects are current, then defer to the
 * generic check.
 *
 * Returns 0 if access is allowed, negative errno otherwise.
 */
int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
{
	int ret;

	mlog_entry_void();

	ret = ocfs2_meta_lock(inode, NULL, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = generic_permission(inode, mask, NULL);
	if (ret)
		mlog_errno(ret);

	ocfs2_meta_unlock(inode, 0);
out:
	mlog_exit(ret);
	return ret;
}
969 969
/*
 * Clear the setuid bit (and setgid when group-execute is set) on @inode,
 * updating both the in-memory mode and the on-disk dinode inside a
 * journal transaction.  Called from the write path instead of
 * remove_suid() to avoid recursive cluster locking via ->setattr.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int ocfs2_write_remove_suid(struct inode *inode)
{
	int ret;
	struct buffer_head *bh = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	handle_t *handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_dinode *di;

	mlog_entry("(Inode %llu, mode 0%o)\n",
		   (unsigned long long)oi->ip_blkno, inode->i_mode);

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	/* NOTE(review): checks for NULL but ocfs2_start_trans appears to
	 * return ERR_PTR() elsewhere in this file -- confirm which. */
	if (handle == NULL) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_trans;
	}

	ret = ocfs2_journal_access(handle, inode, bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_bh;
	}

	inode->i_mode &= ~S_ISUID;
	/* Only drop setgid when it actually grants anything (group exec). */
	if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP))
		inode->i_mode &= ~S_ISGID;

	di = (struct ocfs2_dinode *) bh->b_data;
	di->i_mode = cpu_to_le16(inode->i_mode);

	ret = ocfs2_journal_dirty(handle, bh);
	if (ret < 0)
		mlog_errno(ret);
out_bh:
	brelse(bh);
out_trans:
	ocfs2_commit_trans(osb, handle);
out:
	mlog_exit(ret);
	return ret;
}
1020 1020
1021 static int ocfs2_prepare_inode_for_write(struct dentry *dentry, 1021 static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1022 loff_t *ppos, 1022 loff_t *ppos,
1023 size_t count, 1023 size_t count,
1024 int appending) 1024 int appending)
1025 { 1025 {
1026 int ret = 0, meta_level = appending; 1026 int ret = 0, meta_level = appending;
1027 struct inode *inode = dentry->d_inode; 1027 struct inode *inode = dentry->d_inode;
1028 u32 clusters; 1028 u32 clusters;
1029 loff_t newsize, saved_pos; 1029 loff_t newsize, saved_pos;
1030 1030
1031 /* 1031 /*
1032 * We sample i_size under a read level meta lock to see if our write 1032 * We sample i_size under a read level meta lock to see if our write
1033 * is extending the file, if it is we back off and get a write level 1033 * is extending the file, if it is we back off and get a write level
1034 * meta lock. 1034 * meta lock.
1035 */ 1035 */
1036 for(;;) { 1036 for(;;) {
1037 ret = ocfs2_meta_lock(inode, NULL, meta_level); 1037 ret = ocfs2_meta_lock(inode, NULL, meta_level);
1038 if (ret < 0) { 1038 if (ret < 0) {
1039 meta_level = -1; 1039 meta_level = -1;
1040 mlog_errno(ret); 1040 mlog_errno(ret);
1041 goto out; 1041 goto out;
1042 } 1042 }
1043 1043
1044 /* Clear suid / sgid if necessary. We do this here 1044 /* Clear suid / sgid if necessary. We do this here
1045 * instead of later in the write path because 1045 * instead of later in the write path because
1046 * remove_suid() calls ->setattr without any hint that 1046 * remove_suid() calls ->setattr without any hint that
1047 * we may have already done our cluster locking. Since 1047 * we may have already done our cluster locking. Since
1048 * ocfs2_setattr() *must* take cluster locks to 1048 * ocfs2_setattr() *must* take cluster locks to
1049 * proceeed, this will lead us to recursively lock the 1049 * proceeed, this will lead us to recursively lock the
1050 * inode. There's also the dinode i_size state which 1050 * inode. There's also the dinode i_size state which
1051 * can be lost via setattr during extending writes (we 1051 * can be lost via setattr during extending writes (we
1052 * set inode->i_size at the end of a write. */ 1052 * set inode->i_size at the end of a write. */
1053 if (should_remove_suid(dentry)) { 1053 if (should_remove_suid(dentry)) {
1054 if (meta_level == 0) { 1054 if (meta_level == 0) {
1055 ocfs2_meta_unlock(inode, meta_level); 1055 ocfs2_meta_unlock(inode, meta_level);
1056 meta_level = 1; 1056 meta_level = 1;
1057 continue; 1057 continue;
1058 } 1058 }
1059 1059
1060 ret = ocfs2_write_remove_suid(inode); 1060 ret = ocfs2_write_remove_suid(inode);
1061 if (ret < 0) { 1061 if (ret < 0) {
1062 mlog_errno(ret); 1062 mlog_errno(ret);
1063 goto out_unlock; 1063 goto out_unlock;
1064 } 1064 }
1065 } 1065 }
1066 1066
1067 /* work on a copy of ppos until we're sure that we won't have 1067 /* work on a copy of ppos until we're sure that we won't have
1068 * to recalculate it due to relocking. */ 1068 * to recalculate it due to relocking. */
1069 if (appending) { 1069 if (appending) {
1070 saved_pos = i_size_read(inode); 1070 saved_pos = i_size_read(inode);
1071 mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); 1071 mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos);
1072 } else { 1072 } else {
1073 saved_pos = *ppos; 1073 saved_pos = *ppos;
1074 } 1074 }
1075 newsize = count + saved_pos; 1075 newsize = count + saved_pos;
1076 1076
1077 mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", 1077 mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
1078 (long long) saved_pos, (long long) newsize, 1078 (long long) saved_pos, (long long) newsize,
1079 (long long) i_size_read(inode)); 1079 (long long) i_size_read(inode));
1080 1080
1081 /* No need for a higher level metadata lock if we're 1081 /* No need for a higher level metadata lock if we're
1082 * never going past i_size. */ 1082 * never going past i_size. */
1083 if (newsize <= i_size_read(inode)) 1083 if (newsize <= i_size_read(inode))
1084 break; 1084 break;
1085 1085
1086 if (meta_level == 0) { 1086 if (meta_level == 0) {
1087 ocfs2_meta_unlock(inode, meta_level); 1087 ocfs2_meta_unlock(inode, meta_level);
1088 meta_level = 1; 1088 meta_level = 1;
1089 continue; 1089 continue;
1090 } 1090 }
1091 1091
1092 spin_lock(&OCFS2_I(inode)->ip_lock); 1092 spin_lock(&OCFS2_I(inode)->ip_lock);
1093 clusters = ocfs2_clusters_for_bytes(inode->i_sb, newsize) - 1093 clusters = ocfs2_clusters_for_bytes(inode->i_sb, newsize) -
1094 OCFS2_I(inode)->ip_clusters; 1094 OCFS2_I(inode)->ip_clusters;
1095 spin_unlock(&OCFS2_I(inode)->ip_lock); 1095 spin_unlock(&OCFS2_I(inode)->ip_lock);
1096 1096
1097 mlog(0, "Writing at EOF, may need more allocation: " 1097 mlog(0, "Writing at EOF, may need more allocation: "
1098 "i_size = %lld, newsize = %lld, need %u clusters\n", 1098 "i_size = %lld, newsize = %lld, need %u clusters\n",
1099 (long long) i_size_read(inode), (long long) newsize, 1099 (long long) i_size_read(inode), (long long) newsize,
1100 clusters); 1100 clusters);
1101 1101
1102 /* We only want to continue the rest of this loop if 1102 /* We only want to continue the rest of this loop if
1103 * our extend will actually require more 1103 * our extend will actually require more
1104 * allocation. */ 1104 * allocation. */
1105 if (!clusters) 1105 if (!clusters)
1106 break; 1106 break;
1107 1107
1108 ret = ocfs2_extend_file(inode, NULL, newsize, count); 1108 ret = ocfs2_extend_file(inode, NULL, newsize, count);
1109 if (ret < 0) { 1109 if (ret < 0) {
1110 if (ret != -ENOSPC) 1110 if (ret != -ENOSPC)
1111 mlog_errno(ret); 1111 mlog_errno(ret);
1112 goto out_unlock; 1112 goto out_unlock;
1113 } 1113 }
1114 break; 1114 break;
1115 } 1115 }
1116 1116
1117 if (appending) 1117 if (appending)
1118 *ppos = saved_pos; 1118 *ppos = saved_pos;
1119 1119
1120 out_unlock: 1120 out_unlock:
1121 ocfs2_meta_unlock(inode, meta_level); 1121 ocfs2_meta_unlock(inode, meta_level);
1122 1122
1123 out: 1123 out:
1124 return ret; 1124 return ret;
1125 } 1125 }
1126 1126
/*
 * ->aio_write for ocfs2 regular files.
 *
 * Lock ordering is i_mutex -> i_alloc_sem -> rw_lock (matching
 * ocfs2_setattr); buffered writers take the rw lock exclusive while
 * concurrent O_DIRECT writers share it at level 0.  For async O_DIRECT
 * I/O the rw lock and i_alloc_sem may be released later by
 * ocfs2_dio_end_io rather than here — see the comment below.
 */
static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
				    const struct iovec *iov,
				    unsigned long nr_segs,
				    loff_t pos)
{
	int ret, rw_level, have_alloc_sem = 0;
	struct file *filp = iocb->ki_filp;
	struct inode *inode = filp->f_path.dentry->d_inode;
	int appending = filp->f_flags & O_APPEND ? 1 : 0;

	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
		   (unsigned int)nr_segs,
		   filp->f_path.dentry->d_name.len,
		   filp->f_path.dentry->d_name.name);

	/* happy write of zero bytes */
	if (iocb->ki_left == 0)
		return 0;

	mutex_lock(&inode->i_mutex);
	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
	if (filp->f_flags & O_DIRECT) {
		have_alloc_sem = 1;
		down_read(&inode->i_alloc_sem);
	}

	/* concurrent O_DIRECT writes are allowed */
	rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1;
	ret = ocfs2_rw_lock(inode, rw_level);
	if (ret < 0) {
		/* -1 tells the cleanup path the rw lock was never taken. */
		rw_level = -1;
		mlog_errno(ret);
		goto out;
	}

	/* May rewrite iocb->ki_pos for O_APPEND and extend allocation. */
	ret = ocfs2_prepare_inode_for_write(filp->f_path.dentry, &iocb->ki_pos,
					    iocb->ki_left, appending);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* communicate with ocfs2_dio_end_io */
	ocfs2_iocb_set_rw_locked(iocb);

	ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos);

	/* buffered aio wouldn't have proper lock coverage today */
	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));

	/*
	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
	 * function pointer which is called when o_direct io completes so that
	 * it can unlock our rw lock.  (it's the clustered equivalent of
	 * i_alloc_sem; protects truncate from racing with pending ios).
	 * Unfortunately there are error cases which call end_io and others
	 * that don't.  so we don't have to unlock the rw_lock if either an
	 * async dio is going to do it in the future or an end_io after an
	 * error has already done it.
	 */
	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
		rw_level = -1;
		have_alloc_sem = 0;
	}

out:
	if (have_alloc_sem)
		up_read(&inode->i_alloc_sem);
	if (rw_level != -1)
		ocfs2_rw_unlock(inode, rw_level);
	mutex_unlock(&inode->i_mutex);

	mlog_exit(ret);
	return ret;
}
1202 1202
/*
 * ->splice_write for ocfs2: splice @len bytes from @pipe into @out at
 * *@ppos.
 *
 * Takes the cluster rw lock exclusive (level 1) around the generic
 * nolock splice path; inode_double_lock provides the local i_mutex
 * ordering for both the file inode and the pipe inode.
 */
static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
				       struct file *out,
				       loff_t *ppos,
				       size_t len,
				       unsigned int flags)
{
	int ret;
	struct inode *inode = out->f_path.dentry->d_inode;

	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
		   (unsigned int)len,
		   out->f_path.dentry->d_name.len,
		   out->f_path.dentry->d_name.name);

	inode_double_lock(inode, pipe->inode);

	ret = ocfs2_rw_lock(inode, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* Handles O_APPEND position, suid clearing and any needed
	 * cluster allocation before the data is written. */
	ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

	/* ok, we're done with i_size and alloc work */
	ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);

out_unlock:
	ocfs2_rw_unlock(inode, 1);
out:
	inode_double_unlock(inode, pipe->inode);

	mlog_exit(ret);
	return ret;
}
1242 1242
/*
 * ->splice_read for ocfs2: splice @len bytes from @in at *@ppos into
 * @pipe.
 *
 * Like ocfs2_file_aio_read(), reads do not hold the rw lock; the meta
 * lock is taken and immediately dropped purely to refresh inode fields
 * (i_size etc.) from the cluster before the generic splice runs.
 */
static ssize_t ocfs2_file_splice_read(struct file *in,
				      loff_t *ppos,
				      struct pipe_inode_info *pipe,
				      size_t len,
				      unsigned int flags)
{
	int ret = 0;
	struct inode *inode = in->f_path.dentry->d_inode;

	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
		   (unsigned int)len,
		   in->f_path.dentry->d_name.len,
		   in->f_path.dentry->d_name.name);

	/*
	 * See the comment in ocfs2_file_aio_read()
	 */
	ret = ocfs2_meta_lock(inode, NULL, 0);
	if (ret < 0) {
		mlog_errno(ret);
		goto bail;
	}
	ocfs2_meta_unlock(inode, 0);

	ret = generic_file_splice_read(in, ppos, pipe, len, flags);

bail:
	mlog_exit(ret);
	return ret;
}
1273 1273
/*
 * ->aio_read for ocfs2 regular files.
 *
 * Buffered reads rely on ->readpage() for protection; only O_DIRECT
 * reads take i_alloc_sem and a shared (level 0) rw lock to keep
 * pending I/O from racing with truncate.  As with writes, an async
 * O_DIRECT read may hand unlock duty to ocfs2_dio_end_io.
 */
static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
				   const struct iovec *iov,
				   unsigned long nr_segs,
				   loff_t pos)
{
	int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
	struct file *filp = iocb->ki_filp;
	struct inode *inode = filp->f_path.dentry->d_inode;

	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
		   (unsigned int)nr_segs,
		   filp->f_path.dentry->d_name.len,
		   filp->f_path.dentry->d_name.name);

	if (!inode) {
		ret = -EINVAL;
		mlog_errno(ret);
		goto bail;
	}

	/*
	 * buffered reads protect themselves in ->readpage().  O_DIRECT reads
	 * need locks to protect pending reads from racing with truncate.
	 */
	if (filp->f_flags & O_DIRECT) {
		down_read(&inode->i_alloc_sem);
		have_alloc_sem = 1;

		ret = ocfs2_rw_lock(inode, 0);
		if (ret < 0) {
			mlog_errno(ret);
			goto bail;
		}
		rw_level = 0;
		/* communicate with ocfs2_dio_end_io */
		ocfs2_iocb_set_rw_locked(iocb);
	}

	/*
	 * We're fine letting folks race truncates and extending
	 * writes with read across the cluster, just like they can
	 * locally. Hence no rw_lock during read.
	 *
	 * Take and drop the meta data lock to update inode fields
	 * like i_size. This allows the checks down below
	 * generic_file_aio_read() a chance of actually working.
	 */
	ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
	if (ret < 0) {
		mlog_errno(ret);
		goto bail;
	}
	ocfs2_meta_unlock(inode, lock_level);

	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
	if (ret == -EINVAL)
		mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n");

	/* buffered aio wouldn't have proper lock coverage today */
	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));

	/* see ocfs2_file_aio_write */
	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
		rw_level = -1;
		have_alloc_sem = 0;
	}

bail:
	if (have_alloc_sem)
		up_read(&inode->i_alloc_sem);
	if (rw_level != -1)
		ocfs2_rw_unlock(inode, rw_level);
	mlog_exit(ret);

	return ret;
}
1350 1350
/* Inode operations for ocfs2 regular files. */
struct inode_operations ocfs2_file_iops = {
	.setattr	= ocfs2_setattr,
	.getattr	= ocfs2_getattr,
	.permission	= ocfs2_permission,
};
1356 1356
/* Inode operations for special files (devices, fifos, sockets);
 * currently identical to ocfs2_file_iops but kept separate so the
 * two can diverge independently. */
struct inode_operations ocfs2_special_file_iops = {
	.setattr	= ocfs2_setattr,
	.getattr	= ocfs2_getattr,
	.permission	= ocfs2_permission,
};
1362 1362
/* File operations for ocfs2 regular files.  Synchronous read/write go
 * through the generic do_sync_* wrappers on top of the cluster-aware
 * aio entry points defined above. */
const struct file_operations ocfs2_fops = {
	.read		= do_sync_read,
	.write		= do_sync_write,
	.sendfile	= generic_file_sendfile,
	.mmap		= ocfs2_mmap,
	.fsync		= ocfs2_sync_file,
	.release	= ocfs2_file_release,
	.open		= ocfs2_file_open,
	.aio_read	= ocfs2_file_aio_read,
	.aio_write	= ocfs2_file_aio_write,
	.ioctl		= ocfs2_ioctl,
	.splice_read	= ocfs2_file_splice_read,
	.splice_write	= ocfs2_file_splice_write,
};
1377 1377
/* File operations for ocfs2 directories. */
const struct file_operations ocfs2_dops = {
	.read		= generic_read_dir,
	.readdir	= ocfs2_readdir,
	.fsync		= ocfs2_sync_file,
	.ioctl		= ocfs2_ioctl,
};
1384 1384