Commit d28c91740ae2cd1d963f9e4e3889789894cb6d52
Committed by
Linus Torvalds
1 parent
6db5fc5d53
Exists in
master
and in
20 other branches
[PATCH] struct path: convert ocfs2
Signed-off-by: Josef Sipek <jsipek@fsl.cs.sunysb.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 4 changed files with 22 additions and 22 deletions Inline Diff
fs/ocfs2/aops.c
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | 4 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU General Public | 7 | * modify it under the terms of the GNU General Public |
8 | * License as published by the Free Software Foundation; either | 8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. | 9 | * version 2 of the License, or (at your option) any later version. |
10 | * | 10 | * |
11 | * This program is distributed in the hope that it will be useful, | 11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * General Public License for more details. | 14 | * General Public License for more details. |
15 | * | 15 | * |
16 | * You should have received a copy of the GNU General Public | 16 | * You should have received a copy of the GNU General Public |
17 | * License along with this program; if not, write to the | 17 | * License along with this program; if not, write to the |
18 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 18 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
19 | * Boston, MA 021110-1307, USA. | 19 | * Boston, MA 021110-1307, USA. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/highmem.h> | 24 | #include <linux/highmem.h> |
25 | #include <linux/pagemap.h> | 25 | #include <linux/pagemap.h> |
26 | #include <asm/byteorder.h> | 26 | #include <asm/byteorder.h> |
27 | 27 | ||
28 | #define MLOG_MASK_PREFIX ML_FILE_IO | 28 | #define MLOG_MASK_PREFIX ML_FILE_IO |
29 | #include <cluster/masklog.h> | 29 | #include <cluster/masklog.h> |
30 | 30 | ||
31 | #include "ocfs2.h" | 31 | #include "ocfs2.h" |
32 | 32 | ||
33 | #include "alloc.h" | 33 | #include "alloc.h" |
34 | #include "aops.h" | 34 | #include "aops.h" |
35 | #include "dlmglue.h" | 35 | #include "dlmglue.h" |
36 | #include "extent_map.h" | 36 | #include "extent_map.h" |
37 | #include "file.h" | 37 | #include "file.h" |
38 | #include "inode.h" | 38 | #include "inode.h" |
39 | #include "journal.h" | 39 | #include "journal.h" |
40 | #include "super.h" | 40 | #include "super.h" |
41 | #include "symlink.h" | 41 | #include "symlink.h" |
42 | 42 | ||
43 | #include "buffer_head_io.h" | 43 | #include "buffer_head_io.h" |
44 | 44 | ||
45 | static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, | 45 | static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, |
46 | struct buffer_head *bh_result, int create) | 46 | struct buffer_head *bh_result, int create) |
47 | { | 47 | { |
48 | int err = -EIO; | 48 | int err = -EIO; |
49 | int status; | 49 | int status; |
50 | struct ocfs2_dinode *fe = NULL; | 50 | struct ocfs2_dinode *fe = NULL; |
51 | struct buffer_head *bh = NULL; | 51 | struct buffer_head *bh = NULL; |
52 | struct buffer_head *buffer_cache_bh = NULL; | 52 | struct buffer_head *buffer_cache_bh = NULL; |
53 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 53 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
54 | void *kaddr; | 54 | void *kaddr; |
55 | 55 | ||
56 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, | 56 | mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, |
57 | (unsigned long long)iblock, bh_result, create); | 57 | (unsigned long long)iblock, bh_result, create); |
58 | 58 | ||
59 | BUG_ON(ocfs2_inode_is_fast_symlink(inode)); | 59 | BUG_ON(ocfs2_inode_is_fast_symlink(inode)); |
60 | 60 | ||
61 | if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) { | 61 | if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) { |
62 | mlog(ML_ERROR, "block offset > PATH_MAX: %llu", | 62 | mlog(ML_ERROR, "block offset > PATH_MAX: %llu", |
63 | (unsigned long long)iblock); | 63 | (unsigned long long)iblock); |
64 | goto bail; | 64 | goto bail; |
65 | } | 65 | } |
66 | 66 | ||
67 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 67 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), |
68 | OCFS2_I(inode)->ip_blkno, | 68 | OCFS2_I(inode)->ip_blkno, |
69 | &bh, OCFS2_BH_CACHED, inode); | 69 | &bh, OCFS2_BH_CACHED, inode); |
70 | if (status < 0) { | 70 | if (status < 0) { |
71 | mlog_errno(status); | 71 | mlog_errno(status); |
72 | goto bail; | 72 | goto bail; |
73 | } | 73 | } |
74 | fe = (struct ocfs2_dinode *) bh->b_data; | 74 | fe = (struct ocfs2_dinode *) bh->b_data; |
75 | 75 | ||
76 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 76 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
77 | mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", | 77 | mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", |
78 | (unsigned long long)fe->i_blkno, 7, fe->i_signature); | 78 | (unsigned long long)fe->i_blkno, 7, fe->i_signature); |
79 | goto bail; | 79 | goto bail; |
80 | } | 80 | } |
81 | 81 | ||
82 | if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, | 82 | if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, |
83 | le32_to_cpu(fe->i_clusters))) { | 83 | le32_to_cpu(fe->i_clusters))) { |
84 | mlog(ML_ERROR, "block offset is outside the allocated size: " | 84 | mlog(ML_ERROR, "block offset is outside the allocated size: " |
85 | "%llu\n", (unsigned long long)iblock); | 85 | "%llu\n", (unsigned long long)iblock); |
86 | goto bail; | 86 | goto bail; |
87 | } | 87 | } |
88 | 88 | ||
89 | /* We don't use the page cache to create symlink data, so if | 89 | /* We don't use the page cache to create symlink data, so if |
90 | * need be, copy it over from the buffer cache. */ | 90 | * need be, copy it over from the buffer cache. */ |
91 | if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) { | 91 | if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) { |
92 | u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + | 92 | u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + |
93 | iblock; | 93 | iblock; |
94 | buffer_cache_bh = sb_getblk(osb->sb, blkno); | 94 | buffer_cache_bh = sb_getblk(osb->sb, blkno); |
95 | if (!buffer_cache_bh) { | 95 | if (!buffer_cache_bh) { |
96 | mlog(ML_ERROR, "couldn't getblock for symlink!\n"); | 96 | mlog(ML_ERROR, "couldn't getblock for symlink!\n"); |
97 | goto bail; | 97 | goto bail; |
98 | } | 98 | } |
99 | 99 | ||
100 | /* we haven't locked out transactions, so a commit | 100 | /* we haven't locked out transactions, so a commit |
101 | * could've happened. Since we've got a reference on | 101 | * could've happened. Since we've got a reference on |
102 | * the bh, even if it commits while we're doing the | 102 | * the bh, even if it commits while we're doing the |
103 | * copy, the data is still good. */ | 103 | * copy, the data is still good. */ |
104 | if (buffer_jbd(buffer_cache_bh) | 104 | if (buffer_jbd(buffer_cache_bh) |
105 | && ocfs2_inode_is_new(inode)) { | 105 | && ocfs2_inode_is_new(inode)) { |
106 | kaddr = kmap_atomic(bh_result->b_page, KM_USER0); | 106 | kaddr = kmap_atomic(bh_result->b_page, KM_USER0); |
107 | if (!kaddr) { | 107 | if (!kaddr) { |
108 | mlog(ML_ERROR, "couldn't kmap!\n"); | 108 | mlog(ML_ERROR, "couldn't kmap!\n"); |
109 | goto bail; | 109 | goto bail; |
110 | } | 110 | } |
111 | memcpy(kaddr + (bh_result->b_size * iblock), | 111 | memcpy(kaddr + (bh_result->b_size * iblock), |
112 | buffer_cache_bh->b_data, | 112 | buffer_cache_bh->b_data, |
113 | bh_result->b_size); | 113 | bh_result->b_size); |
114 | kunmap_atomic(kaddr, KM_USER0); | 114 | kunmap_atomic(kaddr, KM_USER0); |
115 | set_buffer_uptodate(bh_result); | 115 | set_buffer_uptodate(bh_result); |
116 | } | 116 | } |
117 | brelse(buffer_cache_bh); | 117 | brelse(buffer_cache_bh); |
118 | } | 118 | } |
119 | 119 | ||
120 | map_bh(bh_result, inode->i_sb, | 120 | map_bh(bh_result, inode->i_sb, |
121 | le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock); | 121 | le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock); |
122 | 122 | ||
123 | err = 0; | 123 | err = 0; |
124 | 124 | ||
125 | bail: | 125 | bail: |
126 | if (bh) | 126 | if (bh) |
127 | brelse(bh); | 127 | brelse(bh); |
128 | 128 | ||
129 | mlog_exit(err); | 129 | mlog_exit(err); |
130 | return err; | 130 | return err; |
131 | } | 131 | } |
132 | 132 | ||
/*
 * Map logical file block @iblock of @inode to a physical block for
 * buffered I/O, filling @bh_result.
 *
 * Symlinks are handed off to ocfs2_symlink_get_block().  For regular
 * files the lookup goes through the extent map; a block inside the
 * locally cached ip_clusters count must exist (ocfs2 has no holes).
 *
 * Returns 0 on success.  Every failure is flattened to -EIO at the
 * "bail" label, regardless of the underlying error code.
 */
static int ocfs2_get_block(struct inode *inode, sector_t iblock,
			   struct buffer_head *bh_result, int create)
{
	int err = 0;
	u64 p_blkno, past_eof;

	mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
		   (unsigned long long)iblock, bh_result, create);

	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
		mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n",
		     inode, inode->i_ino);

	if (S_ISLNK(inode->i_mode)) {
		/* this always does I/O for some reason. */
		err = ocfs2_symlink_get_block(inode, iblock, bh_result, create);
		goto bail;
	}

	/* this can happen if another node truncs after our extend! */
	spin_lock(&OCFS2_I(inode)->ip_lock);
	if (iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
					       OCFS2_I(inode)->ip_clusters))
		err = -EIO;
	spin_unlock(&OCFS2_I(inode)->ip_lock);
	if (err)
		goto bail;

	err = ocfs2_extent_map_get_blocks(inode, iblock, 1, &p_blkno,
					  NULL);
	if (err) {
		mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
		     "%llu, NULL)\n", err, inode, (unsigned long long)iblock,
		     (unsigned long long)p_blkno);
		goto bail;
	}

	map_bh(bh_result, inode->i_sb, p_blkno);

	/* A mapping to block 0 is never valid file data; flag it as an
	 * I/O error (the buffer stays mapped, but err forces -EIO). */
	if (bh_result->b_blocknr == 0) {
		err = -EIO;
		mlog(ML_ERROR, "iblock = %llu p_blkno = %llu blkno=(%llu)\n",
		     (unsigned long long)iblock,
		     (unsigned long long)p_blkno,
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
	}

	past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
	mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
	     (unsigned long long)past_eof);

	/* Extending writes get a "new" buffer so generic code zeroes
	 * the unwritten tail properly. */
	if (create && (iblock >= past_eof))
		set_buffer_new(bh_result);

bail:
	if (err < 0)
		err = -EIO;

	mlog_exit(err);
	return err;
}
194 | 194 | ||
/*
 * ->readpage() for ocfs2.
 *
 * Lock order: cluster meta lock -> ip_alloc_sem -> cluster data lock,
 * all shared, around block_read_full_page().
 *
 * The *_lock_with_page() helpers can return AOP_TRUNCATED_PAGE, in
 * which case the page has already been unlocked for us — the "unlock"
 * flag tracks whether we still own the page lock on exit.  Pages that
 * start at or past i_size are zero-filled here instead of going to
 * block_read_full_page(); see the comment in the body.
 */
static int ocfs2_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;
	int ret, unlock = 1;

	mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0));

	ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page);
	if (ret != 0) {
		if (ret == AOP_TRUNCATED_PAGE)
			unlock = 0;
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/*
	 * i_size might have just been updated as we grabed the meta lock. We
	 * might now be discovering a truncate that hit on another node.
	 * block_read_full_page->get_block freaks out if it is asked to read
	 * beyond the end of a file, so we check here. Callers
	 * (generic_file_read, fault->nopage) are clever enough to check i_size
	 * and notice that the page they just read isn't needed.
	 *
	 * XXX sys_readahead() seems to get that wrong?
	 */
	if (start >= i_size_read(inode)) {
		char *addr = kmap(page);
		memset(addr, 0, PAGE_SIZE);
		flush_dcache_page(page);
		kunmap(page);
		SetPageUptodate(page);
		ret = 0;
		goto out_alloc;
	}

	ret = ocfs2_data_lock_with_page(inode, 0, page);
	if (ret != 0) {
		if (ret == AOP_TRUNCATED_PAGE)
			unlock = 0;
		mlog_errno(ret);
		goto out_alloc;
	}

	/* block_read_full_page() takes over the page lock from here. */
	ret = block_read_full_page(page, ocfs2_get_block);
	unlock = 0;

	ocfs2_data_unlock(inode, 0);
out_alloc:
	up_read(&OCFS2_I(inode)->ip_alloc_sem);
	ocfs2_meta_unlock(inode, 0);
out:
	if (unlock)
		unlock_page(page);
	mlog_exit(ret);
	return ret;
}
254 | 254 | ||
255 | /* Note: Because we don't support holes, our allocation has | 255 | /* Note: Because we don't support holes, our allocation has |
256 | * already happened (allocation writes zeros to the file data) | 256 | * already happened (allocation writes zeros to the file data) |
257 | * so we don't have to worry about ordered writes in | 257 | * so we don't have to worry about ordered writes in |
258 | * ocfs2_writepage. | 258 | * ocfs2_writepage. |
259 | * | 259 | * |
260 | * ->writepage is called during the process of invalidating the page cache | 260 | * ->writepage is called during the process of invalidating the page cache |
261 | * during blocked lock processing. It can't block on any cluster locks | 261 | * during blocked lock processing. It can't block on any cluster locks |
262 | * to during block mapping. It's relying on the fact that the block | 262 | * to during block mapping. It's relying on the fact that the block |
263 | * mapping can't have disappeared under the dirty pages that it is | 263 | * mapping can't have disappeared under the dirty pages that it is |
264 | * being asked to write back. | 264 | * being asked to write back. |
265 | */ | 265 | */ |
266 | static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) | 266 | static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) |
267 | { | 267 | { |
268 | int ret; | 268 | int ret; |
269 | 269 | ||
270 | mlog_entry("(0x%p)\n", page); | 270 | mlog_entry("(0x%p)\n", page); |
271 | 271 | ||
272 | ret = block_write_full_page(page, ocfs2_get_block, wbc); | 272 | ret = block_write_full_page(page, ocfs2_get_block, wbc); |
273 | 273 | ||
274 | mlog_exit(ret); | 274 | mlog_exit(ret); |
275 | 275 | ||
276 | return ret; | 276 | return ret; |
277 | } | 277 | } |
278 | 278 | ||
279 | /* This can also be called from ocfs2_write_zero_page() which has done | 279 | /* This can also be called from ocfs2_write_zero_page() which has done |
280 | * it's own cluster locking. */ | 280 | * it's own cluster locking. */ |
281 | int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, | 281 | int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, |
282 | unsigned from, unsigned to) | 282 | unsigned from, unsigned to) |
283 | { | 283 | { |
284 | int ret; | 284 | int ret; |
285 | 285 | ||
286 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | 286 | down_read(&OCFS2_I(inode)->ip_alloc_sem); |
287 | 287 | ||
288 | ret = block_prepare_write(page, from, to, ocfs2_get_block); | 288 | ret = block_prepare_write(page, from, to, ocfs2_get_block); |
289 | 289 | ||
290 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | 290 | up_read(&OCFS2_I(inode)->ip_alloc_sem); |
291 | 291 | ||
292 | return ret; | 292 | return ret; |
293 | } | 293 | } |
294 | 294 | ||
295 | /* | 295 | /* |
296 | * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called | 296 | * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called |
297 | * from loopback. It must be able to perform its own locking around | 297 | * from loopback. It must be able to perform its own locking around |
298 | * ocfs2_get_block(). | 298 | * ocfs2_get_block(). |
299 | */ | 299 | */ |
300 | static int ocfs2_prepare_write(struct file *file, struct page *page, | 300 | static int ocfs2_prepare_write(struct file *file, struct page *page, |
301 | unsigned from, unsigned to) | 301 | unsigned from, unsigned to) |
302 | { | 302 | { |
303 | struct inode *inode = page->mapping->host; | 303 | struct inode *inode = page->mapping->host; |
304 | int ret; | 304 | int ret; |
305 | 305 | ||
306 | mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to); | 306 | mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to); |
307 | 307 | ||
308 | ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); | 308 | ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); |
309 | if (ret != 0) { | 309 | if (ret != 0) { |
310 | mlog_errno(ret); | 310 | mlog_errno(ret); |
311 | goto out; | 311 | goto out; |
312 | } | 312 | } |
313 | 313 | ||
314 | ret = ocfs2_prepare_write_nolock(inode, page, from, to); | 314 | ret = ocfs2_prepare_write_nolock(inode, page, from, to); |
315 | 315 | ||
316 | ocfs2_meta_unlock(inode, 0); | 316 | ocfs2_meta_unlock(inode, 0); |
317 | out: | 317 | out: |
318 | mlog_exit(ret); | 318 | mlog_exit(ret); |
319 | return ret; | 319 | return ret; |
320 | } | 320 | } |
321 | 321 | ||
322 | /* Taken from ext3. We don't necessarily need the full blown | 322 | /* Taken from ext3. We don't necessarily need the full blown |
323 | * functionality yet, but IMHO it's better to cut and paste the whole | 323 | * functionality yet, but IMHO it's better to cut and paste the whole |
324 | * thing so we can avoid introducing our own bugs (and easily pick up | 324 | * thing so we can avoid introducing our own bugs (and easily pick up |
325 | * their fixes when they happen) --Mark */ | 325 | * their fixes when they happen) --Mark */ |
326 | static int walk_page_buffers( handle_t *handle, | 326 | static int walk_page_buffers( handle_t *handle, |
327 | struct buffer_head *head, | 327 | struct buffer_head *head, |
328 | unsigned from, | 328 | unsigned from, |
329 | unsigned to, | 329 | unsigned to, |
330 | int *partial, | 330 | int *partial, |
331 | int (*fn)( handle_t *handle, | 331 | int (*fn)( handle_t *handle, |
332 | struct buffer_head *bh)) | 332 | struct buffer_head *bh)) |
333 | { | 333 | { |
334 | struct buffer_head *bh; | 334 | struct buffer_head *bh; |
335 | unsigned block_start, block_end; | 335 | unsigned block_start, block_end; |
336 | unsigned blocksize = head->b_size; | 336 | unsigned blocksize = head->b_size; |
337 | int err, ret = 0; | 337 | int err, ret = 0; |
338 | struct buffer_head *next; | 338 | struct buffer_head *next; |
339 | 339 | ||
340 | for ( bh = head, block_start = 0; | 340 | for ( bh = head, block_start = 0; |
341 | ret == 0 && (bh != head || !block_start); | 341 | ret == 0 && (bh != head || !block_start); |
342 | block_start = block_end, bh = next) | 342 | block_start = block_end, bh = next) |
343 | { | 343 | { |
344 | next = bh->b_this_page; | 344 | next = bh->b_this_page; |
345 | block_end = block_start + blocksize; | 345 | block_end = block_start + blocksize; |
346 | if (block_end <= from || block_start >= to) { | 346 | if (block_end <= from || block_start >= to) { |
347 | if (partial && !buffer_uptodate(bh)) | 347 | if (partial && !buffer_uptodate(bh)) |
348 | *partial = 1; | 348 | *partial = 1; |
349 | continue; | 349 | continue; |
350 | } | 350 | } |
351 | err = (*fn)(handle, bh); | 351 | err = (*fn)(handle, bh); |
352 | if (!ret) | 352 | if (!ret) |
353 | ret = err; | 353 | ret = err; |
354 | } | 354 | } |
355 | return ret; | 355 | return ret; |
356 | } | 356 | } |
357 | 357 | ||
/*
 * Start a journal transaction for an inode update driven by a page
 * write.  In ordered-data mode, the page's buffers in [@from, @to) are
 * additionally dirtied into the journal via walk_page_buffers() so the
 * data reaches disk before the transaction commits.
 *
 * Returns the running handle on success.  On failure the handle (if it
 * was started) is committed and an ERR_PTR()-encoded error is returned
 * instead.  Note ocfs2_start_trans() signals failure with NULL, not
 * ERR_PTR — hence the explicit !handle check and -ENOMEM mapping.
 */
handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
				      struct page *page,
				      unsigned from,
				      unsigned to)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle = NULL;
	int ret = 0;

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (!handle) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	if (ocfs2_should_order_data(inode)) {
		ret = walk_page_buffers(handle,
					page_buffers(page),
					from, to, NULL,
					ocfs2_journal_dirty_data);
		if (ret < 0)
			mlog_errno(ret);
	}
out:
	if (ret) {
		if (handle)
			ocfs2_commit_trans(osb, handle);
		handle = ERR_PTR(ret);
	}
	return handle;
}
390 | 390 | ||
391 | static int ocfs2_commit_write(struct file *file, struct page *page, | 391 | static int ocfs2_commit_write(struct file *file, struct page *page, |
392 | unsigned from, unsigned to) | 392 | unsigned from, unsigned to) |
393 | { | 393 | { |
394 | int ret; | 394 | int ret; |
395 | struct buffer_head *di_bh = NULL; | 395 | struct buffer_head *di_bh = NULL; |
396 | struct inode *inode = page->mapping->host; | 396 | struct inode *inode = page->mapping->host; |
397 | handle_t *handle = NULL; | 397 | handle_t *handle = NULL; |
398 | struct ocfs2_dinode *di; | 398 | struct ocfs2_dinode *di; |
399 | 399 | ||
400 | mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to); | 400 | mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to); |
401 | 401 | ||
402 | /* NOTE: ocfs2_file_aio_write has ensured that it's safe for | 402 | /* NOTE: ocfs2_file_aio_write has ensured that it's safe for |
403 | * us to continue here without rechecking the I/O against | 403 | * us to continue here without rechecking the I/O against |
404 | * changed inode values. | 404 | * changed inode values. |
405 | * | 405 | * |
406 | * 1) We're currently holding the inode alloc lock, so no | 406 | * 1) We're currently holding the inode alloc lock, so no |
407 | * nodes can change it underneath us. | 407 | * nodes can change it underneath us. |
408 | * | 408 | * |
409 | * 2) We've had to take the metadata lock at least once | 409 | * 2) We've had to take the metadata lock at least once |
410 | * already to check for extending writes, suid removal, etc. | 410 | * already to check for extending writes, suid removal, etc. |
411 | * The meta data update code then ensures that we don't get a | 411 | * The meta data update code then ensures that we don't get a |
412 | * stale inode allocation image (i_size, i_clusters, etc). | 412 | * stale inode allocation image (i_size, i_clusters, etc). |
413 | */ | 413 | */ |
414 | 414 | ||
415 | ret = ocfs2_meta_lock_with_page(inode, &di_bh, 1, page); | 415 | ret = ocfs2_meta_lock_with_page(inode, &di_bh, 1, page); |
416 | if (ret != 0) { | 416 | if (ret != 0) { |
417 | mlog_errno(ret); | 417 | mlog_errno(ret); |
418 | goto out; | 418 | goto out; |
419 | } | 419 | } |
420 | 420 | ||
421 | ret = ocfs2_data_lock_with_page(inode, 1, page); | 421 | ret = ocfs2_data_lock_with_page(inode, 1, page); |
422 | if (ret != 0) { | 422 | if (ret != 0) { |
423 | mlog_errno(ret); | 423 | mlog_errno(ret); |
424 | goto out_unlock_meta; | 424 | goto out_unlock_meta; |
425 | } | 425 | } |
426 | 426 | ||
427 | handle = ocfs2_start_walk_page_trans(inode, page, from, to); | 427 | handle = ocfs2_start_walk_page_trans(inode, page, from, to); |
428 | if (IS_ERR(handle)) { | 428 | if (IS_ERR(handle)) { |
429 | ret = PTR_ERR(handle); | 429 | ret = PTR_ERR(handle); |
430 | goto out_unlock_data; | 430 | goto out_unlock_data; |
431 | } | 431 | } |
432 | 432 | ||
433 | /* Mark our buffer early. We'd rather catch this error up here | 433 | /* Mark our buffer early. We'd rather catch this error up here |
434 | * as opposed to after a successful commit_write which would | 434 | * as opposed to after a successful commit_write which would |
435 | * require us to set back inode->i_size. */ | 435 | * require us to set back inode->i_size. */ |
436 | ret = ocfs2_journal_access(handle, inode, di_bh, | 436 | ret = ocfs2_journal_access(handle, inode, di_bh, |
437 | OCFS2_JOURNAL_ACCESS_WRITE); | 437 | OCFS2_JOURNAL_ACCESS_WRITE); |
438 | if (ret < 0) { | 438 | if (ret < 0) { |
439 | mlog_errno(ret); | 439 | mlog_errno(ret); |
440 | goto out_commit; | 440 | goto out_commit; |
441 | } | 441 | } |
442 | 442 | ||
443 | /* might update i_size */ | 443 | /* might update i_size */ |
444 | ret = generic_commit_write(file, page, from, to); | 444 | ret = generic_commit_write(file, page, from, to); |
445 | if (ret < 0) { | 445 | if (ret < 0) { |
446 | mlog_errno(ret); | 446 | mlog_errno(ret); |
447 | goto out_commit; | 447 | goto out_commit; |
448 | } | 448 | } |
449 | 449 | ||
450 | di = (struct ocfs2_dinode *)di_bh->b_data; | 450 | di = (struct ocfs2_dinode *)di_bh->b_data; |
451 | 451 | ||
452 | /* ocfs2_mark_inode_dirty() is too heavy to use here. */ | 452 | /* ocfs2_mark_inode_dirty() is too heavy to use here. */ |
453 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 453 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
454 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); | 454 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); |
455 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 455 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
456 | 456 | ||
457 | inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode))); | 457 | inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode))); |
458 | di->i_size = cpu_to_le64((u64)i_size_read(inode)); | 458 | di->i_size = cpu_to_le64((u64)i_size_read(inode)); |
459 | 459 | ||
460 | ret = ocfs2_journal_dirty(handle, di_bh); | 460 | ret = ocfs2_journal_dirty(handle, di_bh); |
461 | if (ret < 0) { | 461 | if (ret < 0) { |
462 | mlog_errno(ret); | 462 | mlog_errno(ret); |
463 | goto out_commit; | 463 | goto out_commit; |
464 | } | 464 | } |
465 | 465 | ||
466 | out_commit: | 466 | out_commit: |
467 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | 467 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); |
468 | out_unlock_data: | 468 | out_unlock_data: |
469 | ocfs2_data_unlock(inode, 1); | 469 | ocfs2_data_unlock(inode, 1); |
470 | out_unlock_meta: | 470 | out_unlock_meta: |
471 | ocfs2_meta_unlock(inode, 1); | 471 | ocfs2_meta_unlock(inode, 1); |
472 | out: | 472 | out: |
473 | if (di_bh) | 473 | if (di_bh) |
474 | brelse(di_bh); | 474 | brelse(di_bh); |
475 | 475 | ||
476 | mlog_exit(ret); | 476 | mlog_exit(ret); |
477 | return ret; | 477 | return ret; |
478 | } | 478 | } |
479 | 479 | ||
/*
 * ->bmap() for ocfs2: translate logical @block into a physical block
 * number (FIBMAP-style callers).
 *
 * Journal system files are node-local, so they are mapped without any
 * cluster locking; everything else takes a shared meta lock plus
 * ip_alloc_sem around the extent-map lookup.
 *
 * Returns the physical block number, or 0 on any error — the bmap
 * interface has no way to report an error code.
 */
static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
{
	sector_t status;
	u64 p_blkno = 0;
	int err = 0;
	struct inode *inode = mapping->host;

	mlog_entry("(block = %llu)\n", (unsigned long long)block);

	/* We don't need to lock journal system files, since they aren't
	 * accessed concurrently from multiple nodes.
	 */
	if (!INODE_JOURNAL(inode)) {
		err = ocfs2_meta_lock(inode, NULL, 0);
		if (err) {
			/* -ENOENT just means the inode is being wiped;
			 * not worth logging. */
			if (err != -ENOENT)
				mlog_errno(err);
			goto bail;
		}
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
	}

	err = ocfs2_extent_map_get_blocks(inode, block, 1, &p_blkno,
					  NULL);

	/* Unlock mirrors the conditional locking above. */
	if (!INODE_JOURNAL(inode)) {
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		ocfs2_meta_unlock(inode, 0);
	}

	if (err) {
		mlog(ML_ERROR, "get_blocks() failed, block = %llu\n",
		     (unsigned long long)block);
		mlog_errno(err);
		goto bail;
	}


bail:
	status = err ? 0 : p_blkno;

	mlog_exit((int)status);

	return status;
}
525 | 525 | ||
526 | /* | 526 | /* |
527 | * TODO: Make this into a generic get_blocks function. | 527 | * TODO: Make this into a generic get_blocks function. |
528 | * | 528 | * |
529 | * From do_direct_io in direct-io.c: | 529 | * From do_direct_io in direct-io.c: |
530 | * "So what we do is to permit the ->get_blocks function to populate | 530 | * "So what we do is to permit the ->get_blocks function to populate |
531 | * bh.b_size with the size of IO which is permitted at this offset and | 531 | * bh.b_size with the size of IO which is permitted at this offset and |
532 | * this i_blkbits." | 532 | * this i_blkbits." |
533 | * | 533 | * |
534 | * This function is called directly from get_more_blocks in direct-io.c. | 534 | * This function is called directly from get_more_blocks in direct-io.c. |
535 | * | 535 | * |
536 | * called like this: dio->get_blocks(dio->inode, fs_startblk, | 536 | * called like this: dio->get_blocks(dio->inode, fs_startblk, |
537 | * fs_count, map_bh, dio->rw == WRITE); | 537 | * fs_count, map_bh, dio->rw == WRITE); |
538 | */ | 538 | */ |
/*
 * ocfs2_direct_IO_get_blocks() - get_block_t callback handed to the
 * direct-io core; maps up to bh_result->b_size worth of blocks at
 * @iblock and shrinks b_size to the contiguous extent actually mapped.
 *
 * Returns 0 on success, -EIO if the request reaches past the inode's
 * allocated clusters or the extent lookup fails.  @create is accepted
 * for the get_block_t signature but never read: O_DIRECT here never
 * allocates.
 */
static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
				      struct buffer_head *bh_result, int create)
{
	int ret;
	u64 vbo_max; /* file offset, max_blocks from iblock */
	u64 p_blkno;
	int contig_blocks;
	unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;

	/* This function won't even be called if the request isn't all
	 * nicely aligned and of the right size, so there's no need
	 * for us to check any of that. */

	/* NOTE(review): vbo_max is computed but never read below —
	 * candidate for removal. */
	vbo_max = ((u64)iblock + max_blocks) << blocksize_bits;

	/* Refuse to map past the allocated clusters; ip_clusters is
	 * only stable under ip_lock. */
	spin_lock(&OCFS2_I(inode)->ip_lock);
	if ((iblock + max_blocks) >
	    ocfs2_clusters_to_blocks(inode->i_sb,
				     OCFS2_I(inode)->ip_clusters)) {
		spin_unlock(&OCFS2_I(inode)->ip_lock);
		ret = -EIO;
		goto bail;
	}
	spin_unlock(&OCFS2_I(inode)->ip_lock);

	/* This figures out the size of the next contiguous block, and
	 * our logical offset */
	ret = ocfs2_extent_map_get_blocks(inode, iblock, 1, &p_blkno,
					  &contig_blocks);
	if (ret) {
		mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
		     (unsigned long long)iblock);
		/* Normalize any lookup error to -EIO for the dio core. */
		ret = -EIO;
		goto bail;
	}

	map_bh(bh_result, inode->i_sb, p_blkno);

	/* make sure we don't map more than max_blocks blocks here as
	   that's all the kernel will handle at this point. */
	if (max_blocks < contig_blocks)
		contig_blocks = max_blocks;
	bh_result->b_size = contig_blocks << blocksize_bits;
bail:
	return ret;
}
586 | 586 | ||
587 | /* | 587 | /* |
588 | * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're | 588 | * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're |
589 | * particularly interested in the aio/dio case. Like the core uses | 589 | * particularly interested in the aio/dio case. Like the core uses |
590 | * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from | 590 | * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from |
591 | * truncation on another. | 591 | * truncation on another. |
592 | */ | 592 | */ |
593 | static void ocfs2_dio_end_io(struct kiocb *iocb, | 593 | static void ocfs2_dio_end_io(struct kiocb *iocb, |
594 | loff_t offset, | 594 | loff_t offset, |
595 | ssize_t bytes, | 595 | ssize_t bytes, |
596 | void *private) | 596 | void *private) |
597 | { | 597 | { |
598 | struct inode *inode = iocb->ki_filp->f_dentry->d_inode; | 598 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
599 | 599 | ||
600 | /* this io's submitter should not have unlocked this before we could */ | 600 | /* this io's submitter should not have unlocked this before we could */ |
601 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); | 601 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); |
602 | ocfs2_iocb_clear_rw_locked(iocb); | 602 | ocfs2_iocb_clear_rw_locked(iocb); |
603 | up_read(&inode->i_alloc_sem); | 603 | up_read(&inode->i_alloc_sem); |
604 | ocfs2_rw_unlock(inode, 0); | 604 | ocfs2_rw_unlock(inode, 0); |
605 | } | 605 | } |
606 | 606 | ||
607 | static ssize_t ocfs2_direct_IO(int rw, | 607 | static ssize_t ocfs2_direct_IO(int rw, |
608 | struct kiocb *iocb, | 608 | struct kiocb *iocb, |
609 | const struct iovec *iov, | 609 | const struct iovec *iov, |
610 | loff_t offset, | 610 | loff_t offset, |
611 | unsigned long nr_segs) | 611 | unsigned long nr_segs) |
612 | { | 612 | { |
613 | struct file *file = iocb->ki_filp; | 613 | struct file *file = iocb->ki_filp; |
614 | struct inode *inode = file->f_dentry->d_inode->i_mapping->host; | 614 | struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; |
615 | int ret; | 615 | int ret; |
616 | 616 | ||
617 | mlog_entry_void(); | 617 | mlog_entry_void(); |
618 | 618 | ||
619 | /* | 619 | /* |
620 | * We get PR data locks even for O_DIRECT. This allows | 620 | * We get PR data locks even for O_DIRECT. This allows |
621 | * concurrent O_DIRECT I/O but doesn't let O_DIRECT with | 621 | * concurrent O_DIRECT I/O but doesn't let O_DIRECT with |
622 | * extending and buffered zeroing writes race. If they did | 622 | * extending and buffered zeroing writes race. If they did |
623 | * race then the buffered zeroing could be written back after | 623 | * race then the buffered zeroing could be written back after |
624 | * the O_DIRECT I/O. It's one thing to tell people not to mix | 624 | * the O_DIRECT I/O. It's one thing to tell people not to mix |
625 | * buffered and O_DIRECT writes, but expecting them to | 625 | * buffered and O_DIRECT writes, but expecting them to |
626 | * understand that file extension is also an implicit buffered | 626 | * understand that file extension is also an implicit buffered |
627 | * write is too much. By getting the PR we force writeback of | 627 | * write is too much. By getting the PR we force writeback of |
628 | * the buffered zeroing before proceeding. | 628 | * the buffered zeroing before proceeding. |
629 | */ | 629 | */ |
630 | ret = ocfs2_data_lock(inode, 0); | 630 | ret = ocfs2_data_lock(inode, 0); |
631 | if (ret < 0) { | 631 | if (ret < 0) { |
632 | mlog_errno(ret); | 632 | mlog_errno(ret); |
633 | goto out; | 633 | goto out; |
634 | } | 634 | } |
635 | ocfs2_data_unlock(inode, 0); | 635 | ocfs2_data_unlock(inode, 0); |
636 | 636 | ||
637 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 637 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, |
638 | inode->i_sb->s_bdev, iov, offset, | 638 | inode->i_sb->s_bdev, iov, offset, |
639 | nr_segs, | 639 | nr_segs, |
640 | ocfs2_direct_IO_get_blocks, | 640 | ocfs2_direct_IO_get_blocks, |
641 | ocfs2_dio_end_io); | 641 | ocfs2_dio_end_io); |
642 | out: | 642 | out: |
643 | mlog_exit(ret); | 643 | mlog_exit(ret); |
644 | return ret; | 644 | return ret; |
645 | } | 645 | } |
646 | 646 | ||
647 | const struct address_space_operations ocfs2_aops = { | 647 | const struct address_space_operations ocfs2_aops = { |
648 | .readpage = ocfs2_readpage, | 648 | .readpage = ocfs2_readpage, |
649 | .writepage = ocfs2_writepage, | 649 | .writepage = ocfs2_writepage, |
650 | .prepare_write = ocfs2_prepare_write, | 650 | .prepare_write = ocfs2_prepare_write, |
651 | .commit_write = ocfs2_commit_write, | 651 | .commit_write = ocfs2_commit_write, |
652 | .bmap = ocfs2_bmap, | 652 | .bmap = ocfs2_bmap, |
653 | .sync_page = block_sync_page, | 653 | .sync_page = block_sync_page, |
654 | .direct_IO = ocfs2_direct_IO | 654 | .direct_IO = ocfs2_direct_IO |
655 | }; | 655 | }; |
656 | 656 |
fs/ocfs2/dir.c
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * dir.c | 4 | * dir.c |
5 | * | 5 | * |
6 | * Creates, reads, walks and deletes directory-nodes | 6 | * Creates, reads, walks and deletes directory-nodes |
7 | * | 7 | * |
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. |
9 | * | 9 | * |
10 | * Portions of this code from linux/fs/ext3/dir.c | 10 | * Portions of this code from linux/fs/ext3/dir.c |
11 | * | 11 | * |
12 | * Copyright (C) 1992, 1993, 1994, 1995 | 12 | * Copyright (C) 1992, 1993, 1994, 1995 |
13 | * Remy Card (card@masi.ibp.fr) | 13 | * Remy Card (card@masi.ibp.fr) |
14 | * Laboratoire MASI - Institut Blaise pascal | 14 | * Laboratoire MASI - Institut Blaise pascal |
15 | * Universite Pierre et Marie Curie (Paris VI) | 15 | * Universite Pierre et Marie Curie (Paris VI) |
16 | * | 16 | * |
17 | * from | 17 | * from |
18 | * | 18 | * |
19 | * linux/fs/minix/dir.c | 19 | * linux/fs/minix/dir.c |
20 | * | 20 | * |
21 | * Copyright (C) 1991, 1992 Linux Torvalds | 21 | * Copyright (C) 1991, 1992 Linux Torvalds |
22 | * | 22 | * |
23 | * This program is free software; you can redistribute it and/or | 23 | * This program is free software; you can redistribute it and/or |
24 | * modify it under the terms of the GNU General Public | 24 | * modify it under the terms of the GNU General Public |
25 | * License as published by the Free Software Foundation; either | 25 | * License as published by the Free Software Foundation; either |
26 | * version 2 of the License, or (at your option) any later version. | 26 | * version 2 of the License, or (at your option) any later version. |
27 | * | 27 | * |
28 | * This program is distributed in the hope that it will be useful, | 28 | * This program is distributed in the hope that it will be useful, |
29 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 29 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 30 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
31 | * General Public License for more details. | 31 | * General Public License for more details. |
32 | * | 32 | * |
33 | * You should have received a copy of the GNU General Public | 33 | * You should have received a copy of the GNU General Public |
34 | * License along with this program; if not, write to the | 34 | * License along with this program; if not, write to the |
35 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 35 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
36 | * Boston, MA 021110-1307, USA. | 36 | * Boston, MA 021110-1307, USA. |
37 | */ | 37 | */ |
38 | 38 | ||
39 | #include <linux/fs.h> | 39 | #include <linux/fs.h> |
40 | #include <linux/types.h> | 40 | #include <linux/types.h> |
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/highmem.h> | 42 | #include <linux/highmem.h> |
43 | 43 | ||
44 | #define MLOG_MASK_PREFIX ML_NAMEI | 44 | #define MLOG_MASK_PREFIX ML_NAMEI |
45 | #include <cluster/masklog.h> | 45 | #include <cluster/masklog.h> |
46 | 46 | ||
47 | #include "ocfs2.h" | 47 | #include "ocfs2.h" |
48 | 48 | ||
49 | #include "alloc.h" | 49 | #include "alloc.h" |
50 | #include "dir.h" | 50 | #include "dir.h" |
51 | #include "dlmglue.h" | 51 | #include "dlmglue.h" |
52 | #include "extent_map.h" | 52 | #include "extent_map.h" |
53 | #include "file.h" | 53 | #include "file.h" |
54 | #include "inode.h" | 54 | #include "inode.h" |
55 | #include "journal.h" | 55 | #include "journal.h" |
56 | #include "namei.h" | 56 | #include "namei.h" |
57 | #include "suballoc.h" | 57 | #include "suballoc.h" |
58 | #include "uptodate.h" | 58 | #include "uptodate.h" |
59 | 59 | ||
60 | #include "buffer_head_io.h" | 60 | #include "buffer_head_io.h" |
61 | 61 | ||
/* Maps on-disk OCFS2_FT_* file-type codes (indexed by de->file_type)
 * to the VFS DT_* values reported through filldir. */
static unsigned char ocfs2_filetype_table[] = {
	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
65 | 65 | ||
/* Forward declaration: grows @dir and returns the first new block's
 * buffer head via @new_de_bh.  Being static, the definition lives
 * later in this file. */
static int ocfs2_extend_dir(struct ocfs2_super *osb,
			    struct inode *dir,
			    struct buffer_head *parent_fe_bh,
			    struct buffer_head **new_de_bh);
70 | /* | 70 | /* |
71 | * ocfs2_readdir() | 71 | * ocfs2_readdir() |
72 | * | 72 | * |
73 | */ | 73 | */ |
/*
 * ocfs2_readdir()
 *
 * Walk the directory blocks from filp->f_pos, feeding each live dirent
 * to @filldir.  Takes the cluster meta lock (downgrading EX->PR after
 * an atime update) and uses the f_version/i_version stamp to detect
 * concurrent modification, revalidating offset within the block when
 * it changes.  Returns 0 on a completed pass, or a negative error if
 * the initial locking failed before anything was emitted.
 */
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
	int error = 0;
	unsigned long offset, blk, last_ra_blk = 0;
	int i, stored;
	struct buffer_head * bh, * tmp;
	struct ocfs2_dir_entry * de;
	int err;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct super_block * sb = inode->i_sb;
	unsigned int ra_sectors = 16;
	int lock_level = 0;

	mlog_entry("dirino=%llu\n",
		   (unsigned long long)OCFS2_I(inode)->ip_blkno);

	stored = 0;
	bh = NULL;

	error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
	if (lock_level && error >= 0) {
		/* We release EX lock which used to update atime
		 * and get PR lock again to reduce contention
		 * on commonly accessed directories. */
		ocfs2_meta_unlock(inode, 1);
		lock_level = 0;
		error = ocfs2_meta_lock(inode, NULL, 0);
	}
	if (error < 0) {
		if (error != -ENOENT)
			mlog_errno(error);
		/* we haven't got any yet, so propagate the error. */
		stored = error;
		goto bail_nolock;
	}

	/* Byte offset of f_pos within its directory block. */
	offset = filp->f_pos & (sb->s_blocksize - 1);

	while (!error && !stored && filp->f_pos < i_size_read(inode)) {
		blk = (filp->f_pos) >> sb->s_blocksize_bits;
		bh = ocfs2_bread(inode, blk, &err, 0);
		if (!bh) {
			/* Hole in a directory: log it and skip to the
			 * next block rather than aborting. */
			mlog(ML_ERROR,
			     "directory #%llu contains a hole at offset %lld\n",
			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			     filp->f_pos);
			filp->f_pos += sb->s_blocksize - offset;
			continue;
		}

		/* The idea here is to begin with 8k read-ahead and to stay
		 * 4k ahead of our current position.
		 *
		 * TODO: Use the pagecache for this. We just need to
		 * make sure it's cluster-safe... */
		if (!last_ra_blk
		    || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
			for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
			     i > 0; i--) {
				/* Async read-ahead; the bh itself is
				 * dropped immediately. */
				tmp = ocfs2_bread(inode, ++blk, &err, 1);
				if (tmp)
					brelse(tmp);
			}
			last_ra_blk = blk;
			ra_sectors = 8;
		}

revalidate:
		/* If the dir block has changed since the last call to
		 * readdir(2), then we might be pointing to an invalid
		 * dirent right now. Scan from the start of the block
		 * to make sure. */
		if (filp->f_version != inode->i_version) {
			for (i = 0; i < sb->s_blocksize && i < offset; ) {
				de = (struct ocfs2_dir_entry *) (bh->b_data + i);
				/* It's too expensive to do a full
				 * dirent test each time round this
				 * loop, but we do have to test at
				 * least that it is non-zero. A
				 * failure will be detected in the
				 * dirent test below. */
				if (le16_to_cpu(de->rec_len) <
				    OCFS2_DIR_REC_LEN(1))
					break;
				i += le16_to_cpu(de->rec_len);
			}
			offset = i;
			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
				| offset;
			filp->f_version = inode->i_version;
		}

		while (!error && filp->f_pos < i_size_read(inode)
		       && offset < sb->s_blocksize) {
			de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
			if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
				/* On error, skip the f_pos to the
				   next block. */
				filp->f_pos = (filp->f_pos |
					       (sb->s_blocksize - 1)) + 1;
				brelse(bh);
				goto bail;
			}
			offset += le16_to_cpu(de->rec_len);
			if (le64_to_cpu(de->inode)) {
				/* We might block in the next section
				 * if the data destination is
				 * currently swapped out. So, use a
				 * version stamp to detect whether or
				 * not the directory has been modified
				 * during the copy operation.
				 */
				unsigned long version = filp->f_version;
				unsigned char d_type = DT_UNKNOWN;

				if (de->file_type < OCFS2_FT_MAX)
					d_type = ocfs2_filetype_table[de->file_type];
				error = filldir(dirent, de->name,
						de->name_len,
						filp->f_pos,
						ino_from_blkno(sb, le64_to_cpu(de->inode)),
						d_type);
				if (error)
					break;
				if (version != filp->f_version)
					goto revalidate;
				stored ++;
			}
			filp->f_pos += le16_to_cpu(de->rec_len);
		}
		offset = 0;
		brelse(bh);
	}

	/* A completed (or filldir-terminated) pass reports success. */
	stored = 0;
bail:
	ocfs2_meta_unlock(inode, lock_level);

bail_nolock:
	mlog_exit(stored);

	return stored;
}
217 | 217 | ||
218 | /* | 218 | /* |
219 | * NOTE: this should always be called with parent dir i_mutex taken. | 219 | * NOTE: this should always be called with parent dir i_mutex taken. |
220 | */ | 220 | */ |
221 | int ocfs2_find_files_on_disk(const char *name, | 221 | int ocfs2_find_files_on_disk(const char *name, |
222 | int namelen, | 222 | int namelen, |
223 | u64 *blkno, | 223 | u64 *blkno, |
224 | struct inode *inode, | 224 | struct inode *inode, |
225 | struct buffer_head **dirent_bh, | 225 | struct buffer_head **dirent_bh, |
226 | struct ocfs2_dir_entry **dirent) | 226 | struct ocfs2_dir_entry **dirent) |
227 | { | 227 | { |
228 | int status = -ENOENT; | 228 | int status = -ENOENT; |
229 | 229 | ||
230 | mlog_entry("(name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p)\n", | 230 | mlog_entry("(name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p)\n", |
231 | namelen, name, blkno, inode, dirent_bh, dirent); | 231 | namelen, name, blkno, inode, dirent_bh, dirent); |
232 | 232 | ||
233 | *dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent); | 233 | *dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent); |
234 | if (!*dirent_bh || !*dirent) { | 234 | if (!*dirent_bh || !*dirent) { |
235 | status = -ENOENT; | 235 | status = -ENOENT; |
236 | goto leave; | 236 | goto leave; |
237 | } | 237 | } |
238 | 238 | ||
239 | *blkno = le64_to_cpu((*dirent)->inode); | 239 | *blkno = le64_to_cpu((*dirent)->inode); |
240 | 240 | ||
241 | status = 0; | 241 | status = 0; |
242 | leave: | 242 | leave: |
243 | if (status < 0) { | 243 | if (status < 0) { |
244 | *dirent = NULL; | 244 | *dirent = NULL; |
245 | if (*dirent_bh) { | 245 | if (*dirent_bh) { |
246 | brelse(*dirent_bh); | 246 | brelse(*dirent_bh); |
247 | *dirent_bh = NULL; | 247 | *dirent_bh = NULL; |
248 | } | 248 | } |
249 | } | 249 | } |
250 | 250 | ||
251 | mlog_exit(status); | 251 | mlog_exit(status); |
252 | return status; | 252 | return status; |
253 | } | 253 | } |
254 | 254 | ||
255 | /* Check for a name within a directory. | 255 | /* Check for a name within a directory. |
256 | * | 256 | * |
257 | * Return 0 if the name does not exist | 257 | * Return 0 if the name does not exist |
258 | * Return -EEXIST if the directory contains the name | 258 | * Return -EEXIST if the directory contains the name |
259 | * | 259 | * |
260 | * Callers should have i_mutex + a cluster lock on dir | 260 | * Callers should have i_mutex + a cluster lock on dir |
261 | */ | 261 | */ |
262 | int ocfs2_check_dir_for_entry(struct inode *dir, | 262 | int ocfs2_check_dir_for_entry(struct inode *dir, |
263 | const char *name, | 263 | const char *name, |
264 | int namelen) | 264 | int namelen) |
265 | { | 265 | { |
266 | int ret; | 266 | int ret; |
267 | struct buffer_head *dirent_bh = NULL; | 267 | struct buffer_head *dirent_bh = NULL; |
268 | struct ocfs2_dir_entry *dirent = NULL; | 268 | struct ocfs2_dir_entry *dirent = NULL; |
269 | 269 | ||
270 | mlog_entry("dir %llu, name '%.*s'\n", | 270 | mlog_entry("dir %llu, name '%.*s'\n", |
271 | (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); | 271 | (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); |
272 | 272 | ||
273 | ret = -EEXIST; | 273 | ret = -EEXIST; |
274 | dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent); | 274 | dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent); |
275 | if (dirent_bh) | 275 | if (dirent_bh) |
276 | goto bail; | 276 | goto bail; |
277 | 277 | ||
278 | ret = 0; | 278 | ret = 0; |
279 | bail: | 279 | bail: |
280 | if (dirent_bh) | 280 | if (dirent_bh) |
281 | brelse(dirent_bh); | 281 | brelse(dirent_bh); |
282 | 282 | ||
283 | mlog_exit(ret); | 283 | mlog_exit(ret); |
284 | return ret; | 284 | return ret; |
285 | } | 285 | } |
286 | 286 | ||
287 | /* | 287 | /* |
288 | * routine to check that the specified directory is empty (for rmdir) | 288 | * routine to check that the specified directory is empty (for rmdir) |
289 | */ | 289 | */ |
/*
 * routine to check that the specified directory is empty (for rmdir)
 *
 * Returns 1 when the directory holds no live entries beyond "." and
 * ".." — including when the directory is too small or structurally
 * bad, which is logged and treated as "empty" so rmdir can proceed.
 * Returns 0 as soon as any in-use dirent past the first two is found.
 */
int ocfs2_empty_dir(struct inode *inode)
{
	unsigned long offset;
	struct buffer_head * bh;
	struct ocfs2_dir_entry * de, * de1;
	struct super_block * sb;
	int err;

	sb = inode->i_sb;
	/* A valid directory must at least hold "." and "..". */
	if ((i_size_read(inode) <
	     (OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) ||
	    !(bh = ocfs2_bread(inode, 0, &err, 0))) {
		mlog(ML_ERROR, "bad directory (dir #%llu) - no data block\n",
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
		return 1;
	}

	/* First two dirents must be "." (pointing at ourselves) and "..". */
	de = (struct ocfs2_dir_entry *) bh->b_data;
	de1 = (struct ocfs2_dir_entry *)
			((char *)de + le16_to_cpu(de->rec_len));
	if ((le64_to_cpu(de->inode) != OCFS2_I(inode)->ip_blkno) ||
	    !le64_to_cpu(de1->inode) ||
	    strcmp(".", de->name) ||
	    strcmp("..", de1->name)) {
		mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
		brelse(bh);
		return 1;
	}
	offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
	de = (struct ocfs2_dir_entry *)((char *)de1 + le16_to_cpu(de1->rec_len));
	/* Walk the remaining dirents; any live one means "not empty". */
	while (offset < i_size_read(inode) ) {
		if (!bh || (void *)de >= (void *)(bh->b_data + sb->s_blocksize)) {
			/* Crossed a block boundary: swap in the next block. */
			brelse(bh);
			bh = ocfs2_bread(inode,
					 offset >> sb->s_blocksize_bits, &err, 0);
			if (!bh) {
				/* Hole: log and skip the whole block. */
				mlog(ML_ERROR, "dir %llu has a hole at %lu\n",
				     (unsigned long long)OCFS2_I(inode)->ip_blkno, offset);
				offset += sb->s_blocksize;
				continue;
			}
			de = (struct ocfs2_dir_entry *) bh->b_data;
		}
		if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
			/* Corrupt dirent: give up and call it empty. */
			brelse(bh);
			return 1;
		}
		if (le64_to_cpu(de->inode)) {
			brelse(bh);
			return 0;
		}
		offset += le16_to_cpu(de->rec_len);
		de = (struct ocfs2_dir_entry *)
			((char *)de + le16_to_cpu(de->rec_len));
	}
	brelse(bh);
	return 1;
}
349 | 349 | ||
/* returns a bh of the 1st new block in the allocation. */
int ocfs2_do_extend_dir(struct super_block *sb,
			handle_t *handle,
			struct inode *dir,
			struct buffer_head *parent_fe_bh,
			struct ocfs2_alloc_context *data_ac,
			struct ocfs2_alloc_context *meta_ac,
			struct buffer_head **new_bh)
{
	int status;
	int extend;
	u64 p_blkno;

	/* A new cluster is needed only when i_size has completely
	 * filled the clusters already allocated to this directory.
	 * ip_clusters is protected by ip_lock. */
	spin_lock(&OCFS2_I(dir)->ip_lock);
	extend = (i_size_read(dir) == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters));
	spin_unlock(&OCFS2_I(dir)->ip_lock);

	if (extend) {
		status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, 1,
						    parent_fe_bh, handle,
						    data_ac, meta_ac, NULL);
		/* The caller reserved everything up front, so a
		 * restart request must never happen here. */
		BUG_ON(status == -EAGAIN);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	/* Map the first logical block past the old end of the
	 * directory to its physical block.  i_blocks counts
	 * 512-byte sectors, hence the shift by
	 * (s_blocksize_bits - 9) to get fs blocks. */
	status = ocfs2_extent_map_get_blocks(dir, (dir->i_blocks >>
						   (sb->s_blocksize_bits - 9)),
					     1, &p_blkno, NULL);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* Hand back an (uninitialized) buffer for the new block;
	 * the caller fills it in and journals it. */
	*new_bh = sb_getblk(sb, p_blkno);
	if (!*new_bh) {
		status = -EIO;
		mlog_errno(status);
		goto bail;
	}
	status = 0;
bail:
	mlog_exit(status);
	return status;
}
397 | 397 | ||
/* assumes you already have a cluster lock on the directory. */
static int ocfs2_extend_dir(struct ocfs2_super *osb,
			    struct inode *dir,
			    struct buffer_head *parent_fe_bh,
			    struct buffer_head **new_de_bh)
{
	int status = 0;
	int credits, num_free_extents;
	loff_t dir_i_size;
	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	handle_t *handle = NULL;
	struct buffer_head *new_bh = NULL;
	struct ocfs2_dir_entry * de;
	struct super_block *sb = osb->sb;

	mlog_entry_void();

	dir_i_size = i_size_read(dir);
	mlog(0, "extending dir %llu (i_size = %lld)\n",
	     (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);

	/* dir->i_size is always block aligned. */
	spin_lock(&OCFS2_I(dir)->ip_lock);
	if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
		/* The allocation is full, so a new cluster (and
		 * possibly new extent-tree metadata) must be reserved
		 * before starting the transaction. */
		spin_unlock(&OCFS2_I(dir)->ip_lock);
		num_free_extents = ocfs2_num_free_extents(osb, dir, fe);
		if (num_free_extents < 0) {
			status = num_free_extents;
			mlog_errno(status);
			goto bail;
		}

		/* No free extent records left in the inode: reserve
		 * metadata so the extent tree itself can grow. */
		if (!num_free_extents) {
			status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac);
			if (status < 0) {
				/* -ENOSPC is an expected, quiet failure. */
				if (status != -ENOSPC)
					mlog_errno(status);
				goto bail;
			}
		}

		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
		if (status < 0) {
			if (status != -ENOSPC)
				mlog_errno(status);
			goto bail;
		}

		credits = ocfs2_calc_extend_credits(sb, fe, 1);
	} else {
		/* Room remains within the allocated clusters; only a
		 * simple extend is needed. */
		spin_unlock(&OCFS2_I(dir)->ip_lock);
		credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
	}

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_do_extend_dir(osb->sb, handle, dir, parent_fe_bh,
				     data_ac, meta_ac, &new_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	ocfs2_set_new_buffer_uptodate(dir, new_bh);

	status = ocfs2_journal_access(handle, dir, new_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	/* Initialize the new block as a single empty dirent spanning
	 * the whole block. */
	memset(new_bh->b_data, 0, sb->s_blocksize);
	de = (struct ocfs2_dir_entry *) new_bh->b_data;
	de->inode = 0;
	de->rec_len = cpu_to_le16(sb->s_blocksize);
	status = ocfs2_journal_dirty(handle, new_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* Grow i_size/i_blocks by one block and push the change to
	 * the on-disk inode within this same transaction. */
	dir_i_size += dir->i_sb->s_blocksize;
	i_size_write(dir, dir_i_size);
	dir->i_blocks = ocfs2_align_bytes_to_sectors(dir_i_size);
	status = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* Give the caller its own reference; ours is dropped below. */
	*new_de_bh = new_bh;
	get_bh(*new_de_bh);
bail:
	if (handle)
		ocfs2_commit_trans(osb, handle);

	if (data_ac)
		ocfs2_free_alloc_context(data_ac);
	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);

	if (new_bh)
		brelse(new_bh);

	mlog_exit(status);
	return status;
}
513 | 513 | ||
/*
 * Search the dir for a good spot, extending it if necessary. The
 * block containing an appropriate record is returned in ret_de_bh.
 */
int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
				 struct inode *dir,
				 struct buffer_head *parent_fe_bh,
				 const char *name,
				 int namelen,
				 struct buffer_head **ret_de_bh)
{
	unsigned long offset;
	struct buffer_head * bh = NULL;
	unsigned short rec_len;
	struct ocfs2_dinode *fe;
	struct ocfs2_dir_entry *de;
	struct super_block *sb;
	int status;

	mlog_entry_void();

	mlog(0, "getting ready to insert namelen %d into dir %llu\n",
	     namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno);

	BUG_ON(!S_ISDIR(dir->i_mode));
	fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
	BUG_ON(le64_to_cpu(fe->i_size) != i_size_read(dir));

	sb = dir->i_sb;

	if (!namelen) {
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	bh = ocfs2_bread(dir, 0, &status, 0);
	if (!bh) {
		mlog_errno(status);
		goto bail;
	}

	/* Smallest record that could hold the new name. */
	rec_len = OCFS2_DIR_REC_LEN(namelen);
	offset = 0;
	de = (struct ocfs2_dir_entry *) bh->b_data;
	while (1) {
		/* Walked off the end of the current block? */
		if ((char *)de >= sb->s_blocksize + bh->b_data) {
			brelse(bh);
			bh = NULL;

			/* Past EOF with no free slot found: extend
			 * the directory and hand back the fresh
			 * (empty) block. */
			if (i_size_read(dir) <= offset) {
				status = ocfs2_extend_dir(osb,
							  dir,
							  parent_fe_bh,
							  &bh);
				if (status < 0) {
					mlog_errno(status);
					goto bail;
				}
				BUG_ON(!bh);
				*ret_de_bh = bh;
				get_bh(*ret_de_bh);
				goto bail;
			}
			bh = ocfs2_bread(dir,
					 offset >> sb->s_blocksize_bits,
					 &status,
					 0);
			if (!bh) {
				mlog_errno(status);
				goto bail;
			}
			/* move to next block */
			de = (struct ocfs2_dir_entry *) bh->b_data;
		}
		if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
			status = -ENOENT;
			goto bail;
		}
		if (ocfs2_match(namelen, name, de)) {
			status = -EEXIST;
			goto bail;
		}
		/* A record fits here if it is unused and big enough,
		 * or if splitting a live record still leaves room for
		 * both the old entry and the new one. */
		if (((le64_to_cpu(de->inode) == 0) &&
		     (le16_to_cpu(de->rec_len) >= rec_len)) ||
		    (le16_to_cpu(de->rec_len) >=
		     (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
			/* Ok, we found a spot. Return this bh and let
			 * the caller actually fill it in. */
			*ret_de_bh = bh;
			get_bh(*ret_de_bh);
			status = 0;
			goto bail;
		}
		offset += le16_to_cpu(de->rec_len);
		de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
	}

	status = 0;
bail:
	if (bh)
		brelse(bh);

	mlog_exit(status);
	return status;
}
620 | 620 |
fs/ocfs2/dlm/dlmfs.c
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * dlmfs.c | 4 | * dlmfs.c |
5 | * | 5 | * |
6 | * Code which implements the kernel side of a minimal userspace | 6 | * Code which implements the kernel side of a minimal userspace |
7 | * interface to our DLM. This file handles the virtual file system | 7 | * interface to our DLM. This file handles the virtual file system |
8 | * used for communication with userspace. Credit should go to ramfs, | 8 | * used for communication with userspace. Credit should go to ramfs, |
9 | * which was a template for the fs side of this module. | 9 | * which was a template for the fs side of this module. |
10 | * | 10 | * |
11 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | 11 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. |
12 | * | 12 | * |
13 | * This program is free software; you can redistribute it and/or | 13 | * This program is free software; you can redistribute it and/or |
14 | * modify it under the terms of the GNU General Public | 14 | * modify it under the terms of the GNU General Public |
15 | * License as published by the Free Software Foundation; either | 15 | * License as published by the Free Software Foundation; either |
16 | * version 2 of the License, or (at your option) any later version. | 16 | * version 2 of the License, or (at your option) any later version. |
17 | * | 17 | * |
18 | * This program is distributed in the hope that it will be useful, | 18 | * This program is distributed in the hope that it will be useful, |
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
21 | * General Public License for more details. | 21 | * General Public License for more details. |
22 | * | 22 | * |
23 | * You should have received a copy of the GNU General Public | 23 | * You should have received a copy of the GNU General Public |
24 | * License along with this program; if not, write to the | 24 | * License along with this program; if not, write to the |
25 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 25 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
26 | * Boston, MA 021110-1307, USA. | 26 | * Boston, MA 021110-1307, USA. |
27 | */ | 27 | */ |
28 | 28 | ||
29 | /* Simple VFS hooks based on: */ | 29 | /* Simple VFS hooks based on: */ |
30 | /* | 30 | /* |
31 | * Resizable simple ram filesystem for Linux. | 31 | * Resizable simple ram filesystem for Linux. |
32 | * | 32 | * |
33 | * Copyright (C) 2000 Linus Torvalds. | 33 | * Copyright (C) 2000 Linus Torvalds. |
34 | * 2000 Transmeta Corp. | 34 | * 2000 Transmeta Corp. |
35 | */ | 35 | */ |
36 | 36 | ||
37 | #include <linux/module.h> | 37 | #include <linux/module.h> |
38 | #include <linux/fs.h> | 38 | #include <linux/fs.h> |
39 | #include <linux/pagemap.h> | 39 | #include <linux/pagemap.h> |
40 | #include <linux/types.h> | 40 | #include <linux/types.h> |
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/highmem.h> | 42 | #include <linux/highmem.h> |
43 | #include <linux/init.h> | 43 | #include <linux/init.h> |
44 | #include <linux/string.h> | 44 | #include <linux/string.h> |
45 | #include <linux/smp_lock.h> | 45 | #include <linux/smp_lock.h> |
46 | #include <linux/backing-dev.h> | 46 | #include <linux/backing-dev.h> |
47 | 47 | ||
48 | #include <asm/uaccess.h> | 48 | #include <asm/uaccess.h> |
49 | 49 | ||
50 | 50 | ||
51 | #include "cluster/nodemanager.h" | 51 | #include "cluster/nodemanager.h" |
52 | #include "cluster/heartbeat.h" | 52 | #include "cluster/heartbeat.h" |
53 | #include "cluster/tcp.h" | 53 | #include "cluster/tcp.h" |
54 | 54 | ||
55 | #include "dlmapi.h" | 55 | #include "dlmapi.h" |
56 | 56 | ||
57 | #include "userdlm.h" | 57 | #include "userdlm.h" |
58 | 58 | ||
59 | #include "dlmfsver.h" | 59 | #include "dlmfsver.h" |
60 | 60 | ||
61 | #define MLOG_MASK_PREFIX ML_DLMFS | 61 | #define MLOG_MASK_PREFIX ML_DLMFS |
62 | #include "cluster/masklog.h" | 62 | #include "cluster/masklog.h" |
63 | 63 | ||
64 | static struct super_operations dlmfs_ops; | 64 | static struct super_operations dlmfs_ops; |
65 | static struct file_operations dlmfs_file_operations; | 65 | static struct file_operations dlmfs_file_operations; |
66 | static struct inode_operations dlmfs_dir_inode_operations; | 66 | static struct inode_operations dlmfs_dir_inode_operations; |
67 | static struct inode_operations dlmfs_root_inode_operations; | 67 | static struct inode_operations dlmfs_root_inode_operations; |
68 | static struct inode_operations dlmfs_file_inode_operations; | 68 | static struct inode_operations dlmfs_file_inode_operations; |
69 | static struct kmem_cache *dlmfs_inode_cache; | 69 | static struct kmem_cache *dlmfs_inode_cache; |
70 | 70 | ||
71 | struct workqueue_struct *user_dlm_worker; | 71 | struct workqueue_struct *user_dlm_worker; |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * decodes a set of open flags into a valid lock level and a set of flags. | 74 | * decodes a set of open flags into a valid lock level and a set of flags. |
75 | * returns < 0 if we have invalid flags | 75 | * returns < 0 if we have invalid flags |
76 | * flags which mean something to us: | 76 | * flags which mean something to us: |
77 | * O_RDONLY -> PRMODE level | 77 | * O_RDONLY -> PRMODE level |
78 | * O_WRONLY -> EXMODE level | 78 | * O_WRONLY -> EXMODE level |
79 | * | 79 | * |
80 | * O_NONBLOCK -> LKM_NOQUEUE | 80 | * O_NONBLOCK -> LKM_NOQUEUE |
81 | */ | 81 | */ |
82 | static int dlmfs_decode_open_flags(int open_flags, | 82 | static int dlmfs_decode_open_flags(int open_flags, |
83 | int *level, | 83 | int *level, |
84 | int *flags) | 84 | int *flags) |
85 | { | 85 | { |
86 | if (open_flags & (O_WRONLY|O_RDWR)) | 86 | if (open_flags & (O_WRONLY|O_RDWR)) |
87 | *level = LKM_EXMODE; | 87 | *level = LKM_EXMODE; |
88 | else | 88 | else |
89 | *level = LKM_PRMODE; | 89 | *level = LKM_PRMODE; |
90 | 90 | ||
91 | *flags = 0; | 91 | *flags = 0; |
92 | if (open_flags & O_NONBLOCK) | 92 | if (open_flags & O_NONBLOCK) |
93 | *flags |= LKM_NOQUEUE; | 93 | *flags |= LKM_NOQUEUE; |
94 | 94 | ||
95 | return 0; | 95 | return 0; |
96 | } | 96 | } |
97 | 97 | ||
/*
 * Take a cluster lock matching the open mode and stash the granted
 * level in file->private_data so release can drop it later.
 */
static int dlmfs_file_open(struct inode *inode,
			   struct file *file)
{
	int status, level, flags;
	struct dlmfs_filp_private *fp = NULL;
	struct dlmfs_inode_private *ip;

	/* Only regular lock files are opened through here. */
	if (S_ISDIR(inode->i_mode))
		BUG();

	mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino,
	     file->f_flags);

	status = dlmfs_decode_open_flags(file->f_flags, &level, &flags);
	if (status < 0)
		goto bail;

	/* We don't want to honor O_APPEND at read/write time as it
	 * doesn't make sense for LVB writes. */
	file->f_flags &= ~O_APPEND;

	fp = kmalloc(sizeof(*fp), GFP_NOFS);
	if (!fp) {
		status = -ENOMEM;
		goto bail;
	}
	fp->fp_lock_level = level;

	ip = DLMFS_I(inode);

	status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags);
	if (status < 0) {
		/* this is a strange error to return here but I want
		 * to be able userspace to be able to distinguish a
		 * valid lock request from one that simply couldn't be
		 * granted. */
		if (flags & LKM_NOQUEUE && status == -EAGAIN)
			status = -ETXTBSY;
		/* Lock not taken: free fp so release sees NULL
		 * private_data and does not unlock. */
		kfree(fp);
		goto bail;
	}

	file->private_data = fp;
bail:
	return status;
}
144 | 144 | ||
/*
 * Drop the cluster lock taken at open time (if any) and free the
 * per-open private data.  Always returns 0.
 */
static int dlmfs_file_release(struct inode *inode,
			      struct file *file)
{
	int level, status;
	struct dlmfs_inode_private *ip = DLMFS_I(inode);
	struct dlmfs_filp_private *fp =
		(struct dlmfs_filp_private *) file->private_data;

	if (S_ISDIR(inode->i_mode))
		BUG();

	mlog(0, "close called on inode %lu\n", inode->i_ino);

	status = 0;
	/* fp is NULL when open failed before taking the lock. */
	if (fp) {
		level = fp->fp_lock_level;
		if (level != LKM_IVMODE)
			user_dlm_cluster_unlock(&ip->ip_lockres, level);

		kfree(fp);
		file->private_data = NULL;
	}

	return 0;
}
170 | 170 | ||
171 | static ssize_t dlmfs_file_read(struct file *filp, | 171 | static ssize_t dlmfs_file_read(struct file *filp, |
172 | char __user *buf, | 172 | char __user *buf, |
173 | size_t count, | 173 | size_t count, |
174 | loff_t *ppos) | 174 | loff_t *ppos) |
175 | { | 175 | { |
176 | int bytes_left; | 176 | int bytes_left; |
177 | ssize_t readlen; | 177 | ssize_t readlen; |
178 | char *lvb_buf; | 178 | char *lvb_buf; |
179 | struct inode *inode = filp->f_dentry->d_inode; | 179 | struct inode *inode = filp->f_path.dentry->d_inode; |
180 | 180 | ||
181 | mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", | 181 | mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", |
182 | inode->i_ino, count, *ppos); | 182 | inode->i_ino, count, *ppos); |
183 | 183 | ||
184 | if (*ppos >= i_size_read(inode)) | 184 | if (*ppos >= i_size_read(inode)) |
185 | return 0; | 185 | return 0; |
186 | 186 | ||
187 | if (!count) | 187 | if (!count) |
188 | return 0; | 188 | return 0; |
189 | 189 | ||
190 | if (!access_ok(VERIFY_WRITE, buf, count)) | 190 | if (!access_ok(VERIFY_WRITE, buf, count)) |
191 | return -EFAULT; | 191 | return -EFAULT; |
192 | 192 | ||
193 | /* don't read past the lvb */ | 193 | /* don't read past the lvb */ |
194 | if ((count + *ppos) > i_size_read(inode)) | 194 | if ((count + *ppos) > i_size_read(inode)) |
195 | readlen = i_size_read(inode) - *ppos; | 195 | readlen = i_size_read(inode) - *ppos; |
196 | else | 196 | else |
197 | readlen = count - *ppos; | 197 | readlen = count - *ppos; |
198 | 198 | ||
199 | lvb_buf = kmalloc(readlen, GFP_NOFS); | 199 | lvb_buf = kmalloc(readlen, GFP_NOFS); |
200 | if (!lvb_buf) | 200 | if (!lvb_buf) |
201 | return -ENOMEM; | 201 | return -ENOMEM; |
202 | 202 | ||
203 | user_dlm_read_lvb(inode, lvb_buf, readlen); | 203 | user_dlm_read_lvb(inode, lvb_buf, readlen); |
204 | bytes_left = __copy_to_user(buf, lvb_buf, readlen); | 204 | bytes_left = __copy_to_user(buf, lvb_buf, readlen); |
205 | readlen -= bytes_left; | 205 | readlen -= bytes_left; |
206 | 206 | ||
207 | kfree(lvb_buf); | 207 | kfree(lvb_buf); |
208 | 208 | ||
209 | *ppos = *ppos + readlen; | 209 | *ppos = *ppos + readlen; |
210 | 210 | ||
211 | mlog(0, "read %zd bytes\n", readlen); | 211 | mlog(0, "read %zd bytes\n", readlen); |
212 | return readlen; | 212 | return readlen; |
213 | } | 213 | } |
214 | 214 | ||
215 | static ssize_t dlmfs_file_write(struct file *filp, | 215 | static ssize_t dlmfs_file_write(struct file *filp, |
216 | const char __user *buf, | 216 | const char __user *buf, |
217 | size_t count, | 217 | size_t count, |
218 | loff_t *ppos) | 218 | loff_t *ppos) |
219 | { | 219 | { |
220 | int bytes_left; | 220 | int bytes_left; |
221 | ssize_t writelen; | 221 | ssize_t writelen; |
222 | char *lvb_buf; | 222 | char *lvb_buf; |
223 | struct inode *inode = filp->f_dentry->d_inode; | 223 | struct inode *inode = filp->f_path.dentry->d_inode; |
224 | 224 | ||
225 | mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", | 225 | mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", |
226 | inode->i_ino, count, *ppos); | 226 | inode->i_ino, count, *ppos); |
227 | 227 | ||
228 | if (*ppos >= i_size_read(inode)) | 228 | if (*ppos >= i_size_read(inode)) |
229 | return -ENOSPC; | 229 | return -ENOSPC; |
230 | 230 | ||
231 | if (!count) | 231 | if (!count) |
232 | return 0; | 232 | return 0; |
233 | 233 | ||
234 | if (!access_ok(VERIFY_READ, buf, count)) | 234 | if (!access_ok(VERIFY_READ, buf, count)) |
235 | return -EFAULT; | 235 | return -EFAULT; |
236 | 236 | ||
237 | /* don't write past the lvb */ | 237 | /* don't write past the lvb */ |
238 | if ((count + *ppos) > i_size_read(inode)) | 238 | if ((count + *ppos) > i_size_read(inode)) |
239 | writelen = i_size_read(inode) - *ppos; | 239 | writelen = i_size_read(inode) - *ppos; |
240 | else | 240 | else |
241 | writelen = count - *ppos; | 241 | writelen = count - *ppos; |
242 | 242 | ||
243 | lvb_buf = kmalloc(writelen, GFP_NOFS); | 243 | lvb_buf = kmalloc(writelen, GFP_NOFS); |
244 | if (!lvb_buf) | 244 | if (!lvb_buf) |
245 | return -ENOMEM; | 245 | return -ENOMEM; |
246 | 246 | ||
247 | bytes_left = copy_from_user(lvb_buf, buf, writelen); | 247 | bytes_left = copy_from_user(lvb_buf, buf, writelen); |
248 | writelen -= bytes_left; | 248 | writelen -= bytes_left; |
249 | if (writelen) | 249 | if (writelen) |
250 | user_dlm_write_lvb(inode, lvb_buf, writelen); | 250 | user_dlm_write_lvb(inode, lvb_buf, writelen); |
251 | 251 | ||
252 | kfree(lvb_buf); | 252 | kfree(lvb_buf); |
253 | 253 | ||
254 | *ppos = *ppos + writelen; | 254 | *ppos = *ppos + writelen; |
255 | mlog(0, "wrote %zd bytes\n", writelen); | 255 | mlog(0, "wrote %zd bytes\n", writelen); |
256 | return writelen; | 256 | return writelen; |
257 | } | 257 | } |
258 | 258 | ||
259 | static void dlmfs_init_once(void *foo, | 259 | static void dlmfs_init_once(void *foo, |
260 | struct kmem_cache *cachep, | 260 | struct kmem_cache *cachep, |
261 | unsigned long flags) | 261 | unsigned long flags) |
262 | { | 262 | { |
263 | struct dlmfs_inode_private *ip = | 263 | struct dlmfs_inode_private *ip = |
264 | (struct dlmfs_inode_private *) foo; | 264 | (struct dlmfs_inode_private *) foo; |
265 | 265 | ||
266 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | 266 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == |
267 | SLAB_CTOR_CONSTRUCTOR) { | 267 | SLAB_CTOR_CONSTRUCTOR) { |
268 | ip->ip_dlm = NULL; | 268 | ip->ip_dlm = NULL; |
269 | ip->ip_parent = NULL; | 269 | ip->ip_parent = NULL; |
270 | 270 | ||
271 | inode_init_once(&ip->ip_vfs_inode); | 271 | inode_init_once(&ip->ip_vfs_inode); |
272 | } | 272 | } |
273 | } | 273 | } |
274 | 274 | ||
275 | static struct inode *dlmfs_alloc_inode(struct super_block *sb) | 275 | static struct inode *dlmfs_alloc_inode(struct super_block *sb) |
276 | { | 276 | { |
277 | struct dlmfs_inode_private *ip; | 277 | struct dlmfs_inode_private *ip; |
278 | 278 | ||
279 | ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS); | 279 | ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS); |
280 | if (!ip) | 280 | if (!ip) |
281 | return NULL; | 281 | return NULL; |
282 | 282 | ||
283 | return &ip->ip_vfs_inode; | 283 | return &ip->ip_vfs_inode; |
284 | } | 284 | } |
285 | 285 | ||
286 | static void dlmfs_destroy_inode(struct inode *inode) | 286 | static void dlmfs_destroy_inode(struct inode *inode) |
287 | { | 287 | { |
288 | kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); | 288 | kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); |
289 | } | 289 | } |
290 | 290 | ||
291 | static void dlmfs_clear_inode(struct inode *inode) | 291 | static void dlmfs_clear_inode(struct inode *inode) |
292 | { | 292 | { |
293 | int status; | 293 | int status; |
294 | struct dlmfs_inode_private *ip; | 294 | struct dlmfs_inode_private *ip; |
295 | 295 | ||
296 | if (!inode) | 296 | if (!inode) |
297 | return; | 297 | return; |
298 | 298 | ||
299 | mlog(0, "inode %lu\n", inode->i_ino); | 299 | mlog(0, "inode %lu\n", inode->i_ino); |
300 | 300 | ||
301 | ip = DLMFS_I(inode); | 301 | ip = DLMFS_I(inode); |
302 | 302 | ||
303 | if (S_ISREG(inode->i_mode)) { | 303 | if (S_ISREG(inode->i_mode)) { |
304 | status = user_dlm_destroy_lock(&ip->ip_lockres); | 304 | status = user_dlm_destroy_lock(&ip->ip_lockres); |
305 | if (status < 0) | 305 | if (status < 0) |
306 | mlog_errno(status); | 306 | mlog_errno(status); |
307 | iput(ip->ip_parent); | 307 | iput(ip->ip_parent); |
308 | goto clear_fields; | 308 | goto clear_fields; |
309 | } | 309 | } |
310 | 310 | ||
311 | mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm); | 311 | mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm); |
312 | /* we must be a directory. If required, lets unregister the | 312 | /* we must be a directory. If required, lets unregister the |
313 | * dlm context now. */ | 313 | * dlm context now. */ |
314 | if (ip->ip_dlm) | 314 | if (ip->ip_dlm) |
315 | user_dlm_unregister_context(ip->ip_dlm); | 315 | user_dlm_unregister_context(ip->ip_dlm); |
316 | clear_fields: | 316 | clear_fields: |
317 | ip->ip_parent = NULL; | 317 | ip->ip_parent = NULL; |
318 | ip->ip_dlm = NULL; | 318 | ip->ip_dlm = NULL; |
319 | } | 319 | } |
320 | 320 | ||
321 | static struct backing_dev_info dlmfs_backing_dev_info = { | 321 | static struct backing_dev_info dlmfs_backing_dev_info = { |
322 | .ra_pages = 0, /* No readahead */ | 322 | .ra_pages = 0, /* No readahead */ |
323 | .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, | 323 | .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, |
324 | }; | 324 | }; |
325 | 325 | ||
326 | static struct inode *dlmfs_get_root_inode(struct super_block *sb) | 326 | static struct inode *dlmfs_get_root_inode(struct super_block *sb) |
327 | { | 327 | { |
328 | struct inode *inode = new_inode(sb); | 328 | struct inode *inode = new_inode(sb); |
329 | int mode = S_IFDIR | 0755; | 329 | int mode = S_IFDIR | 0755; |
330 | struct dlmfs_inode_private *ip; | 330 | struct dlmfs_inode_private *ip; |
331 | 331 | ||
332 | if (inode) { | 332 | if (inode) { |
333 | ip = DLMFS_I(inode); | 333 | ip = DLMFS_I(inode); |
334 | 334 | ||
335 | inode->i_mode = mode; | 335 | inode->i_mode = mode; |
336 | inode->i_uid = current->fsuid; | 336 | inode->i_uid = current->fsuid; |
337 | inode->i_gid = current->fsgid; | 337 | inode->i_gid = current->fsgid; |
338 | inode->i_blocks = 0; | 338 | inode->i_blocks = 0; |
339 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; | 339 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; |
340 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 340 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
341 | inc_nlink(inode); | 341 | inc_nlink(inode); |
342 | 342 | ||
343 | inode->i_fop = &simple_dir_operations; | 343 | inode->i_fop = &simple_dir_operations; |
344 | inode->i_op = &dlmfs_root_inode_operations; | 344 | inode->i_op = &dlmfs_root_inode_operations; |
345 | } | 345 | } |
346 | 346 | ||
347 | return inode; | 347 | return inode; |
348 | } | 348 | } |
349 | 349 | ||
350 | static struct inode *dlmfs_get_inode(struct inode *parent, | 350 | static struct inode *dlmfs_get_inode(struct inode *parent, |
351 | struct dentry *dentry, | 351 | struct dentry *dentry, |
352 | int mode) | 352 | int mode) |
353 | { | 353 | { |
354 | struct super_block *sb = parent->i_sb; | 354 | struct super_block *sb = parent->i_sb; |
355 | struct inode * inode = new_inode(sb); | 355 | struct inode * inode = new_inode(sb); |
356 | struct dlmfs_inode_private *ip; | 356 | struct dlmfs_inode_private *ip; |
357 | 357 | ||
358 | if (!inode) | 358 | if (!inode) |
359 | return NULL; | 359 | return NULL; |
360 | 360 | ||
361 | inode->i_mode = mode; | 361 | inode->i_mode = mode; |
362 | inode->i_uid = current->fsuid; | 362 | inode->i_uid = current->fsuid; |
363 | inode->i_gid = current->fsgid; | 363 | inode->i_gid = current->fsgid; |
364 | inode->i_blocks = 0; | 364 | inode->i_blocks = 0; |
365 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; | 365 | inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; |
366 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 366 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
367 | 367 | ||
368 | ip = DLMFS_I(inode); | 368 | ip = DLMFS_I(inode); |
369 | ip->ip_dlm = DLMFS_I(parent)->ip_dlm; | 369 | ip->ip_dlm = DLMFS_I(parent)->ip_dlm; |
370 | 370 | ||
371 | switch (mode & S_IFMT) { | 371 | switch (mode & S_IFMT) { |
372 | default: | 372 | default: |
373 | /* for now we don't support anything other than | 373 | /* for now we don't support anything other than |
374 | * directories and regular files. */ | 374 | * directories and regular files. */ |
375 | BUG(); | 375 | BUG(); |
376 | break; | 376 | break; |
377 | case S_IFREG: | 377 | case S_IFREG: |
378 | inode->i_op = &dlmfs_file_inode_operations; | 378 | inode->i_op = &dlmfs_file_inode_operations; |
379 | inode->i_fop = &dlmfs_file_operations; | 379 | inode->i_fop = &dlmfs_file_operations; |
380 | 380 | ||
381 | i_size_write(inode, DLM_LVB_LEN); | 381 | i_size_write(inode, DLM_LVB_LEN); |
382 | 382 | ||
383 | user_dlm_lock_res_init(&ip->ip_lockres, dentry); | 383 | user_dlm_lock_res_init(&ip->ip_lockres, dentry); |
384 | 384 | ||
385 | /* released at clear_inode time, this insures that we | 385 | /* released at clear_inode time, this insures that we |
386 | * get to drop the dlm reference on each lock *before* | 386 | * get to drop the dlm reference on each lock *before* |
387 | * we call the unregister code for releasing parent | 387 | * we call the unregister code for releasing parent |
388 | * directories. */ | 388 | * directories. */ |
389 | ip->ip_parent = igrab(parent); | 389 | ip->ip_parent = igrab(parent); |
390 | BUG_ON(!ip->ip_parent); | 390 | BUG_ON(!ip->ip_parent); |
391 | break; | 391 | break; |
392 | case S_IFDIR: | 392 | case S_IFDIR: |
393 | inode->i_op = &dlmfs_dir_inode_operations; | 393 | inode->i_op = &dlmfs_dir_inode_operations; |
394 | inode->i_fop = &simple_dir_operations; | 394 | inode->i_fop = &simple_dir_operations; |
395 | 395 | ||
396 | /* directory inodes start off with i_nlink == | 396 | /* directory inodes start off with i_nlink == |
397 | * 2 (for "." entry) */ | 397 | * 2 (for "." entry) */ |
398 | inc_nlink(inode); | 398 | inc_nlink(inode); |
399 | break; | 399 | break; |
400 | } | 400 | } |
401 | 401 | ||
402 | if (parent->i_mode & S_ISGID) { | 402 | if (parent->i_mode & S_ISGID) { |
403 | inode->i_gid = parent->i_gid; | 403 | inode->i_gid = parent->i_gid; |
404 | if (S_ISDIR(mode)) | 404 | if (S_ISDIR(mode)) |
405 | inode->i_mode |= S_ISGID; | 405 | inode->i_mode |= S_ISGID; |
406 | } | 406 | } |
407 | 407 | ||
408 | return inode; | 408 | return inode; |
409 | } | 409 | } |
410 | 410 | ||
411 | /* | 411 | /* |
412 | * File creation. Allocate an inode, and we're done.. | 412 | * File creation. Allocate an inode, and we're done.. |
413 | */ | 413 | */ |
414 | /* SMP-safe */ | 414 | /* SMP-safe */ |
415 | static int dlmfs_mkdir(struct inode * dir, | 415 | static int dlmfs_mkdir(struct inode * dir, |
416 | struct dentry * dentry, | 416 | struct dentry * dentry, |
417 | int mode) | 417 | int mode) |
418 | { | 418 | { |
419 | int status; | 419 | int status; |
420 | struct inode *inode = NULL; | 420 | struct inode *inode = NULL; |
421 | struct qstr *domain = &dentry->d_name; | 421 | struct qstr *domain = &dentry->d_name; |
422 | struct dlmfs_inode_private *ip; | 422 | struct dlmfs_inode_private *ip; |
423 | struct dlm_ctxt *dlm; | 423 | struct dlm_ctxt *dlm; |
424 | 424 | ||
425 | mlog(0, "mkdir %.*s\n", domain->len, domain->name); | 425 | mlog(0, "mkdir %.*s\n", domain->len, domain->name); |
426 | 426 | ||
427 | /* verify that we have a proper domain */ | 427 | /* verify that we have a proper domain */ |
428 | if (domain->len >= O2NM_MAX_NAME_LEN) { | 428 | if (domain->len >= O2NM_MAX_NAME_LEN) { |
429 | status = -EINVAL; | 429 | status = -EINVAL; |
430 | mlog(ML_ERROR, "invalid domain name for directory.\n"); | 430 | mlog(ML_ERROR, "invalid domain name for directory.\n"); |
431 | goto bail; | 431 | goto bail; |
432 | } | 432 | } |
433 | 433 | ||
434 | inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR); | 434 | inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR); |
435 | if (!inode) { | 435 | if (!inode) { |
436 | status = -ENOMEM; | 436 | status = -ENOMEM; |
437 | mlog_errno(status); | 437 | mlog_errno(status); |
438 | goto bail; | 438 | goto bail; |
439 | } | 439 | } |
440 | 440 | ||
441 | ip = DLMFS_I(inode); | 441 | ip = DLMFS_I(inode); |
442 | 442 | ||
443 | dlm = user_dlm_register_context(domain); | 443 | dlm = user_dlm_register_context(domain); |
444 | if (IS_ERR(dlm)) { | 444 | if (IS_ERR(dlm)) { |
445 | status = PTR_ERR(dlm); | 445 | status = PTR_ERR(dlm); |
446 | mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n", | 446 | mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n", |
447 | status, domain->len, domain->name); | 447 | status, domain->len, domain->name); |
448 | goto bail; | 448 | goto bail; |
449 | } | 449 | } |
450 | ip->ip_dlm = dlm; | 450 | ip->ip_dlm = dlm; |
451 | 451 | ||
452 | inc_nlink(dir); | 452 | inc_nlink(dir); |
453 | d_instantiate(dentry, inode); | 453 | d_instantiate(dentry, inode); |
454 | dget(dentry); /* Extra count - pin the dentry in core */ | 454 | dget(dentry); /* Extra count - pin the dentry in core */ |
455 | 455 | ||
456 | status = 0; | 456 | status = 0; |
457 | bail: | 457 | bail: |
458 | if (status < 0) | 458 | if (status < 0) |
459 | iput(inode); | 459 | iput(inode); |
460 | return status; | 460 | return status; |
461 | } | 461 | } |
462 | 462 | ||
463 | static int dlmfs_create(struct inode *dir, | 463 | static int dlmfs_create(struct inode *dir, |
464 | struct dentry *dentry, | 464 | struct dentry *dentry, |
465 | int mode, | 465 | int mode, |
466 | struct nameidata *nd) | 466 | struct nameidata *nd) |
467 | { | 467 | { |
468 | int status = 0; | 468 | int status = 0; |
469 | struct inode *inode; | 469 | struct inode *inode; |
470 | struct qstr *name = &dentry->d_name; | 470 | struct qstr *name = &dentry->d_name; |
471 | 471 | ||
472 | mlog(0, "create %.*s\n", name->len, name->name); | 472 | mlog(0, "create %.*s\n", name->len, name->name); |
473 | 473 | ||
474 | /* verify name is valid and doesn't contain any dlm reserved | 474 | /* verify name is valid and doesn't contain any dlm reserved |
475 | * characters */ | 475 | * characters */ |
476 | if (name->len >= USER_DLM_LOCK_ID_MAX_LEN || | 476 | if (name->len >= USER_DLM_LOCK_ID_MAX_LEN || |
477 | name->name[0] == '$') { | 477 | name->name[0] == '$') { |
478 | status = -EINVAL; | 478 | status = -EINVAL; |
479 | mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len, | 479 | mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len, |
480 | name->name); | 480 | name->name); |
481 | goto bail; | 481 | goto bail; |
482 | } | 482 | } |
483 | 483 | ||
484 | inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG); | 484 | inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG); |
485 | if (!inode) { | 485 | if (!inode) { |
486 | status = -ENOMEM; | 486 | status = -ENOMEM; |
487 | mlog_errno(status); | 487 | mlog_errno(status); |
488 | goto bail; | 488 | goto bail; |
489 | } | 489 | } |
490 | 490 | ||
491 | d_instantiate(dentry, inode); | 491 | d_instantiate(dentry, inode); |
492 | dget(dentry); /* Extra count - pin the dentry in core */ | 492 | dget(dentry); /* Extra count - pin the dentry in core */ |
493 | bail: | 493 | bail: |
494 | return status; | 494 | return status; |
495 | } | 495 | } |
496 | 496 | ||
497 | static int dlmfs_unlink(struct inode *dir, | 497 | static int dlmfs_unlink(struct inode *dir, |
498 | struct dentry *dentry) | 498 | struct dentry *dentry) |
499 | { | 499 | { |
500 | int status; | 500 | int status; |
501 | struct inode *inode = dentry->d_inode; | 501 | struct inode *inode = dentry->d_inode; |
502 | 502 | ||
503 | mlog(0, "unlink inode %lu\n", inode->i_ino); | 503 | mlog(0, "unlink inode %lu\n", inode->i_ino); |
504 | 504 | ||
505 | /* if there are no current holders, or none that are waiting | 505 | /* if there are no current holders, or none that are waiting |
506 | * to acquire a lock, this basically destroys our lockres. */ | 506 | * to acquire a lock, this basically destroys our lockres. */ |
507 | status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres); | 507 | status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres); |
508 | if (status < 0) { | 508 | if (status < 0) { |
509 | mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n", | 509 | mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n", |
510 | dentry->d_name.len, dentry->d_name.name, status); | 510 | dentry->d_name.len, dentry->d_name.name, status); |
511 | goto bail; | 511 | goto bail; |
512 | } | 512 | } |
513 | status = simple_unlink(dir, dentry); | 513 | status = simple_unlink(dir, dentry); |
514 | bail: | 514 | bail: |
515 | return status; | 515 | return status; |
516 | } | 516 | } |
517 | 517 | ||
518 | static int dlmfs_fill_super(struct super_block * sb, | 518 | static int dlmfs_fill_super(struct super_block * sb, |
519 | void * data, | 519 | void * data, |
520 | int silent) | 520 | int silent) |
521 | { | 521 | { |
522 | struct inode * inode; | 522 | struct inode * inode; |
523 | struct dentry * root; | 523 | struct dentry * root; |
524 | 524 | ||
525 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 525 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
526 | sb->s_blocksize = PAGE_CACHE_SIZE; | 526 | sb->s_blocksize = PAGE_CACHE_SIZE; |
527 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | 527 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; |
528 | sb->s_magic = DLMFS_MAGIC; | 528 | sb->s_magic = DLMFS_MAGIC; |
529 | sb->s_op = &dlmfs_ops; | 529 | sb->s_op = &dlmfs_ops; |
530 | inode = dlmfs_get_root_inode(sb); | 530 | inode = dlmfs_get_root_inode(sb); |
531 | if (!inode) | 531 | if (!inode) |
532 | return -ENOMEM; | 532 | return -ENOMEM; |
533 | 533 | ||
534 | root = d_alloc_root(inode); | 534 | root = d_alloc_root(inode); |
535 | if (!root) { | 535 | if (!root) { |
536 | iput(inode); | 536 | iput(inode); |
537 | return -ENOMEM; | 537 | return -ENOMEM; |
538 | } | 538 | } |
539 | sb->s_root = root; | 539 | sb->s_root = root; |
540 | return 0; | 540 | return 0; |
541 | } | 541 | } |
542 | 542 | ||
543 | static struct file_operations dlmfs_file_operations = { | 543 | static struct file_operations dlmfs_file_operations = { |
544 | .open = dlmfs_file_open, | 544 | .open = dlmfs_file_open, |
545 | .release = dlmfs_file_release, | 545 | .release = dlmfs_file_release, |
546 | .read = dlmfs_file_read, | 546 | .read = dlmfs_file_read, |
547 | .write = dlmfs_file_write, | 547 | .write = dlmfs_file_write, |
548 | }; | 548 | }; |
549 | 549 | ||
550 | static struct inode_operations dlmfs_dir_inode_operations = { | 550 | static struct inode_operations dlmfs_dir_inode_operations = { |
551 | .create = dlmfs_create, | 551 | .create = dlmfs_create, |
552 | .lookup = simple_lookup, | 552 | .lookup = simple_lookup, |
553 | .unlink = dlmfs_unlink, | 553 | .unlink = dlmfs_unlink, |
554 | }; | 554 | }; |
555 | 555 | ||
556 | /* this way we can restrict mkdir to only the toplevel of the fs. */ | 556 | /* this way we can restrict mkdir to only the toplevel of the fs. */ |
557 | static struct inode_operations dlmfs_root_inode_operations = { | 557 | static struct inode_operations dlmfs_root_inode_operations = { |
558 | .lookup = simple_lookup, | 558 | .lookup = simple_lookup, |
559 | .mkdir = dlmfs_mkdir, | 559 | .mkdir = dlmfs_mkdir, |
560 | .rmdir = simple_rmdir, | 560 | .rmdir = simple_rmdir, |
561 | }; | 561 | }; |
562 | 562 | ||
563 | static struct super_operations dlmfs_ops = { | 563 | static struct super_operations dlmfs_ops = { |
564 | .statfs = simple_statfs, | 564 | .statfs = simple_statfs, |
565 | .alloc_inode = dlmfs_alloc_inode, | 565 | .alloc_inode = dlmfs_alloc_inode, |
566 | .destroy_inode = dlmfs_destroy_inode, | 566 | .destroy_inode = dlmfs_destroy_inode, |
567 | .clear_inode = dlmfs_clear_inode, | 567 | .clear_inode = dlmfs_clear_inode, |
568 | .drop_inode = generic_delete_inode, | 568 | .drop_inode = generic_delete_inode, |
569 | }; | 569 | }; |
570 | 570 | ||
571 | static struct inode_operations dlmfs_file_inode_operations = { | 571 | static struct inode_operations dlmfs_file_inode_operations = { |
572 | .getattr = simple_getattr, | 572 | .getattr = simple_getattr, |
573 | }; | 573 | }; |
574 | 574 | ||
575 | static int dlmfs_get_sb(struct file_system_type *fs_type, | 575 | static int dlmfs_get_sb(struct file_system_type *fs_type, |
576 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 576 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) |
577 | { | 577 | { |
578 | return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt); | 578 | return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt); |
579 | } | 579 | } |
580 | 580 | ||
581 | static struct file_system_type dlmfs_fs_type = { | 581 | static struct file_system_type dlmfs_fs_type = { |
582 | .owner = THIS_MODULE, | 582 | .owner = THIS_MODULE, |
583 | .name = "ocfs2_dlmfs", | 583 | .name = "ocfs2_dlmfs", |
584 | .get_sb = dlmfs_get_sb, | 584 | .get_sb = dlmfs_get_sb, |
585 | .kill_sb = kill_litter_super, | 585 | .kill_sb = kill_litter_super, |
586 | }; | 586 | }; |
587 | 587 | ||
588 | static int __init init_dlmfs_fs(void) | 588 | static int __init init_dlmfs_fs(void) |
589 | { | 589 | { |
590 | int status; | 590 | int status; |
591 | int cleanup_inode = 0, cleanup_worker = 0; | 591 | int cleanup_inode = 0, cleanup_worker = 0; |
592 | 592 | ||
593 | dlmfs_print_version(); | 593 | dlmfs_print_version(); |
594 | 594 | ||
595 | dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", | 595 | dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", |
596 | sizeof(struct dlmfs_inode_private), | 596 | sizeof(struct dlmfs_inode_private), |
597 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | 597 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| |
598 | SLAB_MEM_SPREAD), | 598 | SLAB_MEM_SPREAD), |
599 | dlmfs_init_once, NULL); | 599 | dlmfs_init_once, NULL); |
600 | if (!dlmfs_inode_cache) | 600 | if (!dlmfs_inode_cache) |
601 | return -ENOMEM; | 601 | return -ENOMEM; |
602 | cleanup_inode = 1; | 602 | cleanup_inode = 1; |
603 | 603 | ||
604 | user_dlm_worker = create_singlethread_workqueue("user_dlm"); | 604 | user_dlm_worker = create_singlethread_workqueue("user_dlm"); |
605 | if (!user_dlm_worker) { | 605 | if (!user_dlm_worker) { |
606 | status = -ENOMEM; | 606 | status = -ENOMEM; |
607 | goto bail; | 607 | goto bail; |
608 | } | 608 | } |
609 | cleanup_worker = 1; | 609 | cleanup_worker = 1; |
610 | 610 | ||
611 | status = register_filesystem(&dlmfs_fs_type); | 611 | status = register_filesystem(&dlmfs_fs_type); |
612 | bail: | 612 | bail: |
613 | if (status) { | 613 | if (status) { |
614 | if (cleanup_inode) | 614 | if (cleanup_inode) |
615 | kmem_cache_destroy(dlmfs_inode_cache); | 615 | kmem_cache_destroy(dlmfs_inode_cache); |
616 | if (cleanup_worker) | 616 | if (cleanup_worker) |
617 | destroy_workqueue(user_dlm_worker); | 617 | destroy_workqueue(user_dlm_worker); |
618 | } else | 618 | } else |
619 | printk("OCFS2 User DLM kernel interface loaded\n"); | 619 | printk("OCFS2 User DLM kernel interface loaded\n"); |
620 | return status; | 620 | return status; |
621 | } | 621 | } |
622 | 622 | ||
623 | static void __exit exit_dlmfs_fs(void) | 623 | static void __exit exit_dlmfs_fs(void) |
624 | { | 624 | { |
625 | unregister_filesystem(&dlmfs_fs_type); | 625 | unregister_filesystem(&dlmfs_fs_type); |
626 | 626 | ||
627 | flush_workqueue(user_dlm_worker); | 627 | flush_workqueue(user_dlm_worker); |
628 | destroy_workqueue(user_dlm_worker); | 628 | destroy_workqueue(user_dlm_worker); |
629 | 629 | ||
630 | kmem_cache_destroy(dlmfs_inode_cache); | 630 | kmem_cache_destroy(dlmfs_inode_cache); |
631 | } | 631 | } |
632 | 632 | ||
633 | MODULE_AUTHOR("Oracle"); | 633 | MODULE_AUTHOR("Oracle"); |
634 | MODULE_LICENSE("GPL"); | 634 | MODULE_LICENSE("GPL"); |
635 | 635 | ||
636 | module_init(init_dlmfs_fs) | 636 | module_init(init_dlmfs_fs) |
637 | module_exit(exit_dlmfs_fs) | 637 | module_exit(exit_dlmfs_fs) |
638 | 638 |
fs/ocfs2/file.c
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * file.c | 4 | * file.c |
5 | * | 5 | * |
6 | * File open, close, extend, truncate | 6 | * File open, close, extend, truncate |
7 | * | 7 | * |
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public | 11 | * modify it under the terms of the GNU General Public |
12 | * License as published by the Free Software Foundation; either | 12 | * License as published by the Free Software Foundation; either |
13 | * version 2 of the License, or (at your option) any later version. | 13 | * version 2 of the License, or (at your option) any later version. |
14 | * | 14 | * |
15 | * This program is distributed in the hope that it will be useful, | 15 | * This program is distributed in the hope that it will be useful, |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 | * General Public License for more details. | 18 | * General Public License for more details. |
19 | * | 19 | * |
20 | * You should have received a copy of the GNU General Public | 20 | * You should have received a copy of the GNU General Public |
21 | * License along with this program; if not, write to the | 21 | * License along with this program; if not, write to the |
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
23 | * Boston, MA 021110-1307, USA. | 23 | * Boston, MA 021110-1307, USA. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/capability.h> | 26 | #include <linux/capability.h> |
27 | #include <linux/fs.h> | 27 | #include <linux/fs.h> |
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/highmem.h> | 30 | #include <linux/highmem.h> |
31 | #include <linux/pagemap.h> | 31 | #include <linux/pagemap.h> |
32 | #include <linux/uio.h> | 32 | #include <linux/uio.h> |
33 | #include <linux/sched.h> | 33 | #include <linux/sched.h> |
34 | #include <linux/pipe_fs_i.h> | 34 | #include <linux/pipe_fs_i.h> |
35 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
36 | 36 | ||
37 | #define MLOG_MASK_PREFIX ML_INODE | 37 | #define MLOG_MASK_PREFIX ML_INODE |
38 | #include <cluster/masklog.h> | 38 | #include <cluster/masklog.h> |
39 | 39 | ||
40 | #include "ocfs2.h" | 40 | #include "ocfs2.h" |
41 | 41 | ||
42 | #include "alloc.h" | 42 | #include "alloc.h" |
43 | #include "aops.h" | 43 | #include "aops.h" |
44 | #include "dir.h" | 44 | #include "dir.h" |
45 | #include "dlmglue.h" | 45 | #include "dlmglue.h" |
46 | #include "extent_map.h" | 46 | #include "extent_map.h" |
47 | #include "file.h" | 47 | #include "file.h" |
48 | #include "sysfile.h" | 48 | #include "sysfile.h" |
49 | #include "inode.h" | 49 | #include "inode.h" |
50 | #include "ioctl.h" | 50 | #include "ioctl.h" |
51 | #include "journal.h" | 51 | #include "journal.h" |
52 | #include "mmap.h" | 52 | #include "mmap.h" |
53 | #include "suballoc.h" | 53 | #include "suballoc.h" |
54 | #include "super.h" | 54 | #include "super.h" |
55 | 55 | ||
56 | #include "buffer_head_io.h" | 56 | #include "buffer_head_io.h" |
57 | 57 | ||
58 | static int ocfs2_sync_inode(struct inode *inode) | 58 | static int ocfs2_sync_inode(struct inode *inode) |
59 | { | 59 | { |
60 | filemap_fdatawrite(inode->i_mapping); | 60 | filemap_fdatawrite(inode->i_mapping); |
61 | return sync_mapping_buffers(inode->i_mapping); | 61 | return sync_mapping_buffers(inode->i_mapping); |
62 | } | 62 | } |
63 | 63 | ||
64 | static int ocfs2_file_open(struct inode *inode, struct file *file) | 64 | static int ocfs2_file_open(struct inode *inode, struct file *file) |
65 | { | 65 | { |
66 | int status; | 66 | int status; |
67 | int mode = file->f_flags; | 67 | int mode = file->f_flags; |
68 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 68 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
69 | 69 | ||
70 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, | 70 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, |
71 | file->f_dentry->d_name.len, file->f_dentry->d_name.name); | 71 | file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name); |
72 | 72 | ||
73 | spin_lock(&oi->ip_lock); | 73 | spin_lock(&oi->ip_lock); |
74 | 74 | ||
75 | /* Check that the inode hasn't been wiped from disk by another | 75 | /* Check that the inode hasn't been wiped from disk by another |
76 | * node. If it hasn't then we're safe as long as we hold the | 76 | * node. If it hasn't then we're safe as long as we hold the |
77 | * spin lock until our increment of open count. */ | 77 | * spin lock until our increment of open count. */ |
78 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { | 78 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { |
79 | spin_unlock(&oi->ip_lock); | 79 | spin_unlock(&oi->ip_lock); |
80 | 80 | ||
81 | status = -ENOENT; | 81 | status = -ENOENT; |
82 | goto leave; | 82 | goto leave; |
83 | } | 83 | } |
84 | 84 | ||
85 | if (mode & O_DIRECT) | 85 | if (mode & O_DIRECT) |
86 | oi->ip_flags |= OCFS2_INODE_OPEN_DIRECT; | 86 | oi->ip_flags |= OCFS2_INODE_OPEN_DIRECT; |
87 | 87 | ||
88 | oi->ip_open_count++; | 88 | oi->ip_open_count++; |
89 | spin_unlock(&oi->ip_lock); | 89 | spin_unlock(&oi->ip_lock); |
90 | status = 0; | 90 | status = 0; |
91 | leave: | 91 | leave: |
92 | mlog_exit(status); | 92 | mlog_exit(status); |
93 | return status; | 93 | return status; |
94 | } | 94 | } |
95 | 95 | ||
96 | static int ocfs2_file_release(struct inode *inode, struct file *file) | 96 | static int ocfs2_file_release(struct inode *inode, struct file *file) |
97 | { | 97 | { |
98 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 98 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
99 | 99 | ||
100 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, | 100 | mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, |
101 | file->f_dentry->d_name.len, | 101 | file->f_path.dentry->d_name.len, |
102 | file->f_dentry->d_name.name); | 102 | file->f_path.dentry->d_name.name); |
103 | 103 | ||
104 | spin_lock(&oi->ip_lock); | 104 | spin_lock(&oi->ip_lock); |
105 | if (!--oi->ip_open_count) | 105 | if (!--oi->ip_open_count) |
106 | oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; | 106 | oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; |
107 | spin_unlock(&oi->ip_lock); | 107 | spin_unlock(&oi->ip_lock); |
108 | 108 | ||
109 | mlog_exit(0); | 109 | mlog_exit(0); |
110 | 110 | ||
111 | return 0; | 111 | return 0; |
112 | } | 112 | } |
113 | 113 | ||
114 | static int ocfs2_sync_file(struct file *file, | 114 | static int ocfs2_sync_file(struct file *file, |
115 | struct dentry *dentry, | 115 | struct dentry *dentry, |
116 | int datasync) | 116 | int datasync) |
117 | { | 117 | { |
118 | int err = 0; | 118 | int err = 0; |
119 | journal_t *journal; | 119 | journal_t *journal; |
120 | struct inode *inode = dentry->d_inode; | 120 | struct inode *inode = dentry->d_inode; |
121 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 121 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
122 | 122 | ||
123 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, | 123 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, |
124 | dentry->d_name.len, dentry->d_name.name); | 124 | dentry->d_name.len, dentry->d_name.name); |
125 | 125 | ||
126 | err = ocfs2_sync_inode(dentry->d_inode); | 126 | err = ocfs2_sync_inode(dentry->d_inode); |
127 | if (err) | 127 | if (err) |
128 | goto bail; | 128 | goto bail; |
129 | 129 | ||
130 | journal = osb->journal->j_journal; | 130 | journal = osb->journal->j_journal; |
131 | err = journal_force_commit(journal); | 131 | err = journal_force_commit(journal); |
132 | 132 | ||
133 | bail: | 133 | bail: |
134 | mlog_exit(err); | 134 | mlog_exit(err); |
135 | 135 | ||
136 | return (err < 0) ? -EIO : 0; | 136 | return (err < 0) ? -EIO : 0; |
137 | } | 137 | } |
138 | 138 | ||
139 | int ocfs2_should_update_atime(struct inode *inode, | 139 | int ocfs2_should_update_atime(struct inode *inode, |
140 | struct vfsmount *vfsmnt) | 140 | struct vfsmount *vfsmnt) |
141 | { | 141 | { |
142 | struct timespec now; | 142 | struct timespec now; |
143 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 143 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
144 | 144 | ||
145 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | 145 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) |
146 | return 0; | 146 | return 0; |
147 | 147 | ||
148 | if ((inode->i_flags & S_NOATIME) || | 148 | if ((inode->i_flags & S_NOATIME) || |
149 | ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) | 149 | ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) |
150 | return 0; | 150 | return 0; |
151 | 151 | ||
152 | if ((vfsmnt->mnt_flags & MNT_NOATIME) || | 152 | if ((vfsmnt->mnt_flags & MNT_NOATIME) || |
153 | ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) | 153 | ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) |
154 | return 0; | 154 | return 0; |
155 | 155 | ||
156 | now = CURRENT_TIME; | 156 | now = CURRENT_TIME; |
157 | if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum)) | 157 | if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum)) |
158 | return 0; | 158 | return 0; |
159 | else | 159 | else |
160 | return 1; | 160 | return 1; |
161 | } | 161 | } |
162 | 162 | ||
163 | int ocfs2_update_inode_atime(struct inode *inode, | 163 | int ocfs2_update_inode_atime(struct inode *inode, |
164 | struct buffer_head *bh) | 164 | struct buffer_head *bh) |
165 | { | 165 | { |
166 | int ret; | 166 | int ret; |
167 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 167 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
168 | handle_t *handle; | 168 | handle_t *handle; |
169 | 169 | ||
170 | mlog_entry_void(); | 170 | mlog_entry_void(); |
171 | 171 | ||
172 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 172 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
173 | if (handle == NULL) { | 173 | if (handle == NULL) { |
174 | ret = -ENOMEM; | 174 | ret = -ENOMEM; |
175 | mlog_errno(ret); | 175 | mlog_errno(ret); |
176 | goto out; | 176 | goto out; |
177 | } | 177 | } |
178 | 178 | ||
179 | inode->i_atime = CURRENT_TIME; | 179 | inode->i_atime = CURRENT_TIME; |
180 | ret = ocfs2_mark_inode_dirty(handle, inode, bh); | 180 | ret = ocfs2_mark_inode_dirty(handle, inode, bh); |
181 | if (ret < 0) | 181 | if (ret < 0) |
182 | mlog_errno(ret); | 182 | mlog_errno(ret); |
183 | 183 | ||
184 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | 184 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); |
185 | out: | 185 | out: |
186 | mlog_exit(ret); | 186 | mlog_exit(ret); |
187 | return ret; | 187 | return ret; |
188 | } | 188 | } |
189 | 189 | ||
190 | int ocfs2_set_inode_size(handle_t *handle, | 190 | int ocfs2_set_inode_size(handle_t *handle, |
191 | struct inode *inode, | 191 | struct inode *inode, |
192 | struct buffer_head *fe_bh, | 192 | struct buffer_head *fe_bh, |
193 | u64 new_i_size) | 193 | u64 new_i_size) |
194 | { | 194 | { |
195 | int status; | 195 | int status; |
196 | 196 | ||
197 | mlog_entry_void(); | 197 | mlog_entry_void(); |
198 | i_size_write(inode, new_i_size); | 198 | i_size_write(inode, new_i_size); |
199 | inode->i_blocks = ocfs2_align_bytes_to_sectors(new_i_size); | 199 | inode->i_blocks = ocfs2_align_bytes_to_sectors(new_i_size); |
200 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | 200 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; |
201 | 201 | ||
202 | status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); | 202 | status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); |
203 | if (status < 0) { | 203 | if (status < 0) { |
204 | mlog_errno(status); | 204 | mlog_errno(status); |
205 | goto bail; | 205 | goto bail; |
206 | } | 206 | } |
207 | 207 | ||
208 | bail: | 208 | bail: |
209 | mlog_exit(status); | 209 | mlog_exit(status); |
210 | return status; | 210 | return status; |
211 | } | 211 | } |
212 | 212 | ||
213 | static int ocfs2_simple_size_update(struct inode *inode, | 213 | static int ocfs2_simple_size_update(struct inode *inode, |
214 | struct buffer_head *di_bh, | 214 | struct buffer_head *di_bh, |
215 | u64 new_i_size) | 215 | u64 new_i_size) |
216 | { | 216 | { |
217 | int ret; | 217 | int ret; |
218 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 218 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
219 | handle_t *handle = NULL; | 219 | handle_t *handle = NULL; |
220 | 220 | ||
221 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 221 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
222 | if (handle == NULL) { | 222 | if (handle == NULL) { |
223 | ret = -ENOMEM; | 223 | ret = -ENOMEM; |
224 | mlog_errno(ret); | 224 | mlog_errno(ret); |
225 | goto out; | 225 | goto out; |
226 | } | 226 | } |
227 | 227 | ||
228 | ret = ocfs2_set_inode_size(handle, inode, di_bh, | 228 | ret = ocfs2_set_inode_size(handle, inode, di_bh, |
229 | new_i_size); | 229 | new_i_size); |
230 | if (ret < 0) | 230 | if (ret < 0) |
231 | mlog_errno(ret); | 231 | mlog_errno(ret); |
232 | 232 | ||
233 | ocfs2_commit_trans(osb, handle); | 233 | ocfs2_commit_trans(osb, handle); |
234 | out: | 234 | out: |
235 | return ret; | 235 | return ret; |
236 | } | 236 | } |
237 | 237 | ||
238 | static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | 238 | static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, |
239 | struct inode *inode, | 239 | struct inode *inode, |
240 | struct buffer_head *fe_bh, | 240 | struct buffer_head *fe_bh, |
241 | u64 new_i_size) | 241 | u64 new_i_size) |
242 | { | 242 | { |
243 | int status; | 243 | int status; |
244 | handle_t *handle; | 244 | handle_t *handle; |
245 | 245 | ||
246 | mlog_entry_void(); | 246 | mlog_entry_void(); |
247 | 247 | ||
248 | /* TODO: This needs to actually orphan the inode in this | 248 | /* TODO: This needs to actually orphan the inode in this |
249 | * transaction. */ | 249 | * transaction. */ |
250 | 250 | ||
251 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 251 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
252 | if (IS_ERR(handle)) { | 252 | if (IS_ERR(handle)) { |
253 | status = PTR_ERR(handle); | 253 | status = PTR_ERR(handle); |
254 | mlog_errno(status); | 254 | mlog_errno(status); |
255 | goto out; | 255 | goto out; |
256 | } | 256 | } |
257 | 257 | ||
258 | status = ocfs2_set_inode_size(handle, inode, fe_bh, new_i_size); | 258 | status = ocfs2_set_inode_size(handle, inode, fe_bh, new_i_size); |
259 | if (status < 0) | 259 | if (status < 0) |
260 | mlog_errno(status); | 260 | mlog_errno(status); |
261 | 261 | ||
262 | ocfs2_commit_trans(osb, handle); | 262 | ocfs2_commit_trans(osb, handle); |
263 | out: | 263 | out: |
264 | mlog_exit(status); | 264 | mlog_exit(status); |
265 | return status; | 265 | return status; |
266 | } | 266 | } |
267 | 267 | ||
268 | static int ocfs2_truncate_file(struct inode *inode, | 268 | static int ocfs2_truncate_file(struct inode *inode, |
269 | struct buffer_head *di_bh, | 269 | struct buffer_head *di_bh, |
270 | u64 new_i_size) | 270 | u64 new_i_size) |
271 | { | 271 | { |
272 | int status = 0; | 272 | int status = 0; |
273 | struct ocfs2_dinode *fe = NULL; | 273 | struct ocfs2_dinode *fe = NULL; |
274 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 274 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
275 | struct ocfs2_truncate_context *tc = NULL; | 275 | struct ocfs2_truncate_context *tc = NULL; |
276 | 276 | ||
277 | mlog_entry("(inode = %llu, new_i_size = %llu\n", | 277 | mlog_entry("(inode = %llu, new_i_size = %llu\n", |
278 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 278 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
279 | (unsigned long long)new_i_size); | 279 | (unsigned long long)new_i_size); |
280 | 280 | ||
281 | truncate_inode_pages(inode->i_mapping, new_i_size); | 281 | truncate_inode_pages(inode->i_mapping, new_i_size); |
282 | 282 | ||
283 | fe = (struct ocfs2_dinode *) di_bh->b_data; | 283 | fe = (struct ocfs2_dinode *) di_bh->b_data; |
284 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 284 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
285 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); | 285 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); |
286 | status = -EIO; | 286 | status = -EIO; |
287 | goto bail; | 287 | goto bail; |
288 | } | 288 | } |
289 | 289 | ||
290 | mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode), | 290 | mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode), |
291 | "Inode %llu, inode i_size = %lld != di " | 291 | "Inode %llu, inode i_size = %lld != di " |
292 | "i_size = %llu, i_flags = 0x%x\n", | 292 | "i_size = %llu, i_flags = 0x%x\n", |
293 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 293 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
294 | i_size_read(inode), | 294 | i_size_read(inode), |
295 | (unsigned long long)le64_to_cpu(fe->i_size), | 295 | (unsigned long long)le64_to_cpu(fe->i_size), |
296 | le32_to_cpu(fe->i_flags)); | 296 | le32_to_cpu(fe->i_flags)); |
297 | 297 | ||
298 | if (new_i_size > le64_to_cpu(fe->i_size)) { | 298 | if (new_i_size > le64_to_cpu(fe->i_size)) { |
299 | mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n", | 299 | mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n", |
300 | (unsigned long long)le64_to_cpu(fe->i_size), | 300 | (unsigned long long)le64_to_cpu(fe->i_size), |
301 | (unsigned long long)new_i_size); | 301 | (unsigned long long)new_i_size); |
302 | status = -EINVAL; | 302 | status = -EINVAL; |
303 | mlog_errno(status); | 303 | mlog_errno(status); |
304 | goto bail; | 304 | goto bail; |
305 | } | 305 | } |
306 | 306 | ||
307 | mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n", | 307 | mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n", |
308 | (unsigned long long)le64_to_cpu(fe->i_blkno), | 308 | (unsigned long long)le64_to_cpu(fe->i_blkno), |
309 | (unsigned long long)le64_to_cpu(fe->i_size), | 309 | (unsigned long long)le64_to_cpu(fe->i_size), |
310 | (unsigned long long)new_i_size); | 310 | (unsigned long long)new_i_size); |
311 | 311 | ||
312 | /* lets handle the simple truncate cases before doing any more | 312 | /* lets handle the simple truncate cases before doing any more |
313 | * cluster locking. */ | 313 | * cluster locking. */ |
314 | if (new_i_size == le64_to_cpu(fe->i_size)) | 314 | if (new_i_size == le64_to_cpu(fe->i_size)) |
315 | goto bail; | 315 | goto bail; |
316 | 316 | ||
317 | /* This forces other nodes to sync and drop their pages. Do | 317 | /* This forces other nodes to sync and drop their pages. Do |
318 | * this even if we have a truncate without allocation change - | 318 | * this even if we have a truncate without allocation change - |
319 | * ocfs2 cluster sizes can be much greater than page size, so | 319 | * ocfs2 cluster sizes can be much greater than page size, so |
320 | * we have to truncate them anyway. */ | 320 | * we have to truncate them anyway. */ |
321 | status = ocfs2_data_lock(inode, 1); | 321 | status = ocfs2_data_lock(inode, 1); |
322 | if (status < 0) { | 322 | if (status < 0) { |
323 | mlog_errno(status); | 323 | mlog_errno(status); |
324 | goto bail; | 324 | goto bail; |
325 | } | 325 | } |
326 | ocfs2_data_unlock(inode, 1); | 326 | ocfs2_data_unlock(inode, 1); |
327 | 327 | ||
328 | if (le32_to_cpu(fe->i_clusters) == | 328 | if (le32_to_cpu(fe->i_clusters) == |
329 | ocfs2_clusters_for_bytes(osb->sb, new_i_size)) { | 329 | ocfs2_clusters_for_bytes(osb->sb, new_i_size)) { |
330 | mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n", | 330 | mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n", |
331 | fe->i_clusters); | 331 | fe->i_clusters); |
332 | /* No allocation change is required, so lets fast path | 332 | /* No allocation change is required, so lets fast path |
333 | * this truncate. */ | 333 | * this truncate. */ |
334 | status = ocfs2_simple_size_update(inode, di_bh, new_i_size); | 334 | status = ocfs2_simple_size_update(inode, di_bh, new_i_size); |
335 | if (status < 0) | 335 | if (status < 0) |
336 | mlog_errno(status); | 336 | mlog_errno(status); |
337 | goto bail; | 337 | goto bail; |
338 | } | 338 | } |
339 | 339 | ||
340 | /* alright, we're going to need to do a full blown alloc size | 340 | /* alright, we're going to need to do a full blown alloc size |
341 | * change. Orphan the inode so that recovery can complete the | 341 | * change. Orphan the inode so that recovery can complete the |
342 | * truncate if necessary. This does the task of marking | 342 | * truncate if necessary. This does the task of marking |
343 | * i_size. */ | 343 | * i_size. */ |
344 | status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size); | 344 | status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size); |
345 | if (status < 0) { | 345 | if (status < 0) { |
346 | mlog_errno(status); | 346 | mlog_errno(status); |
347 | goto bail; | 347 | goto bail; |
348 | } | 348 | } |
349 | 349 | ||
350 | status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); | 350 | status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); |
351 | if (status < 0) { | 351 | if (status < 0) { |
352 | mlog_errno(status); | 352 | mlog_errno(status); |
353 | goto bail; | 353 | goto bail; |
354 | } | 354 | } |
355 | 355 | ||
356 | status = ocfs2_commit_truncate(osb, inode, di_bh, tc); | 356 | status = ocfs2_commit_truncate(osb, inode, di_bh, tc); |
357 | if (status < 0) { | 357 | if (status < 0) { |
358 | mlog_errno(status); | 358 | mlog_errno(status); |
359 | goto bail; | 359 | goto bail; |
360 | } | 360 | } |
361 | 361 | ||
362 | /* TODO: orphan dir cleanup here. */ | 362 | /* TODO: orphan dir cleanup here. */ |
363 | bail: | 363 | bail: |
364 | 364 | ||
365 | mlog_exit(status); | 365 | mlog_exit(status); |
366 | return status; | 366 | return status; |
367 | } | 367 | } |
368 | 368 | ||
369 | /* | 369 | /* |
370 | * extend allocation only here. | 370 | * extend allocation only here. |
371 | * we'll update all the disk stuff, and oip->alloc_size | 371 | * we'll update all the disk stuff, and oip->alloc_size |
372 | * | 372 | * |
373 | * expect stuff to be locked, a transaction started and enough data / | 373 | * expect stuff to be locked, a transaction started and enough data / |
374 | * metadata reservations in the contexts. | 374 | * metadata reservations in the contexts. |
375 | * | 375 | * |
376 | * Will return -EAGAIN, and a reason if a restart is needed. | 376 | * Will return -EAGAIN, and a reason if a restart is needed. |
377 | * If passed in, *reason will always be set, even in error. | 377 | * If passed in, *reason will always be set, even in error. |
378 | */ | 378 | */ |
379 | int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | 379 | int ocfs2_do_extend_allocation(struct ocfs2_super *osb, |
380 | struct inode *inode, | 380 | struct inode *inode, |
381 | u32 clusters_to_add, | 381 | u32 clusters_to_add, |
382 | struct buffer_head *fe_bh, | 382 | struct buffer_head *fe_bh, |
383 | handle_t *handle, | 383 | handle_t *handle, |
384 | struct ocfs2_alloc_context *data_ac, | 384 | struct ocfs2_alloc_context *data_ac, |
385 | struct ocfs2_alloc_context *meta_ac, | 385 | struct ocfs2_alloc_context *meta_ac, |
386 | enum ocfs2_alloc_restarted *reason_ret) | 386 | enum ocfs2_alloc_restarted *reason_ret) |
387 | { | 387 | { |
388 | int status = 0; | 388 | int status = 0; |
389 | int free_extents; | 389 | int free_extents; |
390 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; | 390 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; |
391 | enum ocfs2_alloc_restarted reason = RESTART_NONE; | 391 | enum ocfs2_alloc_restarted reason = RESTART_NONE; |
392 | u32 bit_off, num_bits; | 392 | u32 bit_off, num_bits; |
393 | u64 block; | 393 | u64 block; |
394 | 394 | ||
395 | BUG_ON(!clusters_to_add); | 395 | BUG_ON(!clusters_to_add); |
396 | 396 | ||
397 | free_extents = ocfs2_num_free_extents(osb, inode, fe); | 397 | free_extents = ocfs2_num_free_extents(osb, inode, fe); |
398 | if (free_extents < 0) { | 398 | if (free_extents < 0) { |
399 | status = free_extents; | 399 | status = free_extents; |
400 | mlog_errno(status); | 400 | mlog_errno(status); |
401 | goto leave; | 401 | goto leave; |
402 | } | 402 | } |
403 | 403 | ||
404 | /* there are two cases which could cause us to EAGAIN in the | 404 | /* there are two cases which could cause us to EAGAIN in the |
405 | * we-need-more-metadata case: | 405 | * we-need-more-metadata case: |
406 | * 1) we haven't reserved *any* | 406 | * 1) we haven't reserved *any* |
407 | * 2) we are so fragmented, we've needed to add metadata too | 407 | * 2) we are so fragmented, we've needed to add metadata too |
408 | * many times. */ | 408 | * many times. */ |
409 | if (!free_extents && !meta_ac) { | 409 | if (!free_extents && !meta_ac) { |
410 | mlog(0, "we haven't reserved any metadata!\n"); | 410 | mlog(0, "we haven't reserved any metadata!\n"); |
411 | status = -EAGAIN; | 411 | status = -EAGAIN; |
412 | reason = RESTART_META; | 412 | reason = RESTART_META; |
413 | goto leave; | 413 | goto leave; |
414 | } else if ((!free_extents) | 414 | } else if ((!free_extents) |
415 | && (ocfs2_alloc_context_bits_left(meta_ac) | 415 | && (ocfs2_alloc_context_bits_left(meta_ac) |
416 | < ocfs2_extend_meta_needed(fe))) { | 416 | < ocfs2_extend_meta_needed(fe))) { |
417 | mlog(0, "filesystem is really fragmented...\n"); | 417 | mlog(0, "filesystem is really fragmented...\n"); |
418 | status = -EAGAIN; | 418 | status = -EAGAIN; |
419 | reason = RESTART_META; | 419 | reason = RESTART_META; |
420 | goto leave; | 420 | goto leave; |
421 | } | 421 | } |
422 | 422 | ||
423 | status = ocfs2_claim_clusters(osb, handle, data_ac, 1, | 423 | status = ocfs2_claim_clusters(osb, handle, data_ac, 1, |
424 | &bit_off, &num_bits); | 424 | &bit_off, &num_bits); |
425 | if (status < 0) { | 425 | if (status < 0) { |
426 | if (status != -ENOSPC) | 426 | if (status != -ENOSPC) |
427 | mlog_errno(status); | 427 | mlog_errno(status); |
428 | goto leave; | 428 | goto leave; |
429 | } | 429 | } |
430 | 430 | ||
431 | BUG_ON(num_bits > clusters_to_add); | 431 | BUG_ON(num_bits > clusters_to_add); |
432 | 432 | ||
433 | /* reserve our write early -- insert_extent may update the inode */ | 433 | /* reserve our write early -- insert_extent may update the inode */ |
434 | status = ocfs2_journal_access(handle, inode, fe_bh, | 434 | status = ocfs2_journal_access(handle, inode, fe_bh, |
435 | OCFS2_JOURNAL_ACCESS_WRITE); | 435 | OCFS2_JOURNAL_ACCESS_WRITE); |
436 | if (status < 0) { | 436 | if (status < 0) { |
437 | mlog_errno(status); | 437 | mlog_errno(status); |
438 | goto leave; | 438 | goto leave; |
439 | } | 439 | } |
440 | 440 | ||
441 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); | 441 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); |
442 | mlog(0, "Allocating %u clusters at block %u for inode %llu\n", | 442 | mlog(0, "Allocating %u clusters at block %u for inode %llu\n", |
443 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | 443 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); |
444 | status = ocfs2_insert_extent(osb, handle, inode, fe_bh, block, | 444 | status = ocfs2_insert_extent(osb, handle, inode, fe_bh, block, |
445 | num_bits, meta_ac); | 445 | num_bits, meta_ac); |
446 | if (status < 0) { | 446 | if (status < 0) { |
447 | mlog_errno(status); | 447 | mlog_errno(status); |
448 | goto leave; | 448 | goto leave; |
449 | } | 449 | } |
450 | 450 | ||
451 | le32_add_cpu(&fe->i_clusters, num_bits); | 451 | le32_add_cpu(&fe->i_clusters, num_bits); |
452 | spin_lock(&OCFS2_I(inode)->ip_lock); | 452 | spin_lock(&OCFS2_I(inode)->ip_lock); |
453 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | 453 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); |
454 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 454 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
455 | 455 | ||
456 | status = ocfs2_journal_dirty(handle, fe_bh); | 456 | status = ocfs2_journal_dirty(handle, fe_bh); |
457 | if (status < 0) { | 457 | if (status < 0) { |
458 | mlog_errno(status); | 458 | mlog_errno(status); |
459 | goto leave; | 459 | goto leave; |
460 | } | 460 | } |
461 | 461 | ||
462 | clusters_to_add -= num_bits; | 462 | clusters_to_add -= num_bits; |
463 | 463 | ||
464 | if (clusters_to_add) { | 464 | if (clusters_to_add) { |
465 | mlog(0, "need to alloc once more, clusters = %u, wanted = " | 465 | mlog(0, "need to alloc once more, clusters = %u, wanted = " |
466 | "%u\n", fe->i_clusters, clusters_to_add); | 466 | "%u\n", fe->i_clusters, clusters_to_add); |
467 | status = -EAGAIN; | 467 | status = -EAGAIN; |
468 | reason = RESTART_TRANS; | 468 | reason = RESTART_TRANS; |
469 | } | 469 | } |
470 | 470 | ||
471 | leave: | 471 | leave: |
472 | mlog_exit(status); | 472 | mlog_exit(status); |
473 | if (reason_ret) | 473 | if (reason_ret) |
474 | *reason_ret = reason; | 474 | *reason_ret = reason; |
475 | return status; | 475 | return status; |
476 | } | 476 | } |
477 | 477 | ||
478 | static int ocfs2_extend_allocation(struct inode *inode, | 478 | static int ocfs2_extend_allocation(struct inode *inode, |
479 | u32 clusters_to_add) | 479 | u32 clusters_to_add) |
480 | { | 480 | { |
481 | int status = 0; | 481 | int status = 0; |
482 | int restart_func = 0; | 482 | int restart_func = 0; |
483 | int drop_alloc_sem = 0; | 483 | int drop_alloc_sem = 0; |
484 | int credits, num_free_extents; | 484 | int credits, num_free_extents; |
485 | u32 prev_clusters; | 485 | u32 prev_clusters; |
486 | struct buffer_head *bh = NULL; | 486 | struct buffer_head *bh = NULL; |
487 | struct ocfs2_dinode *fe = NULL; | 487 | struct ocfs2_dinode *fe = NULL; |
488 | handle_t *handle = NULL; | 488 | handle_t *handle = NULL; |
489 | struct ocfs2_alloc_context *data_ac = NULL; | 489 | struct ocfs2_alloc_context *data_ac = NULL; |
490 | struct ocfs2_alloc_context *meta_ac = NULL; | 490 | struct ocfs2_alloc_context *meta_ac = NULL; |
491 | enum ocfs2_alloc_restarted why; | 491 | enum ocfs2_alloc_restarted why; |
492 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 492 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
493 | 493 | ||
494 | mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); | 494 | mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); |
495 | 495 | ||
496 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, | 496 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, |
497 | OCFS2_BH_CACHED, inode); | 497 | OCFS2_BH_CACHED, inode); |
498 | if (status < 0) { | 498 | if (status < 0) { |
499 | mlog_errno(status); | 499 | mlog_errno(status); |
500 | goto leave; | 500 | goto leave; |
501 | } | 501 | } |
502 | 502 | ||
503 | fe = (struct ocfs2_dinode *) bh->b_data; | 503 | fe = (struct ocfs2_dinode *) bh->b_data; |
504 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 504 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
505 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); | 505 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); |
506 | status = -EIO; | 506 | status = -EIO; |
507 | goto leave; | 507 | goto leave; |
508 | } | 508 | } |
509 | 509 | ||
510 | restart_all: | 510 | restart_all: |
511 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); | 511 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); |
512 | 512 | ||
513 | mlog(0, "extend inode %llu, i_size = %lld, fe->i_clusters = %u, " | 513 | mlog(0, "extend inode %llu, i_size = %lld, fe->i_clusters = %u, " |
514 | "clusters_to_add = %u\n", | 514 | "clusters_to_add = %u\n", |
515 | (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), | 515 | (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), |
516 | fe->i_clusters, clusters_to_add); | 516 | fe->i_clusters, clusters_to_add); |
517 | 517 | ||
518 | num_free_extents = ocfs2_num_free_extents(osb, | 518 | num_free_extents = ocfs2_num_free_extents(osb, |
519 | inode, | 519 | inode, |
520 | fe); | 520 | fe); |
521 | if (num_free_extents < 0) { | 521 | if (num_free_extents < 0) { |
522 | status = num_free_extents; | 522 | status = num_free_extents; |
523 | mlog_errno(status); | 523 | mlog_errno(status); |
524 | goto leave; | 524 | goto leave; |
525 | } | 525 | } |
526 | 526 | ||
527 | if (!num_free_extents) { | 527 | if (!num_free_extents) { |
528 | status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac); | 528 | status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac); |
529 | if (status < 0) { | 529 | if (status < 0) { |
530 | if (status != -ENOSPC) | 530 | if (status != -ENOSPC) |
531 | mlog_errno(status); | 531 | mlog_errno(status); |
532 | goto leave; | 532 | goto leave; |
533 | } | 533 | } |
534 | } | 534 | } |
535 | 535 | ||
536 | status = ocfs2_reserve_clusters(osb, clusters_to_add, &data_ac); | 536 | status = ocfs2_reserve_clusters(osb, clusters_to_add, &data_ac); |
537 | if (status < 0) { | 537 | if (status < 0) { |
538 | if (status != -ENOSPC) | 538 | if (status != -ENOSPC) |
539 | mlog_errno(status); | 539 | mlog_errno(status); |
540 | goto leave; | 540 | goto leave; |
541 | } | 541 | } |
542 | 542 | ||
543 | /* blocks peope in read/write from reading our allocation | 543 | /* blocks peope in read/write from reading our allocation |
544 | * until we're done changing it. We depend on i_mutex to block | 544 | * until we're done changing it. We depend on i_mutex to block |
545 | * other extend/truncate calls while we're here. Ordering wrt | 545 | * other extend/truncate calls while we're here. Ordering wrt |
546 | * start_trans is important here -- always do it before! */ | 546 | * start_trans is important here -- always do it before! */ |
547 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 547 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
548 | drop_alloc_sem = 1; | 548 | drop_alloc_sem = 1; |
549 | 549 | ||
550 | credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add); | 550 | credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add); |
551 | handle = ocfs2_start_trans(osb, credits); | 551 | handle = ocfs2_start_trans(osb, credits); |
552 | if (IS_ERR(handle)) { | 552 | if (IS_ERR(handle)) { |
553 | status = PTR_ERR(handle); | 553 | status = PTR_ERR(handle); |
554 | handle = NULL; | 554 | handle = NULL; |
555 | mlog_errno(status); | 555 | mlog_errno(status); |
556 | goto leave; | 556 | goto leave; |
557 | } | 557 | } |
558 | 558 | ||
559 | restarted_transaction: | 559 | restarted_transaction: |
560 | /* reserve a write to the file entry early on - that we if we | 560 | /* reserve a write to the file entry early on - that we if we |
561 | * run out of credits in the allocation path, we can still | 561 | * run out of credits in the allocation path, we can still |
562 | * update i_size. */ | 562 | * update i_size. */ |
563 | status = ocfs2_journal_access(handle, inode, bh, | 563 | status = ocfs2_journal_access(handle, inode, bh, |
564 | OCFS2_JOURNAL_ACCESS_WRITE); | 564 | OCFS2_JOURNAL_ACCESS_WRITE); |
565 | if (status < 0) { | 565 | if (status < 0) { |
566 | mlog_errno(status); | 566 | mlog_errno(status); |
567 | goto leave; | 567 | goto leave; |
568 | } | 568 | } |
569 | 569 | ||
570 | prev_clusters = OCFS2_I(inode)->ip_clusters; | 570 | prev_clusters = OCFS2_I(inode)->ip_clusters; |
571 | 571 | ||
572 | status = ocfs2_do_extend_allocation(osb, | 572 | status = ocfs2_do_extend_allocation(osb, |
573 | inode, | 573 | inode, |
574 | clusters_to_add, | 574 | clusters_to_add, |
575 | bh, | 575 | bh, |
576 | handle, | 576 | handle, |
577 | data_ac, | 577 | data_ac, |
578 | meta_ac, | 578 | meta_ac, |
579 | &why); | 579 | &why); |
580 | if ((status < 0) && (status != -EAGAIN)) { | 580 | if ((status < 0) && (status != -EAGAIN)) { |
581 | if (status != -ENOSPC) | 581 | if (status != -ENOSPC) |
582 | mlog_errno(status); | 582 | mlog_errno(status); |
583 | goto leave; | 583 | goto leave; |
584 | } | 584 | } |
585 | 585 | ||
586 | status = ocfs2_journal_dirty(handle, bh); | 586 | status = ocfs2_journal_dirty(handle, bh); |
587 | if (status < 0) { | 587 | if (status < 0) { |
588 | mlog_errno(status); | 588 | mlog_errno(status); |
589 | goto leave; | 589 | goto leave; |
590 | } | 590 | } |
591 | 591 | ||
592 | spin_lock(&OCFS2_I(inode)->ip_lock); | 592 | spin_lock(&OCFS2_I(inode)->ip_lock); |
593 | clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); | 593 | clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); |
594 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 594 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
595 | 595 | ||
596 | if (why != RESTART_NONE && clusters_to_add) { | 596 | if (why != RESTART_NONE && clusters_to_add) { |
597 | if (why == RESTART_META) { | 597 | if (why == RESTART_META) { |
598 | mlog(0, "restarting function.\n"); | 598 | mlog(0, "restarting function.\n"); |
599 | restart_func = 1; | 599 | restart_func = 1; |
600 | } else { | 600 | } else { |
601 | BUG_ON(why != RESTART_TRANS); | 601 | BUG_ON(why != RESTART_TRANS); |
602 | 602 | ||
603 | mlog(0, "restarting transaction.\n"); | 603 | mlog(0, "restarting transaction.\n"); |
604 | /* TODO: This can be more intelligent. */ | 604 | /* TODO: This can be more intelligent. */ |
605 | credits = ocfs2_calc_extend_credits(osb->sb, | 605 | credits = ocfs2_calc_extend_credits(osb->sb, |
606 | fe, | 606 | fe, |
607 | clusters_to_add); | 607 | clusters_to_add); |
608 | status = ocfs2_extend_trans(handle, credits); | 608 | status = ocfs2_extend_trans(handle, credits); |
609 | if (status < 0) { | 609 | if (status < 0) { |
610 | /* handle still has to be committed at | 610 | /* handle still has to be committed at |
611 | * this point. */ | 611 | * this point. */ |
612 | status = -ENOMEM; | 612 | status = -ENOMEM; |
613 | mlog_errno(status); | 613 | mlog_errno(status); |
614 | goto leave; | 614 | goto leave; |
615 | } | 615 | } |
616 | goto restarted_transaction; | 616 | goto restarted_transaction; |
617 | } | 617 | } |
618 | } | 618 | } |
619 | 619 | ||
620 | mlog(0, "fe: i_clusters = %u, i_size=%llu\n", | 620 | mlog(0, "fe: i_clusters = %u, i_size=%llu\n", |
621 | fe->i_clusters, (unsigned long long)fe->i_size); | 621 | fe->i_clusters, (unsigned long long)fe->i_size); |
622 | mlog(0, "inode: ip_clusters=%u, i_size=%lld\n", | 622 | mlog(0, "inode: ip_clusters=%u, i_size=%lld\n", |
623 | OCFS2_I(inode)->ip_clusters, i_size_read(inode)); | 623 | OCFS2_I(inode)->ip_clusters, i_size_read(inode)); |
624 | 624 | ||
625 | leave: | 625 | leave: |
626 | if (drop_alloc_sem) { | 626 | if (drop_alloc_sem) { |
627 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 627 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
628 | drop_alloc_sem = 0; | 628 | drop_alloc_sem = 0; |
629 | } | 629 | } |
630 | if (handle) { | 630 | if (handle) { |
631 | ocfs2_commit_trans(osb, handle); | 631 | ocfs2_commit_trans(osb, handle); |
632 | handle = NULL; | 632 | handle = NULL; |
633 | } | 633 | } |
634 | if (data_ac) { | 634 | if (data_ac) { |
635 | ocfs2_free_alloc_context(data_ac); | 635 | ocfs2_free_alloc_context(data_ac); |
636 | data_ac = NULL; | 636 | data_ac = NULL; |
637 | } | 637 | } |
638 | if (meta_ac) { | 638 | if (meta_ac) { |
639 | ocfs2_free_alloc_context(meta_ac); | 639 | ocfs2_free_alloc_context(meta_ac); |
640 | meta_ac = NULL; | 640 | meta_ac = NULL; |
641 | } | 641 | } |
642 | if ((!status) && restart_func) { | 642 | if ((!status) && restart_func) { |
643 | restart_func = 0; | 643 | restart_func = 0; |
644 | goto restart_all; | 644 | goto restart_all; |
645 | } | 645 | } |
646 | if (bh) { | 646 | if (bh) { |
647 | brelse(bh); | 647 | brelse(bh); |
648 | bh = NULL; | 648 | bh = NULL; |
649 | } | 649 | } |
650 | 650 | ||
651 | mlog_exit(status); | 651 | mlog_exit(status); |
652 | return status; | 652 | return status; |
653 | } | 653 | } |
654 | 654 | ||
655 | /* Some parts of this taken from generic_cont_expand, which turned out | 655 | /* Some parts of this taken from generic_cont_expand, which turned out |
656 | * to be too fragile to do exactly what we need without us having to | 656 | * to be too fragile to do exactly what we need without us having to |
657 | * worry about recursive locking in ->prepare_write() and | 657 | * worry about recursive locking in ->prepare_write() and |
658 | * ->commit_write(). */ | 658 | * ->commit_write(). */ |
659 | static int ocfs2_write_zero_page(struct inode *inode, | 659 | static int ocfs2_write_zero_page(struct inode *inode, |
660 | u64 size) | 660 | u64 size) |
661 | { | 661 | { |
662 | struct address_space *mapping = inode->i_mapping; | 662 | struct address_space *mapping = inode->i_mapping; |
663 | struct page *page; | 663 | struct page *page; |
664 | unsigned long index; | 664 | unsigned long index; |
665 | unsigned int offset; | 665 | unsigned int offset; |
666 | handle_t *handle = NULL; | 666 | handle_t *handle = NULL; |
667 | int ret; | 667 | int ret; |
668 | 668 | ||
669 | offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ | 669 | offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ |
670 | /* ugh. in prepare/commit_write, if from==to==start of block, we | 670 | /* ugh. in prepare/commit_write, if from==to==start of block, we |
671 | ** skip the prepare. make sure we never send an offset for the start | 671 | ** skip the prepare. make sure we never send an offset for the start |
672 | ** of a block | 672 | ** of a block |
673 | */ | 673 | */ |
674 | if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { | 674 | if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { |
675 | offset++; | 675 | offset++; |
676 | } | 676 | } |
677 | index = size >> PAGE_CACHE_SHIFT; | 677 | index = size >> PAGE_CACHE_SHIFT; |
678 | 678 | ||
679 | page = grab_cache_page(mapping, index); | 679 | page = grab_cache_page(mapping, index); |
680 | if (!page) { | 680 | if (!page) { |
681 | ret = -ENOMEM; | 681 | ret = -ENOMEM; |
682 | mlog_errno(ret); | 682 | mlog_errno(ret); |
683 | goto out; | 683 | goto out; |
684 | } | 684 | } |
685 | 685 | ||
686 | ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); | 686 | ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); |
687 | if (ret < 0) { | 687 | if (ret < 0) { |
688 | mlog_errno(ret); | 688 | mlog_errno(ret); |
689 | goto out_unlock; | 689 | goto out_unlock; |
690 | } | 690 | } |
691 | 691 | ||
692 | if (ocfs2_should_order_data(inode)) { | 692 | if (ocfs2_should_order_data(inode)) { |
693 | handle = ocfs2_start_walk_page_trans(inode, page, offset, | 693 | handle = ocfs2_start_walk_page_trans(inode, page, offset, |
694 | offset); | 694 | offset); |
695 | if (IS_ERR(handle)) { | 695 | if (IS_ERR(handle)) { |
696 | ret = PTR_ERR(handle); | 696 | ret = PTR_ERR(handle); |
697 | handle = NULL; | 697 | handle = NULL; |
698 | goto out_unlock; | 698 | goto out_unlock; |
699 | } | 699 | } |
700 | } | 700 | } |
701 | 701 | ||
702 | /* must not update i_size! */ | 702 | /* must not update i_size! */ |
703 | ret = block_commit_write(page, offset, offset); | 703 | ret = block_commit_write(page, offset, offset); |
704 | if (ret < 0) | 704 | if (ret < 0) |
705 | mlog_errno(ret); | 705 | mlog_errno(ret); |
706 | else | 706 | else |
707 | ret = 0; | 707 | ret = 0; |
708 | 708 | ||
709 | if (handle) | 709 | if (handle) |
710 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | 710 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); |
711 | out_unlock: | 711 | out_unlock: |
712 | unlock_page(page); | 712 | unlock_page(page); |
713 | page_cache_release(page); | 713 | page_cache_release(page); |
714 | out: | 714 | out: |
715 | return ret; | 715 | return ret; |
716 | } | 716 | } |
717 | 717 | ||
718 | static int ocfs2_zero_extend(struct inode *inode, | 718 | static int ocfs2_zero_extend(struct inode *inode, |
719 | u64 zero_to_size) | 719 | u64 zero_to_size) |
720 | { | 720 | { |
721 | int ret = 0; | 721 | int ret = 0; |
722 | u64 start_off; | 722 | u64 start_off; |
723 | struct super_block *sb = inode->i_sb; | 723 | struct super_block *sb = inode->i_sb; |
724 | 724 | ||
725 | start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); | 725 | start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); |
726 | while (start_off < zero_to_size) { | 726 | while (start_off < zero_to_size) { |
727 | ret = ocfs2_write_zero_page(inode, start_off); | 727 | ret = ocfs2_write_zero_page(inode, start_off); |
728 | if (ret < 0) { | 728 | if (ret < 0) { |
729 | mlog_errno(ret); | 729 | mlog_errno(ret); |
730 | goto out; | 730 | goto out; |
731 | } | 731 | } |
732 | 732 | ||
733 | start_off += sb->s_blocksize; | 733 | start_off += sb->s_blocksize; |
734 | 734 | ||
735 | /* | 735 | /* |
736 | * Very large extends have the potential to lock up | 736 | * Very large extends have the potential to lock up |
737 | * the cpu for extended periods of time. | 737 | * the cpu for extended periods of time. |
738 | */ | 738 | */ |
739 | cond_resched(); | 739 | cond_resched(); |
740 | } | 740 | } |
741 | 741 | ||
742 | out: | 742 | out: |
743 | return ret; | 743 | return ret; |
744 | } | 744 | } |
745 | 745 | ||
746 | /* | 746 | /* |
747 | * A tail_to_skip value > 0 indicates that we're being called from | 747 | * A tail_to_skip value > 0 indicates that we're being called from |
748 | * ocfs2_file_aio_write(). This has the following implications: | 748 | * ocfs2_file_aio_write(). This has the following implications: |
749 | * | 749 | * |
750 | * - we don't want to update i_size | 750 | * - we don't want to update i_size |
751 | * - di_bh will be NULL, which is fine because it's only used in the | 751 | * - di_bh will be NULL, which is fine because it's only used in the |
752 | * case where we want to update i_size. | 752 | * case where we want to update i_size. |
753 | * - ocfs2_zero_extend() will then only be filling the hole created | 753 | * - ocfs2_zero_extend() will then only be filling the hole created |
754 | * between i_size and the start of the write. | 754 | * between i_size and the start of the write. |
755 | */ | 755 | */ |
756 | static int ocfs2_extend_file(struct inode *inode, | 756 | static int ocfs2_extend_file(struct inode *inode, |
757 | struct buffer_head *di_bh, | 757 | struct buffer_head *di_bh, |
758 | u64 new_i_size, | 758 | u64 new_i_size, |
759 | size_t tail_to_skip) | 759 | size_t tail_to_skip) |
760 | { | 760 | { |
761 | int ret = 0; | 761 | int ret = 0; |
762 | u32 clusters_to_add; | 762 | u32 clusters_to_add; |
763 | 763 | ||
764 | BUG_ON(!tail_to_skip && !di_bh); | 764 | BUG_ON(!tail_to_skip && !di_bh); |
765 | 765 | ||
766 | /* setattr sometimes calls us like this. */ | 766 | /* setattr sometimes calls us like this. */ |
767 | if (new_i_size == 0) | 767 | if (new_i_size == 0) |
768 | goto out; | 768 | goto out; |
769 | 769 | ||
770 | if (i_size_read(inode) == new_i_size) | 770 | if (i_size_read(inode) == new_i_size) |
771 | goto out; | 771 | goto out; |
772 | BUG_ON(new_i_size < i_size_read(inode)); | 772 | BUG_ON(new_i_size < i_size_read(inode)); |
773 | 773 | ||
774 | clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) - | 774 | clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) - |
775 | OCFS2_I(inode)->ip_clusters; | 775 | OCFS2_I(inode)->ip_clusters; |
776 | 776 | ||
777 | /* | 777 | /* |
778 | * protect the pages that ocfs2_zero_extend is going to be | 778 | * protect the pages that ocfs2_zero_extend is going to be |
779 | * pulling into the page cache.. we do this before the | 779 | * pulling into the page cache.. we do this before the |
780 | * metadata extend so that we don't get into the situation | 780 | * metadata extend so that we don't get into the situation |
781 | * where we've extended the metadata but can't get the data | 781 | * where we've extended the metadata but can't get the data |
782 | * lock to zero. | 782 | * lock to zero. |
783 | */ | 783 | */ |
784 | ret = ocfs2_data_lock(inode, 1); | 784 | ret = ocfs2_data_lock(inode, 1); |
785 | if (ret < 0) { | 785 | if (ret < 0) { |
786 | mlog_errno(ret); | 786 | mlog_errno(ret); |
787 | goto out; | 787 | goto out; |
788 | } | 788 | } |
789 | 789 | ||
790 | if (clusters_to_add) { | 790 | if (clusters_to_add) { |
791 | ret = ocfs2_extend_allocation(inode, clusters_to_add); | 791 | ret = ocfs2_extend_allocation(inode, clusters_to_add); |
792 | if (ret < 0) { | 792 | if (ret < 0) { |
793 | mlog_errno(ret); | 793 | mlog_errno(ret); |
794 | goto out_unlock; | 794 | goto out_unlock; |
795 | } | 795 | } |
796 | } | 796 | } |
797 | 797 | ||
798 | /* | 798 | /* |
799 | * Call this even if we don't add any clusters to the tree. We | 799 | * Call this even if we don't add any clusters to the tree. We |
800 | * still need to zero the area between the old i_size and the | 800 | * still need to zero the area between the old i_size and the |
801 | * new i_size. | 801 | * new i_size. |
802 | */ | 802 | */ |
803 | ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip); | 803 | ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip); |
804 | if (ret < 0) { | 804 | if (ret < 0) { |
805 | mlog_errno(ret); | 805 | mlog_errno(ret); |
806 | goto out_unlock; | 806 | goto out_unlock; |
807 | } | 807 | } |
808 | 808 | ||
809 | if (!tail_to_skip) { | 809 | if (!tail_to_skip) { |
810 | /* We're being called from ocfs2_setattr() which wants | 810 | /* We're being called from ocfs2_setattr() which wants |
811 | * us to update i_size */ | 811 | * us to update i_size */ |
812 | ret = ocfs2_simple_size_update(inode, di_bh, new_i_size); | 812 | ret = ocfs2_simple_size_update(inode, di_bh, new_i_size); |
813 | if (ret < 0) | 813 | if (ret < 0) |
814 | mlog_errno(ret); | 814 | mlog_errno(ret); |
815 | } | 815 | } |
816 | 816 | ||
817 | out_unlock: | 817 | out_unlock: |
818 | ocfs2_data_unlock(inode, 1); | 818 | ocfs2_data_unlock(inode, 1); |
819 | 819 | ||
820 | out: | 820 | out: |
821 | return ret; | 821 | return ret; |
822 | } | 822 | } |
823 | 823 | ||
824 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | 824 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) |
825 | { | 825 | { |
826 | int status = 0, size_change; | 826 | int status = 0, size_change; |
827 | struct inode *inode = dentry->d_inode; | 827 | struct inode *inode = dentry->d_inode; |
828 | struct super_block *sb = inode->i_sb; | 828 | struct super_block *sb = inode->i_sb; |
829 | struct ocfs2_super *osb = OCFS2_SB(sb); | 829 | struct ocfs2_super *osb = OCFS2_SB(sb); |
830 | struct buffer_head *bh = NULL; | 830 | struct buffer_head *bh = NULL; |
831 | handle_t *handle = NULL; | 831 | handle_t *handle = NULL; |
832 | 832 | ||
833 | mlog_entry("(0x%p, '%.*s')\n", dentry, | 833 | mlog_entry("(0x%p, '%.*s')\n", dentry, |
834 | dentry->d_name.len, dentry->d_name.name); | 834 | dentry->d_name.len, dentry->d_name.name); |
835 | 835 | ||
836 | if (attr->ia_valid & ATTR_MODE) | 836 | if (attr->ia_valid & ATTR_MODE) |
837 | mlog(0, "mode change: %d\n", attr->ia_mode); | 837 | mlog(0, "mode change: %d\n", attr->ia_mode); |
838 | if (attr->ia_valid & ATTR_UID) | 838 | if (attr->ia_valid & ATTR_UID) |
839 | mlog(0, "uid change: %d\n", attr->ia_uid); | 839 | mlog(0, "uid change: %d\n", attr->ia_uid); |
840 | if (attr->ia_valid & ATTR_GID) | 840 | if (attr->ia_valid & ATTR_GID) |
841 | mlog(0, "gid change: %d\n", attr->ia_gid); | 841 | mlog(0, "gid change: %d\n", attr->ia_gid); |
842 | if (attr->ia_valid & ATTR_SIZE) | 842 | if (attr->ia_valid & ATTR_SIZE) |
843 | mlog(0, "size change...\n"); | 843 | mlog(0, "size change...\n"); |
844 | if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME)) | 844 | if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME)) |
845 | mlog(0, "time change...\n"); | 845 | mlog(0, "time change...\n"); |
846 | 846 | ||
847 | #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \ | 847 | #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \ |
848 | | ATTR_GID | ATTR_UID | ATTR_MODE) | 848 | | ATTR_GID | ATTR_UID | ATTR_MODE) |
849 | if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) { | 849 | if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) { |
850 | mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid); | 850 | mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid); |
851 | return 0; | 851 | return 0; |
852 | } | 852 | } |
853 | 853 | ||
854 | status = inode_change_ok(inode, attr); | 854 | status = inode_change_ok(inode, attr); |
855 | if (status) | 855 | if (status) |
856 | return status; | 856 | return status; |
857 | 857 | ||
858 | size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; | 858 | size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; |
859 | if (size_change) { | 859 | if (size_change) { |
860 | status = ocfs2_rw_lock(inode, 1); | 860 | status = ocfs2_rw_lock(inode, 1); |
861 | if (status < 0) { | 861 | if (status < 0) { |
862 | mlog_errno(status); | 862 | mlog_errno(status); |
863 | goto bail; | 863 | goto bail; |
864 | } | 864 | } |
865 | } | 865 | } |
866 | 866 | ||
867 | status = ocfs2_meta_lock(inode, &bh, 1); | 867 | status = ocfs2_meta_lock(inode, &bh, 1); |
868 | if (status < 0) { | 868 | if (status < 0) { |
869 | if (status != -ENOENT) | 869 | if (status != -ENOENT) |
870 | mlog_errno(status); | 870 | mlog_errno(status); |
871 | goto bail_unlock_rw; | 871 | goto bail_unlock_rw; |
872 | } | 872 | } |
873 | 873 | ||
874 | if (size_change && attr->ia_size != i_size_read(inode)) { | 874 | if (size_change && attr->ia_size != i_size_read(inode)) { |
875 | if (i_size_read(inode) > attr->ia_size) | 875 | if (i_size_read(inode) > attr->ia_size) |
876 | status = ocfs2_truncate_file(inode, bh, attr->ia_size); | 876 | status = ocfs2_truncate_file(inode, bh, attr->ia_size); |
877 | else | 877 | else |
878 | status = ocfs2_extend_file(inode, bh, attr->ia_size, 0); | 878 | status = ocfs2_extend_file(inode, bh, attr->ia_size, 0); |
879 | if (status < 0) { | 879 | if (status < 0) { |
880 | if (status != -ENOSPC) | 880 | if (status != -ENOSPC) |
881 | mlog_errno(status); | 881 | mlog_errno(status); |
882 | status = -ENOSPC; | 882 | status = -ENOSPC; |
883 | goto bail_unlock; | 883 | goto bail_unlock; |
884 | } | 884 | } |
885 | } | 885 | } |
886 | 886 | ||
887 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 887 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
888 | if (IS_ERR(handle)) { | 888 | if (IS_ERR(handle)) { |
889 | status = PTR_ERR(handle); | 889 | status = PTR_ERR(handle); |
890 | mlog_errno(status); | 890 | mlog_errno(status); |
891 | goto bail_unlock; | 891 | goto bail_unlock; |
892 | } | 892 | } |
893 | 893 | ||
894 | status = inode_setattr(inode, attr); | 894 | status = inode_setattr(inode, attr); |
895 | if (status < 0) { | 895 | if (status < 0) { |
896 | mlog_errno(status); | 896 | mlog_errno(status); |
897 | goto bail_commit; | 897 | goto bail_commit; |
898 | } | 898 | } |
899 | 899 | ||
900 | status = ocfs2_mark_inode_dirty(handle, inode, bh); | 900 | status = ocfs2_mark_inode_dirty(handle, inode, bh); |
901 | if (status < 0) | 901 | if (status < 0) |
902 | mlog_errno(status); | 902 | mlog_errno(status); |
903 | 903 | ||
904 | bail_commit: | 904 | bail_commit: |
905 | ocfs2_commit_trans(osb, handle); | 905 | ocfs2_commit_trans(osb, handle); |
906 | bail_unlock: | 906 | bail_unlock: |
907 | ocfs2_meta_unlock(inode, 1); | 907 | ocfs2_meta_unlock(inode, 1); |
908 | bail_unlock_rw: | 908 | bail_unlock_rw: |
909 | if (size_change) | 909 | if (size_change) |
910 | ocfs2_rw_unlock(inode, 1); | 910 | ocfs2_rw_unlock(inode, 1); |
911 | bail: | 911 | bail: |
912 | if (bh) | 912 | if (bh) |
913 | brelse(bh); | 913 | brelse(bh); |
914 | 914 | ||
915 | mlog_exit(status); | 915 | mlog_exit(status); |
916 | return status; | 916 | return status; |
917 | } | 917 | } |
918 | 918 | ||
919 | int ocfs2_getattr(struct vfsmount *mnt, | 919 | int ocfs2_getattr(struct vfsmount *mnt, |
920 | struct dentry *dentry, | 920 | struct dentry *dentry, |
921 | struct kstat *stat) | 921 | struct kstat *stat) |
922 | { | 922 | { |
923 | struct inode *inode = dentry->d_inode; | 923 | struct inode *inode = dentry->d_inode; |
924 | struct super_block *sb = dentry->d_inode->i_sb; | 924 | struct super_block *sb = dentry->d_inode->i_sb; |
925 | struct ocfs2_super *osb = sb->s_fs_info; | 925 | struct ocfs2_super *osb = sb->s_fs_info; |
926 | int err; | 926 | int err; |
927 | 927 | ||
928 | mlog_entry_void(); | 928 | mlog_entry_void(); |
929 | 929 | ||
930 | err = ocfs2_inode_revalidate(dentry); | 930 | err = ocfs2_inode_revalidate(dentry); |
931 | if (err) { | 931 | if (err) { |
932 | if (err != -ENOENT) | 932 | if (err != -ENOENT) |
933 | mlog_errno(err); | 933 | mlog_errno(err); |
934 | goto bail; | 934 | goto bail; |
935 | } | 935 | } |
936 | 936 | ||
937 | generic_fillattr(inode, stat); | 937 | generic_fillattr(inode, stat); |
938 | 938 | ||
939 | /* We set the blksize from the cluster size for performance */ | 939 | /* We set the blksize from the cluster size for performance */ |
940 | stat->blksize = osb->s_clustersize; | 940 | stat->blksize = osb->s_clustersize; |
941 | 941 | ||
942 | bail: | 942 | bail: |
943 | mlog_exit(err); | 943 | mlog_exit(err); |
944 | 944 | ||
945 | return err; | 945 | return err; |
946 | } | 946 | } |
947 | 947 | ||
948 | int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | 948 | int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) |
949 | { | 949 | { |
950 | int ret; | 950 | int ret; |
951 | 951 | ||
952 | mlog_entry_void(); | 952 | mlog_entry_void(); |
953 | 953 | ||
954 | ret = ocfs2_meta_lock(inode, NULL, 0); | 954 | ret = ocfs2_meta_lock(inode, NULL, 0); |
955 | if (ret) { | 955 | if (ret) { |
956 | mlog_errno(ret); | 956 | mlog_errno(ret); |
957 | goto out; | 957 | goto out; |
958 | } | 958 | } |
959 | 959 | ||
960 | ret = generic_permission(inode, mask, NULL); | 960 | ret = generic_permission(inode, mask, NULL); |
961 | if (ret) | 961 | if (ret) |
962 | mlog_errno(ret); | 962 | mlog_errno(ret); |
963 | 963 | ||
964 | ocfs2_meta_unlock(inode, 0); | 964 | ocfs2_meta_unlock(inode, 0); |
965 | out: | 965 | out: |
966 | mlog_exit(ret); | 966 | mlog_exit(ret); |
967 | return ret; | 967 | return ret; |
968 | } | 968 | } |
969 | 969 | ||
970 | static int ocfs2_write_remove_suid(struct inode *inode) | 970 | static int ocfs2_write_remove_suid(struct inode *inode) |
971 | { | 971 | { |
972 | int ret; | 972 | int ret; |
973 | struct buffer_head *bh = NULL; | 973 | struct buffer_head *bh = NULL; |
974 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 974 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
975 | handle_t *handle; | 975 | handle_t *handle; |
976 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 976 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
977 | struct ocfs2_dinode *di; | 977 | struct ocfs2_dinode *di; |
978 | 978 | ||
979 | mlog_entry("(Inode %llu, mode 0%o)\n", | 979 | mlog_entry("(Inode %llu, mode 0%o)\n", |
980 | (unsigned long long)oi->ip_blkno, inode->i_mode); | 980 | (unsigned long long)oi->ip_blkno, inode->i_mode); |
981 | 981 | ||
982 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 982 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
983 | if (handle == NULL) { | 983 | if (handle == NULL) { |
984 | ret = -ENOMEM; | 984 | ret = -ENOMEM; |
985 | mlog_errno(ret); | 985 | mlog_errno(ret); |
986 | goto out; | 986 | goto out; |
987 | } | 987 | } |
988 | 988 | ||
989 | ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); | 989 | ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); |
990 | if (ret < 0) { | 990 | if (ret < 0) { |
991 | mlog_errno(ret); | 991 | mlog_errno(ret); |
992 | goto out_trans; | 992 | goto out_trans; |
993 | } | 993 | } |
994 | 994 | ||
995 | ret = ocfs2_journal_access(handle, inode, bh, | 995 | ret = ocfs2_journal_access(handle, inode, bh, |
996 | OCFS2_JOURNAL_ACCESS_WRITE); | 996 | OCFS2_JOURNAL_ACCESS_WRITE); |
997 | if (ret < 0) { | 997 | if (ret < 0) { |
998 | mlog_errno(ret); | 998 | mlog_errno(ret); |
999 | goto out_bh; | 999 | goto out_bh; |
1000 | } | 1000 | } |
1001 | 1001 | ||
1002 | inode->i_mode &= ~S_ISUID; | 1002 | inode->i_mode &= ~S_ISUID; |
1003 | if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP)) | 1003 | if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP)) |
1004 | inode->i_mode &= ~S_ISGID; | 1004 | inode->i_mode &= ~S_ISGID; |
1005 | 1005 | ||
1006 | di = (struct ocfs2_dinode *) bh->b_data; | 1006 | di = (struct ocfs2_dinode *) bh->b_data; |
1007 | di->i_mode = cpu_to_le16(inode->i_mode); | 1007 | di->i_mode = cpu_to_le16(inode->i_mode); |
1008 | 1008 | ||
1009 | ret = ocfs2_journal_dirty(handle, bh); | 1009 | ret = ocfs2_journal_dirty(handle, bh); |
1010 | if (ret < 0) | 1010 | if (ret < 0) |
1011 | mlog_errno(ret); | 1011 | mlog_errno(ret); |
1012 | out_bh: | 1012 | out_bh: |
1013 | brelse(bh); | 1013 | brelse(bh); |
1014 | out_trans: | 1014 | out_trans: |
1015 | ocfs2_commit_trans(osb, handle); | 1015 | ocfs2_commit_trans(osb, handle); |
1016 | out: | 1016 | out: |
1017 | mlog_exit(ret); | 1017 | mlog_exit(ret); |
1018 | return ret; | 1018 | return ret; |
1019 | } | 1019 | } |
1020 | 1020 | ||
1021 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | 1021 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, |
1022 | loff_t *ppos, | 1022 | loff_t *ppos, |
1023 | size_t count, | 1023 | size_t count, |
1024 | int appending) | 1024 | int appending) |
1025 | { | 1025 | { |
1026 | int ret = 0, meta_level = appending; | 1026 | int ret = 0, meta_level = appending; |
1027 | struct inode *inode = dentry->d_inode; | 1027 | struct inode *inode = dentry->d_inode; |
1028 | u32 clusters; | 1028 | u32 clusters; |
1029 | loff_t newsize, saved_pos; | 1029 | loff_t newsize, saved_pos; |
1030 | 1030 | ||
1031 | /* | 1031 | /* |
1032 | * We sample i_size under a read level meta lock to see if our write | 1032 | * We sample i_size under a read level meta lock to see if our write |
1033 | * is extending the file, if it is we back off and get a write level | 1033 | * is extending the file, if it is we back off and get a write level |
1034 | * meta lock. | 1034 | * meta lock. |
1035 | */ | 1035 | */ |
1036 | for(;;) { | 1036 | for(;;) { |
1037 | ret = ocfs2_meta_lock(inode, NULL, meta_level); | 1037 | ret = ocfs2_meta_lock(inode, NULL, meta_level); |
1038 | if (ret < 0) { | 1038 | if (ret < 0) { |
1039 | meta_level = -1; | 1039 | meta_level = -1; |
1040 | mlog_errno(ret); | 1040 | mlog_errno(ret); |
1041 | goto out; | 1041 | goto out; |
1042 | } | 1042 | } |
1043 | 1043 | ||
1044 | /* Clear suid / sgid if necessary. We do this here | 1044 | /* Clear suid / sgid if necessary. We do this here |
1045 | * instead of later in the write path because | 1045 | * instead of later in the write path because |
1046 | * remove_suid() calls ->setattr without any hint that | 1046 | * remove_suid() calls ->setattr without any hint that |
1047 | * we may have already done our cluster locking. Since | 1047 | * we may have already done our cluster locking. Since |
1048 | * ocfs2_setattr() *must* take cluster locks to | 1048 | * ocfs2_setattr() *must* take cluster locks to |
1049 | * proceeed, this will lead us to recursively lock the | 1049 | * proceeed, this will lead us to recursively lock the |
1050 | * inode. There's also the dinode i_size state which | 1050 | * inode. There's also the dinode i_size state which |
1051 | * can be lost via setattr during extending writes (we | 1051 | * can be lost via setattr during extending writes (we |
1052 | * set inode->i_size at the end of a write. */ | 1052 | * set inode->i_size at the end of a write. */ |
1053 | if (should_remove_suid(dentry)) { | 1053 | if (should_remove_suid(dentry)) { |
1054 | if (meta_level == 0) { | 1054 | if (meta_level == 0) { |
1055 | ocfs2_meta_unlock(inode, meta_level); | 1055 | ocfs2_meta_unlock(inode, meta_level); |
1056 | meta_level = 1; | 1056 | meta_level = 1; |
1057 | continue; | 1057 | continue; |
1058 | } | 1058 | } |
1059 | 1059 | ||
1060 | ret = ocfs2_write_remove_suid(inode); | 1060 | ret = ocfs2_write_remove_suid(inode); |
1061 | if (ret < 0) { | 1061 | if (ret < 0) { |
1062 | mlog_errno(ret); | 1062 | mlog_errno(ret); |
1063 | goto out_unlock; | 1063 | goto out_unlock; |
1064 | } | 1064 | } |
1065 | } | 1065 | } |
1066 | 1066 | ||
1067 | /* work on a copy of ppos until we're sure that we won't have | 1067 | /* work on a copy of ppos until we're sure that we won't have |
1068 | * to recalculate it due to relocking. */ | 1068 | * to recalculate it due to relocking. */ |
1069 | if (appending) { | 1069 | if (appending) { |
1070 | saved_pos = i_size_read(inode); | 1070 | saved_pos = i_size_read(inode); |
1071 | mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); | 1071 | mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); |
1072 | } else { | 1072 | } else { |
1073 | saved_pos = *ppos; | 1073 | saved_pos = *ppos; |
1074 | } | 1074 | } |
1075 | newsize = count + saved_pos; | 1075 | newsize = count + saved_pos; |
1076 | 1076 | ||
1077 | mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", | 1077 | mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", |
1078 | (long long) saved_pos, (long long) newsize, | 1078 | (long long) saved_pos, (long long) newsize, |
1079 | (long long) i_size_read(inode)); | 1079 | (long long) i_size_read(inode)); |
1080 | 1080 | ||
1081 | /* No need for a higher level metadata lock if we're | 1081 | /* No need for a higher level metadata lock if we're |
1082 | * never going past i_size. */ | 1082 | * never going past i_size. */ |
1083 | if (newsize <= i_size_read(inode)) | 1083 | if (newsize <= i_size_read(inode)) |
1084 | break; | 1084 | break; |
1085 | 1085 | ||
1086 | if (meta_level == 0) { | 1086 | if (meta_level == 0) { |
1087 | ocfs2_meta_unlock(inode, meta_level); | 1087 | ocfs2_meta_unlock(inode, meta_level); |
1088 | meta_level = 1; | 1088 | meta_level = 1; |
1089 | continue; | 1089 | continue; |
1090 | } | 1090 | } |
1091 | 1091 | ||
1092 | spin_lock(&OCFS2_I(inode)->ip_lock); | 1092 | spin_lock(&OCFS2_I(inode)->ip_lock); |
1093 | clusters = ocfs2_clusters_for_bytes(inode->i_sb, newsize) - | 1093 | clusters = ocfs2_clusters_for_bytes(inode->i_sb, newsize) - |
1094 | OCFS2_I(inode)->ip_clusters; | 1094 | OCFS2_I(inode)->ip_clusters; |
1095 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 1095 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
1096 | 1096 | ||
1097 | mlog(0, "Writing at EOF, may need more allocation: " | 1097 | mlog(0, "Writing at EOF, may need more allocation: " |
1098 | "i_size = %lld, newsize = %lld, need %u clusters\n", | 1098 | "i_size = %lld, newsize = %lld, need %u clusters\n", |
1099 | (long long) i_size_read(inode), (long long) newsize, | 1099 | (long long) i_size_read(inode), (long long) newsize, |
1100 | clusters); | 1100 | clusters); |
1101 | 1101 | ||
1102 | /* We only want to continue the rest of this loop if | 1102 | /* We only want to continue the rest of this loop if |
1103 | * our extend will actually require more | 1103 | * our extend will actually require more |
1104 | * allocation. */ | 1104 | * allocation. */ |
1105 | if (!clusters) | 1105 | if (!clusters) |
1106 | break; | 1106 | break; |
1107 | 1107 | ||
1108 | ret = ocfs2_extend_file(inode, NULL, newsize, count); | 1108 | ret = ocfs2_extend_file(inode, NULL, newsize, count); |
1109 | if (ret < 0) { | 1109 | if (ret < 0) { |
1110 | if (ret != -ENOSPC) | 1110 | if (ret != -ENOSPC) |
1111 | mlog_errno(ret); | 1111 | mlog_errno(ret); |
1112 | goto out_unlock; | 1112 | goto out_unlock; |
1113 | } | 1113 | } |
1114 | break; | 1114 | break; |
1115 | } | 1115 | } |
1116 | 1116 | ||
1117 | if (appending) | 1117 | if (appending) |
1118 | *ppos = saved_pos; | 1118 | *ppos = saved_pos; |
1119 | 1119 | ||
1120 | out_unlock: | 1120 | out_unlock: |
1121 | ocfs2_meta_unlock(inode, meta_level); | 1121 | ocfs2_meta_unlock(inode, meta_level); |
1122 | 1122 | ||
1123 | out: | 1123 | out: |
1124 | return ret; | 1124 | return ret; |
1125 | } | 1125 | } |
1126 | 1126 | ||
1127 | static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | 1127 | static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, |
1128 | const struct iovec *iov, | 1128 | const struct iovec *iov, |
1129 | unsigned long nr_segs, | 1129 | unsigned long nr_segs, |
1130 | loff_t pos) | 1130 | loff_t pos) |
1131 | { | 1131 | { |
1132 | int ret, rw_level, have_alloc_sem = 0; | 1132 | int ret, rw_level, have_alloc_sem = 0; |
1133 | struct file *filp = iocb->ki_filp; | 1133 | struct file *filp = iocb->ki_filp; |
1134 | struct inode *inode = filp->f_dentry->d_inode; | 1134 | struct inode *inode = filp->f_path.dentry->d_inode; |
1135 | int appending = filp->f_flags & O_APPEND ? 1 : 0; | 1135 | int appending = filp->f_flags & O_APPEND ? 1 : 0; |
1136 | 1136 | ||
1137 | mlog_entry("(0x%p, %u, '%.*s')\n", filp, | 1137 | mlog_entry("(0x%p, %u, '%.*s')\n", filp, |
1138 | (unsigned int)nr_segs, | 1138 | (unsigned int)nr_segs, |
1139 | filp->f_dentry->d_name.len, | 1139 | filp->f_path.dentry->d_name.len, |
1140 | filp->f_dentry->d_name.name); | 1140 | filp->f_path.dentry->d_name.name); |
1141 | 1141 | ||
1142 | /* happy write of zero bytes */ | 1142 | /* happy write of zero bytes */ |
1143 | if (iocb->ki_left == 0) | 1143 | if (iocb->ki_left == 0) |
1144 | return 0; | 1144 | return 0; |
1145 | 1145 | ||
1146 | mutex_lock(&inode->i_mutex); | 1146 | mutex_lock(&inode->i_mutex); |
1147 | /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ | 1147 | /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ |
1148 | if (filp->f_flags & O_DIRECT) { | 1148 | if (filp->f_flags & O_DIRECT) { |
1149 | have_alloc_sem = 1; | 1149 | have_alloc_sem = 1; |
1150 | down_read(&inode->i_alloc_sem); | 1150 | down_read(&inode->i_alloc_sem); |
1151 | } | 1151 | } |
1152 | 1152 | ||
1153 | /* concurrent O_DIRECT writes are allowed */ | 1153 | /* concurrent O_DIRECT writes are allowed */ |
1154 | rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1; | 1154 | rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1; |
1155 | ret = ocfs2_rw_lock(inode, rw_level); | 1155 | ret = ocfs2_rw_lock(inode, rw_level); |
1156 | if (ret < 0) { | 1156 | if (ret < 0) { |
1157 | rw_level = -1; | 1157 | rw_level = -1; |
1158 | mlog_errno(ret); | 1158 | mlog_errno(ret); |
1159 | goto out; | 1159 | goto out; |
1160 | } | 1160 | } |
1161 | 1161 | ||
1162 | ret = ocfs2_prepare_inode_for_write(filp->f_dentry, &iocb->ki_pos, | 1162 | ret = ocfs2_prepare_inode_for_write(filp->f_path.dentry, &iocb->ki_pos, |
1163 | iocb->ki_left, appending); | 1163 | iocb->ki_left, appending); |
1164 | if (ret < 0) { | 1164 | if (ret < 0) { |
1165 | mlog_errno(ret); | 1165 | mlog_errno(ret); |
1166 | goto out; | 1166 | goto out; |
1167 | } | 1167 | } |
1168 | 1168 | ||
1169 | /* communicate with ocfs2_dio_end_io */ | 1169 | /* communicate with ocfs2_dio_end_io */ |
1170 | ocfs2_iocb_set_rw_locked(iocb); | 1170 | ocfs2_iocb_set_rw_locked(iocb); |
1171 | 1171 | ||
1172 | ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos); | 1172 | ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos); |
1173 | 1173 | ||
1174 | /* buffered aio wouldn't have proper lock coverage today */ | 1174 | /* buffered aio wouldn't have proper lock coverage today */ |
1175 | BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); | 1175 | BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); |
1176 | 1176 | ||
1177 | /* | 1177 | /* |
1178 | * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io | 1178 | * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io |
1179 | * function pointer which is called when o_direct io completes so that | 1179 | * function pointer which is called when o_direct io completes so that |
1180 | * it can unlock our rw lock. (it's the clustered equivalent of | 1180 | * it can unlock our rw lock. (it's the clustered equivalent of |
1181 | * i_alloc_sem; protects truncate from racing with pending ios). | 1181 | * i_alloc_sem; protects truncate from racing with pending ios). |
1182 | * Unfortunately there are error cases which call end_io and others | 1182 | * Unfortunately there are error cases which call end_io and others |
1183 | * that don't. so we don't have to unlock the rw_lock if either an | 1183 | * that don't. so we don't have to unlock the rw_lock if either an |
1184 | * async dio is going to do it in the future or an end_io after an | 1184 | * async dio is going to do it in the future or an end_io after an |
1185 | * error has already done it. | 1185 | * error has already done it. |
1186 | */ | 1186 | */ |
1187 | if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { | 1187 | if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { |
1188 | rw_level = -1; | 1188 | rw_level = -1; |
1189 | have_alloc_sem = 0; | 1189 | have_alloc_sem = 0; |
1190 | } | 1190 | } |
1191 | 1191 | ||
1192 | out: | 1192 | out: |
1193 | if (have_alloc_sem) | 1193 | if (have_alloc_sem) |
1194 | up_read(&inode->i_alloc_sem); | 1194 | up_read(&inode->i_alloc_sem); |
1195 | if (rw_level != -1) | 1195 | if (rw_level != -1) |
1196 | ocfs2_rw_unlock(inode, rw_level); | 1196 | ocfs2_rw_unlock(inode, rw_level); |
1197 | mutex_unlock(&inode->i_mutex); | 1197 | mutex_unlock(&inode->i_mutex); |
1198 | 1198 | ||
1199 | mlog_exit(ret); | 1199 | mlog_exit(ret); |
1200 | return ret; | 1200 | return ret; |
1201 | } | 1201 | } |
1202 | 1202 | ||
/*
 * Splice data from a pipe into an ocfs2 file.
 *
 * Lock ordering here is: inode_double_lock() (file inode + pipe inode),
 * then the ocfs2 cluster rw lock at level 1.  The rw lock is dropped
 * before inode_double_unlock() on every exit path via the goto unwind.
 *
 * Returns bytes spliced from generic_file_splice_write_nolock(), or a
 * negative errno from lock acquisition / write preparation.
 */
static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
				       struct file *out,
				       loff_t *ppos,
				       size_t len,
				       unsigned int flags)
{
	int ret;
	struct inode *inode = out->f_path.dentry->d_inode;

	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
		   (unsigned int)len,
		   out->f_path.dentry->d_name.len,
		   out->f_path.dentry->d_name.name);

	/* Serialize against the pipe as well as the target inode. */
	inode_double_lock(inode, pipe->inode);

	/* Level 1 is the write side of the cluster rw lock —
	 * NOTE(review): inferred from the splice_read path using 0; confirm. */
	ret = ocfs2_rw_lock(inode, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* Validate/extend i_size and allocation before the generic write. */
	ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

	/* ok, we're done with i_size and alloc work */
	ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);

out_unlock:
	ocfs2_rw_unlock(inode, 1);
out:
	inode_double_unlock(inode, pipe->inode);

	mlog_exit(ret);
	return ret;
}
1242 | 1242 | ||
/*
 * Splice data out of an ocfs2 file into a pipe.
 *
 * No rw lock is held across the actual read; the cluster meta lock is
 * taken at level 0 and immediately dropped purely to refresh inode
 * fields (e.g. i_size) from the cluster before the generic splice path
 * inspects them — see the matching comment in ocfs2_file_aio_read().
 *
 * Returns bytes spliced from generic_file_splice_read(), or a negative
 * errno if the meta lock could not be taken.
 */
static ssize_t ocfs2_file_splice_read(struct file *in,
				      loff_t *ppos,
				      struct pipe_inode_info *pipe,
				      size_t len,
				      unsigned int flags)
{
	int ret = 0;
	struct inode *inode = in->f_path.dentry->d_inode;

	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
		   (unsigned int)len,
		   in->f_path.dentry->d_name.len,
		   in->f_path.dentry->d_name.name);

	/*
	 * See the comment in ocfs2_file_aio_read()
	 */
	ret = ocfs2_meta_lock(inode, NULL, 0);
	if (ret < 0) {
		mlog_errno(ret);
		goto bail;
	}
	/* Lock dropped right away — it only served to update inode fields. */
	ocfs2_meta_unlock(inode, 0);

	ret = generic_file_splice_read(in, ppos, pipe, len, flags);

bail:
	mlog_exit(ret);
	return ret;
}
1273 | 1273 | ||
/*
 * AIO read entry point for ocfs2 files.
 *
 * Buffered reads protect themselves in ->readpage(), so only O_DIRECT
 * reads take i_alloc_sem and the cluster rw lock (level 0) here, to
 * keep pending direct I/O from racing with truncate.  rw_level == -1
 * and have_alloc_sem == 0 mean "nothing held" for the bail-out path.
 *
 * Returns bytes read from generic_file_aio_read(), -EIOCBQUEUED for a
 * queued async direct I/O (in which case ocfs2_dio_end_io is expected
 * to drop the locks later), or a negative errno.
 */
static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
				   const struct iovec *iov,
				   unsigned long nr_segs,
				   loff_t pos)
{
	int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
	struct file *filp = iocb->ki_filp;
	struct inode *inode = filp->f_path.dentry->d_inode;

	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
		   (unsigned int)nr_segs,
		   filp->f_path.dentry->d_name.len,
		   filp->f_path.dentry->d_name.name);

	if (!inode) {
		ret = -EINVAL;
		mlog_errno(ret);
		goto bail;
	}

	/*
	 * buffered reads protect themselves in ->readpage().  O_DIRECT reads
	 * need locks to protect pending reads from racing with truncate.
	 */
	if (filp->f_flags & O_DIRECT) {
		down_read(&inode->i_alloc_sem);
		have_alloc_sem = 1;

		ret = ocfs2_rw_lock(inode, 0);
		if (ret < 0) {
			mlog_errno(ret);
			goto bail;
		}
		rw_level = 0;
		/* communicate with ocfs2_dio_end_io */
		ocfs2_iocb_set_rw_locked(iocb);
	}

	/*
	 * We're fine letting folks race truncates and extending
	 * writes with read across the cluster, just like they can
	 * locally. Hence no rw_lock during read.
	 *
	 * Take and drop the meta data lock to update inode fields
	 * like i_size. This allows the checks down below
	 * generic_file_aio_read() a chance of actually working.
	 */
	ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
	if (ret < 0) {
		mlog_errno(ret);
		goto bail;
	}
	ocfs2_meta_unlock(inode, lock_level);

	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
	if (ret == -EINVAL)
		mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n");

	/* buffered aio wouldn't have proper lock coverage today */
	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));

	/* see ocfs2_file_aio_write */
	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
		/* Queued async dio (or an end_io that already fired) will
		 * handle the unlock — don't drop anything here. */
		rw_level = -1;
		have_alloc_sem = 0;
	}

bail:
	if (have_alloc_sem)
		up_read(&inode->i_alloc_sem);
	if (rw_level != -1)
		ocfs2_rw_unlock(inode, rw_level);
	mlog_exit(ret);

	return ret;
}
1350 | 1350 | ||
1351 | struct inode_operations ocfs2_file_iops = { | 1351 | struct inode_operations ocfs2_file_iops = { |
1352 | .setattr = ocfs2_setattr, | 1352 | .setattr = ocfs2_setattr, |
1353 | .getattr = ocfs2_getattr, | 1353 | .getattr = ocfs2_getattr, |
1354 | .permission = ocfs2_permission, | 1354 | .permission = ocfs2_permission, |
1355 | }; | 1355 | }; |
1356 | 1356 | ||
1357 | struct inode_operations ocfs2_special_file_iops = { | 1357 | struct inode_operations ocfs2_special_file_iops = { |
1358 | .setattr = ocfs2_setattr, | 1358 | .setattr = ocfs2_setattr, |
1359 | .getattr = ocfs2_getattr, | 1359 | .getattr = ocfs2_getattr, |
1360 | .permission = ocfs2_permission, | 1360 | .permission = ocfs2_permission, |
1361 | }; | 1361 | }; |
1362 | 1362 | ||
1363 | const struct file_operations ocfs2_fops = { | 1363 | const struct file_operations ocfs2_fops = { |
1364 | .read = do_sync_read, | 1364 | .read = do_sync_read, |
1365 | .write = do_sync_write, | 1365 | .write = do_sync_write, |
1366 | .sendfile = generic_file_sendfile, | 1366 | .sendfile = generic_file_sendfile, |
1367 | .mmap = ocfs2_mmap, | 1367 | .mmap = ocfs2_mmap, |
1368 | .fsync = ocfs2_sync_file, | 1368 | .fsync = ocfs2_sync_file, |
1369 | .release = ocfs2_file_release, | 1369 | .release = ocfs2_file_release, |
1370 | .open = ocfs2_file_open, | 1370 | .open = ocfs2_file_open, |
1371 | .aio_read = ocfs2_file_aio_read, | 1371 | .aio_read = ocfs2_file_aio_read, |
1372 | .aio_write = ocfs2_file_aio_write, | 1372 | .aio_write = ocfs2_file_aio_write, |
1373 | .ioctl = ocfs2_ioctl, | 1373 | .ioctl = ocfs2_ioctl, |
1374 | .splice_read = ocfs2_file_splice_read, | 1374 | .splice_read = ocfs2_file_splice_read, |
1375 | .splice_write = ocfs2_file_splice_write, | 1375 | .splice_write = ocfs2_file_splice_write, |
1376 | }; | 1376 | }; |
1377 | 1377 | ||
1378 | const struct file_operations ocfs2_dops = { | 1378 | const struct file_operations ocfs2_dops = { |
1379 | .read = generic_read_dir, | 1379 | .read = generic_read_dir, |
1380 | .readdir = ocfs2_readdir, | 1380 | .readdir = ocfs2_readdir, |
1381 | .fsync = ocfs2_sync_file, | 1381 | .fsync = ocfs2_sync_file, |
1382 | .ioctl = ocfs2_ioctl, | 1382 | .ioctl = ocfs2_ioctl, |
1383 | }; | 1383 | }; |
1384 | 1384 |