Commit 9dbe9610b9df4efe0946299804ed46bb8f91dec2

Authored by Steven Whitehouse
1 parent c9aecf7371

GFS2: Add Orlov allocator

Just like ext3, this works on the root directory and any directory
with the +T flag set. Also, just like ext3, any subdirectory created
in one of the just mentioned cases will be allocated to a random
resource group (GFS2 equivalent of a block group).

If you are creating a set of directories, each of which will contain a
job running on a different node, then by setting +T on the parent
directory before creating the subdirectories, each will end up in a
different resource group, and thus resource group contention between
nodes will be kept to a minimum.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

Showing 8 changed files with 38 additions and 15 deletions Inline Diff

1 /* 1 /*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10 #include <linux/sched.h> 10 #include <linux/sched.h>
11 #include <linux/slab.h> 11 #include <linux/slab.h>
12 #include <linux/spinlock.h> 12 #include <linux/spinlock.h>
13 #include <linux/completion.h> 13 #include <linux/completion.h>
14 #include <linux/buffer_head.h> 14 #include <linux/buffer_head.h>
15 #include <linux/pagemap.h> 15 #include <linux/pagemap.h>
16 #include <linux/pagevec.h> 16 #include <linux/pagevec.h>
17 #include <linux/mpage.h> 17 #include <linux/mpage.h>
18 #include <linux/fs.h> 18 #include <linux/fs.h>
19 #include <linux/writeback.h> 19 #include <linux/writeback.h>
20 #include <linux/swap.h> 20 #include <linux/swap.h>
21 #include <linux/gfs2_ondisk.h> 21 #include <linux/gfs2_ondisk.h>
22 #include <linux/backing-dev.h> 22 #include <linux/backing-dev.h>
23 23
24 #include "gfs2.h" 24 #include "gfs2.h"
25 #include "incore.h" 25 #include "incore.h"
26 #include "bmap.h" 26 #include "bmap.h"
27 #include "glock.h" 27 #include "glock.h"
28 #include "inode.h" 28 #include "inode.h"
29 #include "log.h" 29 #include "log.h"
30 #include "meta_io.h" 30 #include "meta_io.h"
31 #include "quota.h" 31 #include "quota.h"
32 #include "trans.h" 32 #include "trans.h"
33 #include "rgrp.h" 33 #include "rgrp.h"
34 #include "super.h" 34 #include "super.h"
35 #include "util.h" 35 #include "util.h"
36 #include "glops.h" 36 #include "glops.h"
37 37
38 38
/**
 * gfs2_page_add_databufs - add a page's buffers to the current transaction
 * @ip: The GFS2 inode the page belongs to
 * @page: The page whose buffer heads are to be added
 * @from: Start byte offset within the page
 * @to: End byte offset within the page
 *
 * Walks the circular list of buffer_heads attached to @page and, for each
 * buffer overlapping [@from, @to), adds it to the transaction via
 * gfs2_trans_add_bh().  For jdata inodes the buffer is marked uptodate
 * first.
 */
static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
				   unsigned int from, unsigned int to)
{
	struct buffer_head *head = page_buffers(page);
	unsigned int bsize = head->b_size;
	struct buffer_head *bh;
	unsigned int start, end;

	/*
	 * The buffer list is circular: stop when we come back around to
	 * head.  The "!start" term lets the body run for head itself on
	 * the first iteration.
	 */
	for (bh = head, start = 0; bh != head || !start;
	     bh = bh->b_this_page, start = end) {
		end = start + bsize;
		if (end <= from || start >= to)
			continue;	/* buffer entirely outside the range */
		if (gfs2_is_jdata(ip))
			set_buffer_uptodate(bh);
		gfs2_trans_add_bh(ip->i_gl, bh, 0);
	}
}
57 57
58 /** 58 /**
59 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block 59 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
60 * @inode: The inode 60 * @inode: The inode
61 * @lblock: The block number to look up 61 * @lblock: The block number to look up
62 * @bh_result: The buffer head to return the result in 62 * @bh_result: The buffer head to return the result in
63 * @create: Non-zero if we may add block to the file 63 * @create: Non-zero if we may add block to the file
64 * 64 *
65 * Returns: errno 65 * Returns: errno
66 */ 66 */
67 67
68 static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, 68 static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
69 struct buffer_head *bh_result, int create) 69 struct buffer_head *bh_result, int create)
70 { 70 {
71 int error; 71 int error;
72 72
73 error = gfs2_block_map(inode, lblock, bh_result, 0); 73 error = gfs2_block_map(inode, lblock, bh_result, 0);
74 if (error) 74 if (error)
75 return error; 75 return error;
76 if (!buffer_mapped(bh_result)) 76 if (!buffer_mapped(bh_result))
77 return -EIO; 77 return -EIO;
78 return 0; 78 return 0;
79 } 79 }
80 80
/*
 * Like gfs2_get_block_noalloc this never allocates (the create flag
 * passed to gfs2_block_map() is always 0), but unlike it, an unmapped
 * block is not treated as an error here.  Presumably used for the
 * direct I/O path, as the name suggests -- the caller is not visible
 * in this file chunk.
 */
static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
				 struct buffer_head *bh_result, int create)
{
	return gfs2_block_map(inode, lblock, bh_result, 0);
}
86 86
/**
 * gfs2_writepage_common - Common bits of writepage
 * @page: The page to be written
 * @wbc: The writeback control
 *
 * Returns: 1 if writepage is ok, otherwise an error code or zero if no error.
 * In the 0/error cases the page has already been unlocked here.
 */

static int gfs2_writepage_common(struct page *page,
				 struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;

	/* Writing back without the glock held exclusively would be a bug */
	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
		goto out;
	/* Already inside a transaction: don't recurse, just redirty */
	if (current->journal_info)
		goto redirty;
	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index > end_index || (page->index == end_index && !offset)) {
		page->mapping->a_ops->invalidatepage(page, 0);
		goto out;
	}
	return 1;
redirty:
	redirty_page_for_writepage(wbc, page);
out:
	unlock_page(page);
	return 0;
}
122 122
123 /** 123 /**
124 * gfs2_writeback_writepage - Write page for writeback mappings 124 * gfs2_writeback_writepage - Write page for writeback mappings
125 * @page: The page 125 * @page: The page
126 * @wbc: The writeback control 126 * @wbc: The writeback control
127 * 127 *
128 */ 128 */
129 129
130 static int gfs2_writeback_writepage(struct page *page, 130 static int gfs2_writeback_writepage(struct page *page,
131 struct writeback_control *wbc) 131 struct writeback_control *wbc)
132 { 132 {
133 int ret; 133 int ret;
134 134
135 ret = gfs2_writepage_common(page, wbc); 135 ret = gfs2_writepage_common(page, wbc);
136 if (ret <= 0) 136 if (ret <= 0)
137 return ret; 137 return ret;
138 138
139 return nobh_writepage(page, gfs2_get_block_noalloc, wbc); 139 return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
140 } 140 }
141 141
/**
 * gfs2_ordered_writepage - Write page for ordered data files
 * @page: The page to write
 * @wbc: The writeback control
 *
 * As gfs2_writeback_writepage(), but the page's buffers are added to
 * the transaction first so the data is ordered with respect to the log.
 */

static int gfs2_ordered_writepage(struct page *page,
				  struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	int ret;

	ret = gfs2_writepage_common(page, wbc);
	if (ret <= 0)
		return ret;	/* error, or common code already dealt with the page */

	if (!page_has_buffers(page)) {
		create_empty_buffers(page, inode->i_sb->s_blocksize,
				     (1 << BH_Dirty)|(1 << BH_Uptodate));
	}
	/*
	 * NOTE(review): the "to" argument of s_blocksize-1 only spans the
	 * first buffer of the page; confirm whether the whole page was
	 * intended here (compare __gfs2_jdata_writepage, which does the
	 * same thing).
	 */
	gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1);
	return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
}
167 167
/**
 * __gfs2_jdata_writepage - The core of jdata writepage
 * @page: The page to write
 * @wbc: The writeback control
 *
 * This is shared between writepage and writepages and implements the
 * core of the writepage operation. If a transaction is required then
 * PageChecked will have been set and the transaction will have
 * already been started before this is called.
 */

static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (PageChecked(page)) {
		ClearPageChecked(page);
		/* Make sure there are buffer heads before journaling them */
		if (!page_has_buffers(page)) {
			create_empty_buffers(page, inode->i_sb->s_blocksize,
					     (1 << BH_Dirty)|(1 << BH_Uptodate));
		}
		/*
		 * NOTE(review): "to" of s_blocksize-1 covers only the first
		 * buffer of the page -- confirm whether the whole page was
		 * intended (same pattern as gfs2_ordered_writepage).
		 */
		gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
	}
	return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
}
195 195
/**
 * gfs2_jdata_writepage - Write complete page
 * @page: Page to write
 * @wbc: The writeback control
 *
 * A PageChecked page needs a transaction.  For non-data-integrity
 * writeback the page is simply redirtied rather than paying the cost
 * of starting one; for WB_SYNC_ALL a transaction is begun here and
 * ended once the page has been handed to __gfs2_jdata_writepage().
 *
 * Returns: errno
 */

static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int ret;
	int done_trans = 0;

	if (PageChecked(page)) {
		if (wbc->sync_mode != WB_SYNC_ALL)
			goto out_ignore;
		ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
		if (ret)
			goto out_ignore;
		done_trans = 1;
	}
	ret = gfs2_writepage_common(page, wbc);
	if (ret > 0)
		ret = __gfs2_jdata_writepage(page, wbc);
	if (done_trans)
		gfs2_trans_end(sdp);
	return ret;

out_ignore:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return 0;
}
231 231
/**
 * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: Write-back control
 *
 * For the data=writeback case we can already ignore buffer heads
 * and write whole extents at once. This is a big reduction in the
 * number of I/O requests we send and the bmap calls we make in this case.
 *
 * Returns: errno from mpage_writepages()
 */
static int gfs2_writeback_writepages(struct address_space *mapping,
				     struct writeback_control *wbc)
{
	return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
}
246 246
/**
 * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
 * @mapping: The mapping
 * @wbc: The writeback control
 * @pvec: The vector of pages
 * @nr_pages: The number of pages to write
 * @end: Last page index to consider (ignored when wbc->range_cyclic)
 *
 * Opens one transaction sized for every block in the pagevec, then
 * feeds each page to __gfs2_jdata_writepage(), skipping pages that
 * were truncated/remapped, lie past @end, or are already under
 * writeback.
 *
 * Returns: non-zero if loop should terminate, zero otherwise
 */

static int gfs2_write_jdata_pagevec(struct address_space *mapping,
				    struct writeback_control *wbc,
				    struct pagevec *pvec,
				    int nr_pages, pgoff_t end)
{
	struct inode *inode = mapping->host;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
	/* Worst case: every block of every page joins the transaction */
	unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
	int i;
	int ret;

	/*
	 * The transaction is started before any page lock is taken --
	 * that ordering is the whole point of gfs2_write_cache_jdata()
	 */
	ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
	if (ret < 0)
		return ret;

	for(i = 0; i < nr_pages; i++) {
		struct page *page = pvec->pages[i];

		lock_page(page);

		/* Page was truncated or migrated while we weren't looking */
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			continue;
		}

		if (!wbc->range_cyclic && page->index > end) {
			ret = 1;	/* past the requested range: tell caller to stop */
			unlock_page(page);
			continue;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
		    !clear_page_dirty_for_io(page)) {
			unlock_page(page);
			continue;
		}

		/* Is the page fully outside i_size? (truncate in progress) */
		if (page->index > end_index || (page->index == end_index && !offset)) {
			page->mapping->a_ops->invalidatepage(page, 0);
			unlock_page(page);
			continue;
		}

		ret = __gfs2_jdata_writepage(page, wbc);

		/* Stop on error or once the writeback quota is used up */
		if (ret || (--(wbc->nr_to_write) <= 0))
			ret = 1;
	}
	gfs2_trans_end(sdp);
	return ret;
}
316 316
/**
 * gfs2_write_cache_jdata - Like write_cache_pages but different
 * @mapping: The mapping to write
 * @wbc: The writeback control
 *
 * The reason that we use our own function here is that we need to
 * start transactions before we grab page locks. This allows us
 * to get the ordering right.
 *
 * Returns: 0 on success, otherwise a negative errno
 */

static int gfs2_write_cache_jdata(struct address_space *mapping,
				  struct writeback_control *wbc)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t index;
	pgoff_t end;
	int scanned = 0;
	int range_whole = 0;

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;	/* i.e. the largest possible pgoff_t */
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		scanned = 1;	/* explicit range: never wrap around */
	}

retry:
	while (!done && (index <= end) &&
	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
					      PAGECACHE_TAG_DIRTY,
					      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
		scanned = 1;
		ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end);
		if (ret)
			done = 1;
		if (ret > 0)
			ret = 0;	/* positive just means "stop", not an error */

		pagevec_release(&pvec);
		cond_resched();
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = 1;
		index = 0;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;
	return ret;
}
383 383
384 384
/**
 * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: The writeback control
 *
 * Returns: 0 on success, otherwise a negative errno
 */

static int gfs2_jdata_writepages(struct address_space *mapping,
				 struct writeback_control *wbc)
{
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
	int ret;

	ret = gfs2_write_cache_jdata(mapping, wbc);
	if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
		/*
		 * Data integrity sync: flush the log, then write out
		 * anything that became dirty again in the meantime
		 */
		gfs2_log_flush(sdp, ip->i_gl);
		ret = gfs2_write_cache_jdata(mapping, wbc);
	}
	return ret;
}
406 406
/**
 * stuffed_readpage - Fill in a Linux page with stuffed file data
 * @ip: the inode
 * @page: the page
 *
 * Copies the data held inline in the dinode block into @page, zeroes
 * the remainder of the page, and marks it uptodate.  The caller is
 * responsible for unlocking the page.
 *
 * Returns: errno
 */

static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *dibh;
	u64 dsize = i_size_read(&ip->i_inode);
	void *kaddr;
	int error;

	/*
	 * Due to the order of unstuffing files and ->fault(), we can be
	 * asked for a zero page in the case of a stuffed file being extended,
	 * so we need to supply one here. It doesn't happen often.
	 */
	if (unlikely(page->index)) {
		zero_user(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
		return 0;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	kaddr = kmap_atomic(page);
	/* Never copy more than fits after the on-disk dinode header */
	if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
		dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
	memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
	kunmap_atomic(kaddr);
	flush_dcache_page(page);
	brelse(dibh);
	SetPageUptodate(page);

	return 0;
}
449 449
450 450
/**
 * __gfs2_readpage - readpage
 * @file: The file to read a page for
 * @page: The page to read
 *
 * This is the core of gfs2's readpage. Its used by the internal file
 * reading code as in that case we already hold the glock. Also its
 * called by gfs2_readpage() once the required lock has been granted.
 */

static int __gfs2_readpage(void *file, struct page *page)
{
	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
	int error;

	if (gfs2_is_stuffed(ip)) {
		/* stuffed_readpage() leaves the page locked, so unlock here */
		error = stuffed_readpage(ip, page);
		unlock_page(page);
	} else {
		error = mpage_readpage(page, gfs2_block_map);
	}

	/* A withdrawn/shut-down fs overrides whatever happened above */
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return -EIO;

	return error;
}
480 480
/**
 * gfs2_readpage - read a page of a file
 * @file: The file to read
 * @page: The page of the file
 *
 * This deals with the locking required. We have to unlock and
 * relock the page in order to get the locking in the right
 * order.
 */

static int gfs2_readpage(struct file *file, struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_holder gh;
	int error;

	/* Drop the page lock before acquiring the glock */
	unlock_page(page);
	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
	error = gfs2_glock_nq(&gh);
	if (unlikely(error))
		goto out;
	error = AOP_TRUNCATED_PAGE;
	lock_page(page);
	/* The page may have been truncated or read while it was unlocked */
	if (page->mapping == mapping && !PageUptodate(page))
		error = __gfs2_readpage(file, page);
	else
		unlock_page(page);
	gfs2_glock_dq(&gh);
out:
	gfs2_holder_uninit(&gh);
	/*
	 * NOTE(review): on hard errors the page is re-locked before
	 * returning -- confirm this matches what the ->readpage caller
	 * expects (AOP_TRUNCATED_PAGE handles its own retry).
	 */
	if (error && error != AOP_TRUNCATED_PAGE)
		lock_page(page);
	return error;
}
516 516
/**
 * gfs2_internal_read - read an internal file
 * @ip: The gfs2 inode
 * @buf: The buffer to fill
 * @pos: The file position (advanced by @size on success)
 * @size: The amount to read
 *
 * Copies @size bytes from the page cache of @ip into @buf one page at
 * a time, reading pages via __gfs2_readpage() (per its comment, the
 * glock is already held on this path).
 *
 * Returns: @size on success, or a negative errno from read_cache_page()
 */

int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
		       unsigned size)
{
	struct address_space *mapping = ip->i_inode.i_mapping;
	unsigned long index = *pos / PAGE_CACHE_SIZE;
	unsigned offset = *pos & (PAGE_CACHE_SIZE - 1);
	unsigned copied = 0;
	unsigned amt;
	struct page *page;
	void *p;

	do {
		/* Clamp this chunk to the end of the current page */
		amt = size - copied;
		if (offset + size > PAGE_CACHE_SIZE)
			amt = PAGE_CACHE_SIZE - offset;
		page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
		if (IS_ERR(page))
			return PTR_ERR(page);
		p = kmap_atomic(page);
		memcpy(buf + copied, p + offset, amt);
		kunmap_atomic(p);
		mark_page_accessed(page);
		page_cache_release(page);
		copied += amt;
		index++;
		offset = 0;	/* later pages are copied from their start */
	} while(copied < size);
	(*pos) += size;
	return size;
}
556 556
557 /** 557 /**
558 * gfs2_readpages - Read a bunch of pages at once 558 * gfs2_readpages - Read a bunch of pages at once
559 * 559 *
560 * Some notes: 560 * Some notes:
561 * 1. This is only for readahead, so we can simply ignore any things 561 * 1. This is only for readahead, so we can simply ignore any things
562 * which are slightly inconvenient (such as locking conflicts between 562 * which are slightly inconvenient (such as locking conflicts between
563 * the page lock and the glock) and return having done no I/O. Its 563 * the page lock and the glock) and return having done no I/O. Its
564 * obviously not something we'd want to do on too regular a basis. 564 * obviously not something we'd want to do on too regular a basis.
565 * Any I/O we ignore at this time will be done via readpage later. 565 * Any I/O we ignore at this time will be done via readpage later.
566 * 2. We don't handle stuffed files here we let readpage do the honours. 566 * 2. We don't handle stuffed files here we let readpage do the honours.
567 * 3. mpage_readpages() does most of the heavy lifting in the common case. 567 * 3. mpage_readpages() does most of the heavy lifting in the common case.
568 * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places. 568 * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places.
569 */ 569 */
570 570
571 static int gfs2_readpages(struct file *file, struct address_space *mapping, 571 static int gfs2_readpages(struct file *file, struct address_space *mapping,
572 struct list_head *pages, unsigned nr_pages) 572 struct list_head *pages, unsigned nr_pages)
573 { 573 {
574 struct inode *inode = mapping->host; 574 struct inode *inode = mapping->host;
575 struct gfs2_inode *ip = GFS2_I(inode); 575 struct gfs2_inode *ip = GFS2_I(inode);
576 struct gfs2_sbd *sdp = GFS2_SB(inode); 576 struct gfs2_sbd *sdp = GFS2_SB(inode);
577 struct gfs2_holder gh; 577 struct gfs2_holder gh;
578 int ret; 578 int ret;
579 579
580 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); 580 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
581 ret = gfs2_glock_nq(&gh); 581 ret = gfs2_glock_nq(&gh);
582 if (unlikely(ret)) 582 if (unlikely(ret))
583 goto out_uninit; 583 goto out_uninit;
584 if (!gfs2_is_stuffed(ip)) 584 if (!gfs2_is_stuffed(ip))
585 ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map); 585 ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map);
586 gfs2_glock_dq(&gh); 586 gfs2_glock_dq(&gh);
587 out_uninit: 587 out_uninit:
588 gfs2_holder_uninit(&gh); 588 gfs2_holder_uninit(&gh);
589 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 589 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
590 ret = -EIO; 590 ret = -EIO;
591 return ret; 591 return ret;
592 } 592 }
593 593
594 /** 594 /**
595 * gfs2_write_begin - Begin to write to a file 595 * gfs2_write_begin - Begin to write to a file
596 * @file: The file to write to 596 * @file: The file to write to
597 * @mapping: The mapping in which to write 597 * @mapping: The mapping in which to write
598 * @pos: The file offset at which to start writing 598 * @pos: The file offset at which to start writing
599 * @len: Length of the write 599 * @len: Length of the write
600 * @flags: Various flags 600 * @flags: Various flags
601 * @pagep: Pointer to return the page 601 * @pagep: Pointer to return the page
602 * @fsdata: Pointer to return fs data (unused by GFS2) 602 * @fsdata: Pointer to return fs data (unused by GFS2)
603 * 603 *
604 * Returns: errno 604 * Returns: errno
605 */ 605 */
606 606
607 static int gfs2_write_begin(struct file *file, struct address_space *mapping, 607 static int gfs2_write_begin(struct file *file, struct address_space *mapping,
608 loff_t pos, unsigned len, unsigned flags, 608 loff_t pos, unsigned len, unsigned flags,
609 struct page **pagep, void **fsdata) 609 struct page **pagep, void **fsdata)
610 { 610 {
611 struct gfs2_inode *ip = GFS2_I(mapping->host); 611 struct gfs2_inode *ip = GFS2_I(mapping->host);
612 struct gfs2_sbd *sdp = GFS2_SB(mapping->host); 612 struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
613 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 613 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
614 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 614 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
615 unsigned requested = 0; 615 unsigned requested = 0;
616 int alloc_required; 616 int alloc_required;
617 int error = 0; 617 int error = 0;
618 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 618 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
619 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 619 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
620 struct page *page; 620 struct page *page;
621 621
622 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 622 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
623 error = gfs2_glock_nq(&ip->i_gh); 623 error = gfs2_glock_nq(&ip->i_gh);
624 if (unlikely(error)) 624 if (unlikely(error))
625 goto out_uninit; 625 goto out_uninit;
626 if (&ip->i_inode == sdp->sd_rindex) { 626 if (&ip->i_inode == sdp->sd_rindex) {
627 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 627 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
628 GL_NOCACHE, &m_ip->i_gh); 628 GL_NOCACHE, &m_ip->i_gh);
629 if (unlikely(error)) { 629 if (unlikely(error)) {
630 gfs2_glock_dq(&ip->i_gh); 630 gfs2_glock_dq(&ip->i_gh);
631 goto out_uninit; 631 goto out_uninit;
632 } 632 }
633 } 633 }
634 634
635 alloc_required = gfs2_write_alloc_required(ip, pos, len); 635 alloc_required = gfs2_write_alloc_required(ip, pos, len);
636 636
637 if (alloc_required || gfs2_is_jdata(ip)) 637 if (alloc_required || gfs2_is_jdata(ip))
638 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); 638 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
639 639
640 if (alloc_required) { 640 if (alloc_required) {
641 error = gfs2_quota_lock_check(ip); 641 error = gfs2_quota_lock_check(ip);
642 if (error) 642 if (error)
643 goto out_unlock; 643 goto out_unlock;
644 644
645 requested = data_blocks + ind_blocks; 645 requested = data_blocks + ind_blocks;
646 error = gfs2_inplace_reserve(ip, requested); 646 error = gfs2_inplace_reserve(ip, requested, 0);
647 if (error) 647 if (error)
648 goto out_qunlock; 648 goto out_qunlock;
649 } 649 }
650 650
651 rblocks = RES_DINODE + ind_blocks; 651 rblocks = RES_DINODE + ind_blocks;
652 if (gfs2_is_jdata(ip)) 652 if (gfs2_is_jdata(ip))
653 rblocks += data_blocks ? data_blocks : 1; 653 rblocks += data_blocks ? data_blocks : 1;
654 if (ind_blocks || data_blocks) 654 if (ind_blocks || data_blocks)
655 rblocks += RES_STATFS + RES_QUOTA; 655 rblocks += RES_STATFS + RES_QUOTA;
656 if (&ip->i_inode == sdp->sd_rindex) 656 if (&ip->i_inode == sdp->sd_rindex)
657 rblocks += 2 * RES_STATFS; 657 rblocks += 2 * RES_STATFS;
658 if (alloc_required) 658 if (alloc_required)
659 rblocks += gfs2_rg_blocks(ip, requested); 659 rblocks += gfs2_rg_blocks(ip, requested);
660 660
661 error = gfs2_trans_begin(sdp, rblocks, 661 error = gfs2_trans_begin(sdp, rblocks,
662 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); 662 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
663 if (error) 663 if (error)
664 goto out_trans_fail; 664 goto out_trans_fail;
665 665
666 error = -ENOMEM; 666 error = -ENOMEM;
667 flags |= AOP_FLAG_NOFS; 667 flags |= AOP_FLAG_NOFS;
668 page = grab_cache_page_write_begin(mapping, index, flags); 668 page = grab_cache_page_write_begin(mapping, index, flags);
669 *pagep = page; 669 *pagep = page;
670 if (unlikely(!page)) 670 if (unlikely(!page))
671 goto out_endtrans; 671 goto out_endtrans;
672 672
673 if (gfs2_is_stuffed(ip)) { 673 if (gfs2_is_stuffed(ip)) {
674 error = 0; 674 error = 0;
675 if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { 675 if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
676 error = gfs2_unstuff_dinode(ip, page); 676 error = gfs2_unstuff_dinode(ip, page);
677 if (error == 0) 677 if (error == 0)
678 goto prepare_write; 678 goto prepare_write;
679 } else if (!PageUptodate(page)) { 679 } else if (!PageUptodate(page)) {
680 error = stuffed_readpage(ip, page); 680 error = stuffed_readpage(ip, page);
681 } 681 }
682 goto out; 682 goto out;
683 } 683 }
684 684
685 prepare_write: 685 prepare_write:
686 error = __block_write_begin(page, from, len, gfs2_block_map); 686 error = __block_write_begin(page, from, len, gfs2_block_map);
687 out: 687 out:
688 if (error == 0) 688 if (error == 0)
689 return 0; 689 return 0;
690 690
691 unlock_page(page); 691 unlock_page(page);
692 page_cache_release(page); 692 page_cache_release(page);
693 693
694 gfs2_trans_end(sdp); 694 gfs2_trans_end(sdp);
695 if (pos + len > ip->i_inode.i_size) 695 if (pos + len > ip->i_inode.i_size)
696 gfs2_trim_blocks(&ip->i_inode); 696 gfs2_trim_blocks(&ip->i_inode);
697 goto out_trans_fail; 697 goto out_trans_fail;
698 698
699 out_endtrans: 699 out_endtrans:
700 gfs2_trans_end(sdp); 700 gfs2_trans_end(sdp);
701 out_trans_fail: 701 out_trans_fail:
702 if (alloc_required) { 702 if (alloc_required) {
703 gfs2_inplace_release(ip); 703 gfs2_inplace_release(ip);
704 out_qunlock: 704 out_qunlock:
705 gfs2_quota_unlock(ip); 705 gfs2_quota_unlock(ip);
706 } 706 }
707 out_unlock: 707 out_unlock:
708 if (&ip->i_inode == sdp->sd_rindex) { 708 if (&ip->i_inode == sdp->sd_rindex) {
709 gfs2_glock_dq(&m_ip->i_gh); 709 gfs2_glock_dq(&m_ip->i_gh);
710 gfs2_holder_uninit(&m_ip->i_gh); 710 gfs2_holder_uninit(&m_ip->i_gh);
711 } 711 }
712 gfs2_glock_dq(&ip->i_gh); 712 gfs2_glock_dq(&ip->i_gh);
713 out_uninit: 713 out_uninit:
714 gfs2_holder_uninit(&ip->i_gh); 714 gfs2_holder_uninit(&ip->i_gh);
715 return error; 715 return error;
716 } 716 }
717 717
718 /** 718 /**
719 * adjust_fs_space - Adjusts the free space available due to gfs2_grow 719 * adjust_fs_space - Adjusts the free space available due to gfs2_grow
720 * @inode: the rindex inode 720 * @inode: the rindex inode
721 */ 721 */
722 static void adjust_fs_space(struct inode *inode) 722 static void adjust_fs_space(struct inode *inode)
723 { 723 {
724 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 724 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
725 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 725 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
726 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); 726 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
727 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; 727 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
728 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; 728 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
729 struct buffer_head *m_bh, *l_bh; 729 struct buffer_head *m_bh, *l_bh;
730 u64 fs_total, new_free; 730 u64 fs_total, new_free;
731 731
732 /* Total up the file system space, according to the latest rindex. */ 732 /* Total up the file system space, according to the latest rindex. */
733 fs_total = gfs2_ri_total(sdp); 733 fs_total = gfs2_ri_total(sdp);
734 if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0) 734 if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0)
735 return; 735 return;
736 736
737 spin_lock(&sdp->sd_statfs_spin); 737 spin_lock(&sdp->sd_statfs_spin);
738 gfs2_statfs_change_in(m_sc, m_bh->b_data + 738 gfs2_statfs_change_in(m_sc, m_bh->b_data +
739 sizeof(struct gfs2_dinode)); 739 sizeof(struct gfs2_dinode));
740 if (fs_total > (m_sc->sc_total + l_sc->sc_total)) 740 if (fs_total > (m_sc->sc_total + l_sc->sc_total))
741 new_free = fs_total - (m_sc->sc_total + l_sc->sc_total); 741 new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
742 else 742 else
743 new_free = 0; 743 new_free = 0;
744 spin_unlock(&sdp->sd_statfs_spin); 744 spin_unlock(&sdp->sd_statfs_spin);
745 fs_warn(sdp, "File system extended by %llu blocks.\n", 745 fs_warn(sdp, "File system extended by %llu blocks.\n",
746 (unsigned long long)new_free); 746 (unsigned long long)new_free);
747 gfs2_statfs_change(sdp, new_free, new_free, 0); 747 gfs2_statfs_change(sdp, new_free, new_free, 0);
748 748
749 if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0) 749 if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0)
750 goto out; 750 goto out;
751 update_statfs(sdp, m_bh, l_bh); 751 update_statfs(sdp, m_bh, l_bh);
752 brelse(l_bh); 752 brelse(l_bh);
753 out: 753 out:
754 brelse(m_bh); 754 brelse(m_bh);
755 } 755 }
756 756
757 /** 757 /**
758 * gfs2_stuffed_write_end - Write end for stuffed files 758 * gfs2_stuffed_write_end - Write end for stuffed files
759 * @inode: The inode 759 * @inode: The inode
760 * @dibh: The buffer_head containing the on-disk inode 760 * @dibh: The buffer_head containing the on-disk inode
761 * @pos: The file position 761 * @pos: The file position
762 * @len: The length of the write 762 * @len: The length of the write
763 * @copied: How much was actually copied by the VFS 763 * @copied: How much was actually copied by the VFS
764 * @page: The page 764 * @page: The page
765 * 765 *
766 * This copies the data from the page into the inode block after 766 * This copies the data from the page into the inode block after
767 * the inode data structure itself. 767 * the inode data structure itself.
768 * 768 *
769 * Returns: errno 769 * Returns: errno
770 */ 770 */
771 static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, 771 static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
772 loff_t pos, unsigned len, unsigned copied, 772 loff_t pos, unsigned len, unsigned copied,
773 struct page *page) 773 struct page *page)
774 { 774 {
775 struct gfs2_inode *ip = GFS2_I(inode); 775 struct gfs2_inode *ip = GFS2_I(inode);
776 struct gfs2_sbd *sdp = GFS2_SB(inode); 776 struct gfs2_sbd *sdp = GFS2_SB(inode);
777 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 777 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
778 u64 to = pos + copied; 778 u64 to = pos + copied;
779 void *kaddr; 779 void *kaddr;
780 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); 780 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
781 781
782 BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); 782 BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
783 kaddr = kmap_atomic(page); 783 kaddr = kmap_atomic(page);
784 memcpy(buf + pos, kaddr + pos, copied); 784 memcpy(buf + pos, kaddr + pos, copied);
785 memset(kaddr + pos + copied, 0, len - copied); 785 memset(kaddr + pos + copied, 0, len - copied);
786 flush_dcache_page(page); 786 flush_dcache_page(page);
787 kunmap_atomic(kaddr); 787 kunmap_atomic(kaddr);
788 788
789 if (!PageUptodate(page)) 789 if (!PageUptodate(page))
790 SetPageUptodate(page); 790 SetPageUptodate(page);
791 unlock_page(page); 791 unlock_page(page);
792 page_cache_release(page); 792 page_cache_release(page);
793 793
794 if (copied) { 794 if (copied) {
795 if (inode->i_size < to) 795 if (inode->i_size < to)
796 i_size_write(inode, to); 796 i_size_write(inode, to);
797 mark_inode_dirty(inode); 797 mark_inode_dirty(inode);
798 } 798 }
799 799
800 if (inode == sdp->sd_rindex) { 800 if (inode == sdp->sd_rindex) {
801 adjust_fs_space(inode); 801 adjust_fs_space(inode);
802 sdp->sd_rindex_uptodate = 0; 802 sdp->sd_rindex_uptodate = 0;
803 } 803 }
804 804
805 brelse(dibh); 805 brelse(dibh);
806 gfs2_trans_end(sdp); 806 gfs2_trans_end(sdp);
807 if (inode == sdp->sd_rindex) { 807 if (inode == sdp->sd_rindex) {
808 gfs2_glock_dq(&m_ip->i_gh); 808 gfs2_glock_dq(&m_ip->i_gh);
809 gfs2_holder_uninit(&m_ip->i_gh); 809 gfs2_holder_uninit(&m_ip->i_gh);
810 } 810 }
811 gfs2_glock_dq(&ip->i_gh); 811 gfs2_glock_dq(&ip->i_gh);
812 gfs2_holder_uninit(&ip->i_gh); 812 gfs2_holder_uninit(&ip->i_gh);
813 return copied; 813 return copied;
814 } 814 }
815 815
816 /** 816 /**
817 * gfs2_write_end 817 * gfs2_write_end
818 * @file: The file to write to 818 * @file: The file to write to
819 * @mapping: The address space to write to 819 * @mapping: The address space to write to
820 * @pos: The file position 820 * @pos: The file position
821 * @len: The length of the data 821 * @len: The length of the data
822 * @copied: 822 * @copied:
823 * @page: The page that has been written 823 * @page: The page that has been written
824 * @fsdata: The fsdata (unused in GFS2) 824 * @fsdata: The fsdata (unused in GFS2)
825 * 825 *
826 * The main write_end function for GFS2. We have a separate one for 826 * The main write_end function for GFS2. We have a separate one for
827 * stuffed files as they are slightly different, otherwise we just 827 * stuffed files as they are slightly different, otherwise we just
828 * put our locking around the VFS provided functions. 828 * put our locking around the VFS provided functions.
829 * 829 *
830 * Returns: errno 830 * Returns: errno
831 */ 831 */
832 832
833 static int gfs2_write_end(struct file *file, struct address_space *mapping, 833 static int gfs2_write_end(struct file *file, struct address_space *mapping,
834 loff_t pos, unsigned len, unsigned copied, 834 loff_t pos, unsigned len, unsigned copied,
835 struct page *page, void *fsdata) 835 struct page *page, void *fsdata)
836 { 836 {
837 struct inode *inode = page->mapping->host; 837 struct inode *inode = page->mapping->host;
838 struct gfs2_inode *ip = GFS2_I(inode); 838 struct gfs2_inode *ip = GFS2_I(inode);
839 struct gfs2_sbd *sdp = GFS2_SB(inode); 839 struct gfs2_sbd *sdp = GFS2_SB(inode);
840 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 840 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
841 struct buffer_head *dibh; 841 struct buffer_head *dibh;
842 unsigned int from = pos & (PAGE_CACHE_SIZE - 1); 842 unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
843 unsigned int to = from + len; 843 unsigned int to = from + len;
844 int ret; 844 int ret;
845 845
846 BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL); 846 BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
847 847
848 ret = gfs2_meta_inode_buffer(ip, &dibh); 848 ret = gfs2_meta_inode_buffer(ip, &dibh);
849 if (unlikely(ret)) { 849 if (unlikely(ret)) {
850 unlock_page(page); 850 unlock_page(page);
851 page_cache_release(page); 851 page_cache_release(page);
852 goto failed; 852 goto failed;
853 } 853 }
854 854
855 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 855 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
856 856
857 if (gfs2_is_stuffed(ip)) 857 if (gfs2_is_stuffed(ip))
858 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); 858 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
859 859
860 if (!gfs2_is_writeback(ip)) 860 if (!gfs2_is_writeback(ip))
861 gfs2_page_add_databufs(ip, page, from, to); 861 gfs2_page_add_databufs(ip, page, from, to);
862 862
863 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); 863 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
864 864
865 if (inode == sdp->sd_rindex) { 865 if (inode == sdp->sd_rindex) {
866 adjust_fs_space(inode); 866 adjust_fs_space(inode);
867 sdp->sd_rindex_uptodate = 0; 867 sdp->sd_rindex_uptodate = 0;
868 } 868 }
869 869
870 brelse(dibh); 870 brelse(dibh);
871 failed: 871 failed:
872 gfs2_trans_end(sdp); 872 gfs2_trans_end(sdp);
873 gfs2_inplace_release(ip); 873 gfs2_inplace_release(ip);
874 if (ip->i_res->rs_qa_qd_num) 874 if (ip->i_res->rs_qa_qd_num)
875 gfs2_quota_unlock(ip); 875 gfs2_quota_unlock(ip);
876 if (inode == sdp->sd_rindex) { 876 if (inode == sdp->sd_rindex) {
877 gfs2_glock_dq(&m_ip->i_gh); 877 gfs2_glock_dq(&m_ip->i_gh);
878 gfs2_holder_uninit(&m_ip->i_gh); 878 gfs2_holder_uninit(&m_ip->i_gh);
879 } 879 }
880 gfs2_glock_dq(&ip->i_gh); 880 gfs2_glock_dq(&ip->i_gh);
881 gfs2_holder_uninit(&ip->i_gh); 881 gfs2_holder_uninit(&ip->i_gh);
882 return ret; 882 return ret;
883 } 883 }
884 884
/**
 * gfs2_set_page_dirty - Page dirtying function
 * @page: The page to dirty
 *
 * Marks the page Checked (so invalidatepage/releasepage know it may
 * carry journaled data) before the normal buffer-based dirtying.
 *
 * Returns: 1 if it dirtied the page, or 0 otherwise
 */

static int gfs2_set_page_dirty(struct page *page)
{
	int ret;

	SetPageChecked(page);
	ret = __set_page_dirty_buffers(page);
	return ret;
}
897 897
898 /** 898 /**
899 * gfs2_bmap - Block map function 899 * gfs2_bmap - Block map function
900 * @mapping: Address space info 900 * @mapping: Address space info
901 * @lblock: The block to map 901 * @lblock: The block to map
902 * 902 *
903 * Returns: The disk address for the block or 0 on hole or error 903 * Returns: The disk address for the block or 0 on hole or error
904 */ 904 */
905 905
906 static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) 906 static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
907 { 907 {
908 struct gfs2_inode *ip = GFS2_I(mapping->host); 908 struct gfs2_inode *ip = GFS2_I(mapping->host);
909 struct gfs2_holder i_gh; 909 struct gfs2_holder i_gh;
910 sector_t dblock = 0; 910 sector_t dblock = 0;
911 int error; 911 int error;
912 912
913 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 913 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
914 if (error) 914 if (error)
915 return 0; 915 return 0;
916 916
917 if (!gfs2_is_stuffed(ip)) 917 if (!gfs2_is_stuffed(ip))
918 dblock = generic_block_bmap(mapping, lblock, gfs2_block_map); 918 dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);
919 919
920 gfs2_glock_dq_uninit(&i_gh); 920 gfs2_glock_dq_uninit(&i_gh);
921 921
922 return dblock; 922 return dblock;
923 } 923 }
924 924
925 static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh) 925 static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
926 { 926 {
927 struct gfs2_bufdata *bd; 927 struct gfs2_bufdata *bd;
928 928
929 lock_buffer(bh); 929 lock_buffer(bh);
930 gfs2_log_lock(sdp); 930 gfs2_log_lock(sdp);
931 clear_buffer_dirty(bh); 931 clear_buffer_dirty(bh);
932 bd = bh->b_private; 932 bd = bh->b_private;
933 if (bd) { 933 if (bd) {
934 if (!list_empty(&bd->bd_list) && !buffer_pinned(bh)) 934 if (!list_empty(&bd->bd_list) && !buffer_pinned(bh))
935 list_del_init(&bd->bd_list); 935 list_del_init(&bd->bd_list);
936 else 936 else
937 gfs2_remove_from_journal(bh, current->journal_info, 0); 937 gfs2_remove_from_journal(bh, current->journal_info, 0);
938 } 938 }
939 bh->b_bdev = NULL; 939 bh->b_bdev = NULL;
940 clear_buffer_mapped(bh); 940 clear_buffer_mapped(bh);
941 clear_buffer_req(bh); 941 clear_buffer_req(bh);
942 clear_buffer_new(bh); 942 clear_buffer_new(bh);
943 gfs2_log_unlock(sdp); 943 gfs2_log_unlock(sdp);
944 unlock_buffer(bh); 944 unlock_buffer(bh);
945 } 945 }
946 946
947 static void gfs2_invalidatepage(struct page *page, unsigned long offset) 947 static void gfs2_invalidatepage(struct page *page, unsigned long offset)
948 { 948 {
949 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); 949 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
950 struct buffer_head *bh, *head; 950 struct buffer_head *bh, *head;
951 unsigned long pos = 0; 951 unsigned long pos = 0;
952 952
953 BUG_ON(!PageLocked(page)); 953 BUG_ON(!PageLocked(page));
954 if (offset == 0) 954 if (offset == 0)
955 ClearPageChecked(page); 955 ClearPageChecked(page);
956 if (!page_has_buffers(page)) 956 if (!page_has_buffers(page))
957 goto out; 957 goto out;
958 958
959 bh = head = page_buffers(page); 959 bh = head = page_buffers(page);
960 do { 960 do {
961 if (offset <= pos) 961 if (offset <= pos)
962 gfs2_discard(sdp, bh); 962 gfs2_discard(sdp, bh);
963 pos += bh->b_size; 963 pos += bh->b_size;
964 bh = bh->b_this_page; 964 bh = bh->b_this_page;
965 } while (bh != head); 965 } while (bh != head);
966 out: 966 out:
967 if (offset == 0) 967 if (offset == 0)
968 try_to_release_page(page, 0); 968 try_to_release_page(page, 0);
969 } 969 }
970 970
971 /** 971 /**
972 * gfs2_ok_for_dio - check that dio is valid on this file 972 * gfs2_ok_for_dio - check that dio is valid on this file
973 * @ip: The inode 973 * @ip: The inode
974 * @rw: READ or WRITE 974 * @rw: READ or WRITE
975 * @offset: The offset at which we are reading or writing 975 * @offset: The offset at which we are reading or writing
976 * 976 *
977 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) 977 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
978 * 1 (to accept the i/o request) 978 * 1 (to accept the i/o request)
979 */ 979 */
980 static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) 980 static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
981 { 981 {
982 /* 982 /*
983 * Should we return an error here? I can't see that O_DIRECT for 983 * Should we return an error here? I can't see that O_DIRECT for
984 * a stuffed file makes any sense. For now we'll silently fall 984 * a stuffed file makes any sense. For now we'll silently fall
985 * back to buffered I/O 985 * back to buffered I/O
986 */ 986 */
987 if (gfs2_is_stuffed(ip)) 987 if (gfs2_is_stuffed(ip))
988 return 0; 988 return 0;
989 989
990 if (offset >= i_size_read(&ip->i_inode)) 990 if (offset >= i_size_read(&ip->i_inode))
991 return 0; 991 return 0;
992 return 1; 992 return 1;
993 } 993 }
994 994
995 995
996 996
997 static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, 997 static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
998 const struct iovec *iov, loff_t offset, 998 const struct iovec *iov, loff_t offset,
999 unsigned long nr_segs) 999 unsigned long nr_segs)
1000 { 1000 {
1001 struct file *file = iocb->ki_filp; 1001 struct file *file = iocb->ki_filp;
1002 struct inode *inode = file->f_mapping->host; 1002 struct inode *inode = file->f_mapping->host;
1003 struct gfs2_inode *ip = GFS2_I(inode); 1003 struct gfs2_inode *ip = GFS2_I(inode);
1004 struct gfs2_holder gh; 1004 struct gfs2_holder gh;
1005 int rv; 1005 int rv;
1006 1006
1007 /* 1007 /*
1008 * Deferred lock, even if its a write, since we do no allocation 1008 * Deferred lock, even if its a write, since we do no allocation
1009 * on this path. All we need change is atime, and this lock mode 1009 * on this path. All we need change is atime, and this lock mode
1010 * ensures that other nodes have flushed their buffered read caches 1010 * ensures that other nodes have flushed their buffered read caches
1011 * (i.e. their page cache entries for this inode). We do not, 1011 * (i.e. their page cache entries for this inode). We do not,
1012 * unfortunately have the option of only flushing a range like 1012 * unfortunately have the option of only flushing a range like
1013 * the VFS does. 1013 * the VFS does.
1014 */ 1014 */
1015 gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); 1015 gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
1016 rv = gfs2_glock_nq(&gh); 1016 rv = gfs2_glock_nq(&gh);
1017 if (rv) 1017 if (rv)
1018 return rv; 1018 return rv;
1019 rv = gfs2_ok_for_dio(ip, rw, offset); 1019 rv = gfs2_ok_for_dio(ip, rw, offset);
1020 if (rv != 1) 1020 if (rv != 1)
1021 goto out; /* dio not valid, fall back to buffered i/o */ 1021 goto out; /* dio not valid, fall back to buffered i/o */
1022 1022
1023 rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 1023 rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
1024 offset, nr_segs, gfs2_get_block_direct, 1024 offset, nr_segs, gfs2_get_block_direct,
1025 NULL, NULL, 0); 1025 NULL, NULL, 0);
1026 out: 1026 out:
1027 gfs2_glock_dq(&gh); 1027 gfs2_glock_dq(&gh);
1028 gfs2_holder_uninit(&gh); 1028 gfs2_holder_uninit(&gh);
1029 return rv; 1029 return rv;
1030 } 1030 }
1031 1031
1032 /** 1032 /**
1033 * gfs2_releasepage - free the metadata associated with a page 1033 * gfs2_releasepage - free the metadata associated with a page
1034 * @page: the page that's being released 1034 * @page: the page that's being released
1035 * @gfp_mask: passed from Linux VFS, ignored by us 1035 * @gfp_mask: passed from Linux VFS, ignored by us
1036 * 1036 *
1037 * Call try_to_free_buffers() if the buffers in this page can be 1037 * Call try_to_free_buffers() if the buffers in this page can be
1038 * released. 1038 * released.
1039 * 1039 *
1040 * Returns: 0 1040 * Returns: 0
1041 */ 1041 */
1042 1042
1043 int gfs2_releasepage(struct page *page, gfp_t gfp_mask) 1043 int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1044 { 1044 {
1045 struct address_space *mapping = page->mapping; 1045 struct address_space *mapping = page->mapping;
1046 struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); 1046 struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
1047 struct buffer_head *bh, *head; 1047 struct buffer_head *bh, *head;
1048 struct gfs2_bufdata *bd; 1048 struct gfs2_bufdata *bd;
1049 1049
1050 if (!page_has_buffers(page)) 1050 if (!page_has_buffers(page))
1051 return 0; 1051 return 0;
1052 1052
1053 gfs2_log_lock(sdp); 1053 gfs2_log_lock(sdp);
1054 spin_lock(&sdp->sd_ail_lock); 1054 spin_lock(&sdp->sd_ail_lock);
1055 head = bh = page_buffers(page); 1055 head = bh = page_buffers(page);
1056 do { 1056 do {
1057 if (atomic_read(&bh->b_count)) 1057 if (atomic_read(&bh->b_count))
1058 goto cannot_release; 1058 goto cannot_release;
1059 bd = bh->b_private; 1059 bd = bh->b_private;
1060 if (bd && bd->bd_ail) 1060 if (bd && bd->bd_ail)
1061 goto cannot_release; 1061 goto cannot_release;
1062 if (buffer_pinned(bh) || buffer_dirty(bh)) 1062 if (buffer_pinned(bh) || buffer_dirty(bh))
1063 goto not_possible; 1063 goto not_possible;
1064 bh = bh->b_this_page; 1064 bh = bh->b_this_page;
1065 } while(bh != head); 1065 } while(bh != head);
1066 spin_unlock(&sdp->sd_ail_lock); 1066 spin_unlock(&sdp->sd_ail_lock);
1067 gfs2_log_unlock(sdp); 1067 gfs2_log_unlock(sdp);
1068 1068
1069 head = bh = page_buffers(page); 1069 head = bh = page_buffers(page);
1070 do { 1070 do {
1071 gfs2_log_lock(sdp); 1071 gfs2_log_lock(sdp);
1072 bd = bh->b_private; 1072 bd = bh->b_private;
1073 if (bd) { 1073 if (bd) {
1074 gfs2_assert_warn(sdp, bd->bd_bh == bh); 1074 gfs2_assert_warn(sdp, bd->bd_bh == bh);
1075 if (!list_empty(&bd->bd_list)) { 1075 if (!list_empty(&bd->bd_list)) {
1076 if (!buffer_pinned(bh)) 1076 if (!buffer_pinned(bh))
1077 list_del_init(&bd->bd_list); 1077 list_del_init(&bd->bd_list);
1078 else 1078 else
1079 bd = NULL; 1079 bd = NULL;
1080 } 1080 }
1081 if (bd) 1081 if (bd)
1082 bd->bd_bh = NULL; 1082 bd->bd_bh = NULL;
1083 bh->b_private = NULL; 1083 bh->b_private = NULL;
1084 } 1084 }
1085 gfs2_log_unlock(sdp); 1085 gfs2_log_unlock(sdp);
1086 if (bd) 1086 if (bd)
1087 kmem_cache_free(gfs2_bufdata_cachep, bd); 1087 kmem_cache_free(gfs2_bufdata_cachep, bd);
1088 1088
1089 bh = bh->b_this_page; 1089 bh = bh->b_this_page;
1090 } while (bh != head); 1090 } while (bh != head);
1091 1091
1092 return try_to_free_buffers(page); 1092 return try_to_free_buffers(page);
1093 1093
1094 not_possible: /* Should never happen */ 1094 not_possible: /* Should never happen */
1095 WARN_ON(buffer_dirty(bh)); 1095 WARN_ON(buffer_dirty(bh));
1096 WARN_ON(buffer_pinned(bh)); 1096 WARN_ON(buffer_pinned(bh));
1097 cannot_release: 1097 cannot_release:
1098 spin_unlock(&sdp->sd_ail_lock); 1098 spin_unlock(&sdp->sd_ail_lock);
1099 gfs2_log_unlock(sdp); 1099 gfs2_log_unlock(sdp);
1100 return 0; 1100 return 0;
1101 } 1101 }
1102 1102
1103 static const struct address_space_operations gfs2_writeback_aops = { 1103 static const struct address_space_operations gfs2_writeback_aops = {
1104 .writepage = gfs2_writeback_writepage, 1104 .writepage = gfs2_writeback_writepage,
1105 .writepages = gfs2_writeback_writepages, 1105 .writepages = gfs2_writeback_writepages,
1106 .readpage = gfs2_readpage, 1106 .readpage = gfs2_readpage,
1107 .readpages = gfs2_readpages, 1107 .readpages = gfs2_readpages,
1108 .write_begin = gfs2_write_begin, 1108 .write_begin = gfs2_write_begin,
1109 .write_end = gfs2_write_end, 1109 .write_end = gfs2_write_end,
1110 .bmap = gfs2_bmap, 1110 .bmap = gfs2_bmap,
1111 .invalidatepage = gfs2_invalidatepage, 1111 .invalidatepage = gfs2_invalidatepage,
1112 .releasepage = gfs2_releasepage, 1112 .releasepage = gfs2_releasepage,
1113 .direct_IO = gfs2_direct_IO, 1113 .direct_IO = gfs2_direct_IO,
1114 .migratepage = buffer_migrate_page, 1114 .migratepage = buffer_migrate_page,
1115 .is_partially_uptodate = block_is_partially_uptodate, 1115 .is_partially_uptodate = block_is_partially_uptodate,
1116 .error_remove_page = generic_error_remove_page, 1116 .error_remove_page = generic_error_remove_page,
1117 }; 1117 };
1118 1118
/*
 * Address space operations for inodes in "ordered" data mode.
 * Uses a private .set_page_dirty hook and no .writepages entry
 * (the generic writepages path is used instead).
 */
static const struct address_space_operations gfs2_ordered_aops = {
	.writepage = gfs2_ordered_writepage,
	.readpage = gfs2_readpage,
	.readpages = gfs2_readpages,
	.write_begin = gfs2_write_begin,
	.write_end = gfs2_write_end,
	.set_page_dirty = gfs2_set_page_dirty,
	.bmap = gfs2_bmap,
	.invalidatepage = gfs2_invalidatepage,
	.releasepage = gfs2_releasepage,
	.direct_IO = gfs2_direct_IO,
	.migratepage = buffer_migrate_page,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_page = generic_error_remove_page,
};
1134 1134
/*
 * Address space operations for inodes in journaled-data ("jdata") mode.
 * Note: no .direct_IO and no .migratepage entries are installed here,
 * unlike the writeback/ordered tables above.
 */
static const struct address_space_operations gfs2_jdata_aops = {
	.writepage = gfs2_jdata_writepage,
	.writepages = gfs2_jdata_writepages,
	.readpage = gfs2_readpage,
	.readpages = gfs2_readpages,
	.write_begin = gfs2_write_begin,
	.write_end = gfs2_write_end,
	.set_page_dirty = gfs2_set_page_dirty,
	.bmap = gfs2_bmap,
	.invalidatepage = gfs2_invalidatepage,
	.releasepage = gfs2_releasepage,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_page = generic_error_remove_page,
};
1149 1149
1150 void gfs2_set_aops(struct inode *inode) 1150 void gfs2_set_aops(struct inode *inode)
1151 { 1151 {
1152 struct gfs2_inode *ip = GFS2_I(inode); 1152 struct gfs2_inode *ip = GFS2_I(inode);
1153 1153
1154 if (gfs2_is_writeback(ip)) 1154 if (gfs2_is_writeback(ip))
1155 inode->i_mapping->a_ops = &gfs2_writeback_aops; 1155 inode->i_mapping->a_ops = &gfs2_writeback_aops;
1156 else if (gfs2_is_ordered(ip)) 1156 else if (gfs2_is_ordered(ip))
1157 inode->i_mapping->a_ops = &gfs2_ordered_aops; 1157 inode->i_mapping->a_ops = &gfs2_ordered_aops;
1158 else if (gfs2_is_jdata(ip)) 1158 else if (gfs2_is_jdata(ip))
1159 inode->i_mapping->a_ops = &gfs2_jdata_aops; 1159 inode->i_mapping->a_ops = &gfs2_jdata_aops;
1160 else 1160 else
1161 BUG(); 1161 BUG();
1162 } 1162 }
1163 1163
1164 1164
1 /* 1 /*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10 #include <linux/spinlock.h> 10 #include <linux/spinlock.h>
11 #include <linux/completion.h> 11 #include <linux/completion.h>
12 #include <linux/buffer_head.h> 12 #include <linux/buffer_head.h>
13 #include <linux/blkdev.h> 13 #include <linux/blkdev.h>
14 #include <linux/gfs2_ondisk.h> 14 #include <linux/gfs2_ondisk.h>
15 #include <linux/crc32.h> 15 #include <linux/crc32.h>
16 16
17 #include "gfs2.h" 17 #include "gfs2.h"
18 #include "incore.h" 18 #include "incore.h"
19 #include "bmap.h" 19 #include "bmap.h"
20 #include "glock.h" 20 #include "glock.h"
21 #include "inode.h" 21 #include "inode.h"
22 #include "meta_io.h" 22 #include "meta_io.h"
23 #include "quota.h" 23 #include "quota.h"
24 #include "rgrp.h" 24 #include "rgrp.h"
25 #include "super.h" 25 #include "super.h"
26 #include "trans.h" 26 #include "trans.h"
27 #include "dir.h" 27 #include "dir.h"
28 #include "util.h" 28 #include "util.h"
29 #include "trace_gfs2.h" 29 #include "trace_gfs2.h"
30 30
/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	/* Buffer for each level of the tree, [0] being the dinode */
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	/* Pointer index within the buffer at each level */
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
};
39 39
/* Parameters for a metadata tree deallocation ("strip") pass.
 * NOTE(review): callers are outside this view — field semantics below
 * are inferred from the names; confirm against do_strip()/trunc_dealloc. */
struct strip_mine {
	int sm_first;		/* presumably: first call of the pass */
	unsigned int sm_height;	/* presumably: tree height being stripped */
};
44 44
/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @page: The (optional) page. This is looked up if @page is NULL
 *
 * Copies the inline (stuffed) data out of the dinode into the page
 * covering offset 0 and maps that page's buffer to @block.
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
{
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;
	int release = 0;

	/* Stuffed data always lives at file offset 0; if the caller
	   didn't hand us that page, look it up (and remember to put
	   our reference back at the end). */
	if (!page || page->index) {
		page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
		if (!page)
			return -ENOMEM;
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);
		u64 dsize = i_size_read(inode);

		/* Clamp to the maximum payload a dinode block can hold */
		if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
			dsize = dibh->b_size - sizeof(struct gfs2_dinode);

		/* Copy the stuffed data in and zero the rest of the page */
		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
		kunmap(page);

		SetPageUptodate(page);
	}

	/* One buffer covering the whole block */
	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << inode->i_blkbits,
				     (1 << BH_Uptodate));

	bh = page_buffers(page);

	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	/* jdata inodes skip the plain dirty; writeback inodes skip the
	   transaction add — only ordered mode does both */
	if (!gfs2_is_jdata(ip))
		mark_buffer_dirty(bh);
	if (!gfs2_is_writeback(ip))
		gfs2_trans_add_bh(ip->i_gl, bh, 0);

	if (release) {
		unlock_page(page);
		page_cache_release(page);
	}

	return 0;
}
105 105
/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @page: The (optional) page. This is looked up if the @page is NULL
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	/* A zero-size inode has nothing to copy out; it just needs its
	   tail cleared and its height bumped below. */
	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			/* Directory data becomes a metadata block */
			gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			/* Regular file data goes via the page cache */
			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		/* First block pointer slot immediately follows the dinode */
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}
175 175
176 176
177 /** 177 /**
178 * find_metapath - Find path through the metadata tree 178 * find_metapath - Find path through the metadata tree
179 * @sdp: The superblock 179 * @sdp: The superblock
180 * @mp: The metapath to return the result in 180 * @mp: The metapath to return the result in
181 * @block: The disk block to look up 181 * @block: The disk block to look up
182 * @height: The pre-calculated height of the metadata tree 182 * @height: The pre-calculated height of the metadata tree
183 * 183 *
184 * This routine returns a struct metapath structure that defines a path 184 * This routine returns a struct metapath structure that defines a path
185 * through the metadata of inode "ip" to get to block "block". 185 * through the metadata of inode "ip" to get to block "block".
186 * 186 *
187 * Example: 187 * Example:
188 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a 188 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
189 * filesystem with a blocksize of 4096. 189 * filesystem with a blocksize of 4096.
190 * 190 *
191 * find_metapath() would return a struct metapath structure set to: 191 * find_metapath() would return a struct metapath structure set to:
192 * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48, 192 * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
193 * and mp_list[2] = 165. 193 * and mp_list[2] = 165.
194 * 194 *
195 * That means that in order to get to the block containing the byte at 195 * That means that in order to get to the block containing the byte at
196 * offset 101342453, we would load the indirect block pointed to by pointer 196 * offset 101342453, we would load the indirect block pointed to by pointer
197 * 0 in the dinode. We would then load the indirect block pointed to by 197 * 0 in the dinode. We would then load the indirect block pointed to by
198 * pointer 48 in that indirect block. We would then load the data block 198 * pointer 48 in that indirect block. We would then load the data block
199 * pointed to by pointer 165 in that indirect block. 199 * pointed to by pointer 165 in that indirect block.
200 * 200 *
201 * ---------------------------------------- 201 * ----------------------------------------
202 * | Dinode | | 202 * | Dinode | |
203 * | | 4| 203 * | | 4|
204 * | |0 1 2 3 4 5 9| 204 * | |0 1 2 3 4 5 9|
205 * | | 6| 205 * | | 6|
206 * ---------------------------------------- 206 * ----------------------------------------
207 * | 207 * |
208 * | 208 * |
209 * V 209 * V
210 * ---------------------------------------- 210 * ----------------------------------------
211 * | Indirect Block | 211 * | Indirect Block |
212 * | 5| 212 * | 5|
213 * | 4 4 4 4 4 5 5 1| 213 * | 4 4 4 4 4 5 5 1|
214 * |0 5 6 7 8 9 0 1 2| 214 * |0 5 6 7 8 9 0 1 2|
215 * ---------------------------------------- 215 * ----------------------------------------
216 * | 216 * |
217 * | 217 * |
218 * V 218 * V
219 * ---------------------------------------- 219 * ----------------------------------------
220 * | Indirect Block | 220 * | Indirect Block |
221 * | 1 1 1 1 1 5| 221 * | 1 1 1 1 1 5|
222 * | 6 6 6 6 6 1| 222 * | 6 6 6 6 6 1|
223 * |0 3 4 5 6 7 2| 223 * |0 3 4 5 6 7 2|
224 * ---------------------------------------- 224 * ----------------------------------------
225 * | 225 * |
226 * | 226 * |
227 * V 227 * V
228 * ---------------------------------------- 228 * ----------------------------------------
229 * | Data block containing offset | 229 * | Data block containing offset |
230 * | 101342453 | 230 * | 101342453 |
231 * | | 231 * | |
232 * | | 232 * | |
233 * ---------------------------------------- 233 * ----------------------------------------
234 * 234 *
235 */ 235 */
236 236
237 static void find_metapath(const struct gfs2_sbd *sdp, u64 block, 237 static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
238 struct metapath *mp, unsigned int height) 238 struct metapath *mp, unsigned int height)
239 { 239 {
240 unsigned int i; 240 unsigned int i;
241 241
242 for (i = height; i--;) 242 for (i = height; i--;)
243 mp->mp_list[i] = do_div(block, sdp->sd_inptrs); 243 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
244 244
245 } 245 }
246 246
247 static inline unsigned int metapath_branch_start(const struct metapath *mp) 247 static inline unsigned int metapath_branch_start(const struct metapath *mp)
248 { 248 {
249 if (mp->mp_list[0] == 0) 249 if (mp->mp_list[0] == 0)
250 return 2; 250 return 2;
251 return 1; 251 return 1;
252 } 252 }
253 253
254 /** 254 /**
255 * metapointer - Return pointer to start of metadata in a buffer 255 * metapointer - Return pointer to start of metadata in a buffer
256 * @height: The metadata height (0 = dinode) 256 * @height: The metadata height (0 = dinode)
257 * @mp: The metapath 257 * @mp: The metapath
258 * 258 *
259 * Return a pointer to the block number of the next height of the metadata 259 * Return a pointer to the block number of the next height of the metadata
260 * tree given a buffer containing the pointer to the current height of the 260 * tree given a buffer containing the pointer to the current height of the
261 * metadata tree. 261 * metadata tree.
262 */ 262 */
263 263
264 static inline __be64 *metapointer(unsigned int height, const struct metapath *mp) 264 static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
265 { 265 {
266 struct buffer_head *bh = mp->mp_bh[height]; 266 struct buffer_head *bh = mp->mp_bh[height];
267 unsigned int head_size = (height > 0) ? 267 unsigned int head_size = (height > 0) ?
268 sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); 268 sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
269 return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; 269 return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
270 } 270 }
271 271
/* Issue read-ahead for every non-zero block pointer from @pos to the
 * end of @bh. Buffers that are already locked (someone else is doing
 * I/O) or already uptodate are skipped. Reads are submitted async
 * (READA); completion just marks the buffer uptodate. */
static void gfs2_metapath_ra(struct gfs2_glock *gl,
			     const struct buffer_head *bh, const __be64 *pos)
{
	struct buffer_head *rabh;
	const __be64 *endp = (const __be64 *)(bh->b_data + bh->b_size);
	const __be64 *t;

	for (t = pos; t < endp; t++) {
		if (!*t)
			continue;

		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
		if (trylock_buffer(rabh)) {
			if (!buffer_uptodate(rabh)) {
				rabh->b_end_io = end_buffer_read_sync;
				/* submit_bh consumes our buffer reference
				   via the end_io path, hence no brelse */
				submit_bh(READA | REQ_META, rabh);
				continue;
			}
			unlock_buffer(rabh);
		}
		brelse(rabh);
	}
}
295 295
/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error or height of metadata tree
 */

static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	unsigned int end_of_metadata = ip->i_height - 1;
	unsigned int x;
	__be64 *ptr;
	u64 dblock;
	int ret;

	for (x = 0; x < end_of_metadata; x++) {
		ptr = metapointer(x, mp);
		dblock = be64_to_cpu(*ptr);
		/* Hole in the tree: report the height just below it */
		if (!dblock)
			return x + 1;

		ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, &mp->mp_bh[x+1]);
		if (ret)
			return ret;
	}

	/* Full path found: the complete tree height */
	return ip->i_height;
}
334 334
335 static inline void release_metapath(struct metapath *mp) 335 static inline void release_metapath(struct metapath *mp)
336 { 336 {
337 int i; 337 int i;
338 338
339 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) { 339 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
340 if (mp->mp_bh[i] == NULL) 340 if (mp->mp_bh[i] == NULL)
341 break; 341 break;
342 brelse(mp->mp_bh[i]); 342 brelse(mp->mp_bh[i]);
343 } 343 }
344 } 344 }
345 345
/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @start: Start of the buffer
 * @len: Length of the buffer in bytes
 * @ptr: Current position in the buffer
 * @limit: Max extent length to return (0 = unlimited)
 * @eob: Set to 1 if we hit "end of block"
 *
 * If the first block is zero (unallocated) it will return the number of
 * unallocated blocks in the extent, otherwise it will return the number
 * of contiguous blocks in the extent.
 *
 * Returns: The length of the extent (minimum of one block)
 */

static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, unsigned limit, int *eob)
{
	const __be64 *end = (start + len);
	const __be64 *first = ptr;
	u64 d = be64_to_cpu(*ptr);

	*eob = 0;
	do {
		ptr++;
		if (ptr >= end)
			break;
		if (limit && --limit == 0)
			break;
		/* For an allocated extent, expect the next block number to
		   be exactly one higher; when d == 0 (unallocated), it stays
		   zero so the loop counts consecutive zero pointers instead */
		if (d)
			d++;
	} while(be64_to_cpu(*ptr) == d);
	if (ptr >= end)
		*eob = 1;
	return (ptr - first);
}
381 381
382 static inline void bmap_lock(struct gfs2_inode *ip, int create) 382 static inline void bmap_lock(struct gfs2_inode *ip, int create)
383 { 383 {
384 if (create) 384 if (create)
385 down_write(&ip->i_rw_mutex); 385 down_write(&ip->i_rw_mutex);
386 else 386 else
387 down_read(&ip->i_rw_mutex); 387 down_read(&ip->i_rw_mutex);
388 } 388 }
389 389
390 static inline void bmap_unlock(struct gfs2_inode *ip, int create) 390 static inline void bmap_unlock(struct gfs2_inode *ip, int create)
391 { 391 {
392 if (create) 392 if (create)
393 up_write(&ip->i_rw_mutex); 393 up_write(&ip->i_rw_mutex);
394 else 394 else
395 up_read(&ip->i_rw_mutex); 395 up_read(&ip->i_rw_mutex);
396 } 396 }
397 397
/* Initialise a newly allocated indirect block at height @i of the
 * metapath, clear it, and store its block number @bn at pointer slot
 * @offset of the parent buffer (mp_bh[i - 1]). Returns the parent
 * slot that was written.
 *
 * Note: @ptr is only address arithmetic on the parent buffer; no
 * dereference happens before the BUG_ON sanity checks below. */
static inline __be64 *gfs2_indirect_init(struct metapath *mp,
					 struct gfs2_glock *gl, unsigned int i,
					 unsigned offset, u64 bn)
{
	__be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
				 ((i > 1) ? sizeof(struct gfs2_meta_header) :
					    sizeof(struct gfs2_dinode)));
	BUG_ON(i < 1);
	BUG_ON(mp->mp_bh[i] != NULL);
	mp->mp_bh[i] = gfs2_meta_new(gl, bn);
	/* Add to the transaction before stamping type/clearing tail */
	gfs2_trans_add_bh(gl, mp->mp_bh[i], 1);
	gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
	ptr += offset;
	*ptr = cpu_to_be64(bn);
	return ptr;
}
415 415
/* States for the allocation state machine in gfs2_bmap_alloc(); the
 * machine steps downwards from the highest required state to ALLOC_DATA. */
enum alloc_state {
	ALLOC_DATA = 0,		/* allocating data blocks */
	ALLOC_GROW_DEPTH = 1,	/* filling in missing indirect blocks */
	ALLOC_GROW_HEIGHT = 2,	/* adding levels to the top of the tree */
	/* ALLOC_UNSTUFF = 3, TBD and rather complicated */
};
422 422
423 /** 423 /**
424 * gfs2_bmap_alloc - Build a metadata tree of the requested height 424 * gfs2_bmap_alloc - Build a metadata tree of the requested height
425 * @inode: The GFS2 inode 425 * @inode: The GFS2 inode
426 * @lblock: The logical starting block of the extent 426 * @lblock: The logical starting block of the extent
427 * @bh_map: This is used to return the mapping details 427 * @bh_map: This is used to return the mapping details
428 * @mp: The metapath 428 * @mp: The metapath
429 * @sheight: The starting height (i.e. whats already mapped) 429 * @sheight: The starting height (i.e. whats already mapped)
430 * @height: The height to build to 430 * @height: The height to build to
431 * @maxlen: The max number of data blocks to alloc 431 * @maxlen: The max number of data blocks to alloc
432 * 432 *
433 * In this routine we may have to alloc: 433 * In this routine we may have to alloc:
434 * i) Indirect blocks to grow the metadata tree height 434 * i) Indirect blocks to grow the metadata tree height
435 * ii) Indirect blocks to fill in lower part of the metadata tree 435 * ii) Indirect blocks to fill in lower part of the metadata tree
436 * iii) Data blocks 436 * iii) Data blocks
437 * 437 *
438 * The function is in two parts. The first part works out the total 438 * The function is in two parts. The first part works out the total
439 * number of blocks which we need. The second part does the actual 439 * number of blocks which we need. The second part does the actual
440 * allocation asking for an extent at a time (if enough contiguous free 440 * allocation asking for an extent at a time (if enough contiguous free
441 * blocks are available, there will only be one request per bmap call) 441 * blocks are available, there will only be one request per bmap call)
442 * and uses the state machine to initialise the blocks in order. 442 * and uses the state machine to initialise the blocks in order.
443 * 443 *
444 * Returns: errno on error 444 * Returns: errno on error
445 */ 445 */
446 446
447 static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, 447 static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
448 struct buffer_head *bh_map, struct metapath *mp, 448 struct buffer_head *bh_map, struct metapath *mp,
449 const unsigned int sheight, 449 const unsigned int sheight,
450 const unsigned int height, 450 const unsigned int height,
451 const unsigned int maxlen) 451 const unsigned int maxlen)
452 { 452 {
453 struct gfs2_inode *ip = GFS2_I(inode); 453 struct gfs2_inode *ip = GFS2_I(inode);
454 struct gfs2_sbd *sdp = GFS2_SB(inode); 454 struct gfs2_sbd *sdp = GFS2_SB(inode);
455 struct super_block *sb = sdp->sd_vfs; 455 struct super_block *sb = sdp->sd_vfs;
456 struct buffer_head *dibh = mp->mp_bh[0]; 456 struct buffer_head *dibh = mp->mp_bh[0];
457 u64 bn, dblock = 0; 457 u64 bn, dblock = 0;
458 unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; 458 unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
459 unsigned dblks = 0; 459 unsigned dblks = 0;
460 unsigned ptrs_per_blk; 460 unsigned ptrs_per_blk;
461 const unsigned end_of_metadata = height - 1; 461 const unsigned end_of_metadata = height - 1;
462 int ret; 462 int ret;
463 int eob = 0; 463 int eob = 0;
464 enum alloc_state state; 464 enum alloc_state state;
465 __be64 *ptr; 465 __be64 *ptr;
466 __be64 zero_bn = 0; 466 __be64 zero_bn = 0;
467 467
468 BUG_ON(sheight < 1); 468 BUG_ON(sheight < 1);
469 BUG_ON(dibh == NULL); 469 BUG_ON(dibh == NULL);
470 470
471 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 471 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
472 472
473 if (height == sheight) { 473 if (height == sheight) {
474 struct buffer_head *bh; 474 struct buffer_head *bh;
475 /* Bottom indirect block exists, find unalloced extent size */ 475 /* Bottom indirect block exists, find unalloced extent size */
476 ptr = metapointer(end_of_metadata, mp); 476 ptr = metapointer(end_of_metadata, mp);
477 bh = mp->mp_bh[end_of_metadata]; 477 bh = mp->mp_bh[end_of_metadata];
478 dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, 478 dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen,
479 &eob); 479 &eob);
480 BUG_ON(dblks < 1); 480 BUG_ON(dblks < 1);
481 state = ALLOC_DATA; 481 state = ALLOC_DATA;
482 } else { 482 } else {
483 /* Need to allocate indirect blocks */ 483 /* Need to allocate indirect blocks */
484 ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs; 484 ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs;
485 dblks = min(maxlen, ptrs_per_blk - mp->mp_list[end_of_metadata]); 485 dblks = min(maxlen, ptrs_per_blk - mp->mp_list[end_of_metadata]);
486 if (height == ip->i_height) { 486 if (height == ip->i_height) {
487 /* Writing into existing tree, extend tree down */ 487 /* Writing into existing tree, extend tree down */
488 iblks = height - sheight; 488 iblks = height - sheight;
489 state = ALLOC_GROW_DEPTH; 489 state = ALLOC_GROW_DEPTH;
490 } else { 490 } else {
491 /* Building up tree height */ 491 /* Building up tree height */
492 state = ALLOC_GROW_HEIGHT; 492 state = ALLOC_GROW_HEIGHT;
493 iblks = height - ip->i_height; 493 iblks = height - ip->i_height;
494 branch_start = metapath_branch_start(mp); 494 branch_start = metapath_branch_start(mp);
495 iblks += (height - branch_start); 495 iblks += (height - branch_start);
496 } 496 }
497 } 497 }
498 498
499 /* start of the second part of the function (state machine) */ 499 /* start of the second part of the function (state machine) */
500 500
501 blks = dblks + iblks; 501 blks = dblks + iblks;
502 i = sheight; 502 i = sheight;
503 do { 503 do {
504 int error; 504 int error;
505 n = blks - alloced; 505 n = blks - alloced;
506 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); 506 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
507 if (error) 507 if (error)
508 return error; 508 return error;
509 alloced += n; 509 alloced += n;
510 if (state != ALLOC_DATA || gfs2_is_jdata(ip)) 510 if (state != ALLOC_DATA || gfs2_is_jdata(ip))
511 gfs2_trans_add_unrevoke(sdp, bn, n); 511 gfs2_trans_add_unrevoke(sdp, bn, n);
512 switch (state) { 512 switch (state) {
513 /* Growing height of tree */ 513 /* Growing height of tree */
514 case ALLOC_GROW_HEIGHT: 514 case ALLOC_GROW_HEIGHT:
515 if (i == 1) { 515 if (i == 1) {
516 ptr = (__be64 *)(dibh->b_data + 516 ptr = (__be64 *)(dibh->b_data +
517 sizeof(struct gfs2_dinode)); 517 sizeof(struct gfs2_dinode));
518 zero_bn = *ptr; 518 zero_bn = *ptr;
519 } 519 }
520 for (; i - 1 < height - ip->i_height && n > 0; i++, n--) 520 for (; i - 1 < height - ip->i_height && n > 0; i++, n--)
521 gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++); 521 gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
522 if (i - 1 == height - ip->i_height) { 522 if (i - 1 == height - ip->i_height) {
523 i--; 523 i--;
524 gfs2_buffer_copy_tail(mp->mp_bh[i], 524 gfs2_buffer_copy_tail(mp->mp_bh[i],
525 sizeof(struct gfs2_meta_header), 525 sizeof(struct gfs2_meta_header),
526 dibh, sizeof(struct gfs2_dinode)); 526 dibh, sizeof(struct gfs2_dinode));
527 gfs2_buffer_clear_tail(dibh, 527 gfs2_buffer_clear_tail(dibh,
528 sizeof(struct gfs2_dinode) + 528 sizeof(struct gfs2_dinode) +
529 sizeof(__be64)); 529 sizeof(__be64));
530 ptr = (__be64 *)(mp->mp_bh[i]->b_data + 530 ptr = (__be64 *)(mp->mp_bh[i]->b_data +
531 sizeof(struct gfs2_meta_header)); 531 sizeof(struct gfs2_meta_header));
532 *ptr = zero_bn; 532 *ptr = zero_bn;
533 state = ALLOC_GROW_DEPTH; 533 state = ALLOC_GROW_DEPTH;
534 for(i = branch_start; i < height; i++) { 534 for(i = branch_start; i < height; i++) {
535 if (mp->mp_bh[i] == NULL) 535 if (mp->mp_bh[i] == NULL)
536 break; 536 break;
537 brelse(mp->mp_bh[i]); 537 brelse(mp->mp_bh[i]);
538 mp->mp_bh[i] = NULL; 538 mp->mp_bh[i] = NULL;
539 } 539 }
540 i = branch_start; 540 i = branch_start;
541 } 541 }
542 if (n == 0) 542 if (n == 0)
543 break; 543 break;
544 /* Branching from existing tree */ 544 /* Branching from existing tree */
545 case ALLOC_GROW_DEPTH: 545 case ALLOC_GROW_DEPTH:
546 if (i > 1 && i < height) 546 if (i > 1 && i < height)
547 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1); 547 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1);
548 for (; i < height && n > 0; i++, n--) 548 for (; i < height && n > 0; i++, n--)
549 gfs2_indirect_init(mp, ip->i_gl, i, 549 gfs2_indirect_init(mp, ip->i_gl, i,
550 mp->mp_list[i-1], bn++); 550 mp->mp_list[i-1], bn++);
551 if (i == height) 551 if (i == height)
552 state = ALLOC_DATA; 552 state = ALLOC_DATA;
553 if (n == 0) 553 if (n == 0)
554 break; 554 break;
555 /* Tree complete, adding data blocks */ 555 /* Tree complete, adding data blocks */
556 case ALLOC_DATA: 556 case ALLOC_DATA:
557 BUG_ON(n > dblks); 557 BUG_ON(n > dblks);
558 BUG_ON(mp->mp_bh[end_of_metadata] == NULL); 558 BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
559 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 1); 559 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 1);
560 dblks = n; 560 dblks = n;
561 ptr = metapointer(end_of_metadata, mp); 561 ptr = metapointer(end_of_metadata, mp);
562 dblock = bn; 562 dblock = bn;
563 while (n-- > 0) 563 while (n-- > 0)
564 *ptr++ = cpu_to_be64(bn++); 564 *ptr++ = cpu_to_be64(bn++);
565 if (buffer_zeronew(bh_map)) { 565 if (buffer_zeronew(bh_map)) {
566 ret = sb_issue_zeroout(sb, dblock, dblks, 566 ret = sb_issue_zeroout(sb, dblock, dblks,
567 GFP_NOFS); 567 GFP_NOFS);
568 if (ret) { 568 if (ret) {
569 fs_err(sdp, 569 fs_err(sdp,
570 "Failed to zero data buffers\n"); 570 "Failed to zero data buffers\n");
571 clear_buffer_zeronew(bh_map); 571 clear_buffer_zeronew(bh_map);
572 } 572 }
573 } 573 }
574 break; 574 break;
575 } 575 }
576 } while ((state != ALLOC_DATA) || !dblock); 576 } while ((state != ALLOC_DATA) || !dblock);
577 577
578 ip->i_height = height; 578 ip->i_height = height;
579 gfs2_add_inode_blocks(&ip->i_inode, alloced); 579 gfs2_add_inode_blocks(&ip->i_inode, alloced);
580 gfs2_dinode_out(ip, mp->mp_bh[0]->b_data); 580 gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
581 map_bh(bh_map, inode->i_sb, dblock); 581 map_bh(bh_map, inode->i_sb, dblock);
582 bh_map->b_size = dblks << inode->i_blkbits; 582 bh_map->b_size = dblks << inode->i_blkbits;
583 set_buffer_new(bh_map); 583 set_buffer_new(bh_map);
584 return 0; 584 return 0;
585 } 585 }
586 586
/**
 * gfs2_block_map - Map a block from an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if its ok to alloc blocks to satify the request
 *
 * Sets buffer_mapped() if successful, sets buffer_boundary() if a
 * read of metadata will be required before the next block can be
 * mapped. Sets buffer_new() if new blocks were allocated.
 *
 * Returns: errno
 */

int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int bsize = sdp->sd_sb.sb_bsize;
	/* Caller encodes the maximum extent it wants in bh_map->b_size */
	const unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
	const u64 *arr = sdp->sd_heightsize;
	__be64 *ptr;
	u64 size;
	struct metapath mp;
	int ret;
	int eob;
	unsigned int len;
	struct buffer_head *bh;
	u8 height;

	BUG_ON(maxlen == 0);

	memset(mp.mp_bh, 0, sizeof(mp.mp_bh));
	bmap_lock(ip, create);
	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
	/* Directories use the journaled-data block size and height tables */
	if (gfs2_is_dir(ip)) {
		bsize = sdp->sd_jbsize;
		arr = sdp->sd_jheightsize;
	}

	ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
	if (ret)
		goto out;

	/* Work out the metadata tree height required to reach lblock */
	height = ip->i_height;
	size = (lblock + 1) * bsize;
	while (size > arr[height])
		height++;
	find_metapath(sdp, lblock, &mp, height);
	ret = 1;
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;
	ret = lookup_metapath(ip, &mp);
	if (ret < 0)
		goto out;
	if (ret != ip->i_height)
		goto do_alloc;
	ptr = metapointer(ip->i_height - 1, &mp);
	if (*ptr == 0)
		goto do_alloc;
	/* Block already allocated: map it and report the extent length */
	map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr));
	bh = mp.mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob);
	bh_map->b_size = (len << inode->i_blkbits);
	if (eob)
		set_buffer_boundary(bh_map);
	ret = 0;
out:
	release_metapath(&mp);
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	bmap_unlock(ip, create);
	return ret;

do_alloc:
	/* All allocations are done here, firstly check create flag */
	if (!create) {
		BUG_ON(gfs2_is_stuffed(ip));
		ret = 0;
		goto out;
	}

	/* At this point ret is the tree depth of already allocated blocks */
	ret = gfs2_bmap_alloc(inode, lblock, bh_map, &mp, ret, height, maxlen);
	goto out;
}
676 676
677 /* 677 /*
678 * Deprecated: do not use in new code 678 * Deprecated: do not use in new code
679 */ 679 */
680 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen) 680 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
681 { 681 {
682 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 }; 682 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
683 int ret; 683 int ret;
684 int create = *new; 684 int create = *new;
685 685
686 BUG_ON(!extlen); 686 BUG_ON(!extlen);
687 BUG_ON(!dblock); 687 BUG_ON(!dblock);
688 BUG_ON(!new); 688 BUG_ON(!new);
689 689
690 bh.b_size = 1 << (inode->i_blkbits + (create ? 0 : 5)); 690 bh.b_size = 1 << (inode->i_blkbits + (create ? 0 : 5));
691 ret = gfs2_block_map(inode, lblock, &bh, create); 691 ret = gfs2_block_map(inode, lblock, &bh, create);
692 *extlen = bh.b_size >> inode->i_blkbits; 692 *extlen = bh.b_size >> inode->i_blkbits;
693 *dblock = bh.b_blocknr; 693 *dblock = bh.b_blocknr;
694 if (buffer_new(&bh)) 694 if (buffer_new(&bh))
695 *new = 1; 695 *new = 1;
696 else 696 else
697 *new = 0; 697 *new = 0;
698 return ret; 698 return ret;
699 } 699 }
700 700
/**
 * do_strip - Look for a particular layer of the file and strip it off
 * @ip: the inode
 * @dibh: the dinode buffer
 * @bh: A buffer of pointers
 * @top: The first pointer in the buffer
 * @bottom: One more than the last pointer
 * @height: the height this buffer is at
 * @sm: a pointer to a struct strip_mine describing the layer to strip
 *
 * Returns: errno
 */

static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
		    struct buffer_head *bh, __be64 *top, __be64 *bottom,
		    unsigned int height, struct strip_mine *sm)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrp_list rlist;
	u64 bn, bstart;
	u32 blen, btotal;
	__be64 *p;
	unsigned int rg_blocks = 0;
	int metadata;
	unsigned int revokes = 0;
	int x;
	int error;

	error = gfs2_rindex_update(sdp);
	if (error)
		return error;

	if (!*top)
		sm->sm_first = 0;

	/* Only buffers at the height this pass is targeting are stripped */
	if (height != sm->sm_height)
		return 0;

	if (sm->sm_first) {
		top++;
		sm->sm_first = 0;
	}

	/* Everything above the bottom layer of the tree is metadata */
	metadata = (height != ip->i_height - 1);
	if (metadata)
		revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
	else if (ip->i_depth)
		revokes = sdp->sd_inptrs;

	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
	bstart = 0;
	blen = 0;

	/*
	 * First pass: coalesce runs of contiguous blocks and build the
	 * list of resource groups whose glocks we will need.
	 */
	for (p = top; p < bottom; p++) {
		if (!*p)
			continue;

		bn = be64_to_cpu(*p);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart)
				gfs2_rlist_add(ip, &rlist, bstart);

			bstart = bn;
			blen = 1;
		}
	}

	if (bstart)
		gfs2_rlist_add(ip, &rlist, bstart);
	else
		goto out; /* Nothing to do */

	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);

	/* Sum rgrp bitmap blocks to size the transaction below */
	for (x = 0; x < rlist.rl_rgrps; x++) {
		struct gfs2_rgrpd *rgd;
		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
		rg_blocks += rgd->rd_length;
	}

	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
	if (error)
		goto out_rlist;

	if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
		gfs2_rs_deltree(ip, ip->i_res);

	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
				 RES_INDIRECT + RES_STATFS + RES_QUOTA,
				 revokes);
	if (error)
		goto out_rg_gunlock;

	down_write(&ip->i_rw_mutex);

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_trans_add_bh(ip->i_gl, bh, 1);

	bstart = 0;
	blen = 0;
	btotal = 0;

	/*
	 * Second pass: actually free the coalesced extents, zero the
	 * on-disk pointers and adjust the inode's block count.
	 */
	for (p = top; p < bottom; p++) {
		if (!*p)
			continue;

		bn = be64_to_cpu(*p);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart) {
				__gfs2_free_blocks(ip, bstart, blen, metadata);
				btotal += blen;
			}

			bstart = bn;
			blen = 1;
		}

		*p = 0;
		gfs2_add_inode_blocks(&ip->i_inode, -1);
	}
	if (bstart) {
		__gfs2_free_blocks(ip, bstart, blen, metadata);
		btotal += blen;
	}

	/* Freed blocks go back to the statfs/quota accounting */
	gfs2_statfs_change(sdp, 0, +btotal, 0);
	gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
			  ip->i_inode.i_gid);

	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;

	gfs2_dinode_out(ip, dibh->b_data);

	up_write(&ip->i_rw_mutex);

	gfs2_trans_end(sdp);

out_rg_gunlock:
	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
out_rlist:
	gfs2_rlist_free(&rlist);
out:
	return error;
}
851 851
/**
 * recursive_scan - recursively scan through the end of a file
 * @ip: the inode
 * @dibh: the dinode buffer
 * @mp: the path through the metadata to the point to start
 * @height: the height the recursion is at
 * @block: the indirect block to look at
 * @first: 1 if this is the first block
 * @sm: data opaque to this function, passed through to do_strip()
 *
 * When this is first called @height and @block should be zero and
 * @first should be 1.
 *
 * Returns: errno
 */

static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
			  struct metapath *mp, unsigned int height,
			  u64 block, int first, struct strip_mine *sm)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *bh = NULL;
	__be64 *top, *bottom;
	u64 bn;
	int error;
	int mh_size = sizeof(struct gfs2_meta_header);

	if (!height) {
		/* Height 0: the block pointers live in the dinode itself */
		error = gfs2_meta_inode_buffer(ip, &bh);
		if (error)
			return error;
		dibh = bh;

		top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
		bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
	} else {
		error = gfs2_meta_indirect_buffer(ip, height, block, &bh);
		if (error)
			return error;

		/* Only the first buffer at each level starts mid-buffer */
		top = (__be64 *)(bh->b_data + mh_size) +
				  (first ? mp->mp_list[height] : 0);

		bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
	}

	error = do_strip(ip, dibh, bh, top, bottom, height, sm);
	if (error)
		goto out;

	if (height < ip->i_height - 1) {

		gfs2_metapath_ra(ip->i_gl, bh, top);

		/* Descend into each non-null pointer below this level */
		for (; top < bottom; top++, first = 0) {
			if (!*top)
				continue;

			bn = be64_to_cpu(*top);

			error = recursive_scan(ip, dibh, mp, height + 1, bn,
					       first, sm);
			if (error)
				break;
		}
	}
out:
	brelse(bh);
	return error;
}
922 922
923 923
/**
 * gfs2_block_truncate_page - Deal with zeroing out data for truncate
 *
 * Zeroes the tail of the (partial) block containing the new EOF so
 * that stale data is not exposed if the file is later extended.
 *
 * This is partly borrowed from ext3.
 */
static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
{
	struct inode *inode = mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned long index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize, iblock, length, pos;
	struct buffer_head *bh;
	struct page *page;
	int err;

	page = find_or_create_page(mapping, index, GFP_NOFS);
	if (!page)
		return 0;

	blocksize = inode->i_sb->s_blocksize;
	/* Bytes to zero: from offset to the end of its block */
	length = blocksize - (offset & (blocksize - 1));
	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;

	if (!buffer_mapped(bh)) {
		gfs2_block_map(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
		err = 0;
	}

	/* Journal the buffer unless the inode is in writeback mode */
	if (!gfs2_is_writeback(ip))
		gfs2_trans_add_bh(ip->i_gl, bh, 0);

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
unlock:
	unlock_page(page);
	page_cache_release(page);
	return err;
}
993 993
/**
 * trunc_start - begin a truncate: update the inode size, zero the tail
 * @inode: the inode being truncated
 * @oldsize: the current inode size
 * @newsize: the new (smaller) size
 *
 * For unstuffed inodes, sets GFS2_DIF_TRUNC_IN_PROG on disk so an
 * interrupted truncate can be finished later; trunc_end() clears it.
 *
 * Returns: errno
 */
static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct address_space *mapping = inode->i_mapping;
	struct buffer_head *dibh;
	int journaled = gfs2_is_jdata(ip);
	int error;

	error = gfs2_trans_begin(sdp,
				 RES_DINODE + (journaled ? RES_JDATA : 0), 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);

	if (gfs2_is_stuffed(ip)) {
		/* Data lives in the dinode: just clear beyond newsize */
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	} else {
		/* Zero the partial block at the new EOF, if any */
		if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) {
			error = gfs2_block_truncate_page(mapping, newsize);
			if (error)
				goto out_brelse;
		}
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
	}

	i_size_write(inode, newsize);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
	gfs2_dinode_out(ip, dibh->b_data);

	truncate_pagecache(inode, oldsize, newsize);
out_brelse:
	brelse(dibh);
out:
	gfs2_trans_end(sdp);
	return error;
}
1036 1036
1037 static int trunc_dealloc(struct gfs2_inode *ip, u64 size) 1037 static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
1038 { 1038 {
1039 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1039 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1040 unsigned int height = ip->i_height; 1040 unsigned int height = ip->i_height;
1041 u64 lblock; 1041 u64 lblock;
1042 struct metapath mp; 1042 struct metapath mp;
1043 int error; 1043 int error;
1044 1044
1045 if (!size) 1045 if (!size)
1046 lblock = 0; 1046 lblock = 0;
1047 else 1047 else
1048 lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift; 1048 lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
1049 1049
1050 find_metapath(sdp, lblock, &mp, ip->i_height); 1050 find_metapath(sdp, lblock, &mp, ip->i_height);
1051 error = gfs2_rindex_update(sdp); 1051 error = gfs2_rindex_update(sdp);
1052 if (error) 1052 if (error)
1053 return error; 1053 return error;
1054 1054
1055 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1055 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1056 if (error) 1056 if (error)
1057 return error; 1057 return error;
1058 1058
1059 while (height--) { 1059 while (height--) {
1060 struct strip_mine sm; 1060 struct strip_mine sm;
1061 sm.sm_first = !!size; 1061 sm.sm_first = !!size;
1062 sm.sm_height = height; 1062 sm.sm_height = height;
1063 1063
1064 error = recursive_scan(ip, NULL, &mp, 0, 0, 1, &sm); 1064 error = recursive_scan(ip, NULL, &mp, 0, 0, 1, &sm);
1065 if (error) 1065 if (error)
1066 break; 1066 break;
1067 } 1067 }
1068 1068
1069 gfs2_quota_unhold(ip); 1069 gfs2_quota_unhold(ip);
1070 1070
1071 return error; 1071 return error;
1072 } 1072 }
1073 1073
1074 static int trunc_end(struct gfs2_inode *ip) 1074 static int trunc_end(struct gfs2_inode *ip)
1075 { 1075 {
1076 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1076 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1077 struct buffer_head *dibh; 1077 struct buffer_head *dibh;
1078 int error; 1078 int error;
1079 1079
1080 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1080 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1081 if (error) 1081 if (error)
1082 return error; 1082 return error;
1083 1083
1084 down_write(&ip->i_rw_mutex); 1084 down_write(&ip->i_rw_mutex);
1085 1085
1086 error = gfs2_meta_inode_buffer(ip, &dibh); 1086 error = gfs2_meta_inode_buffer(ip, &dibh);
1087 if (error) 1087 if (error)
1088 goto out; 1088 goto out;
1089 1089
1090 if (!i_size_read(&ip->i_inode)) { 1090 if (!i_size_read(&ip->i_inode)) {
1091 ip->i_height = 0; 1091 ip->i_height = 0;
1092 ip->i_goal = ip->i_no_addr; 1092 ip->i_goal = ip->i_no_addr;
1093 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 1093 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1094 } 1094 }
1095 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1095 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1096 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; 1096 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1097 1097
1098 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1098 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1099 gfs2_dinode_out(ip, dibh->b_data); 1099 gfs2_dinode_out(ip, dibh->b_data);
1100 brelse(dibh); 1100 brelse(dibh);
1101 1101
1102 out: 1102 out:
1103 up_write(&ip->i_rw_mutex); 1103 up_write(&ip->i_rw_mutex);
1104 gfs2_trans_end(sdp); 1104 gfs2_trans_end(sdp);
1105 return error; 1105 return error;
1106 } 1106 }
1107 1107
1108 /** 1108 /**
1109 * do_shrink - make a file smaller 1109 * do_shrink - make a file smaller
1110 * @inode: the inode 1110 * @inode: the inode
1111 * @oldsize: the current inode size 1111 * @oldsize: the current inode size
1112 * @newsize: the size to make the file 1112 * @newsize: the size to make the file
1113 * 1113 *
1114 * Called with an exclusive lock on @inode. The @size must 1114 * Called with an exclusive lock on @inode. The @size must
1115 * be equal to or smaller than the current inode size. 1115 * be equal to or smaller than the current inode size.
1116 * 1116 *
1117 * Returns: errno 1117 * Returns: errno
1118 */ 1118 */
1119 1119
1120 static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize) 1120 static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize)
1121 { 1121 {
1122 struct gfs2_inode *ip = GFS2_I(inode); 1122 struct gfs2_inode *ip = GFS2_I(inode);
1123 int error; 1123 int error;
1124 1124
1125 error = trunc_start(inode, oldsize, newsize); 1125 error = trunc_start(inode, oldsize, newsize);
1126 if (error < 0) 1126 if (error < 0)
1127 return error; 1127 return error;
1128 if (gfs2_is_stuffed(ip)) 1128 if (gfs2_is_stuffed(ip))
1129 return 0; 1129 return 0;
1130 1130
1131 error = trunc_dealloc(ip, newsize); 1131 error = trunc_dealloc(ip, newsize);
1132 if (error == 0) 1132 if (error == 0)
1133 error = trunc_end(ip); 1133 error = trunc_end(ip);
1134 1134
1135 return error; 1135 return error;
1136 } 1136 }
1137 1137
1138 void gfs2_trim_blocks(struct inode *inode) 1138 void gfs2_trim_blocks(struct inode *inode)
1139 { 1139 {
1140 u64 size = inode->i_size; 1140 u64 size = inode->i_size;
1141 int ret; 1141 int ret;
1142 1142
1143 ret = do_shrink(inode, size, size); 1143 ret = do_shrink(inode, size, size);
1144 WARN_ON(ret != 0); 1144 WARN_ON(ret != 0);
1145 } 1145 }
1146 1146
1147 /** 1147 /**
1148 * do_grow - Touch and update inode size 1148 * do_grow - Touch and update inode size
1149 * @inode: The inode 1149 * @inode: The inode
1150 * @size: The new size 1150 * @size: The new size
1151 * 1151 *
1152 * This function updates the timestamps on the inode and 1152 * This function updates the timestamps on the inode and
1153 * may also increase the size of the inode. This function 1153 * may also increase the size of the inode. This function
1154 * must not be called with @size any smaller than the current 1154 * must not be called with @size any smaller than the current
1155 * inode size. 1155 * inode size.
1156 * 1156 *
1157 * Although it is not strictly required to unstuff files here, 1157 * Although it is not strictly required to unstuff files here,
1158 * earlier versions of GFS2 have a bug in the stuffed file reading 1158 * earlier versions of GFS2 have a bug in the stuffed file reading
1159 * code which will result in a buffer overrun if the size is larger 1159 * code which will result in a buffer overrun if the size is larger
1160 * than the max stuffed file size. In order to prevent this from 1160 * than the max stuffed file size. In order to prevent this from
1161 * occurring, such files are unstuffed, but in other cases we can 1161 * occurring, such files are unstuffed, but in other cases we can
1162 * just update the inode size directly. 1162 * just update the inode size directly.
1163 * 1163 *
1164 * Returns: 0 on success, or -ve on error 1164 * Returns: 0 on success, or -ve on error
1165 */ 1165 */
1166 1166
1167 static int do_grow(struct inode *inode, u64 size) 1167 static int do_grow(struct inode *inode, u64 size)
1168 { 1168 {
1169 struct gfs2_inode *ip = GFS2_I(inode); 1169 struct gfs2_inode *ip = GFS2_I(inode);
1170 struct gfs2_sbd *sdp = GFS2_SB(inode); 1170 struct gfs2_sbd *sdp = GFS2_SB(inode);
1171 struct buffer_head *dibh; 1171 struct buffer_head *dibh;
1172 int error; 1172 int error;
1173 int unstuff = 0; 1173 int unstuff = 0;
1174 1174
1175 if (gfs2_is_stuffed(ip) && 1175 if (gfs2_is_stuffed(ip) &&
1176 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { 1176 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
1177 error = gfs2_quota_lock_check(ip); 1177 error = gfs2_quota_lock_check(ip);
1178 if (error) 1178 if (error)
1179 return error; 1179 return error;
1180 1180
1181 error = gfs2_inplace_reserve(ip, 1); 1181 error = gfs2_inplace_reserve(ip, 1, 0);
1182 if (error) 1182 if (error)
1183 goto do_grow_qunlock; 1183 goto do_grow_qunlock;
1184 unstuff = 1; 1184 unstuff = 1;
1185 } 1185 }
1186 1186
1187 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0); 1187 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0);
1188 if (error) 1188 if (error)
1189 goto do_grow_release; 1189 goto do_grow_release;
1190 1190
1191 if (unstuff) { 1191 if (unstuff) {
1192 error = gfs2_unstuff_dinode(ip, NULL); 1192 error = gfs2_unstuff_dinode(ip, NULL);
1193 if (error) 1193 if (error)
1194 goto do_end_trans; 1194 goto do_end_trans;
1195 } 1195 }
1196 1196
1197 error = gfs2_meta_inode_buffer(ip, &dibh); 1197 error = gfs2_meta_inode_buffer(ip, &dibh);
1198 if (error) 1198 if (error)
1199 goto do_end_trans; 1199 goto do_end_trans;
1200 1200
1201 i_size_write(inode, size); 1201 i_size_write(inode, size);
1202 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1202 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1203 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1203 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1204 gfs2_dinode_out(ip, dibh->b_data); 1204 gfs2_dinode_out(ip, dibh->b_data);
1205 brelse(dibh); 1205 brelse(dibh);
1206 1206
1207 do_end_trans: 1207 do_end_trans:
1208 gfs2_trans_end(sdp); 1208 gfs2_trans_end(sdp);
1209 do_grow_release: 1209 do_grow_release:
1210 if (unstuff) { 1210 if (unstuff) {
1211 gfs2_inplace_release(ip); 1211 gfs2_inplace_release(ip);
1212 do_grow_qunlock: 1212 do_grow_qunlock:
1213 gfs2_quota_unlock(ip); 1213 gfs2_quota_unlock(ip);
1214 } 1214 }
1215 return error; 1215 return error;
1216 } 1216 }
1217 1217
1218 /** 1218 /**
1219 * gfs2_setattr_size - make a file a given size 1219 * gfs2_setattr_size - make a file a given size
1220 * @inode: the inode 1220 * @inode: the inode
1221 * @newsize: the size to make the file 1221 * @newsize: the size to make the file
1222 * 1222 *
1223 * The file size can grow, shrink, or stay the same size. This 1223 * The file size can grow, shrink, or stay the same size. This
1224 * is called holding i_mutex and an exclusive glock on the inode 1224 * is called holding i_mutex and an exclusive glock on the inode
1225 * in question. 1225 * in question.
1226 * 1226 *
1227 * Returns: errno 1227 * Returns: errno
1228 */ 1228 */
1229 1229
1230 int gfs2_setattr_size(struct inode *inode, u64 newsize) 1230 int gfs2_setattr_size(struct inode *inode, u64 newsize)
1231 { 1231 {
1232 int ret; 1232 int ret;
1233 u64 oldsize; 1233 u64 oldsize;
1234 1234
1235 BUG_ON(!S_ISREG(inode->i_mode)); 1235 BUG_ON(!S_ISREG(inode->i_mode));
1236 1236
1237 ret = inode_newsize_ok(inode, newsize); 1237 ret = inode_newsize_ok(inode, newsize);
1238 if (ret) 1238 if (ret)
1239 return ret; 1239 return ret;
1240 1240
1241 inode_dio_wait(inode); 1241 inode_dio_wait(inode);
1242 1242
1243 oldsize = inode->i_size; 1243 oldsize = inode->i_size;
1244 if (newsize >= oldsize) 1244 if (newsize >= oldsize)
1245 return do_grow(inode, newsize); 1245 return do_grow(inode, newsize);
1246 1246
1247 return do_shrink(inode, oldsize, newsize); 1247 return do_shrink(inode, oldsize, newsize);
1248 } 1248 }
1249 1249
1250 int gfs2_truncatei_resume(struct gfs2_inode *ip) 1250 int gfs2_truncatei_resume(struct gfs2_inode *ip)
1251 { 1251 {
1252 int error; 1252 int error;
1253 error = trunc_dealloc(ip, i_size_read(&ip->i_inode)); 1253 error = trunc_dealloc(ip, i_size_read(&ip->i_inode));
1254 if (!error) 1254 if (!error)
1255 error = trunc_end(ip); 1255 error = trunc_end(ip);
1256 return error; 1256 return error;
1257 } 1257 }
1258 1258
1259 int gfs2_file_dealloc(struct gfs2_inode *ip) 1259 int gfs2_file_dealloc(struct gfs2_inode *ip)
1260 { 1260 {
1261 return trunc_dealloc(ip, 0); 1261 return trunc_dealloc(ip, 0);
1262 } 1262 }
1263 1263
1264 /** 1264 /**
1265 * gfs2_write_alloc_required - figure out if a write will require an allocation 1265 * gfs2_write_alloc_required - figure out if a write will require an allocation
1266 * @ip: the file being written to 1266 * @ip: the file being written to
1267 * @offset: the offset to write to 1267 * @offset: the offset to write to
1268 * @len: the number of bytes being written 1268 * @len: the number of bytes being written
1269 * 1269 *
1270 * Returns: 1 if an alloc is required, 0 otherwise 1270 * Returns: 1 if an alloc is required, 0 otherwise
1271 */ 1271 */
1272 1272
1273 int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, 1273 int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1274 unsigned int len) 1274 unsigned int len)
1275 { 1275 {
1276 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1276 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1277 struct buffer_head bh; 1277 struct buffer_head bh;
1278 unsigned int shift; 1278 unsigned int shift;
1279 u64 lblock, lblock_stop, size; 1279 u64 lblock, lblock_stop, size;
1280 u64 end_of_file; 1280 u64 end_of_file;
1281 1281
1282 if (!len) 1282 if (!len)
1283 return 0; 1283 return 0;
1284 1284
1285 if (gfs2_is_stuffed(ip)) { 1285 if (gfs2_is_stuffed(ip)) {
1286 if (offset + len > 1286 if (offset + len >
1287 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) 1287 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
1288 return 1; 1288 return 1;
1289 return 0; 1289 return 0;
1290 } 1290 }
1291 1291
1292 shift = sdp->sd_sb.sb_bsize_shift; 1292 shift = sdp->sd_sb.sb_bsize_shift;
1293 BUG_ON(gfs2_is_dir(ip)); 1293 BUG_ON(gfs2_is_dir(ip));
1294 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; 1294 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
1295 lblock = offset >> shift; 1295 lblock = offset >> shift;
1296 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; 1296 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1297 if (lblock_stop > end_of_file) 1297 if (lblock_stop > end_of_file)
1298 return 1; 1298 return 1;
1299 1299
1300 size = (lblock_stop - lblock) << shift; 1300 size = (lblock_stop - lblock) << shift;
1301 do { 1301 do {
1302 bh.b_state = 0; 1302 bh.b_state = 0;
1303 bh.b_size = size; 1303 bh.b_size = size;
1304 gfs2_block_map(&ip->i_inode, lblock, &bh, 0); 1304 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
1305 if (!buffer_mapped(&bh)) 1305 if (!buffer_mapped(&bh))
1306 return 1; 1306 return 1;
1307 size -= bh.b_size; 1307 size -= bh.b_size;
1308 lblock += (bh.b_size >> ip->i_inode.i_blkbits); 1308 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1309 } while(size > 0); 1309 } while(size > 0);
1310 1310
1311 return 0; 1311 return 0;
1312 } 1312 }
1313 1313
1314 1314
1 /* 1 /*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10 #include <linux/slab.h> 10 #include <linux/slab.h>
11 #include <linux/spinlock.h> 11 #include <linux/spinlock.h>
12 #include <linux/completion.h> 12 #include <linux/completion.h>
13 #include <linux/buffer_head.h> 13 #include <linux/buffer_head.h>
14 #include <linux/pagemap.h> 14 #include <linux/pagemap.h>
15 #include <linux/uio.h> 15 #include <linux/uio.h>
16 #include <linux/blkdev.h> 16 #include <linux/blkdev.h>
17 #include <linux/mm.h> 17 #include <linux/mm.h>
18 #include <linux/mount.h> 18 #include <linux/mount.h>
19 #include <linux/fs.h> 19 #include <linux/fs.h>
20 #include <linux/gfs2_ondisk.h> 20 #include <linux/gfs2_ondisk.h>
21 #include <linux/falloc.h> 21 #include <linux/falloc.h>
22 #include <linux/swap.h> 22 #include <linux/swap.h>
23 #include <linux/crc32.h> 23 #include <linux/crc32.h>
24 #include <linux/writeback.h> 24 #include <linux/writeback.h>
25 #include <asm/uaccess.h> 25 #include <asm/uaccess.h>
26 #include <linux/dlm.h> 26 #include <linux/dlm.h>
27 #include <linux/dlm_plock.h> 27 #include <linux/dlm_plock.h>
28 28
29 #include "gfs2.h" 29 #include "gfs2.h"
30 #include "incore.h" 30 #include "incore.h"
31 #include "bmap.h" 31 #include "bmap.h"
32 #include "dir.h" 32 #include "dir.h"
33 #include "glock.h" 33 #include "glock.h"
34 #include "glops.h" 34 #include "glops.h"
35 #include "inode.h" 35 #include "inode.h"
36 #include "log.h" 36 #include "log.h"
37 #include "meta_io.h" 37 #include "meta_io.h"
38 #include "quota.h" 38 #include "quota.h"
39 #include "rgrp.h" 39 #include "rgrp.h"
40 #include "trans.h" 40 #include "trans.h"
41 #include "util.h" 41 #include "util.h"
42 42
43 /** 43 /**
44 * gfs2_llseek - seek to a location in a file 44 * gfs2_llseek - seek to a location in a file
45 * @file: the file 45 * @file: the file
46 * @offset: the offset 46 * @offset: the offset
47 * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END) 47 * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
48 * 48 *
49 * SEEK_END requires the glock for the file because it references the 49 * SEEK_END requires the glock for the file because it references the
50 * file's size. 50 * file's size.
51 * 51 *
52 * Returns: The new offset, or errno 52 * Returns: The new offset, or errno
53 */ 53 */
54 54
55 static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) 55 static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
56 { 56 {
57 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 57 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
58 struct gfs2_holder i_gh; 58 struct gfs2_holder i_gh;
59 loff_t error; 59 loff_t error;
60 60
61 switch (origin) { 61 switch (origin) {
62 case SEEK_END: /* These reference inode->i_size */ 62 case SEEK_END: /* These reference inode->i_size */
63 case SEEK_DATA: 63 case SEEK_DATA:
64 case SEEK_HOLE: 64 case SEEK_HOLE:
65 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, 65 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
66 &i_gh); 66 &i_gh);
67 if (!error) { 67 if (!error) {
68 error = generic_file_llseek(file, offset, origin); 68 error = generic_file_llseek(file, offset, origin);
69 gfs2_glock_dq_uninit(&i_gh); 69 gfs2_glock_dq_uninit(&i_gh);
70 } 70 }
71 break; 71 break;
72 case SEEK_CUR: 72 case SEEK_CUR:
73 case SEEK_SET: 73 case SEEK_SET:
74 error = generic_file_llseek(file, offset, origin); 74 error = generic_file_llseek(file, offset, origin);
75 break; 75 break;
76 default: 76 default:
77 error = -EINVAL; 77 error = -EINVAL;
78 } 78 }
79 79
80 return error; 80 return error;
81 } 81 }
82 82
83 /** 83 /**
84 * gfs2_readdir - Read directory entries from a directory 84 * gfs2_readdir - Read directory entries from a directory
85 * @file: The directory to read from 85 * @file: The directory to read from
86 * @dirent: Buffer for dirents 86 * @dirent: Buffer for dirents
87 * @filldir: Function used to do the copying 87 * @filldir: Function used to do the copying
88 * 88 *
89 * Returns: errno 89 * Returns: errno
90 */ 90 */
91 91
92 static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) 92 static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
93 { 93 {
94 struct inode *dir = file->f_mapping->host; 94 struct inode *dir = file->f_mapping->host;
95 struct gfs2_inode *dip = GFS2_I(dir); 95 struct gfs2_inode *dip = GFS2_I(dir);
96 struct gfs2_holder d_gh; 96 struct gfs2_holder d_gh;
97 u64 offset = file->f_pos; 97 u64 offset = file->f_pos;
98 int error; 98 int error;
99 99
100 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 100 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
101 error = gfs2_glock_nq(&d_gh); 101 error = gfs2_glock_nq(&d_gh);
102 if (error) { 102 if (error) {
103 gfs2_holder_uninit(&d_gh); 103 gfs2_holder_uninit(&d_gh);
104 return error; 104 return error;
105 } 105 }
106 106
107 error = gfs2_dir_read(dir, &offset, dirent, filldir, &file->f_ra); 107 error = gfs2_dir_read(dir, &offset, dirent, filldir, &file->f_ra);
108 108
109 gfs2_glock_dq_uninit(&d_gh); 109 gfs2_glock_dq_uninit(&d_gh);
110 110
111 file->f_pos = offset; 111 file->f_pos = offset;
112 112
113 return error; 113 return error;
114 } 114 }
115 115
116 /** 116 /**
117 * fsflags_cvt 117 * fsflags_cvt
118 * @table: A table of 32 u32 flags 118 * @table: A table of 32 u32 flags
119 * @val: a 32 bit value to convert 119 * @val: a 32 bit value to convert
120 * 120 *
121 * This function can be used to convert between fsflags values and 121 * This function can be used to convert between fsflags values and
122 * GFS2's own flags values. 122 * GFS2's own flags values.
123 * 123 *
124 * Returns: the converted flags 124 * Returns: the converted flags
125 */ 125 */
126 static u32 fsflags_cvt(const u32 *table, u32 val) 126 static u32 fsflags_cvt(const u32 *table, u32 val)
127 { 127 {
128 u32 res = 0; 128 u32 res = 0;
129 while(val) { 129 while(val) {
130 if (val & 1) 130 if (val & 1)
131 res |= *table; 131 res |= *table;
132 table++; 132 table++;
133 val >>= 1; 133 val >>= 1;
134 } 134 }
135 return res; 135 return res;
136 } 136 }
137 137
138 static const u32 fsflags_to_gfs2[32] = { 138 static const u32 fsflags_to_gfs2[32] = {
139 [3] = GFS2_DIF_SYNC, 139 [3] = GFS2_DIF_SYNC,
140 [4] = GFS2_DIF_IMMUTABLE, 140 [4] = GFS2_DIF_IMMUTABLE,
141 [5] = GFS2_DIF_APPENDONLY, 141 [5] = GFS2_DIF_APPENDONLY,
142 [7] = GFS2_DIF_NOATIME, 142 [7] = GFS2_DIF_NOATIME,
143 [12] = GFS2_DIF_EXHASH, 143 [12] = GFS2_DIF_EXHASH,
144 [14] = GFS2_DIF_INHERIT_JDATA, 144 [14] = GFS2_DIF_INHERIT_JDATA,
145 [17] = GFS2_DIF_TOPDIR, 145 [17] = GFS2_DIF_TOPDIR,
146 }; 146 };
147 147
148 static const u32 gfs2_to_fsflags[32] = { 148 static const u32 gfs2_to_fsflags[32] = {
149 [gfs2fl_Sync] = FS_SYNC_FL, 149 [gfs2fl_Sync] = FS_SYNC_FL,
150 [gfs2fl_Immutable] = FS_IMMUTABLE_FL, 150 [gfs2fl_Immutable] = FS_IMMUTABLE_FL,
151 [gfs2fl_AppendOnly] = FS_APPEND_FL, 151 [gfs2fl_AppendOnly] = FS_APPEND_FL,
152 [gfs2fl_NoAtime] = FS_NOATIME_FL, 152 [gfs2fl_NoAtime] = FS_NOATIME_FL,
153 [gfs2fl_ExHash] = FS_INDEX_FL, 153 [gfs2fl_ExHash] = FS_INDEX_FL,
154 [gfs2fl_TopLevel] = FS_TOPDIR_FL, 154 [gfs2fl_TopLevel] = FS_TOPDIR_FL,
155 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, 155 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
156 }; 156 };
157 157
158 static int gfs2_get_flags(struct file *filp, u32 __user *ptr) 158 static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
159 { 159 {
160 struct inode *inode = filp->f_path.dentry->d_inode; 160 struct inode *inode = filp->f_path.dentry->d_inode;
161 struct gfs2_inode *ip = GFS2_I(inode); 161 struct gfs2_inode *ip = GFS2_I(inode);
162 struct gfs2_holder gh; 162 struct gfs2_holder gh;
163 int error; 163 int error;
164 u32 fsflags; 164 u32 fsflags;
165 165
166 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); 166 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
167 error = gfs2_glock_nq(&gh); 167 error = gfs2_glock_nq(&gh);
168 if (error) 168 if (error)
169 return error; 169 return error;
170 170
171 fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_diskflags); 171 fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_diskflags);
172 if (!S_ISDIR(inode->i_mode) && ip->i_diskflags & GFS2_DIF_JDATA) 172 if (!S_ISDIR(inode->i_mode) && ip->i_diskflags & GFS2_DIF_JDATA)
173 fsflags |= FS_JOURNAL_DATA_FL; 173 fsflags |= FS_JOURNAL_DATA_FL;
174 if (put_user(fsflags, ptr)) 174 if (put_user(fsflags, ptr))
175 error = -EFAULT; 175 error = -EFAULT;
176 176
177 gfs2_glock_dq(&gh); 177 gfs2_glock_dq(&gh);
178 gfs2_holder_uninit(&gh); 178 gfs2_holder_uninit(&gh);
179 return error; 179 return error;
180 } 180 }
181 181
182 void gfs2_set_inode_flags(struct inode *inode) 182 void gfs2_set_inode_flags(struct inode *inode)
183 { 183 {
184 struct gfs2_inode *ip = GFS2_I(inode); 184 struct gfs2_inode *ip = GFS2_I(inode);
185 unsigned int flags = inode->i_flags; 185 unsigned int flags = inode->i_flags;
186 186
187 flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC); 187 flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
188 if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode)) 188 if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
189 inode->i_flags |= S_NOSEC; 189 inode->i_flags |= S_NOSEC;
190 if (ip->i_diskflags & GFS2_DIF_IMMUTABLE) 190 if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
191 flags |= S_IMMUTABLE; 191 flags |= S_IMMUTABLE;
192 if (ip->i_diskflags & GFS2_DIF_APPENDONLY) 192 if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
193 flags |= S_APPEND; 193 flags |= S_APPEND;
194 if (ip->i_diskflags & GFS2_DIF_NOATIME) 194 if (ip->i_diskflags & GFS2_DIF_NOATIME)
195 flags |= S_NOATIME; 195 flags |= S_NOATIME;
196 if (ip->i_diskflags & GFS2_DIF_SYNC) 196 if (ip->i_diskflags & GFS2_DIF_SYNC)
197 flags |= S_SYNC; 197 flags |= S_SYNC;
198 inode->i_flags = flags; 198 inode->i_flags = flags;
199 } 199 }
200 200
201 /* Flags that can be set by user space */ 201 /* Flags that can be set by user space */
202 #define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \ 202 #define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \
203 GFS2_DIF_IMMUTABLE| \ 203 GFS2_DIF_IMMUTABLE| \
204 GFS2_DIF_APPENDONLY| \ 204 GFS2_DIF_APPENDONLY| \
205 GFS2_DIF_NOATIME| \ 205 GFS2_DIF_NOATIME| \
206 GFS2_DIF_SYNC| \ 206 GFS2_DIF_SYNC| \
207 GFS2_DIF_SYSTEM| \ 207 GFS2_DIF_SYSTEM| \
208 GFS2_DIF_TOPDIR| \ 208 GFS2_DIF_TOPDIR| \
209 GFS2_DIF_INHERIT_JDATA) 209 GFS2_DIF_INHERIT_JDATA)
210 210
211 /** 211 /**
212 * gfs2_set_flags - set flags on an inode 212 * gfs2_set_flags - set flags on an inode
213 * @inode: The inode 213 * @inode: The inode
214 * @flags: The flags to set 214 * @flags: The flags to set
215 * @mask: Indicates which flags are valid 215 * @mask: Indicates which flags are valid
216 * 216 *
217 */ 217 */
218 static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) 218 static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
219 { 219 {
220 struct inode *inode = filp->f_path.dentry->d_inode; 220 struct inode *inode = filp->f_path.dentry->d_inode;
221 struct gfs2_inode *ip = GFS2_I(inode); 221 struct gfs2_inode *ip = GFS2_I(inode);
222 struct gfs2_sbd *sdp = GFS2_SB(inode); 222 struct gfs2_sbd *sdp = GFS2_SB(inode);
223 struct buffer_head *bh; 223 struct buffer_head *bh;
224 struct gfs2_holder gh; 224 struct gfs2_holder gh;
225 int error; 225 int error;
226 u32 new_flags, flags; 226 u32 new_flags, flags;
227 227
228 error = mnt_want_write_file(filp); 228 error = mnt_want_write_file(filp);
229 if (error) 229 if (error)
230 return error; 230 return error;
231 231
232 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 232 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
233 if (error) 233 if (error)
234 goto out_drop_write; 234 goto out_drop_write;
235 235
236 error = -EACCES; 236 error = -EACCES;
237 if (!inode_owner_or_capable(inode)) 237 if (!inode_owner_or_capable(inode))
238 goto out; 238 goto out;
239 239
240 error = 0; 240 error = 0;
241 flags = ip->i_diskflags; 241 flags = ip->i_diskflags;
242 new_flags = (flags & ~mask) | (reqflags & mask); 242 new_flags = (flags & ~mask) | (reqflags & mask);
243 if ((new_flags ^ flags) == 0) 243 if ((new_flags ^ flags) == 0)
244 goto out; 244 goto out;
245 245
246 error = -EINVAL; 246 error = -EINVAL;
247 if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET) 247 if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET)
248 goto out; 248 goto out;
249 249
250 error = -EPERM; 250 error = -EPERM;
251 if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE)) 251 if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE))
252 goto out; 252 goto out;
253 if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY)) 253 if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY))
254 goto out; 254 goto out;
255 if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) && 255 if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) &&
256 !capable(CAP_LINUX_IMMUTABLE)) 256 !capable(CAP_LINUX_IMMUTABLE))
257 goto out; 257 goto out;
258 if (!IS_IMMUTABLE(inode)) { 258 if (!IS_IMMUTABLE(inode)) {
259 error = gfs2_permission(inode, MAY_WRITE); 259 error = gfs2_permission(inode, MAY_WRITE);
260 if (error) 260 if (error)
261 goto out; 261 goto out;
262 } 262 }
263 if ((flags ^ new_flags) & GFS2_DIF_JDATA) { 263 if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
264 if (flags & GFS2_DIF_JDATA) 264 if (flags & GFS2_DIF_JDATA)
265 gfs2_log_flush(sdp, ip->i_gl); 265 gfs2_log_flush(sdp, ip->i_gl);
266 error = filemap_fdatawrite(inode->i_mapping); 266 error = filemap_fdatawrite(inode->i_mapping);
267 if (error) 267 if (error)
268 goto out; 268 goto out;
269 error = filemap_fdatawait(inode->i_mapping); 269 error = filemap_fdatawait(inode->i_mapping);
270 if (error) 270 if (error)
271 goto out; 271 goto out;
272 } 272 }
273 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 273 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
274 if (error) 274 if (error)
275 goto out; 275 goto out;
276 error = gfs2_meta_inode_buffer(ip, &bh); 276 error = gfs2_meta_inode_buffer(ip, &bh);
277 if (error) 277 if (error)
278 goto out_trans_end; 278 goto out_trans_end;
279 gfs2_trans_add_bh(ip->i_gl, bh, 1); 279 gfs2_trans_add_bh(ip->i_gl, bh, 1);
280 ip->i_diskflags = new_flags; 280 ip->i_diskflags = new_flags;
281 gfs2_dinode_out(ip, bh->b_data); 281 gfs2_dinode_out(ip, bh->b_data);
282 brelse(bh); 282 brelse(bh);
283 gfs2_set_inode_flags(inode); 283 gfs2_set_inode_flags(inode);
284 gfs2_set_aops(inode); 284 gfs2_set_aops(inode);
285 out_trans_end: 285 out_trans_end:
286 gfs2_trans_end(sdp); 286 gfs2_trans_end(sdp);
287 out: 287 out:
288 gfs2_glock_dq_uninit(&gh); 288 gfs2_glock_dq_uninit(&gh);
289 out_drop_write: 289 out_drop_write:
290 mnt_drop_write_file(filp); 290 mnt_drop_write_file(filp);
291 return error; 291 return error;
292 } 292 }
293 293
294 static int gfs2_set_flags(struct file *filp, u32 __user *ptr) 294 static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
295 { 295 {
296 struct inode *inode = filp->f_path.dentry->d_inode; 296 struct inode *inode = filp->f_path.dentry->d_inode;
297 u32 fsflags, gfsflags; 297 u32 fsflags, gfsflags;
298 298
299 if (get_user(fsflags, ptr)) 299 if (get_user(fsflags, ptr))
300 return -EFAULT; 300 return -EFAULT;
301 301
302 gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags); 302 gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
303 if (!S_ISDIR(inode->i_mode)) { 303 if (!S_ISDIR(inode->i_mode)) {
304 gfsflags &= ~GFS2_DIF_TOPDIR; 304 gfsflags &= ~GFS2_DIF_TOPDIR;
305 if (gfsflags & GFS2_DIF_INHERIT_JDATA) 305 if (gfsflags & GFS2_DIF_INHERIT_JDATA)
306 gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); 306 gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
307 return do_gfs2_set_flags(filp, gfsflags, ~0); 307 return do_gfs2_set_flags(filp, gfsflags, ~0);
308 } 308 }
309 return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA); 309 return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
310 } 310 }
311 311
312 static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 312 static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
313 { 313 {
314 switch(cmd) { 314 switch(cmd) {
315 case FS_IOC_GETFLAGS: 315 case FS_IOC_GETFLAGS:
316 return gfs2_get_flags(filp, (u32 __user *)arg); 316 return gfs2_get_flags(filp, (u32 __user *)arg);
317 case FS_IOC_SETFLAGS: 317 case FS_IOC_SETFLAGS:
318 return gfs2_set_flags(filp, (u32 __user *)arg); 318 return gfs2_set_flags(filp, (u32 __user *)arg);
319 case FITRIM: 319 case FITRIM:
320 return gfs2_fitrim(filp, (void __user *)arg); 320 return gfs2_fitrim(filp, (void __user *)arg);
321 } 321 }
322 return -ENOTTY; 322 return -ENOTTY;
323 } 323 }
324 324
325 /** 325 /**
326 * gfs2_size_hint - Give a hint to the size of a write request 326 * gfs2_size_hint - Give a hint to the size of a write request
327 * @file: The struct file 327 * @file: The struct file
328 * @offset: The file offset of the write 328 * @offset: The file offset of the write
329 * @size: The length of the write 329 * @size: The length of the write
330 * 330 *
331 * When we are about to do a write, this function records the total 331 * When we are about to do a write, this function records the total
332 * write size in order to provide a suitable hint to the lower layers 332 * write size in order to provide a suitable hint to the lower layers
333 * about how many blocks will be required. 333 * about how many blocks will be required.
334 * 334 *
335 */ 335 */
336 336
337 static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) 337 static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
338 { 338 {
339 struct inode *inode = filep->f_dentry->d_inode; 339 struct inode *inode = filep->f_dentry->d_inode;
340 struct gfs2_sbd *sdp = GFS2_SB(inode); 340 struct gfs2_sbd *sdp = GFS2_SB(inode);
341 struct gfs2_inode *ip = GFS2_I(inode); 341 struct gfs2_inode *ip = GFS2_I(inode);
342 size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; 342 size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
343 int hint = min_t(size_t, INT_MAX, blks); 343 int hint = min_t(size_t, INT_MAX, blks);
344 344
345 atomic_set(&ip->i_res->rs_sizehint, hint); 345 atomic_set(&ip->i_res->rs_sizehint, hint);
346 } 346 }
347 347
348 /** 348 /**
349 * gfs2_allocate_page_backing - Use bmap to allocate blocks 349 * gfs2_allocate_page_backing - Use bmap to allocate blocks
350 * @page: The (locked) page to allocate backing for 350 * @page: The (locked) page to allocate backing for
351 * 351 *
352 * We try to allocate all the blocks required for the page in 352 * We try to allocate all the blocks required for the page in
353 * one go. This might fail for various reasons, so we keep 353 * one go. This might fail for various reasons, so we keep
354 * trying until all the blocks to back this page are allocated. 354 * trying until all the blocks to back this page are allocated.
355 * If some of the blocks are already allocated, thats ok too. 355 * If some of the blocks are already allocated, thats ok too.
356 */ 356 */
357 357
358 static int gfs2_allocate_page_backing(struct page *page) 358 static int gfs2_allocate_page_backing(struct page *page)
359 { 359 {
360 struct inode *inode = page->mapping->host; 360 struct inode *inode = page->mapping->host;
361 struct buffer_head bh; 361 struct buffer_head bh;
362 unsigned long size = PAGE_CACHE_SIZE; 362 unsigned long size = PAGE_CACHE_SIZE;
363 u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); 363 u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
364 364
365 do { 365 do {
366 bh.b_state = 0; 366 bh.b_state = 0;
367 bh.b_size = size; 367 bh.b_size = size;
368 gfs2_block_map(inode, lblock, &bh, 1); 368 gfs2_block_map(inode, lblock, &bh, 1);
369 if (!buffer_mapped(&bh)) 369 if (!buffer_mapped(&bh))
370 return -EIO; 370 return -EIO;
371 size -= bh.b_size; 371 size -= bh.b_size;
372 lblock += (bh.b_size >> inode->i_blkbits); 372 lblock += (bh.b_size >> inode->i_blkbits);
373 } while(size > 0); 373 } while(size > 0);
374 return 0; 374 return 0;
375 } 375 }
376 376
377 /** 377 /**
378 * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable 378 * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable
379 * @vma: The virtual memory area 379 * @vma: The virtual memory area
380 * @page: The page which is about to become writable 380 * @page: The page which is about to become writable
381 * 381 *
382 * When the page becomes writable, we need to ensure that we have 382 * When the page becomes writable, we need to ensure that we have
383 * blocks allocated on disk to back that page. 383 * blocks allocated on disk to back that page.
384 */ 384 */
385 385
386 static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 386 static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
387 { 387 {
388 struct page *page = vmf->page; 388 struct page *page = vmf->page;
389 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 389 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
390 struct gfs2_inode *ip = GFS2_I(inode); 390 struct gfs2_inode *ip = GFS2_I(inode);
391 struct gfs2_sbd *sdp = GFS2_SB(inode); 391 struct gfs2_sbd *sdp = GFS2_SB(inode);
392 unsigned long last_index; 392 unsigned long last_index;
393 u64 pos = page->index << PAGE_CACHE_SHIFT; 393 u64 pos = page->index << PAGE_CACHE_SHIFT;
394 unsigned int data_blocks, ind_blocks, rblocks; 394 unsigned int data_blocks, ind_blocks, rblocks;
395 struct gfs2_holder gh; 395 struct gfs2_holder gh;
396 loff_t size; 396 loff_t size;
397 int ret; 397 int ret;
398 398
399 sb_start_pagefault(inode->i_sb); 399 sb_start_pagefault(inode->i_sb);
400 400
401 /* Update file times before taking page lock */ 401 /* Update file times before taking page lock */
402 file_update_time(vma->vm_file); 402 file_update_time(vma->vm_file);
403 403
404 ret = gfs2_rs_alloc(ip); 404 ret = gfs2_rs_alloc(ip);
405 if (ret) 405 if (ret)
406 return ret; 406 return ret;
407 407
408 gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE); 408 gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
409 409
410 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 410 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
411 ret = gfs2_glock_nq(&gh); 411 ret = gfs2_glock_nq(&gh);
412 if (ret) 412 if (ret)
413 goto out; 413 goto out;
414 414
415 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); 415 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
416 set_bit(GIF_SW_PAGED, &ip->i_flags); 416 set_bit(GIF_SW_PAGED, &ip->i_flags);
417 417
418 if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) { 418 if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) {
419 lock_page(page); 419 lock_page(page);
420 if (!PageUptodate(page) || page->mapping != inode->i_mapping) { 420 if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
421 ret = -EAGAIN; 421 ret = -EAGAIN;
422 unlock_page(page); 422 unlock_page(page);
423 } 423 }
424 goto out_unlock; 424 goto out_unlock;
425 } 425 }
426 426
427 ret = gfs2_rindex_update(sdp); 427 ret = gfs2_rindex_update(sdp);
428 if (ret) 428 if (ret)
429 goto out_unlock; 429 goto out_unlock;
430 430
431 ret = gfs2_quota_lock_check(ip); 431 ret = gfs2_quota_lock_check(ip);
432 if (ret) 432 if (ret)
433 goto out_unlock; 433 goto out_unlock;
434 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); 434 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
435 ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 435 ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
436 if (ret) 436 if (ret)
437 goto out_quota_unlock; 437 goto out_quota_unlock;
438 438
439 rblocks = RES_DINODE + ind_blocks; 439 rblocks = RES_DINODE + ind_blocks;
440 if (gfs2_is_jdata(ip)) 440 if (gfs2_is_jdata(ip))
441 rblocks += data_blocks ? data_blocks : 1; 441 rblocks += data_blocks ? data_blocks : 1;
442 if (ind_blocks || data_blocks) { 442 if (ind_blocks || data_blocks) {
443 rblocks += RES_STATFS + RES_QUOTA; 443 rblocks += RES_STATFS + RES_QUOTA;
444 rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); 444 rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
445 } 445 }
446 ret = gfs2_trans_begin(sdp, rblocks, 0); 446 ret = gfs2_trans_begin(sdp, rblocks, 0);
447 if (ret) 447 if (ret)
448 goto out_trans_fail; 448 goto out_trans_fail;
449 449
450 lock_page(page); 450 lock_page(page);
451 ret = -EINVAL; 451 ret = -EINVAL;
452 size = i_size_read(inode); 452 size = i_size_read(inode);
453 last_index = (size - 1) >> PAGE_CACHE_SHIFT; 453 last_index = (size - 1) >> PAGE_CACHE_SHIFT;
454 /* Check page index against inode size */ 454 /* Check page index against inode size */
455 if (size == 0 || (page->index > last_index)) 455 if (size == 0 || (page->index > last_index))
456 goto out_trans_end; 456 goto out_trans_end;
457 457
458 ret = -EAGAIN; 458 ret = -EAGAIN;
459 /* If truncated, we must retry the operation, we may have raced 459 /* If truncated, we must retry the operation, we may have raced
460 * with the glock demotion code. 460 * with the glock demotion code.
461 */ 461 */
462 if (!PageUptodate(page) || page->mapping != inode->i_mapping) 462 if (!PageUptodate(page) || page->mapping != inode->i_mapping)
463 goto out_trans_end; 463 goto out_trans_end;
464 464
465 /* Unstuff, if required, and allocate backing blocks for page */ 465 /* Unstuff, if required, and allocate backing blocks for page */
466 ret = 0; 466 ret = 0;
467 if (gfs2_is_stuffed(ip)) 467 if (gfs2_is_stuffed(ip))
468 ret = gfs2_unstuff_dinode(ip, page); 468 ret = gfs2_unstuff_dinode(ip, page);
469 if (ret == 0) 469 if (ret == 0)
470 ret = gfs2_allocate_page_backing(page); 470 ret = gfs2_allocate_page_backing(page);
471 471
472 out_trans_end: 472 out_trans_end:
473 if (ret) 473 if (ret)
474 unlock_page(page); 474 unlock_page(page);
475 gfs2_trans_end(sdp); 475 gfs2_trans_end(sdp);
476 out_trans_fail: 476 out_trans_fail:
477 gfs2_inplace_release(ip); 477 gfs2_inplace_release(ip);
478 out_quota_unlock: 478 out_quota_unlock:
479 gfs2_quota_unlock(ip); 479 gfs2_quota_unlock(ip);
480 out_unlock: 480 out_unlock:
481 gfs2_glock_dq(&gh); 481 gfs2_glock_dq(&gh);
482 out: 482 out:
483 gfs2_holder_uninit(&gh); 483 gfs2_holder_uninit(&gh);
484 if (ret == 0) { 484 if (ret == 0) {
485 set_page_dirty(page); 485 set_page_dirty(page);
486 wait_on_page_writeback(page); 486 wait_on_page_writeback(page);
487 } 487 }
488 sb_end_pagefault(inode->i_sb); 488 sb_end_pagefault(inode->i_sb);
489 return block_page_mkwrite_return(ret); 489 return block_page_mkwrite_return(ret);
490 } 490 }
491 491
/* VM operations for GFS2 file mappings: reads fault in through the
 * generic page cache path; writes to shared mappings are routed via
 * gfs2_page_mkwrite() so backing blocks are allocated first. */
static const struct vm_operations_struct gfs2_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = gfs2_page_mkwrite,
	.remap_pages = generic_file_remap_pages,
};
497 497
498 /** 498 /**
499 * gfs2_mmap - 499 * gfs2_mmap -
500 * @file: The file to map 500 * @file: The file to map
501 * @vma: The VMA which described the mapping 501 * @vma: The VMA which described the mapping
502 * 502 *
503 * There is no need to get a lock here unless we should be updating 503 * There is no need to get a lock here unless we should be updating
504 * atime. We ignore any locking errors since the only consequence is 504 * atime. We ignore any locking errors since the only consequence is
505 * a missed atime update (which will just be deferred until later). 505 * a missed atime update (which will just be deferred until later).
506 * 506 *
507 * Returns: 0 507 * Returns: 0
508 */ 508 */
509 509
510 static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) 510 static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
511 { 511 {
512 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 512 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
513 513
514 if (!(file->f_flags & O_NOATIME) && 514 if (!(file->f_flags & O_NOATIME) &&
515 !IS_NOATIME(&ip->i_inode)) { 515 !IS_NOATIME(&ip->i_inode)) {
516 struct gfs2_holder i_gh; 516 struct gfs2_holder i_gh;
517 int error; 517 int error;
518 518
519 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, 519 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
520 &i_gh); 520 &i_gh);
521 if (error) 521 if (error)
522 return error; 522 return error;
523 /* grab lock to update inode */ 523 /* grab lock to update inode */
524 gfs2_glock_dq_uninit(&i_gh); 524 gfs2_glock_dq_uninit(&i_gh);
525 file_accessed(file); 525 file_accessed(file);
526 } 526 }
527 vma->vm_ops = &gfs2_vm_ops; 527 vma->vm_ops = &gfs2_vm_ops;
528 528
529 return 0; 529 return 0;
530 } 530 }
531 531
532 /** 532 /**
533 * gfs2_open - open a file 533 * gfs2_open - open a file
534 * @inode: the inode to open 534 * @inode: the inode to open
535 * @file: the struct file for this opening 535 * @file: the struct file for this opening
536 * 536 *
537 * Returns: errno 537 * Returns: errno
538 */ 538 */
539 539
540 static int gfs2_open(struct inode *inode, struct file *file) 540 static int gfs2_open(struct inode *inode, struct file *file)
541 { 541 {
542 struct gfs2_inode *ip = GFS2_I(inode); 542 struct gfs2_inode *ip = GFS2_I(inode);
543 struct gfs2_holder i_gh; 543 struct gfs2_holder i_gh;
544 struct gfs2_file *fp; 544 struct gfs2_file *fp;
545 int error; 545 int error;
546 546
547 fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL); 547 fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL);
548 if (!fp) 548 if (!fp)
549 return -ENOMEM; 549 return -ENOMEM;
550 550
551 mutex_init(&fp->f_fl_mutex); 551 mutex_init(&fp->f_fl_mutex);
552 552
553 gfs2_assert_warn(GFS2_SB(inode), !file->private_data); 553 gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
554 file->private_data = fp; 554 file->private_data = fp;
555 555
556 if (S_ISREG(ip->i_inode.i_mode)) { 556 if (S_ISREG(ip->i_inode.i_mode)) {
557 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, 557 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
558 &i_gh); 558 &i_gh);
559 if (error) 559 if (error)
560 goto fail; 560 goto fail;
561 561
562 if (!(file->f_flags & O_LARGEFILE) && 562 if (!(file->f_flags & O_LARGEFILE) &&
563 i_size_read(inode) > MAX_NON_LFS) { 563 i_size_read(inode) > MAX_NON_LFS) {
564 error = -EOVERFLOW; 564 error = -EOVERFLOW;
565 goto fail_gunlock; 565 goto fail_gunlock;
566 } 566 }
567 567
568 gfs2_glock_dq_uninit(&i_gh); 568 gfs2_glock_dq_uninit(&i_gh);
569 } 569 }
570 570
571 return 0; 571 return 0;
572 572
573 fail_gunlock: 573 fail_gunlock:
574 gfs2_glock_dq_uninit(&i_gh); 574 gfs2_glock_dq_uninit(&i_gh);
575 fail: 575 fail:
576 file->private_data = NULL; 576 file->private_data = NULL;
577 kfree(fp); 577 kfree(fp);
578 return error; 578 return error;
579 } 579 }
580 580
581 /** 581 /**
582 * gfs2_release - called to close a struct file 582 * gfs2_release - called to close a struct file
583 * @inode: the inode the struct file belongs to 583 * @inode: the inode the struct file belongs to
584 * @file: the struct file being closed 584 * @file: the struct file being closed
585 * 585 *
586 * Returns: errno 586 * Returns: errno
587 */ 587 */
588 588
589 static int gfs2_release(struct inode *inode, struct file *file) 589 static int gfs2_release(struct inode *inode, struct file *file)
590 { 590 {
591 struct gfs2_inode *ip = GFS2_I(inode); 591 struct gfs2_inode *ip = GFS2_I(inode);
592 592
593 kfree(file->private_data); 593 kfree(file->private_data);
594 file->private_data = NULL; 594 file->private_data = NULL;
595 595
596 if ((file->f_mode & FMODE_WRITE) && 596 if ((file->f_mode & FMODE_WRITE) &&
597 (atomic_read(&inode->i_writecount) == 1)) 597 (atomic_read(&inode->i_writecount) == 1))
598 gfs2_rs_delete(ip); 598 gfs2_rs_delete(ip);
599 599
600 return 0; 600 return 0;
601 } 601 }
602 602
/**
 * gfs2_fsync - sync the dirty data for a file (across the cluster)
 * @file: the file that points to the dentry
 * @start: the start position in the file to sync
 * @end: the end position in the file to sync
 * @datasync: set if we can ignore timestamp changes
 *
 * We split the data flushing here so that we don't wait for the data
 * until after we've also sent the metadata to disk. Note that for
 * data=ordered, we will write & wait for the data at the log flush
 * stage anyway, so this is unlikely to make much of a difference
 * except in the data=writeback case.
 *
 * If the fdatawrite fails due to any reason except -EIO, we will
 * continue the remainder of the fsync, although we'll still report
 * the error at the end. This is to match filemap_write_and_wait_range()
 * behaviour.
 *
 * Returns: errno
 */

static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
	struct gfs2_inode *ip = GFS2_I(inode);
	int ret = 0, ret1 = 0;

	if (mapping->nrpages) {
		/* Kick off data writeback now; we wait for it at the end,
		   after the metadata has been synced */
		ret1 = filemap_fdatawrite_range(mapping, start, end);
		if (ret1 == -EIO)
			return ret1;
	}

	if (datasync)
		/* Timestamp-only dirtiness may be skipped for fdatasync */
		sync_state &= ~I_DIRTY_SYNC;

	if (sync_state) {
		ret = sync_inode_metadata(inode, 1);
		if (ret)
			return ret;
		if (gfs2_is_jdata(ip))
			filemap_write_and_wait(mapping);
		gfs2_ail_flush(ip->i_gl, 1);
	}

	if (mapping->nrpages)
		ret = filemap_fdatawait_range(mapping, start, end);

	/* Prefer the wait/metadata error over the earlier fdatawrite one,
	   matching filemap_write_and_wait_range() */
	return ret ? ret : ret1;
}
656 656
657 /** 657 /**
658 * gfs2_file_aio_write - Perform a write to a file 658 * gfs2_file_aio_write - Perform a write to a file
659 * @iocb: The io context 659 * @iocb: The io context
660 * @iov: The data to write 660 * @iov: The data to write
661 * @nr_segs: Number of @iov segments 661 * @nr_segs: Number of @iov segments
662 * @pos: The file position 662 * @pos: The file position
663 * 663 *
664 * We have to do a lock/unlock here to refresh the inode size for 664 * We have to do a lock/unlock here to refresh the inode size for
665 * O_APPEND writes, otherwise we can land up writing at the wrong 665 * O_APPEND writes, otherwise we can land up writing at the wrong
666 * offset. There is still a race, but provided the app is using its 666 * offset. There is still a race, but provided the app is using its
667 * own file locking, this will make O_APPEND work as expected. 667 * own file locking, this will make O_APPEND work as expected.
668 * 668 *
669 */ 669 */
670 670
671 static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 671 static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
672 unsigned long nr_segs, loff_t pos) 672 unsigned long nr_segs, loff_t pos)
673 { 673 {
674 struct file *file = iocb->ki_filp; 674 struct file *file = iocb->ki_filp;
675 size_t writesize = iov_length(iov, nr_segs); 675 size_t writesize = iov_length(iov, nr_segs);
676 struct dentry *dentry = file->f_dentry; 676 struct dentry *dentry = file->f_dentry;
677 struct gfs2_inode *ip = GFS2_I(dentry->d_inode); 677 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
678 int ret; 678 int ret;
679 679
680 ret = gfs2_rs_alloc(ip); 680 ret = gfs2_rs_alloc(ip);
681 if (ret) 681 if (ret)
682 return ret; 682 return ret;
683 683
684 gfs2_size_hint(file, pos, writesize); 684 gfs2_size_hint(file, pos, writesize);
685 685
686 if (file->f_flags & O_APPEND) { 686 if (file->f_flags & O_APPEND) {
687 struct gfs2_holder gh; 687 struct gfs2_holder gh;
688 688
689 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); 689 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
690 if (ret) 690 if (ret)
691 return ret; 691 return ret;
692 gfs2_glock_dq_uninit(&gh); 692 gfs2_glock_dq_uninit(&gh);
693 } 693 }
694 694
695 return generic_file_aio_write(iocb, iov, nr_segs, pos); 695 return generic_file_aio_write(iocb, iov, nr_segs, pos);
696 } 696 }
697 697
/*
 * fallocate_chunk - allocate backing blocks for one chunk of a file
 * @inode: the inode being preallocated
 * @offset: byte offset of the chunk within the file
 * @len: length of the chunk in bytes
 * @mode: fallocate flags; FALLOC_FL_KEEP_SIZE suppresses the i_size update
 *
 * Caller is expected to hold the glock and an open transaction sized
 * for this allocation (see gfs2_fallocate()).
 *
 * Returns: errno
 */
static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
			   int mode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct buffer_head *dibh;
	int error;
	loff_t size = len;	/* keep the original length; len is consumed below */
	unsigned int nr_blks;
	sector_t lblock = offset >> inode->i_blkbits;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (unlikely(error))
		return error;

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);

	if (gfs2_is_stuffed(ip)) {
		/* Data is stored inline in the dinode; move it out first */
		error = gfs2_unstuff_dinode(ip, NULL);
		if (unlikely(error))
			goto out;
	}

	while (len) {
		struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
		bh_map.b_size = len;
		set_buffer_zeronew(&bh_map);

		/* Map (create=1) as large an extent as possible at lblock;
		   bh_map.b_size comes back as the mapped extent length */
		error = gfs2_block_map(inode, lblock, &bh_map, 1);
		if (unlikely(error))
			goto out;
		len -= bh_map.b_size;
		nr_blks = bh_map.b_size >> inode->i_blkbits;
		lblock += nr_blks;
		if (!buffer_new(&bh_map))
			continue;	/* extent already allocated */
		/* Freshly allocated blocks must carry the zeronew flag */
		if (unlikely(!buffer_zeronew(&bh_map))) {
			error = -EIO;
			goto out;
		}
	}
	if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
		i_size_write(inode, offset + size);

	mark_inode_dirty(inode);

out:
	brelse(dibh);
	return error;
}
747 747
/*
 * calc_max_reserv - adjust an allocation estimate to the current rgrp
 * @ip: the inode being allocated for (ip->i_rgd is the reserved rgrp)
 * @max: upper bound (bytes) on what may be allocated in one chunk
 * @len: in/out: chunk length, possibly raised toward the rgrp capacity
 * @data_blocks: in/out: data block estimate, raised if the rgrp allows
 * @ind_blocks: in/out: indirect (metadata) block estimate
 */
static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
			    unsigned int *data_blocks, unsigned int *ind_blocks)
{
	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned int max_blocks = ip->i_rgd->rd_free_clone;
	/* NOTE(review): 3 blocks are held back per metadata tree level —
	   presumably headroom for growing the tree; confirm against bmap.c */
	unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);

	/* Successively subtract the indirect blocks needed to map max_data */
	for (tmp = max_data; tmp > sdp->sd_diptrs;) {
		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
		max_data -= tmp;
	}
	/* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
	   so it might end up with fewer data blocks */
	if (max_data <= *data_blocks)
		return;
	*data_blocks = max_data;
	*ind_blocks = max_blocks - max_data;
	*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
	if (*len > max) {
		*len = max;
		gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
	}
}
771 771
/*
 * gfs2_fallocate - preallocate blocks for a range of a file
 * @file: the file to preallocate within
 * @mode: fallocate flags; only FALLOC_FL_KEEP_SIZE is supported
 * @offset: start of the range in bytes
 * @len: length of the range in bytes
 *
 * The range is rounded out to filesystem block boundaries and filled
 * chunk by chunk, halving the chunk size on -ENOSPC before giving up.
 *
 * Returns: errno
 */
static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
			   loff_t len)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	loff_t bytes, max_bytes;
	int error;
	const loff_t pos = offset;
	const loff_t count = len;
	loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
	loff_t max_chunk_size = UINT_MAX & bsize_mask;
	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;

	/* We only support the FALLOC_FL_KEEP_SIZE mode */
	if (mode & ~FALLOC_FL_KEEP_SIZE)
		return -EOPNOTSUPP;

	/* Round the range out to whole filesystem blocks */
	offset &= bsize_mask;

	len = next - offset;
	/* Initial chunk size: half the largest rgrp's data capacity */
	bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
	if (!bytes)
		bytes = UINT_MAX;
	bytes &= bsize_mask;
	if (bytes == 0)
		bytes = sdp->sd_sb.sb_bsize;

	error = gfs2_rs_alloc(ip);
	if (error)
		return error;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
	error = gfs2_glock_nq(&ip->i_gh);
	if (unlikely(error))
		goto out_uninit;

	gfs2_size_hint(file, offset, len);

	while (len > 0) {
		if (len < bytes)
			bytes = len;
		/* Skip sub-ranges that already have backing blocks */
		if (!gfs2_write_alloc_required(ip, offset, bytes)) {
			len -= bytes;
			offset += bytes;
			continue;
		}
		error = gfs2_quota_lock_check(ip);
		if (error)
			goto out_unlock;

retry:
		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);

		error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
		if (error) {
			/* On ENOSPC, halve the chunk and try again until we
			   are down to a single filesystem block */
			if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
				bytes >>= 1;
				bytes &= bsize_mask;
				if (bytes == 0)
					bytes = sdp->sd_sb.sb_bsize;
				goto retry;
			}
			goto out_qunlock;
		}
		max_bytes = bytes;
		/* Clamp the chunk to what the reserved rgrp can provide */
		calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
				&max_bytes, &data_blocks, &ind_blocks);

		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
			  RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
		if (gfs2_is_jdata(ip))
			rblocks += data_blocks ? data_blocks : 1;

		error = gfs2_trans_begin(sdp, rblocks,
					 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
		if (error)
			goto out_trans_fail;

		error = fallocate_chunk(inode, offset, max_bytes, mode);
		gfs2_trans_end(sdp);

		if (error)
			goto out_trans_fail;

		len -= max_bytes;
		offset += max_bytes;
		gfs2_inplace_release(ip);
		gfs2_quota_unlock(ip);
	}

	if (error == 0)
		error = generic_write_sync(file, pos, count);
	goto out_unlock;

out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
out_unlock:
	gfs2_glock_dq(&ip->i_gh);
out_uninit:
	gfs2_holder_uninit(&ip->i_gh);
	return error;
}
879 879
880 #ifdef CONFIG_GFS2_FS_LOCKING_DLM 880 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
881 881
/**
 * gfs2_setlease - acquire/release a file lease
 * @file: the file pointer
 * @arg: lease type
 * @fl: file lock
 *
 * We don't currently have a way to enforce a lease across the whole
 * cluster; until we do, disable leases (by just returning -EINVAL),
 * unless the administrator has requested purely local locking.
 *
 * Locking: called under lock_flocks
 *
 * Returns: errno
 */

static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl)
{
	/* Leases are not cluster-coherent; refuse them unconditionally */
	return -EINVAL;
}
901 901
902 /** 902 /**
903 * gfs2_lock - acquire/release a posix lock on a file 903 * gfs2_lock - acquire/release a posix lock on a file
904 * @file: the file pointer 904 * @file: the file pointer
905 * @cmd: either modify or retrieve lock state, possibly wait 905 * @cmd: either modify or retrieve lock state, possibly wait
906 * @fl: type and range of lock 906 * @fl: type and range of lock
907 * 907 *
908 * Returns: errno 908 * Returns: errno
909 */ 909 */
910 910
911 static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) 911 static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
912 { 912 {
913 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 913 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
914 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); 914 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
915 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 915 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
916 916
917 if (!(fl->fl_flags & FL_POSIX)) 917 if (!(fl->fl_flags & FL_POSIX))
918 return -ENOLCK; 918 return -ENOLCK;
919 if (__mandatory_lock(&ip->i_inode) && fl->fl_type != F_UNLCK) 919 if (__mandatory_lock(&ip->i_inode) && fl->fl_type != F_UNLCK)
920 return -ENOLCK; 920 return -ENOLCK;
921 921
922 if (cmd == F_CANCELLK) { 922 if (cmd == F_CANCELLK) {
923 /* Hack: */ 923 /* Hack: */
924 cmd = F_SETLK; 924 cmd = F_SETLK;
925 fl->fl_type = F_UNLCK; 925 fl->fl_type = F_UNLCK;
926 } 926 }
927 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 927 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
928 return -EIO; 928 return -EIO;
929 if (IS_GETLK(cmd)) 929 if (IS_GETLK(cmd))
930 return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); 930 return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
931 else if (fl->fl_type == F_UNLCK) 931 else if (fl->fl_type == F_UNLCK)
932 return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); 932 return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
933 else 933 else
934 return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); 934 return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
935 } 935 }
936 936
/*
 * do_flock - acquire (or convert) a flock lock backed by a glock
 * @file: the file to lock
 * @cmd: F_SETLK or F_SETLKW; non-waiting commands use a try-lock
 * @fl: the requested lock (F_RDLCK or F_WRLCK)
 *
 * Each open file holds at most one flock glock holder (fp->f_fl_gh).
 * Converting between shared and exclusive means dropping the local
 * flock state and the glock, then re-enqueueing in the new state.
 *
 * Returns: errno
 */
static int do_flock(struct file *file, int cmd, struct file_lock *fl)
{
	struct gfs2_file *fp = file->private_data;
	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
	struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode);
	struct gfs2_glock *gl;
	unsigned int state;
	int flags;
	int error = 0;

	/* Map the flock type onto a glock mode */
	state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
	flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;

	mutex_lock(&fp->f_fl_mutex);

	gl = fl_gh->gh_gl;
	if (gl) {
		/* A holder already exists for this file */
		if (fl_gh->gh_state == state)
			goto out;	/* same mode: nothing to do */
		/* Mode change: drop local flock state and the glock first */
		flock_lock_file_wait(file,
				     &(struct file_lock){.fl_type = F_UNLCK});
		gfs2_glock_dq_wait(fl_gh);
		gfs2_holder_reinit(state, flags, fl_gh);
	} else {
		error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
				       &gfs2_flock_glops, CREATE, &gl);
		if (error)
			goto out;
		gfs2_holder_init(gl, state, flags, fl_gh);
		/* The holder keeps its own reference; drop ours */
		gfs2_glock_put(gl);
	}
	error = gfs2_glock_nq(fl_gh);
	if (error) {
		gfs2_holder_uninit(fl_gh);
		if (error == GLR_TRYFAILED)
			error = -EAGAIN;
	} else {
		/* Glock granted: record the flock locally as well */
		error = flock_lock_file_wait(file, fl);
		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
	}

out:
	mutex_unlock(&fp->f_fl_mutex);
	return error;
}
982 982
983 static void do_unflock(struct file *file, struct file_lock *fl) 983 static void do_unflock(struct file *file, struct file_lock *fl)
984 { 984 {
985 struct gfs2_file *fp = file->private_data; 985 struct gfs2_file *fp = file->private_data;
986 struct gfs2_holder *fl_gh = &fp->f_fl_gh; 986 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
987 987
988 mutex_lock(&fp->f_fl_mutex); 988 mutex_lock(&fp->f_fl_mutex);
989 flock_lock_file_wait(file, fl); 989 flock_lock_file_wait(file, fl);
990 if (fl_gh->gh_gl) { 990 if (fl_gh->gh_gl) {
991 gfs2_glock_dq_wait(fl_gh); 991 gfs2_glock_dq_wait(fl_gh);
992 gfs2_holder_uninit(fl_gh); 992 gfs2_holder_uninit(fl_gh);
993 } 993 }
994 mutex_unlock(&fp->f_fl_mutex); 994 mutex_unlock(&fp->f_fl_mutex);
995 } 995 }
996 996
997 /** 997 /**
998 * gfs2_flock - acquire/release a flock lock on a file 998 * gfs2_flock - acquire/release a flock lock on a file
999 * @file: the file pointer 999 * @file: the file pointer
1000 * @cmd: either modify or retrieve lock state, possibly wait 1000 * @cmd: either modify or retrieve lock state, possibly wait
1001 * @fl: type and range of lock 1001 * @fl: type and range of lock
1002 * 1002 *
1003 * Returns: errno 1003 * Returns: errno
1004 */ 1004 */
1005 1005
1006 static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) 1006 static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
1007 { 1007 {
1008 if (!(fl->fl_flags & FL_FLOCK)) 1008 if (!(fl->fl_flags & FL_FLOCK))
1009 return -ENOLCK; 1009 return -ENOLCK;
1010 if (fl->fl_type & LOCK_MAND) 1010 if (fl->fl_type & LOCK_MAND)
1011 return -EOPNOTSUPP; 1011 return -EOPNOTSUPP;
1012 1012
1013 if (fl->fl_type == F_UNLCK) { 1013 if (fl->fl_type == F_UNLCK) {
1014 do_unflock(file, fl); 1014 do_unflock(file, fl);
1015 return 0; 1015 return 0;
1016 } else { 1016 } else {
1017 return do_flock(file, cmd, fl); 1017 return do_flock(file, cmd, fl);
1018 } 1018 }
1019 } 1019 }
1020 1020
1021 const struct file_operations gfs2_file_fops = { 1021 const struct file_operations gfs2_file_fops = {
1022 .llseek = gfs2_llseek, 1022 .llseek = gfs2_llseek,
1023 .read = do_sync_read, 1023 .read = do_sync_read,
1024 .aio_read = generic_file_aio_read, 1024 .aio_read = generic_file_aio_read,
1025 .write = do_sync_write, 1025 .write = do_sync_write,
1026 .aio_write = gfs2_file_aio_write, 1026 .aio_write = gfs2_file_aio_write,
1027 .unlocked_ioctl = gfs2_ioctl, 1027 .unlocked_ioctl = gfs2_ioctl,
1028 .mmap = gfs2_mmap, 1028 .mmap = gfs2_mmap,
1029 .open = gfs2_open, 1029 .open = gfs2_open,
1030 .release = gfs2_release, 1030 .release = gfs2_release,
1031 .fsync = gfs2_fsync, 1031 .fsync = gfs2_fsync,
1032 .lock = gfs2_lock, 1032 .lock = gfs2_lock,
1033 .flock = gfs2_flock, 1033 .flock = gfs2_flock,
1034 .splice_read = generic_file_splice_read, 1034 .splice_read = generic_file_splice_read,
1035 .splice_write = generic_file_splice_write, 1035 .splice_write = generic_file_splice_write,
1036 .setlease = gfs2_setlease, 1036 .setlease = gfs2_setlease,
1037 .fallocate = gfs2_fallocate, 1037 .fallocate = gfs2_fallocate,
1038 }; 1038 };
1039 1039
1040 const struct file_operations gfs2_dir_fops = { 1040 const struct file_operations gfs2_dir_fops = {
1041 .readdir = gfs2_readdir, 1041 .readdir = gfs2_readdir,
1042 .unlocked_ioctl = gfs2_ioctl, 1042 .unlocked_ioctl = gfs2_ioctl,
1043 .open = gfs2_open, 1043 .open = gfs2_open,
1044 .release = gfs2_release, 1044 .release = gfs2_release,
1045 .fsync = gfs2_fsync, 1045 .fsync = gfs2_fsync,
1046 .lock = gfs2_lock, 1046 .lock = gfs2_lock,
1047 .flock = gfs2_flock, 1047 .flock = gfs2_flock,
1048 .llseek = default_llseek, 1048 .llseek = default_llseek,
1049 }; 1049 };
1050 1050
1051 #endif /* CONFIG_GFS2_FS_LOCKING_DLM */ 1051 #endif /* CONFIG_GFS2_FS_LOCKING_DLM */
1052 1052
1053 const struct file_operations gfs2_file_fops_nolock = { 1053 const struct file_operations gfs2_file_fops_nolock = {
1054 .llseek = gfs2_llseek, 1054 .llseek = gfs2_llseek,
1055 .read = do_sync_read, 1055 .read = do_sync_read,
1056 .aio_read = generic_file_aio_read, 1056 .aio_read = generic_file_aio_read,
1057 .write = do_sync_write, 1057 .write = do_sync_write,
1058 .aio_write = gfs2_file_aio_write, 1058 .aio_write = gfs2_file_aio_write,
1059 .unlocked_ioctl = gfs2_ioctl, 1059 .unlocked_ioctl = gfs2_ioctl,
1060 .mmap = gfs2_mmap, 1060 .mmap = gfs2_mmap,
1061 .open = gfs2_open, 1061 .open = gfs2_open,
1062 .release = gfs2_release, 1062 .release = gfs2_release,
1063 .fsync = gfs2_fsync, 1063 .fsync = gfs2_fsync,
1064 .splice_read = generic_file_splice_read, 1064 .splice_read = generic_file_splice_read,
1065 .splice_write = generic_file_splice_write, 1065 .splice_write = generic_file_splice_write,
1066 .setlease = generic_setlease, 1066 .setlease = generic_setlease,
1067 .fallocate = gfs2_fallocate, 1067 .fallocate = gfs2_fallocate,
1068 }; 1068 };
1069 1069
1070 const struct file_operations gfs2_dir_fops_nolock = { 1070 const struct file_operations gfs2_dir_fops_nolock = {
1071 .readdir = gfs2_readdir, 1071 .readdir = gfs2_readdir,
1072 .unlocked_ioctl = gfs2_ioctl, 1072 .unlocked_ioctl = gfs2_ioctl,
1073 .open = gfs2_open, 1073 .open = gfs2_open,
1074 .release = gfs2_release, 1074 .release = gfs2_release,
1075 .fsync = gfs2_fsync, 1075 .fsync = gfs2_fsync,
1076 .llseek = default_llseek, 1076 .llseek = default_llseek,
1077 }; 1077 };
1078 1078
1079 1079
1 /* 1 /*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10 #include <linux/slab.h> 10 #include <linux/slab.h>
11 #include <linux/spinlock.h> 11 #include <linux/spinlock.h>
12 #include <linux/completion.h> 12 #include <linux/completion.h>
13 #include <linux/buffer_head.h> 13 #include <linux/buffer_head.h>
14 #include <linux/namei.h> 14 #include <linux/namei.h>
15 #include <linux/mm.h> 15 #include <linux/mm.h>
16 #include <linux/xattr.h> 16 #include <linux/xattr.h>
17 #include <linux/posix_acl.h> 17 #include <linux/posix_acl.h>
18 #include <linux/gfs2_ondisk.h> 18 #include <linux/gfs2_ondisk.h>
19 #include <linux/crc32.h> 19 #include <linux/crc32.h>
20 #include <linux/fiemap.h> 20 #include <linux/fiemap.h>
21 #include <linux/security.h> 21 #include <linux/security.h>
22 #include <asm/uaccess.h> 22 #include <asm/uaccess.h>
23 23
24 #include "gfs2.h" 24 #include "gfs2.h"
25 #include "incore.h" 25 #include "incore.h"
26 #include "acl.h" 26 #include "acl.h"
27 #include "bmap.h" 27 #include "bmap.h"
28 #include "dir.h" 28 #include "dir.h"
29 #include "xattr.h" 29 #include "xattr.h"
30 #include "glock.h" 30 #include "glock.h"
31 #include "inode.h" 31 #include "inode.h"
32 #include "meta_io.h" 32 #include "meta_io.h"
33 #include "quota.h" 33 #include "quota.h"
34 #include "rgrp.h" 34 #include "rgrp.h"
35 #include "trans.h" 35 #include "trans.h"
36 #include "util.h" 36 #include "util.h"
37 #include "super.h" 37 #include "super.h"
38 #include "glops.h" 38 #include "glops.h"
39 39
40 struct gfs2_skip_data { 40 struct gfs2_skip_data {
41 u64 no_addr; 41 u64 no_addr;
42 int skipped; 42 int skipped;
43 int non_block; 43 int non_block;
44 }; 44 };
45 45
46 static int iget_test(struct inode *inode, void *opaque) 46 static int iget_test(struct inode *inode, void *opaque)
47 { 47 {
48 struct gfs2_inode *ip = GFS2_I(inode); 48 struct gfs2_inode *ip = GFS2_I(inode);
49 struct gfs2_skip_data *data = opaque; 49 struct gfs2_skip_data *data = opaque;
50 50
51 if (ip->i_no_addr == data->no_addr) { 51 if (ip->i_no_addr == data->no_addr) {
52 if (data->non_block && 52 if (data->non_block &&
53 inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { 53 inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
54 data->skipped = 1; 54 data->skipped = 1;
55 return 0; 55 return 0;
56 } 56 }
57 return 1; 57 return 1;
58 } 58 }
59 return 0; 59 return 0;
60 } 60 }
61 61
62 static int iget_set(struct inode *inode, void *opaque) 62 static int iget_set(struct inode *inode, void *opaque)
63 { 63 {
64 struct gfs2_inode *ip = GFS2_I(inode); 64 struct gfs2_inode *ip = GFS2_I(inode);
65 struct gfs2_skip_data *data = opaque; 65 struct gfs2_skip_data *data = opaque;
66 66
67 if (data->skipped) 67 if (data->skipped)
68 return -ENOENT; 68 return -ENOENT;
69 inode->i_ino = (unsigned long)(data->no_addr); 69 inode->i_ino = (unsigned long)(data->no_addr);
70 ip->i_no_addr = data->no_addr; 70 ip->i_no_addr = data->no_addr;
71 return 0; 71 return 0;
72 } 72 }
73 73
74 struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block) 74 struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
75 { 75 {
76 unsigned long hash = (unsigned long)no_addr; 76 unsigned long hash = (unsigned long)no_addr;
77 struct gfs2_skip_data data; 77 struct gfs2_skip_data data;
78 78
79 data.no_addr = no_addr; 79 data.no_addr = no_addr;
80 data.skipped = 0; 80 data.skipped = 0;
81 data.non_block = non_block; 81 data.non_block = non_block;
82 return ilookup5(sb, hash, iget_test, &data); 82 return ilookup5(sb, hash, iget_test, &data);
83 } 83 }
84 84
85 static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr, 85 static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
86 int non_block) 86 int non_block)
87 { 87 {
88 struct gfs2_skip_data data; 88 struct gfs2_skip_data data;
89 unsigned long hash = (unsigned long)no_addr; 89 unsigned long hash = (unsigned long)no_addr;
90 90
91 data.no_addr = no_addr; 91 data.no_addr = no_addr;
92 data.skipped = 0; 92 data.skipped = 0;
93 data.non_block = non_block; 93 data.non_block = non_block;
94 return iget5_locked(sb, hash, iget_test, iget_set, &data); 94 return iget5_locked(sb, hash, iget_test, iget_set, &data);
95 } 95 }
96 96
97 /** 97 /**
98 * gfs2_set_iop - Sets inode operations 98 * gfs2_set_iop - Sets inode operations
99 * @inode: The inode with correct i_mode filled in 99 * @inode: The inode with correct i_mode filled in
100 * 100 *
101 * GFS2 lookup code fills in vfs inode contents based on info obtained 101 * GFS2 lookup code fills in vfs inode contents based on info obtained
102 * from directory entry inside gfs2_inode_lookup(). 102 * from directory entry inside gfs2_inode_lookup().
103 */ 103 */
104 104
105 static void gfs2_set_iop(struct inode *inode) 105 static void gfs2_set_iop(struct inode *inode)
106 { 106 {
107 struct gfs2_sbd *sdp = GFS2_SB(inode); 107 struct gfs2_sbd *sdp = GFS2_SB(inode);
108 umode_t mode = inode->i_mode; 108 umode_t mode = inode->i_mode;
109 109
110 if (S_ISREG(mode)) { 110 if (S_ISREG(mode)) {
111 inode->i_op = &gfs2_file_iops; 111 inode->i_op = &gfs2_file_iops;
112 if (gfs2_localflocks(sdp)) 112 if (gfs2_localflocks(sdp))
113 inode->i_fop = &gfs2_file_fops_nolock; 113 inode->i_fop = &gfs2_file_fops_nolock;
114 else 114 else
115 inode->i_fop = &gfs2_file_fops; 115 inode->i_fop = &gfs2_file_fops;
116 } else if (S_ISDIR(mode)) { 116 } else if (S_ISDIR(mode)) {
117 inode->i_op = &gfs2_dir_iops; 117 inode->i_op = &gfs2_dir_iops;
118 if (gfs2_localflocks(sdp)) 118 if (gfs2_localflocks(sdp))
119 inode->i_fop = &gfs2_dir_fops_nolock; 119 inode->i_fop = &gfs2_dir_fops_nolock;
120 else 120 else
121 inode->i_fop = &gfs2_dir_fops; 121 inode->i_fop = &gfs2_dir_fops;
122 } else if (S_ISLNK(mode)) { 122 } else if (S_ISLNK(mode)) {
123 inode->i_op = &gfs2_symlink_iops; 123 inode->i_op = &gfs2_symlink_iops;
124 } else { 124 } else {
125 inode->i_op = &gfs2_file_iops; 125 inode->i_op = &gfs2_file_iops;
126 init_special_inode(inode, inode->i_mode, inode->i_rdev); 126 init_special_inode(inode, inode->i_mode, inode->i_rdev);
127 } 127 }
128 } 128 }
129 129
130 /** 130 /**
131 * gfs2_inode_lookup - Lookup an inode 131 * gfs2_inode_lookup - Lookup an inode
132 * @sb: The super block 132 * @sb: The super block
133 * @no_addr: The inode number 133 * @no_addr: The inode number
134 * @type: The type of the inode 134 * @type: The type of the inode
135 * non_block: Can we block on inodes that are being freed? 135 * non_block: Can we block on inodes that are being freed?
136 * 136 *
137 * Returns: A VFS inode, or an error 137 * Returns: A VFS inode, or an error
138 */ 138 */
139 139
140 struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, 140 struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
141 u64 no_addr, u64 no_formal_ino, int non_block) 141 u64 no_addr, u64 no_formal_ino, int non_block)
142 { 142 {
143 struct inode *inode; 143 struct inode *inode;
144 struct gfs2_inode *ip; 144 struct gfs2_inode *ip;
145 struct gfs2_glock *io_gl = NULL; 145 struct gfs2_glock *io_gl = NULL;
146 int error; 146 int error;
147 147
148 inode = gfs2_iget(sb, no_addr, non_block); 148 inode = gfs2_iget(sb, no_addr, non_block);
149 ip = GFS2_I(inode); 149 ip = GFS2_I(inode);
150 150
151 if (!inode) 151 if (!inode)
152 return ERR_PTR(-ENOBUFS); 152 return ERR_PTR(-ENOBUFS);
153 153
154 if (inode->i_state & I_NEW) { 154 if (inode->i_state & I_NEW) {
155 struct gfs2_sbd *sdp = GFS2_SB(inode); 155 struct gfs2_sbd *sdp = GFS2_SB(inode);
156 ip->i_no_formal_ino = no_formal_ino; 156 ip->i_no_formal_ino = no_formal_ino;
157 157
158 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 158 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
159 if (unlikely(error)) 159 if (unlikely(error))
160 goto fail; 160 goto fail;
161 ip->i_gl->gl_object = ip; 161 ip->i_gl->gl_object = ip;
162 162
163 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 163 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
164 if (unlikely(error)) 164 if (unlikely(error))
165 goto fail_put; 165 goto fail_put;
166 166
167 set_bit(GIF_INVALID, &ip->i_flags); 167 set_bit(GIF_INVALID, &ip->i_flags);
168 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 168 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
169 if (unlikely(error)) 169 if (unlikely(error))
170 goto fail_iopen; 170 goto fail_iopen;
171 171
172 ip->i_iopen_gh.gh_gl->gl_object = ip; 172 ip->i_iopen_gh.gh_gl->gl_object = ip;
173 gfs2_glock_put(io_gl); 173 gfs2_glock_put(io_gl);
174 io_gl = NULL; 174 io_gl = NULL;
175 175
176 if (type == DT_UNKNOWN) { 176 if (type == DT_UNKNOWN) {
177 /* Inode glock must be locked already */ 177 /* Inode glock must be locked already */
178 error = gfs2_inode_refresh(GFS2_I(inode)); 178 error = gfs2_inode_refresh(GFS2_I(inode));
179 if (error) 179 if (error)
180 goto fail_refresh; 180 goto fail_refresh;
181 } else { 181 } else {
182 inode->i_mode = DT2IF(type); 182 inode->i_mode = DT2IF(type);
183 } 183 }
184 184
185 gfs2_set_iop(inode); 185 gfs2_set_iop(inode);
186 unlock_new_inode(inode); 186 unlock_new_inode(inode);
187 } 187 }
188 188
189 return inode; 189 return inode;
190 190
191 fail_refresh: 191 fail_refresh:
192 ip->i_iopen_gh.gh_gl->gl_object = NULL; 192 ip->i_iopen_gh.gh_gl->gl_object = NULL;
193 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 193 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
194 fail_iopen: 194 fail_iopen:
195 if (io_gl) 195 if (io_gl)
196 gfs2_glock_put(io_gl); 196 gfs2_glock_put(io_gl);
197 fail_put: 197 fail_put:
198 ip->i_gl->gl_object = NULL; 198 ip->i_gl->gl_object = NULL;
199 gfs2_glock_put(ip->i_gl); 199 gfs2_glock_put(ip->i_gl);
200 fail: 200 fail:
201 iget_failed(inode); 201 iget_failed(inode);
202 return ERR_PTR(error); 202 return ERR_PTR(error);
203 } 203 }
204 204
205 struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, 205 struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
206 u64 *no_formal_ino, unsigned int blktype) 206 u64 *no_formal_ino, unsigned int blktype)
207 { 207 {
208 struct super_block *sb = sdp->sd_vfs; 208 struct super_block *sb = sdp->sd_vfs;
209 struct gfs2_holder i_gh; 209 struct gfs2_holder i_gh;
210 struct inode *inode = NULL; 210 struct inode *inode = NULL;
211 int error; 211 int error;
212 212
213 /* Must not read in block until block type is verified */ 213 /* Must not read in block until block type is verified */
214 error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, 214 error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
215 LM_ST_EXCLUSIVE, GL_SKIP, &i_gh); 215 LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
216 if (error) 216 if (error)
217 return ERR_PTR(error); 217 return ERR_PTR(error);
218 218
219 error = gfs2_check_blk_type(sdp, no_addr, blktype); 219 error = gfs2_check_blk_type(sdp, no_addr, blktype);
220 if (error) 220 if (error)
221 goto fail; 221 goto fail;
222 222
223 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1); 223 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1);
224 if (IS_ERR(inode)) 224 if (IS_ERR(inode))
225 goto fail; 225 goto fail;
226 226
227 /* Two extra checks for NFS only */ 227 /* Two extra checks for NFS only */
228 if (no_formal_ino) { 228 if (no_formal_ino) {
229 error = -ESTALE; 229 error = -ESTALE;
230 if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino) 230 if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino)
231 goto fail_iput; 231 goto fail_iput;
232 232
233 error = -EIO; 233 error = -EIO;
234 if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) 234 if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM)
235 goto fail_iput; 235 goto fail_iput;
236 236
237 error = 0; 237 error = 0;
238 } 238 }
239 239
240 fail: 240 fail:
241 gfs2_glock_dq_uninit(&i_gh); 241 gfs2_glock_dq_uninit(&i_gh);
242 return error ? ERR_PTR(error) : inode; 242 return error ? ERR_PTR(error) : inode;
243 fail_iput: 243 fail_iput:
244 iput(inode); 244 iput(inode);
245 goto fail; 245 goto fail;
246 } 246 }
247 247
248 248
249 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 249 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
250 { 250 {
251 struct qstr qstr; 251 struct qstr qstr;
252 struct inode *inode; 252 struct inode *inode;
253 gfs2_str2qstr(&qstr, name); 253 gfs2_str2qstr(&qstr, name);
254 inode = gfs2_lookupi(dip, &qstr, 1); 254 inode = gfs2_lookupi(dip, &qstr, 1);
255 /* gfs2_lookupi has inconsistent callers: vfs 255 /* gfs2_lookupi has inconsistent callers: vfs
256 * related routines expect NULL for no entry found, 256 * related routines expect NULL for no entry found,
257 * gfs2_lookup_simple callers expect ENOENT 257 * gfs2_lookup_simple callers expect ENOENT
258 * and do not check for NULL. 258 * and do not check for NULL.
259 */ 259 */
260 if (inode == NULL) 260 if (inode == NULL)
261 return ERR_PTR(-ENOENT); 261 return ERR_PTR(-ENOENT);
262 else 262 else
263 return inode; 263 return inode;
264 } 264 }
265 265
266 266
267 /** 267 /**
268 * gfs2_lookupi - Look up a filename in a directory and return its inode 268 * gfs2_lookupi - Look up a filename in a directory and return its inode
269 * @d_gh: An initialized holder for the directory glock 269 * @d_gh: An initialized holder for the directory glock
270 * @name: The name of the inode to look for 270 * @name: The name of the inode to look for
271 * @is_root: If 1, ignore the caller's permissions 271 * @is_root: If 1, ignore the caller's permissions
272 * @i_gh: An uninitialized holder for the new inode glock 272 * @i_gh: An uninitialized holder for the new inode glock
273 * 273 *
274 * This can be called via the VFS filldir function when NFS is doing 274 * This can be called via the VFS filldir function when NFS is doing
275 * a readdirplus and the inode which its intending to stat isn't 275 * a readdirplus and the inode which its intending to stat isn't
276 * already in cache. In this case we must not take the directory glock 276 * already in cache. In this case we must not take the directory glock
277 * again, since the readdir call will have already taken that lock. 277 * again, since the readdir call will have already taken that lock.
278 * 278 *
279 * Returns: errno 279 * Returns: errno
280 */ 280 */
281 281
282 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 282 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
283 int is_root) 283 int is_root)
284 { 284 {
285 struct super_block *sb = dir->i_sb; 285 struct super_block *sb = dir->i_sb;
286 struct gfs2_inode *dip = GFS2_I(dir); 286 struct gfs2_inode *dip = GFS2_I(dir);
287 struct gfs2_holder d_gh; 287 struct gfs2_holder d_gh;
288 int error = 0; 288 int error = 0;
289 struct inode *inode = NULL; 289 struct inode *inode = NULL;
290 int unlock = 0; 290 int unlock = 0;
291 291
292 if (!name->len || name->len > GFS2_FNAMESIZE) 292 if (!name->len || name->len > GFS2_FNAMESIZE)
293 return ERR_PTR(-ENAMETOOLONG); 293 return ERR_PTR(-ENAMETOOLONG);
294 294
295 if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) || 295 if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) ||
296 (name->len == 2 && memcmp(name->name, "..", 2) == 0 && 296 (name->len == 2 && memcmp(name->name, "..", 2) == 0 &&
297 dir == sb->s_root->d_inode)) { 297 dir == sb->s_root->d_inode)) {
298 igrab(dir); 298 igrab(dir);
299 return dir; 299 return dir;
300 } 300 }
301 301
302 if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) { 302 if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) {
303 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 303 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
304 if (error) 304 if (error)
305 return ERR_PTR(error); 305 return ERR_PTR(error);
306 unlock = 1; 306 unlock = 1;
307 } 307 }
308 308
309 if (!is_root) { 309 if (!is_root) {
310 error = gfs2_permission(dir, MAY_EXEC); 310 error = gfs2_permission(dir, MAY_EXEC);
311 if (error) 311 if (error)
312 goto out; 312 goto out;
313 } 313 }
314 314
315 inode = gfs2_dir_search(dir, name); 315 inode = gfs2_dir_search(dir, name);
316 if (IS_ERR(inode)) 316 if (IS_ERR(inode))
317 error = PTR_ERR(inode); 317 error = PTR_ERR(inode);
318 out: 318 out:
319 if (unlock) 319 if (unlock)
320 gfs2_glock_dq_uninit(&d_gh); 320 gfs2_glock_dq_uninit(&d_gh);
321 if (error == -ENOENT) 321 if (error == -ENOENT)
322 return NULL; 322 return NULL;
323 return inode ? inode : ERR_PTR(error); 323 return inode ? inode : ERR_PTR(error);
324 } 324 }
325 325
326 /** 326 /**
327 * create_ok - OK to create a new on-disk inode here? 327 * create_ok - OK to create a new on-disk inode here?
328 * @dip: Directory in which dinode is to be created 328 * @dip: Directory in which dinode is to be created
329 * @name: Name of new dinode 329 * @name: Name of new dinode
330 * @mode: 330 * @mode:
331 * 331 *
332 * Returns: errno 332 * Returns: errno
333 */ 333 */
334 334
335 static int create_ok(struct gfs2_inode *dip, const struct qstr *name, 335 static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
336 umode_t mode) 336 umode_t mode)
337 { 337 {
338 int error; 338 int error;
339 339
340 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 340 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
341 if (error) 341 if (error)
342 return error; 342 return error;
343 343
344 /* Don't create entries in an unlinked directory */ 344 /* Don't create entries in an unlinked directory */
345 if (!dip->i_inode.i_nlink) 345 if (!dip->i_inode.i_nlink)
346 return -ENOENT; 346 return -ENOENT;
347 347
348 error = gfs2_dir_check(&dip->i_inode, name, NULL); 348 error = gfs2_dir_check(&dip->i_inode, name, NULL);
349 switch (error) { 349 switch (error) {
350 case -ENOENT: 350 case -ENOENT:
351 error = 0; 351 error = 0;
352 break; 352 break;
353 case 0: 353 case 0:
354 return -EEXIST; 354 return -EEXIST;
355 default: 355 default:
356 return error; 356 return error;
357 } 357 }
358 358
359 if (dip->i_entries == (u32)-1) 359 if (dip->i_entries == (u32)-1)
360 return -EFBIG; 360 return -EFBIG;
361 if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) 361 if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1)
362 return -EMLINK; 362 return -EMLINK;
363 363
364 return 0; 364 return 0;
365 } 365 }
366 366
367 static void munge_mode_uid_gid(const struct gfs2_inode *dip, 367 static void munge_mode_uid_gid(const struct gfs2_inode *dip,
368 struct inode *inode) 368 struct inode *inode)
369 { 369 {
370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
371 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 371 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) {
372 if (S_ISDIR(inode->i_mode)) 372 if (S_ISDIR(inode->i_mode))
373 inode->i_mode |= S_ISUID; 373 inode->i_mode |= S_ISUID;
374 else if (dip->i_inode.i_uid != current_fsuid()) 374 else if (dip->i_inode.i_uid != current_fsuid())
375 inode->i_mode &= ~07111; 375 inode->i_mode &= ~07111;
376 inode->i_uid = dip->i_inode.i_uid; 376 inode->i_uid = dip->i_inode.i_uid;
377 } else 377 } else
378 inode->i_uid = current_fsuid(); 378 inode->i_uid = current_fsuid();
379 379
380 if (dip->i_inode.i_mode & S_ISGID) { 380 if (dip->i_inode.i_mode & S_ISGID) {
381 if (S_ISDIR(inode->i_mode)) 381 if (S_ISDIR(inode->i_mode))
382 inode->i_mode |= S_ISGID; 382 inode->i_mode |= S_ISGID;
383 inode->i_gid = dip->i_inode.i_gid; 383 inode->i_gid = dip->i_inode.i_gid;
384 } else 384 } else
385 inode->i_gid = current_fsgid(); 385 inode->i_gid = current_fsgid();
386 } 386 }
387 387
388 static int alloc_dinode(struct gfs2_inode *ip) 388 static int alloc_dinode(struct gfs2_inode *ip, u32 flags)
389 { 389 {
390 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 390 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
391 int error; 391 int error;
392 int dblocks = 1; 392 int dblocks = 1;
393 393
394 error = gfs2_inplace_reserve(ip, RES_DINODE); 394 error = gfs2_inplace_reserve(ip, RES_DINODE, flags);
395 if (error) 395 if (error)
396 goto out; 396 goto out;
397 397
398 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0); 398 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0);
399 if (error) 399 if (error)
400 goto out_ipreserv; 400 goto out_ipreserv;
401 401
402 error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, &ip->i_generation); 402 error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, &ip->i_generation);
403 ip->i_no_formal_ino = ip->i_generation; 403 ip->i_no_formal_ino = ip->i_generation;
404 ip->i_inode.i_ino = ip->i_no_addr; 404 ip->i_inode.i_ino = ip->i_no_addr;
405 ip->i_goal = ip->i_no_addr; 405 ip->i_goal = ip->i_no_addr;
406 406
407 gfs2_trans_end(sdp); 407 gfs2_trans_end(sdp);
408 408
409 out_ipreserv: 409 out_ipreserv:
410 gfs2_inplace_release(ip); 410 gfs2_inplace_release(ip);
411 out: 411 out:
412 return error; 412 return error;
413 } 413 }
414 414
415 static void gfs2_init_dir(struct buffer_head *dibh, 415 static void gfs2_init_dir(struct buffer_head *dibh,
416 const struct gfs2_inode *parent) 416 const struct gfs2_inode *parent)
417 { 417 {
418 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; 418 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
419 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); 419 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
420 420
421 gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent); 421 gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
422 dent->de_inum = di->di_num; /* already GFS2 endian */ 422 dent->de_inum = di->di_num; /* already GFS2 endian */
423 dent->de_type = cpu_to_be16(DT_DIR); 423 dent->de_type = cpu_to_be16(DT_DIR);
424 424
425 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); 425 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
426 gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); 426 gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
427 gfs2_inum_out(parent, dent); 427 gfs2_inum_out(parent, dent);
428 dent->de_type = cpu_to_be16(DT_DIR); 428 dent->de_type = cpu_to_be16(DT_DIR);
429 429
430 } 430 }
431 431
432 /** 432 /**
433 * init_dinode - Fill in a new dinode structure 433 * init_dinode - Fill in a new dinode structure
434 * @dip: The directory this inode is being created in 434 * @dip: The directory this inode is being created in
435 * @ip: The inode 435 * @ip: The inode
436 * @symname: The symlink destination (if a symlink) 436 * @symname: The symlink destination (if a symlink)
437 * @bhp: The buffer head (returned to caller) 437 * @bhp: The buffer head (returned to caller)
438 * 438 *
439 */ 439 */
440 440
441 static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, 441 static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
442 const char *symname, struct buffer_head **bhp) 442 const char *symname, struct buffer_head **bhp)
443 { 443 {
444 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 444 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
445 struct gfs2_dinode *di; 445 struct gfs2_dinode *di;
446 struct buffer_head *dibh; 446 struct buffer_head *dibh;
447 struct timespec tv = CURRENT_TIME; 447 struct timespec tv = CURRENT_TIME;
448 448
449 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr); 449 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr);
450 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 450 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
451 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 451 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
452 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 452 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
453 di = (struct gfs2_dinode *)dibh->b_data; 453 di = (struct gfs2_dinode *)dibh->b_data;
454 454
455 di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 455 di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
456 di->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 456 di->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
457 di->di_mode = cpu_to_be32(ip->i_inode.i_mode); 457 di->di_mode = cpu_to_be32(ip->i_inode.i_mode);
458 di->di_uid = cpu_to_be32(ip->i_inode.i_uid); 458 di->di_uid = cpu_to_be32(ip->i_inode.i_uid);
459 di->di_gid = cpu_to_be32(ip->i_inode.i_gid); 459 di->di_gid = cpu_to_be32(ip->i_inode.i_gid);
460 di->di_nlink = 0; 460 di->di_nlink = 0;
461 di->di_size = cpu_to_be64(ip->i_inode.i_size); 461 di->di_size = cpu_to_be64(ip->i_inode.i_size);
462 di->di_blocks = cpu_to_be64(1); 462 di->di_blocks = cpu_to_be64(1);
463 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 463 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
464 di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev)); 464 di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev));
465 di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev)); 465 di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev));
466 di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_no_addr); 466 di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_no_addr);
467 di->di_generation = cpu_to_be64(ip->i_generation); 467 di->di_generation = cpu_to_be64(ip->i_generation);
468 di->di_flags = 0; 468 di->di_flags = 0;
469 di->__pad1 = 0; 469 di->__pad1 = 0;
470 di->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) ? GFS2_FORMAT_DE : 0); 470 di->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) ? GFS2_FORMAT_DE : 0);
471 di->di_height = 0; 471 di->di_height = 0;
472 di->__pad2 = 0; 472 di->__pad2 = 0;
473 di->__pad3 = 0; 473 di->__pad3 = 0;
474 di->di_depth = 0; 474 di->di_depth = 0;
475 di->di_entries = 0; 475 di->di_entries = 0;
476 memset(&di->__pad4, 0, sizeof(di->__pad4)); 476 memset(&di->__pad4, 0, sizeof(di->__pad4));
477 di->di_eattr = 0; 477 di->di_eattr = 0;
478 di->di_atime_nsec = cpu_to_be32(tv.tv_nsec); 478 di->di_atime_nsec = cpu_to_be32(tv.tv_nsec);
479 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 479 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
480 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 480 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
481 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 481 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
482 482
483 switch(ip->i_inode.i_mode & S_IFMT) { 483 switch(ip->i_inode.i_mode & S_IFMT) {
484 case S_IFREG: 484 case S_IFREG:
485 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || 485 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
486 gfs2_tune_get(sdp, gt_new_files_jdata)) 486 gfs2_tune_get(sdp, gt_new_files_jdata))
487 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); 487 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
488 break; 488 break;
489 case S_IFDIR: 489 case S_IFDIR:
490 di->di_flags |= cpu_to_be32(dip->i_diskflags & 490 di->di_flags |= cpu_to_be32(dip->i_diskflags &
491 GFS2_DIF_INHERIT_JDATA); 491 GFS2_DIF_INHERIT_JDATA);
492 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); 492 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
493 di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)); 493 di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
494 di->di_entries = cpu_to_be32(2); 494 di->di_entries = cpu_to_be32(2);
495 gfs2_init_dir(dibh, dip); 495 gfs2_init_dir(dibh, dip);
496 break; 496 break;
497 case S_IFLNK: 497 case S_IFLNK:
498 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, ip->i_inode.i_size); 498 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, ip->i_inode.i_size);
499 break; 499 break;
500 } 500 }
501 501
502 set_buffer_uptodate(dibh); 502 set_buffer_uptodate(dibh);
503 503
504 *bhp = dibh; 504 *bhp = dibh;
505 } 505 }
506 506
507 static int make_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, 507 static int make_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
508 const char *symname, struct buffer_head **bhp) 508 const char *symname, struct buffer_head **bhp)
509 { 509 {
510 struct inode *inode = &ip->i_inode; 510 struct inode *inode = &ip->i_inode;
511 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 511 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
512 int error; 512 int error;
513 513
514 error = gfs2_rindex_update(sdp); 514 error = gfs2_rindex_update(sdp);
515 if (error) 515 if (error)
516 return error; 516 return error;
517 517
518 error = gfs2_quota_lock(dip, inode->i_uid, inode->i_gid); 518 error = gfs2_quota_lock(dip, inode->i_uid, inode->i_gid);
519 if (error) 519 if (error)
520 return error; 520 return error;
521 521
522 error = gfs2_quota_check(dip, inode->i_uid, inode->i_gid); 522 error = gfs2_quota_check(dip, inode->i_uid, inode->i_gid);
523 if (error) 523 if (error)
524 goto out_quota; 524 goto out_quota;
525 525
526 error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0); 526 error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0);
527 if (error) 527 if (error)
528 goto out_quota; 528 goto out_quota;
529 529
530 init_dinode(dip, ip, symname, bhp); 530 init_dinode(dip, ip, symname, bhp);
531 gfs2_quota_change(dip, +1, inode->i_uid, inode->i_gid); 531 gfs2_quota_change(dip, +1, inode->i_uid, inode->i_gid);
532 gfs2_trans_end(sdp); 532 gfs2_trans_end(sdp);
533 533
534 out_quota: 534 out_quota:
535 gfs2_quota_unlock(dip); 535 gfs2_quota_unlock(dip);
536 return error; 536 return error;
537 } 537 }
538 538
539 static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, 539 static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
540 struct gfs2_inode *ip) 540 struct gfs2_inode *ip)
541 { 541 {
542 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 542 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
543 int alloc_required; 543 int alloc_required;
544 struct buffer_head *dibh; 544 struct buffer_head *dibh;
545 int error; 545 int error;
546 546
547 error = gfs2_rindex_update(sdp); 547 error = gfs2_rindex_update(sdp);
548 if (error) 548 if (error)
549 return error; 549 return error;
550 550
551 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 551 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
552 if (error) 552 if (error)
553 goto fail; 553 goto fail;
554 554
555 error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); 555 error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name);
556 if (alloc_required < 0) 556 if (alloc_required < 0)
557 goto fail_quota_locks; 557 goto fail_quota_locks;
558 if (alloc_required) { 558 if (alloc_required) {
559 error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); 559 error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid);
560 if (error) 560 if (error)
561 goto fail_quota_locks; 561 goto fail_quota_locks;
562 562
563 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); 563 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0);
564 if (error) 564 if (error)
565 goto fail_quota_locks; 565 goto fail_quota_locks;
566 566
567 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 567 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
568 dip->i_rgd->rd_length + 568 dip->i_rgd->rd_length +
569 2 * RES_DINODE + 569 2 * RES_DINODE +
570 RES_STATFS + RES_QUOTA, 0); 570 RES_STATFS + RES_QUOTA, 0);
571 if (error) 571 if (error)
572 goto fail_ipreserv; 572 goto fail_ipreserv;
573 } else { 573 } else {
574 error = gfs2_trans_begin(sdp, RES_LEAF + 2 * RES_DINODE, 0); 574 error = gfs2_trans_begin(sdp, RES_LEAF + 2 * RES_DINODE, 0);
575 if (error) 575 if (error)
576 goto fail_quota_locks; 576 goto fail_quota_locks;
577 } 577 }
578 578
579 error = gfs2_dir_add(&dip->i_inode, name, ip); 579 error = gfs2_dir_add(&dip->i_inode, name, ip);
580 if (error) 580 if (error)
581 goto fail_end_trans; 581 goto fail_end_trans;
582 582
583 error = gfs2_meta_inode_buffer(ip, &dibh); 583 error = gfs2_meta_inode_buffer(ip, &dibh);
584 if (error) 584 if (error)
585 goto fail_end_trans; 585 goto fail_end_trans;
586 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1); 586 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1);
587 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 587 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
588 gfs2_dinode_out(ip, dibh->b_data); 588 gfs2_dinode_out(ip, dibh->b_data);
589 brelse(dibh); 589 brelse(dibh);
590 return 0; 590 return 0;
591 591
592 fail_end_trans: 592 fail_end_trans:
593 gfs2_trans_end(sdp); 593 gfs2_trans_end(sdp);
594 594
595 fail_ipreserv: 595 fail_ipreserv:
596 if (alloc_required) 596 if (alloc_required)
597 gfs2_inplace_release(dip); 597 gfs2_inplace_release(dip);
598 598
599 fail_quota_locks: 599 fail_quota_locks:
600 gfs2_quota_unlock(dip); 600 gfs2_quota_unlock(dip);
601 601
602 fail: 602 fail:
603 return error; 603 return error;
604 } 604 }
605 605
606 static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 606 static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
607 void *fs_info) 607 void *fs_info)
608 { 608 {
609 const struct xattr *xattr; 609 const struct xattr *xattr;
610 int err = 0; 610 int err = 0;
611 611
612 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 612 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
613 err = __gfs2_xattr_set(inode, xattr->name, xattr->value, 613 err = __gfs2_xattr_set(inode, xattr->name, xattr->value,
614 xattr->value_len, 0, 614 xattr->value_len, 0,
615 GFS2_EATYPE_SECURITY); 615 GFS2_EATYPE_SECURITY);
616 if (err < 0) 616 if (err < 0)
617 break; 617 break;
618 } 618 }
619 return err; 619 return err;
620 } 620 }
621 621
622 static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, 622 static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
623 const struct qstr *qstr) 623 const struct qstr *qstr)
624 { 624 {
625 return security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr, 625 return security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr,
626 &gfs2_initxattrs, NULL); 626 &gfs2_initxattrs, NULL);
627 } 627 }
628 628
629 /** 629 /**
630 * gfs2_create_inode - Create a new inode 630 * gfs2_create_inode - Create a new inode
631 * @dir: The parent directory 631 * @dir: The parent directory
632 * @dentry: The new dentry 632 * @dentry: The new dentry
633 * @mode: The permissions on the new inode 633 * @mode: The permissions on the new inode
634 * @dev: For device nodes, this is the device number 634 * @dev: For device nodes, this is the device number
635 * @symname: For symlinks, this is the link destination 635 * @symname: For symlinks, this is the link destination
636 * @size: The initial size of the inode (ignored for directories) 636 * @size: The initial size of the inode (ignored for directories)
637 * 637 *
638 * Returns: 0 on success, or error code 638 * Returns: 0 on success, or error code
639 */ 639 */
640 640
641 static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, 641 static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
642 umode_t mode, dev_t dev, const char *symname, 642 umode_t mode, dev_t dev, const char *symname,
643 unsigned int size, int excl) 643 unsigned int size, int excl)
644 { 644 {
645 const struct qstr *name = &dentry->d_name; 645 const struct qstr *name = &dentry->d_name;
646 struct gfs2_holder ghs[2]; 646 struct gfs2_holder ghs[2];
647 struct inode *inode = NULL; 647 struct inode *inode = NULL;
648 struct gfs2_inode *dip = GFS2_I(dir), *ip; 648 struct gfs2_inode *dip = GFS2_I(dir), *ip;
649 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 649 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
650 struct gfs2_glock *io_gl; 650 struct gfs2_glock *io_gl;
651 int error; 651 int error;
652 struct buffer_head *bh = NULL; 652 struct buffer_head *bh = NULL;
653 u32 aflags = 0;
653 654
654 if (!name->len || name->len > GFS2_FNAMESIZE) 655 if (!name->len || name->len > GFS2_FNAMESIZE)
655 return -ENAMETOOLONG; 656 return -ENAMETOOLONG;
656 657
657 error = gfs2_rs_alloc(dip); 658 error = gfs2_rs_alloc(dip);
658 if (error) 659 if (error)
659 return error; 660 return error;
660 661
661 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 662 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
662 if (error) 663 if (error)
663 goto fail; 664 goto fail;
664 665
665 error = create_ok(dip, name, mode); 666 error = create_ok(dip, name, mode);
666 if ((error == -EEXIST) && S_ISREG(mode) && !excl) { 667 if ((error == -EEXIST) && S_ISREG(mode) && !excl) {
667 inode = gfs2_lookupi(dir, &dentry->d_name, 0); 668 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
668 gfs2_glock_dq_uninit(ghs); 669 gfs2_glock_dq_uninit(ghs);
669 d_instantiate(dentry, inode); 670 d_instantiate(dentry, inode);
670 return IS_ERR(inode) ? PTR_ERR(inode) : 0; 671 return IS_ERR(inode) ? PTR_ERR(inode) : 0;
671 } 672 }
672 if (error) 673 if (error)
673 goto fail_gunlock; 674 goto fail_gunlock;
674 675
675 inode = new_inode(sdp->sd_vfs); 676 inode = new_inode(sdp->sd_vfs);
676 ip = GFS2_I(inode); 677 ip = GFS2_I(inode);
677 error = gfs2_rs_alloc(ip); 678 error = gfs2_rs_alloc(ip);
678 if (error) 679 if (error)
679 goto fail_free_inode; 680 goto fail_free_inode;
680 681
681 set_bit(GIF_INVALID, &ip->i_flags); 682 set_bit(GIF_INVALID, &ip->i_flags);
682 inode->i_mode = mode; 683 inode->i_mode = mode;
683 inode->i_rdev = dev; 684 inode->i_rdev = dev;
684 inode->i_size = size; 685 inode->i_size = size;
685 munge_mode_uid_gid(dip, inode); 686 munge_mode_uid_gid(dip, inode);
686 ip->i_goal = dip->i_goal; 687 ip->i_goal = dip->i_goal;
687 688
688 error = alloc_dinode(ip); 689 if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) ||
690 (dip->i_diskflags & GFS2_DIF_TOPDIR))
691 aflags |= GFS2_AF_ORLOV;
692
693 error = alloc_dinode(ip, aflags);
689 if (error) 694 if (error)
690 goto fail_free_inode; 695 goto fail_free_inode;
691 696
692 error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 697 error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
693 if (error) 698 if (error)
694 goto fail_free_inode; 699 goto fail_free_inode;
695 700
696 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 701 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
697 if (error) 702 if (error)
698 goto fail_free_inode; 703 goto fail_free_inode;
699 704
700 error = make_dinode(dip, ip, symname, &bh); 705 error = make_dinode(dip, ip, symname, &bh);
701 if (error) 706 if (error)
702 goto fail_gunlock2; 707 goto fail_gunlock2;
703 708
704 error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 709 error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
705 if (error) 710 if (error)
706 goto fail_gunlock2; 711 goto fail_gunlock2;
707 712
708 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 713 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
709 if (error) 714 if (error)
710 goto fail_gunlock2; 715 goto fail_gunlock2;
711 716
712 ip->i_iopen_gh.gh_gl->gl_object = ip; 717 ip->i_iopen_gh.gh_gl->gl_object = ip;
713 gfs2_glock_put(io_gl); 718 gfs2_glock_put(io_gl);
714 gfs2_set_iop(inode); 719 gfs2_set_iop(inode);
715 insert_inode_hash(inode); 720 insert_inode_hash(inode);
716 721
717 error = gfs2_inode_refresh(ip); 722 error = gfs2_inode_refresh(ip);
718 if (error) 723 if (error)
719 goto fail_gunlock3; 724 goto fail_gunlock3;
720 725
721 error = gfs2_acl_create(dip, inode); 726 error = gfs2_acl_create(dip, inode);
722 if (error) 727 if (error)
723 goto fail_gunlock3; 728 goto fail_gunlock3;
724 729
725 error = gfs2_security_init(dip, ip, name); 730 error = gfs2_security_init(dip, ip, name);
726 if (error) 731 if (error)
727 goto fail_gunlock3; 732 goto fail_gunlock3;
728 733
729 error = link_dinode(dip, name, ip); 734 error = link_dinode(dip, name, ip);
730 if (error) 735 if (error)
731 goto fail_gunlock3; 736 goto fail_gunlock3;
732 737
733 if (bh) 738 if (bh)
734 brelse(bh); 739 brelse(bh);
735 740
736 gfs2_trans_end(sdp); 741 gfs2_trans_end(sdp);
737 gfs2_inplace_release(dip); 742 gfs2_inplace_release(dip);
738 gfs2_quota_unlock(dip); 743 gfs2_quota_unlock(dip);
739 mark_inode_dirty(inode); 744 mark_inode_dirty(inode);
740 gfs2_glock_dq_uninit_m(2, ghs); 745 gfs2_glock_dq_uninit_m(2, ghs);
741 d_instantiate(dentry, inode); 746 d_instantiate(dentry, inode);
742 return 0; 747 return 0;
743 748
744 fail_gunlock3: 749 fail_gunlock3:
745 gfs2_glock_dq_uninit(ghs + 1); 750 gfs2_glock_dq_uninit(ghs + 1);
746 if (ip->i_gl) 751 if (ip->i_gl)
747 gfs2_glock_put(ip->i_gl); 752 gfs2_glock_put(ip->i_gl);
748 goto fail_gunlock; 753 goto fail_gunlock;
749 754
750 fail_gunlock2: 755 fail_gunlock2:
751 gfs2_glock_dq_uninit(ghs + 1); 756 gfs2_glock_dq_uninit(ghs + 1);
752 fail_free_inode: 757 fail_free_inode:
753 if (ip->i_gl) 758 if (ip->i_gl)
754 gfs2_glock_put(ip->i_gl); 759 gfs2_glock_put(ip->i_gl);
755 gfs2_rs_delete(ip); 760 gfs2_rs_delete(ip);
756 free_inode_nonrcu(inode); 761 free_inode_nonrcu(inode);
757 inode = NULL; 762 inode = NULL;
758 fail_gunlock: 763 fail_gunlock:
759 gfs2_glock_dq_uninit(ghs); 764 gfs2_glock_dq_uninit(ghs);
760 if (inode && !IS_ERR(inode)) { 765 if (inode && !IS_ERR(inode)) {
761 set_bit(GIF_ALLOC_FAILED, &GFS2_I(inode)->i_flags); 766 set_bit(GIF_ALLOC_FAILED, &GFS2_I(inode)->i_flags);
762 iput(inode); 767 iput(inode);
763 } 768 }
764 fail: 769 fail:
765 if (bh) 770 if (bh)
766 brelse(bh); 771 brelse(bh);
767 return error; 772 return error;
768 } 773 }
769 774
770 /** 775 /**
771 * gfs2_create - Create a file 776 * gfs2_create - Create a file
772 * @dir: The directory in which to create the file 777 * @dir: The directory in which to create the file
773 * @dentry: The dentry of the new file 778 * @dentry: The dentry of the new file
774 * @mode: The mode of the new file 779 * @mode: The mode of the new file
775 * 780 *
776 * Returns: errno 781 * Returns: errno
777 */ 782 */
778 783
779 static int gfs2_create(struct inode *dir, struct dentry *dentry, 784 static int gfs2_create(struct inode *dir, struct dentry *dentry,
780 umode_t mode, bool excl) 785 umode_t mode, bool excl)
781 { 786 {
782 return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl); 787 return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl);
783 } 788 }
784 789
785 /** 790 /**
786 * gfs2_lookup - Look up a filename in a directory and return its inode 791 * gfs2_lookup - Look up a filename in a directory and return its inode
787 * @dir: The directory inode 792 * @dir: The directory inode
788 * @dentry: The dentry of the new inode 793 * @dentry: The dentry of the new inode
789 * @nd: passed from Linux VFS, ignored by us 794 * @nd: passed from Linux VFS, ignored by us
790 * 795 *
791 * Called by the VFS layer. Lock dir and call gfs2_lookupi() 796 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
792 * 797 *
793 * Returns: errno 798 * Returns: errno
794 */ 799 */
795 800
796 static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, 801 static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
797 unsigned int flags) 802 unsigned int flags)
798 { 803 {
799 struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0); 804 struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0);
800 if (inode && !IS_ERR(inode)) { 805 if (inode && !IS_ERR(inode)) {
801 struct gfs2_glock *gl = GFS2_I(inode)->i_gl; 806 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
802 struct gfs2_holder gh; 807 struct gfs2_holder gh;
803 int error; 808 int error;
804 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); 809 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
805 if (error) { 810 if (error) {
806 iput(inode); 811 iput(inode);
807 return ERR_PTR(error); 812 return ERR_PTR(error);
808 } 813 }
809 gfs2_glock_dq_uninit(&gh); 814 gfs2_glock_dq_uninit(&gh);
810 } 815 }
811 return d_splice_alias(inode, dentry); 816 return d_splice_alias(inode, dentry);
812 } 817 }
813 818
814 /** 819 /**
815 * gfs2_link - Link to a file 820 * gfs2_link - Link to a file
816 * @old_dentry: The inode to link 821 * @old_dentry: The inode to link
817 * @dir: Add link to this directory 822 * @dir: Add link to this directory
818 * @dentry: The name of the link 823 * @dentry: The name of the link
819 * 824 *
820 * Link the inode in "old_dentry" into the directory "dir" with the 825 * Link the inode in "old_dentry" into the directory "dir" with the
821 * name in "dentry". 826 * name in "dentry".
822 * 827 *
823 * Returns: errno 828 * Returns: errno
824 */ 829 */
825 830
826 static int gfs2_link(struct dentry *old_dentry, struct inode *dir, 831 static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
827 struct dentry *dentry) 832 struct dentry *dentry)
828 { 833 {
829 struct gfs2_inode *dip = GFS2_I(dir); 834 struct gfs2_inode *dip = GFS2_I(dir);
830 struct gfs2_sbd *sdp = GFS2_SB(dir); 835 struct gfs2_sbd *sdp = GFS2_SB(dir);
831 struct inode *inode = old_dentry->d_inode; 836 struct inode *inode = old_dentry->d_inode;
832 struct gfs2_inode *ip = GFS2_I(inode); 837 struct gfs2_inode *ip = GFS2_I(inode);
833 struct gfs2_holder ghs[2]; 838 struct gfs2_holder ghs[2];
834 struct buffer_head *dibh; 839 struct buffer_head *dibh;
835 int alloc_required; 840 int alloc_required;
836 int error; 841 int error;
837 842
838 if (S_ISDIR(inode->i_mode)) 843 if (S_ISDIR(inode->i_mode))
839 return -EPERM; 844 return -EPERM;
840 845
841 error = gfs2_rs_alloc(dip); 846 error = gfs2_rs_alloc(dip);
842 if (error) 847 if (error)
843 return error; 848 return error;
844 849
845 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 850 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
846 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 851 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
847 852
848 error = gfs2_glock_nq(ghs); /* parent */ 853 error = gfs2_glock_nq(ghs); /* parent */
849 if (error) 854 if (error)
850 goto out_parent; 855 goto out_parent;
851 856
852 error = gfs2_glock_nq(ghs + 1); /* child */ 857 error = gfs2_glock_nq(ghs + 1); /* child */
853 if (error) 858 if (error)
854 goto out_child; 859 goto out_child;
855 860
856 error = -ENOENT; 861 error = -ENOENT;
857 if (inode->i_nlink == 0) 862 if (inode->i_nlink == 0)
858 goto out_gunlock; 863 goto out_gunlock;
859 864
860 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); 865 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
861 if (error) 866 if (error)
862 goto out_gunlock; 867 goto out_gunlock;
863 868
864 error = gfs2_dir_check(dir, &dentry->d_name, NULL); 869 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
865 switch (error) { 870 switch (error) {
866 case -ENOENT: 871 case -ENOENT:
867 break; 872 break;
868 case 0: 873 case 0:
869 error = -EEXIST; 874 error = -EEXIST;
870 default: 875 default:
871 goto out_gunlock; 876 goto out_gunlock;
872 } 877 }
873 878
874 error = -EINVAL; 879 error = -EINVAL;
875 if (!dip->i_inode.i_nlink) 880 if (!dip->i_inode.i_nlink)
876 goto out_gunlock; 881 goto out_gunlock;
877 error = -EFBIG; 882 error = -EFBIG;
878 if (dip->i_entries == (u32)-1) 883 if (dip->i_entries == (u32)-1)
879 goto out_gunlock; 884 goto out_gunlock;
880 error = -EPERM; 885 error = -EPERM;
881 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 886 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
882 goto out_gunlock; 887 goto out_gunlock;
883 error = -EINVAL; 888 error = -EINVAL;
884 if (!ip->i_inode.i_nlink) 889 if (!ip->i_inode.i_nlink)
885 goto out_gunlock; 890 goto out_gunlock;
886 error = -EMLINK; 891 error = -EMLINK;
887 if (ip->i_inode.i_nlink == (u32)-1) 892 if (ip->i_inode.i_nlink == (u32)-1)
888 goto out_gunlock; 893 goto out_gunlock;
889 894
890 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name); 895 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
891 if (error < 0) 896 if (error < 0)
892 goto out_gunlock; 897 goto out_gunlock;
893 error = 0; 898 error = 0;
894 899
895 if (alloc_required) { 900 if (alloc_required) {
896 error = gfs2_quota_lock_check(dip); 901 error = gfs2_quota_lock_check(dip);
897 if (error) 902 if (error)
898 goto out_gunlock; 903 goto out_gunlock;
899 904
900 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); 905 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0);
901 if (error) 906 if (error)
902 goto out_gunlock_q; 907 goto out_gunlock_q;
903 908
904 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 909 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
905 gfs2_rg_blocks(dip, sdp->sd_max_dirres) + 910 gfs2_rg_blocks(dip, sdp->sd_max_dirres) +
906 2 * RES_DINODE + RES_STATFS + 911 2 * RES_DINODE + RES_STATFS +
907 RES_QUOTA, 0); 912 RES_QUOTA, 0);
908 if (error) 913 if (error)
909 goto out_ipres; 914 goto out_ipres;
910 } else { 915 } else {
911 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0); 916 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
912 if (error) 917 if (error)
913 goto out_ipres; 918 goto out_ipres;
914 } 919 }
915 920
916 error = gfs2_meta_inode_buffer(ip, &dibh); 921 error = gfs2_meta_inode_buffer(ip, &dibh);
917 if (error) 922 if (error)
918 goto out_end_trans; 923 goto out_end_trans;
919 924
920 error = gfs2_dir_add(dir, &dentry->d_name, ip); 925 error = gfs2_dir_add(dir, &dentry->d_name, ip);
921 if (error) 926 if (error)
922 goto out_brelse; 927 goto out_brelse;
923 928
924 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 929 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
925 inc_nlink(&ip->i_inode); 930 inc_nlink(&ip->i_inode);
926 ip->i_inode.i_ctime = CURRENT_TIME; 931 ip->i_inode.i_ctime = CURRENT_TIME;
927 ihold(inode); 932 ihold(inode);
928 d_instantiate(dentry, inode); 933 d_instantiate(dentry, inode);
929 mark_inode_dirty(inode); 934 mark_inode_dirty(inode);
930 935
931 out_brelse: 936 out_brelse:
932 brelse(dibh); 937 brelse(dibh);
933 out_end_trans: 938 out_end_trans:
934 gfs2_trans_end(sdp); 939 gfs2_trans_end(sdp);
935 out_ipres: 940 out_ipres:
936 if (alloc_required) 941 if (alloc_required)
937 gfs2_inplace_release(dip); 942 gfs2_inplace_release(dip);
938 out_gunlock_q: 943 out_gunlock_q:
939 if (alloc_required) 944 if (alloc_required)
940 gfs2_quota_unlock(dip); 945 gfs2_quota_unlock(dip);
941 out_gunlock: 946 out_gunlock:
942 gfs2_glock_dq(ghs + 1); 947 gfs2_glock_dq(ghs + 1);
943 out_child: 948 out_child:
944 gfs2_glock_dq(ghs); 949 gfs2_glock_dq(ghs);
945 out_parent: 950 out_parent:
946 gfs2_holder_uninit(ghs); 951 gfs2_holder_uninit(ghs);
947 gfs2_holder_uninit(ghs + 1); 952 gfs2_holder_uninit(ghs + 1);
948 return error; 953 return error;
949 } 954 }
950 955
951 /* 956 /*
952 * gfs2_unlink_ok - check to see that a inode is still in a directory 957 * gfs2_unlink_ok - check to see that a inode is still in a directory
953 * @dip: the directory 958 * @dip: the directory
954 * @name: the name of the file 959 * @name: the name of the file
955 * @ip: the inode 960 * @ip: the inode
956 * 961 *
957 * Assumes that the lock on (at least) @dip is held. 962 * Assumes that the lock on (at least) @dip is held.
958 * 963 *
959 * Returns: 0 if the parent/child relationship is correct, errno if it isn't 964 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
960 */ 965 */
961 966
962 static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, 967 static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
963 const struct gfs2_inode *ip) 968 const struct gfs2_inode *ip)
964 { 969 {
965 int error; 970 int error;
966 971
967 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) 972 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
968 return -EPERM; 973 return -EPERM;
969 974
970 if ((dip->i_inode.i_mode & S_ISVTX) && 975 if ((dip->i_inode.i_mode & S_ISVTX) &&
971 dip->i_inode.i_uid != current_fsuid() && 976 dip->i_inode.i_uid != current_fsuid() &&
972 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) 977 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
973 return -EPERM; 978 return -EPERM;
974 979
975 if (IS_APPEND(&dip->i_inode)) 980 if (IS_APPEND(&dip->i_inode))
976 return -EPERM; 981 return -EPERM;
977 982
978 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 983 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
979 if (error) 984 if (error)
980 return error; 985 return error;
981 986
982 error = gfs2_dir_check(&dip->i_inode, name, ip); 987 error = gfs2_dir_check(&dip->i_inode, name, ip);
983 if (error) 988 if (error)
984 return error; 989 return error;
985 990
986 return 0; 991 return 0;
987 } 992 }
988 993
989 /** 994 /**
990 * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it 995 * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it
991 * @dip: The parent directory 996 * @dip: The parent directory
992 * @name: The name of the entry in the parent directory 997 * @name: The name of the entry in the parent directory
993 * @bh: The inode buffer for the inode to be removed 998 * @bh: The inode buffer for the inode to be removed
994 * @inode: The inode to be removed 999 * @inode: The inode to be removed
995 * 1000 *
996 * Called with all the locks and in a transaction. This will only be 1001 * Called with all the locks and in a transaction. This will only be
997 * called for a directory after it has been checked to ensure it is empty. 1002 * called for a directory after it has been checked to ensure it is empty.
998 * 1003 *
999 * Returns: 0 on success, or an error 1004 * Returns: 0 on success, or an error
1000 */ 1005 */
1001 1006
1002 static int gfs2_unlink_inode(struct gfs2_inode *dip, 1007 static int gfs2_unlink_inode(struct gfs2_inode *dip,
1003 const struct dentry *dentry, 1008 const struct dentry *dentry,
1004 struct buffer_head *bh) 1009 struct buffer_head *bh)
1005 { 1010 {
1006 struct inode *inode = dentry->d_inode; 1011 struct inode *inode = dentry->d_inode;
1007 struct gfs2_inode *ip = GFS2_I(inode); 1012 struct gfs2_inode *ip = GFS2_I(inode);
1008 int error; 1013 int error;
1009 1014
1010 error = gfs2_dir_del(dip, dentry); 1015 error = gfs2_dir_del(dip, dentry);
1011 if (error) 1016 if (error)
1012 return error; 1017 return error;
1013 1018
1014 ip->i_entries = 0; 1019 ip->i_entries = 0;
1015 inode->i_ctime = CURRENT_TIME; 1020 inode->i_ctime = CURRENT_TIME;
1016 if (S_ISDIR(inode->i_mode)) 1021 if (S_ISDIR(inode->i_mode))
1017 clear_nlink(inode); 1022 clear_nlink(inode);
1018 else 1023 else
1019 drop_nlink(inode); 1024 drop_nlink(inode);
1020 mark_inode_dirty(inode); 1025 mark_inode_dirty(inode);
1021 if (inode->i_nlink == 0) 1026 if (inode->i_nlink == 0)
1022 gfs2_unlink_di(inode); 1027 gfs2_unlink_di(inode);
1023 return 0; 1028 return 0;
1024 } 1029 }
1025 1030
1026 1031
1027 /** 1032 /**
1028 * gfs2_unlink - Unlink an inode (this does rmdir as well) 1033 * gfs2_unlink - Unlink an inode (this does rmdir as well)
1029 * @dir: The inode of the directory containing the inode to unlink 1034 * @dir: The inode of the directory containing the inode to unlink
1030 * @dentry: The file itself 1035 * @dentry: The file itself
1031 * 1036 *
1032 * This routine uses the type of the inode as a flag to figure out 1037 * This routine uses the type of the inode as a flag to figure out
1033 * whether this is an unlink or an rmdir. 1038 * whether this is an unlink or an rmdir.
1034 * 1039 *
1035 * Returns: errno 1040 * Returns: errno
1036 */ 1041 */
1037 1042
1038 static int gfs2_unlink(struct inode *dir, struct dentry *dentry) 1043 static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1039 { 1044 {
1040 struct gfs2_inode *dip = GFS2_I(dir); 1045 struct gfs2_inode *dip = GFS2_I(dir);
1041 struct gfs2_sbd *sdp = GFS2_SB(dir); 1046 struct gfs2_sbd *sdp = GFS2_SB(dir);
1042 struct inode *inode = dentry->d_inode; 1047 struct inode *inode = dentry->d_inode;
1043 struct gfs2_inode *ip = GFS2_I(inode); 1048 struct gfs2_inode *ip = GFS2_I(inode);
1044 struct buffer_head *bh; 1049 struct buffer_head *bh;
1045 struct gfs2_holder ghs[3]; 1050 struct gfs2_holder ghs[3];
1046 struct gfs2_rgrpd *rgd; 1051 struct gfs2_rgrpd *rgd;
1047 int error; 1052 int error;
1048 1053
1049 error = gfs2_rindex_update(sdp); 1054 error = gfs2_rindex_update(sdp);
1050 if (error) 1055 if (error)
1051 return error; 1056 return error;
1052 1057
1053 error = -EROFS; 1058 error = -EROFS;
1054 1059
1055 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 1060 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1056 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 1061 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
1057 1062
1058 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); 1063 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1059 if (!rgd) 1064 if (!rgd)
1060 goto out_inodes; 1065 goto out_inodes;
1061 1066
1062 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 1067 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
1063 1068
1064 1069
1065 error = gfs2_glock_nq(ghs); /* parent */ 1070 error = gfs2_glock_nq(ghs); /* parent */
1066 if (error) 1071 if (error)
1067 goto out_parent; 1072 goto out_parent;
1068 1073
1069 error = gfs2_glock_nq(ghs + 1); /* child */ 1074 error = gfs2_glock_nq(ghs + 1); /* child */
1070 if (error) 1075 if (error)
1071 goto out_child; 1076 goto out_child;
1072 1077
1073 error = -ENOENT; 1078 error = -ENOENT;
1074 if (inode->i_nlink == 0) 1079 if (inode->i_nlink == 0)
1075 goto out_rgrp; 1080 goto out_rgrp;
1076 1081
1077 if (S_ISDIR(inode->i_mode)) { 1082 if (S_ISDIR(inode->i_mode)) {
1078 error = -ENOTEMPTY; 1083 error = -ENOTEMPTY;
1079 if (ip->i_entries > 2 || inode->i_nlink > 2) 1084 if (ip->i_entries > 2 || inode->i_nlink > 2)
1080 goto out_rgrp; 1085 goto out_rgrp;
1081 } 1086 }
1082 1087
1083 error = gfs2_glock_nq(ghs + 2); /* rgrp */ 1088 error = gfs2_glock_nq(ghs + 2); /* rgrp */
1084 if (error) 1089 if (error)
1085 goto out_rgrp; 1090 goto out_rgrp;
1086 1091
1087 error = gfs2_unlink_ok(dip, &dentry->d_name, ip); 1092 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
1088 if (error) 1093 if (error)
1089 goto out_gunlock; 1094 goto out_gunlock;
1090 1095
1091 error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0); 1096 error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0);
1092 if (error) 1097 if (error)
1093 goto out_gunlock; 1098 goto out_gunlock;
1094 1099
1095 error = gfs2_meta_inode_buffer(ip, &bh); 1100 error = gfs2_meta_inode_buffer(ip, &bh);
1096 if (error) 1101 if (error)
1097 goto out_end_trans; 1102 goto out_end_trans;
1098 1103
1099 error = gfs2_unlink_inode(dip, dentry, bh); 1104 error = gfs2_unlink_inode(dip, dentry, bh);
1100 brelse(bh); 1105 brelse(bh);
1101 1106
1102 out_end_trans: 1107 out_end_trans:
1103 gfs2_trans_end(sdp); 1108 gfs2_trans_end(sdp);
1104 out_gunlock: 1109 out_gunlock:
1105 gfs2_glock_dq(ghs + 2); 1110 gfs2_glock_dq(ghs + 2);
1106 out_rgrp: 1111 out_rgrp:
1107 gfs2_glock_dq(ghs + 1); 1112 gfs2_glock_dq(ghs + 1);
1108 out_child: 1113 out_child:
1109 gfs2_glock_dq(ghs); 1114 gfs2_glock_dq(ghs);
1110 out_parent: 1115 out_parent:
1111 gfs2_holder_uninit(ghs + 2); 1116 gfs2_holder_uninit(ghs + 2);
1112 out_inodes: 1117 out_inodes:
1113 gfs2_holder_uninit(ghs + 1); 1118 gfs2_holder_uninit(ghs + 1);
1114 gfs2_holder_uninit(ghs); 1119 gfs2_holder_uninit(ghs);
1115 return error; 1120 return error;
1116 } 1121 }
1117 1122
1118 /** 1123 /**
1119 * gfs2_symlink - Create a symlink 1124 * gfs2_symlink - Create a symlink
1120 * @dir: The directory to create the symlink in 1125 * @dir: The directory to create the symlink in
1121 * @dentry: The dentry to put the symlink in 1126 * @dentry: The dentry to put the symlink in
1122 * @symname: The thing which the link points to 1127 * @symname: The thing which the link points to
1123 * 1128 *
1124 * Returns: errno 1129 * Returns: errno
1125 */ 1130 */
1126 1131
1127 static int gfs2_symlink(struct inode *dir, struct dentry *dentry, 1132 static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
1128 const char *symname) 1133 const char *symname)
1129 { 1134 {
1130 struct gfs2_sbd *sdp = GFS2_SB(dir); 1135 struct gfs2_sbd *sdp = GFS2_SB(dir);
1131 unsigned int size; 1136 unsigned int size;
1132 1137
1133 size = strlen(symname); 1138 size = strlen(symname);
1134 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) 1139 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
1135 return -ENAMETOOLONG; 1140 return -ENAMETOOLONG;
1136 1141
1137 return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size, 0); 1142 return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size, 0);
1138 } 1143 }
1139 1144
1140 /** 1145 /**
1141 * gfs2_mkdir - Make a directory 1146 * gfs2_mkdir - Make a directory
1142 * @dir: The parent directory of the new one 1147 * @dir: The parent directory of the new one
1143 * @dentry: The dentry of the new directory 1148 * @dentry: The dentry of the new directory
1144 * @mode: The mode of the new directory 1149 * @mode: The mode of the new directory
1145 * 1150 *
1146 * Returns: errno 1151 * Returns: errno
1147 */ 1152 */
1148 1153
1149 static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 1154 static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1150 { 1155 {
1151 return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0); 1156 return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0);
1152 } 1157 }
1153 1158
1154 /** 1159 /**
1155 * gfs2_mknod - Make a special file 1160 * gfs2_mknod - Make a special file
1156 * @dir: The directory in which the special file will reside 1161 * @dir: The directory in which the special file will reside
1157 * @dentry: The dentry of the special file 1162 * @dentry: The dentry of the special file
1158 * @mode: The mode of the special file 1163 * @mode: The mode of the special file
1159 * @dev: The device specification of the special file 1164 * @dev: The device specification of the special file
1160 * 1165 *
1161 */ 1166 */
1162 1167
1163 static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, 1168 static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
1164 dev_t dev) 1169 dev_t dev)
1165 { 1170 {
1166 return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0); 1171 return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0);
1167 } 1172 }
1168 1173
1169 /* 1174 /*
1170 * gfs2_ok_to_move - check if it's ok to move a directory to another directory 1175 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1171 * @this: move this 1176 * @this: move this
1172 * @to: to here 1177 * @to: to here
1173 * 1178 *
1174 * Follow @to back to the root and make sure we don't encounter @this 1179 * Follow @to back to the root and make sure we don't encounter @this
1175 * Assumes we already hold the rename lock. 1180 * Assumes we already hold the rename lock.
1176 * 1181 *
1177 * Returns: errno 1182 * Returns: errno
1178 */ 1183 */
1179 1184
1180 static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) 1185 static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1181 { 1186 {
1182 struct inode *dir = &to->i_inode; 1187 struct inode *dir = &to->i_inode;
1183 struct super_block *sb = dir->i_sb; 1188 struct super_block *sb = dir->i_sb;
1184 struct inode *tmp; 1189 struct inode *tmp;
1185 int error = 0; 1190 int error = 0;
1186 1191
1187 igrab(dir); 1192 igrab(dir);
1188 1193
1189 for (;;) { 1194 for (;;) {
1190 if (dir == &this->i_inode) { 1195 if (dir == &this->i_inode) {
1191 error = -EINVAL; 1196 error = -EINVAL;
1192 break; 1197 break;
1193 } 1198 }
1194 if (dir == sb->s_root->d_inode) { 1199 if (dir == sb->s_root->d_inode) {
1195 error = 0; 1200 error = 0;
1196 break; 1201 break;
1197 } 1202 }
1198 1203
1199 tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); 1204 tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
1200 if (IS_ERR(tmp)) { 1205 if (IS_ERR(tmp)) {
1201 error = PTR_ERR(tmp); 1206 error = PTR_ERR(tmp);
1202 break; 1207 break;
1203 } 1208 }
1204 1209
1205 iput(dir); 1210 iput(dir);
1206 dir = tmp; 1211 dir = tmp;
1207 } 1212 }
1208 1213
1209 iput(dir); 1214 iput(dir);
1210 1215
1211 return error; 1216 return error;
1212 } 1217 }
1213 1218
1214 /** 1219 /**
1215 * gfs2_rename - Rename a file 1220 * gfs2_rename - Rename a file
1216 * @odir: Parent directory of old file name 1221 * @odir: Parent directory of old file name
1217 * @odentry: The old dentry of the file 1222 * @odentry: The old dentry of the file
1218 * @ndir: Parent directory of new file name 1223 * @ndir: Parent directory of new file name
1219 * @ndentry: The new dentry of the file 1224 * @ndentry: The new dentry of the file
1220 * 1225 *
1221 * Returns: errno 1226 * Returns: errno
1222 */ 1227 */
1223 1228
1224 static int gfs2_rename(struct inode *odir, struct dentry *odentry, 1229 static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1225 struct inode *ndir, struct dentry *ndentry) 1230 struct inode *ndir, struct dentry *ndentry)
1226 { 1231 {
1227 struct gfs2_inode *odip = GFS2_I(odir); 1232 struct gfs2_inode *odip = GFS2_I(odir);
1228 struct gfs2_inode *ndip = GFS2_I(ndir); 1233 struct gfs2_inode *ndip = GFS2_I(ndir);
1229 struct gfs2_inode *ip = GFS2_I(odentry->d_inode); 1234 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
1230 struct gfs2_inode *nip = NULL; 1235 struct gfs2_inode *nip = NULL;
1231 struct gfs2_sbd *sdp = GFS2_SB(odir); 1236 struct gfs2_sbd *sdp = GFS2_SB(odir);
1232 struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; 1237 struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, };
1233 struct gfs2_rgrpd *nrgd; 1238 struct gfs2_rgrpd *nrgd;
1234 unsigned int num_gh; 1239 unsigned int num_gh;
1235 int dir_rename = 0; 1240 int dir_rename = 0;
1236 int alloc_required = 0; 1241 int alloc_required = 0;
1237 unsigned int x; 1242 unsigned int x;
1238 int error; 1243 int error;
1239 1244
1240 if (ndentry->d_inode) { 1245 if (ndentry->d_inode) {
1241 nip = GFS2_I(ndentry->d_inode); 1246 nip = GFS2_I(ndentry->d_inode);
1242 if (ip == nip) 1247 if (ip == nip)
1243 return 0; 1248 return 0;
1244 } 1249 }
1245 1250
1246 error = gfs2_rindex_update(sdp); 1251 error = gfs2_rindex_update(sdp);
1247 if (error) 1252 if (error)
1248 return error; 1253 return error;
1249 1254
1250 error = gfs2_rs_alloc(ndip); 1255 error = gfs2_rs_alloc(ndip);
1251 if (error) 1256 if (error)
1252 return error; 1257 return error;
1253 1258
1254 if (odip != ndip) { 1259 if (odip != ndip) {
1255 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 1260 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
1256 0, &r_gh); 1261 0, &r_gh);
1257 if (error) 1262 if (error)
1258 goto out; 1263 goto out;
1259 1264
1260 if (S_ISDIR(ip->i_inode.i_mode)) { 1265 if (S_ISDIR(ip->i_inode.i_mode)) {
1261 dir_rename = 1; 1266 dir_rename = 1;
1262 /* don't move a dirctory into it's subdir */ 1267 /* don't move a dirctory into it's subdir */
1263 error = gfs2_ok_to_move(ip, ndip); 1268 error = gfs2_ok_to_move(ip, ndip);
1264 if (error) 1269 if (error)
1265 goto out_gunlock_r; 1270 goto out_gunlock_r;
1266 } 1271 }
1267 } 1272 }
1268 1273
1269 num_gh = 1; 1274 num_gh = 1;
1270 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 1275 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1271 if (odip != ndip) { 1276 if (odip != ndip) {
1272 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); 1277 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1273 num_gh++; 1278 num_gh++;
1274 } 1279 }
1275 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); 1280 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1276 num_gh++; 1281 num_gh++;
1277 1282
1278 if (nip) { 1283 if (nip) {
1279 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh); 1284 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1280 num_gh++; 1285 num_gh++;
1281 /* grab the resource lock for unlink flag twiddling 1286 /* grab the resource lock for unlink flag twiddling
1282 * this is the case of the target file already existing 1287 * this is the case of the target file already existing
1283 * so we unlink before doing the rename 1288 * so we unlink before doing the rename
1284 */ 1289 */
1285 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr, 1); 1290 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr, 1);
1286 if (nrgd) 1291 if (nrgd)
1287 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); 1292 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
1288 } 1293 }
1289 1294
1290 for (x = 0; x < num_gh; x++) { 1295 for (x = 0; x < num_gh; x++) {
1291 error = gfs2_glock_nq(ghs + x); 1296 error = gfs2_glock_nq(ghs + x);
1292 if (error) 1297 if (error)
1293 goto out_gunlock; 1298 goto out_gunlock;
1294 } 1299 }
1295 1300
1296 error = -ENOENT; 1301 error = -ENOENT;
1297 if (ip->i_inode.i_nlink == 0) 1302 if (ip->i_inode.i_nlink == 0)
1298 goto out_gunlock; 1303 goto out_gunlock;
1299 1304
1300 /* Check out the old directory */ 1305 /* Check out the old directory */
1301 1306
1302 error = gfs2_unlink_ok(odip, &odentry->d_name, ip); 1307 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
1303 if (error) 1308 if (error)
1304 goto out_gunlock; 1309 goto out_gunlock;
1305 1310
1306 /* Check out the new directory */ 1311 /* Check out the new directory */
1307 1312
1308 if (nip) { 1313 if (nip) {
1309 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip); 1314 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
1310 if (error) 1315 if (error)
1311 goto out_gunlock; 1316 goto out_gunlock;
1312 1317
1313 if (nip->i_inode.i_nlink == 0) { 1318 if (nip->i_inode.i_nlink == 0) {
1314 error = -EAGAIN; 1319 error = -EAGAIN;
1315 goto out_gunlock; 1320 goto out_gunlock;
1316 } 1321 }
1317 1322
1318 if (S_ISDIR(nip->i_inode.i_mode)) { 1323 if (S_ISDIR(nip->i_inode.i_mode)) {
1319 if (nip->i_entries < 2) { 1324 if (nip->i_entries < 2) {
1320 gfs2_consist_inode(nip); 1325 gfs2_consist_inode(nip);
1321 error = -EIO; 1326 error = -EIO;
1322 goto out_gunlock; 1327 goto out_gunlock;
1323 } 1328 }
1324 if (nip->i_entries > 2) { 1329 if (nip->i_entries > 2) {
1325 error = -ENOTEMPTY; 1330 error = -ENOTEMPTY;
1326 goto out_gunlock; 1331 goto out_gunlock;
1327 } 1332 }
1328 } 1333 }
1329 } else { 1334 } else {
1330 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC); 1335 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC);
1331 if (error) 1336 if (error)
1332 goto out_gunlock; 1337 goto out_gunlock;
1333 1338
1334 error = gfs2_dir_check(ndir, &ndentry->d_name, NULL); 1339 error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
1335 switch (error) { 1340 switch (error) {
1336 case -ENOENT: 1341 case -ENOENT:
1337 error = 0; 1342 error = 0;
1338 break; 1343 break;
1339 case 0: 1344 case 0:
1340 error = -EEXIST; 1345 error = -EEXIST;
1341 default: 1346 default:
1342 goto out_gunlock; 1347 goto out_gunlock;
1343 }; 1348 };
1344 1349
1345 if (odip != ndip) { 1350 if (odip != ndip) {
1346 if (!ndip->i_inode.i_nlink) { 1351 if (!ndip->i_inode.i_nlink) {
1347 error = -ENOENT; 1352 error = -ENOENT;
1348 goto out_gunlock; 1353 goto out_gunlock;
1349 } 1354 }
1350 if (ndip->i_entries == (u32)-1) { 1355 if (ndip->i_entries == (u32)-1) {
1351 error = -EFBIG; 1356 error = -EFBIG;
1352 goto out_gunlock; 1357 goto out_gunlock;
1353 } 1358 }
1354 if (S_ISDIR(ip->i_inode.i_mode) && 1359 if (S_ISDIR(ip->i_inode.i_mode) &&
1355 ndip->i_inode.i_nlink == (u32)-1) { 1360 ndip->i_inode.i_nlink == (u32)-1) {
1356 error = -EMLINK; 1361 error = -EMLINK;
1357 goto out_gunlock; 1362 goto out_gunlock;
1358 } 1363 }
1359 } 1364 }
1360 } 1365 }
1361 1366
1362 /* Check out the dir to be renamed */ 1367 /* Check out the dir to be renamed */
1363 1368
1364 if (dir_rename) { 1369 if (dir_rename) {
1365 error = gfs2_permission(odentry->d_inode, MAY_WRITE); 1370 error = gfs2_permission(odentry->d_inode, MAY_WRITE);
1366 if (error) 1371 if (error)
1367 goto out_gunlock; 1372 goto out_gunlock;
1368 } 1373 }
1369 1374
1370 if (nip == NULL) 1375 if (nip == NULL)
1371 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name); 1376 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
1372 error = alloc_required; 1377 error = alloc_required;
1373 if (error < 0) 1378 if (error < 0)
1374 goto out_gunlock; 1379 goto out_gunlock;
1375 1380
1376 if (alloc_required) { 1381 if (alloc_required) {
1377 error = gfs2_quota_lock_check(ndip); 1382 error = gfs2_quota_lock_check(ndip);
1378 if (error) 1383 if (error)
1379 goto out_gunlock; 1384 goto out_gunlock;
1380 1385
1381 error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); 1386 error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0);
1382 if (error) 1387 if (error)
1383 goto out_gunlock_q; 1388 goto out_gunlock_q;
1384 1389
1385 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 1390 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
1386 gfs2_rg_blocks(ndip, sdp->sd_max_dirres) + 1391 gfs2_rg_blocks(ndip, sdp->sd_max_dirres) +
1387 4 * RES_DINODE + 4 * RES_LEAF + 1392 4 * RES_DINODE + 4 * RES_LEAF +
1388 RES_STATFS + RES_QUOTA + 4, 0); 1393 RES_STATFS + RES_QUOTA + 4, 0);
1389 if (error) 1394 if (error)
1390 goto out_ipreserv; 1395 goto out_ipreserv;
1391 } else { 1396 } else {
1392 error = gfs2_trans_begin(sdp, 4 * RES_DINODE + 1397 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
1393 5 * RES_LEAF + 4, 0); 1398 5 * RES_LEAF + 4, 0);
1394 if (error) 1399 if (error)
1395 goto out_gunlock; 1400 goto out_gunlock;
1396 } 1401 }
1397 1402
1398 /* Remove the target file, if it exists */ 1403 /* Remove the target file, if it exists */
1399 1404
1400 if (nip) { 1405 if (nip) {
1401 struct buffer_head *bh; 1406 struct buffer_head *bh;
1402 error = gfs2_meta_inode_buffer(nip, &bh); 1407 error = gfs2_meta_inode_buffer(nip, &bh);
1403 if (error) 1408 if (error)
1404 goto out_end_trans; 1409 goto out_end_trans;
1405 error = gfs2_unlink_inode(ndip, ndentry, bh); 1410 error = gfs2_unlink_inode(ndip, ndentry, bh);
1406 brelse(bh); 1411 brelse(bh);
1407 } 1412 }
1408 1413
1409 if (dir_rename) { 1414 if (dir_rename) {
1410 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); 1415 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
1411 if (error) 1416 if (error)
1412 goto out_end_trans; 1417 goto out_end_trans;
1413 } else { 1418 } else {
1414 struct buffer_head *dibh; 1419 struct buffer_head *dibh;
1415 error = gfs2_meta_inode_buffer(ip, &dibh); 1420 error = gfs2_meta_inode_buffer(ip, &dibh);
1416 if (error) 1421 if (error)
1417 goto out_end_trans; 1422 goto out_end_trans;
1418 ip->i_inode.i_ctime = CURRENT_TIME; 1423 ip->i_inode.i_ctime = CURRENT_TIME;
1419 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1424 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1420 gfs2_dinode_out(ip, dibh->b_data); 1425 gfs2_dinode_out(ip, dibh->b_data);
1421 brelse(dibh); 1426 brelse(dibh);
1422 } 1427 }
1423 1428
1424 error = gfs2_dir_del(odip, odentry); 1429 error = gfs2_dir_del(odip, odentry);
1425 if (error) 1430 if (error)
1426 goto out_end_trans; 1431 goto out_end_trans;
1427 1432
1428 error = gfs2_dir_add(ndir, &ndentry->d_name, ip); 1433 error = gfs2_dir_add(ndir, &ndentry->d_name, ip);
1429 if (error) 1434 if (error)
1430 goto out_end_trans; 1435 goto out_end_trans;
1431 1436
1432 out_end_trans: 1437 out_end_trans:
1433 gfs2_trans_end(sdp); 1438 gfs2_trans_end(sdp);
1434 out_ipreserv: 1439 out_ipreserv:
1435 if (alloc_required) 1440 if (alloc_required)
1436 gfs2_inplace_release(ndip); 1441 gfs2_inplace_release(ndip);
1437 out_gunlock_q: 1442 out_gunlock_q:
1438 if (alloc_required) 1443 if (alloc_required)
1439 gfs2_quota_unlock(ndip); 1444 gfs2_quota_unlock(ndip);
1440 out_gunlock: 1445 out_gunlock:
1441 while (x--) { 1446 while (x--) {
1442 gfs2_glock_dq(ghs + x); 1447 gfs2_glock_dq(ghs + x);
1443 gfs2_holder_uninit(ghs + x); 1448 gfs2_holder_uninit(ghs + x);
1444 } 1449 }
1445 out_gunlock_r: 1450 out_gunlock_r:
1446 if (r_gh.gh_gl) 1451 if (r_gh.gh_gl)
1447 gfs2_glock_dq_uninit(&r_gh); 1452 gfs2_glock_dq_uninit(&r_gh);
1448 out: 1453 out:
1449 return error; 1454 return error;
1450 } 1455 }
1451 1456
1452 /** 1457 /**
1453 * gfs2_follow_link - Follow a symbolic link 1458 * gfs2_follow_link - Follow a symbolic link
1454 * @dentry: The dentry of the link 1459 * @dentry: The dentry of the link
1455 * @nd: Data that we pass to vfs_follow_link() 1460 * @nd: Data that we pass to vfs_follow_link()
1456 * 1461 *
1457 * This can handle symlinks of any size. 1462 * This can handle symlinks of any size.
1458 * 1463 *
1459 * Returns: 0 on success or error code 1464 * Returns: 0 on success or error code
1460 */ 1465 */
1461 1466
1462 static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) 1467 static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
1463 { 1468 {
1464 struct gfs2_inode *ip = GFS2_I(dentry->d_inode); 1469 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
1465 struct gfs2_holder i_gh; 1470 struct gfs2_holder i_gh;
1466 struct buffer_head *dibh; 1471 struct buffer_head *dibh;
1467 unsigned int size; 1472 unsigned int size;
1468 char *buf; 1473 char *buf;
1469 int error; 1474 int error;
1470 1475
1471 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); 1476 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
1472 error = gfs2_glock_nq(&i_gh); 1477 error = gfs2_glock_nq(&i_gh);
1473 if (error) { 1478 if (error) {
1474 gfs2_holder_uninit(&i_gh); 1479 gfs2_holder_uninit(&i_gh);
1475 nd_set_link(nd, ERR_PTR(error)); 1480 nd_set_link(nd, ERR_PTR(error));
1476 return NULL; 1481 return NULL;
1477 } 1482 }
1478 1483
1479 size = (unsigned int)i_size_read(&ip->i_inode); 1484 size = (unsigned int)i_size_read(&ip->i_inode);
1480 if (size == 0) { 1485 if (size == 0) {
1481 gfs2_consist_inode(ip); 1486 gfs2_consist_inode(ip);
1482 buf = ERR_PTR(-EIO); 1487 buf = ERR_PTR(-EIO);
1483 goto out; 1488 goto out;
1484 } 1489 }
1485 1490
1486 error = gfs2_meta_inode_buffer(ip, &dibh); 1491 error = gfs2_meta_inode_buffer(ip, &dibh);
1487 if (error) { 1492 if (error) {
1488 buf = ERR_PTR(error); 1493 buf = ERR_PTR(error);
1489 goto out; 1494 goto out;
1490 } 1495 }
1491 1496
1492 buf = kzalloc(size + 1, GFP_NOFS); 1497 buf = kzalloc(size + 1, GFP_NOFS);
1493 if (!buf) 1498 if (!buf)
1494 buf = ERR_PTR(-ENOMEM); 1499 buf = ERR_PTR(-ENOMEM);
1495 else 1500 else
1496 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size); 1501 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
1497 brelse(dibh); 1502 brelse(dibh);
1498 out: 1503 out:
1499 gfs2_glock_dq_uninit(&i_gh); 1504 gfs2_glock_dq_uninit(&i_gh);
1500 nd_set_link(nd, buf); 1505 nd_set_link(nd, buf);
1501 return NULL; 1506 return NULL;
1502 } 1507 }
1503 1508
static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
{
	char *link = nd_get_link(nd);

	/* follow_link may have stashed an ERR_PTR rather than a buffer */
	if (!IS_ERR(link))
		kfree(link);
}
1510 1515
1511 /** 1516 /**
1512 * gfs2_permission - 1517 * gfs2_permission -
1513 * @inode: The inode 1518 * @inode: The inode
1514 * @mask: The mask to be tested 1519 * @mask: The mask to be tested
1515 * @flags: Indicates whether this is an RCU path walk or not 1520 * @flags: Indicates whether this is an RCU path walk or not
1516 * 1521 *
1517 * This may be called from the VFS directly, or from within GFS2 with the 1522 * This may be called from the VFS directly, or from within GFS2 with the
1518 * inode locked, so we look to see if the glock is already locked and only 1523 * inode locked, so we look to see if the glock is already locked and only
1519 * lock the glock if its not already been done. 1524 * lock the glock if its not already been done.
1520 * 1525 *
1521 * Returns: errno 1526 * Returns: errno
1522 */ 1527 */
1523 1528
1524 int gfs2_permission(struct inode *inode, int mask) 1529 int gfs2_permission(struct inode *inode, int mask)
1525 { 1530 {
1526 struct gfs2_inode *ip; 1531 struct gfs2_inode *ip;
1527 struct gfs2_holder i_gh; 1532 struct gfs2_holder i_gh;
1528 int error; 1533 int error;
1529 int unlock = 0; 1534 int unlock = 0;
1530 1535
1531 1536
1532 ip = GFS2_I(inode); 1537 ip = GFS2_I(inode);
1533 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { 1538 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1534 if (mask & MAY_NOT_BLOCK) 1539 if (mask & MAY_NOT_BLOCK)
1535 return -ECHILD; 1540 return -ECHILD;
1536 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 1541 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1537 if (error) 1542 if (error)
1538 return error; 1543 return error;
1539 unlock = 1; 1544 unlock = 1;
1540 } 1545 }
1541 1546
1542 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) 1547 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1543 error = -EACCES; 1548 error = -EACCES;
1544 else 1549 else
1545 error = generic_permission(inode, mask); 1550 error = generic_permission(inode, mask);
1546 if (unlock) 1551 if (unlock)
1547 gfs2_glock_dq_uninit(&i_gh); 1552 gfs2_glock_dq_uninit(&i_gh);
1548 1553
1549 return error; 1554 return error;
1550 } 1555 }
1551 1556
/* Apply already-validated attributes and mark the inode for writeback.
 * Caller must ensure a transaction is active (see gfs2_setattr_simple). */
static int __gfs2_setattr_simple(struct inode *inode, struct iattr *attr)
{
	setattr_copy(inode, attr);
	mark_inode_dirty(inode);
	return 0;
}
1558 1563
1559 /** 1564 /**
1560 * gfs2_setattr_simple - 1565 * gfs2_setattr_simple -
1561 * @ip: 1566 * @ip:
1562 * @attr: 1567 * @attr:
1563 * 1568 *
1564 * Returns: errno 1569 * Returns: errno
1565 */ 1570 */
1566 1571
1567 int gfs2_setattr_simple(struct inode *inode, struct iattr *attr) 1572 int gfs2_setattr_simple(struct inode *inode, struct iattr *attr)
1568 { 1573 {
1569 int error; 1574 int error;
1570 1575
1571 if (current->journal_info) 1576 if (current->journal_info)
1572 return __gfs2_setattr_simple(inode, attr); 1577 return __gfs2_setattr_simple(inode, attr);
1573 1578
1574 error = gfs2_trans_begin(GFS2_SB(inode), RES_DINODE, 0); 1579 error = gfs2_trans_begin(GFS2_SB(inode), RES_DINODE, 0);
1575 if (error) 1580 if (error)
1576 return error; 1581 return error;
1577 1582
1578 error = __gfs2_setattr_simple(inode, attr); 1583 error = __gfs2_setattr_simple(inode, attr);
1579 gfs2_trans_end(GFS2_SB(inode)); 1584 gfs2_trans_end(GFS2_SB(inode));
1580 return error; 1585 return error;
1581 } 1586 }
1582 1587
1583 static int setattr_chown(struct inode *inode, struct iattr *attr) 1588 static int setattr_chown(struct inode *inode, struct iattr *attr)
1584 { 1589 {
1585 struct gfs2_inode *ip = GFS2_I(inode); 1590 struct gfs2_inode *ip = GFS2_I(inode);
1586 struct gfs2_sbd *sdp = GFS2_SB(inode); 1591 struct gfs2_sbd *sdp = GFS2_SB(inode);
1587 u32 ouid, ogid, nuid, ngid; 1592 u32 ouid, ogid, nuid, ngid;
1588 int error; 1593 int error;
1589 1594
1590 ouid = inode->i_uid; 1595 ouid = inode->i_uid;
1591 ogid = inode->i_gid; 1596 ogid = inode->i_gid;
1592 nuid = attr->ia_uid; 1597 nuid = attr->ia_uid;
1593 ngid = attr->ia_gid; 1598 ngid = attr->ia_gid;
1594 1599
1595 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid) 1600 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
1596 ouid = nuid = NO_QUOTA_CHANGE; 1601 ouid = nuid = NO_QUOTA_CHANGE;
1597 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) 1602 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
1598 ogid = ngid = NO_QUOTA_CHANGE; 1603 ogid = ngid = NO_QUOTA_CHANGE;
1599 1604
1600 error = gfs2_quota_lock(ip, nuid, ngid); 1605 error = gfs2_quota_lock(ip, nuid, ngid);
1601 if (error) 1606 if (error)
1602 return error; 1607 return error;
1603 1608
1604 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 1609 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1605 error = gfs2_quota_check(ip, nuid, ngid); 1610 error = gfs2_quota_check(ip, nuid, ngid);
1606 if (error) 1611 if (error)
1607 goto out_gunlock_q; 1612 goto out_gunlock_q;
1608 } 1613 }
1609 1614
1610 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0); 1615 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
1611 if (error) 1616 if (error)
1612 goto out_gunlock_q; 1617 goto out_gunlock_q;
1613 1618
1614 error = gfs2_setattr_simple(inode, attr); 1619 error = gfs2_setattr_simple(inode, attr);
1615 if (error) 1620 if (error)
1616 goto out_end_trans; 1621 goto out_end_trans;
1617 1622
1618 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 1623 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1619 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); 1624 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1620 gfs2_quota_change(ip, -blocks, ouid, ogid); 1625 gfs2_quota_change(ip, -blocks, ouid, ogid);
1621 gfs2_quota_change(ip, blocks, nuid, ngid); 1626 gfs2_quota_change(ip, blocks, nuid, ngid);
1622 } 1627 }
1623 1628
1624 out_end_trans: 1629 out_end_trans:
1625 gfs2_trans_end(sdp); 1630 gfs2_trans_end(sdp);
1626 out_gunlock_q: 1631 out_gunlock_q:
1627 gfs2_quota_unlock(ip); 1632 gfs2_quota_unlock(ip);
1628 return error; 1633 return error;
1629 } 1634 }
1630 1635
1631 /** 1636 /**
1632 * gfs2_setattr - Change attributes on an inode 1637 * gfs2_setattr - Change attributes on an inode
1633 * @dentry: The dentry which is changing 1638 * @dentry: The dentry which is changing
1634 * @attr: The structure describing the change 1639 * @attr: The structure describing the change
1635 * 1640 *
1636 * The VFS layer wants to change one or more of an inodes attributes. Write 1641 * The VFS layer wants to change one or more of an inodes attributes. Write
1637 * that change out to disk. 1642 * that change out to disk.
1638 * 1643 *
1639 * Returns: errno 1644 * Returns: errno
1640 */ 1645 */
1641 1646
1642 static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) 1647 static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1643 { 1648 {
1644 struct inode *inode = dentry->d_inode; 1649 struct inode *inode = dentry->d_inode;
1645 struct gfs2_inode *ip = GFS2_I(inode); 1650 struct gfs2_inode *ip = GFS2_I(inode);
1646 struct gfs2_holder i_gh; 1651 struct gfs2_holder i_gh;
1647 int error; 1652 int error;
1648 1653
1649 error = gfs2_rs_alloc(ip); 1654 error = gfs2_rs_alloc(ip);
1650 if (error) 1655 if (error)
1651 return error; 1656 return error;
1652 1657
1653 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); 1658 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1654 if (error) 1659 if (error)
1655 return error; 1660 return error;
1656 1661
1657 error = -EPERM; 1662 error = -EPERM;
1658 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 1663 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1659 goto out; 1664 goto out;
1660 1665
1661 error = inode_change_ok(inode, attr); 1666 error = inode_change_ok(inode, attr);
1662 if (error) 1667 if (error)
1663 goto out; 1668 goto out;
1664 1669
1665 if (attr->ia_valid & ATTR_SIZE) 1670 if (attr->ia_valid & ATTR_SIZE)
1666 error = gfs2_setattr_size(inode, attr->ia_size); 1671 error = gfs2_setattr_size(inode, attr->ia_size);
1667 else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) 1672 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1668 error = setattr_chown(inode, attr); 1673 error = setattr_chown(inode, attr);
1669 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) 1674 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1670 error = gfs2_acl_chmod(ip, attr); 1675 error = gfs2_acl_chmod(ip, attr);
1671 else 1676 else
1672 error = gfs2_setattr_simple(inode, attr); 1677 error = gfs2_setattr_simple(inode, attr);
1673 1678
1674 out: 1679 out:
1675 if (!error) 1680 if (!error)
1676 mark_inode_dirty(inode); 1681 mark_inode_dirty(inode);
1677 gfs2_glock_dq_uninit(&i_gh); 1682 gfs2_glock_dq_uninit(&i_gh);
1678 return error; 1683 return error;
1679 } 1684 }
1680 1685
1681 /** 1686 /**
1682 * gfs2_getattr - Read out an inode's attributes 1687 * gfs2_getattr - Read out an inode's attributes
1683 * @mnt: The vfsmount the inode is being accessed from 1688 * @mnt: The vfsmount the inode is being accessed from
1684 * @dentry: The dentry to stat 1689 * @dentry: The dentry to stat
1685 * @stat: The inode's stats 1690 * @stat: The inode's stats
1686 * 1691 *
1687 * This may be called from the VFS directly, or from within GFS2 with the 1692 * This may be called from the VFS directly, or from within GFS2 with the
1688 * inode locked, so we look to see if the glock is already locked and only 1693 * inode locked, so we look to see if the glock is already locked and only
1689 * lock the glock if its not already been done. Note that its the NFS 1694 * lock the glock if its not already been done. Note that its the NFS
1690 * readdirplus operation which causes this to be called (from filldir) 1695 * readdirplus operation which causes this to be called (from filldir)
1691 * with the glock already held. 1696 * with the glock already held.
1692 * 1697 *
1693 * Returns: errno 1698 * Returns: errno
1694 */ 1699 */
1695 1700
1696 static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 1701 static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1697 struct kstat *stat) 1702 struct kstat *stat)
1698 { 1703 {
1699 struct inode *inode = dentry->d_inode; 1704 struct inode *inode = dentry->d_inode;
1700 struct gfs2_inode *ip = GFS2_I(inode); 1705 struct gfs2_inode *ip = GFS2_I(inode);
1701 struct gfs2_holder gh; 1706 struct gfs2_holder gh;
1702 int error; 1707 int error;
1703 int unlock = 0; 1708 int unlock = 0;
1704 1709
1705 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { 1710 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1706 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); 1711 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1707 if (error) 1712 if (error)
1708 return error; 1713 return error;
1709 unlock = 1; 1714 unlock = 1;
1710 } 1715 }
1711 1716
1712 generic_fillattr(inode, stat); 1717 generic_fillattr(inode, stat);
1713 if (unlock) 1718 if (unlock)
1714 gfs2_glock_dq_uninit(&gh); 1719 gfs2_glock_dq_uninit(&gh);
1715 1720
1716 return 0; 1721 return 0;
1717 } 1722 }
1718 1723
1719 static int gfs2_setxattr(struct dentry *dentry, const char *name, 1724 static int gfs2_setxattr(struct dentry *dentry, const char *name,
1720 const void *data, size_t size, int flags) 1725 const void *data, size_t size, int flags)
1721 { 1726 {
1722 struct inode *inode = dentry->d_inode; 1727 struct inode *inode = dentry->d_inode;
1723 struct gfs2_inode *ip = GFS2_I(inode); 1728 struct gfs2_inode *ip = GFS2_I(inode);
1724 struct gfs2_holder gh; 1729 struct gfs2_holder gh;
1725 int ret; 1730 int ret;
1726 1731
1727 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 1732 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1728 ret = gfs2_glock_nq(&gh); 1733 ret = gfs2_glock_nq(&gh);
1729 if (ret == 0) { 1734 if (ret == 0) {
1730 ret = gfs2_rs_alloc(ip); 1735 ret = gfs2_rs_alloc(ip);
1731 if (ret == 0) 1736 if (ret == 0)
1732 ret = generic_setxattr(dentry, name, data, size, flags); 1737 ret = generic_setxattr(dentry, name, data, size, flags);
1733 gfs2_glock_dq(&gh); 1738 gfs2_glock_dq(&gh);
1734 } 1739 }
1735 gfs2_holder_uninit(&gh); 1740 gfs2_holder_uninit(&gh);
1736 return ret; 1741 return ret;
1737 } 1742 }
1738 1743
1739 static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, 1744 static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1740 void *data, size_t size) 1745 void *data, size_t size)
1741 { 1746 {
1742 struct inode *inode = dentry->d_inode; 1747 struct inode *inode = dentry->d_inode;
1743 struct gfs2_inode *ip = GFS2_I(inode); 1748 struct gfs2_inode *ip = GFS2_I(inode);
1744 struct gfs2_holder gh; 1749 struct gfs2_holder gh;
1745 int ret; 1750 int ret;
1746 1751
1747 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); 1752 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1748 ret = gfs2_glock_nq(&gh); 1753 ret = gfs2_glock_nq(&gh);
1749 if (ret == 0) { 1754 if (ret == 0) {
1750 ret = generic_getxattr(dentry, name, data, size); 1755 ret = generic_getxattr(dentry, name, data, size);
1751 gfs2_glock_dq(&gh); 1756 gfs2_glock_dq(&gh);
1752 } 1757 }
1753 gfs2_holder_uninit(&gh); 1758 gfs2_holder_uninit(&gh);
1754 return ret; 1759 return ret;
1755 } 1760 }
1756 1761
1757 static int gfs2_removexattr(struct dentry *dentry, const char *name) 1762 static int gfs2_removexattr(struct dentry *dentry, const char *name)
1758 { 1763 {
1759 struct inode *inode = dentry->d_inode; 1764 struct inode *inode = dentry->d_inode;
1760 struct gfs2_inode *ip = GFS2_I(inode); 1765 struct gfs2_inode *ip = GFS2_I(inode);
1761 struct gfs2_holder gh; 1766 struct gfs2_holder gh;
1762 int ret; 1767 int ret;
1763 1768
1764 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 1769 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1765 ret = gfs2_glock_nq(&gh); 1770 ret = gfs2_glock_nq(&gh);
1766 if (ret == 0) { 1771 if (ret == 0) {
1767 ret = gfs2_rs_alloc(ip); 1772 ret = gfs2_rs_alloc(ip);
1768 if (ret == 0) 1773 if (ret == 0)
1769 ret = generic_removexattr(dentry, name); 1774 ret = generic_removexattr(dentry, name);
1770 gfs2_glock_dq(&gh); 1775 gfs2_glock_dq(&gh);
1771 } 1776 }
1772 gfs2_holder_uninit(&gh); 1777 gfs2_holder_uninit(&gh);
1773 return ret; 1778 return ret;
1774 } 1779 }
1775 1780
1776 static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1781 static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1777 u64 start, u64 len) 1782 u64 start, u64 len)
1778 { 1783 {
1779 struct gfs2_inode *ip = GFS2_I(inode); 1784 struct gfs2_inode *ip = GFS2_I(inode);
1780 struct gfs2_holder gh; 1785 struct gfs2_holder gh;
1781 int ret; 1786 int ret;
1782 1787
1783 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); 1788 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
1784 if (ret) 1789 if (ret)
1785 return ret; 1790 return ret;
1786 1791
1787 mutex_lock(&inode->i_mutex); 1792 mutex_lock(&inode->i_mutex);
1788 1793
1789 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); 1794 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
1790 if (ret) 1795 if (ret)
1791 goto out; 1796 goto out;
1792 1797
1793 if (gfs2_is_stuffed(ip)) { 1798 if (gfs2_is_stuffed(ip)) {
1794 u64 phys = ip->i_no_addr << inode->i_blkbits; 1799 u64 phys = ip->i_no_addr << inode->i_blkbits;
1795 u64 size = i_size_read(inode); 1800 u64 size = i_size_read(inode);
1796 u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED| 1801 u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
1797 FIEMAP_EXTENT_DATA_INLINE; 1802 FIEMAP_EXTENT_DATA_INLINE;
1798 phys += sizeof(struct gfs2_dinode); 1803 phys += sizeof(struct gfs2_dinode);
1799 phys += start; 1804 phys += start;
1800 if (start + len > size) 1805 if (start + len > size)
1801 len = size - start; 1806 len = size - start;
1802 if (start < size) 1807 if (start < size)
1803 ret = fiemap_fill_next_extent(fieinfo, start, phys, 1808 ret = fiemap_fill_next_extent(fieinfo, start, phys,
1804 len, flags); 1809 len, flags);
1805 if (ret == 1) 1810 if (ret == 1)
1806 ret = 0; 1811 ret = 0;
1807 } else { 1812 } else {
1808 ret = __generic_block_fiemap(inode, fieinfo, start, len, 1813 ret = __generic_block_fiemap(inode, fieinfo, start, len,
1809 gfs2_block_map); 1814 gfs2_block_map);
1810 } 1815 }
1811 1816
1812 gfs2_glock_dq_uninit(&gh); 1817 gfs2_glock_dq_uninit(&gh);
1813 out: 1818 out:
1814 mutex_unlock(&inode->i_mutex); 1819 mutex_unlock(&inode->i_mutex);
1815 return ret; 1820 return ret;
1816 } 1821 }
1817 1822
1818 const struct inode_operations gfs2_file_iops = { 1823 const struct inode_operations gfs2_file_iops = {
1819 .permission = gfs2_permission, 1824 .permission = gfs2_permission,
1820 .setattr = gfs2_setattr, 1825 .setattr = gfs2_setattr,
1821 .getattr = gfs2_getattr, 1826 .getattr = gfs2_getattr,
1822 .setxattr = gfs2_setxattr, 1827 .setxattr = gfs2_setxattr,
1823 .getxattr = gfs2_getxattr, 1828 .getxattr = gfs2_getxattr,
1824 .listxattr = gfs2_listxattr, 1829 .listxattr = gfs2_listxattr,
1825 .removexattr = gfs2_removexattr, 1830 .removexattr = gfs2_removexattr,
1826 .fiemap = gfs2_fiemap, 1831 .fiemap = gfs2_fiemap,
1827 .get_acl = gfs2_get_acl, 1832 .get_acl = gfs2_get_acl,
1828 }; 1833 };
1829 1834
1830 const struct inode_operations gfs2_dir_iops = { 1835 const struct inode_operations gfs2_dir_iops = {
1831 .create = gfs2_create, 1836 .create = gfs2_create,
1832 .lookup = gfs2_lookup, 1837 .lookup = gfs2_lookup,
1833 .link = gfs2_link, 1838 .link = gfs2_link,
1834 .unlink = gfs2_unlink, 1839 .unlink = gfs2_unlink,
1835 .symlink = gfs2_symlink, 1840 .symlink = gfs2_symlink,
1836 .mkdir = gfs2_mkdir, 1841 .mkdir = gfs2_mkdir,
1837 .rmdir = gfs2_unlink, 1842 .rmdir = gfs2_unlink,
1838 .mknod = gfs2_mknod, 1843 .mknod = gfs2_mknod,
1839 .rename = gfs2_rename, 1844 .rename = gfs2_rename,
1840 .permission = gfs2_permission, 1845 .permission = gfs2_permission,
1841 .setattr = gfs2_setattr, 1846 .setattr = gfs2_setattr,
1842 .getattr = gfs2_getattr, 1847 .getattr = gfs2_getattr,
1843 .setxattr = gfs2_setxattr, 1848 .setxattr = gfs2_setxattr,
1844 .getxattr = gfs2_getxattr, 1849 .getxattr = gfs2_getxattr,
1845 .listxattr = gfs2_listxattr, 1850 .listxattr = gfs2_listxattr,
1846 .removexattr = gfs2_removexattr, 1851 .removexattr = gfs2_removexattr,
1847 .fiemap = gfs2_fiemap, 1852 .fiemap = gfs2_fiemap,
1848 .get_acl = gfs2_get_acl, 1853 .get_acl = gfs2_get_acl,
1849 }; 1854 };
1850 1855
1851 const struct inode_operations gfs2_symlink_iops = { 1856 const struct inode_operations gfs2_symlink_iops = {
1852 .readlink = generic_readlink, 1857 .readlink = generic_readlink,
1853 .follow_link = gfs2_follow_link, 1858 .follow_link = gfs2_follow_link,
1854 .put_link = gfs2_put_link, 1859 .put_link = gfs2_put_link,
1855 .permission = gfs2_permission, 1860 .permission = gfs2_permission,
1856 .setattr = gfs2_setattr, 1861 .setattr = gfs2_setattr,
1857 .getattr = gfs2_getattr, 1862 .getattr = gfs2_getattr,
1858 .setxattr = gfs2_setxattr, 1863 .setxattr = gfs2_setxattr,
1859 .getxattr = gfs2_getxattr, 1864 .getxattr = gfs2_getxattr,
1860 .listxattr = gfs2_listxattr, 1865 .listxattr = gfs2_listxattr,
1861 .removexattr = gfs2_removexattr, 1866 .removexattr = gfs2_removexattr,
1862 .fiemap = gfs2_fiemap, 1867 .fiemap = gfs2_fiemap,
1863 .get_acl = gfs2_get_acl, 1868 .get_acl = gfs2_get_acl,
1864 }; 1869 };
1865 1870
1866 1871
1 /* 1 /*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10 /* 10 /*
11 * Quota change tags are associated with each transaction that allocates or 11 * Quota change tags are associated with each transaction that allocates or
12 * deallocates space. Those changes are accumulated locally to each node (in a 12 * deallocates space. Those changes are accumulated locally to each node (in a
13 * per-node file) and then are periodically synced to the quota file. This 13 * per-node file) and then are periodically synced to the quota file. This
14 * avoids the bottleneck of constantly touching the quota file, but introduces 14 * avoids the bottleneck of constantly touching the quota file, but introduces
15 * fuzziness in the current usage value of IDs that are being used on different 15 * fuzziness in the current usage value of IDs that are being used on different
16 * nodes in the cluster simultaneously. So, it is possible for a user on 16 * nodes in the cluster simultaneously. So, it is possible for a user on
 * multiple nodes to overrun their quota, but that overrun is controllable.
18 * Since quota tags are part of transactions, there is no need for a quota check 18 * Since quota tags are part of transactions, there is no need for a quota check
19 * program to be run on node crashes or anything like that. 19 * program to be run on node crashes or anything like that.
20 * 20 *
 * There are a couple of knobs that let the administrator manage the quota
22 * fuzziness. "quota_quantum" sets the maximum time a quota change can be 22 * fuzziness. "quota_quantum" sets the maximum time a quota change can be
23 * sitting on one node before being synced to the quota file. (The default is 23 * sitting on one node before being synced to the quota file. (The default is
24 * 60 seconds.) Another knob, "quota_scale" controls how quickly the frequency 24 * 60 seconds.) Another knob, "quota_scale" controls how quickly the frequency
25 * of quota file syncs increases as the user moves closer to their limit. The 25 * of quota file syncs increases as the user moves closer to their limit. The
26 * more frequent the syncs, the more accurate the quota enforcement, but that 26 * more frequent the syncs, the more accurate the quota enforcement, but that
27 * means that there is more contention between the nodes for the quota file. 27 * means that there is more contention between the nodes for the quota file.
28 * The default value is one. This sets the maximum theoretical quota overrun 28 * The default value is one. This sets the maximum theoretical quota overrun
29 * (with infinite node with infinite bandwidth) to twice the user's limit. (In 29 * (with infinite node with infinite bandwidth) to twice the user's limit. (In
30 * practice, the maximum overrun you see should be much less.) A "quota_scale" 30 * practice, the maximum overrun you see should be much less.) A "quota_scale"
31 * number greater than one makes quota syncs more frequent and reduces the 31 * number greater than one makes quota syncs more frequent and reduces the
32 * maximum overrun. Numbers less than one (but greater than zero) make quota 32 * maximum overrun. Numbers less than one (but greater than zero) make quota
33 * syncs less frequent. 33 * syncs less frequent.
34 * 34 *
35 * GFS quotas also use per-ID Lock Value Blocks (LVBs) to cache the contents of 35 * GFS quotas also use per-ID Lock Value Blocks (LVBs) to cache the contents of
36 * the quota file, so it is not being constantly read. 36 * the quota file, so it is not being constantly read.
37 */ 37 */
38 38
39 #include <linux/sched.h> 39 #include <linux/sched.h>
40 #include <linux/slab.h> 40 #include <linux/slab.h>
41 #include <linux/mm.h> 41 #include <linux/mm.h>
42 #include <linux/spinlock.h> 42 #include <linux/spinlock.h>
43 #include <linux/completion.h> 43 #include <linux/completion.h>
44 #include <linux/buffer_head.h> 44 #include <linux/buffer_head.h>
45 #include <linux/sort.h> 45 #include <linux/sort.h>
46 #include <linux/fs.h> 46 #include <linux/fs.h>
47 #include <linux/bio.h> 47 #include <linux/bio.h>
48 #include <linux/gfs2_ondisk.h> 48 #include <linux/gfs2_ondisk.h>
49 #include <linux/kthread.h> 49 #include <linux/kthread.h>
50 #include <linux/freezer.h> 50 #include <linux/freezer.h>
51 #include <linux/quota.h> 51 #include <linux/quota.h>
52 #include <linux/dqblk_xfs.h> 52 #include <linux/dqblk_xfs.h>
53 53
54 #include "gfs2.h" 54 #include "gfs2.h"
55 #include "incore.h" 55 #include "incore.h"
56 #include "bmap.h" 56 #include "bmap.h"
57 #include "glock.h" 57 #include "glock.h"
58 #include "glops.h" 58 #include "glops.h"
59 #include "log.h" 59 #include "log.h"
60 #include "meta_io.h" 60 #include "meta_io.h"
61 #include "quota.h" 61 #include "quota.h"
62 #include "rgrp.h" 62 #include "rgrp.h"
63 #include "super.h" 63 #include "super.h"
64 #include "trans.h" 64 #include "trans.h"
65 #include "inode.h" 65 #include "inode.h"
66 #include "util.h" 66 #include "util.h"
67 67
68 #define QUOTA_USER 1 68 #define QUOTA_USER 1
69 #define QUOTA_GROUP 0 69 #define QUOTA_GROUP 0
70 70
71 struct gfs2_quota_change_host { 71 struct gfs2_quota_change_host {
72 u64 qc_change; 72 u64 qc_change;
73 u32 qc_flags; /* GFS2_QCF_... */ 73 u32 qc_flags; /* GFS2_QCF_... */
74 u32 qc_id; 74 u32 qc_id;
75 }; 75 };
76 76
77 static LIST_HEAD(qd_lru_list); 77 static LIST_HEAD(qd_lru_list);
78 static atomic_t qd_lru_count = ATOMIC_INIT(0); 78 static atomic_t qd_lru_count = ATOMIC_INIT(0);
79 static DEFINE_SPINLOCK(qd_lru_lock); 79 static DEFINE_SPINLOCK(qd_lru_lock);
80 80
81 int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) 81 int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
82 { 82 {
83 struct gfs2_quota_data *qd; 83 struct gfs2_quota_data *qd;
84 struct gfs2_sbd *sdp; 84 struct gfs2_sbd *sdp;
85 int nr_to_scan = sc->nr_to_scan; 85 int nr_to_scan = sc->nr_to_scan;
86 86
87 if (nr_to_scan == 0) 87 if (nr_to_scan == 0)
88 goto out; 88 goto out;
89 89
90 if (!(sc->gfp_mask & __GFP_FS)) 90 if (!(sc->gfp_mask & __GFP_FS))
91 return -1; 91 return -1;
92 92
93 spin_lock(&qd_lru_lock); 93 spin_lock(&qd_lru_lock);
94 while (nr_to_scan && !list_empty(&qd_lru_list)) { 94 while (nr_to_scan && !list_empty(&qd_lru_list)) {
95 qd = list_entry(qd_lru_list.next, 95 qd = list_entry(qd_lru_list.next,
96 struct gfs2_quota_data, qd_reclaim); 96 struct gfs2_quota_data, qd_reclaim);
97 sdp = qd->qd_gl->gl_sbd; 97 sdp = qd->qd_gl->gl_sbd;
98 98
99 /* Free from the filesystem-specific list */ 99 /* Free from the filesystem-specific list */
100 list_del(&qd->qd_list); 100 list_del(&qd->qd_list);
101 101
102 gfs2_assert_warn(sdp, !qd->qd_change); 102 gfs2_assert_warn(sdp, !qd->qd_change);
103 gfs2_assert_warn(sdp, !qd->qd_slot_count); 103 gfs2_assert_warn(sdp, !qd->qd_slot_count);
104 gfs2_assert_warn(sdp, !qd->qd_bh_count); 104 gfs2_assert_warn(sdp, !qd->qd_bh_count);
105 105
106 gfs2_glock_put(qd->qd_gl); 106 gfs2_glock_put(qd->qd_gl);
107 atomic_dec(&sdp->sd_quota_count); 107 atomic_dec(&sdp->sd_quota_count);
108 108
109 /* Delete it from the common reclaim list */ 109 /* Delete it from the common reclaim list */
110 list_del_init(&qd->qd_reclaim); 110 list_del_init(&qd->qd_reclaim);
111 atomic_dec(&qd_lru_count); 111 atomic_dec(&qd_lru_count);
112 spin_unlock(&qd_lru_lock); 112 spin_unlock(&qd_lru_lock);
113 kmem_cache_free(gfs2_quotad_cachep, qd); 113 kmem_cache_free(gfs2_quotad_cachep, qd);
114 spin_lock(&qd_lru_lock); 114 spin_lock(&qd_lru_lock);
115 nr_to_scan--; 115 nr_to_scan--;
116 } 116 }
117 spin_unlock(&qd_lru_lock); 117 spin_unlock(&qd_lru_lock);
118 118
119 out: 119 out:
120 return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100; 120 return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100;
121 } 121 }
122 122
123 static u64 qd2offset(struct gfs2_quota_data *qd) 123 static u64 qd2offset(struct gfs2_quota_data *qd)
124 { 124 {
125 u64 offset; 125 u64 offset;
126 126
127 offset = 2 * (u64)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags); 127 offset = 2 * (u64)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags);
128 offset *= sizeof(struct gfs2_quota); 128 offset *= sizeof(struct gfs2_quota);
129 129
130 return offset; 130 return offset;
131 } 131 }
132 132
133 static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id, 133 static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
134 struct gfs2_quota_data **qdp) 134 struct gfs2_quota_data **qdp)
135 { 135 {
136 struct gfs2_quota_data *qd; 136 struct gfs2_quota_data *qd;
137 int error; 137 int error;
138 138
139 qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS); 139 qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS);
140 if (!qd) 140 if (!qd)
141 return -ENOMEM; 141 return -ENOMEM;
142 142
143 atomic_set(&qd->qd_count, 1); 143 atomic_set(&qd->qd_count, 1);
144 qd->qd_id = id; 144 qd->qd_id = id;
145 if (user) 145 if (user)
146 set_bit(QDF_USER, &qd->qd_flags); 146 set_bit(QDF_USER, &qd->qd_flags);
147 qd->qd_slot = -1; 147 qd->qd_slot = -1;
148 INIT_LIST_HEAD(&qd->qd_reclaim); 148 INIT_LIST_HEAD(&qd->qd_reclaim);
149 149
150 error = gfs2_glock_get(sdp, 2 * (u64)id + !user, 150 error = gfs2_glock_get(sdp, 2 * (u64)id + !user,
151 &gfs2_quota_glops, CREATE, &qd->qd_gl); 151 &gfs2_quota_glops, CREATE, &qd->qd_gl);
152 if (error) 152 if (error)
153 goto fail; 153 goto fail;
154 154
155 *qdp = qd; 155 *qdp = qd;
156 156
157 return 0; 157 return 0;
158 158
159 fail: 159 fail:
160 kmem_cache_free(gfs2_quotad_cachep, qd); 160 kmem_cache_free(gfs2_quotad_cachep, qd);
161 return error; 161 return error;
162 } 162 }
163 163
164 static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, 164 static int qd_get(struct gfs2_sbd *sdp, int user, u32 id,
165 struct gfs2_quota_data **qdp) 165 struct gfs2_quota_data **qdp)
166 { 166 {
167 struct gfs2_quota_data *qd = NULL, *new_qd = NULL; 167 struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
168 int error, found; 168 int error, found;
169 169
170 *qdp = NULL; 170 *qdp = NULL;
171 171
172 for (;;) { 172 for (;;) {
173 found = 0; 173 found = 0;
174 spin_lock(&qd_lru_lock); 174 spin_lock(&qd_lru_lock);
175 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { 175 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
176 if (qd->qd_id == id && 176 if (qd->qd_id == id &&
177 !test_bit(QDF_USER, &qd->qd_flags) == !user) { 177 !test_bit(QDF_USER, &qd->qd_flags) == !user) {
178 if (!atomic_read(&qd->qd_count) && 178 if (!atomic_read(&qd->qd_count) &&
179 !list_empty(&qd->qd_reclaim)) { 179 !list_empty(&qd->qd_reclaim)) {
180 /* Remove it from reclaim list */ 180 /* Remove it from reclaim list */
181 list_del_init(&qd->qd_reclaim); 181 list_del_init(&qd->qd_reclaim);
182 atomic_dec(&qd_lru_count); 182 atomic_dec(&qd_lru_count);
183 } 183 }
184 atomic_inc(&qd->qd_count); 184 atomic_inc(&qd->qd_count);
185 found = 1; 185 found = 1;
186 break; 186 break;
187 } 187 }
188 } 188 }
189 189
190 if (!found) 190 if (!found)
191 qd = NULL; 191 qd = NULL;
192 192
193 if (!qd && new_qd) { 193 if (!qd && new_qd) {
194 qd = new_qd; 194 qd = new_qd;
195 list_add(&qd->qd_list, &sdp->sd_quota_list); 195 list_add(&qd->qd_list, &sdp->sd_quota_list);
196 atomic_inc(&sdp->sd_quota_count); 196 atomic_inc(&sdp->sd_quota_count);
197 new_qd = NULL; 197 new_qd = NULL;
198 } 198 }
199 199
200 spin_unlock(&qd_lru_lock); 200 spin_unlock(&qd_lru_lock);
201 201
202 if (qd) { 202 if (qd) {
203 if (new_qd) { 203 if (new_qd) {
204 gfs2_glock_put(new_qd->qd_gl); 204 gfs2_glock_put(new_qd->qd_gl);
205 kmem_cache_free(gfs2_quotad_cachep, new_qd); 205 kmem_cache_free(gfs2_quotad_cachep, new_qd);
206 } 206 }
207 *qdp = qd; 207 *qdp = qd;
208 return 0; 208 return 0;
209 } 209 }
210 210
211 error = qd_alloc(sdp, user, id, &new_qd); 211 error = qd_alloc(sdp, user, id, &new_qd);
212 if (error) 212 if (error)
213 return error; 213 return error;
214 } 214 }
215 } 215 }
216 216
217 static void qd_hold(struct gfs2_quota_data *qd) 217 static void qd_hold(struct gfs2_quota_data *qd)
218 { 218 {
219 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 219 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
220 gfs2_assert(sdp, atomic_read(&qd->qd_count)); 220 gfs2_assert(sdp, atomic_read(&qd->qd_count));
221 atomic_inc(&qd->qd_count); 221 atomic_inc(&qd->qd_count);
222 } 222 }
223 223
224 static void qd_put(struct gfs2_quota_data *qd) 224 static void qd_put(struct gfs2_quota_data *qd)
225 { 225 {
226 if (atomic_dec_and_lock(&qd->qd_count, &qd_lru_lock)) { 226 if (atomic_dec_and_lock(&qd->qd_count, &qd_lru_lock)) {
227 /* Add to the reclaim list */ 227 /* Add to the reclaim list */
228 list_add_tail(&qd->qd_reclaim, &qd_lru_list); 228 list_add_tail(&qd->qd_reclaim, &qd_lru_list);
229 atomic_inc(&qd_lru_count); 229 atomic_inc(&qd_lru_count);
230 spin_unlock(&qd_lru_lock); 230 spin_unlock(&qd_lru_lock);
231 } 231 }
232 } 232 }
233 233
234 static int slot_get(struct gfs2_quota_data *qd) 234 static int slot_get(struct gfs2_quota_data *qd)
235 { 235 {
236 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 236 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
237 unsigned int c, o = 0, b; 237 unsigned int c, o = 0, b;
238 unsigned char byte = 0; 238 unsigned char byte = 0;
239 239
240 spin_lock(&qd_lru_lock); 240 spin_lock(&qd_lru_lock);
241 241
242 if (qd->qd_slot_count++) { 242 if (qd->qd_slot_count++) {
243 spin_unlock(&qd_lru_lock); 243 spin_unlock(&qd_lru_lock);
244 return 0; 244 return 0;
245 } 245 }
246 246
247 for (c = 0; c < sdp->sd_quota_chunks; c++) 247 for (c = 0; c < sdp->sd_quota_chunks; c++)
248 for (o = 0; o < PAGE_SIZE; o++) { 248 for (o = 0; o < PAGE_SIZE; o++) {
249 byte = sdp->sd_quota_bitmap[c][o]; 249 byte = sdp->sd_quota_bitmap[c][o];
250 if (byte != 0xFF) 250 if (byte != 0xFF)
251 goto found; 251 goto found;
252 } 252 }
253 253
254 goto fail; 254 goto fail;
255 255
256 found: 256 found:
257 for (b = 0; b < 8; b++) 257 for (b = 0; b < 8; b++)
258 if (!(byte & (1 << b))) 258 if (!(byte & (1 << b)))
259 break; 259 break;
260 qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b; 260 qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b;
261 261
262 if (qd->qd_slot >= sdp->sd_quota_slots) 262 if (qd->qd_slot >= sdp->sd_quota_slots)
263 goto fail; 263 goto fail;
264 264
265 sdp->sd_quota_bitmap[c][o] |= 1 << b; 265 sdp->sd_quota_bitmap[c][o] |= 1 << b;
266 266
267 spin_unlock(&qd_lru_lock); 267 spin_unlock(&qd_lru_lock);
268 268
269 return 0; 269 return 0;
270 270
271 fail: 271 fail:
272 qd->qd_slot_count--; 272 qd->qd_slot_count--;
273 spin_unlock(&qd_lru_lock); 273 spin_unlock(&qd_lru_lock);
274 return -ENOSPC; 274 return -ENOSPC;
275 } 275 }
276 276
277 static void slot_hold(struct gfs2_quota_data *qd) 277 static void slot_hold(struct gfs2_quota_data *qd)
278 { 278 {
279 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 279 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
280 280
281 spin_lock(&qd_lru_lock); 281 spin_lock(&qd_lru_lock);
282 gfs2_assert(sdp, qd->qd_slot_count); 282 gfs2_assert(sdp, qd->qd_slot_count);
283 qd->qd_slot_count++; 283 qd->qd_slot_count++;
284 spin_unlock(&qd_lru_lock); 284 spin_unlock(&qd_lru_lock);
285 } 285 }
286 286
287 static void slot_put(struct gfs2_quota_data *qd) 287 static void slot_put(struct gfs2_quota_data *qd)
288 { 288 {
289 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 289 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
290 290
291 spin_lock(&qd_lru_lock); 291 spin_lock(&qd_lru_lock);
292 gfs2_assert(sdp, qd->qd_slot_count); 292 gfs2_assert(sdp, qd->qd_slot_count);
293 if (!--qd->qd_slot_count) { 293 if (!--qd->qd_slot_count) {
294 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0); 294 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
295 qd->qd_slot = -1; 295 qd->qd_slot = -1;
296 } 296 }
297 spin_unlock(&qd_lru_lock); 297 spin_unlock(&qd_lru_lock);
298 } 298 }
299 299
300 static int bh_get(struct gfs2_quota_data *qd) 300 static int bh_get(struct gfs2_quota_data *qd)
301 { 301 {
302 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 302 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
303 struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); 303 struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
304 unsigned int block, offset; 304 unsigned int block, offset;
305 struct buffer_head *bh; 305 struct buffer_head *bh;
306 int error; 306 int error;
307 struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; 307 struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
308 308
309 mutex_lock(&sdp->sd_quota_mutex); 309 mutex_lock(&sdp->sd_quota_mutex);
310 310
311 if (qd->qd_bh_count++) { 311 if (qd->qd_bh_count++) {
312 mutex_unlock(&sdp->sd_quota_mutex); 312 mutex_unlock(&sdp->sd_quota_mutex);
313 return 0; 313 return 0;
314 } 314 }
315 315
316 block = qd->qd_slot / sdp->sd_qc_per_block; 316 block = qd->qd_slot / sdp->sd_qc_per_block;
317 offset = qd->qd_slot % sdp->sd_qc_per_block; 317 offset = qd->qd_slot % sdp->sd_qc_per_block;
318 318
319 bh_map.b_size = 1 << ip->i_inode.i_blkbits; 319 bh_map.b_size = 1 << ip->i_inode.i_blkbits;
320 error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0); 320 error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0);
321 if (error) 321 if (error)
322 goto fail; 322 goto fail;
323 error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh); 323 error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh);
324 if (error) 324 if (error)
325 goto fail; 325 goto fail;
326 error = -EIO; 326 error = -EIO;
327 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) 327 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC))
328 goto fail_brelse; 328 goto fail_brelse;
329 329
330 qd->qd_bh = bh; 330 qd->qd_bh = bh;
331 qd->qd_bh_qc = (struct gfs2_quota_change *) 331 qd->qd_bh_qc = (struct gfs2_quota_change *)
332 (bh->b_data + sizeof(struct gfs2_meta_header) + 332 (bh->b_data + sizeof(struct gfs2_meta_header) +
333 offset * sizeof(struct gfs2_quota_change)); 333 offset * sizeof(struct gfs2_quota_change));
334 334
335 mutex_unlock(&sdp->sd_quota_mutex); 335 mutex_unlock(&sdp->sd_quota_mutex);
336 336
337 return 0; 337 return 0;
338 338
339 fail_brelse: 339 fail_brelse:
340 brelse(bh); 340 brelse(bh);
341 fail: 341 fail:
342 qd->qd_bh_count--; 342 qd->qd_bh_count--;
343 mutex_unlock(&sdp->sd_quota_mutex); 343 mutex_unlock(&sdp->sd_quota_mutex);
344 return error; 344 return error;
345 } 345 }
346 346
347 static void bh_put(struct gfs2_quota_data *qd) 347 static void bh_put(struct gfs2_quota_data *qd)
348 { 348 {
349 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 349 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
350 350
351 mutex_lock(&sdp->sd_quota_mutex); 351 mutex_lock(&sdp->sd_quota_mutex);
352 gfs2_assert(sdp, qd->qd_bh_count); 352 gfs2_assert(sdp, qd->qd_bh_count);
353 if (!--qd->qd_bh_count) { 353 if (!--qd->qd_bh_count) {
354 brelse(qd->qd_bh); 354 brelse(qd->qd_bh);
355 qd->qd_bh = NULL; 355 qd->qd_bh = NULL;
356 qd->qd_bh_qc = NULL; 356 qd->qd_bh_qc = NULL;
357 } 357 }
358 mutex_unlock(&sdp->sd_quota_mutex); 358 mutex_unlock(&sdp->sd_quota_mutex);
359 } 359 }
360 360
361 static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) 361 static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
362 { 362 {
363 struct gfs2_quota_data *qd = NULL; 363 struct gfs2_quota_data *qd = NULL;
364 int error; 364 int error;
365 int found = 0; 365 int found = 0;
366 366
367 *qdp = NULL; 367 *qdp = NULL;
368 368
369 if (sdp->sd_vfs->s_flags & MS_RDONLY) 369 if (sdp->sd_vfs->s_flags & MS_RDONLY)
370 return 0; 370 return 0;
371 371
372 spin_lock(&qd_lru_lock); 372 spin_lock(&qd_lru_lock);
373 373
374 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { 374 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
375 if (test_bit(QDF_LOCKED, &qd->qd_flags) || 375 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
376 !test_bit(QDF_CHANGE, &qd->qd_flags) || 376 !test_bit(QDF_CHANGE, &qd->qd_flags) ||
377 qd->qd_sync_gen >= sdp->sd_quota_sync_gen) 377 qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
378 continue; 378 continue;
379 379
380 list_move_tail(&qd->qd_list, &sdp->sd_quota_list); 380 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
381 381
382 set_bit(QDF_LOCKED, &qd->qd_flags); 382 set_bit(QDF_LOCKED, &qd->qd_flags);
383 gfs2_assert_warn(sdp, atomic_read(&qd->qd_count)); 383 gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
384 atomic_inc(&qd->qd_count); 384 atomic_inc(&qd->qd_count);
385 qd->qd_change_sync = qd->qd_change; 385 qd->qd_change_sync = qd->qd_change;
386 gfs2_assert_warn(sdp, qd->qd_slot_count); 386 gfs2_assert_warn(sdp, qd->qd_slot_count);
387 qd->qd_slot_count++; 387 qd->qd_slot_count++;
388 found = 1; 388 found = 1;
389 389
390 break; 390 break;
391 } 391 }
392 392
393 if (!found) 393 if (!found)
394 qd = NULL; 394 qd = NULL;
395 395
396 spin_unlock(&qd_lru_lock); 396 spin_unlock(&qd_lru_lock);
397 397
398 if (qd) { 398 if (qd) {
399 gfs2_assert_warn(sdp, qd->qd_change_sync); 399 gfs2_assert_warn(sdp, qd->qd_change_sync);
400 error = bh_get(qd); 400 error = bh_get(qd);
401 if (error) { 401 if (error) {
402 clear_bit(QDF_LOCKED, &qd->qd_flags); 402 clear_bit(QDF_LOCKED, &qd->qd_flags);
403 slot_put(qd); 403 slot_put(qd);
404 qd_put(qd); 404 qd_put(qd);
405 return error; 405 return error;
406 } 406 }
407 } 407 }
408 408
409 *qdp = qd; 409 *qdp = qd;
410 410
411 return 0; 411 return 0;
412 } 412 }
413 413
414 static int qd_trylock(struct gfs2_quota_data *qd) 414 static int qd_trylock(struct gfs2_quota_data *qd)
415 { 415 {
416 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 416 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
417 417
418 if (sdp->sd_vfs->s_flags & MS_RDONLY) 418 if (sdp->sd_vfs->s_flags & MS_RDONLY)
419 return 0; 419 return 0;
420 420
421 spin_lock(&qd_lru_lock); 421 spin_lock(&qd_lru_lock);
422 422
423 if (test_bit(QDF_LOCKED, &qd->qd_flags) || 423 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
424 !test_bit(QDF_CHANGE, &qd->qd_flags)) { 424 !test_bit(QDF_CHANGE, &qd->qd_flags)) {
425 spin_unlock(&qd_lru_lock); 425 spin_unlock(&qd_lru_lock);
426 return 0; 426 return 0;
427 } 427 }
428 428
429 list_move_tail(&qd->qd_list, &sdp->sd_quota_list); 429 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
430 430
431 set_bit(QDF_LOCKED, &qd->qd_flags); 431 set_bit(QDF_LOCKED, &qd->qd_flags);
432 gfs2_assert_warn(sdp, atomic_read(&qd->qd_count)); 432 gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
433 atomic_inc(&qd->qd_count); 433 atomic_inc(&qd->qd_count);
434 qd->qd_change_sync = qd->qd_change; 434 qd->qd_change_sync = qd->qd_change;
435 gfs2_assert_warn(sdp, qd->qd_slot_count); 435 gfs2_assert_warn(sdp, qd->qd_slot_count);
436 qd->qd_slot_count++; 436 qd->qd_slot_count++;
437 437
438 spin_unlock(&qd_lru_lock); 438 spin_unlock(&qd_lru_lock);
439 439
440 gfs2_assert_warn(sdp, qd->qd_change_sync); 440 gfs2_assert_warn(sdp, qd->qd_change_sync);
441 if (bh_get(qd)) { 441 if (bh_get(qd)) {
442 clear_bit(QDF_LOCKED, &qd->qd_flags); 442 clear_bit(QDF_LOCKED, &qd->qd_flags);
443 slot_put(qd); 443 slot_put(qd);
444 qd_put(qd); 444 qd_put(qd);
445 return 0; 445 return 0;
446 } 446 }
447 447
448 return 1; 448 return 1;
449 } 449 }
450 450
/*
 * qd_unlock - release a quota data element claimed by qd_trylock()
 * @qd: the quota data element
 *
 * Clears QDF_LOCKED and drops the buffer, slot and reference counts
 * taken by a successful qd_trylock().
 */
static void qd_unlock(struct gfs2_quota_data *qd)
{
	gfs2_assert_warn(qd->qd_gl->gl_sbd,
			 test_bit(QDF_LOCKED, &qd->qd_flags));
	clear_bit(QDF_LOCKED, &qd->qd_flags);
	bh_put(qd);
	slot_put(qd);
	qd_put(qd);
}
460 460
/*
 * qdsb_get - get a quota data element plus its slot and change buffer
 * @sdp: the filesystem
 * @user: non-zero for a user quota, zero for a group quota
 * @id: the uid or gid
 * @qdp: the returned quota data element
 *
 * Combines qd_get(), slot_get() and bh_get(), unwinding in reverse
 * order on failure.  Balanced by qdsb_put().
 *
 * Returns: errno
 */
static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id,
		    struct gfs2_quota_data **qdp)
{
	int error;

	error = qd_get(sdp, user, id, qdp);
	if (error)
		return error;

	error = slot_get(*qdp);
	if (error)
		goto fail;

	error = bh_get(*qdp);
	if (error)
		goto fail_slot;

	return 0;

fail_slot:
	slot_put(*qdp);
fail:
	qd_put(*qdp);
	return error;
}
486 486
/*
 * qdsb_put - release the references taken by qdsb_get()
 * @qd: the quota data element
 */
static void qdsb_put(struct gfs2_quota_data *qd)
{
	bh_put(qd);
	slot_put(qd);
	qd_put(qd);
}
493 493
/*
 * gfs2_quota_hold - take references on the quota data for an operation
 * @ip: the inode
 * @uid: user id of a (possibly) new owner, or NO_QUOTA_CHANGE
 * @gid: group id of a (possibly) new owner, or NO_QUOTA_CHANGE
 *
 * Populates ip->i_res->rs_qa_qd[] with the inode's user and group quota
 * data and, when ownership is changing, with the new owner's quota data
 * as well.  Balanced by gfs2_quota_unhold().
 *
 * Returns: errno
 */
int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_quota_data **qd;
	int error;

	if (ip->i_res == NULL) {
		error = gfs2_rs_alloc(ip);
		if (error)
			return error;
	}

	qd = ip->i_res->rs_qa_qd;

	/* Must not already hold quota data or have the quotas locked */
	if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) ||
	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
		return -EIO;

	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
		return 0;

	error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd);
	if (error)
		goto out;
	ip->i_res->rs_qa_qd_num++;
	qd++;

	error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd);
	if (error)
		goto out;
	ip->i_res->rs_qa_qd_num++;
	qd++;

	/* Ownership change: also hold the target uid's quota data */
	if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
		error = qdsb_get(sdp, QUOTA_USER, uid, qd);
		if (error)
			goto out;
		ip->i_res->rs_qa_qd_num++;
		qd++;
	}

	/* Group change: also hold the target gid's quota data */
	if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) {
		error = qdsb_get(sdp, QUOTA_GROUP, gid, qd);
		if (error)
			goto out;
		ip->i_res->rs_qa_qd_num++;
		qd++;
	}

out:
	/* On error, drop whatever was acquired so far */
	if (error)
		gfs2_quota_unhold(ip);
	return error;
}
548 548
/*
 * gfs2_quota_unhold - drop the references taken by gfs2_quota_hold()
 * @ip: the inode
 */
void gfs2_quota_unhold(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned int x;

	if (ip->i_res == NULL)
		return;
	/* The quota glocks must have been released first */
	gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));

	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
		qdsb_put(ip->i_res->rs_qa_qd[x]);
		ip->i_res->rs_qa_qd[x] = NULL;
	}
	ip->i_res->rs_qa_qd_num = 0;
}
564 564
565 static int sort_qd(const void *a, const void *b) 565 static int sort_qd(const void *a, const void *b)
566 { 566 {
567 const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a; 567 const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a;
568 const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b; 568 const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b;
569 569
570 if (!test_bit(QDF_USER, &qd_a->qd_flags) != 570 if (!test_bit(QDF_USER, &qd_a->qd_flags) !=
571 !test_bit(QDF_USER, &qd_b->qd_flags)) { 571 !test_bit(QDF_USER, &qd_b->qd_flags)) {
572 if (test_bit(QDF_USER, &qd_a->qd_flags)) 572 if (test_bit(QDF_USER, &qd_a->qd_flags))
573 return -1; 573 return -1;
574 else 574 else
575 return 1; 575 return 1;
576 } 576 }
577 if (qd_a->qd_id < qd_b->qd_id) 577 if (qd_a->qd_id < qd_b->qd_id)
578 return -1; 578 return -1;
579 if (qd_a->qd_id > qd_b->qd_id) 579 if (qd_a->qd_id > qd_b->qd_id)
580 return 1; 580 return 1;
581 581
582 return 0; 582 return 0;
583 } 583 }
584 584
/*
 * do_qc - record a quota usage delta in the quota change file
 * @qd: the quota data element
 * @change: the (signed) number of blocks to add to the pending change
 *
 * Updates the on-disk quota change entry for @qd under sd_quota_mutex.
 * When the accumulated change returns to zero the entry is released
 * (QDF_CHANGE cleared, slot and reference dropped); when a change first
 * appears, an extra reference and slot hold pin the entry until synced.
 */
static void do_qc(struct gfs2_quota_data *qd, s64 change)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
	struct gfs2_quota_change *qc = qd->qd_bh_qc;
	s64 x;

	mutex_lock(&sdp->sd_quota_mutex);
	gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1);

	/* First change for this qd: initialize the on-disk entry */
	if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
		qc->qc_change = 0;
		qc->qc_flags = 0;
		if (test_bit(QDF_USER, &qd->qd_flags))
			qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
		qc->qc_id = cpu_to_be32(qd->qd_id);
	}

	x = be64_to_cpu(qc->qc_change) + change;
	qc->qc_change = cpu_to_be64(x);

	/* qd_change is read elsewhere under qd_lru_lock */
	spin_lock(&qd_lru_lock);
	qd->qd_change = x;
	spin_unlock(&qd_lru_lock);

	if (!x) {
		/* Net change is zero: release the change file entry */
		gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
		clear_bit(QDF_CHANGE, &qd->qd_flags);
		qc->qc_flags = 0;
		qc->qc_id = 0;
		slot_put(qd);
		qd_put(qd);
	} else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
		/* Entry newly in use: pin it until the change is synced */
		qd_hold(qd);
		slot_hold(qd);
	}

	mutex_unlock(&sdp->sd_quota_mutex);
}
624 624
/**
 * gfs2_adjust_quota - adjust record of current block usage
 * @ip: The quota inode
 * @loc: Offset of the entry in the quota file
 * @change: The amount of usage change to record
 * @qd: The quota data
 * @fdq: The updated limits to record
 *
 * This function was mostly borrowed from gfs2_block_truncate_page which was
 * in turn mostly borrowed from ext3
 *
 * Returns: 0 or -ve on error
 */

static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
			     s64 change, struct gfs2_quota_data *qd,
			     struct fs_disk_quota *fdq)
{
	struct inode *inode = &ip->i_inode;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct address_space *mapping = inode->i_mapping;
	unsigned long index = loc >> PAGE_CACHE_SHIFT;
	unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
	unsigned blocksize, iblock, pos;
	struct buffer_head *bh;
	struct page *page;
	void *kaddr, *ptr;
	struct gfs2_quota q, *qp;
	int err, nbytes;
	u64 size;

	/* The page-based write below requires an unstuffed quota inode */
	if (gfs2_is_stuffed(ip)) {
		err = gfs2_unstuff_dinode(ip, NULL);
		if (err)
			return err;
	}

	/* Read the current on-disk quota record for this id */
	memset(&q, 0, sizeof(struct gfs2_quota));
	err = gfs2_internal_read(ip, (char *)&q, &loc, sizeof(q));
	if (err < 0)
		return err;

	/* Default error returned via the unlock_out paths below */
	err = -EIO;
	qp = &q;
	/* Apply the usage delta; qu_value is big-endian on disk */
	qp->qu_value = be64_to_cpu(qp->qu_value);
	qp->qu_value += change;
	qp->qu_value = cpu_to_be64(qp->qu_value);
	qd->qd_qb.qb_value = qp->qu_value;
	if (fdq) {
		/* New limits from userspace, scaled via sd_fsb2bb_shift
		 * (fs_disk_quota fields appear to be in 512-byte basic
		 * blocks — confirm against the quotactl caller) */
		if (fdq->d_fieldmask & FS_DQ_BSOFT) {
			qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
			qd->qd_qb.qb_warn = qp->qu_warn;
		}
		if (fdq->d_fieldmask & FS_DQ_BHARD) {
			qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
			qd->qd_qb.qb_limit = qp->qu_limit;
		}
		if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
			qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
			qd->qd_qb.qb_value = qp->qu_value;
		}
	}

	/* Write the quota into the quota file on disk */
	ptr = qp;
	nbytes = sizeof(struct gfs2_quota);
get_a_page:
	page = find_or_create_page(mapping, index, GFP_NOFS);
	if (!page)
		return -ENOMEM;

	blocksize = inode->i_sb->s_blocksize;
	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Walk the page's buffers to the one covering @offset */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	if (!buffer_mapped(bh)) {
		gfs2_block_map(inode, iblock, bh, 1);
		if (!buffer_mapped(bh))
			goto unlock_out;
		/* If it's a newly allocated disk block for quota, zero it */
		if (buffer_new(bh))
			zero_user(page, pos - blocksize, bh->b_size);
	}

	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	/* Read the block from disk if not already up to date */
	if (!buffer_uptodate(bh)) {
		ll_rw_block(READ | REQ_META, 1, &bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			goto unlock_out;
	}

	gfs2_trans_add_bh(ip->i_gl, bh, 0);

	kaddr = kmap_atomic(page);
	/* Clamp the copy to this page if the record straddles a boundary */
	if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
		nbytes = PAGE_CACHE_SIZE - offset;
	memcpy(kaddr + offset, ptr, nbytes);
	flush_dcache_page(page);
	kunmap_atomic(kaddr);
	unlock_page(page);
	page_cache_release(page);

	/* If quota straddles page boundary, we need to update the rest of the
	 * quota at the beginning of the next page */
	if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) {
		ptr = ptr + nbytes;
		nbytes = sizeof(struct gfs2_quota) - nbytes;
		offset = 0;
		index++;
		goto get_a_page;
	}

	/* Extend i_size if we wrote past the current end of the file */
	size = loc + sizeof(struct gfs2_quota);
	if (size > inode->i_size)
		i_size_write(inode, size);
	inode->i_mtime = inode->i_atime = CURRENT_TIME;
	mark_inode_dirty(inode);
	return 0;

unlock_out:
	unlock_page(page);
	page_cache_release(page);
	return err;
}
762 762
/*
 * do_sync - write a set of pending quota changes back to the quota file
 * @num_qd: number of elements in @qda
 * @qda: array of quota data elements, each claimed via qd_trylock()
 *
 * Takes the quota glocks (in sorted order) plus the quota inode glock,
 * reserves blocks, then writes each snapshotted change
 * (qd_change_sync) into the quota file and cancels the corresponding
 * entry in the quota change file via do_qc().
 *
 * Returns: errno
 */
static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
{
	struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
	unsigned int data_blocks, ind_blocks;
	struct gfs2_holder *ghs, i_gh;
	unsigned int qx, x;
	struct gfs2_quota_data *qd;
	unsigned reserved;
	loff_t offset;
	unsigned int nalloc = 0, blocks;
	int error;

	error = gfs2_rs_alloc(ip);
	if (error)
		return error;

	gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
			       &data_blocks, &ind_blocks);

	ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
	if (!ghs)
		return -ENOMEM;

	/* Sort so glocks are always acquired in a consistent order */
	sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
	mutex_lock(&ip->i_inode.i_mutex);
	for (qx = 0; qx < num_qd; qx++) {
		error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
					   GL_NOCACHE, &ghs[qx]);
		if (error)
			goto out;
	}

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
	if (error)
		goto out;

	/* Count the records that will need new block allocations */
	for (x = 0; x < num_qd; x++) {
		offset = qd2offset(qda[x]);
		if (gfs2_write_alloc_required(ip, offset,
					      sizeof(struct gfs2_quota)))
			nalloc++;
	}

	/*
	 * 1 blk for unstuffing inode if stuffed. We add this extra
	 * block to the reservation unconditionally. If the inode
	 * doesn't need unstuffing, the block will be released to the
	 * rgrp since it won't be allocated during the transaction
	 */
	/* +3 in the end for unstuffing block, inode size update block
	 * and another block in case quota straddles page boundary and
	 * two blocks need to be updated instead of 1 */
	blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;

	reserved = 1 + (nalloc * (data_blocks + ind_blocks));
	error = gfs2_inplace_reserve(ip, reserved, 0);
	if (error)
		goto out_alloc;

	if (nalloc)
		blocks += gfs2_rg_blocks(ip, reserved) + nalloc * ind_blocks + RES_STATFS;

	error = gfs2_trans_begin(sdp, blocks, 0);
	if (error)
		goto out_ipres;

	for (x = 0; x < num_qd; x++) {
		qd = qda[x];
		offset = qd2offset(qd);
		error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, qd, NULL);
		if (error)
			goto out_end_trans;

		/* Cancel the just-synced delta in the change file and
		 * force the LVB to be refreshed on the next lock */
		do_qc(qd, -qd->qd_change_sync);
		set_bit(QDF_REFRESH, &qd->qd_flags);
	}

	error = 0;

out_end_trans:
	gfs2_trans_end(sdp);
out_ipres:
	gfs2_inplace_release(ip);
out_alloc:
	gfs2_glock_dq_uninit(&i_gh);
out:
	/* Drop the quota glocks acquired before the failure point */
	while (qx--)
		gfs2_glock_dq_uninit(&ghs[qx]);
	mutex_unlock(&ip->i_inode.i_mutex);
	kfree(ghs);
	gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
	return error;
}
857 857
/*
 * update_qd - refresh a quota data element's LVB from the quota file
 * @sdp: the filesystem
 * @qd: the quota data element
 *
 * Reads the on-disk quota record and publishes its limit/warn/value
 * through the glock's lock value block, caching a copy in qd->qd_qb.
 *
 * Returns: errno
 */
static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
	struct gfs2_quota q;
	struct gfs2_quota_lvb *qlvb;
	loff_t pos;
	int error;

	memset(&q, 0, sizeof(struct gfs2_quota));
	pos = qd2offset(qd);
	error = gfs2_internal_read(ip, (char *)&q, &pos, sizeof(q));
	if (error < 0)
		return error;

	qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
	qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
	qlvb->__pad = 0;
	qlvb->qb_limit = q.qu_limit;
	qlvb->qb_warn = q.qu_warn;
	qlvb->qb_value = q.qu_value;
	qd->qd_qb = *qlvb;

	return 0;
}
882 882
/*
 * do_glock - acquire a quota glock, refreshing its LVB if needed
 * @qd: the quota data element
 * @force_refresh: non-zero to re-read the quota from disk regardless
 * @q_gh: holder filled in with the (shared) quota glock on success
 *
 * Normally takes the glock shared and trusts the LVB contents.  If the
 * LVB is uninitialized (bad magic) or a refresh is forced, the lock is
 * retaken exclusively, the record is re-read from the quota file via
 * update_qd(), and the function restarts to end up with a shared hold.
 *
 * Returns: errno
 */
static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
		    struct gfs2_holder *q_gh)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
	struct gfs2_holder i_gh;
	int error;

restart:
	error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
	if (error)
		return error;

	qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;

	if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
		gfs2_glock_dq_uninit(q_gh);
		/* Upgrade to exclusive so the LVB may be rewritten */
		error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE,
					   GL_NOCACHE, q_gh);
		if (error)
			return error;

		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
		if (error)
			goto fail;

		error = update_qd(sdp, qd);
		if (error)
			goto fail_gunlock;

		/* Drop both locks and retry with the now-valid LVB */
		gfs2_glock_dq_uninit(&i_gh);
		gfs2_glock_dq_uninit(q_gh);
		force_refresh = 0;
		goto restart;
	}

	return 0;

fail_gunlock:
	gfs2_glock_dq_uninit(&i_gh);
fail:
	gfs2_glock_dq_uninit(q_gh);
	return error;
}
927 927
/*
 * gfs2_quota_lock - hold and lock the quotas for an operation
 * @ip: the inode
 * @uid: user id to check (or NO_QUOTA_CHANGE)
 * @gid: group id to check (or NO_QUOTA_CHANGE)
 *
 * Takes the quota data references via gfs2_quota_hold() and then the
 * quota glocks in sorted order.  On success GIF_QD_LOCKED is set and
 * the caller must balance with gfs2_quota_unlock().
 *
 * Returns: errno
 */
int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_quota_data *qd;
	unsigned int x;
	int error = 0;

	error = gfs2_quota_hold(ip, uid, gid);
	if (error)
		return error;

	/* CAP_SYS_RESOURCE and non-enforcing mounts skip the glocks */
	if (capable(CAP_SYS_RESOURCE) ||
	    sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
		return 0;

	/* Consistent lock ordering across nodes */
	sort(ip->i_res->rs_qa_qd, ip->i_res->rs_qa_qd_num,
	     sizeof(struct gfs2_quota_data *), sort_qd, NULL);

	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
		int force = NO_FORCE;
		qd = ip->i_res->rs_qa_qd[x];
		/* A just-synced qd needs its LVB re-read from disk */
		if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
			force = FORCE;
		error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
		if (error)
			break;
	}

	if (!error)
		set_bit(GIF_QD_LOCKED, &ip->i_flags);
	else {
		/* Unwind the glocks taken so far and drop the holds */
		while (x--)
			gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
		gfs2_quota_unhold(ip);
	}

	return error;
}
966 966
967 static int need_sync(struct gfs2_quota_data *qd) 967 static int need_sync(struct gfs2_quota_data *qd)
968 { 968 {
969 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 969 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
970 struct gfs2_tune *gt = &sdp->sd_tune; 970 struct gfs2_tune *gt = &sdp->sd_tune;
971 s64 value; 971 s64 value;
972 unsigned int num, den; 972 unsigned int num, den;
973 int do_sync = 1; 973 int do_sync = 1;
974 974
975 if (!qd->qd_qb.qb_limit) 975 if (!qd->qd_qb.qb_limit)
976 return 0; 976 return 0;
977 977
978 spin_lock(&qd_lru_lock); 978 spin_lock(&qd_lru_lock);
979 value = qd->qd_change; 979 value = qd->qd_change;
980 spin_unlock(&qd_lru_lock); 980 spin_unlock(&qd_lru_lock);
981 981
982 spin_lock(&gt->gt_spin); 982 spin_lock(&gt->gt_spin);
983 num = gt->gt_quota_scale_num; 983 num = gt->gt_quota_scale_num;
984 den = gt->gt_quota_scale_den; 984 den = gt->gt_quota_scale_den;
985 spin_unlock(&gt->gt_spin); 985 spin_unlock(&gt->gt_spin);
986 986
987 if (value < 0) 987 if (value < 0)
988 do_sync = 0; 988 do_sync = 0;
989 else if ((s64)be64_to_cpu(qd->qd_qb.qb_value) >= 989 else if ((s64)be64_to_cpu(qd->qd_qb.qb_value) >=
990 (s64)be64_to_cpu(qd->qd_qb.qb_limit)) 990 (s64)be64_to_cpu(qd->qd_qb.qb_limit))
991 do_sync = 0; 991 do_sync = 0;
992 else { 992 else {
993 value *= gfs2_jindex_size(sdp) * num; 993 value *= gfs2_jindex_size(sdp) * num;
994 value = div_s64(value, den); 994 value = div_s64(value, den);
995 value += (s64)be64_to_cpu(qd->qd_qb.qb_value); 995 value += (s64)be64_to_cpu(qd->qd_qb.qb_value);
996 if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit)) 996 if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit))
997 do_sync = 0; 997 do_sync = 0;
998 } 998 }
999 999
1000 return do_sync; 1000 return do_sync;
1001 } 1001 }
1002 1002
/*
 * gfs2_quota_unlock - release quota glocks, syncing large changes
 * @ip: the inode
 *
 * Drops the glocks taken by gfs2_quota_lock().  Any quota whose pending
 * change is large enough (per need_sync()) and that can be claimed with
 * qd_trylock() is written back via do_sync() before the holds are
 * dropped.
 */
void gfs2_quota_unlock(struct gfs2_inode *ip)
{
	struct gfs2_quota_data *qda[4];
	unsigned int count = 0;
	unsigned int x;

	if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
		goto out;

	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
		struct gfs2_quota_data *qd;
		int sync;

		qd = ip->i_res->rs_qa_qd[x];
		sync = need_sync(qd);

		gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);

		/* qd_trylock() fails if another context is syncing it */
		if (sync && qd_trylock(qd))
			qda[count++] = qd;
	}

	if (count) {
		do_sync(count, qda);
		for (x = 0; x < count; x++)
			qd_unlock(qda[x]);
	}

out:
	gfs2_quota_unhold(ip);
}
1034 1034
1035 #define MAX_LINE 256 1035 #define MAX_LINE 256
1036 1036
1037 static int print_message(struct gfs2_quota_data *qd, char *type) 1037 static int print_message(struct gfs2_quota_data *qd, char *type)
1038 { 1038 {
1039 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 1039 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
1040 1040
1041 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n", 1041 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n",
1042 sdp->sd_fsname, type, 1042 sdp->sd_fsname, type,
1043 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group", 1043 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
1044 qd->qd_id); 1044 qd->qd_id);
1045 1045
1046 return 0; 1046 return 0;
1047 } 1047 }
1048 1048
/*
 * gfs2_quota_check - check whether the given uid/gid are over quota
 * @ip: the inode (quotas must be locked via gfs2_quota_lock())
 * @uid: user id to check
 * @gid: group id to check
 *
 * Compares the cached cluster-wide value plus this node's pending
 * change against the hard and soft limits.  A hard limit violation
 * logs, sends a netlink warning and returns -EDQUOT; a soft limit
 * violation sends a rate-limited warning but still returns 0.
 *
 * Returns: 0 on success, -EDQUOT if over the hard limit
 */
int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_quota_data *qd;
	s64 value;
	unsigned int x;
	int error = 0;

	if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
		return 0;

	if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
		return 0;

	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
		qd = ip->i_res->rs_qa_qd[x];

		/* Only the entries matching the given uid/gid */
		if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
		      (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
			continue;

		/* Cluster-wide value plus this node's unsynced delta */
		value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
		spin_lock(&qd_lru_lock);
		value += qd->qd_change;
		spin_unlock(&qd_lru_lock);

		if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
			print_message(qd, "exceeded");
			quota_send_warning(make_kqid(&init_user_ns,
						     test_bit(QDF_USER, &qd->qd_flags) ?
						     USRQUOTA : GRPQUOTA,
						     qd->qd_id),
					   sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);

			error = -EDQUOT;
			break;
		} else if (be64_to_cpu(qd->qd_qb.qb_warn) &&
			   (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value &&
			   time_after_eq(jiffies, qd->qd_last_warn +
					 gfs2_tune_get(sdp,
						       gt_quota_warn_period) * HZ)) {
			/* Soft limit: warn at most once per warn period */
			quota_send_warning(make_kqid(&init_user_ns,
						     test_bit(QDF_USER, &qd->qd_flags) ?
						     USRQUOTA : GRPQUOTA,
						     qd->qd_id),
					   sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
			error = print_message(qd, "warning");
			qd->qd_last_warn = jiffies;
		}
	}

	return error;
}
1102 1102
/**
 * gfs2_quota_change - Record a pending quota change against an inode
 * @ip: The inode being changed
 * @change: The change in block count (may be negative)
 * @uid: The user ID to account the change to
 * @gid: The group ID to account the change to
 */
void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
		       u32 uid, u32 gid)
{
	struct gfs2_quota_data *qd;
	unsigned int x;

	/* A zero change is a caller bug; warn and ignore it */
	if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change))
		return;
	/* System files are not quota-accounted */
	if (ip->i_diskflags & GFS2_DIF_SYSTEM)
		return;

	for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
		qd = ip->i_res->rs_qa_qd[x];

		/* Apply the change to each matching user/group entry */
		if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
		    (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
			do_qc(qd, change);
		}
	}
}
1123 1123
/**
 * gfs2_quota_sync - Sync dirty quota changes to the quota file
 * @sb: The superblock
 * @type: Quota type (unused; all types are synced)
 *
 * Repeatedly gathers up to gt_quota_simul_sync dirty quota entries via
 * qd_fish() and writes them out with do_sync(), until no more remain
 * or an error occurs.
 *
 * Returns: 0 on success or a negative errno
 */
int gfs2_quota_sync(struct super_block *sb, int type)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_quota_data **qda;
	unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
	unsigned int num_qd;
	unsigned int x;
	int error = 0;

	sdp->sd_quota_sync_gen++;

	qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
	if (!qda)
		return -ENOMEM;

	do {
		num_qd = 0;

		/* Collect a batch of dirty quota entries */
		for (;;) {
			error = qd_fish(sdp, qda + num_qd);
			if (error || !qda[num_qd])
				break;
			if (++num_qd == max_qd)
				break;
		}

		if (num_qd) {
			if (!error)
				error = do_sync(num_qd, qda);
			if (!error)
				/* Stamp the entries with the new sync generation */
				for (x = 0; x < num_qd; x++)
					qda[x]->qd_sync_gen =
						sdp->sd_quota_sync_gen;

			for (x = 0; x < num_qd; x++)
				qd_unlock(qda[x]);
		}
	} while (!error && num_qd == max_qd); /* full batch => may be more */

	kfree(qda);

	return error;
}
1167 1167
/* Timeout-driven wrapper around gfs2_quota_sync(), with the signature
   expected by quotad_check_timeo() */
static int gfs2_quota_sync_timeo(struct super_block *sb, int type)
{
	return gfs2_quota_sync(sb, type);
}
1172 1172
/**
 * gfs2_quota_refresh - Re-read a quota entry from disk
 * @sdp: The GFS2 superblock
 * @user: Non-zero for a user quota, zero for a group quota
 * @id: The uid/gid of the quota entry to refresh
 *
 * Returns: 0 on success or a negative errno
 */
int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
{
	struct gfs2_quota_data *qd;
	struct gfs2_holder q_gh;
	int error;

	error = qd_get(sdp, user, id, &qd);
	if (error)
		return error;

	/* FORCE makes do_glock() re-fetch the quota data */
	error = do_glock(qd, FORCE, &q_gh);
	if (!error)
		gfs2_glock_dq_uninit(&q_gh);

	qd_put(qd);
	return error;
}
1190 1190
1191 static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf) 1191 static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
1192 { 1192 {
1193 const struct gfs2_quota_change *str = buf; 1193 const struct gfs2_quota_change *str = buf;
1194 1194
1195 qc->qc_change = be64_to_cpu(str->qc_change); 1195 qc->qc_change = be64_to_cpu(str->qc_change);
1196 qc->qc_flags = be32_to_cpu(str->qc_flags); 1196 qc->qc_flags = be32_to_cpu(str->qc_flags);
1197 qc->qc_id = be32_to_cpu(str->qc_id); 1197 qc->qc_id = be32_to_cpu(str->qc_id);
1198 } 1198 }
1199 1199
/**
 * gfs2_quota_init - Read pending quota changes from the quota change file
 * @sdp: The GFS2 superblock
 *
 * Called at mount time. Allocates the quota slot bitmap, then scans
 * every block of the quota change (qc) file, building an in-core
 * gfs2_quota_data for each slot with a non-zero pending change so it
 * can be synced later.
 *
 * Returns: 0 on success or a negative errno
 */
int gfs2_quota_init(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
	u64 size = i_size_read(sdp->sd_qc_inode);
	unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift;
	unsigned int x, slot = 0;
	unsigned int found = 0;
	u64 dblock;
	u32 extlen = 0;
	int error;

	/* Sanity check the qc file size (min 1 byte, max 64M) */
	if (gfs2_check_internal_file_size(sdp->sd_qc_inode, 1, 64 << 20))
		return -EIO;

	sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block;
	sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE);

	error = -ENOMEM;

	/* One bit per slot, stored in page-sized chunks */
	sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks,
				       sizeof(unsigned char *), GFP_NOFS);
	if (!sdp->sd_quota_bitmap)
		return error;

	for (x = 0; x < sdp->sd_quota_chunks; x++) {
		sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_NOFS);
		if (!sdp->sd_quota_bitmap[x])
			goto fail;
	}

	for (x = 0; x < blocks; x++) {
		struct buffer_head *bh;
		unsigned int y;

		/* Map the next extent of the qc file when the previous
		   one has been consumed */
		if (!extlen) {
			int new = 0;
			error = gfs2_extent_map(&ip->i_inode, x, &new, &dblock, &extlen);
			if (error)
				goto fail;
		}
		error = -EIO;
		bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
		if (!bh)
			goto fail;
		if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) {
			brelse(bh);
			goto fail;
		}

		for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
		     y++, slot++) {
			struct gfs2_quota_change_host qc;
			struct gfs2_quota_data *qd;

			gfs2_quota_change_in(&qc, bh->b_data +
					  sizeof(struct gfs2_meta_header) +
					  y * sizeof(struct gfs2_quota_change));
			/* Skip slots with no pending change */
			if (!qc.qc_change)
				continue;

			error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER),
					 qc.qc_id, &qd);
			if (error) {
				brelse(bh);
				goto fail;
			}

			set_bit(QDF_CHANGE, &qd->qd_flags);
			qd->qd_change = qc.qc_change;
			qd->qd_slot = slot;
			qd->qd_slot_count = 1;

			/* Mark the slot in use and publish the new entry */
			spin_lock(&qd_lru_lock);
			gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
			list_add(&qd->qd_list, &sdp->sd_quota_list);
			atomic_inc(&sdp->sd_quota_count);
			spin_unlock(&qd_lru_lock);

			found++;
		}

		brelse(bh);
		dblock++;
		extlen--;
	}

	if (found)
		fs_info(sdp, "found %u quota changes\n", found);

	return 0;

fail:
	gfs2_quota_cleanup(sdp);
	return error;
}
1295 1295
/**
 * gfs2_quota_cleanup - Free all in-core quota data
 * @sdp: The GFS2 superblock
 *
 * Called at unmount time (and on gfs2_quota_init() failure). Waits for
 * outstanding references on each quota entry to drain, then frees the
 * entries and the slot bitmap.
 */
void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
{
	struct list_head *head = &sdp->sd_quota_list;
	struct gfs2_quota_data *qd;
	unsigned int x;

	spin_lock(&qd_lru_lock);
	while (!list_empty(head)) {
		qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);

		/* Still referenced beyond the QDF_CHANGE reference?
		   Rotate it to the head, drop the lock and reschedule to
		   give the holders a chance to release it. */
		if (atomic_read(&qd->qd_count) > 1 ||
		    (atomic_read(&qd->qd_count) &&
		     !test_bit(QDF_CHANGE, &qd->qd_flags))) {
			list_move(&qd->qd_list, head);
			spin_unlock(&qd_lru_lock);
			schedule();
			spin_lock(&qd_lru_lock);
			continue;
		}

		list_del(&qd->qd_list);
		/* Also remove if this qd exists in the reclaim list */
		if (!list_empty(&qd->qd_reclaim)) {
			list_del_init(&qd->qd_reclaim);
			atomic_dec(&qd_lru_count);
		}
		atomic_dec(&sdp->sd_quota_count);
		spin_unlock(&qd_lru_lock);

		/* Sanity checks before freeing */
		if (!atomic_read(&qd->qd_count)) {
			gfs2_assert_warn(sdp, !qd->qd_change);
			gfs2_assert_warn(sdp, !qd->qd_slot_count);
		} else
			gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
		gfs2_assert_warn(sdp, !qd->qd_bh_count);

		gfs2_glock_put(qd->qd_gl);
		kmem_cache_free(gfs2_quotad_cachep, qd);

		spin_lock(&qd_lru_lock);
	}
	spin_unlock(&qd_lru_lock);

	gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));

	if (sdp->sd_quota_bitmap) {
		for (x = 0; x < sdp->sd_quota_chunks; x++)
			kfree(sdp->sd_quota_bitmap[x]);
		kfree(sdp->sd_quota_bitmap);
	}
}
1347 1347
1348 static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) 1348 static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
1349 { 1349 {
1350 if (error == 0 || error == -EROFS) 1350 if (error == 0 || error == -EROFS)
1351 return; 1351 return;
1352 if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) 1352 if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
1353 fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); 1353 fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
1354 } 1354 }
1355 1355
1356 static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, 1356 static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg,
1357 int (*fxn)(struct super_block *sb, int type), 1357 int (*fxn)(struct super_block *sb, int type),
1358 unsigned long t, unsigned long *timeo, 1358 unsigned long t, unsigned long *timeo,
1359 unsigned int *new_timeo) 1359 unsigned int *new_timeo)
1360 { 1360 {
1361 if (t >= *timeo) { 1361 if (t >= *timeo) {
1362 int error = fxn(sdp->sd_vfs, 0); 1362 int error = fxn(sdp->sd_vfs, 0);
1363 quotad_error(sdp, msg, error); 1363 quotad_error(sdp, msg, error);
1364 *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ; 1364 *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ;
1365 } else { 1365 } else {
1366 *timeo -= t; 1366 *timeo -= t;
1367 } 1367 }
1368 } 1368 }
1369 1369
/* Complete any deferred truncations queued on the superblock's
   truncate list, one inode at a time */
static void quotad_check_trunc_list(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip;

	while(1) {
		ip = NULL;
		/* Pop one inode from the list under the lock */
		spin_lock(&sdp->sd_trunc_lock);
		if (!list_empty(&sdp->sd_trunc_list)) {
			ip = list_entry(sdp->sd_trunc_list.next,
					struct gfs2_inode, i_trunc_list);
			list_del_init(&ip->i_trunc_list);
		}
		spin_unlock(&sdp->sd_trunc_lock);
		if (ip == NULL)
			return;
		/* Finish the truncate outside the spinlock */
		gfs2_glock_finish_truncate(ip);
	}
}
1388 1388
1389 void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) { 1389 void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) {
1390 if (!sdp->sd_statfs_force_sync) { 1390 if (!sdp->sd_statfs_force_sync) {
1391 sdp->sd_statfs_force_sync = 1; 1391 sdp->sd_statfs_force_sync = 1;
1392 wake_up(&sdp->sd_quota_wait); 1392 wake_up(&sdp->sd_quota_wait);
1393 } 1393 }
1394 } 1394 }
1395 1395
1396 1396
/**
 * gfs2_quotad - Write cached quota changes into the quota file
 * @data: Pointer to GFS2 superblock
 *
 * Kernel thread main loop: periodically syncs the statfs and quota
 * files and completes any partially truncated inodes, sleeping in
 * between until the next timeout or an explicit wakeup, until asked
 * to stop.
 *
 * Returns: 0
 */

int gfs2_quotad(void *data)
{
	struct gfs2_sbd *sdp = data;
	struct gfs2_tune *tune = &sdp->sd_tune;
	unsigned long statfs_timeo = 0;
	unsigned long quotad_timeo = 0;
	unsigned long t = 0;
	DEFINE_WAIT(wait);
	int empty;

	while (!kthread_should_stop()) {

		/* Update the master statfs file */
		if (sdp->sd_statfs_force_sync) {
			/* Forced sync requested via gfs2_wake_up_statfs() */
			int error = gfs2_statfs_sync(sdp->sd_vfs, 0);
			quotad_error(sdp, "statfs", error);
			statfs_timeo = gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
		}
		else
			quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t,
					   &statfs_timeo,
					   &tune->gt_statfs_quantum);

		/* Update quota file */
		quotad_check_timeo(sdp, "sync", gfs2_quota_sync_timeo, t,
				   &quotad_timeo, &tune->gt_quota_quantum);

		/* Check for & recover partially truncated inodes */
		quotad_check_trunc_list(sdp);

		try_to_freeze();

		/* Sleep until the nearer of the two timeouts */
		t = min(quotad_timeo, statfs_timeo);

		prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
		spin_lock(&sdp->sd_trunc_lock);
		empty = list_empty(&sdp->sd_trunc_list);
		spin_unlock(&sdp->sd_trunc_lock);
		/* Don't sleep if there is pending work */
		if (empty && !sdp->sd_statfs_force_sync)
			t -= schedule_timeout(t);
		else
			t = 0;
		finish_wait(&sdp->sd_quota_wait, &wait);
	}

	return 0;
}
1450 1450
/**
 * gfs2_quota_get_xstate - Report quota state in XFS fs_quota_stat form
 * @sb: The superblock
 * @fqs: The structure to fill in
 *
 * Returns: 0
 */
static int gfs2_quota_get_xstate(struct super_block *sb,
				 struct fs_quota_stat *fqs)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;

	memset(fqs, 0, sizeof(struct fs_quota_stat));
	fqs->qs_version = FS_QSTAT_VERSION;

	/* Map the GFS2 quota mount option onto the XFS flag bits:
	   "on" implies both enforcement and accounting */
	switch (sdp->sd_args.ar_quota) {
	case GFS2_QUOTA_ON:
		fqs->qs_flags |= (FS_QUOTA_UDQ_ENFD | FS_QUOTA_GDQ_ENFD);
		/*FALLTHRU*/
	case GFS2_QUOTA_ACCOUNT:
		fqs->qs_flags |= (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT);
		break;
	case GFS2_QUOTA_OFF:
		break;
	}

	if (sdp->sd_quota_inode) {
		fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr;
		fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks;
	}
	fqs->qs_uquota.qfs_nextents = 1; /* unsupported */
	fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */
	fqs->qs_incoredqs = atomic_read(&qd_lru_count);
	return 0;
}
1479 1479
/**
 * gfs2_get_dqblk - Fetch one quota entry in XFS fs_disk_quota form
 * @sb: The superblock
 * @qid: The user/group id of the quota to fetch
 * @fdq: The structure to fill in
 *
 * Limits and counts are converted from filesystem blocks to basic
 * (512-byte) blocks via sd_fsb2bb_shift, as the XFS interface expects.
 *
 * Returns: 0 on success or a negative errno
 */
static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
			  struct fs_disk_quota *fdq)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_quota_lvb *qlvb;
	struct gfs2_quota_data *qd;
	struct gfs2_holder q_gh;
	int error;
	int type;

	memset(fdq, 0, sizeof(struct fs_disk_quota));

	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
		return -ESRCH; /* Crazy XFS error code */

	if (qid.type == USRQUOTA)
		type = QUOTA_USER;
	else if (qid.type == GRPQUOTA)
		type = QUOTA_GROUP;
	else
		return -EINVAL;

	error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd);
	if (error)
		return error;
	/* FORCE ensures we read up to date values from the lvb */
	error = do_glock(qd, FORCE, &q_gh);
	if (error)
		goto out;

	qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
	fdq->d_version = FS_DQUOT_VERSION;
	fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
	fdq->d_id = from_kqid(&init_user_ns, qid);
	fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift;
	fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift;
	fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift;

	gfs2_glock_dq_uninit(&q_gh);
out:
	qd_put(qd);
	return error;
}
1522 1522
/* GFS2 only supports a subset of the XFS fields */
#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT)

/**
 * gfs2_set_dqblk - Update one quota entry from XFS fs_disk_quota form
 * @sb: The superblock
 * @qid: The user/group id of the quota to set
 * @fdq: The new limits/counts (basic blocks; shifted to fs blocks)
 *
 * Takes the quota and quota-inode glocks exclusively, drops any fields
 * whose new value equals the current one, and if anything remains,
 * writes the update into the quota file under a transaction.
 *
 * Returns: 0 on success or a negative errno
 */
static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
			  struct fs_disk_quota *fdq)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
	struct gfs2_quota_data *qd;
	struct gfs2_holder q_gh, i_gh;
	unsigned int data_blocks, ind_blocks;
	unsigned int blocks = 0;
	int alloc_required;
	loff_t offset;
	int error;
	int type;

	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
		return -ESRCH; /* Crazy XFS error code */

	/* The kqid type and the fdq flags must agree */
	switch(qid.type) {
	case USRQUOTA:
		type = QUOTA_USER;
		if (fdq->d_flags != FS_USER_QUOTA)
			return -EINVAL;
		break;
	case GRPQUOTA:
		type = QUOTA_GROUP;
		if (fdq->d_flags != FS_GROUP_QUOTA)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (fdq->d_fieldmask & ~GFS2_FIELDMASK)
		return -EINVAL;
	if (fdq->d_id != from_kqid(&init_user_ns, qid))
		return -EINVAL;

	error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd);
	if (error)
		return error;

	error = gfs2_rs_alloc(ip);
	if (error)
		goto out_put;

	mutex_lock(&ip->i_inode.i_mutex);
	error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
	if (error)
		goto out_unlockput;
	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
	if (error)
		goto out_q;

	/* Check for existing entry, if none then alloc new blocks */
	error = update_qd(sdp, qd);
	if (error)
		goto out_i;

	/* If nothing has changed, this is a no-op */
	if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
	    ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn)))
		fdq->d_fieldmask ^= FS_DQ_BSOFT;

	if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
	    ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit)))
		fdq->d_fieldmask ^= FS_DQ_BHARD;

	if ((fdq->d_fieldmask & FS_DQ_BCOUNT) &&
	    ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value)))
		fdq->d_fieldmask ^= FS_DQ_BCOUNT;

	if (fdq->d_fieldmask == 0)
		goto out_i;

	offset = qd2offset(qd);
	alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota));
	/* A stuffed inode must be unstuffed before writing at an offset */
	if (gfs2_is_stuffed(ip))
		alloc_required = 1;
	if (alloc_required) {
		gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
				       &data_blocks, &ind_blocks);
		blocks = 1 + data_blocks + ind_blocks;
		error = gfs2_inplace_reserve(ip, blocks, 0);
		if (error)
			goto out_i;
		blocks += gfs2_rg_blocks(ip, blocks);
	}

	/* Some quotas span block boundaries and can update two blocks,
	   adding an extra block to the transaction to handle such quotas */
	error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 2, 0);
	if (error)
		goto out_release;

	/* Apply changes */
	error = gfs2_adjust_quota(ip, offset, 0, qd, fdq);

	gfs2_trans_end(sdp);
out_release:
	if (alloc_required)
		gfs2_inplace_release(ip);
out_i:
	gfs2_glock_dq_uninit(&i_gh);
out_q:
	gfs2_glock_dq_uninit(&q_gh);
out_unlockput:
	mutex_unlock(&ip->i_inode.i_mutex);
out_put:
	qd_put(qd);
	return error;
}
1637 1637
/* XFS-style quotactl operations exported to the VFS quota layer */
const struct quotactl_ops gfs2_quotactl_ops = {
	.quota_sync     = gfs2_quota_sync,
	.get_xstate     = gfs2_quota_get_xstate,
	.get_dqblk	= gfs2_get_dqblk,
	.set_dqblk	= gfs2_set_dqblk,
};
1644 1644
1 /* 1 /*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10 #include <linux/slab.h> 10 #include <linux/slab.h>
11 #include <linux/spinlock.h> 11 #include <linux/spinlock.h>
12 #include <linux/completion.h> 12 #include <linux/completion.h>
13 #include <linux/buffer_head.h> 13 #include <linux/buffer_head.h>
14 #include <linux/fs.h> 14 #include <linux/fs.h>
15 #include <linux/gfs2_ondisk.h> 15 #include <linux/gfs2_ondisk.h>
16 #include <linux/prefetch.h> 16 #include <linux/prefetch.h>
17 #include <linux/blkdev.h> 17 #include <linux/blkdev.h>
18 #include <linux/rbtree.h> 18 #include <linux/rbtree.h>
19 #include <linux/random.h>
19 20
20 #include "gfs2.h" 21 #include "gfs2.h"
21 #include "incore.h" 22 #include "incore.h"
22 #include "glock.h" 23 #include "glock.h"
23 #include "glops.h" 24 #include "glops.h"
24 #include "lops.h" 25 #include "lops.h"
25 #include "meta_io.h" 26 #include "meta_io.h"
26 #include "quota.h" 27 #include "quota.h"
27 #include "rgrp.h" 28 #include "rgrp.h"
28 #include "super.h" 29 #include "super.h"
29 #include "trans.h" 30 #include "trans.h"
30 #include "util.h" 31 #include "util.h"
31 #include "log.h" 32 #include "log.h"
32 #include "inode.h" 33 #include "inode.h"
33 #include "trace_gfs2.h" 34 #include "trace_gfs2.h"
34 35
/* Sentinel values returned by bitmap searches when nothing is found */
#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)

/*
 * Word-sized patterns used by the bitmap scanner. The 0x55... pattern
 * selects the low bit of each two-bit block-state entry, sized to
 * match the machine word.
 */
#if BITS_PER_LONG == 32
#define LBITMASK   (0x55555555UL)
#define LBITSKIP55 (0x55555555UL)
#define LBITSKIP00 (0x00000000UL)
#else
#define LBITMASK   (0x5555555555555555UL)
#define LBITSKIP55 (0x5555555555555555UL)
#define LBITSKIP00 (0x0000000000000000UL)
#endif
47 48
48 /* 49 /*
49 * These routines are used by the resource group routines (rgrp.c) 50 * These routines are used by the resource group routines (rgrp.c)
50 * to keep track of block allocation. Each block is represented by two 51 * to keep track of block allocation. Each block is represented by two
51 * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks. 52 * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks.
52 * 53 *
53 * 0 = Free 54 * 0 = Free
54 * 1 = Used (not metadata) 55 * 1 = Used (not metadata)
55 * 2 = Unlinked (still in use) inode 56 * 2 = Unlinked (still in use) inode
56 * 3 = Used (metadata) 57 * 3 = Used (metadata)
57 */ 58 */
58 59
59 static const char valid_change[16] = { 60 static const char valid_change[16] = {
60 /* current */ 61 /* current */
61 /* n */ 0, 1, 1, 1, 62 /* n */ 0, 1, 1, 1,
62 /* e */ 1, 0, 0, 0, 63 /* e */ 1, 0, 0, 0,
63 /* w */ 0, 0, 0, 1, 64 /* w */ 0, 0, 0, 1,
64 1, 0, 0, 0 65 1, 0, 0, 0
65 }; 66 };
66 67
67 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, 68 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
68 const struct gfs2_inode *ip, bool nowrap); 69 const struct gfs2_inode *ip, bool nowrap);
69 70
70 71
71 /** 72 /**
72 * gfs2_setbit - Set a bit in the bitmaps 73 * gfs2_setbit - Set a bit in the bitmaps
73 * @rbm: The position of the bit to set 74 * @rbm: The position of the bit to set
74 * @do_clone: Also set the clone bitmap, if it exists 75 * @do_clone: Also set the clone bitmap, if it exists
75 * @new_state: the new state of the block 76 * @new_state: the new state of the block
76 * 77 *
77 */ 78 */
78 79
79 static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, 80 static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
80 unsigned char new_state) 81 unsigned char new_state)
81 { 82 {
82 unsigned char *byte1, *byte2, *end, cur_state; 83 unsigned char *byte1, *byte2, *end, cur_state;
83 unsigned int buflen = rbm->bi->bi_len; 84 unsigned int buflen = rbm->bi->bi_len;
84 const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; 85 const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
85 86
86 byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); 87 byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
87 end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; 88 end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen;
88 89
89 BUG_ON(byte1 >= end); 90 BUG_ON(byte1 >= end);
90 91
91 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 92 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
92 93
93 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 94 if (unlikely(!valid_change[new_state * 4 + cur_state])) {
94 printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " 95 printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, "
95 "new_state=%d\n", rbm->offset, cur_state, new_state); 96 "new_state=%d\n", rbm->offset, cur_state, new_state);
96 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", 97 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n",
97 (unsigned long long)rbm->rgd->rd_addr, 98 (unsigned long long)rbm->rgd->rd_addr,
98 rbm->bi->bi_start); 99 rbm->bi->bi_start);
99 printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", 100 printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n",
100 rbm->bi->bi_offset, rbm->bi->bi_len); 101 rbm->bi->bi_offset, rbm->bi->bi_len);
101 dump_stack(); 102 dump_stack();
102 gfs2_consist_rgrpd(rbm->rgd); 103 gfs2_consist_rgrpd(rbm->rgd);
103 return; 104 return;
104 } 105 }
105 *byte1 ^= (cur_state ^ new_state) << bit; 106 *byte1 ^= (cur_state ^ new_state) << bit;
106 107
107 if (do_clone && rbm->bi->bi_clone) { 108 if (do_clone && rbm->bi->bi_clone) {
108 byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); 109 byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
109 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; 110 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
110 *byte2 ^= (cur_state ^ new_state) << bit; 111 *byte2 ^= (cur_state ^ new_state) << bit;
111 } 112 }
112 } 113 }
113 114
114 /** 115 /**
115 * gfs2_testbit - test a bit in the bitmaps 116 * gfs2_testbit - test a bit in the bitmaps
116 * @rbm: The bit to test 117 * @rbm: The bit to test
117 * 118 *
118 * Returns: The two bit block state of the requested bit 119 * Returns: The two bit block state of the requested bit
119 */ 120 */
120 121
121 static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) 122 static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
122 { 123 {
123 const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; 124 const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset;
124 const u8 *byte; 125 const u8 *byte;
125 unsigned int bit; 126 unsigned int bit;
126 127
127 byte = buffer + (rbm->offset / GFS2_NBBY); 128 byte = buffer + (rbm->offset / GFS2_NBBY);
128 bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; 129 bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
129 130
130 return (*byte >> bit) & GFS2_BIT_MASK; 131 return (*byte >> bit) & GFS2_BIT_MASK;
131 } 132 }
132 133
133 /** 134 /**
134 * gfs2_bit_search 135 * gfs2_bit_search
135 * @ptr: Pointer to bitmap data 136 * @ptr: Pointer to bitmap data
136 * @mask: Mask to use (normally 0x55555.... but adjusted for search start) 137 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
137 * @state: The state we are searching for 138 * @state: The state we are searching for
138 * 139 *
139 * We xor the bitmap data with a patter which is the bitwise opposite 140 * We xor the bitmap data with a patter which is the bitwise opposite
140 * of what we are looking for, this gives rise to a pattern of ones 141 * of what we are looking for, this gives rise to a pattern of ones
141 * wherever there is a match. Since we have two bits per entry, we 142 * wherever there is a match. Since we have two bits per entry, we
142 * take this pattern, shift it down by one place and then and it with 143 * take this pattern, shift it down by one place and then and it with
143 * the original. All the even bit positions (0,2,4, etc) then represent 144 * the original. All the even bit positions (0,2,4, etc) then represent
144 * successful matches, so we mask with 0x55555..... to remove the unwanted 145 * successful matches, so we mask with 0x55555..... to remove the unwanted
145 * odd bit positions. 146 * odd bit positions.
146 * 147 *
147 * This allows searching of a whole u64 at once (32 blocks) with a 148 * This allows searching of a whole u64 at once (32 blocks) with a
148 * single test (on 64 bit arches). 149 * single test (on 64 bit arches).
149 */ 150 */
150 151
151 static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) 152 static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
152 { 153 {
153 u64 tmp; 154 u64 tmp;
154 static const u64 search[] = { 155 static const u64 search[] = {
155 [0] = 0xffffffffffffffffULL, 156 [0] = 0xffffffffffffffffULL,
156 [1] = 0xaaaaaaaaaaaaaaaaULL, 157 [1] = 0xaaaaaaaaaaaaaaaaULL,
157 [2] = 0x5555555555555555ULL, 158 [2] = 0x5555555555555555ULL,
158 [3] = 0x0000000000000000ULL, 159 [3] = 0x0000000000000000ULL,
159 }; 160 };
160 tmp = le64_to_cpu(*ptr) ^ search[state]; 161 tmp = le64_to_cpu(*ptr) ^ search[state];
161 tmp &= (tmp >> 1); 162 tmp &= (tmp >> 1);
162 tmp &= mask; 163 tmp &= mask;
163 return tmp; 164 return tmp;
164 } 165 }
165 166
166 /** 167 /**
167 * rs_cmp - multi-block reservation range compare 168 * rs_cmp - multi-block reservation range compare
168 * @blk: absolute file system block number of the new reservation 169 * @blk: absolute file system block number of the new reservation
169 * @len: number of blocks in the new reservation 170 * @len: number of blocks in the new reservation
170 * @rs: existing reservation to compare against 171 * @rs: existing reservation to compare against
171 * 172 *
172 * returns: 1 if the block range is beyond the reach of the reservation 173 * returns: 1 if the block range is beyond the reach of the reservation
173 * -1 if the block range is before the start of the reservation 174 * -1 if the block range is before the start of the reservation
174 * 0 if the block range overlaps with the reservation 175 * 0 if the block range overlaps with the reservation
175 */ 176 */
176 static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) 177 static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
177 { 178 {
178 u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); 179 u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
179 180
180 if (blk >= startblk + rs->rs_free) 181 if (blk >= startblk + rs->rs_free)
181 return 1; 182 return 1;
182 if (blk + len - 1 < startblk) 183 if (blk + len - 1 < startblk)
183 return -1; 184 return -1;
184 return 0; 185 return 0;
185 } 186 }
186 187
187 /** 188 /**
188 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing 189 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
189 * a block in a given allocation state. 190 * a block in a given allocation state.
190 * @buf: the buffer that holds the bitmaps 191 * @buf: the buffer that holds the bitmaps
191 * @len: the length (in bytes) of the buffer 192 * @len: the length (in bytes) of the buffer
192 * @goal: start search at this block's bit-pair (within @buffer) 193 * @goal: start search at this block's bit-pair (within @buffer)
193 * @state: GFS2_BLKST_XXX the state of the block we're looking for. 194 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
194 * 195 *
195 * Scope of @goal and returned block number is only within this bitmap buffer, 196 * Scope of @goal and returned block number is only within this bitmap buffer,
196 * not entire rgrp or filesystem. @buffer will be offset from the actual 197 * not entire rgrp or filesystem. @buffer will be offset from the actual
197 * beginning of a bitmap block buffer, skipping any header structures, but 198 * beginning of a bitmap block buffer, skipping any header structures, but
198 * headers are always a multiple of 64 bits long so that the buffer is 199 * headers are always a multiple of 64 bits long so that the buffer is
199 * always aligned to a 64 bit boundary. 200 * always aligned to a 64 bit boundary.
200 * 201 *
201 * The size of the buffer is in bytes, but is it assumed that it is 202 * The size of the buffer is in bytes, but is it assumed that it is
202 * always ok to read a complete multiple of 64 bits at the end 203 * always ok to read a complete multiple of 64 bits at the end
203 * of the block in case the end is no aligned to a natural boundary. 204 * of the block in case the end is no aligned to a natural boundary.
204 * 205 *
205 * Return: the block number (bitmap buffer scope) that was found 206 * Return: the block number (bitmap buffer scope) that was found
206 */ 207 */
207 208
208 static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, 209 static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
209 u32 goal, u8 state) 210 u32 goal, u8 state)
210 { 211 {
211 u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1); 212 u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
212 const __le64 *ptr = ((__le64 *)buf) + (goal >> 5); 213 const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
213 const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64))); 214 const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
214 u64 tmp; 215 u64 tmp;
215 u64 mask = 0x5555555555555555ULL; 216 u64 mask = 0x5555555555555555ULL;
216 u32 bit; 217 u32 bit;
217 218
218 /* Mask off bits we don't care about at the start of the search */ 219 /* Mask off bits we don't care about at the start of the search */
219 mask <<= spoint; 220 mask <<= spoint;
220 tmp = gfs2_bit_search(ptr, mask, state); 221 tmp = gfs2_bit_search(ptr, mask, state);
221 ptr++; 222 ptr++;
222 while(tmp == 0 && ptr < end) { 223 while(tmp == 0 && ptr < end) {
223 tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state); 224 tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
224 ptr++; 225 ptr++;
225 } 226 }
226 /* Mask off any bits which are more than len bytes from the start */ 227 /* Mask off any bits which are more than len bytes from the start */
227 if (ptr == end && (len & (sizeof(u64) - 1))) 228 if (ptr == end && (len & (sizeof(u64) - 1)))
228 tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1)))); 229 tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
229 /* Didn't find anything, so return */ 230 /* Didn't find anything, so return */
230 if (tmp == 0) 231 if (tmp == 0)
231 return BFITNOENT; 232 return BFITNOENT;
232 ptr--; 233 ptr--;
233 bit = __ffs64(tmp); 234 bit = __ffs64(tmp);
234 bit /= 2; /* two bits per entry in the bitmap */ 235 bit /= 2; /* two bits per entry in the bitmap */
235 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; 236 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
236 } 237 }
237 238
238 /** 239 /**
239 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number 240 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
240 * @rbm: The rbm with rgd already set correctly 241 * @rbm: The rbm with rgd already set correctly
241 * @block: The block number (filesystem relative) 242 * @block: The block number (filesystem relative)
242 * 243 *
243 * This sets the bi and offset members of an rbm based on a 244 * This sets the bi and offset members of an rbm based on a
244 * resource group and a filesystem relative block number. The 245 * resource group and a filesystem relative block number. The
245 * resource group must be set in the rbm on entry, the bi and 246 * resource group must be set in the rbm on entry, the bi and
246 * offset members will be set by this function. 247 * offset members will be set by this function.
247 * 248 *
248 * Returns: 0 on success, or an error code 249 * Returns: 0 on success, or an error code
249 */ 250 */
250 251
251 static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) 252 static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
252 { 253 {
253 u64 rblock = block - rbm->rgd->rd_data0; 254 u64 rblock = block - rbm->rgd->rd_data0;
254 u32 x; 255 u32 x;
255 256
256 if (WARN_ON_ONCE(rblock > UINT_MAX)) 257 if (WARN_ON_ONCE(rblock > UINT_MAX))
257 return -EINVAL; 258 return -EINVAL;
258 if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) 259 if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
259 return -E2BIG; 260 return -E2BIG;
260 261
261 rbm->bi = rbm->rgd->rd_bits; 262 rbm->bi = rbm->rgd->rd_bits;
262 rbm->offset = (u32)(rblock); 263 rbm->offset = (u32)(rblock);
263 /* Check if the block is within the first block */ 264 /* Check if the block is within the first block */
264 if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) 265 if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY)
265 return 0; 266 return 0;
266 267
267 /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ 268 /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
268 rbm->offset += (sizeof(struct gfs2_rgrp) - 269 rbm->offset += (sizeof(struct gfs2_rgrp) -
269 sizeof(struct gfs2_meta_header)) * GFS2_NBBY; 270 sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
270 x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; 271 x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
271 rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; 272 rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
272 rbm->bi += x; 273 rbm->bi += x;
273 return 0; 274 return 0;
274 } 275 }
275 276
276 /** 277 /**
277 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned 278 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
278 * @rbm: Position to search (value/result) 279 * @rbm: Position to search (value/result)
279 * @n_unaligned: Number of unaligned blocks to check 280 * @n_unaligned: Number of unaligned blocks to check
280 * @len: Decremented for each block found (terminate on zero) 281 * @len: Decremented for each block found (terminate on zero)
281 * 282 *
282 * Returns: true if a non-free block is encountered 283 * Returns: true if a non-free block is encountered
283 */ 284 */
284 285
285 static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) 286 static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
286 { 287 {
287 u64 block; 288 u64 block;
288 u32 n; 289 u32 n;
289 u8 res; 290 u8 res;
290 291
291 for (n = 0; n < n_unaligned; n++) { 292 for (n = 0; n < n_unaligned; n++) {
292 res = gfs2_testbit(rbm); 293 res = gfs2_testbit(rbm);
293 if (res != GFS2_BLKST_FREE) 294 if (res != GFS2_BLKST_FREE)
294 return true; 295 return true;
295 (*len)--; 296 (*len)--;
296 if (*len == 0) 297 if (*len == 0)
297 return true; 298 return true;
298 block = gfs2_rbm_to_block(rbm); 299 block = gfs2_rbm_to_block(rbm);
299 if (gfs2_rbm_from_block(rbm, block + 1)) 300 if (gfs2_rbm_from_block(rbm, block + 1))
300 return true; 301 return true;
301 } 302 }
302 303
303 return false; 304 return false;
304 } 305 }
305 306
306 /** 307 /**
307 * gfs2_free_extlen - Return extent length of free blocks 308 * gfs2_free_extlen - Return extent length of free blocks
308 * @rbm: Starting position 309 * @rbm: Starting position
309 * @len: Max length to check 310 * @len: Max length to check
310 * 311 *
311 * Starting at the block specified by the rbm, see how many free blocks 312 * Starting at the block specified by the rbm, see how many free blocks
312 * there are, not reading more than len blocks ahead. This can be done 313 * there are, not reading more than len blocks ahead. This can be done
313 * using memchr_inv when the blocks are byte aligned, but has to be done 314 * using memchr_inv when the blocks are byte aligned, but has to be done
314 * on a block by block basis in case of unaligned blocks. Also this 315 * on a block by block basis in case of unaligned blocks. Also this
315 * function can cope with bitmap boundaries (although it must stop on 316 * function can cope with bitmap boundaries (although it must stop on
316 * a resource group boundary) 317 * a resource group boundary)
317 * 318 *
318 * Returns: Number of free blocks in the extent 319 * Returns: Number of free blocks in the extent
319 */ 320 */
320 321
321 static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) 322 static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
322 { 323 {
323 struct gfs2_rbm rbm = *rrbm; 324 struct gfs2_rbm rbm = *rrbm;
324 u32 n_unaligned = rbm.offset & 3; 325 u32 n_unaligned = rbm.offset & 3;
325 u32 size = len; 326 u32 size = len;
326 u32 bytes; 327 u32 bytes;
327 u32 chunk_size; 328 u32 chunk_size;
328 u8 *ptr, *start, *end; 329 u8 *ptr, *start, *end;
329 u64 block; 330 u64 block;
330 331
331 if (n_unaligned && 332 if (n_unaligned &&
332 gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) 333 gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
333 goto out; 334 goto out;
334 335
335 n_unaligned = len & 3; 336 n_unaligned = len & 3;
336 /* Start is now byte aligned */ 337 /* Start is now byte aligned */
337 while (len > 3) { 338 while (len > 3) {
338 start = rbm.bi->bi_bh->b_data; 339 start = rbm.bi->bi_bh->b_data;
339 if (rbm.bi->bi_clone) 340 if (rbm.bi->bi_clone)
340 start = rbm.bi->bi_clone; 341 start = rbm.bi->bi_clone;
341 end = start + rbm.bi->bi_bh->b_size; 342 end = start + rbm.bi->bi_bh->b_size;
342 start += rbm.bi->bi_offset; 343 start += rbm.bi->bi_offset;
343 BUG_ON(rbm.offset & 3); 344 BUG_ON(rbm.offset & 3);
344 start += (rbm.offset / GFS2_NBBY); 345 start += (rbm.offset / GFS2_NBBY);
345 bytes = min_t(u32, len / GFS2_NBBY, (end - start)); 346 bytes = min_t(u32, len / GFS2_NBBY, (end - start));
346 ptr = memchr_inv(start, 0, bytes); 347 ptr = memchr_inv(start, 0, bytes);
347 chunk_size = ((ptr == NULL) ? bytes : (ptr - start)); 348 chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
348 chunk_size *= GFS2_NBBY; 349 chunk_size *= GFS2_NBBY;
349 BUG_ON(len < chunk_size); 350 BUG_ON(len < chunk_size);
350 len -= chunk_size; 351 len -= chunk_size;
351 block = gfs2_rbm_to_block(&rbm); 352 block = gfs2_rbm_to_block(&rbm);
352 gfs2_rbm_from_block(&rbm, block + chunk_size); 353 gfs2_rbm_from_block(&rbm, block + chunk_size);
353 n_unaligned = 3; 354 n_unaligned = 3;
354 if (ptr) 355 if (ptr)
355 break; 356 break;
356 n_unaligned = len & 3; 357 n_unaligned = len & 3;
357 } 358 }
358 359
359 /* Deal with any bits left over at the end */ 360 /* Deal with any bits left over at the end */
360 if (n_unaligned) 361 if (n_unaligned)
361 gfs2_unaligned_extlen(&rbm, n_unaligned, &len); 362 gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
362 out: 363 out:
363 return size - len; 364 return size - len;
364 } 365 }
365 366
366 /** 367 /**
367 * gfs2_bitcount - count the number of bits in a certain state 368 * gfs2_bitcount - count the number of bits in a certain state
368 * @rgd: the resource group descriptor 369 * @rgd: the resource group descriptor
369 * @buffer: the buffer that holds the bitmaps 370 * @buffer: the buffer that holds the bitmaps
370 * @buflen: the length (in bytes) of the buffer 371 * @buflen: the length (in bytes) of the buffer
371 * @state: the state of the block we're looking for 372 * @state: the state of the block we're looking for
372 * 373 *
373 * Returns: The number of bits 374 * Returns: The number of bits
374 */ 375 */
375 376
376 static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer, 377 static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
377 unsigned int buflen, u8 state) 378 unsigned int buflen, u8 state)
378 { 379 {
379 const u8 *byte = buffer; 380 const u8 *byte = buffer;
380 const u8 *end = buffer + buflen; 381 const u8 *end = buffer + buflen;
381 const u8 state1 = state << 2; 382 const u8 state1 = state << 2;
382 const u8 state2 = state << 4; 383 const u8 state2 = state << 4;
383 const u8 state3 = state << 6; 384 const u8 state3 = state << 6;
384 u32 count = 0; 385 u32 count = 0;
385 386
386 for (; byte < end; byte++) { 387 for (; byte < end; byte++) {
387 if (((*byte) & 0x03) == state) 388 if (((*byte) & 0x03) == state)
388 count++; 389 count++;
389 if (((*byte) & 0x0C) == state1) 390 if (((*byte) & 0x0C) == state1)
390 count++; 391 count++;
391 if (((*byte) & 0x30) == state2) 392 if (((*byte) & 0x30) == state2)
392 count++; 393 count++;
393 if (((*byte) & 0xC0) == state3) 394 if (((*byte) & 0xC0) == state3)
394 count++; 395 count++;
395 } 396 }
396 397
397 return count; 398 return count;
398 } 399 }
399 400
400 /** 401 /**
401 * gfs2_rgrp_verify - Verify that a resource group is consistent 402 * gfs2_rgrp_verify - Verify that a resource group is consistent
402 * @rgd: the rgrp 403 * @rgd: the rgrp
403 * 404 *
404 */ 405 */
405 406
406 void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) 407 void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
407 { 408 {
408 struct gfs2_sbd *sdp = rgd->rd_sbd; 409 struct gfs2_sbd *sdp = rgd->rd_sbd;
409 struct gfs2_bitmap *bi = NULL; 410 struct gfs2_bitmap *bi = NULL;
410 u32 length = rgd->rd_length; 411 u32 length = rgd->rd_length;
411 u32 count[4], tmp; 412 u32 count[4], tmp;
412 int buf, x; 413 int buf, x;
413 414
414 memset(count, 0, 4 * sizeof(u32)); 415 memset(count, 0, 4 * sizeof(u32));
415 416
416 /* Count # blocks in each of 4 possible allocation states */ 417 /* Count # blocks in each of 4 possible allocation states */
417 for (buf = 0; buf < length; buf++) { 418 for (buf = 0; buf < length; buf++) {
418 bi = rgd->rd_bits + buf; 419 bi = rgd->rd_bits + buf;
419 for (x = 0; x < 4; x++) 420 for (x = 0; x < 4; x++)
420 count[x] += gfs2_bitcount(rgd, 421 count[x] += gfs2_bitcount(rgd,
421 bi->bi_bh->b_data + 422 bi->bi_bh->b_data +
422 bi->bi_offset, 423 bi->bi_offset,
423 bi->bi_len, x); 424 bi->bi_len, x);
424 } 425 }
425 426
426 if (count[0] != rgd->rd_free) { 427 if (count[0] != rgd->rd_free) {
427 if (gfs2_consist_rgrpd(rgd)) 428 if (gfs2_consist_rgrpd(rgd))
428 fs_err(sdp, "free data mismatch: %u != %u\n", 429 fs_err(sdp, "free data mismatch: %u != %u\n",
429 count[0], rgd->rd_free); 430 count[0], rgd->rd_free);
430 return; 431 return;
431 } 432 }
432 433
433 tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; 434 tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
434 if (count[1] != tmp) { 435 if (count[1] != tmp) {
435 if (gfs2_consist_rgrpd(rgd)) 436 if (gfs2_consist_rgrpd(rgd))
436 fs_err(sdp, "used data mismatch: %u != %u\n", 437 fs_err(sdp, "used data mismatch: %u != %u\n",
437 count[1], tmp); 438 count[1], tmp);
438 return; 439 return;
439 } 440 }
440 441
441 if (count[2] + count[3] != rgd->rd_dinodes) { 442 if (count[2] + count[3] != rgd->rd_dinodes) {
442 if (gfs2_consist_rgrpd(rgd)) 443 if (gfs2_consist_rgrpd(rgd))
443 fs_err(sdp, "used metadata mismatch: %u != %u\n", 444 fs_err(sdp, "used metadata mismatch: %u != %u\n",
444 count[2] + count[3], rgd->rd_dinodes); 445 count[2] + count[3], rgd->rd_dinodes);
445 return; 446 return;
446 } 447 }
447 } 448 }
448 449
449 static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) 450 static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
450 { 451 {
451 u64 first = rgd->rd_data0; 452 u64 first = rgd->rd_data0;
452 u64 last = first + rgd->rd_data; 453 u64 last = first + rgd->rd_data;
453 return first <= block && block < last; 454 return first <= block && block < last;
454 } 455 }
455 456
456 /** 457 /**
457 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number 458 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
458 * @sdp: The GFS2 superblock 459 * @sdp: The GFS2 superblock
459 * @blk: The data block number 460 * @blk: The data block number
460 * @exact: True if this needs to be an exact match 461 * @exact: True if this needs to be an exact match
461 * 462 *
462 * Returns: The resource group, or NULL if not found 463 * Returns: The resource group, or NULL if not found
463 */ 464 */
464 465
465 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact) 466 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
466 { 467 {
467 struct rb_node *n, *next; 468 struct rb_node *n, *next;
468 struct gfs2_rgrpd *cur; 469 struct gfs2_rgrpd *cur;
469 470
470 spin_lock(&sdp->sd_rindex_spin); 471 spin_lock(&sdp->sd_rindex_spin);
471 n = sdp->sd_rindex_tree.rb_node; 472 n = sdp->sd_rindex_tree.rb_node;
472 while (n) { 473 while (n) {
473 cur = rb_entry(n, struct gfs2_rgrpd, rd_node); 474 cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
474 next = NULL; 475 next = NULL;
475 if (blk < cur->rd_addr) 476 if (blk < cur->rd_addr)
476 next = n->rb_left; 477 next = n->rb_left;
477 else if (blk >= cur->rd_data0 + cur->rd_data) 478 else if (blk >= cur->rd_data0 + cur->rd_data)
478 next = n->rb_right; 479 next = n->rb_right;
479 if (next == NULL) { 480 if (next == NULL) {
480 spin_unlock(&sdp->sd_rindex_spin); 481 spin_unlock(&sdp->sd_rindex_spin);
481 if (exact) { 482 if (exact) {
482 if (blk < cur->rd_addr) 483 if (blk < cur->rd_addr)
483 return NULL; 484 return NULL;
484 if (blk >= cur->rd_data0 + cur->rd_data) 485 if (blk >= cur->rd_data0 + cur->rd_data)
485 return NULL; 486 return NULL;
486 } 487 }
487 return cur; 488 return cur;
488 } 489 }
489 n = next; 490 n = next;
490 } 491 }
491 spin_unlock(&sdp->sd_rindex_spin); 492 spin_unlock(&sdp->sd_rindex_spin);
492 493
493 return NULL; 494 return NULL;
494 } 495 }
495 496
496 /** 497 /**
497 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem 498 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
498 * @sdp: The GFS2 superblock 499 * @sdp: The GFS2 superblock
499 * 500 *
500 * Returns: The first rgrp in the filesystem 501 * Returns: The first rgrp in the filesystem
501 */ 502 */
502 503
503 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) 504 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
504 { 505 {
505 const struct rb_node *n; 506 const struct rb_node *n;
506 struct gfs2_rgrpd *rgd; 507 struct gfs2_rgrpd *rgd;
507 508
508 spin_lock(&sdp->sd_rindex_spin); 509 spin_lock(&sdp->sd_rindex_spin);
509 n = rb_first(&sdp->sd_rindex_tree); 510 n = rb_first(&sdp->sd_rindex_tree);
510 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 511 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
511 spin_unlock(&sdp->sd_rindex_spin); 512 spin_unlock(&sdp->sd_rindex_spin);
512 513
513 return rgd; 514 return rgd;
514 } 515 }
515 516
516 /** 517 /**
517 * gfs2_rgrpd_get_next - get the next RG 518 * gfs2_rgrpd_get_next - get the next RG
518 * @rgd: the resource group descriptor 519 * @rgd: the resource group descriptor
519 * 520 *
520 * Returns: The next rgrp 521 * Returns: The next rgrp
521 */ 522 */
522 523
523 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd) 524 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
524 { 525 {
525 struct gfs2_sbd *sdp = rgd->rd_sbd; 526 struct gfs2_sbd *sdp = rgd->rd_sbd;
526 const struct rb_node *n; 527 const struct rb_node *n;
527 528
528 spin_lock(&sdp->sd_rindex_spin); 529 spin_lock(&sdp->sd_rindex_spin);
529 n = rb_next(&rgd->rd_node); 530 n = rb_next(&rgd->rd_node);
530 if (n == NULL) 531 if (n == NULL)
531 n = rb_first(&sdp->sd_rindex_tree); 532 n = rb_first(&sdp->sd_rindex_tree);
532 533
533 if (unlikely(&rgd->rd_node == n)) { 534 if (unlikely(&rgd->rd_node == n)) {
534 spin_unlock(&sdp->sd_rindex_spin); 535 spin_unlock(&sdp->sd_rindex_spin);
535 return NULL; 536 return NULL;
536 } 537 }
537 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 538 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
538 spin_unlock(&sdp->sd_rindex_spin); 539 spin_unlock(&sdp->sd_rindex_spin);
539 return rgd; 540 return rgd;
540 } 541 }
541 542
542 void gfs2_free_clones(struct gfs2_rgrpd *rgd) 543 void gfs2_free_clones(struct gfs2_rgrpd *rgd)
543 { 544 {
544 int x; 545 int x;
545 546
546 for (x = 0; x < rgd->rd_length; x++) { 547 for (x = 0; x < rgd->rd_length; x++) {
547 struct gfs2_bitmap *bi = rgd->rd_bits + x; 548 struct gfs2_bitmap *bi = rgd->rd_bits + x;
548 kfree(bi->bi_clone); 549 kfree(bi->bi_clone);
549 bi->bi_clone = NULL; 550 bi->bi_clone = NULL;
550 } 551 }
551 } 552 }
552 553
553 /** 554 /**
554 * gfs2_rs_alloc - make sure we have a reservation assigned to the inode 555 * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
555 * @ip: the inode for this reservation 556 * @ip: the inode for this reservation
556 */ 557 */
557 int gfs2_rs_alloc(struct gfs2_inode *ip) 558 int gfs2_rs_alloc(struct gfs2_inode *ip)
558 { 559 {
559 struct gfs2_blkreserv *res; 560 struct gfs2_blkreserv *res;
560 561
561 if (ip->i_res) 562 if (ip->i_res)
562 return 0; 563 return 0;
563 564
564 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); 565 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
565 if (!res) 566 if (!res)
566 return -ENOMEM; 567 return -ENOMEM;
567 568
568 RB_CLEAR_NODE(&res->rs_node); 569 RB_CLEAR_NODE(&res->rs_node);
569 570
570 down_write(&ip->i_rw_mutex); 571 down_write(&ip->i_rw_mutex);
571 if (ip->i_res) 572 if (ip->i_res)
572 kmem_cache_free(gfs2_rsrv_cachep, res); 573 kmem_cache_free(gfs2_rsrv_cachep, res);
573 else 574 else
574 ip->i_res = res; 575 ip->i_res = res;
575 up_write(&ip->i_rw_mutex); 576 up_write(&ip->i_rw_mutex);
576 return 0; 577 return 0;
577 } 578 }
578 579
/* Print one multi-block reservation to a debug seq_file: inode number,
   starting block, bitmap offset and remaining reserved-block count. */
static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
{
	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
		       (unsigned long long)rs->rs_inum,
		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
		       rs->rs_rbm.offset, rs->rs_free);
}
586 587
/**
 * __rs_deltree - remove a multi-block reservation from the rgd tree
 * @ip: the inode the reservation belongs to (may be NULL during
 *      rgrp teardown — see return_all_reservations())
 * @rs: The reservation to remove
 *
 * Caller must hold rgd->rd_rsspin.
 */
static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
{
	struct gfs2_rgrpd *rgd;

	if (!gfs2_rs_active(rs))	/* not linked into any tree */
		return;

	rgd = rs->rs_rbm.rgd;
	trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
	rb_erase(&rs->rs_node, &rgd->rd_rstree);
	RB_CLEAR_NODE(&rs->rs_node);

	if (rs->rs_free) {
		/* return reserved blocks to the rgrp and the ip */
		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
		rs->rs_free = 0;
		/* the bitmap may have usable space again: clear GBF_FULL
		   so allocators search it; the barrier orders the clear
		   against subsequent reads on other CPUs */
		clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags);
		smp_mb__after_clear_bit();
	}
}
613 614
614 /** 615 /**
615 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree 616 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
616 * @rs: The reservation to remove 617 * @rs: The reservation to remove
617 * 618 *
618 */ 619 */
619 void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) 620 void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
620 { 621 {
621 struct gfs2_rgrpd *rgd; 622 struct gfs2_rgrpd *rgd;
622 623
623 rgd = rs->rs_rbm.rgd; 624 rgd = rs->rs_rbm.rgd;
624 if (rgd) { 625 if (rgd) {
625 spin_lock(&rgd->rd_rsspin); 626 spin_lock(&rgd->rd_rsspin);
626 __rs_deltree(ip, rs); 627 __rs_deltree(ip, rs);
627 spin_unlock(&rgd->rd_rsspin); 628 spin_unlock(&rgd->rd_rsspin);
628 } 629 }
629 } 630 }
630 631
631 /** 632 /**
632 * gfs2_rs_delete - delete a multi-block reservation 633 * gfs2_rs_delete - delete a multi-block reservation
633 * @ip: The inode for this reservation 634 * @ip: The inode for this reservation
634 * 635 *
635 */ 636 */
636 void gfs2_rs_delete(struct gfs2_inode *ip) 637 void gfs2_rs_delete(struct gfs2_inode *ip)
637 { 638 {
638 down_write(&ip->i_rw_mutex); 639 down_write(&ip->i_rw_mutex);
639 if (ip->i_res) { 640 if (ip->i_res) {
640 gfs2_rs_deltree(ip, ip->i_res); 641 gfs2_rs_deltree(ip, ip->i_res);
641 BUG_ON(ip->i_res->rs_free); 642 BUG_ON(ip->i_res->rs_free);
642 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); 643 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
643 ip->i_res = NULL; 644 ip->i_res = NULL;
644 } 645 }
645 up_write(&ip->i_rw_mutex); 646 up_write(&ip->i_rw_mutex);
646 } 647 }
647 648
648 /** 649 /**
649 * return_all_reservations - return all reserved blocks back to the rgrp. 650 * return_all_reservations - return all reserved blocks back to the rgrp.
650 * @rgd: the rgrp that needs its space back 651 * @rgd: the rgrp that needs its space back
651 * 652 *
652 * We previously reserved a bunch of blocks for allocation. Now we need to 653 * We previously reserved a bunch of blocks for allocation. Now we need to
653 * give them back. This leave the reservation structures in tact, but removes 654 * give them back. This leave the reservation structures in tact, but removes
654 * all of their corresponding "no-fly zones". 655 * all of their corresponding "no-fly zones".
655 */ 656 */
656 static void return_all_reservations(struct gfs2_rgrpd *rgd) 657 static void return_all_reservations(struct gfs2_rgrpd *rgd)
657 { 658 {
658 struct rb_node *n; 659 struct rb_node *n;
659 struct gfs2_blkreserv *rs; 660 struct gfs2_blkreserv *rs;
660 661
661 spin_lock(&rgd->rd_rsspin); 662 spin_lock(&rgd->rd_rsspin);
662 while ((n = rb_first(&rgd->rd_rstree))) { 663 while ((n = rb_first(&rgd->rd_rstree))) {
663 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 664 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
664 __rs_deltree(NULL, rs); 665 __rs_deltree(NULL, rs);
665 } 666 }
666 spin_unlock(&rgd->rd_rsspin); 667 spin_unlock(&rgd->rd_rsspin);
667 } 668 }
668 669
/**
 * gfs2_clear_rgrpd - tear down all in-core resource group descriptors
 * @sdp: the filesystem
 *
 * Unhooks every rgrp from the rindex tree, detaches it from its glock
 * (pushed onto the LRU and dereferenced) and frees all associated
 * memory.
 */
void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
	struct rb_node *n;
	struct gfs2_rgrpd *rgd;
	struct gfs2_glock *gl;

	while ((n = rb_first(&sdp->sd_rindex_tree))) {
		rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
		gl = rgd->rd_gl;

		rb_erase(n, &sdp->sd_rindex_tree);

		if (gl) {
			/* break the glock -> rgd back-pointer under
			   gl_spin before dropping our reference */
			spin_lock(&gl->gl_spin);
			gl->gl_object = NULL;
			spin_unlock(&gl->gl_spin);
			gfs2_glock_add_to_lru(gl);
			gfs2_glock_put(gl);
		}

		gfs2_free_clones(rgd);
		kfree(rgd->rd_bits);
		return_all_reservations(rgd);
		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	}
}
695 696
/* Dump the on-disk rindex fields of @rgd to the kernel log; used by
   compute_bitstructs() when a consistency error is detected. */
static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
	printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
	printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
	printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
}
704 705
/**
 * gfs2_compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi;
	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
	u32 bytes_left, bytes;
	int x;

	if (!length)
		return -EINVAL;

	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
	if (!rgd->rd_bits)
		return -ENOMEM;

	/* distribute rd_bitbytes of bitmap data over the blocks */
	bytes_left = rgd->rd_bitbytes;

	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;

		bi->bi_flags = 0;
		/* small rgrp; bitmap stored completely in header block */
		if (length == 1) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* header block */
		} else if (x == 0) {
			/* the first block also carries the rgrp header, so
			   only the remainder holds bitmap data */
			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* last block */
		} else if (x + 1 == length) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
		/* other blocks */
		} else {
			bytes = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header);
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_bitbytes - bytes_left;
			bi->bi_len = bytes;
		}

		bytes_left -= bytes;
	}

	/* every byte of rd_bitbytes must have been accounted for */
	if (bytes_left) {
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}
	/* the final bitmap must cover exactly rd_data blocks
	   (GFS2_NBBY = block states packed into one bitmap byte) */
	bi = rgd->rd_bits + (length - 1);
	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
		if (gfs2_consist_rgrpd(rgd)) {
			gfs2_rindex_print(rgd);
			fs_err(sdp, "start=%u len=%u offset=%u\n",
			       bi->bi_start, bi->bi_len, bi->bi_offset);
		}
		return -EIO;
	}

	return 0;
}
781 782
782 /** 783 /**
783 * gfs2_ri_total - Total up the file system space, according to the rindex. 784 * gfs2_ri_total - Total up the file system space, according to the rindex.
784 * @sdp: the filesystem 785 * @sdp: the filesystem
785 * 786 *
786 */ 787 */
787 u64 gfs2_ri_total(struct gfs2_sbd *sdp) 788 u64 gfs2_ri_total(struct gfs2_sbd *sdp)
788 { 789 {
789 u64 total_data = 0; 790 u64 total_data = 0;
790 struct inode *inode = sdp->sd_rindex; 791 struct inode *inode = sdp->sd_rindex;
791 struct gfs2_inode *ip = GFS2_I(inode); 792 struct gfs2_inode *ip = GFS2_I(inode);
792 char buf[sizeof(struct gfs2_rindex)]; 793 char buf[sizeof(struct gfs2_rindex)];
793 int error, rgrps; 794 int error, rgrps;
794 795
795 for (rgrps = 0;; rgrps++) { 796 for (rgrps = 0;; rgrps++) {
796 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 797 loff_t pos = rgrps * sizeof(struct gfs2_rindex);
797 798
798 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode)) 799 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
799 break; 800 break;
800 error = gfs2_internal_read(ip, buf, &pos, 801 error = gfs2_internal_read(ip, buf, &pos,
801 sizeof(struct gfs2_rindex)); 802 sizeof(struct gfs2_rindex));
802 if (error != sizeof(struct gfs2_rindex)) 803 if (error != sizeof(struct gfs2_rindex))
803 break; 804 break;
804 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data); 805 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
805 } 806 }
806 return total_data; 807 return total_data;
807 } 808 }
808 809
809 static int rgd_insert(struct gfs2_rgrpd *rgd) 810 static int rgd_insert(struct gfs2_rgrpd *rgd)
810 { 811 {
811 struct gfs2_sbd *sdp = rgd->rd_sbd; 812 struct gfs2_sbd *sdp = rgd->rd_sbd;
812 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL; 813 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
813 814
814 /* Figure out where to put new node */ 815 /* Figure out where to put new node */
815 while (*newn) { 816 while (*newn) {
816 struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd, 817 struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
817 rd_node); 818 rd_node);
818 819
819 parent = *newn; 820 parent = *newn;
820 if (rgd->rd_addr < cur->rd_addr) 821 if (rgd->rd_addr < cur->rd_addr)
821 newn = &((*newn)->rb_left); 822 newn = &((*newn)->rb_left);
822 else if (rgd->rd_addr > cur->rd_addr) 823 else if (rgd->rd_addr > cur->rd_addr)
823 newn = &((*newn)->rb_right); 824 newn = &((*newn)->rb_right);
824 else 825 else
825 return -EEXIST; 826 return -EEXIST;
826 } 827 }
827 828
828 rb_link_node(&rgd->rd_node, parent, newn); 829 rb_link_node(&rgd->rd_node, parent, newn);
829 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree); 830 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
830 sdp->sd_rgrps++; 831 sdp->sd_rgrps++;
831 return 0; 832 return 0;
832 } 833 }
833 834
/**
 * read_rindex_entry - Pull in a new resource index entry from the disk
 * @ip: Pointer to the rindex inode
 *
 * Reads the next rindex record (indexed by sd_rgrps), builds the
 * corresponding in-core rgrp with its bitmap descriptors and glock,
 * and inserts it into the rindex rbtree.
 *
 * Returns: 0 on success, > 0 on EOF, error code otherwise
 */

static int read_rindex_entry(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
	struct gfs2_rindex buf;
	int error;
	struct gfs2_rgrpd *rgd;

	if (pos >= i_size_read(&ip->i_inode))
		return 1;	/* EOF: all entries already read */

	error = gfs2_internal_read(ip, (char *)&buf, &pos,
				   sizeof(struct gfs2_rindex));

	/* a short read at the end of the file also counts as EOF */
	if (error != sizeof(struct gfs2_rindex))
		return (error == 0) ? 1 : error;

	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
	error = -ENOMEM;
	if (!rgd)
		return error;

	/* decode the on-disk (big-endian) record */
	rgd->rd_sbd = sdp;
	rgd->rd_addr = be64_to_cpu(buf.ri_addr);
	rgd->rd_length = be32_to_cpu(buf.ri_length);
	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
	rgd->rd_data = be32_to_cpu(buf.ri_data);
	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
	spin_lock_init(&rgd->rd_rsspin);

	error = compute_bitstructs(rgd);
	if (error)
		goto fail;

	error = gfs2_glock_get(sdp, rgd->rd_addr,
			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
	if (error)
		goto fail;

	rgd->rd_gl->gl_object = rgd;
	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb;
	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
	/* track the largest rgrp for allocation sizing */
	if (rgd->rd_data > sdp->sd_max_rg_data)
		sdp->sd_max_rg_data = rgd->rd_data;
	spin_lock(&sdp->sd_rindex_spin);
	error = rgd_insert(rgd);
	spin_unlock(&sdp->sd_rindex_spin);
	if (!error)
		return 0;

	error = 0; /* someone else read in the rgrp; free it and ignore it */
	gfs2_glock_put(rgd->rd_gl);

fail:
	kfree(rgd->rd_bits);
	kmem_cache_free(gfs2_rgrpd_cachep, rgd);
	return error;
}
899 900
900 /** 901 /**
901 * gfs2_ri_update - Pull in a new resource index from the disk 902 * gfs2_ri_update - Pull in a new resource index from the disk
902 * @ip: pointer to the rindex inode 903 * @ip: pointer to the rindex inode
903 * 904 *
904 * Returns: 0 on successful update, error code otherwise 905 * Returns: 0 on successful update, error code otherwise
905 */ 906 */
906 907
907 static int gfs2_ri_update(struct gfs2_inode *ip) 908 static int gfs2_ri_update(struct gfs2_inode *ip)
908 { 909 {
909 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 910 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
910 int error; 911 int error;
911 912
912 do { 913 do {
913 error = read_rindex_entry(ip); 914 error = read_rindex_entry(ip);
914 } while (error == 0); 915 } while (error == 0);
915 916
916 if (error < 0) 917 if (error < 0)
917 return error; 918 return error;
918 919
919 sdp->sd_rindex_uptodate = 1; 920 sdp->sd_rindex_uptodate = 1;
920 return 0; 921 return 0;
921 } 922 }
922 923
/**
 * gfs2_rindex_update - Update the rindex if required
 * @sdp: The GFS2 superblock
 *
 * We grab a lock on the rindex inode to make sure that it doesn't
 * change whilst we are performing an operation. We keep this lock
 * for quite long periods of time compared to other locks. This
 * doesn't matter, since it is shared and it is very, very rarely
 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 *
 * This makes sure that we're using the latest copy of the resource index
 * special file, which might have been updated if someone expanded the
 * filesystem (via gfs2_grow utility), which adds new resource groups.
 *
 * Returns: 0 on success, error code otherwise
 */

int gfs2_rindex_update(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
	struct gfs2_glock *gl = ip->i_gl;
	struct gfs2_holder ri_gh;
	int error = 0;
	int unlock_required = 0;

	/* Read new copy from disk if we don't have the latest */
	if (!sdp->sd_rindex_uptodate) {
		/* only take the glock if this task doesn't hold it already */
		if (!gfs2_glock_is_locked_by_me(gl)) {
			error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
			if (error)
				return error;
			unlock_required = 1;
		}
		/* re-check: someone may have refreshed the rindex while
		   we waited for the glock */
		if (!sdp->sd_rindex_uptodate)
			error = gfs2_ri_update(ip);
		if (unlock_required)
			gfs2_glock_dq_uninit(&ri_gh);
	}

	return error;
}
964 965
965 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) 966 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
966 { 967 {
967 const struct gfs2_rgrp *str = buf; 968 const struct gfs2_rgrp *str = buf;
968 u32 rg_flags; 969 u32 rg_flags;
969 970
970 rg_flags = be32_to_cpu(str->rg_flags); 971 rg_flags = be32_to_cpu(str->rg_flags);
971 rg_flags &= ~GFS2_RDF_MASK; 972 rg_flags &= ~GFS2_RDF_MASK;
972 rgd->rd_flags &= GFS2_RDF_MASK; 973 rgd->rd_flags &= GFS2_RDF_MASK;
973 rgd->rd_flags |= rg_flags; 974 rgd->rd_flags |= rg_flags;
974 rgd->rd_free = be32_to_cpu(str->rg_free); 975 rgd->rd_free = be32_to_cpu(str->rg_free);
975 rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); 976 rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
976 rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); 977 rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
977 } 978 }
978 979
979 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) 980 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
980 { 981 {
981 struct gfs2_rgrp *str = buf; 982 struct gfs2_rgrp *str = buf;
982 983
983 str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); 984 str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
984 str->rg_free = cpu_to_be32(rgd->rd_free); 985 str->rg_free = cpu_to_be32(rgd->rd_free);
985 str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); 986 str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
986 str->__pad = cpu_to_be32(0); 987 str->__pad = cpu_to_be32(0);
987 str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration); 988 str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
988 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); 989 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
989 } 990 }
990 991
991 static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) 992 static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
992 { 993 {
993 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; 994 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
994 struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; 995 struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
995 996
996 if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free || 997 if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
997 rgl->rl_dinodes != str->rg_dinodes || 998 rgl->rl_dinodes != str->rg_dinodes ||
998 rgl->rl_igeneration != str->rg_igeneration) 999 rgl->rl_igeneration != str->rg_igeneration)
999 return 0; 1000 return 0;
1000 return 1; 1001 return 1;
1001 } 1002 }
1002 1003
1003 static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) 1004 static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
1004 { 1005 {
1005 const struct gfs2_rgrp *str = buf; 1006 const struct gfs2_rgrp *str = buf;
1006 1007
1007 rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); 1008 rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
1008 rgl->rl_flags = str->rg_flags; 1009 rgl->rl_flags = str->rg_flags;
1009 rgl->rl_free = str->rg_free; 1010 rgl->rl_free = str->rg_free;
1010 rgl->rl_dinodes = str->rg_dinodes; 1011 rgl->rl_dinodes = str->rg_dinodes;
1011 rgl->rl_igeneration = str->rg_igeneration; 1012 rgl->rl_igeneration = str->rg_igeneration;
1012 rgl->__pad = 0UL; 1013 rgl->__pad = 0UL;
1013 } 1014 }
1014 1015
1015 static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) 1016 static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
1016 { 1017 {
1017 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; 1018 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
1018 u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; 1019 u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
1019 rgl->rl_unlinked = cpu_to_be32(unlinked); 1020 rgl->rl_unlinked = cpu_to_be32(unlinked);
1020 } 1021 }
1021 1022
1022 static u32 count_unlinked(struct gfs2_rgrpd *rgd) 1023 static u32 count_unlinked(struct gfs2_rgrpd *rgd)
1023 { 1024 {
1024 struct gfs2_bitmap *bi; 1025 struct gfs2_bitmap *bi;
1025 const u32 length = rgd->rd_length; 1026 const u32 length = rgd->rd_length;
1026 const u8 *buffer = NULL; 1027 const u8 *buffer = NULL;
1027 u32 i, goal, count = 0; 1028 u32 i, goal, count = 0;
1028 1029
1029 for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) { 1030 for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
1030 goal = 0; 1031 goal = 0;
1031 buffer = bi->bi_bh->b_data + bi->bi_offset; 1032 buffer = bi->bi_bh->b_data + bi->bi_offset;
1032 WARN_ON(!buffer_uptodate(bi->bi_bh)); 1033 WARN_ON(!buffer_uptodate(bi->bi_bh));
1033 while (goal < bi->bi_len * GFS2_NBBY) { 1034 while (goal < bi->bi_len * GFS2_NBBY) {
1034 goal = gfs2_bitfit(buffer, bi->bi_len, goal, 1035 goal = gfs2_bitfit(buffer, bi->bi_len, goal,
1035 GFS2_BLKST_UNLINKED); 1036 GFS2_BLKST_UNLINKED);
1036 if (goal == BFITNOENT) 1037 if (goal == BFITNOENT)
1037 break; 1038 break;
1038 count++; 1039 count++;
1039 goal++; 1040 goal++;
1040 } 1041 }
1041 } 1042 }
1042 1043
1043 return count; 1044 return count;
1044 } 1045 }
1045 1046
1046 1047
/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
 *
 * Returns: errno
 */

int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	/* already read in */
	if (rgd->rd_bits[0].bi_bh != NULL)
		return 0;

	/* phase 1: submit reads for all bitmap blocks */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
		if (error)
			goto fail;
	}

	/* phase 2: wait for each buffer and verify its metadata type;
	   block 0 is the RG header, the rest are RB (bitmap) blocks */
	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_wait(sdp, bi->bi_bh);
		if (error)
			goto fail;
		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
					GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
		for (x = 0; x < length; x++)
			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
		rgd->rd_free_clone = rgd->rd_free;
	}
	/* LVB not initialised yet: seed it from the header just read */
	if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
				     rgd->rd_bits[0].bi_bh->b_data);
	}
	else if (sdp->sd_args.ar_rgrplvb) {
		if (!gfs2_rgrp_lvb_valid(rgd)){
			gfs2_consist_rgrpd(rgd);
			error = -EIO;
			goto fail;
		}
		if (rgd->rd_rgl->rl_unlinked == 0)
			rgd->rd_flags &= ~GFS2_RDF_CHECK;
	}
	return 0;

fail:
	/* release the buffers obtained so far; x holds the number of
	   buffers successfully acquired by the first loop */
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}

	return error;
}
1121 1122
1122 int update_rgrp_lvb(struct gfs2_rgrpd *rgd) 1123 int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
1123 { 1124 {
1124 u32 rl_flags; 1125 u32 rl_flags;
1125 1126
1126 if (rgd->rd_flags & GFS2_RDF_UPTODATE) 1127 if (rgd->rd_flags & GFS2_RDF_UPTODATE)
1127 return 0; 1128 return 0;
1128 1129
1129 if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) 1130 if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
1130 return gfs2_rgrp_bh_get(rgd); 1131 return gfs2_rgrp_bh_get(rgd);
1131 1132
1132 rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); 1133 rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
1133 rl_flags &= ~GFS2_RDF_MASK; 1134 rl_flags &= ~GFS2_RDF_MASK;
1134 rgd->rd_flags &= GFS2_RDF_MASK; 1135 rgd->rd_flags &= GFS2_RDF_MASK;
1135 rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); 1136 rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1136 if (rgd->rd_rgl->rl_unlinked == 0) 1137 if (rgd->rd_rgl->rl_unlinked == 0)
1137 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1138 rgd->rd_flags &= ~GFS2_RDF_CHECK;
1138 rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); 1139 rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
1139 rgd->rd_free_clone = rgd->rd_free; 1140 rgd->rd_free_clone = rgd->rd_free;
1140 rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); 1141 rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
1141 rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); 1142 rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
1142 return 0; 1143 return 0;
1143 } 1144 }
1144 1145
1145 int gfs2_rgrp_go_lock(struct gfs2_holder *gh) 1146 int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1146 { 1147 {
1147 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; 1148 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1148 struct gfs2_sbd *sdp = rgd->rd_sbd; 1149 struct gfs2_sbd *sdp = rgd->rd_sbd;
1149 1150
1150 if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) 1151 if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1151 return 0; 1152 return 0;
1152 return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); 1153 return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object);
1153 } 1154 }
1154 1155
1155 /** 1156 /**
1156 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() 1157 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
1157 * @gh: The glock holder for the resource group 1158 * @gh: The glock holder for the resource group
1158 * 1159 *
1159 */ 1160 */
1160 1161
1161 void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) 1162 void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
1162 { 1163 {
1163 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; 1164 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1164 int x, length = rgd->rd_length; 1165 int x, length = rgd->rd_length;
1165 1166
1166 for (x = 0; x < length; x++) { 1167 for (x = 0; x < length; x++) {
1167 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1168 struct gfs2_bitmap *bi = rgd->rd_bits + x;
1168 if (bi->bi_bh) { 1169 if (bi->bi_bh) {
1169 brelse(bi->bi_bh); 1170 brelse(bi->bi_bh);
1170 bi->bi_bh = NULL; 1171 bi->bi_bh = NULL;
1171 } 1172 }
1172 } 1173 }
1173 1174
1174 } 1175 }
1175 1176
/**
 * gfs2_rgrp_send_discards - issue discard requests for deallocated blocks
 * @sdp: the filesystem
 * @offset: starting block address of the resource group's data area
 * @bh: the current on-disk bitmap buffer, or NULL to consider every
 *      free block in @bi (used by FITRIM)
 * @bi: the bitmap slice to scan
 * @minlen: minimum extent length worth discarding
 *          (NOTE(review): compared against a sector count below, but
 *          gfs2_fitrim computes it in filesystem blocks - confirm units)
 * @ptrimmed: if non-NULL, receives the total number of sectors discarded
 *
 * Scans the bitmap (the clone copy, when one exists) for candidate blocks,
 * coalesces runs of adjacent candidates into extents, and sends each
 * sufficiently large extent to the block device as a discard request.
 * On any discard failure, discards are disabled for the filesystem.
 *
 * Returns: 0 on success, -EIO if a discard request failed
 */
int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
			    struct buffer_head *bh,
			    const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
{
	struct super_block *sb = sdp->sd_vfs;
	struct block_device *bdev = sb->s_bdev;
	/* Device sectors per filesystem block, for block->sector conversion. */
	const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
					   bdev_logical_block_size(sb->s_bdev);
	u64 blk;
	sector_t start = 0;
	sector_t nr_sects = 0;
	int rv;
	unsigned int x;
	u32 trimmed = 0;
	u8 diff;

	/* Each bitmap byte covers GFS2_NBBY blocks at two bits per block. */
	for (x = 0; x < bi->bi_len; x++) {
		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
		clone += bi->bi_offset;
		clone += x;
		if (bh) {
			/* Candidates: free in the on-disk bitmap (@bh) but
			 * still marked in the clone, i.e. newly freed. */
			const u8 *orig = bh->b_data + bi->bi_offset + x;
			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
		} else {
			/* No reference bitmap: every free block qualifies. */
			diff = ~(*clone | (*clone >> 1));
		}
		/* Keep only the low bit of each two-bit block entry. */
		diff &= 0x55;
		if (diff == 0)
			continue;
		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
		blk *= sects_per_blk; /* convert to sectors */
		while(diff) {
			if (diff & 1) {
				if (nr_sects == 0)
					goto start_new_extent;
				if ((start + nr_sects) != blk) {
					/* Current extent ended; flush it if
					 * it is long enough to be worth it. */
					if (nr_sects >= minlen) {
						rv = blkdev_issue_discard(bdev,
							start, nr_sects,
							GFP_NOFS, 0);
						if (rv)
							goto fail;
						trimmed += nr_sects;
					}
					nr_sects = 0;
start_new_extent:
					start = blk;
				}
				/* Extend the current extent by one block. */
				nr_sects += sects_per_blk;
			}
			diff >>= 2;
			blk += sects_per_blk;
		}
	}
	/* Flush the final extent, if any. */
	if (nr_sects >= minlen) {
		rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
		if (rv)
			goto fail;
		trimmed += nr_sects;
	}
	if (ptrimmed)
		*ptrimmed = trimmed;
	return 0;

fail:
	/* Warn only once; subsequent mounts must re-enable discards. */
	if (sdp->sd_args.ar_discard)
		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
	sdp->sd_args.ar_discard = 0;
	return -EIO;
}
1246 1247
1247 /** 1248 /**
1248 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem 1249 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
1249 * @filp: Any file on the filesystem 1250 * @filp: Any file on the filesystem
1250 * @argp: Pointer to the arguments (also used to pass result) 1251 * @argp: Pointer to the arguments (also used to pass result)
1251 * 1252 *
1252 * Returns: 0 on success, otherwise error code 1253 * Returns: 0 on success, otherwise error code
1253 */ 1254 */
1254 1255
1255 int gfs2_fitrim(struct file *filp, void __user *argp) 1256 int gfs2_fitrim(struct file *filp, void __user *argp)
1256 { 1257 {
1257 struct inode *inode = filp->f_dentry->d_inode; 1258 struct inode *inode = filp->f_dentry->d_inode;
1258 struct gfs2_sbd *sdp = GFS2_SB(inode); 1259 struct gfs2_sbd *sdp = GFS2_SB(inode);
1259 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); 1260 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1260 struct buffer_head *bh; 1261 struct buffer_head *bh;
1261 struct gfs2_rgrpd *rgd; 1262 struct gfs2_rgrpd *rgd;
1262 struct gfs2_rgrpd *rgd_end; 1263 struct gfs2_rgrpd *rgd_end;
1263 struct gfs2_holder gh; 1264 struct gfs2_holder gh;
1264 struct fstrim_range r; 1265 struct fstrim_range r;
1265 int ret = 0; 1266 int ret = 0;
1266 u64 amt; 1267 u64 amt;
1267 u64 trimmed = 0; 1268 u64 trimmed = 0;
1268 u64 start, end, minlen; 1269 u64 start, end, minlen;
1269 unsigned int x; 1270 unsigned int x;
1270 unsigned bs_shift = sdp->sd_sb.sb_bsize_shift; 1271 unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1271 1272
1272 if (!capable(CAP_SYS_ADMIN)) 1273 if (!capable(CAP_SYS_ADMIN))
1273 return -EPERM; 1274 return -EPERM;
1274 1275
1275 if (!blk_queue_discard(q)) 1276 if (!blk_queue_discard(q))
1276 return -EOPNOTSUPP; 1277 return -EOPNOTSUPP;
1277 1278
1278 if (copy_from_user(&r, argp, sizeof(r))) 1279 if (copy_from_user(&r, argp, sizeof(r)))
1279 return -EFAULT; 1280 return -EFAULT;
1280 1281
1281 ret = gfs2_rindex_update(sdp); 1282 ret = gfs2_rindex_update(sdp);
1282 if (ret) 1283 if (ret)
1283 return ret; 1284 return ret;
1284 1285
1285 start = r.start >> bs_shift; 1286 start = r.start >> bs_shift;
1286 end = start + (r.len >> bs_shift); 1287 end = start + (r.len >> bs_shift);
1287 minlen = max_t(u64, r.minlen, 1288 minlen = max_t(u64, r.minlen,
1288 q->limits.discard_granularity) >> bs_shift; 1289 q->limits.discard_granularity) >> bs_shift;
1289 1290
1290 rgd = gfs2_blk2rgrpd(sdp, start, 0); 1291 rgd = gfs2_blk2rgrpd(sdp, start, 0);
1291 rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0); 1292 rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0);
1292 1293
1293 if (end <= start || 1294 if (end <= start ||
1294 minlen > sdp->sd_max_rg_data || 1295 minlen > sdp->sd_max_rg_data ||
1295 start > rgd_end->rd_data0 + rgd_end->rd_data) 1296 start > rgd_end->rd_data0 + rgd_end->rd_data)
1296 return -EINVAL; 1297 return -EINVAL;
1297 1298
1298 while (1) { 1299 while (1) {
1299 1300
1300 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); 1301 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1301 if (ret) 1302 if (ret)
1302 goto out; 1303 goto out;
1303 1304
1304 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) { 1305 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
1305 /* Trim each bitmap in the rgrp */ 1306 /* Trim each bitmap in the rgrp */
1306 for (x = 0; x < rgd->rd_length; x++) { 1307 for (x = 0; x < rgd->rd_length; x++) {
1307 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1308 struct gfs2_bitmap *bi = rgd->rd_bits + x;
1308 ret = gfs2_rgrp_send_discards(sdp, 1309 ret = gfs2_rgrp_send_discards(sdp,
1309 rgd->rd_data0, NULL, bi, minlen, 1310 rgd->rd_data0, NULL, bi, minlen,
1310 &amt); 1311 &amt);
1311 if (ret) { 1312 if (ret) {
1312 gfs2_glock_dq_uninit(&gh); 1313 gfs2_glock_dq_uninit(&gh);
1313 goto out; 1314 goto out;
1314 } 1315 }
1315 trimmed += amt; 1316 trimmed += amt;
1316 } 1317 }
1317 1318
1318 /* Mark rgrp as having been trimmed */ 1319 /* Mark rgrp as having been trimmed */
1319 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); 1320 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
1320 if (ret == 0) { 1321 if (ret == 0) {
1321 bh = rgd->rd_bits[0].bi_bh; 1322 bh = rgd->rd_bits[0].bi_bh;
1322 rgd->rd_flags |= GFS2_RGF_TRIMMED; 1323 rgd->rd_flags |= GFS2_RGF_TRIMMED;
1323 gfs2_trans_add_bh(rgd->rd_gl, bh, 1); 1324 gfs2_trans_add_bh(rgd->rd_gl, bh, 1);
1324 gfs2_rgrp_out(rgd, bh->b_data); 1325 gfs2_rgrp_out(rgd, bh->b_data);
1325 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); 1326 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1326 gfs2_trans_end(sdp); 1327 gfs2_trans_end(sdp);
1327 } 1328 }
1328 } 1329 }
1329 gfs2_glock_dq_uninit(&gh); 1330 gfs2_glock_dq_uninit(&gh);
1330 1331
1331 if (rgd == rgd_end) 1332 if (rgd == rgd_end)
1332 break; 1333 break;
1333 1334
1334 rgd = gfs2_rgrpd_get_next(rgd); 1335 rgd = gfs2_rgrpd_get_next(rgd);
1335 } 1336 }
1336 1337
1337 out: 1338 out:
1338 r.len = trimmed << 9; 1339 r.len = trimmed << 9;
1339 if (copy_to_user(argp, &r, sizeof(r))) 1340 if (copy_to_user(argp, &r, sizeof(r)))
1340 return -EFAULT; 1341 return -EFAULT;
1341 1342
1342 return ret; 1343 return ret;
1343 } 1344 }
1344 1345
1345 /** 1346 /**
1346 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree 1347 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1347 * @ip: the inode structure 1348 * @ip: the inode structure
1348 * 1349 *
1349 */ 1350 */
1350 static void rs_insert(struct gfs2_inode *ip) 1351 static void rs_insert(struct gfs2_inode *ip)
1351 { 1352 {
1352 struct rb_node **newn, *parent = NULL; 1353 struct rb_node **newn, *parent = NULL;
1353 int rc; 1354 int rc;
1354 struct gfs2_blkreserv *rs = ip->i_res; 1355 struct gfs2_blkreserv *rs = ip->i_res;
1355 struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; 1356 struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
1356 u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); 1357 u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
1357 1358
1358 BUG_ON(gfs2_rs_active(rs)); 1359 BUG_ON(gfs2_rs_active(rs));
1359 1360
1360 spin_lock(&rgd->rd_rsspin); 1361 spin_lock(&rgd->rd_rsspin);
1361 newn = &rgd->rd_rstree.rb_node; 1362 newn = &rgd->rd_rstree.rb_node;
1362 while (*newn) { 1363 while (*newn) {
1363 struct gfs2_blkreserv *cur = 1364 struct gfs2_blkreserv *cur =
1364 rb_entry(*newn, struct gfs2_blkreserv, rs_node); 1365 rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1365 1366
1366 parent = *newn; 1367 parent = *newn;
1367 rc = rs_cmp(fsblock, rs->rs_free, cur); 1368 rc = rs_cmp(fsblock, rs->rs_free, cur);
1368 if (rc > 0) 1369 if (rc > 0)
1369 newn = &((*newn)->rb_right); 1370 newn = &((*newn)->rb_right);
1370 else if (rc < 0) 1371 else if (rc < 0)
1371 newn = &((*newn)->rb_left); 1372 newn = &((*newn)->rb_left);
1372 else { 1373 else {
1373 spin_unlock(&rgd->rd_rsspin); 1374 spin_unlock(&rgd->rd_rsspin);
1374 WARN_ON(1); 1375 WARN_ON(1);
1375 return; 1376 return;
1376 } 1377 }
1377 } 1378 }
1378 1379
1379 rb_link_node(&rs->rs_node, parent, newn); 1380 rb_link_node(&rs->rs_node, parent, newn);
1380 rb_insert_color(&rs->rs_node, &rgd->rd_rstree); 1381 rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1381 1382
1382 /* Do our rgrp accounting for the reservation */ 1383 /* Do our rgrp accounting for the reservation */
1383 rgd->rd_reserved += rs->rs_free; /* blocks reserved */ 1384 rgd->rd_reserved += rs->rs_free; /* blocks reserved */
1384 spin_unlock(&rgd->rd_rsspin); 1385 spin_unlock(&rgd->rd_rsspin);
1385 trace_gfs2_rs(rs, TRACE_RS_INSERT); 1386 trace_gfs2_rs(rs, TRACE_RS_INSERT);
1386 } 1387 }
1387 1388
1388 /** 1389 /**
1389 * rg_mblk_search - find a group of multiple free blocks to form a reservation 1390 * rg_mblk_search - find a group of multiple free blocks to form a reservation
1390 * @rgd: the resource group descriptor 1391 * @rgd: the resource group descriptor
1391 * @ip: pointer to the inode for which we're reserving blocks 1392 * @ip: pointer to the inode for which we're reserving blocks
1392 * @requested: number of blocks required for this allocation 1393 * @requested: number of blocks required for this allocation
1393 * 1394 *
1394 */ 1395 */
1395 1396
1396 static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, 1397 static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1397 unsigned requested) 1398 unsigned requested)
1398 { 1399 {
1399 struct gfs2_rbm rbm = { .rgd = rgd, }; 1400 struct gfs2_rbm rbm = { .rgd = rgd, };
1400 u64 goal; 1401 u64 goal;
1401 struct gfs2_blkreserv *rs = ip->i_res; 1402 struct gfs2_blkreserv *rs = ip->i_res;
1402 u32 extlen; 1403 u32 extlen;
1403 u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; 1404 u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
1404 int ret; 1405 int ret;
1405 1406
1406 extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); 1407 extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested);
1407 extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); 1408 extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
1408 if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) 1409 if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
1409 return; 1410 return;
1410 1411
1411 /* Find bitmap block that contains bits for goal block */ 1412 /* Find bitmap block that contains bits for goal block */
1412 if (rgrp_contains_block(rgd, ip->i_goal)) 1413 if (rgrp_contains_block(rgd, ip->i_goal))
1413 goal = ip->i_goal; 1414 goal = ip->i_goal;
1414 else 1415 else
1415 goal = rgd->rd_last_alloc + rgd->rd_data0; 1416 goal = rgd->rd_last_alloc + rgd->rd_data0;
1416 1417
1417 if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) 1418 if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
1418 return; 1419 return;
1419 1420
1420 ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); 1421 ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true);
1421 if (ret == 0) { 1422 if (ret == 0) {
1422 rs->rs_rbm = rbm; 1423 rs->rs_rbm = rbm;
1423 rs->rs_free = extlen; 1424 rs->rs_free = extlen;
1424 rs->rs_inum = ip->i_no_addr; 1425 rs->rs_inum = ip->i_no_addr;
1425 rs_insert(ip); 1426 rs_insert(ip);
1426 } 1427 }
1427 } 1428 }
1428 1429
1429 /** 1430 /**
1430 * gfs2_next_unreserved_block - Return next block that is not reserved 1431 * gfs2_next_unreserved_block - Return next block that is not reserved
1431 * @rgd: The resource group 1432 * @rgd: The resource group
1432 * @block: The starting block 1433 * @block: The starting block
1433 * @length: The required length 1434 * @length: The required length
1434 * @ip: Ignore any reservations for this inode 1435 * @ip: Ignore any reservations for this inode
1435 * 1436 *
1436 * If the block does not appear in any reservation, then return the 1437 * If the block does not appear in any reservation, then return the
1437 * block number unchanged. If it does appear in the reservation, then 1438 * block number unchanged. If it does appear in the reservation, then
1438 * keep looking through the tree of reservations in order to find the 1439 * keep looking through the tree of reservations in order to find the
1439 * first block number which is not reserved. 1440 * first block number which is not reserved.
1440 */ 1441 */
1441 1442
1442 static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, 1443 static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
1443 u32 length, 1444 u32 length,
1444 const struct gfs2_inode *ip) 1445 const struct gfs2_inode *ip)
1445 { 1446 {
1446 struct gfs2_blkreserv *rs; 1447 struct gfs2_blkreserv *rs;
1447 struct rb_node *n; 1448 struct rb_node *n;
1448 int rc; 1449 int rc;
1449 1450
1450 spin_lock(&rgd->rd_rsspin); 1451 spin_lock(&rgd->rd_rsspin);
1451 n = rgd->rd_rstree.rb_node; 1452 n = rgd->rd_rstree.rb_node;
1452 while (n) { 1453 while (n) {
1453 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 1454 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1454 rc = rs_cmp(block, length, rs); 1455 rc = rs_cmp(block, length, rs);
1455 if (rc < 0) 1456 if (rc < 0)
1456 n = n->rb_left; 1457 n = n->rb_left;
1457 else if (rc > 0) 1458 else if (rc > 0)
1458 n = n->rb_right; 1459 n = n->rb_right;
1459 else 1460 else
1460 break; 1461 break;
1461 } 1462 }
1462 1463
1463 if (n) { 1464 if (n) {
1464 while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) { 1465 while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
1465 block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; 1466 block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
1466 n = n->rb_right; 1467 n = n->rb_right;
1467 if (n == NULL) 1468 if (n == NULL)
1468 break; 1469 break;
1469 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 1470 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1470 } 1471 }
1471 } 1472 }
1472 1473
1473 spin_unlock(&rgd->rd_rsspin); 1474 spin_unlock(&rgd->rd_rsspin);
1474 return block; 1475 return block;
1475 } 1476 }
1476 1477
1477 /** 1478 /**
1478 * gfs2_reservation_check_and_update - Check for reservations during block alloc 1479 * gfs2_reservation_check_and_update - Check for reservations during block alloc
1479 * @rbm: The current position in the resource group 1480 * @rbm: The current position in the resource group
1480 * @ip: The inode for which we are searching for blocks 1481 * @ip: The inode for which we are searching for blocks
1481 * @minext: The minimum extent length 1482 * @minext: The minimum extent length
1482 * 1483 *
1483 * This checks the current position in the rgrp to see whether there is 1484 * This checks the current position in the rgrp to see whether there is
1484 * a reservation covering this block. If not then this function is a 1485 * a reservation covering this block. If not then this function is a
1485 * no-op. If there is, then the position is moved to the end of the 1486 * no-op. If there is, then the position is moved to the end of the
1486 * contiguous reservation(s) so that we are pointing at the first 1487 * contiguous reservation(s) so that we are pointing at the first
1487 * non-reserved block. 1488 * non-reserved block.
1488 * 1489 *
1489 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error 1490 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1490 */ 1491 */
1491 1492
1492 static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, 1493 static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
1493 const struct gfs2_inode *ip, 1494 const struct gfs2_inode *ip,
1494 u32 minext) 1495 u32 minext)
1495 { 1496 {
1496 u64 block = gfs2_rbm_to_block(rbm); 1497 u64 block = gfs2_rbm_to_block(rbm);
1497 u32 extlen = 1; 1498 u32 extlen = 1;
1498 u64 nblock; 1499 u64 nblock;
1499 int ret; 1500 int ret;
1500 1501
1501 /* 1502 /*
1502 * If we have a minimum extent length, then skip over any extent 1503 * If we have a minimum extent length, then skip over any extent
1503 * which is less than the min extent length in size. 1504 * which is less than the min extent length in size.
1504 */ 1505 */
1505 if (minext) { 1506 if (minext) {
1506 extlen = gfs2_free_extlen(rbm, minext); 1507 extlen = gfs2_free_extlen(rbm, minext);
1507 nblock = block + extlen; 1508 nblock = block + extlen;
1508 if (extlen < minext) 1509 if (extlen < minext)
1509 goto fail; 1510 goto fail;
1510 } 1511 }
1511 1512
1512 /* 1513 /*
1513 * Check the extent which has been found against the reservations 1514 * Check the extent which has been found against the reservations
1514 * and skip if parts of it are already reserved 1515 * and skip if parts of it are already reserved
1515 */ 1516 */
1516 nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); 1517 nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
1517 if (nblock == block) 1518 if (nblock == block)
1518 return 0; 1519 return 0;
1519 fail: 1520 fail:
1520 ret = gfs2_rbm_from_block(rbm, nblock); 1521 ret = gfs2_rbm_from_block(rbm, nblock);
1521 if (ret < 0) 1522 if (ret < 0)
1522 return ret; 1523 return ret;
1523 return 1; 1524 return 1;
1524 } 1525 }
1525 1526
1526 /** 1527 /**
1527 * gfs2_rbm_find - Look for blocks of a particular state 1528 * gfs2_rbm_find - Look for blocks of a particular state
1528 * @rbm: Value/result starting position and final position 1529 * @rbm: Value/result starting position and final position
1529 * @state: The state which we want to find 1530 * @state: The state which we want to find
1530 * @minext: The requested extent length (0 for a single block) 1531 * @minext: The requested extent length (0 for a single block)
1531 * @ip: If set, check for reservations 1532 * @ip: If set, check for reservations
1532 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping 1533 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1533 * around until we've reached the starting point. 1534 * around until we've reached the starting point.
1534 * 1535 *
1535 * Side effects: 1536 * Side effects:
1536 * - If looking for free blocks, we set GBF_FULL on each bitmap which 1537 * - If looking for free blocks, we set GBF_FULL on each bitmap which
1537 * has no free blocks in it. 1538 * has no free blocks in it.
1538 * 1539 *
1539 * Returns: 0 on success, -ENOSPC if there is no block of the requested state 1540 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1540 */ 1541 */
1541 1542
1542 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, 1543 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
1543 const struct gfs2_inode *ip, bool nowrap) 1544 const struct gfs2_inode *ip, bool nowrap)
1544 { 1545 {
1545 struct buffer_head *bh; 1546 struct buffer_head *bh;
1546 struct gfs2_bitmap *initial_bi; 1547 struct gfs2_bitmap *initial_bi;
1547 u32 initial_offset; 1548 u32 initial_offset;
1548 u32 offset; 1549 u32 offset;
1549 u8 *buffer; 1550 u8 *buffer;
1550 int index; 1551 int index;
1551 int n = 0; 1552 int n = 0;
1552 int iters = rbm->rgd->rd_length; 1553 int iters = rbm->rgd->rd_length;
1553 int ret; 1554 int ret;
1554 1555
1555 /* If we are not starting at the beginning of a bitmap, then we 1556 /* If we are not starting at the beginning of a bitmap, then we
1556 * need to add one to the bitmap count to ensure that we search 1557 * need to add one to the bitmap count to ensure that we search
1557 * the starting bitmap twice. 1558 * the starting bitmap twice.
1558 */ 1559 */
1559 if (rbm->offset != 0) 1560 if (rbm->offset != 0)
1560 iters++; 1561 iters++;
1561 1562
1562 while(1) { 1563 while(1) {
1563 if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && 1564 if (test_bit(GBF_FULL, &rbm->bi->bi_flags) &&
1564 (state == GFS2_BLKST_FREE)) 1565 (state == GFS2_BLKST_FREE))
1565 goto next_bitmap; 1566 goto next_bitmap;
1566 1567
1567 bh = rbm->bi->bi_bh; 1568 bh = rbm->bi->bi_bh;
1568 buffer = bh->b_data + rbm->bi->bi_offset; 1569 buffer = bh->b_data + rbm->bi->bi_offset;
1569 WARN_ON(!buffer_uptodate(bh)); 1570 WARN_ON(!buffer_uptodate(bh));
1570 if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) 1571 if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone)
1571 buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; 1572 buffer = rbm->bi->bi_clone + rbm->bi->bi_offset;
1572 initial_offset = rbm->offset; 1573 initial_offset = rbm->offset;
1573 offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); 1574 offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state);
1574 if (offset == BFITNOENT) 1575 if (offset == BFITNOENT)
1575 goto bitmap_full; 1576 goto bitmap_full;
1576 rbm->offset = offset; 1577 rbm->offset = offset;
1577 if (ip == NULL) 1578 if (ip == NULL)
1578 return 0; 1579 return 0;
1579 1580
1580 initial_bi = rbm->bi; 1581 initial_bi = rbm->bi;
1581 ret = gfs2_reservation_check_and_update(rbm, ip, minext); 1582 ret = gfs2_reservation_check_and_update(rbm, ip, minext);
1582 if (ret == 0) 1583 if (ret == 0)
1583 return 0; 1584 return 0;
1584 if (ret > 0) { 1585 if (ret > 0) {
1585 n += (rbm->bi - initial_bi); 1586 n += (rbm->bi - initial_bi);
1586 goto next_iter; 1587 goto next_iter;
1587 } 1588 }
1588 if (ret == -E2BIG) { 1589 if (ret == -E2BIG) {
1589 index = 0; 1590 index = 0;
1590 rbm->offset = 0; 1591 rbm->offset = 0;
1591 n += (rbm->bi - initial_bi); 1592 n += (rbm->bi - initial_bi);
1592 goto res_covered_end_of_rgrp; 1593 goto res_covered_end_of_rgrp;
1593 } 1594 }
1594 return ret; 1595 return ret;
1595 1596
1596 bitmap_full: /* Mark bitmap as full and fall through */ 1597 bitmap_full: /* Mark bitmap as full and fall through */
1597 if ((state == GFS2_BLKST_FREE) && initial_offset == 0) 1598 if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
1598 set_bit(GBF_FULL, &rbm->bi->bi_flags); 1599 set_bit(GBF_FULL, &rbm->bi->bi_flags);
1599 1600
1600 next_bitmap: /* Find next bitmap in the rgrp */ 1601 next_bitmap: /* Find next bitmap in the rgrp */
1601 rbm->offset = 0; 1602 rbm->offset = 0;
1602 index = rbm->bi - rbm->rgd->rd_bits; 1603 index = rbm->bi - rbm->rgd->rd_bits;
1603 index++; 1604 index++;
1604 if (index == rbm->rgd->rd_length) 1605 if (index == rbm->rgd->rd_length)
1605 index = 0; 1606 index = 0;
1606 res_covered_end_of_rgrp: 1607 res_covered_end_of_rgrp:
1607 rbm->bi = &rbm->rgd->rd_bits[index]; 1608 rbm->bi = &rbm->rgd->rd_bits[index];
1608 if ((index == 0) && nowrap) 1609 if ((index == 0) && nowrap)
1609 break; 1610 break;
1610 n++; 1611 n++;
1611 next_iter: 1612 next_iter:
1612 if (n >= iters) 1613 if (n >= iters)
1613 break; 1614 break;
1614 } 1615 }
1615 1616
1616 return -ENOSPC; 1617 return -ENOSPC;
1617 } 1618 }
1618 1619
1619 /** 1620 /**
1620 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes 1621 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1621 * @rgd: The rgrp 1622 * @rgd: The rgrp
1622 * @last_unlinked: block address of the last dinode we unlinked 1623 * @last_unlinked: block address of the last dinode we unlinked
1623 * @skip: block address we should explicitly not unlink 1624 * @skip: block address we should explicitly not unlink
1624 * 1625 *
1625 * Returns: 0 if no error 1626 * Returns: 0 if no error
1626 * The inode, if one has been found, in inode. 1627 * The inode, if one has been found, in inode.
1627 */ 1628 */
1628 1629
1629 static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) 1630 static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1630 { 1631 {
1631 u64 block; 1632 u64 block;
1632 struct gfs2_sbd *sdp = rgd->rd_sbd; 1633 struct gfs2_sbd *sdp = rgd->rd_sbd;
1633 struct gfs2_glock *gl; 1634 struct gfs2_glock *gl;
1634 struct gfs2_inode *ip; 1635 struct gfs2_inode *ip;
1635 int error; 1636 int error;
1636 int found = 0; 1637 int found = 0;
1637 struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; 1638 struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 };
1638 1639
1639 while (1) { 1640 while (1) {
1640 down_write(&sdp->sd_log_flush_lock); 1641 down_write(&sdp->sd_log_flush_lock);
1641 error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); 1642 error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true);
1642 up_write(&sdp->sd_log_flush_lock); 1643 up_write(&sdp->sd_log_flush_lock);
1643 if (error == -ENOSPC) 1644 if (error == -ENOSPC)
1644 break; 1645 break;
1645 if (WARN_ON_ONCE(error)) 1646 if (WARN_ON_ONCE(error))
1646 break; 1647 break;
1647 1648
1648 block = gfs2_rbm_to_block(&rbm); 1649 block = gfs2_rbm_to_block(&rbm);
1649 if (gfs2_rbm_from_block(&rbm, block + 1)) 1650 if (gfs2_rbm_from_block(&rbm, block + 1))
1650 break; 1651 break;
1651 if (*last_unlinked != NO_BLOCK && block <= *last_unlinked) 1652 if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
1652 continue; 1653 continue;
1653 if (block == skip) 1654 if (block == skip)
1654 continue; 1655 continue;
1655 *last_unlinked = block; 1656 *last_unlinked = block;
1656 1657
1657 error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl); 1658 error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
1658 if (error) 1659 if (error)
1659 continue; 1660 continue;
1660 1661
1661 /* If the inode is already in cache, we can ignore it here 1662 /* If the inode is already in cache, we can ignore it here
1662 * because the existing inode disposal code will deal with 1663 * because the existing inode disposal code will deal with
1663 * it when all refs have gone away. Accessing gl_object like 1664 * it when all refs have gone away. Accessing gl_object like
1664 * this is not safe in general. Here it is ok because we do 1665 * this is not safe in general. Here it is ok because we do
1665 * not dereference the pointer, and we only need an approx 1666 * not dereference the pointer, and we only need an approx
1666 * answer to whether it is NULL or not. 1667 * answer to whether it is NULL or not.
1667 */ 1668 */
1668 ip = gl->gl_object; 1669 ip = gl->gl_object;
1669 1670
1670 if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) 1671 if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
1671 gfs2_glock_put(gl); 1672 gfs2_glock_put(gl);
1672 else 1673 else
1673 found++; 1674 found++;
1674 1675
1675 /* Limit reclaim to sensible number of tasks */ 1676 /* Limit reclaim to sensible number of tasks */
1676 if (found > NR_CPUS) 1677 if (found > NR_CPUS)
1677 return; 1678 return;
1678 } 1679 }
1679 1680
1680 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1681 rgd->rd_flags &= ~GFS2_RDF_CHECK;
1681 return; 1682 return;
1682 } 1683 }
1683 1684
1684 /** 1685 /**
1685 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested 1686 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1686 * @rgd: The rgrp in question 1687 * @rgd: The rgrp in question
1687 * @loops: An indication of how picky we can be (0=very, 1=less so) 1688 * @loops: An indication of how picky we can be (0=very, 1=less so)
1688 * 1689 *
1689 * This function uses the recently added glock statistics in order to 1690 * This function uses the recently added glock statistics in order to
1690 * figure out whether a parciular resource group is suffering from 1691 * figure out whether a parciular resource group is suffering from
1691 * contention from multiple nodes. This is done purely on the basis 1692 * contention from multiple nodes. This is done purely on the basis
1692 * of timings, since this is the only data we have to work with and 1693 * of timings, since this is the only data we have to work with and
1693 * our aim here is to reject a resource group which is highly contended 1694 * our aim here is to reject a resource group which is highly contended
1694 * but (very important) not to do this too often in order to ensure that 1695 * but (very important) not to do this too often in order to ensure that
1695 * we do not land up introducing fragmentation by changing resource 1696 * we do not land up introducing fragmentation by changing resource
1696 * groups when not actually required. 1697 * groups when not actually required.
1697 * 1698 *
1698 * The calculation is fairly simple, we want to know whether the SRTTB 1699 * The calculation is fairly simple, we want to know whether the SRTTB
1699 * (i.e. smoothed round trip time for blocking operations) to acquire 1700 * (i.e. smoothed round trip time for blocking operations) to acquire
1700 * the lock for this rgrp's glock is significantly greater than the 1701 * the lock for this rgrp's glock is significantly greater than the
1701 * time taken for resource groups on average. We introduce a margin in 1702 * time taken for resource groups on average. We introduce a margin in
1702 * the form of the variable @var which is computed as the sum of the two 1703 * the form of the variable @var which is computed as the sum of the two
1703 * respective variences, and multiplied by a factor depending on @loops 1704 * respective variences, and multiplied by a factor depending on @loops
1704 * and whether we have a lot of data to base the decision on. This is 1705 * and whether we have a lot of data to base the decision on. This is
1705 * then tested against the square difference of the means in order to 1706 * then tested against the square difference of the means in order to
1706 * decide whether the result is statistically significant or not. 1707 * decide whether the result is statistically significant or not.
1707 * 1708 *
1708 * Returns: A boolean verdict on the congestion status 1709 * Returns: A boolean verdict on the congestion status
1709 */ 1710 */
1710 1711
1711 static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) 1712 static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1712 { 1713 {
1713 const struct gfs2_glock *gl = rgd->rd_gl; 1714 const struct gfs2_glock *gl = rgd->rd_gl;
1714 const struct gfs2_sbd *sdp = gl->gl_sbd; 1715 const struct gfs2_sbd *sdp = gl->gl_sbd;
1715 struct gfs2_lkstats *st; 1716 struct gfs2_lkstats *st;
1716 s64 r_dcount, l_dcount; 1717 s64 r_dcount, l_dcount;
1717 s64 r_srttb, l_srttb; 1718 s64 r_srttb, l_srttb;
1718 s64 srttb_diff; 1719 s64 srttb_diff;
1719 s64 sqr_diff; 1720 s64 sqr_diff;
1720 s64 var; 1721 s64 var;
1721 1722
1722 preempt_disable(); 1723 preempt_disable();
1723 st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP]; 1724 st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1724 r_srttb = st->stats[GFS2_LKS_SRTTB]; 1725 r_srttb = st->stats[GFS2_LKS_SRTTB];
1725 r_dcount = st->stats[GFS2_LKS_DCOUNT]; 1726 r_dcount = st->stats[GFS2_LKS_DCOUNT];
1726 var = st->stats[GFS2_LKS_SRTTVARB] + 1727 var = st->stats[GFS2_LKS_SRTTVARB] +
1727 gl->gl_stats.stats[GFS2_LKS_SRTTVARB]; 1728 gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1728 preempt_enable(); 1729 preempt_enable();
1729 1730
1730 l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB]; 1731 l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1731 l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT]; 1732 l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1732 1733
1733 if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0)) 1734 if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
1734 return false; 1735 return false;
1735 1736
1736 srttb_diff = r_srttb - l_srttb; 1737 srttb_diff = r_srttb - l_srttb;
1737 sqr_diff = srttb_diff * srttb_diff; 1738 sqr_diff = srttb_diff * srttb_diff;
1738 1739
1739 var *= 2; 1740 var *= 2;
1740 if (l_dcount < 8 || r_dcount < 8) 1741 if (l_dcount < 8 || r_dcount < 8)
1741 var *= 2; 1742 var *= 2;
1742 if (loops == 1) 1743 if (loops == 1)
1743 var *= 2; 1744 var *= 2;
1744 1745
1745 return ((srttb_diff < 0) && (sqr_diff > var)); 1746 return ((srttb_diff < 0) && (sqr_diff > var));
1746 } 1747 }
1747 1748
1748 /** 1749 /**
1749 * gfs2_rgrp_used_recently 1750 * gfs2_rgrp_used_recently
1750 * @rs: The block reservation with the rgrp to test 1751 * @rs: The block reservation with the rgrp to test
1751 * @msecs: The time limit in milliseconds 1752 * @msecs: The time limit in milliseconds
1752 * 1753 *
1753 * Returns: True if the rgrp glock has been used within the time limit 1754 * Returns: True if the rgrp glock has been used within the time limit
1754 */ 1755 */
1755 static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, 1756 static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1756 u64 msecs) 1757 u64 msecs)
1757 { 1758 {
1758 u64 tdiff; 1759 u64 tdiff;
1759 1760
1760 tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), 1761 tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1761 rs->rs_rbm.rgd->rd_gl->gl_dstamp)); 1762 rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1762 1763
1763 return tdiff > (msecs * 1000 * 1000); 1764 return tdiff > (msecs * 1000 * 1000);
1764 } 1765 }
1765 1766
1767 static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1768 {
1769 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1770 u32 skip;
1771
1772 get_random_bytes(&skip, sizeof(skip));
1773 return skip % sdp->sd_rgrps;
1774 }
1775
1766 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) 1776 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1767 { 1777 {
1768 struct gfs2_rgrpd *rgd = *pos; 1778 struct gfs2_rgrpd *rgd = *pos;
1769 1779
1770 rgd = gfs2_rgrpd_get_next(rgd); 1780 rgd = gfs2_rgrpd_get_next(rgd);
1771 if (rgd == NULL) 1781 if (rgd == NULL)
1772 rgd = gfs2_rgrpd_get_next(NULL); 1782 rgd = gfs2_rgrpd_get_next(NULL);
1773 *pos = rgd; 1783 *pos = rgd;
1774 if (rgd != begin) /* If we didn't wrap */ 1784 if (rgd != begin) /* If we didn't wrap */
1775 return true; 1785 return true;
1776 return false; 1786 return false;
1777 } 1787 }
1778 1788
1779 /** 1789 /**
1780 * gfs2_inplace_reserve - Reserve space in the filesystem 1790 * gfs2_inplace_reserve - Reserve space in the filesystem
1781 * @ip: the inode to reserve space for 1791 * @ip: the inode to reserve space for
1782 * @requested: the number of blocks to be reserved 1792 * @requested: the number of blocks to be reserved
1783 * 1793 *
1784 * Returns: errno 1794 * Returns: errno
1785 */ 1795 */
1786 1796
1787 int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) 1797 int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags)
1788 { 1798 {
1789 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1799 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1790 struct gfs2_rgrpd *begin = NULL; 1800 struct gfs2_rgrpd *begin = NULL;
1791 struct gfs2_blkreserv *rs = ip->i_res; 1801 struct gfs2_blkreserv *rs = ip->i_res;
1792 int error = 0, rg_locked, flags = 0; 1802 int error = 0, rg_locked, flags = 0;
1793 u64 last_unlinked = NO_BLOCK; 1803 u64 last_unlinked = NO_BLOCK;
1794 int loops = 0; 1804 int loops = 0;
1805 u32 skip = 0;
1795 1806
1796 if (sdp->sd_args.ar_rgrplvb) 1807 if (sdp->sd_args.ar_rgrplvb)
1797 flags |= GL_SKIP; 1808 flags |= GL_SKIP;
1798 if (gfs2_assert_warn(sdp, requested)) 1809 if (gfs2_assert_warn(sdp, requested))
1799 return -EINVAL; 1810 return -EINVAL;
1800 if (gfs2_rs_active(rs)) { 1811 if (gfs2_rs_active(rs)) {
1801 begin = rs->rs_rbm.rgd; 1812 begin = rs->rs_rbm.rgd;
1802 flags = 0; /* Yoda: Do or do not. There is no try */ 1813 flags = 0; /* Yoda: Do or do not. There is no try */
1803 } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { 1814 } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
1804 rs->rs_rbm.rgd = begin = ip->i_rgd; 1815 rs->rs_rbm.rgd = begin = ip->i_rgd;
1805 } else { 1816 } else {
1806 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1817 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1807 } 1818 }
1819 if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV))
1820 skip = gfs2_orlov_skip(ip);
1808 if (rs->rs_rbm.rgd == NULL) 1821 if (rs->rs_rbm.rgd == NULL)
1809 return -EBADSLT; 1822 return -EBADSLT;
1810 1823
1811 while (loops < 3) { 1824 while (loops < 3) {
1812 rg_locked = 1; 1825 rg_locked = 1;
1813 1826
1814 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { 1827 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1815 rg_locked = 0; 1828 rg_locked = 0;
1829 if (skip && skip--)
1830 goto next_rgrp;
1816 if (!gfs2_rs_active(rs) && (loops < 2) && 1831 if (!gfs2_rs_active(rs) && (loops < 2) &&
1817 gfs2_rgrp_used_recently(rs, 1000) && 1832 gfs2_rgrp_used_recently(rs, 1000) &&
1818 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) 1833 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1819 goto next_rgrp; 1834 goto next_rgrp;
1820 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, 1835 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
1821 LM_ST_EXCLUSIVE, flags, 1836 LM_ST_EXCLUSIVE, flags,
1822 &rs->rs_rgd_gh); 1837 &rs->rs_rgd_gh);
1823 if (unlikely(error)) 1838 if (unlikely(error))
1824 return error; 1839 return error;
1825 if (!gfs2_rs_active(rs) && (loops < 2) && 1840 if (!gfs2_rs_active(rs) && (loops < 2) &&
1826 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) 1841 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1827 goto skip_rgrp; 1842 goto skip_rgrp;
1828 if (sdp->sd_args.ar_rgrplvb) { 1843 if (sdp->sd_args.ar_rgrplvb) {
1829 error = update_rgrp_lvb(rs->rs_rbm.rgd); 1844 error = update_rgrp_lvb(rs->rs_rbm.rgd);
1830 if (unlikely(error)) { 1845 if (unlikely(error)) {
1831 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1846 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1832 return error; 1847 return error;
1833 } 1848 }
1834 } 1849 }
1835 } 1850 }
1836 1851
1837 /* Skip unuseable resource groups */ 1852 /* Skip unuseable resource groups */
1838 if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) 1853 if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
1839 goto skip_rgrp; 1854 goto skip_rgrp;
1840 1855
1841 if (sdp->sd_args.ar_rgrplvb) 1856 if (sdp->sd_args.ar_rgrplvb)
1842 gfs2_rgrp_bh_get(rs->rs_rbm.rgd); 1857 gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
1843 1858
1844 /* Get a reservation if we don't already have one */ 1859 /* Get a reservation if we don't already have one */
1845 if (!gfs2_rs_active(rs)) 1860 if (!gfs2_rs_active(rs))
1846 rg_mblk_search(rs->rs_rbm.rgd, ip, requested); 1861 rg_mblk_search(rs->rs_rbm.rgd, ip, requested);
1847 1862
1848 /* Skip rgrps when we can't get a reservation on first pass */ 1863 /* Skip rgrps when we can't get a reservation on first pass */
1849 if (!gfs2_rs_active(rs) && (loops < 1)) 1864 if (!gfs2_rs_active(rs) && (loops < 1))
1850 goto check_rgrp; 1865 goto check_rgrp;
1851 1866
1852 /* If rgrp has enough free space, use it */ 1867 /* If rgrp has enough free space, use it */
1853 if (rs->rs_rbm.rgd->rd_free_clone >= requested) { 1868 if (rs->rs_rbm.rgd->rd_free_clone >= requested) {
1854 ip->i_rgd = rs->rs_rbm.rgd; 1869 ip->i_rgd = rs->rs_rbm.rgd;
1855 return 0; 1870 return 0;
1856 } 1871 }
1857 1872
1858 /* Drop reservation, if we couldn't use reserved rgrp */ 1873 /* Drop reservation, if we couldn't use reserved rgrp */
1859 if (gfs2_rs_active(rs)) 1874 if (gfs2_rs_active(rs))
1860 gfs2_rs_deltree(ip, rs); 1875 gfs2_rs_deltree(ip, rs);
1861 check_rgrp: 1876 check_rgrp:
1862 /* Check for unlinked inodes which can be reclaimed */ 1877 /* Check for unlinked inodes which can be reclaimed */
1863 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) 1878 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
1864 try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, 1879 try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
1865 ip->i_no_addr); 1880 ip->i_no_addr);
1866 skip_rgrp: 1881 skip_rgrp:
1867 /* Unlock rgrp if required */ 1882 /* Unlock rgrp if required */
1868 if (!rg_locked) 1883 if (!rg_locked)
1869 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1884 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1870 next_rgrp: 1885 next_rgrp:
1871 /* Find the next rgrp, and continue looking */ 1886 /* Find the next rgrp, and continue looking */
1872 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) 1887 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
1888 continue;
1889 if (skip)
1873 continue; 1890 continue;
1874 1891
1875 /* If we've scanned all the rgrps, but found no free blocks 1892 /* If we've scanned all the rgrps, but found no free blocks
1876 * then this checks for some less likely conditions before 1893 * then this checks for some less likely conditions before
1877 * trying again. 1894 * trying again.
1878 */ 1895 */
1879 loops++; 1896 loops++;
1880 /* Check that fs hasn't grown if writing to rindex */ 1897 /* Check that fs hasn't grown if writing to rindex */
1881 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { 1898 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
1882 error = gfs2_ri_update(ip); 1899 error = gfs2_ri_update(ip);
1883 if (error) 1900 if (error)
1884 return error; 1901 return error;
1885 } 1902 }
1886 /* Flushing the log may release space */ 1903 /* Flushing the log may release space */
1887 if (loops == 2) 1904 if (loops == 2)
1888 gfs2_log_flush(sdp, NULL); 1905 gfs2_log_flush(sdp, NULL);
1889 } 1906 }
1890 1907
1891 return -ENOSPC; 1908 return -ENOSPC;
1892 } 1909 }
1893 1910
1894 /** 1911 /**
1895 * gfs2_inplace_release - release an inplace reservation 1912 * gfs2_inplace_release - release an inplace reservation
1896 * @ip: the inode the reservation was taken out on 1913 * @ip: the inode the reservation was taken out on
1897 * 1914 *
1898 * Release a reservation made by gfs2_inplace_reserve(). 1915 * Release a reservation made by gfs2_inplace_reserve().
1899 */ 1916 */
1900 1917
1901 void gfs2_inplace_release(struct gfs2_inode *ip) 1918 void gfs2_inplace_release(struct gfs2_inode *ip)
1902 { 1919 {
1903 struct gfs2_blkreserv *rs = ip->i_res; 1920 struct gfs2_blkreserv *rs = ip->i_res;
1904 1921
1905 if (rs->rs_rgd_gh.gh_gl) 1922 if (rs->rs_rgd_gh.gh_gl)
1906 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1923 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1907 } 1924 }
1908 1925
1909 /** 1926 /**
1910 * gfs2_get_block_type - Check a block in a RG is of given type 1927 * gfs2_get_block_type - Check a block in a RG is of given type
1911 * @rgd: the resource group holding the block 1928 * @rgd: the resource group holding the block
1912 * @block: the block number 1929 * @block: the block number
1913 * 1930 *
1914 * Returns: The block type (GFS2_BLKST_*) 1931 * Returns: The block type (GFS2_BLKST_*)
1915 */ 1932 */
1916 1933
1917 static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) 1934 static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1918 { 1935 {
1919 struct gfs2_rbm rbm = { .rgd = rgd, }; 1936 struct gfs2_rbm rbm = { .rgd = rgd, };
1920 int ret; 1937 int ret;
1921 1938
1922 ret = gfs2_rbm_from_block(&rbm, block); 1939 ret = gfs2_rbm_from_block(&rbm, block);
1923 WARN_ON_ONCE(ret != 0); 1940 WARN_ON_ONCE(ret != 0);
1924 1941
1925 return gfs2_testbit(&rbm); 1942 return gfs2_testbit(&rbm);
1926 } 1943 }
1927 1944
1928 1945
1929 /** 1946 /**
1930 * gfs2_alloc_extent - allocate an extent from a given bitmap 1947 * gfs2_alloc_extent - allocate an extent from a given bitmap
1931 * @rbm: the resource group information 1948 * @rbm: the resource group information
1932 * @dinode: TRUE if the first block we allocate is for a dinode 1949 * @dinode: TRUE if the first block we allocate is for a dinode
1933 * @n: The extent length (value/result) 1950 * @n: The extent length (value/result)
1934 * 1951 *
1935 * Add the bitmap buffer to the transaction. 1952 * Add the bitmap buffer to the transaction.
1936 * Set the found bits to @new_state to change block's allocation state. 1953 * Set the found bits to @new_state to change block's allocation state.
1937 */ 1954 */
1938 static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, 1955 static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
1939 unsigned int *n) 1956 unsigned int *n)
1940 { 1957 {
1941 struct gfs2_rbm pos = { .rgd = rbm->rgd, }; 1958 struct gfs2_rbm pos = { .rgd = rbm->rgd, };
1942 const unsigned int elen = *n; 1959 const unsigned int elen = *n;
1943 u64 block; 1960 u64 block;
1944 int ret; 1961 int ret;
1945 1962
1946 *n = 1; 1963 *n = 1;
1947 block = gfs2_rbm_to_block(rbm); 1964 block = gfs2_rbm_to_block(rbm);
1948 gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); 1965 gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1);
1949 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1966 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1950 block++; 1967 block++;
1951 while (*n < elen) { 1968 while (*n < elen) {
1952 ret = gfs2_rbm_from_block(&pos, block); 1969 ret = gfs2_rbm_from_block(&pos, block);
1953 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) 1970 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
1954 break; 1971 break;
1955 gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); 1972 gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1);
1956 gfs2_setbit(&pos, true, GFS2_BLKST_USED); 1973 gfs2_setbit(&pos, true, GFS2_BLKST_USED);
1957 (*n)++; 1974 (*n)++;
1958 block++; 1975 block++;
1959 } 1976 }
1960 } 1977 }
1961 1978
1962 /** 1979 /**
1963 * rgblk_free - Change alloc state of given block(s) 1980 * rgblk_free - Change alloc state of given block(s)
1964 * @sdp: the filesystem 1981 * @sdp: the filesystem
1965 * @bstart: the start of a run of blocks to free 1982 * @bstart: the start of a run of blocks to free
1966 * @blen: the length of the block run (all must lie within ONE RG!) 1983 * @blen: the length of the block run (all must lie within ONE RG!)
1967 * @new_state: GFS2_BLKST_XXX the after-allocation block state 1984 * @new_state: GFS2_BLKST_XXX the after-allocation block state
1968 * 1985 *
1969 * Returns: Resource group containing the block(s) 1986 * Returns: Resource group containing the block(s)
1970 */ 1987 */
1971 1988
1972 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, 1989 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1973 u32 blen, unsigned char new_state) 1990 u32 blen, unsigned char new_state)
1974 { 1991 {
1975 struct gfs2_rbm rbm; 1992 struct gfs2_rbm rbm;
1976 1993
1977 rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); 1994 rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
1978 if (!rbm.rgd) { 1995 if (!rbm.rgd) {
1979 if (gfs2_consist(sdp)) 1996 if (gfs2_consist(sdp))
1980 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 1997 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
1981 return NULL; 1998 return NULL;
1982 } 1999 }
1983 2000
1984 while (blen--) { 2001 while (blen--) {
1985 gfs2_rbm_from_block(&rbm, bstart); 2002 gfs2_rbm_from_block(&rbm, bstart);
1986 bstart++; 2003 bstart++;
1987 if (!rbm.bi->bi_clone) { 2004 if (!rbm.bi->bi_clone) {
1988 rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, 2005 rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size,
1989 GFP_NOFS | __GFP_NOFAIL); 2006 GFP_NOFS | __GFP_NOFAIL);
1990 memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, 2007 memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset,
1991 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, 2008 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
1992 rbm.bi->bi_len); 2009 rbm.bi->bi_len);
1993 } 2010 }
1994 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); 2011 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1);
1995 gfs2_setbit(&rbm, false, new_state); 2012 gfs2_setbit(&rbm, false, new_state);
1996 } 2013 }
1997 2014
1998 return rbm.rgd; 2015 return rbm.rgd;
1999 } 2016 }
2000 2017
2001 /** 2018 /**
2002 * gfs2_rgrp_dump - print out an rgrp 2019 * gfs2_rgrp_dump - print out an rgrp
2003 * @seq: The iterator 2020 * @seq: The iterator
2004 * @gl: The glock in question 2021 * @gl: The glock in question
2005 * 2022 *
2006 */ 2023 */
2007 2024
2008 int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) 2025 int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
2009 { 2026 {
2010 struct gfs2_rgrpd *rgd = gl->gl_object; 2027 struct gfs2_rgrpd *rgd = gl->gl_object;
2011 struct gfs2_blkreserv *trs; 2028 struct gfs2_blkreserv *trs;
2012 const struct rb_node *n; 2029 const struct rb_node *n;
2013 2030
2014 if (rgd == NULL) 2031 if (rgd == NULL)
2015 return 0; 2032 return 0;
2016 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", 2033 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n",
2017 (unsigned long long)rgd->rd_addr, rgd->rd_flags, 2034 (unsigned long long)rgd->rd_addr, rgd->rd_flags,
2018 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, 2035 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2019 rgd->rd_reserved); 2036 rgd->rd_reserved);
2020 spin_lock(&rgd->rd_rsspin); 2037 spin_lock(&rgd->rd_rsspin);
2021 for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { 2038 for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
2022 trs = rb_entry(n, struct gfs2_blkreserv, rs_node); 2039 trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
2023 dump_rs(seq, trs); 2040 dump_rs(seq, trs);
2024 } 2041 }
2025 spin_unlock(&rgd->rd_rsspin); 2042 spin_unlock(&rgd->rd_rsspin);
2026 return 0; 2043 return 0;
2027 } 2044 }
2028 2045
2029 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) 2046 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
2030 { 2047 {
2031 struct gfs2_sbd *sdp = rgd->rd_sbd; 2048 struct gfs2_sbd *sdp = rgd->rd_sbd;
2032 fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", 2049 fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
2033 (unsigned long long)rgd->rd_addr); 2050 (unsigned long long)rgd->rd_addr);
2034 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); 2051 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
2035 gfs2_rgrp_dump(NULL, rgd->rd_gl); 2052 gfs2_rgrp_dump(NULL, rgd->rd_gl);
2036 rgd->rd_flags |= GFS2_RDF_ERROR; 2053 rgd->rd_flags |= GFS2_RDF_ERROR;
2037 } 2054 }
2038 2055
2039 /** 2056 /**
2040 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation 2057 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
2041 * @ip: The inode we have just allocated blocks for 2058 * @ip: The inode we have just allocated blocks for
2042 * @rbm: The start of the allocated blocks 2059 * @rbm: The start of the allocated blocks
2043 * @len: The extent length 2060 * @len: The extent length
2044 * 2061 *
2045 * Adjusts a reservation after an allocation has taken place. If the 2062 * Adjusts a reservation after an allocation has taken place. If the
2046 * reservation does not match the allocation, or if it is now empty 2063 * reservation does not match the allocation, or if it is now empty
2047 * then it is removed. 2064 * then it is removed.
2048 */ 2065 */
2049 2066
2050 static void gfs2_adjust_reservation(struct gfs2_inode *ip, 2067 static void gfs2_adjust_reservation(struct gfs2_inode *ip,
2051 const struct gfs2_rbm *rbm, unsigned len) 2068 const struct gfs2_rbm *rbm, unsigned len)
2052 { 2069 {
2053 struct gfs2_blkreserv *rs = ip->i_res; 2070 struct gfs2_blkreserv *rs = ip->i_res;
2054 struct gfs2_rgrpd *rgd = rbm->rgd; 2071 struct gfs2_rgrpd *rgd = rbm->rgd;
2055 unsigned rlen; 2072 unsigned rlen;
2056 u64 block; 2073 u64 block;
2057 int ret; 2074 int ret;
2058 2075
2059 spin_lock(&rgd->rd_rsspin); 2076 spin_lock(&rgd->rd_rsspin);
2060 if (gfs2_rs_active(rs)) { 2077 if (gfs2_rs_active(rs)) {
2061 if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { 2078 if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2062 block = gfs2_rbm_to_block(rbm); 2079 block = gfs2_rbm_to_block(rbm);
2063 ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); 2080 ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2064 rlen = min(rs->rs_free, len); 2081 rlen = min(rs->rs_free, len);
2065 rs->rs_free -= rlen; 2082 rs->rs_free -= rlen;
2066 rgd->rd_reserved -= rlen; 2083 rgd->rd_reserved -= rlen;
2067 trace_gfs2_rs(rs, TRACE_RS_CLAIM); 2084 trace_gfs2_rs(rs, TRACE_RS_CLAIM);
2068 if (rs->rs_free && !ret) 2085 if (rs->rs_free && !ret)
2069 goto out; 2086 goto out;
2070 } 2087 }
2071 __rs_deltree(ip, rs); 2088 __rs_deltree(ip, rs);
2072 } 2089 }
2073 out: 2090 out:
2074 spin_unlock(&rgd->rd_rsspin); 2091 spin_unlock(&rgd->rd_rsspin);
2075 } 2092 }
2076 2093
2077 /** 2094 /**
2078 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode 2095 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
2079 * @ip: the inode to allocate the block for 2096 * @ip: the inode to allocate the block for
2080 * @bn: Used to return the starting block number 2097 * @bn: Used to return the starting block number
2081 * @nblocks: requested number of blocks/extent length (value/result) 2098 * @nblocks: requested number of blocks/extent length (value/result)
2082 * @dinode: 1 if we're allocating a dinode block, else 0 2099 * @dinode: 1 if we're allocating a dinode block, else 0
2083 * @generation: the generation number of the inode 2100 * @generation: the generation number of the inode
2084 * 2101 *
2085 * Returns: 0 or error 2102 * Returns: 0 or error
2086 */ 2103 */
2087 2104
2088 int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, 2105 int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2089 bool dinode, u64 *generation) 2106 bool dinode, u64 *generation)
2090 { 2107 {
2091 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2108 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2092 struct buffer_head *dibh; 2109 struct buffer_head *dibh;
2093 struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; 2110 struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
2094 unsigned int ndata; 2111 unsigned int ndata;
2095 u64 goal; 2112 u64 goal;
2096 u64 block; /* block, within the file system scope */ 2113 u64 block; /* block, within the file system scope */
2097 int error; 2114 int error;
2098 2115
2099 if (gfs2_rs_active(ip->i_res)) 2116 if (gfs2_rs_active(ip->i_res))
2100 goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); 2117 goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm);
2101 else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) 2118 else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal))
2102 goal = ip->i_goal; 2119 goal = ip->i_goal;
2103 else 2120 else
2104 goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; 2121 goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0;
2105 2122
2106 gfs2_rbm_from_block(&rbm, goal); 2123 gfs2_rbm_from_block(&rbm, goal);
2107 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); 2124 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false);
2108 2125
2109 if (error == -ENOSPC) { 2126 if (error == -ENOSPC) {
2110 gfs2_rbm_from_block(&rbm, goal); 2127 gfs2_rbm_from_block(&rbm, goal);
2111 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); 2128 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false);
2112 } 2129 }
2113 2130
2114 /* Since all blocks are reserved in advance, this shouldn't happen */ 2131 /* Since all blocks are reserved in advance, this shouldn't happen */
2115 if (error) { 2132 if (error) {
2116 fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", 2133 fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n",
2117 (unsigned long long)ip->i_no_addr, error, *nblocks, 2134 (unsigned long long)ip->i_no_addr, error, *nblocks,
2118 test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); 2135 test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags));
2119 goto rgrp_error; 2136 goto rgrp_error;
2120 } 2137 }
2121 2138
2122 gfs2_alloc_extent(&rbm, dinode, nblocks); 2139 gfs2_alloc_extent(&rbm, dinode, nblocks);
2123 block = gfs2_rbm_to_block(&rbm); 2140 block = gfs2_rbm_to_block(&rbm);
2124 rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; 2141 rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2125 if (gfs2_rs_active(ip->i_res)) 2142 if (gfs2_rs_active(ip->i_res))
2126 gfs2_adjust_reservation(ip, &rbm, *nblocks); 2143 gfs2_adjust_reservation(ip, &rbm, *nblocks);
2127 ndata = *nblocks; 2144 ndata = *nblocks;
2128 if (dinode) 2145 if (dinode)
2129 ndata--; 2146 ndata--;
2130 2147
2131 if (!dinode) { 2148 if (!dinode) {
2132 ip->i_goal = block + ndata - 1; 2149 ip->i_goal = block + ndata - 1;
2133 error = gfs2_meta_inode_buffer(ip, &dibh); 2150 error = gfs2_meta_inode_buffer(ip, &dibh);
2134 if (error == 0) { 2151 if (error == 0) {
2135 struct gfs2_dinode *di = 2152 struct gfs2_dinode *di =
2136 (struct gfs2_dinode *)dibh->b_data; 2153 (struct gfs2_dinode *)dibh->b_data;
2137 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 2154 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
2138 di->di_goal_meta = di->di_goal_data = 2155 di->di_goal_meta = di->di_goal_data =
2139 cpu_to_be64(ip->i_goal); 2156 cpu_to_be64(ip->i_goal);
2140 brelse(dibh); 2157 brelse(dibh);
2141 } 2158 }
2142 } 2159 }
2143 if (rbm.rgd->rd_free < *nblocks) { 2160 if (rbm.rgd->rd_free < *nblocks) {
2144 printk(KERN_WARNING "nblocks=%u\n", *nblocks); 2161 printk(KERN_WARNING "nblocks=%u\n", *nblocks);
2145 goto rgrp_error; 2162 goto rgrp_error;
2146 } 2163 }
2147 2164
2148 rbm.rgd->rd_free -= *nblocks; 2165 rbm.rgd->rd_free -= *nblocks;
2149 if (dinode) { 2166 if (dinode) {
2150 rbm.rgd->rd_dinodes++; 2167 rbm.rgd->rd_dinodes++;
2151 *generation = rbm.rgd->rd_igeneration++; 2168 *generation = rbm.rgd->rd_igeneration++;
2152 if (*generation == 0) 2169 if (*generation == 0)
2153 *generation = rbm.rgd->rd_igeneration++; 2170 *generation = rbm.rgd->rd_igeneration++;
2154 } 2171 }
2155 2172
2156 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); 2173 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1);
2157 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); 2174 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2158 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); 2175 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2159 2176
2160 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); 2177 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2161 if (dinode) 2178 if (dinode)
2162 gfs2_trans_add_unrevoke(sdp, block, 1); 2179 gfs2_trans_add_unrevoke(sdp, block, 1);
2163 2180
2164 /* 2181 /*
2165 * This needs reviewing to see why we cannot do the quota change 2182 * This needs reviewing to see why we cannot do the quota change
2166 * at this point in the dinode case. 2183 * at this point in the dinode case.
2167 */ 2184 */
2168 if (ndata) 2185 if (ndata)
2169 gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, 2186 gfs2_quota_change(ip, ndata, ip->i_inode.i_uid,
2170 ip->i_inode.i_gid); 2187 ip->i_inode.i_gid);
2171 2188
2172 rbm.rgd->rd_free_clone -= *nblocks; 2189 rbm.rgd->rd_free_clone -= *nblocks;
2173 trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, 2190 trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2174 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 2191 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2175 *bn = block; 2192 *bn = block;
2176 return 0; 2193 return 0;
2177 2194
2178 rgrp_error: 2195 rgrp_error:
2179 gfs2_rgrp_error(rbm.rgd); 2196 gfs2_rgrp_error(rbm.rgd);
2180 return -EIO; 2197 return -EIO;
2181 } 2198 }
2182 2199
2183 /** 2200 /**
2184 * __gfs2_free_blocks - free a contiguous run of block(s) 2201 * __gfs2_free_blocks - free a contiguous run of block(s)
2185 * @ip: the inode these blocks are being freed from 2202 * @ip: the inode these blocks are being freed from
2186 * @bstart: first block of a run of contiguous blocks 2203 * @bstart: first block of a run of contiguous blocks
2187 * @blen: the length of the block run 2204 * @blen: the length of the block run
2188 * @meta: 1 if the blocks represent metadata 2205 * @meta: 1 if the blocks represent metadata
2189 * 2206 *
2190 */ 2207 */
2191 2208
2192 void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) 2209 void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2193 { 2210 {
2194 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2211 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2195 struct gfs2_rgrpd *rgd; 2212 struct gfs2_rgrpd *rgd;
2196 2213
2197 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); 2214 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
2198 if (!rgd) 2215 if (!rgd)
2199 return; 2216 return;
2200 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); 2217 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2201 rgd->rd_free += blen; 2218 rgd->rd_free += blen;
2202 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 2219 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2203 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2220 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
2204 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2221 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2205 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2222 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2206 2223
2207 /* Directories keep their data in the metadata address space */ 2224 /* Directories keep their data in the metadata address space */
2208 if (meta || ip->i_depth) 2225 if (meta || ip->i_depth)
2209 gfs2_meta_wipe(ip, bstart, blen); 2226 gfs2_meta_wipe(ip, bstart, blen);
2210 } 2227 }
2211 2228
2212 /** 2229 /**
2213 * gfs2_free_meta - free a contiguous run of data block(s) 2230 * gfs2_free_meta - free a contiguous run of data block(s)
2214 * @ip: the inode these blocks are being freed from 2231 * @ip: the inode these blocks are being freed from
2215 * @bstart: first block of a run of contiguous blocks 2232 * @bstart: first block of a run of contiguous blocks
2216 * @blen: the length of the block run 2233 * @blen: the length of the block run
2217 * 2234 *
2218 */ 2235 */
2219 2236
2220 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) 2237 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
2221 { 2238 {
2222 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2239 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2223 2240
2224 __gfs2_free_blocks(ip, bstart, blen, 1); 2241 __gfs2_free_blocks(ip, bstart, blen, 1);
2225 gfs2_statfs_change(sdp, 0, +blen, 0); 2242 gfs2_statfs_change(sdp, 0, +blen, 0);
2226 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 2243 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
2227 } 2244 }
2228 2245
2229 void gfs2_unlink_di(struct inode *inode) 2246 void gfs2_unlink_di(struct inode *inode)
2230 { 2247 {
2231 struct gfs2_inode *ip = GFS2_I(inode); 2248 struct gfs2_inode *ip = GFS2_I(inode);
2232 struct gfs2_sbd *sdp = GFS2_SB(inode); 2249 struct gfs2_sbd *sdp = GFS2_SB(inode);
2233 struct gfs2_rgrpd *rgd; 2250 struct gfs2_rgrpd *rgd;
2234 u64 blkno = ip->i_no_addr; 2251 u64 blkno = ip->i_no_addr;
2235 2252
2236 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); 2253 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
2237 if (!rgd) 2254 if (!rgd)
2238 return; 2255 return;
2239 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); 2256 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2240 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2257 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
2241 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2258 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2242 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2259 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2243 update_rgrp_lvb_unlinked(rgd, 1); 2260 update_rgrp_lvb_unlinked(rgd, 1);
2244 } 2261 }
2245 2262
2246 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) 2263 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2247 { 2264 {
2248 struct gfs2_sbd *sdp = rgd->rd_sbd; 2265 struct gfs2_sbd *sdp = rgd->rd_sbd;
2249 struct gfs2_rgrpd *tmp_rgd; 2266 struct gfs2_rgrpd *tmp_rgd;
2250 2267
2251 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE); 2268 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
2252 if (!tmp_rgd) 2269 if (!tmp_rgd)
2253 return; 2270 return;
2254 gfs2_assert_withdraw(sdp, rgd == tmp_rgd); 2271 gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
2255 2272
2256 if (!rgd->rd_dinodes) 2273 if (!rgd->rd_dinodes)
2257 gfs2_consist_rgrpd(rgd); 2274 gfs2_consist_rgrpd(rgd);
2258 rgd->rd_dinodes--; 2275 rgd->rd_dinodes--;
2259 rgd->rd_free++; 2276 rgd->rd_free++;
2260 2277
2261 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2278 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
2262 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2279 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2263 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2280 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2264 update_rgrp_lvb_unlinked(rgd, -1); 2281 update_rgrp_lvb_unlinked(rgd, -1);
2265 2282
2266 gfs2_statfs_change(sdp, 0, +1, -1); 2283 gfs2_statfs_change(sdp, 0, +1, -1);
2267 } 2284 }
2268 2285
2269 2286
2270 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 2287 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
2271 { 2288 {
2272 gfs2_free_uninit_di(rgd, ip->i_no_addr); 2289 gfs2_free_uninit_di(rgd, ip->i_no_addr);
2273 trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); 2290 trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
2274 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); 2291 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
2275 gfs2_meta_wipe(ip, ip->i_no_addr, 1); 2292 gfs2_meta_wipe(ip, ip->i_no_addr, 1);
2276 } 2293 }
2277 2294
2278 /** 2295 /**
2279 * gfs2_check_blk_type - Check the type of a block 2296 * gfs2_check_blk_type - Check the type of a block
2280 * @sdp: The superblock 2297 * @sdp: The superblock
2281 * @no_addr: The block number to check 2298 * @no_addr: The block number to check
2282 * @type: The block type we are looking for 2299 * @type: The block type we are looking for
2283 * 2300 *
2284 * Returns: 0 if the block type matches the expected type 2301 * Returns: 0 if the block type matches the expected type
2285 * -ESTALE if it doesn't match 2302 * -ESTALE if it doesn't match
2286 * or -ve errno if something went wrong while checking 2303 * or -ve errno if something went wrong while checking
2287 */ 2304 */
2288 2305
2289 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) 2306 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
2290 { 2307 {
2291 struct gfs2_rgrpd *rgd; 2308 struct gfs2_rgrpd *rgd;
2292 struct gfs2_holder rgd_gh; 2309 struct gfs2_holder rgd_gh;
2293 int error = -EINVAL; 2310 int error = -EINVAL;
2294 2311
2295 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); 2312 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
2296 if (!rgd) 2313 if (!rgd)
2297 goto fail; 2314 goto fail;
2298 2315
2299 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); 2316 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
2300 if (error) 2317 if (error)
2301 goto fail; 2318 goto fail;
2302 2319
2303 if (gfs2_get_block_type(rgd, no_addr) != type) 2320 if (gfs2_get_block_type(rgd, no_addr) != type)
2304 error = -ESTALE; 2321 error = -ESTALE;
2305 2322
2306 gfs2_glock_dq_uninit(&rgd_gh); 2323 gfs2_glock_dq_uninit(&rgd_gh);
2307 fail: 2324 fail:
2308 return error; 2325 return error;
2309 } 2326 }
2310 2327
2311 /** 2328 /**
2312 * gfs2_rlist_add - add a RG to a list of RGs 2329 * gfs2_rlist_add - add a RG to a list of RGs
2313 * @ip: the inode 2330 * @ip: the inode
2314 * @rlist: the list of resource groups 2331 * @rlist: the list of resource groups
2315 * @block: the block 2332 * @block: the block
2316 * 2333 *
2317 * Figure out what RG a block belongs to and add that RG to the list 2334 * Figure out what RG a block belongs to and add that RG to the list
2318 * 2335 *
2319 * FIXME: Don't use NOFAIL 2336 * FIXME: Don't use NOFAIL
2320 * 2337 *
2321 */ 2338 */
2322 2339
2323 void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, 2340 void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
2324 u64 block) 2341 u64 block)
2325 { 2342 {
2326 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2343 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2327 struct gfs2_rgrpd *rgd; 2344 struct gfs2_rgrpd *rgd;
2328 struct gfs2_rgrpd **tmp; 2345 struct gfs2_rgrpd **tmp;
2329 unsigned int new_space; 2346 unsigned int new_space;
2330 unsigned int x; 2347 unsigned int x;
2331 2348
2332 if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) 2349 if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
2333 return; 2350 return;
2334 2351
2335 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) 2352 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
2336 rgd = ip->i_rgd; 2353 rgd = ip->i_rgd;
2337 else 2354 else
2338 rgd = gfs2_blk2rgrpd(sdp, block, 1); 2355 rgd = gfs2_blk2rgrpd(sdp, block, 1);
2339 if (!rgd) { 2356 if (!rgd) {
2340 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); 2357 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
2341 return; 2358 return;
2342 } 2359 }
2343 ip->i_rgd = rgd; 2360 ip->i_rgd = rgd;
2344 2361
2345 for (x = 0; x < rlist->rl_rgrps; x++) 2362 for (x = 0; x < rlist->rl_rgrps; x++)
2346 if (rlist->rl_rgd[x] == rgd) 2363 if (rlist->rl_rgd[x] == rgd)
2347 return; 2364 return;
2348 2365
2349 if (rlist->rl_rgrps == rlist->rl_space) { 2366 if (rlist->rl_rgrps == rlist->rl_space) {
2350 new_space = rlist->rl_space + 10; 2367 new_space = rlist->rl_space + 10;
2351 2368
2352 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *), 2369 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
2353 GFP_NOFS | __GFP_NOFAIL); 2370 GFP_NOFS | __GFP_NOFAIL);
2354 2371
2355 if (rlist->rl_rgd) { 2372 if (rlist->rl_rgd) {
2356 memcpy(tmp, rlist->rl_rgd, 2373 memcpy(tmp, rlist->rl_rgd,
2357 rlist->rl_space * sizeof(struct gfs2_rgrpd *)); 2374 rlist->rl_space * sizeof(struct gfs2_rgrpd *));
2358 kfree(rlist->rl_rgd); 2375 kfree(rlist->rl_rgd);
2359 } 2376 }
2360 2377
2361 rlist->rl_space = new_space; 2378 rlist->rl_space = new_space;
2362 rlist->rl_rgd = tmp; 2379 rlist->rl_rgd = tmp;
2363 } 2380 }
2364 2381
2365 rlist->rl_rgd[rlist->rl_rgrps++] = rgd; 2382 rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
2366 } 2383 }
2367 2384
2368 /** 2385 /**
2369 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate 2386 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
2370 * and initialize an array of glock holders for them 2387 * and initialize an array of glock holders for them
2371 * @rlist: the list of resource groups 2388 * @rlist: the list of resource groups
2372 * @state: the lock state to acquire the RG lock in 2389 * @state: the lock state to acquire the RG lock in
2373 * 2390 *
2374 * FIXME: Don't use NOFAIL 2391 * FIXME: Don't use NOFAIL
2375 * 2392 *
2376 */ 2393 */
2377 2394
2378 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) 2395 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2379 { 2396 {
2380 unsigned int x; 2397 unsigned int x;
2381 2398
2382 rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder), 2399 rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
2383 GFP_NOFS | __GFP_NOFAIL); 2400 GFP_NOFS | __GFP_NOFAIL);
2384 for (x = 0; x < rlist->rl_rgrps; x++) 2401 for (x = 0; x < rlist->rl_rgrps; x++)
2385 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, 2402 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
2386 state, 0, 2403 state, 0,
2387 &rlist->rl_ghs[x]); 2404 &rlist->rl_ghs[x]);
2388 } 2405 }
2389 2406
2390 /** 2407 /**
2391 * gfs2_rlist_free - free a resource group list 2408 * gfs2_rlist_free - free a resource group list
2392 * @list: the list of resource groups 2409 * @list: the list of resource groups
2393 * 2410 *
2394 */ 2411 */
2395 2412
2396 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) 2413 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
2397 { 2414 {
2398 unsigned int x; 2415 unsigned int x;
2399 2416
2400 kfree(rlist->rl_rgd); 2417 kfree(rlist->rl_rgd);
2401 2418
2402 if (rlist->rl_ghs) { 2419 if (rlist->rl_ghs) {
2403 for (x = 0; x < rlist->rl_rgrps; x++) 2420 for (x = 0; x < rlist->rl_rgrps; x++)
2404 gfs2_holder_uninit(&rlist->rl_ghs[x]); 2421 gfs2_holder_uninit(&rlist->rl_ghs[x]);
2405 kfree(rlist->rl_ghs); 2422 kfree(rlist->rl_ghs);
2406 rlist->rl_ghs = NULL; 2423 rlist->rl_ghs = NULL;
2407 } 2424 }
2408 } 2425 }
2409 2426
2410 2427
1 /* 1 /*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10 #ifndef __RGRP_DOT_H__ 10 #ifndef __RGRP_DOT_H__
11 #define __RGRP_DOT_H__ 11 #define __RGRP_DOT_H__
12 12
13 #include <linux/slab.h> 13 #include <linux/slab.h>
14 #include <linux/uaccess.h> 14 #include <linux/uaccess.h>
15 15
16 /* Since each block in the file system is represented by two bits in the 16 /* Since each block in the file system is represented by two bits in the
17 * bitmap, one 64-bit word in the bitmap will represent 32 blocks. 17 * bitmap, one 64-bit word in the bitmap will represent 32 blocks.
18 * By reserving 32 blocks at a time, we can optimize / shortcut how we search 18 * By reserving 32 blocks at a time, we can optimize / shortcut how we search
19 * through the bitmaps by looking a word at a time. 19 * through the bitmaps by looking a word at a time.
20 */ 20 */
21 #define RGRP_RSRV_MINBYTES 8 21 #define RGRP_RSRV_MINBYTES 8
22 #define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY)) 22 #define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
23 23
24 struct gfs2_rgrpd; 24 struct gfs2_rgrpd;
25 struct gfs2_sbd; 25 struct gfs2_sbd;
26 struct gfs2_holder; 26 struct gfs2_holder;
27 27
28 extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); 28 extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
29 29
30 extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact); 30 extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact);
31 extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); 31 extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
32 extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); 32 extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
33 33
34 extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); 34 extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
35 extern int gfs2_rindex_update(struct gfs2_sbd *sdp); 35 extern int gfs2_rindex_update(struct gfs2_sbd *sdp);
36 extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); 36 extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
37 extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); 37 extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
38 extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); 38 extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
39 39
40 extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); 40 extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
41 41
42 extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); 42 #define GFS2_AF_ORLOV 1
43 extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags);
43 extern void gfs2_inplace_release(struct gfs2_inode *ip); 44 extern void gfs2_inplace_release(struct gfs2_inode *ip);
44 45
45 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, 46 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
46 bool dinode, u64 *generation); 47 bool dinode, u64 *generation);
47 48
48 extern int gfs2_rs_alloc(struct gfs2_inode *ip); 49 extern int gfs2_rs_alloc(struct gfs2_inode *ip);
49 extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs); 50 extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs);
50 extern void gfs2_rs_delete(struct gfs2_inode *ip); 51 extern void gfs2_rs_delete(struct gfs2_inode *ip);
51 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); 52 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
52 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 53 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
53 extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); 54 extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
54 extern void gfs2_unlink_di(struct inode *inode); 55 extern void gfs2_unlink_di(struct inode *inode);
55 extern int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, 56 extern int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr,
56 unsigned int type); 57 unsigned int type);
57 58
58 struct gfs2_rgrp_list { 59 struct gfs2_rgrp_list {
59 unsigned int rl_rgrps; 60 unsigned int rl_rgrps;
60 unsigned int rl_space; 61 unsigned int rl_space;
61 struct gfs2_rgrpd **rl_rgd; 62 struct gfs2_rgrpd **rl_rgd;
62 struct gfs2_holder *rl_ghs; 63 struct gfs2_holder *rl_ghs;
63 }; 64 };
64 65
65 extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, 66 extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
66 u64 block); 67 u64 block);
67 extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); 68 extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
68 extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); 69 extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
69 extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); 70 extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
70 extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); 71 extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
71 extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 72 extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
72 struct buffer_head *bh, 73 struct buffer_head *bh,
73 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); 74 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
74 extern int gfs2_fitrim(struct file *filp, void __user *argp); 75 extern int gfs2_fitrim(struct file *filp, void __user *argp);
75 76
76 /* This is how to tell if a reservation is in the rgrp tree: */ 77 /* This is how to tell if a reservation is in the rgrp tree: */
77 static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs) 78 static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs)
78 { 79 {
79 return rs && !RB_EMPTY_NODE(&rs->rs_node); 80 return rs && !RB_EMPTY_NODE(&rs->rs_node);
80 } 81 }
81 82
82 #endif /* __RGRP_DOT_H__ */ 83 #endif /* __RGRP_DOT_H__ */
83 84
1 /* 1 /*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10 #include <linux/slab.h> 10 #include <linux/slab.h>
11 #include <linux/spinlock.h> 11 #include <linux/spinlock.h>
12 #include <linux/completion.h> 12 #include <linux/completion.h>
13 #include <linux/buffer_head.h> 13 #include <linux/buffer_head.h>
14 #include <linux/xattr.h> 14 #include <linux/xattr.h>
15 #include <linux/gfs2_ondisk.h> 15 #include <linux/gfs2_ondisk.h>
16 #include <asm/uaccess.h> 16 #include <asm/uaccess.h>
17 17
18 #include "gfs2.h" 18 #include "gfs2.h"
19 #include "incore.h" 19 #include "incore.h"
20 #include "acl.h" 20 #include "acl.h"
21 #include "xattr.h" 21 #include "xattr.h"
22 #include "glock.h" 22 #include "glock.h"
23 #include "inode.h" 23 #include "inode.h"
24 #include "meta_io.h" 24 #include "meta_io.h"
25 #include "quota.h" 25 #include "quota.h"
26 #include "rgrp.h" 26 #include "rgrp.h"
27 #include "trans.h" 27 #include "trans.h"
28 #include "util.h" 28 #include "util.h"
29 29
30 /** 30 /**
31 * ea_calc_size - returns the acutal number of bytes the request will take up 31 * ea_calc_size - returns the acutal number of bytes the request will take up
32 * (not counting any unstuffed data blocks) 32 * (not counting any unstuffed data blocks)
33 * @sdp: 33 * @sdp:
34 * @er: 34 * @er:
35 * @size: 35 * @size:
36 * 36 *
37 * Returns: 1 if the EA should be stuffed 37 * Returns: 1 if the EA should be stuffed
38 */ 38 */
39 39
40 static int ea_calc_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize, 40 static int ea_calc_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize,
41 unsigned int *size) 41 unsigned int *size)
42 { 42 {
43 unsigned int jbsize = sdp->sd_jbsize; 43 unsigned int jbsize = sdp->sd_jbsize;
44 44
45 /* Stuffed */ 45 /* Stuffed */
46 *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize + dsize, 8); 46 *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize + dsize, 8);
47 47
48 if (*size <= jbsize) 48 if (*size <= jbsize)
49 return 1; 49 return 1;
50 50
51 /* Unstuffed */ 51 /* Unstuffed */
52 *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize + 52 *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize +
53 (sizeof(__be64) * DIV_ROUND_UP(dsize, jbsize)), 8); 53 (sizeof(__be64) * DIV_ROUND_UP(dsize, jbsize)), 8);
54 54
55 return 0; 55 return 0;
56 } 56 }
57 57
58 static int ea_check_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize) 58 static int ea_check_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize)
59 { 59 {
60 unsigned int size; 60 unsigned int size;
61 61
62 if (dsize > GFS2_EA_MAX_DATA_LEN) 62 if (dsize > GFS2_EA_MAX_DATA_LEN)
63 return -ERANGE; 63 return -ERANGE;
64 64
65 ea_calc_size(sdp, nsize, dsize, &size); 65 ea_calc_size(sdp, nsize, dsize, &size);
66 66
67 /* This can only happen with 512 byte blocks */ 67 /* This can only happen with 512 byte blocks */
68 if (size > sdp->sd_jbsize) 68 if (size > sdp->sd_jbsize)
69 return -ERANGE; 69 return -ERANGE;
70 70
71 return 0; 71 return 0;
72 } 72 }
73 73
74 typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh, 74 typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh,
75 struct gfs2_ea_header *ea, 75 struct gfs2_ea_header *ea,
76 struct gfs2_ea_header *prev, void *private); 76 struct gfs2_ea_header *prev, void *private);
77 77
78 static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh, 78 static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
79 ea_call_t ea_call, void *data) 79 ea_call_t ea_call, void *data)
80 { 80 {
81 struct gfs2_ea_header *ea, *prev = NULL; 81 struct gfs2_ea_header *ea, *prev = NULL;
82 int error = 0; 82 int error = 0;
83 83
84 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_EA)) 84 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_EA))
85 return -EIO; 85 return -EIO;
86 86
87 for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) { 87 for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) {
88 if (!GFS2_EA_REC_LEN(ea)) 88 if (!GFS2_EA_REC_LEN(ea))
89 goto fail; 89 goto fail;
90 if (!(bh->b_data <= (char *)ea && (char *)GFS2_EA2NEXT(ea) <= 90 if (!(bh->b_data <= (char *)ea && (char *)GFS2_EA2NEXT(ea) <=
91 bh->b_data + bh->b_size)) 91 bh->b_data + bh->b_size))
92 goto fail; 92 goto fail;
93 if (!GFS2_EATYPE_VALID(ea->ea_type)) 93 if (!GFS2_EATYPE_VALID(ea->ea_type))
94 goto fail; 94 goto fail;
95 95
96 error = ea_call(ip, bh, ea, prev, data); 96 error = ea_call(ip, bh, ea, prev, data);
97 if (error) 97 if (error)
98 return error; 98 return error;
99 99
100 if (GFS2_EA_IS_LAST(ea)) { 100 if (GFS2_EA_IS_LAST(ea)) {
101 if ((char *)GFS2_EA2NEXT(ea) != 101 if ((char *)GFS2_EA2NEXT(ea) !=
102 bh->b_data + bh->b_size) 102 bh->b_data + bh->b_size)
103 goto fail; 103 goto fail;
104 break; 104 break;
105 } 105 }
106 } 106 }
107 107
108 return error; 108 return error;
109 109
110 fail: 110 fail:
111 gfs2_consist_inode(ip); 111 gfs2_consist_inode(ip);
112 return -EIO; 112 return -EIO;
113 } 113 }
114 114
115 static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data) 115 static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
116 { 116 {
117 struct buffer_head *bh, *eabh; 117 struct buffer_head *bh, *eabh;
118 __be64 *eablk, *end; 118 __be64 *eablk, *end;
119 int error; 119 int error;
120 120
121 error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &bh); 121 error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &bh);
122 if (error) 122 if (error)
123 return error; 123 return error;
124 124
125 if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT)) { 125 if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT)) {
126 error = ea_foreach_i(ip, bh, ea_call, data); 126 error = ea_foreach_i(ip, bh, ea_call, data);
127 goto out; 127 goto out;
128 } 128 }
129 129
130 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_IN)) { 130 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_IN)) {
131 error = -EIO; 131 error = -EIO;
132 goto out; 132 goto out;
133 } 133 }
134 134
135 eablk = (__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)); 135 eablk = (__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header));
136 end = eablk + GFS2_SB(&ip->i_inode)->sd_inptrs; 136 end = eablk + GFS2_SB(&ip->i_inode)->sd_inptrs;
137 137
138 for (; eablk < end; eablk++) { 138 for (; eablk < end; eablk++) {
139 u64 bn; 139 u64 bn;
140 140
141 if (!*eablk) 141 if (!*eablk)
142 break; 142 break;
143 bn = be64_to_cpu(*eablk); 143 bn = be64_to_cpu(*eablk);
144 144
145 error = gfs2_meta_read(ip->i_gl, bn, DIO_WAIT, &eabh); 145 error = gfs2_meta_read(ip->i_gl, bn, DIO_WAIT, &eabh);
146 if (error) 146 if (error)
147 break; 147 break;
148 error = ea_foreach_i(ip, eabh, ea_call, data); 148 error = ea_foreach_i(ip, eabh, ea_call, data);
149 brelse(eabh); 149 brelse(eabh);
150 if (error) 150 if (error)
151 break; 151 break;
152 } 152 }
153 out: 153 out:
154 brelse(bh); 154 brelse(bh);
155 return error; 155 return error;
156 } 156 }
157 157
158 struct ea_find { 158 struct ea_find {
159 int type; 159 int type;
160 const char *name; 160 const char *name;
161 size_t namel; 161 size_t namel;
162 struct gfs2_ea_location *ef_el; 162 struct gfs2_ea_location *ef_el;
163 }; 163 };
164 164
165 static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh, 165 static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
166 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, 166 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
167 void *private) 167 void *private)
168 { 168 {
169 struct ea_find *ef = private; 169 struct ea_find *ef = private;
170 170
171 if (ea->ea_type == GFS2_EATYPE_UNUSED) 171 if (ea->ea_type == GFS2_EATYPE_UNUSED)
172 return 0; 172 return 0;
173 173
174 if (ea->ea_type == ef->type) { 174 if (ea->ea_type == ef->type) {
175 if (ea->ea_name_len == ef->namel && 175 if (ea->ea_name_len == ef->namel &&
176 !memcmp(GFS2_EA2NAME(ea), ef->name, ea->ea_name_len)) { 176 !memcmp(GFS2_EA2NAME(ea), ef->name, ea->ea_name_len)) {
177 struct gfs2_ea_location *el = ef->ef_el; 177 struct gfs2_ea_location *el = ef->ef_el;
178 get_bh(bh); 178 get_bh(bh);
179 el->el_bh = bh; 179 el->el_bh = bh;
180 el->el_ea = ea; 180 el->el_ea = ea;
181 el->el_prev = prev; 181 el->el_prev = prev;
182 return 1; 182 return 1;
183 } 183 }
184 } 184 }
185 185
186 return 0; 186 return 0;
187 } 187 }
188 188
189 static int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, 189 static int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name,
190 struct gfs2_ea_location *el) 190 struct gfs2_ea_location *el)
191 { 191 {
192 struct ea_find ef; 192 struct ea_find ef;
193 int error; 193 int error;
194 194
195 ef.type = type; 195 ef.type = type;
196 ef.name = name; 196 ef.name = name;
197 ef.namel = strlen(name); 197 ef.namel = strlen(name);
198 ef.ef_el = el; 198 ef.ef_el = el;
199 199
200 memset(el, 0, sizeof(struct gfs2_ea_location)); 200 memset(el, 0, sizeof(struct gfs2_ea_location));
201 201
202 error = ea_foreach(ip, ea_find_i, &ef); 202 error = ea_foreach(ip, ea_find_i, &ef);
203 if (error > 0) 203 if (error > 0)
204 return 0; 204 return 0;
205 205
206 return error; 206 return error;
207 } 207 }
208 208
209 /** 209 /**
210 * ea_dealloc_unstuffed - 210 * ea_dealloc_unstuffed -
211 * @ip: 211 * @ip:
212 * @bh: 212 * @bh:
213 * @ea: 213 * @ea:
214 * @prev: 214 * @prev:
215 * @private: 215 * @private:
216 * 216 *
217 * Take advantage of the fact that all unstuffed blocks are 217 * Take advantage of the fact that all unstuffed blocks are
218 * allocated from the same RG. But watch, this may not always 218 * allocated from the same RG. But watch, this may not always
219 * be true. 219 * be true.
220 * 220 *
221 * Returns: errno 221 * Returns: errno
222 */ 222 */
223 223
224 static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, 224 static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
225 struct gfs2_ea_header *ea, 225 struct gfs2_ea_header *ea,
226 struct gfs2_ea_header *prev, void *private) 226 struct gfs2_ea_header *prev, void *private)
227 { 227 {
228 int *leave = private; 228 int *leave = private;
229 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 229 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
230 struct gfs2_rgrpd *rgd; 230 struct gfs2_rgrpd *rgd;
231 struct gfs2_holder rg_gh; 231 struct gfs2_holder rg_gh;
232 struct buffer_head *dibh; 232 struct buffer_head *dibh;
233 __be64 *dataptrs; 233 __be64 *dataptrs;
234 u64 bn = 0; 234 u64 bn = 0;
235 u64 bstart = 0; 235 u64 bstart = 0;
236 unsigned int blen = 0; 236 unsigned int blen = 0;
237 unsigned int blks = 0; 237 unsigned int blks = 0;
238 unsigned int x; 238 unsigned int x;
239 int error; 239 int error;
240 240
241 error = gfs2_rindex_update(sdp); 241 error = gfs2_rindex_update(sdp);
242 if (error) 242 if (error)
243 return error; 243 return error;
244 244
245 if (GFS2_EA_IS_STUFFED(ea)) 245 if (GFS2_EA_IS_STUFFED(ea))
246 return 0; 246 return 0;
247 247
248 dataptrs = GFS2_EA2DATAPTRS(ea); 248 dataptrs = GFS2_EA2DATAPTRS(ea);
249 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) { 249 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
250 if (*dataptrs) { 250 if (*dataptrs) {
251 blks++; 251 blks++;
252 bn = be64_to_cpu(*dataptrs); 252 bn = be64_to_cpu(*dataptrs);
253 } 253 }
254 } 254 }
255 if (!blks) 255 if (!blks)
256 return 0; 256 return 0;
257 257
258 rgd = gfs2_blk2rgrpd(sdp, bn, 1); 258 rgd = gfs2_blk2rgrpd(sdp, bn, 1);
259 if (!rgd) { 259 if (!rgd) {
260 gfs2_consist_inode(ip); 260 gfs2_consist_inode(ip);
261 return -EIO; 261 return -EIO;
262 } 262 }
263 263
264 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh); 264 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
265 if (error) 265 if (error)
266 return error; 266 return error;
267 267
268 error = gfs2_trans_begin(sdp, rgd->rd_length + RES_DINODE + 268 error = gfs2_trans_begin(sdp, rgd->rd_length + RES_DINODE +
269 RES_EATTR + RES_STATFS + RES_QUOTA, blks); 269 RES_EATTR + RES_STATFS + RES_QUOTA, blks);
270 if (error) 270 if (error)
271 goto out_gunlock; 271 goto out_gunlock;
272 272
273 gfs2_trans_add_bh(ip->i_gl, bh, 1); 273 gfs2_trans_add_bh(ip->i_gl, bh, 1);
274 274
275 dataptrs = GFS2_EA2DATAPTRS(ea); 275 dataptrs = GFS2_EA2DATAPTRS(ea);
276 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) { 276 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
277 if (!*dataptrs) 277 if (!*dataptrs)
278 break; 278 break;
279 bn = be64_to_cpu(*dataptrs); 279 bn = be64_to_cpu(*dataptrs);
280 280
281 if (bstart + blen == bn) 281 if (bstart + blen == bn)
282 blen++; 282 blen++;
283 else { 283 else {
284 if (bstart) 284 if (bstart)
285 gfs2_free_meta(ip, bstart, blen); 285 gfs2_free_meta(ip, bstart, blen);
286 bstart = bn; 286 bstart = bn;
287 blen = 1; 287 blen = 1;
288 } 288 }
289 289
290 *dataptrs = 0; 290 *dataptrs = 0;
291 gfs2_add_inode_blocks(&ip->i_inode, -1); 291 gfs2_add_inode_blocks(&ip->i_inode, -1);
292 } 292 }
293 if (bstart) 293 if (bstart)
294 gfs2_free_meta(ip, bstart, blen); 294 gfs2_free_meta(ip, bstart, blen);
295 295
296 if (prev && !leave) { 296 if (prev && !leave) {
297 u32 len; 297 u32 len;
298 298
299 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea); 299 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
300 prev->ea_rec_len = cpu_to_be32(len); 300 prev->ea_rec_len = cpu_to_be32(len);
301 301
302 if (GFS2_EA_IS_LAST(ea)) 302 if (GFS2_EA_IS_LAST(ea))
303 prev->ea_flags |= GFS2_EAFLAG_LAST; 303 prev->ea_flags |= GFS2_EAFLAG_LAST;
304 } else { 304 } else {
305 ea->ea_type = GFS2_EATYPE_UNUSED; 305 ea->ea_type = GFS2_EATYPE_UNUSED;
306 ea->ea_num_ptrs = 0; 306 ea->ea_num_ptrs = 0;
307 } 307 }
308 308
309 error = gfs2_meta_inode_buffer(ip, &dibh); 309 error = gfs2_meta_inode_buffer(ip, &dibh);
310 if (!error) { 310 if (!error) {
311 ip->i_inode.i_ctime = CURRENT_TIME; 311 ip->i_inode.i_ctime = CURRENT_TIME;
312 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 312 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
313 gfs2_dinode_out(ip, dibh->b_data); 313 gfs2_dinode_out(ip, dibh->b_data);
314 brelse(dibh); 314 brelse(dibh);
315 } 315 }
316 316
317 gfs2_trans_end(sdp); 317 gfs2_trans_end(sdp);
318 318
319 out_gunlock: 319 out_gunlock:
320 gfs2_glock_dq_uninit(&rg_gh); 320 gfs2_glock_dq_uninit(&rg_gh);
321 return error; 321 return error;
322 } 322 }
323 323
324 static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, 324 static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
325 struct gfs2_ea_header *ea, 325 struct gfs2_ea_header *ea,
326 struct gfs2_ea_header *prev, int leave) 326 struct gfs2_ea_header *prev, int leave)
327 { 327 {
328 int error; 328 int error;
329 329
330 error = gfs2_rindex_update(GFS2_SB(&ip->i_inode)); 330 error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
331 if (error) 331 if (error)
332 return error; 332 return error;
333 333
334 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 334 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
335 if (error) 335 if (error)
336 goto out_alloc; 336 goto out_alloc;
337 337
338 error = ea_dealloc_unstuffed(ip, bh, ea, prev, (leave) ? &error : NULL); 338 error = ea_dealloc_unstuffed(ip, bh, ea, prev, (leave) ? &error : NULL);
339 339
340 gfs2_quota_unhold(ip); 340 gfs2_quota_unhold(ip);
341 out_alloc: 341 out_alloc:
342 return error; 342 return error;
343 } 343 }
344 344
/*
 * Context carried through ea_foreach() while building the name list
 * for gfs2_listxattr().
 */
struct ea_list {
	struct gfs2_ea_request *ei_er;	/* the request (buffer + capacity) */
	unsigned int ei_size;		/* bytes of the buffer used so far */
};
349 349
350 static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea) 350 static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
351 { 351 {
352 switch (ea->ea_type) { 352 switch (ea->ea_type) {
353 case GFS2_EATYPE_USR: 353 case GFS2_EATYPE_USR:
354 return 5 + ea->ea_name_len + 1; 354 return 5 + ea->ea_name_len + 1;
355 case GFS2_EATYPE_SYS: 355 case GFS2_EATYPE_SYS:
356 return 7 + ea->ea_name_len + 1; 356 return 7 + ea->ea_name_len + 1;
357 case GFS2_EATYPE_SECURITY: 357 case GFS2_EATYPE_SECURITY:
358 return 9 + ea->ea_name_len + 1; 358 return 9 + ea->ea_name_len + 1;
359 default: 359 default:
360 return 0; 360 return 0;
361 } 361 }
362 } 362 }
363 363
/*
 * ea_list_i - ea_foreach() callback used by gfs2_listxattr()
 *
 * Appends the "prefix.name\0" form of one xattr to the request buffer,
 * or just accounts for its size when no buffer was supplied
 * (er->er_data_len == 0).
 */
static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
		     struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
		     void *private)
{
	struct ea_list *ei = private;
	struct gfs2_ea_request *er = ei->ei_er;
	unsigned int ea_size = gfs2_ea_strlen(ea);

	/* Deleted/unused records are skipped entirely. */
	if (ea->ea_type == GFS2_EATYPE_UNUSED)
		return 0;

	if (er->er_data_len) {
		char *prefix = NULL;
		unsigned int l = 0;
		char c = 0;	/* NUL terminator written after each name */

		if (ei->ei_size + ea_size > er->er_data_len)
			return -ERANGE;

		switch (ea->ea_type) {
		case GFS2_EATYPE_USR:
			prefix = "user.";
			l = 5;
			break;
		case GFS2_EATYPE_SYS:
			prefix = "system.";
			l = 7;
			break;
		case GFS2_EATYPE_SECURITY:
			prefix = "security.";
			l = 9;
			break;
		}

		/* NOTE(review): an unrecognised on-disk ea_type (other than
		   UNUSED) leaves l == 0 and trips this BUG_ON — confirm
		   whether corrupt media can reach here. */
		BUG_ON(l == 0);

		memcpy(er->er_data + ei->ei_size, prefix, l);
		memcpy(er->er_data + ei->ei_size + l, GFS2_EA2NAME(ea),
		       ea->ea_name_len);
		memcpy(er->er_data + ei->ei_size + ea_size - 1, &c, 1);
	}

	/* Size accumulates even in the size-query (no-buffer) case. */
	ei->ei_size += ea_size;

	return 0;
}
410 410
411 /** 411 /**
412 * gfs2_listxattr - List gfs2 extended attributes 412 * gfs2_listxattr - List gfs2 extended attributes
413 * @dentry: The dentry whose inode we are interested in 413 * @dentry: The dentry whose inode we are interested in
414 * @buffer: The buffer to write the results 414 * @buffer: The buffer to write the results
415 * @size: The size of the buffer 415 * @size: The size of the buffer
416 * 416 *
417 * Returns: actual size of data on success, -errno on error 417 * Returns: actual size of data on success, -errno on error
418 */ 418 */
419 419
420 ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) 420 ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
421 { 421 {
422 struct gfs2_inode *ip = GFS2_I(dentry->d_inode); 422 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
423 struct gfs2_ea_request er; 423 struct gfs2_ea_request er;
424 struct gfs2_holder i_gh; 424 struct gfs2_holder i_gh;
425 int error; 425 int error;
426 426
427 memset(&er, 0, sizeof(struct gfs2_ea_request)); 427 memset(&er, 0, sizeof(struct gfs2_ea_request));
428 if (size) { 428 if (size) {
429 er.er_data = buffer; 429 er.er_data = buffer;
430 er.er_data_len = size; 430 er.er_data_len = size;
431 } 431 }
432 432
433 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 433 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
434 if (error) 434 if (error)
435 return error; 435 return error;
436 436
437 if (ip->i_eattr) { 437 if (ip->i_eattr) {
438 struct ea_list ei = { .ei_er = &er, .ei_size = 0 }; 438 struct ea_list ei = { .ei_er = &er, .ei_size = 0 };
439 439
440 error = ea_foreach(ip, ea_list_i, &ei); 440 error = ea_foreach(ip, ea_list_i, &ei);
441 if (!error) 441 if (!error)
442 error = ei.ei_size; 442 error = ei.ei_size;
443 } 443 }
444 444
445 gfs2_glock_dq_uninit(&i_gh); 445 gfs2_glock_dq_uninit(&i_gh);
446 446
447 return error; 447 return error;
448 } 448 }
449 449
450 /** 450 /**
451 * ea_iter_unstuffed - copies the unstuffed xattr data to/from the 451 * ea_iter_unstuffed - copies the unstuffed xattr data to/from the
452 * request buffer 452 * request buffer
453 * @ip: The GFS2 inode 453 * @ip: The GFS2 inode
454 * @ea: The extended attribute header structure 454 * @ea: The extended attribute header structure
455 * @din: The data to be copied in 455 * @din: The data to be copied in
456 * @dout: The data to be copied out (one of din,dout will be NULL) 456 * @dout: The data to be copied out (one of din,dout will be NULL)
457 * 457 *
458 * Returns: errno 458 * Returns: errno
459 */ 459 */
460 460
461 static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, 461 static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
462 const char *din, char *dout) 462 const char *din, char *dout)
463 { 463 {
464 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 464 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
465 struct buffer_head **bh; 465 struct buffer_head **bh;
466 unsigned int amount = GFS2_EA_DATA_LEN(ea); 466 unsigned int amount = GFS2_EA_DATA_LEN(ea);
467 unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize); 467 unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
468 __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); 468 __be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
469 unsigned int x; 469 unsigned int x;
470 int error = 0; 470 int error = 0;
471 unsigned char *pos; 471 unsigned char *pos;
472 unsigned cp_size; 472 unsigned cp_size;
473 473
474 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); 474 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
475 if (!bh) 475 if (!bh)
476 return -ENOMEM; 476 return -ENOMEM;
477 477
478 for (x = 0; x < nptrs; x++) { 478 for (x = 0; x < nptrs; x++) {
479 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0, 479 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
480 bh + x); 480 bh + x);
481 if (error) { 481 if (error) {
482 while (x--) 482 while (x--)
483 brelse(bh[x]); 483 brelse(bh[x]);
484 goto out; 484 goto out;
485 } 485 }
486 dataptrs++; 486 dataptrs++;
487 } 487 }
488 488
489 for (x = 0; x < nptrs; x++) { 489 for (x = 0; x < nptrs; x++) {
490 error = gfs2_meta_wait(sdp, bh[x]); 490 error = gfs2_meta_wait(sdp, bh[x]);
491 if (error) { 491 if (error) {
492 for (; x < nptrs; x++) 492 for (; x < nptrs; x++)
493 brelse(bh[x]); 493 brelse(bh[x]);
494 goto out; 494 goto out;
495 } 495 }
496 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) { 496 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
497 for (; x < nptrs; x++) 497 for (; x < nptrs; x++)
498 brelse(bh[x]); 498 brelse(bh[x]);
499 error = -EIO; 499 error = -EIO;
500 goto out; 500 goto out;
501 } 501 }
502 502
503 pos = bh[x]->b_data + sizeof(struct gfs2_meta_header); 503 pos = bh[x]->b_data + sizeof(struct gfs2_meta_header);
504 cp_size = (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize; 504 cp_size = (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize;
505 505
506 if (dout) { 506 if (dout) {
507 memcpy(dout, pos, cp_size); 507 memcpy(dout, pos, cp_size);
508 dout += sdp->sd_jbsize; 508 dout += sdp->sd_jbsize;
509 } 509 }
510 510
511 if (din) { 511 if (din) {
512 gfs2_trans_add_bh(ip->i_gl, bh[x], 1); 512 gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
513 memcpy(pos, din, cp_size); 513 memcpy(pos, din, cp_size);
514 din += sdp->sd_jbsize; 514 din += sdp->sd_jbsize;
515 } 515 }
516 516
517 amount -= sdp->sd_jbsize; 517 amount -= sdp->sd_jbsize;
518 brelse(bh[x]); 518 brelse(bh[x]);
519 } 519 }
520 520
521 out: 521 out:
522 kfree(bh); 522 kfree(bh);
523 return error; 523 return error;
524 } 524 }
525 525
526 static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, 526 static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
527 char *data, size_t size) 527 char *data, size_t size)
528 { 528 {
529 int ret; 529 int ret;
530 size_t len = GFS2_EA_DATA_LEN(el->el_ea); 530 size_t len = GFS2_EA_DATA_LEN(el->el_ea);
531 if (len > size) 531 if (len > size)
532 return -ERANGE; 532 return -ERANGE;
533 533
534 if (GFS2_EA_IS_STUFFED(el->el_ea)) { 534 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
535 memcpy(data, GFS2_EA2DATA(el->el_ea), len); 535 memcpy(data, GFS2_EA2DATA(el->el_ea), len);
536 return len; 536 return len;
537 } 537 }
538 ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data); 538 ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data);
539 if (ret < 0) 539 if (ret < 0)
540 return ret; 540 return ret;
541 return len; 541 return len;
542 } 542 }
543 543
/**
 * gfs2_xattr_acl_get - read a system xattr into a freshly allocated buffer
 * @ip: The inode
 * @name: The name of the system xattr (without the "system." prefix)
 * @ppdata: On success, set to a kmalloc'd copy of the data which the
 *          caller must kfree()
 *
 * Returns: the data length on success, 0 if the xattr does not exist or
 *          is empty (*ppdata is then left untouched), or a negative errno
 */
int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **ppdata)
{
	struct gfs2_ea_location el;
	int error;
	int len;
	char *data;

	error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, &el);
	if (error)
		return error;
	/* Not found, or found but empty: report success with no data. */
	if (!el.el_ea)
		goto out;
	if (!GFS2_EA_DATA_LEN(el.el_ea))
		goto out;

	len = GFS2_EA_DATA_LEN(el.el_ea);
	data = kmalloc(len, GFP_NOFS);
	error = -ENOMEM;
	if (data == NULL)
		goto out;

	/* On success gfs2_ea_get_copy() returns len, which we pass on. */
	error = gfs2_ea_get_copy(ip, &el, data, len);
	if (error < 0)
		kfree(data);
	else
		*ppdata = data;
out:
	/* el.el_bh is NULL when nothing was found; brelse() tolerates that */
	brelse(el.el_bh);
	return error;
}
574 574
575 /** 575 /**
576 * gfs2_xattr_get - Get a GFS2 extended attribute 576 * gfs2_xattr_get - Get a GFS2 extended attribute
577 * @inode: The inode 577 * @inode: The inode
578 * @name: The name of the extended attribute 578 * @name: The name of the extended attribute
579 * @buffer: The buffer to write the result into 579 * @buffer: The buffer to write the result into
580 * @size: The size of the buffer 580 * @size: The size of the buffer
581 * @type: The type of extended attribute 581 * @type: The type of extended attribute
582 * 582 *
583 * Returns: actual size of data on success, -errno on error 583 * Returns: actual size of data on success, -errno on error
584 */ 584 */
585 static int gfs2_xattr_get(struct dentry *dentry, const char *name, 585 static int gfs2_xattr_get(struct dentry *dentry, const char *name,
586 void *buffer, size_t size, int type) 586 void *buffer, size_t size, int type)
587 { 587 {
588 struct gfs2_inode *ip = GFS2_I(dentry->d_inode); 588 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
589 struct gfs2_ea_location el; 589 struct gfs2_ea_location el;
590 int error; 590 int error;
591 591
592 if (!ip->i_eattr) 592 if (!ip->i_eattr)
593 return -ENODATA; 593 return -ENODATA;
594 if (strlen(name) > GFS2_EA_MAX_NAME_LEN) 594 if (strlen(name) > GFS2_EA_MAX_NAME_LEN)
595 return -EINVAL; 595 return -EINVAL;
596 596
597 error = gfs2_ea_find(ip, type, name, &el); 597 error = gfs2_ea_find(ip, type, name, &el);
598 if (error) 598 if (error)
599 return error; 599 return error;
600 if (!el.el_ea) 600 if (!el.el_ea)
601 return -ENODATA; 601 return -ENODATA;
602 if (size) 602 if (size)
603 error = gfs2_ea_get_copy(ip, &el, buffer, size); 603 error = gfs2_ea_get_copy(ip, &el, buffer, size);
604 else 604 else
605 error = GFS2_EA_DATA_LEN(el.el_ea); 605 error = GFS2_EA_DATA_LEN(el.el_ea);
606 brelse(el.el_bh); 606 brelse(el.el_bh);
607 607
608 return error; 608 return error;
609 } 609 }
610 610
611 /** 611 /**
612 * ea_alloc_blk - allocates a new block for extended attributes. 612 * ea_alloc_blk - allocates a new block for extended attributes.
613 * @ip: A pointer to the inode that's getting extended attributes 613 * @ip: A pointer to the inode that's getting extended attributes
614 * @bhp: Pointer to pointer to a struct buffer_head 614 * @bhp: Pointer to pointer to a struct buffer_head
615 * 615 *
616 * Returns: errno 616 * Returns: errno
617 */ 617 */
618 618
619 static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) 619 static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
620 { 620 {
621 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 621 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
622 struct gfs2_ea_header *ea; 622 struct gfs2_ea_header *ea;
623 unsigned int n = 1; 623 unsigned int n = 1;
624 u64 block; 624 u64 block;
625 int error; 625 int error;
626 626
627 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); 627 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
628 if (error) 628 if (error)
629 return error; 629 return error;
630 gfs2_trans_add_unrevoke(sdp, block, 1); 630 gfs2_trans_add_unrevoke(sdp, block, 1);
631 *bhp = gfs2_meta_new(ip->i_gl, block); 631 *bhp = gfs2_meta_new(ip->i_gl, block);
632 gfs2_trans_add_bh(ip->i_gl, *bhp, 1); 632 gfs2_trans_add_bh(ip->i_gl, *bhp, 1);
633 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); 633 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
634 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header)); 634 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));
635 635
636 ea = GFS2_EA_BH2FIRST(*bhp); 636 ea = GFS2_EA_BH2FIRST(*bhp);
637 ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize); 637 ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize);
638 ea->ea_type = GFS2_EATYPE_UNUSED; 638 ea->ea_type = GFS2_EATYPE_UNUSED;
639 ea->ea_flags = GFS2_EAFLAG_LAST; 639 ea->ea_flags = GFS2_EAFLAG_LAST;
640 ea->ea_num_ptrs = 0; 640 ea->ea_num_ptrs = 0;
641 641
642 gfs2_add_inode_blocks(&ip->i_inode, 1); 642 gfs2_add_inode_blocks(&ip->i_inode, 1);
643 643
644 return 0; 644 return 0;
645 } 645 }
646 646
647 /** 647 /**
648 * ea_write - writes the request info to an ea, creating new blocks if 648 * ea_write - writes the request info to an ea, creating new blocks if
649 * necessary 649 * necessary
650 * @ip: inode that is being modified 650 * @ip: inode that is being modified
651 * @ea: the location of the new ea in a block 651 * @ea: the location of the new ea in a block
652 * @er: the write request 652 * @er: the write request
653 * 653 *
654 * Note: does not update ea_rec_len or the GFS2_EAFLAG_LAST bin of ea_flags 654 * Note: does not update ea_rec_len or the GFS2_EAFLAG_LAST bin of ea_flags
655 * 655 *
656 * returns : errno 656 * returns : errno
657 */ 657 */
658 658
659 static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, 659 static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
660 struct gfs2_ea_request *er) 660 struct gfs2_ea_request *er)
661 { 661 {
662 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 662 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
663 int error; 663 int error;
664 664
665 ea->ea_data_len = cpu_to_be32(er->er_data_len); 665 ea->ea_data_len = cpu_to_be32(er->er_data_len);
666 ea->ea_name_len = er->er_name_len; 666 ea->ea_name_len = er->er_name_len;
667 ea->ea_type = er->er_type; 667 ea->ea_type = er->er_type;
668 ea->__pad = 0; 668 ea->__pad = 0;
669 669
670 memcpy(GFS2_EA2NAME(ea), er->er_name, er->er_name_len); 670 memcpy(GFS2_EA2NAME(ea), er->er_name, er->er_name_len);
671 671
672 if (GFS2_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) { 672 if (GFS2_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) {
673 ea->ea_num_ptrs = 0; 673 ea->ea_num_ptrs = 0;
674 memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len); 674 memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len);
675 } else { 675 } else {
676 __be64 *dataptr = GFS2_EA2DATAPTRS(ea); 676 __be64 *dataptr = GFS2_EA2DATAPTRS(ea);
677 const char *data = er->er_data; 677 const char *data = er->er_data;
678 unsigned int data_len = er->er_data_len; 678 unsigned int data_len = er->er_data_len;
679 unsigned int copy; 679 unsigned int copy;
680 unsigned int x; 680 unsigned int x;
681 681
682 ea->ea_num_ptrs = DIV_ROUND_UP(er->er_data_len, sdp->sd_jbsize); 682 ea->ea_num_ptrs = DIV_ROUND_UP(er->er_data_len, sdp->sd_jbsize);
683 for (x = 0; x < ea->ea_num_ptrs; x++) { 683 for (x = 0; x < ea->ea_num_ptrs; x++) {
684 struct buffer_head *bh; 684 struct buffer_head *bh;
685 u64 block; 685 u64 block;
686 int mh_size = sizeof(struct gfs2_meta_header); 686 int mh_size = sizeof(struct gfs2_meta_header);
687 unsigned int n = 1; 687 unsigned int n = 1;
688 688
689 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); 689 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
690 if (error) 690 if (error)
691 return error; 691 return error;
692 gfs2_trans_add_unrevoke(sdp, block, 1); 692 gfs2_trans_add_unrevoke(sdp, block, 1);
693 bh = gfs2_meta_new(ip->i_gl, block); 693 bh = gfs2_meta_new(ip->i_gl, block);
694 gfs2_trans_add_bh(ip->i_gl, bh, 1); 694 gfs2_trans_add_bh(ip->i_gl, bh, 1);
695 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); 695 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
696 696
697 gfs2_add_inode_blocks(&ip->i_inode, 1); 697 gfs2_add_inode_blocks(&ip->i_inode, 1);
698 698
699 copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize : 699 copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize :
700 data_len; 700 data_len;
701 memcpy(bh->b_data + mh_size, data, copy); 701 memcpy(bh->b_data + mh_size, data, copy);
702 if (copy < sdp->sd_jbsize) 702 if (copy < sdp->sd_jbsize)
703 memset(bh->b_data + mh_size + copy, 0, 703 memset(bh->b_data + mh_size + copy, 0,
704 sdp->sd_jbsize - copy); 704 sdp->sd_jbsize - copy);
705 705
706 *dataptr++ = cpu_to_be64(bh->b_blocknr); 706 *dataptr++ = cpu_to_be64(bh->b_blocknr);
707 data += copy; 707 data += copy;
708 data_len -= copy; 708 data_len -= copy;
709 709
710 brelse(bh); 710 brelse(bh);
711 } 711 }
712 712
713 gfs2_assert_withdraw(sdp, !data_len); 713 gfs2_assert_withdraw(sdp, !data_len);
714 } 714 }
715 715
716 return 0; 716 return 0;
717 } 717 }
718 718
/*
 * Callback type used by ea_alloc_skeleton(): performs the actual xattr
 * creation/update once quota, block reservation and the transaction are
 * all in place.
 */
typedef int (*ea_skeleton_call_t) (struct gfs2_inode *ip,
				   struct gfs2_ea_request *er, void *private);
721 721
/*
 * ea_alloc_skeleton - wrap an allocating xattr operation in the common
 * quota-lock / block-reservation / transaction boilerplate, then invoke
 * @skeleton_call to do the real work and finally update the dinode.
 * @blks is the number of new blocks the operation may need.
 *
 * Returns: errno
 */
static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
			     unsigned int blks,
			     ea_skeleton_call_t skeleton_call, void *private)
{
	struct buffer_head *dibh;
	int error;

	error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
	if (error)
		return error;

	error = gfs2_quota_lock_check(ip);
	if (error)
		return error;

	error = gfs2_inplace_reserve(ip, blks, 0);
	if (error)
		goto out_gunlock_q;

	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
				 blks + gfs2_rg_blocks(ip, blks) +
				 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
	if (error)
		goto out_ipres;

	error = skeleton_call(ip, er, private);
	if (error)
		goto out_end_trans;

	/* Bring the on-disk dinode up to date (ctime at minimum). */
	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		ip->i_inode.i_ctime = CURRENT_TIME;
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(ip, dibh->b_data);
		brelse(dibh);
	}

out_end_trans:
	gfs2_trans_end(GFS2_SB(&ip->i_inode));
out_ipres:
	gfs2_inplace_release(ip);
out_gunlock_q:
	gfs2_quota_unlock(ip);
	return error;
}
767 767
768 static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er, 768 static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
769 void *private) 769 void *private)
770 { 770 {
771 struct buffer_head *bh; 771 struct buffer_head *bh;
772 int error; 772 int error;
773 773
774 error = ea_alloc_blk(ip, &bh); 774 error = ea_alloc_blk(ip, &bh);
775 if (error) 775 if (error)
776 return error; 776 return error;
777 777
778 ip->i_eattr = bh->b_blocknr; 778 ip->i_eattr = bh->b_blocknr;
779 error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er); 779 error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er);
780 780
781 brelse(bh); 781 brelse(bh);
782 782
783 return error; 783 return error;
784 } 784 }
785 785
786 /** 786 /**
787 * ea_init - initializes a new eattr block 787 * ea_init - initializes a new eattr block
788 * @ip: 788 * @ip:
789 * @er: 789 * @er:
790 * 790 *
791 * Returns: errno 791 * Returns: errno
792 */ 792 */
793 793
794 static int ea_init(struct gfs2_inode *ip, int type, const char *name, 794 static int ea_init(struct gfs2_inode *ip, int type, const char *name,
795 const void *data, size_t size) 795 const void *data, size_t size)
796 { 796 {
797 struct gfs2_ea_request er; 797 struct gfs2_ea_request er;
798 unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize; 798 unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize;
799 unsigned int blks = 1; 799 unsigned int blks = 1;
800 800
801 er.er_type = type; 801 er.er_type = type;
802 er.er_name = name; 802 er.er_name = name;
803 er.er_name_len = strlen(name); 803 er.er_name_len = strlen(name);
804 er.er_data = (void *)data; 804 er.er_data = (void *)data;
805 er.er_data_len = size; 805 er.er_data_len = size;
806 806
807 if (GFS2_EAREQ_SIZE_STUFFED(&er) > jbsize) 807 if (GFS2_EAREQ_SIZE_STUFFED(&er) > jbsize)
808 blks += DIV_ROUND_UP(er.er_data_len, jbsize); 808 blks += DIV_ROUND_UP(er.er_data_len, jbsize);
809 809
810 return ea_alloc_skeleton(ip, &er, blks, ea_init_i, NULL); 810 return ea_alloc_skeleton(ip, &er, blks, ea_init_i, NULL);
811 } 811 }
812 812
/*
 * ea_split_ea - shrink @ea's record to its used size and turn the
 * remainder into a new record placed directly after it.  If @ea carried
 * the LAST flag it migrates to the new (trailing) record.
 *
 * Returns: the new trailing record
 */
static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea)
{
	u32 ea_size = GFS2_EA_SIZE(ea);
	struct gfs2_ea_header *new = (struct gfs2_ea_header *)((char *)ea +
							       ea_size);
	u32 new_size = GFS2_EA_REC_LEN(ea) - ea_size;
	int last = ea->ea_flags & GFS2_EAFLAG_LAST;

	ea->ea_rec_len = cpu_to_be32(ea_size);
	ea->ea_flags ^= last;	/* clears LAST on @ea iff it was set */

	new->ea_rec_len = cpu_to_be32(new_size);
	new->ea_flags = last;

	return new;
}
829 829
/*
 * ea_set_remove_stuffed - remove a stuffed xattr record, merging its
 * space into the preceding record where possible; otherwise just mark
 * it unused.  el->el_prev may point at an earlier record than the
 * direct predecessor, so step forward to the true predecessor first.
 */
static void ea_set_remove_stuffed(struct gfs2_inode *ip,
				  struct gfs2_ea_location *el)
{
	struct gfs2_ea_header *ea = el->el_ea;
	struct gfs2_ea_header *prev = el->el_prev;
	u32 len;

	gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);

	if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
		/* Cannot merge: just tombstone the record. */
		ea->ea_type = GFS2_EATYPE_UNUSED;
		return;
	} else if (GFS2_EA2NEXT(prev) != ea) {
		prev = GFS2_EA2NEXT(prev);
		gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), GFS2_EA2NEXT(prev) == ea);
	}

	/* Fold @ea's record length into its predecessor. */
	len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
	prev->ea_rec_len = cpu_to_be32(len);

	if (GFS2_EA_IS_LAST(ea))
		prev->ea_flags |= GFS2_EAFLAG_LAST;
}
853 853
/*
 * State shared between the ea_set_* helpers while setting an xattr.
 */
struct ea_set {
	int ea_split;	/* nonzero: split es_ea before writing into it */

	struct gfs2_ea_request *es_er;	/* the set request */
	struct gfs2_ea_location *es_el;	/* old copy of the xattr to remove,
					   or NULL */

	struct buffer_head *es_bh;	/* block holding es_ea */
	struct gfs2_ea_header *es_ea;	/* record the new xattr goes into */
};
863 863
/*
 * ea_set_simple_noalloc - write an xattr into an existing record inside
 * its own small transaction (no new blocks are reserved here, so the
 * request presumably fits stuffed — verify against callers).  Optionally
 * splits the target record first, and removes the old copy of the xattr.
 *
 * Returns: errno
 */
static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
				 struct gfs2_ea_header *ea, struct ea_set *es)
{
	struct gfs2_ea_request *er = es->es_er;
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + 2 * RES_EATTR, 0);
	if (error)
		return error;

	gfs2_trans_add_bh(ip->i_gl, bh, 1);

	/* Carve the free tail off an oversized record if required. */
	if (es->ea_split)
		ea = ea_split_ea(ea);

	ea_write(ip, ea, er);

	/* Remove any previous instance of this xattr. */
	if (es->es_el)
		ea_set_remove_stuffed(ip, es->es_el);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;
	ip->i_inode.i_ctime = CURRENT_TIME;
	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);
out:
	gfs2_trans_end(GFS2_SB(&ip->i_inode));
	return error;
}
896 896
/*
 * ea_set_simple_alloc - Completion callback for the unstuffed in-place case
 * @ip: The inode
 * @er: The set request
 * @private: The struct ea_set filled in by ea_set_simple()
 *
 * Passed to ea_alloc_skeleton() by ea_set_simple() when the entry's data
 * will not fit in the record itself. Splits the target record if needed,
 * writes the entry, and removes the stuffed entry it replaces (if any).
 *
 * Returns: errno
 */
static int ea_set_simple_alloc(struct gfs2_inode *ip,
			       struct gfs2_ea_request *er, void *private)
{
	struct ea_set *es = private;
	struct gfs2_ea_header *ea = es->es_ea;
	int error;

	gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1);

	/* Carve the new record out of an in-use record's slack space */
	if (es->ea_split)
		ea = ea_split_ea(ea);

	error = ea_write(ip, ea, er);
	if (error)
		return error;

	/* If replacing an existing stuffed entry, remove the old one */
	if (es->es_el)
		ea_set_remove_stuffed(ip, es->es_el);

	return 0;
}
918 918
/*
 * ea_set_simple - ea_foreach() callback: try to place an xattr in-place
 * @ip: The inode
 * @bh: The EA block being scanned
 * @ea: The current record
 * @prev: The previous record in the block (or NULL)
 * @private: The struct ea_set describing the request
 *
 * Looks for room among the existing records: either an unused record
 * that is large enough, or an in-use record with enough slack space to
 * be split.
 *
 * Returns: 1 if the request was satisfied here, 0 to keep scanning,
 *          or -errno on failure
 */
static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh,
			 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
			 void *private)
{
	struct ea_set *es = private;
	unsigned int size;
	int stuffed;
	int error;

	/* Compute the record size needed and whether the data can be
	   stuffed into the record itself */
	stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er->er_name_len,
			       es->es_er->er_data_len, &size);

	if (ea->ea_type == GFS2_EATYPE_UNUSED) {
		if (GFS2_EA_REC_LEN(ea) < size)
			return 0;
		/* Release the data blocks the dead record still points at */
		if (!GFS2_EA_IS_STUFFED(ea)) {
			error = ea_remove_unstuffed(ip, bh, ea, prev, 1);
			if (error)
				return error;
		}
		es->ea_split = 0;
	} else if (GFS2_EA_REC_LEN(ea) - GFS2_EA_SIZE(ea) >= size)
		es->ea_split = 1;
	else
		return 0;

	if (stuffed) {
		error = ea_set_simple_noalloc(ip, bh, ea, es);
		if (error)
			return error;
	} else {
		unsigned int blks;

		es->es_bh = bh;
		es->es_ea = ea;
		/* Two metadata blocks plus one block per sd_jbsize of data */
		blks = 2 + DIV_ROUND_UP(es->es_er->er_data_len,
					GFS2_SB(&ip->i_inode)->sd_jbsize);

		error = ea_alloc_skeleton(ip, es->es_er, blks,
					  ea_set_simple_alloc, es);
		if (error)
			return error;
	}

	return 1;
}
965 965
/*
 * ea_set_block - Add a whole new EA block to hold the requested xattr
 * @ip: The inode
 * @er: The set request
 * @private: Location of a stuffed entry being replaced, or NULL
 *
 * Allocates a fresh EA block and writes the new entry into it. If the
 * inode is not yet using an indirect EA block, one is created first and
 * the old (direct) EA block is linked in as its first pointer.
 *
 * Returns: errno
 */
static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
			void *private)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *indbh, *newbh;
	__be64 *eablk;
	int error;
	int mh_size = sizeof(struct gfs2_meta_header);

	if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
		__be64 *end;

		error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT,
				       &indbh);
		if (error)
			return error;

		if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
			error = -EIO;
			goto out;
		}

		/* Find the first free pointer slot in the indirect block */
		eablk = (__be64 *)(indbh->b_data + mh_size);
		end = eablk + sdp->sd_inptrs;

		for (; eablk < end; eablk++)
			if (!*eablk)
				break;

		if (eablk == end) {
			error = -ENOSPC;
			goto out;
		}

		gfs2_trans_add_bh(ip->i_gl, indbh, 1);
	} else {
		/* Convert the inode to indirect EA format */
		u64 blk;
		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &blk, &n, 0, NULL);
		if (error)
			return error;
		gfs2_trans_add_unrevoke(sdp, blk, 1);
		indbh = gfs2_meta_new(ip->i_gl, blk);
		gfs2_trans_add_bh(ip->i_gl, indbh, 1);
		gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
		gfs2_buffer_clear_tail(indbh, mh_size);

		/* The old direct EA block becomes the first pointer */
		eablk = (__be64 *)(indbh->b_data + mh_size);
		*eablk = cpu_to_be64(ip->i_eattr);
		ip->i_eattr = blk;
		ip->i_diskflags |= GFS2_DIF_EA_INDIRECT;
		gfs2_add_inode_blocks(&ip->i_inode, 1);

		eablk++;
	}

	/* Allocate the new EA block and write the entry into it */
	error = ea_alloc_blk(ip, &newbh);
	if (error)
		goto out;

	*eablk = cpu_to_be64((u64)newbh->b_blocknr);
	error = ea_write(ip, GFS2_EA_BH2FIRST(newbh), er);
	brelse(newbh);
	if (error)
		goto out;

	/* If replacing an existing stuffed entry, remove the old one */
	if (private)
		ea_set_remove_stuffed(ip, private);

out:
	brelse(indbh);
	return error;
}
1039 1039
/*
 * ea_set_i - Set an xattr on an inode which already has an EA block
 * @ip: The inode
 * @type: The xattr type (GFS2_EATYPE_...)
 * @name: The xattr name
 * @value: The xattr data
 * @size: The length of @value
 * @el: Location of an existing entry being replaced, or NULL
 *
 * First scans the existing EA space for a usable slot (ea_set_simple());
 * if none is found, falls back to allocating a new EA block
 * (ea_set_block()).
 *
 * Returns: errno
 */
static int ea_set_i(struct gfs2_inode *ip, int type, const char *name,
		    const void *value, size_t size, struct gfs2_ea_location *el)
{
	struct gfs2_ea_request er;
	struct ea_set es;
	unsigned int blks = 2;
	int error;

	er.er_type = type;
	er.er_name = name;
	er.er_data = (void *)value;
	er.er_name_len = strlen(name);
	er.er_data_len = size;

	memset(&es, 0, sizeof(struct ea_set));
	es.es_er = &er;
	es.es_el = el;

	/* A positive return means ea_set_simple() placed the entry */
	error = ea_foreach(ip, ea_set_simple, &es);
	if (error > 0)
		return 0;
	if (error)
		return error;

	/* One extra block if we must first create an indirect EA block */
	if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT))
		blks++;
	/* Plus one block per sd_jbsize of data if the entry is unstuffed */
	if (GFS2_EAREQ_SIZE_STUFFED(&er) > GFS2_SB(&ip->i_inode)->sd_jbsize)
		blks += DIV_ROUND_UP(er.er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize);

	return ea_alloc_skeleton(ip, &er, blks, ea_set_block, el);
}
1071 1071
1072 static int ea_set_remove_unstuffed(struct gfs2_inode *ip, 1072 static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
1073 struct gfs2_ea_location *el) 1073 struct gfs2_ea_location *el)
1074 { 1074 {
1075 if (el->el_prev && GFS2_EA2NEXT(el->el_prev) != el->el_ea) { 1075 if (el->el_prev && GFS2_EA2NEXT(el->el_prev) != el->el_ea) {
1076 el->el_prev = GFS2_EA2NEXT(el->el_prev); 1076 el->el_prev = GFS2_EA2NEXT(el->el_prev);
1077 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), 1077 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
1078 GFS2_EA2NEXT(el->el_prev) == el->el_ea); 1078 GFS2_EA2NEXT(el->el_prev) == el->el_ea);
1079 } 1079 }
1080 1080
1081 return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev, 0); 1081 return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev, 0);
1082 } 1082 }
1083 1083
/*
 * ea_remove_stuffed - Remove a stuffed xattr record from its EA block
 * @ip: The inode
 * @el: The location of the entry (el_ea) and its predecessor (el_prev)
 *
 * If the record has a predecessor, the record's space is merged into the
 * predecessor; otherwise it is simply marked unused. The dinode's ctime
 * is updated in the same transaction.
 *
 * Returns: errno
 */
static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
{
	struct gfs2_ea_header *ea = el->el_ea;
	struct gfs2_ea_header *prev = el->el_prev;
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0);
	if (error)
		return error;

	gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);

	if (prev) {
		u32 len;

		/* Fold this record's space into the previous record */
		len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
		prev->ea_rec_len = cpu_to_be32(len);

		if (GFS2_EA_IS_LAST(ea))
			prev->ea_flags |= GFS2_EAFLAG_LAST;
	} else {
		/* First record in the block: just mark it unused */
		ea->ea_type = GFS2_EATYPE_UNUSED;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		ip->i_inode.i_ctime = CURRENT_TIME;
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(ip, dibh->b_data);
		brelse(dibh);
	}

	gfs2_trans_end(GFS2_SB(&ip->i_inode));

	return error;
}
1121 1121
/**
 * gfs2_xattr_remove - Remove a GFS2 extended attribute
 * @ip: The inode
 * @type: The type of the extended attribute
 * @name: The name of the extended attribute
 *
 * This is not called directly by the VFS since we use the (common)
 * scheme of making a "set with NULL data" mean a remove request. Note
 * that this is different from a set with zero length data.
 *
 * Returns: 0, or errno on failure
 */

static int gfs2_xattr_remove(struct gfs2_inode *ip, int type, const char *name)
{
	struct gfs2_ea_location el;
	int error;

	/* No EA block at all means nothing to remove */
	if (!ip->i_eattr)
		return -ENODATA;

	error = gfs2_ea_find(ip, type, name, &el);
	if (error)
		return error;
	if (!el.el_ea)
		return -ENODATA;

	/* Stuffed and unstuffed entries are torn down differently */
	if (GFS2_EA_IS_STUFFED(el.el_ea))
		error = ea_remove_stuffed(ip, &el);
	else
		error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev, 0);

	brelse(el.el_bh);

	return error;
}
1158 1158
/**
 * __gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
 * @ip: The inode
 * @name: The name of the extended attribute
 * @value: The value of the extended attribute (NULL for remove)
 * @size: The size of the @value argument
 * @flags: Create or Replace
 * @type: The type of the extended attribute
 *
 * See gfs2_xattr_remove() for details of the removal of xattrs.
 *
 * Returns: 0 or errno on failure
 */

int __gfs2_xattr_set(struct inode *inode, const char *name,
		     const void *value, size_t size, int flags, int type)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_ea_location el;
	unsigned int namel = strlen(name);
	int error;

	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
		return -EPERM;
	if (namel > GFS2_EA_MAX_NAME_LEN)
		return -ERANGE;

	/* NULL value means "remove" (distinct from zero-length data) */
	if (value == NULL)
		return gfs2_xattr_remove(ip, type, name);

	if (ea_check_size(sdp, namel, size))
		return -ERANGE;

	/* No EA block yet: create one, unless the caller demanded replace */
	if (!ip->i_eattr) {
		if (flags & XATTR_REPLACE)
			return -ENODATA;
		return ea_init(ip, type, name, value, size);
	}

	error = gfs2_ea_find(ip, type, name, &el);
	if (error)
		return error;

	if (el.el_ea) {
		/* Existing entry: honour append-only and XATTR_CREATE */
		if (ip->i_diskflags & GFS2_DIF_APPENDONLY) {
			brelse(el.el_bh);
			return -EPERM;
		}

		error = -EEXIST;
		if (!(flags & XATTR_CREATE)) {
			int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
			error = ea_set_i(ip, type, name, value, size, &el);
			/* The replaced unstuffed entry is removed only
			   after the new one is safely in place */
			if (!error && unstuffed)
				ea_set_remove_unstuffed(ip, &el);
		}

		brelse(el.el_bh);
		return error;
	}

	/* No existing entry: honour XATTR_REPLACE */
	error = -ENODATA;
	if (!(flags & XATTR_REPLACE))
		error = ea_set_i(ip, type, name, value, size, NULL);

	return error;
}
1227 1227
1228 static int gfs2_xattr_set(struct dentry *dentry, const char *name, 1228 static int gfs2_xattr_set(struct dentry *dentry, const char *name,
1229 const void *value, size_t size, int flags, int type) 1229 const void *value, size_t size, int flags, int type)
1230 { 1230 {
1231 return __gfs2_xattr_set(dentry->d_inode, name, value, 1231 return __gfs2_xattr_set(dentry->d_inode, name, value,
1232 size, flags, type); 1232 size, flags, type);
1233 } 1233 }
1234 1234
1235 1235
/*
 * ea_acl_chmod_unstuffed - Rewrite an unstuffed xattr's data in place
 * @ip: The inode
 * @ea: The header of the (unstuffed) entry
 * @data: The new data, GFS2_EA_DATA_LEN(ea) bytes long
 *
 * Used by gfs2_xattr_acl_chmod() when the ACL's data lives in separate
 * blocks. All of the entry's data blocks are rewritten inside a single
 * transaction, which is begun and ended here.
 *
 * Returns: errno
 */
static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
				  struct gfs2_ea_header *ea, char *data)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned int amount = GFS2_EA_DATA_LEN(ea);
	unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
	int ret;

	/* One journal block per data block, plus the dinode */
	ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
	if (ret)
		return ret;

	ret = gfs2_iter_unstuffed(ip, ea, data, NULL);
	gfs2_trans_end(sdp);

	return ret;
}
1253 1253
/*
 * gfs2_xattr_acl_chmod - Update the access ACL xattr after a chmod
 * @ip: The inode
 * @attr: The attributes to apply via gfs2_setattr_simple()
 * @data: The updated ACL data
 *
 * Rewrites the GFS2_POSIX_ACL_ACCESS xattr (stuffed or unstuffed) and
 * then applies @attr to the inode.
 *
 * NOTE(review): in the stuffed path the transaction opened here stays
 * open across gfs2_setattr_simple() and is closed by the final
 * gfs2_trans_end(). In the unstuffed path, however, the transaction is
 * opened and ended inside ea_acl_chmod_unstuffed(), yet gfs2_trans_end()
 * is still called below — confirm the intended transaction lifetime for
 * that path.
 *
 * Returns: errno
 */
int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
{
	struct inode *inode = &ip->i_inode;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_ea_location el;
	int error;

	error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el);
	if (error)
		return error;

	if (GFS2_EA_IS_STUFFED(el.el_ea)) {
		/* Data fits in the record: overwrite it directly */
		error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
		if (error == 0) {
			gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1);
			memcpy(GFS2_EA2DATA(el.el_ea), data,
			       GFS2_EA_DATA_LEN(el.el_ea));
		}
	} else {
		error = ea_acl_chmod_unstuffed(ip, el.el_ea, data);
	}

	brelse(el.el_bh);
	if (error)
		return error;

	error = gfs2_setattr_simple(inode, attr);
	gfs2_trans_end(sdp);
	return error;
}
1284 1284
/*
 * ea_dealloc_indirect - Free all EA blocks referenced by the indirect block
 * @ip: The inode
 *
 * Two passes over the indirect block's pointers: the first collects the
 * resource groups covering the referenced blocks (so they can all be
 * locked before the transaction starts), the second actually frees the
 * blocks, coalescing runs of contiguous block numbers.
 *
 * Returns: errno
 */
static int ea_dealloc_indirect(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrp_list rlist;
	struct buffer_head *indbh, *dibh;
	__be64 *eablk, *end;
	unsigned int rg_blocks = 0;
	u64 bstart = 0;
	unsigned int blen = 0;
	unsigned int blks = 0;
	unsigned int x;
	int error;

	error = gfs2_rindex_update(sdp);
	if (error)
		return error;

	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));

	error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &indbh);
	if (error)
		return error;

	if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
		error = -EIO;
		goto out;
	}

	eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
	end = eablk + sdp->sd_inptrs;

	/* Pass 1: record the rgrps of every referenced EA block */
	for (; eablk < end; eablk++) {
		u64 bn;

		if (!*eablk)
			break;
		bn = be64_to_cpu(*eablk);

		/* Extend the current contiguous run or start a new one */
		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart)
				gfs2_rlist_add(ip, &rlist, bstart);
			bstart = bn;
			blen = 1;
		}
		blks++;
	}
	if (bstart)
		gfs2_rlist_add(ip, &rlist, bstart);
	else
		goto out;	/* No EA blocks at all: nothing to free */

	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);

	/* Journal space for every rgrp header we may dirty */
	for (x = 0; x < rlist.rl_rgrps; x++) {
		struct gfs2_rgrpd *rgd;
		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
		rg_blocks += rgd->rd_length;
	}

	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
	if (error)
		goto out_rlist_free;

	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT +
				 RES_STATFS + RES_QUOTA, blks);
	if (error)
		goto out_gunlock;

	gfs2_trans_add_bh(ip->i_gl, indbh, 1);

	/* Pass 2: free the blocks and zero the pointers */
	eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
	bstart = 0;
	blen = 0;

	for (; eablk < end; eablk++) {
		u64 bn;

		if (!*eablk)
			break;
		bn = be64_to_cpu(*eablk);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart)
				gfs2_free_meta(ip, bstart, blen);
			bstart = bn;
			blen = 1;
		}

		*eablk = 0;
		gfs2_add_inode_blocks(&ip->i_inode, -1);
	}
	if (bstart)
		gfs2_free_meta(ip, bstart, blen);

	ip->i_diskflags &= ~GFS2_DIF_EA_INDIRECT;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(ip, dibh->b_data);
		brelse(dibh);
	}

	gfs2_trans_end(sdp);

out_gunlock:
	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
out_rlist_free:
	gfs2_rlist_free(&rlist);
out:
	brelse(indbh);
	return error;
}
1402 1402
/*
 * ea_dealloc_block - Free the inode's (single, direct) EA block
 * @ip: The inode
 *
 * Locks the resource group containing the EA block, frees the block,
 * clears i_eattr, and writes the updated dinode back.
 *
 * Returns: errno
 */
static int ea_dealloc_block(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct buffer_head *dibh;
	struct gfs2_holder gh;
	int error;

	error = gfs2_rindex_update(sdp);
	if (error)
		return error;

	/* The EA block must belong to some rgrp; if not, the fs is corrupt */
	rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr, 1);
	if (!rgd) {
		gfs2_consist_inode(ip);
		return -EIO;
	}

	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
	if (error)
		return error;

	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_DINODE + RES_STATFS +
				 RES_QUOTA, 1);
	if (error)
		goto out_gunlock;

	gfs2_free_meta(ip, ip->i_eattr, 1);

	ip->i_eattr = 0;
	gfs2_add_inode_blocks(&ip->i_inode, -1);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(ip, dibh->b_data);
		brelse(dibh);
	}

	gfs2_trans_end(sdp);

out_gunlock:
	gfs2_glock_dq_uninit(&gh);
	return error;
}
1448 1448
/**
 * gfs2_ea_dealloc - deallocate the extended attribute fork
 * @ip: the inode
 *
 * Returns: errno
 */

int gfs2_ea_dealloc(struct gfs2_inode *ip)
{
	int error;

	error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
	if (error)
		return error;

	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		return error;

	/* First free the data blocks of every unstuffed entry */
	error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
	if (error)
		goto out_quota;

	/* Then the indirect EA blocks, if the inode has any */
	if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
		error = ea_dealloc_indirect(ip);
		if (error)
			goto out_quota;
	}

	/* Finally the (direct) EA block itself */
	error = ea_dealloc_block(ip);

out_quota:
	gfs2_quota_unhold(ip);
	return error;
}
1484 1484
1485 static const struct xattr_handler gfs2_xattr_user_handler = { 1485 static const struct xattr_handler gfs2_xattr_user_handler = {
1486 .prefix = XATTR_USER_PREFIX, 1486 .prefix = XATTR_USER_PREFIX,
1487 .flags = GFS2_EATYPE_USR, 1487 .flags = GFS2_EATYPE_USR,
1488 .get = gfs2_xattr_get, 1488 .get = gfs2_xattr_get,
1489 .set = gfs2_xattr_set, 1489 .set = gfs2_xattr_set,
1490 }; 1490 };
1491 1491
1492 static const struct xattr_handler gfs2_xattr_security_handler = { 1492 static const struct xattr_handler gfs2_xattr_security_handler = {
1493 .prefix = XATTR_SECURITY_PREFIX, 1493 .prefix = XATTR_SECURITY_PREFIX,
1494 .flags = GFS2_EATYPE_SECURITY, 1494 .flags = GFS2_EATYPE_SECURITY,
1495 .get = gfs2_xattr_get, 1495 .get = gfs2_xattr_get,
1496 .set = gfs2_xattr_set, 1496 .set = gfs2_xattr_set,
1497 }; 1497 };
1498 1498
/* Table of xattr namespace handlers registered with the VFS
   (NULL-terminated) */
const struct xattr_handler *gfs2_xattr_handlers[] = {
	&gfs2_xattr_user_handler,
	&gfs2_xattr_security_handler,
	&gfs2_xattr_system_handler,
	NULL,
};
1505 1505
1506 1506