Doug / smarc-fsl-linux-kernel

1

/* -*- mode: c; c-basic-offset: 8; -*-

1

/* -*- mode: c; c-basic-offset: 8; -*-

2

* vim: noexpandtab sw=8 ts=8 sts=0:

2

* vim: noexpandtab sw=8 ts=8 sts=0:

3

*

3

*

4

5

*

5

*

6

* This program is free software; you can redistribute it and/or

6

* This program is free software; you can redistribute it and/or

7

* modify it under the terms of the GNU General Public

7

* modify it under the terms of the GNU General Public

8

* License as published by the Free Software Foundation; either

8

* License as published by the Free Software Foundation; either

9

* version 2 of the License, or (at your option) any later version.

9

* version 2 of the License, or (at your option) any later version.

10

*

10

*

11

* This program is distributed in the hope that it will be useful,

11

* This program is distributed in the hope that it will be useful,

12

* but WITHOUT ANY WARRANTY; without even the implied warranty of

12

* but WITHOUT ANY WARRANTY; without even the implied warranty of

13

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

13

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

14

* General Public License for more details.

14

* General Public License for more details.

15

*

15

*

16

* You should have received a copy of the GNU General Public

16

* You should have received a copy of the GNU General Public

17

* License along with this program; if not, write to the

17

* License along with this program; if not, write to the

18

* Free Software Foundation, Inc., 59 Temple Place - Suite 330,

18

* Free Software Foundation, Inc., 59 Temple Place - Suite 330,

19

* Boston, MA 021110-1307, USA.

19

* Boston, MA 021110-1307, USA.

20

*/

20

*/

21

22

#include <linux/fs.h>

22

#include <linux/fs.h>

23

#include <linux/slab.h>

23

#include <linux/slab.h>

24

#include <linux/highmem.h>

24

#include <linux/highmem.h>

25

#include <linux/pagemap.h>

25

#include <linux/pagemap.h>

26

#include <asm/byteorder.h>

26

#include <asm/byteorder.h>

27

#include <linux/swap.h>

27

#include <linux/swap.h>

28

#include <linux/pipe_fs_i.h>

28

#include <linux/pipe_fs_i.h>

29

#include <linux/mpage.h>

29

#include <linux/mpage.h>

30

31

#define MLOG_MASK_PREFIX ML_FILE_IO

31

#define MLOG_MASK_PREFIX ML_FILE_IO

32

#include <cluster/masklog.h>

32

#include <cluster/masklog.h>

33

34

#include "ocfs2.h"

34

#include "ocfs2.h"

35

36

#include "alloc.h"

36

#include "alloc.h"

37

#include "aops.h"

37

#include "aops.h"

38

#include "dlmglue.h"

38

#include "dlmglue.h"

39

#include "extent_map.h"

39

#include "extent_map.h"

40

#include "file.h"

40

#include "file.h"

41

#include "inode.h"

41

#include "inode.h"

42

#include "journal.h"

42

#include "journal.h"

43

#include "suballoc.h"

43

#include "suballoc.h"

44

#include "super.h"

44

#include "super.h"

45

#include "symlink.h"

45

#include "symlink.h"

46

47

#include "buffer_head_io.h"

47

#include "buffer_head_io.h"

48

49

/*
 * get_block callback used for symlink inodes whose target is stored in
 * allocated blocks (fast symlinks must never reach here, see the BUG_ON).
 *
 * Maps logical block @iblock of @inode onto @bh_result, using the first
 * extent record of the on-disk inode as the base physical block.  If the
 * inode is new and @bh_result is not yet up to date, the block contents
 * are copied from the buffer cache into the page behind @bh_result.
 *
 * @create is only logged; this function never allocates.
 *
 * Returns 0 on success and -EIO on any failure.
 */
static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	int err = -EIO;
	int status;
	struct ocfs2_dinode *fe = NULL;
	struct buffer_head *bh = NULL;
	struct buffer_head *buffer_cache_bh = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	void *kaddr;

	mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
		   (unsigned long long)iblock, bh_result, create);

	BUG_ON(ocfs2_inode_is_fast_symlink(inode));

	/* A symlink target can never extend beyond PATH_MAX bytes. */
	if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
		mlog(ML_ERROR, "block offset > PATH_MAX: %llu\n",
		     (unsigned long long)iblock);
		goto bail;
	}

	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
				  &bh, OCFS2_BH_CACHED, inode);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	fe = (struct ocfs2_dinode *) bh->b_data;

	if (!OCFS2_IS_VALID_DINODE(fe)) {
		mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
		     (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
		     fe->i_signature);
		goto bail;
	}

	if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
						    le32_to_cpu(fe->i_clusters))) {
		mlog(ML_ERROR, "block offset is outside the allocated size: "
		     "%llu\n", (unsigned long long)iblock);
		goto bail;
	}

	/* We don't use the page cache to create symlink data, so if
	 * need be, copy it over from the buffer cache. */
	if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) {
		u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) +
			    iblock;
		buffer_cache_bh = sb_getblk(osb->sb, blkno);
		if (!buffer_cache_bh) {
			mlog(ML_ERROR, "couldn't getblock for symlink!\n");
			goto bail;
		}

		/* we haven't locked out transactions, so a commit
		 * could've happened. Since we've got a reference on
		 * the bh, even if it commits while we're doing the
		 * copy, the data is still good. */
		if (buffer_jbd(buffer_cache_bh)
		    && ocfs2_inode_is_new(inode)) {
			kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
			if (!kaddr) {
				mlog(ML_ERROR, "couldn't kmap!\n");
				/* buffer_cache_bh is released at bail */
				goto bail;
			}
			memcpy(kaddr + (bh_result->b_size * iblock),
			       buffer_cache_bh->b_data,
			       bh_result->b_size);
			kunmap_atomic(kaddr, KM_USER0);
			set_buffer_uptodate(bh_result);
		}
	}

	map_bh(bh_result, inode->i_sb,
	       le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock);

	err = 0;

bail:
	/*
	 * Single cleanup point: both references are dropped on every exit
	 * path.  brelse() is called on a possibly-NULL bh elsewhere in this
	 * file, so no guards are used here either.
	 */
	brelse(buffer_cache_bh);
	brelse(bh);

	mlog_exit(err);
	return err;
}

137

138

/*
 * Generic get_block callback for ocfs2 data pages.
 *
 * Symlink inodes are delegated to ocfs2_symlink_get_block().  For all
 * other inodes the logical block @iblock is looked up through
 * ocfs2_extent_map_get_blocks() and, when it is backed by a written
 * extent, mapped onto @bh_result.  bh_result->b_size is updated to the
 * number of bytes covered by the mapping (capped at the size passed in).
 *
 * Nothing is allocated here.  On volumes without sparse allocation,
 * BH_New is set when @create is non-zero and @iblock lies at or past
 * the block count derived from i_size.
 *
 * Returns 0 on success; any negative error is reported as -EIO.
 */
static int ocfs2_get_block(struct inode *inode, sector_t iblock,
			   struct buffer_head *bh_result, int create)
{
	int err = 0;
	unsigned int ext_flags;
	u64 max_blocks = bh_result->b_size >> inode->i_blkbits;
	/*
	 * p_blkno is printed on the lookup failure path below, so it must
	 * hold a defined value even if the lookup did not write it.
	 */
	u64 p_blkno = 0, count = 0, past_eof;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
		   (unsigned long long)iblock, bh_result, create);

	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
		mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n",
		     inode, inode->i_ino);

	if (S_ISLNK(inode->i_mode)) {
		/* this always does I/O for some reason. */
		err = ocfs2_symlink_get_block(inode, iblock, bh_result, create);
		goto bail;
	}

	err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count,
					  &ext_flags);
	if (err) {
		mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
		     "%llu, NULL)\n", err, inode, (unsigned long long)iblock,
		     (unsigned long long)p_blkno);
		goto bail;
	}

	/* Never report more blocks than the caller asked for. */
	if (max_blocks < count)
		count = max_blocks;

	/*
	 * ocfs2 never allocates in this function - the only time we
	 * need to use BH_New is when we're extending i_size on a file
	 * system which doesn't support holes, in which case BH_New
	 * allows block_prepare_write() to zero.
	 */
	mlog_bug_on_msg(create && p_blkno == 0 && ocfs2_sparse_alloc(osb),
			"ino %lu, iblock %llu\n", inode->i_ino,
			(unsigned long long)iblock);

	/* Treat the unwritten extent as a hole for zeroing purposes. */
	if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
		map_bh(bh_result, inode->i_sb, p_blkno);

	bh_result->b_size = count << inode->i_blkbits;

	if (!ocfs2_sparse_alloc(osb)) {
		if (p_blkno == 0) {
			/* A hole on a volume that cannot have holes. */
			err = -EIO;
			mlog(ML_ERROR,
			     "iblock = %llu p_blkno = %llu blkno=(%llu)\n",
			     (unsigned long long)iblock,
			     (unsigned long long)p_blkno,
			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
			mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);
			dump_stack();
		}

		past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
		mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
		     (unsigned long long)past_eof);

		if (create && (iblock >= past_eof))
			set_buffer_new(bh_result);
	}

bail:
	/* Callers only ever see -EIO, whatever the underlying error was. */
	if (err < 0)
		err = -EIO;

	mlog_exit(err);
	return err;
}

215

216

int ocfs2_read_inline_data(struct inode *inode, struct page *page,

216

int ocfs2_read_inline_data(struct inode *inode, struct page *page,

217

struct buffer_head *di_bh)

217

struct buffer_head *di_bh)

218

{

218

{

219

void *kaddr;

219

void *kaddr;

220

loff_t size;

220

loff_t size;

221

struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;

221

struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;

222

223

if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) {

223

if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) {

224

ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag",

224

ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag",

225

(unsigned long long)OCFS2_I(inode)->ip_blkno);

225

(unsigned long long)OCFS2_I(inode)->ip_blkno);

226

return -EROFS;

226

return -EROFS;

227

}

227

}

228

229

size = i_size_read(inode);

229

size = i_size_read(inode);

230

231

if (size > PAGE_CACHE_SIZE ||

231

if (size > PAGE_CACHE_SIZE ||

232

size > ocfs2_max_inline_data(inode->i_sb)) {

232

size > ocfs2_max_inline_data(inode->i_sb)) {

233

ocfs2_error(inode->i_sb,

233

ocfs2_error(inode->i_sb,

234

"Inode %llu has with inline data has bad size: %Lu",

234

"Inode %llu has with inline data has bad size: %Lu",

235

(unsigned long long)OCFS2_I(inode)->ip_blkno,

235

(unsigned long long)OCFS2_I(inode)->ip_blkno,

236

(unsigned long long)size);

236

(unsigned long long)size);

237

return -EROFS;

237

return -EROFS;

238

}

238

}

239

240

kaddr = kmap_atomic(page, KM_USER0);

240

kaddr = kmap_atomic(page, KM_USER0);

241

if (size)

241

if (size)

242

memcpy(kaddr, di->id2.i_data.id_data, size);

242

memcpy(kaddr, di->id2.i_data.id_data, size);

243

/* Clear the remaining part of the page */

243

/* Clear the remaining part of the page */

244

memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);

244

memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);

245

flush_dcache_page(page);

245

flush_dcache_page(page);

246

kunmap_atomic(kaddr, KM_USER0);

246

kunmap_atomic(kaddr, KM_USER0);

247

248

SetPageUptodate(page);

248

SetPageUptodate(page);

249

250

return 0;

250

return 0;

251

}

251

}

252

253

static int ocfs2_readpage_inline(struct inode *inode, struct page *page)

253

static int ocfs2_readpage_inline(struct inode *inode, struct page *page)

254

{

254

{

255

int ret;

255

int ret;

256

struct buffer_head *di_bh = NULL;

256

struct buffer_head *di_bh = NULL;

257

struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

257

struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

258

259

BUG_ON(!PageLocked(page));

259

BUG_ON(!PageLocked(page));

260

BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));

260

BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));

261

262

ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh,

262

ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh,

263

OCFS2_BH_CACHED, inode);

263

OCFS2_BH_CACHED, inode);

264

if (ret) {

264

if (ret) {

265

mlog_errno(ret);

265

mlog_errno(ret);

266

goto out;

266

goto out;

267

}

267

}

268

269

ret = ocfs2_read_inline_data(inode, page, di_bh);

269

ret = ocfs2_read_inline_data(inode, page, di_bh);

270

out:

270

out:

271

unlock_page(page);

271

unlock_page(page);

272

273

brelse(di_bh);

273

brelse(di_bh);

274

return ret;

274

return ret;

275

}

275

}

276

277

static int ocfs2_readpage(struct file *file, struct page *page)

277

static int ocfs2_readpage(struct file *file, struct page *page)

278

{

278

{

279

struct inode *inode = page->mapping->host;

279

struct inode *inode = page->mapping->host;

280

struct ocfs2_inode_info *oi = OCFS2_I(inode);

280

struct ocfs2_inode_info *oi = OCFS2_I(inode);

281

loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;

281

loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;

282

int ret, unlock = 1;

282

int ret, unlock = 1;

283

284

mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0));

284

mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0));

285

286

ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page);

286

ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page);

287

if (ret != 0) {

287

if (ret != 0) {

288

if (ret == AOP_TRUNCATED_PAGE)

288

if (ret == AOP_TRUNCATED_PAGE)

289

unlock = 0;

289

unlock = 0;

290

mlog_errno(ret);

290

mlog_errno(ret);

291

goto out;

291

goto out;

292

}

292

}

293

294

if (down_read_trylock(&oi->ip_alloc_sem) == 0) {

294

if (down_read_trylock(&oi->ip_alloc_sem) == 0) {

295

ret = AOP_TRUNCATED_PAGE;

295

ret = AOP_TRUNCATED_PAGE;

296

goto out_inode_unlock;

296

goto out_inode_unlock;

297

}

297

}

298

299

/*

299

/*

300

* i_size might have just been updated as we grabed the meta lock. We

300

* i_size might have just been updated as we grabed the meta lock. We

301

* might now be discovering a truncate that hit on another node.

301

* might now be discovering a truncate that hit on another node.

302

* block_read_full_page->get_block freaks out if it is asked to read

302

* block_read_full_page->get_block freaks out if it is asked to read

303

* beyond the end of a file, so we check here. Callers

303

* beyond the end of a file, so we check here. Callers

304

* (generic_file_read, vm_ops->fault) are clever enough to check i_size

304

* (generic_file_read, vm_ops->fault) are clever enough to check i_size

305

* and notice that the page they just read isn't needed.

305

* and notice that the page they just read isn't needed.

306

*

306

*

307

* XXX sys_readahead() seems to get that wrong?

307

* XXX sys_readahead() seems to get that wrong?

308

*/

308

*/

309

if (start >= i_size_read(inode)) {

309

if (start >= i_size_read(inode)) {

310

zero_user(page, 0, PAGE_SIZE);

310

zero_user(page, 0, PAGE_SIZE);

311

SetPageUptodate(page);

311

SetPageUptodate(page);

312

ret = 0;

312

ret = 0;

313

goto out_alloc;

313

goto out_alloc;

314

}

314

}

315

316

if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)

316

if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)

317

ret = ocfs2_readpage_inline(inode, page);

317

ret = ocfs2_readpage_inline(inode, page);

318

else

318

else

319

ret = block_read_full_page(page, ocfs2_get_block);

319

ret = block_read_full_page(page, ocfs2_get_block);

320

unlock = 0;

320

unlock = 0;

321

322

out_alloc:

322

out_alloc:

323

up_read(&OCFS2_I(inode)->ip_alloc_sem);

323

up_read(&OCFS2_I(inode)->ip_alloc_sem);

324

out_inode_unlock:

324

out_inode_unlock:

325

ocfs2_inode_unlock(inode, 0);

325

ocfs2_inode_unlock(inode, 0);

326

out:

326

out:

327

if (unlock)

327

if (unlock)

328

unlock_page(page);

328

unlock_page(page);

329

mlog_exit(ret);

329

mlog_exit(ret);

330

return ret;

330

return ret;

331

}

331

}

332

333

/*

333

/*

334

* This is used only for read-ahead. Failures or difficult to handle

334

* This is used only for read-ahead. Failures or difficult to handle

335

* situations are safe to ignore.

335

* situations are safe to ignore.

336

*

336

*

337

* Right now, we don't bother with BH_Boundary - in-inode extent lists

337

* Right now, we don't bother with BH_Boundary - in-inode extent lists

338

* are quite large (243 extents on 4k blocks), so most inodes don't

338

* are quite large (243 extents on 4k blocks), so most inodes don't

339

* grow out to a tree. If need be, detecting boundary extents could

339

* grow out to a tree. If need be, detecting boundary extents could

340

* trivially be added in a future version of ocfs2_get_block().

340

* trivially be added in a future version of ocfs2_get_block().

341

*/

341

*/

342

static int ocfs2_readpages(struct file *filp, struct address_space *mapping,

342

static int ocfs2_readpages(struct file *filp, struct address_space *mapping,

343

struct list_head *pages, unsigned nr_pages)

343

struct list_head *pages, unsigned nr_pages)

344

{

344

{

345

int ret, err = -EIO;

345

int ret, err = -EIO;

346

struct inode *inode = mapping->host;

346

struct inode *inode = mapping->host;

347

struct ocfs2_inode_info *oi = OCFS2_I(inode);

347

struct ocfs2_inode_info *oi = OCFS2_I(inode);

348

loff_t start;

348

loff_t start;

349

struct page *last;

349

struct page *last;

350

351

/*

351

/*

352

* Use the nonblocking flag for the dlm code to avoid page

352

* Use the nonblocking flag for the dlm code to avoid page

353

* lock inversion, but don't bother with retrying.

353

* lock inversion, but don't bother with retrying.

354

*/

354

*/

355

ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK);

355

ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK);

356

if (ret)

356

if (ret)

357

return err;

357

return err;

358

359

if (down_read_trylock(&oi->ip_alloc_sem) == 0) {

359

if (down_read_trylock(&oi->ip_alloc_sem) == 0) {

360

ocfs2_inode_unlock(inode, 0);

360

ocfs2_inode_unlock(inode, 0);

361

return err;

361

return err;

362

}

362

}

363

364

/*

364

/*

365

* Don't bother with inline-data. There isn't anything

365

* Don't bother with inline-data. There isn't anything

366

* to read-ahead in that case anyway...

366

* to read-ahead in that case anyway...

367

*/

367

*/

368

if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)

368

if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)

369

goto out_unlock;

369

goto out_unlock;

370

371

/*

371

/*

372

* Check whether a remote node truncated this file - we just

372

* Check whether a remote node truncated this file - we just

373

* drop out in that case as it's not worth handling here.

373

* drop out in that case as it's not worth handling here.

374

*/

374

*/

375

last = list_entry(pages->prev, struct page, lru);

375

last = list_entry(pages->prev, struct page, lru);

376

start = (loff_t)last->index << PAGE_CACHE_SHIFT;

376

start = (loff_t)last->index << PAGE_CACHE_SHIFT;

377

if (start >= i_size_read(inode))

377

if (start >= i_size_read(inode))

378

goto out_unlock;

378

goto out_unlock;

379

380

err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block);

380

err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block);

381

382

out_unlock:

382

out_unlock:

383

up_read(&oi->ip_alloc_sem);

383

up_read(&oi->ip_alloc_sem);

384

ocfs2_inode_unlock(inode, 0);

384

ocfs2_inode_unlock(inode, 0);

385

386

return err;

386

return err;

387

}

387

}

388

389

/* Note: Because we don't support holes, our allocation has

389

/* Note: Because we don't support holes, our allocation has

390

* already happened (allocation writes zeros to the file data)

390

* already happened (allocation writes zeros to the file data)

391

* so we don't have to worry about ordered writes in

391

* so we don't have to worry about ordered writes in

392

* ocfs2_writepage.

392

* ocfs2_writepage.

393

*

393

*

394

* ->writepage is called during the process of invalidating the page cache

394

* ->writepage is called during the process of invalidating the page cache

395

* during blocked lock processing. It can't block on any cluster locks

395

* during blocked lock processing. It can't block on any cluster locks

396

* to during block mapping. It's relying on the fact that the block

396

* to during block mapping. It's relying on the fact that the block

397

* mapping can't have disappeared under the dirty pages that it is

397

* mapping can't have disappeared under the dirty pages that it is

398

* being asked to write back.

398

* being asked to write back.

399

*/

399

*/

400

static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)

400

static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)

401

{

401

{

402

int ret;

402

int ret;

403

404

mlog_entry("(0x%p)\n", page);

404

mlog_entry("(0x%p)\n", page);

405

406

ret = block_write_full_page(page, ocfs2_get_block, wbc);

406

ret = block_write_full_page(page, ocfs2_get_block, wbc);

407

408

mlog_exit(ret);

408

mlog_exit(ret);

409

410

return ret;

410

return ret;

411

}

411

}

412

413

/*

413

/*

414

* This is called from ocfs2_write_zero_page() which has handled it's

414

* This is called from ocfs2_write_zero_page() which has handled it's

415

* own cluster locking and has ensured allocation exists for those

415

* own cluster locking and has ensured allocation exists for those

416

* blocks to be written.

416

* blocks to be written.

417

*/

417

*/

418

int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,

418

int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,

419

unsigned from, unsigned to)

419

unsigned from, unsigned to)

420

{

420

{

421

int ret;

421

int ret;

422

423

ret = block_prepare_write(page, from, to, ocfs2_get_block);

423

ret = block_prepare_write(page, from, to, ocfs2_get_block);

424

425

return ret;

425

return ret;

426

}

426

}

427

428

/* Taken from ext3. We don't necessarily need the full blown

428

/* Taken from ext3. We don't necessarily need the full blown

429

* functionality yet, but IMHO it's better to cut and paste the whole

429

* functionality yet, but IMHO it's better to cut and paste the whole

430

* thing so we can avoid introducing our own bugs (and easily pick up

430

* thing so we can avoid introducing our own bugs (and easily pick up

431

* their fixes when they happen) --Mark */

431

* their fixes when they happen) --Mark */

432

int walk_page_buffers( handle_t *handle,

432

int walk_page_buffers( handle_t *handle,

433

struct buffer_head *head,

433

struct buffer_head *head,

434

unsigned from,

434

unsigned from,

435

unsigned to,

435

unsigned to,

436

int *partial,

436

int *partial,

437

int (*fn)( handle_t *handle,

437

int (*fn)( handle_t *handle,

438

struct buffer_head *bh))

438

struct buffer_head *bh))

439

{

439

{

440

struct buffer_head *bh;

440

struct buffer_head *bh;

441

unsigned block_start, block_end;

441

unsigned block_start, block_end;

442

unsigned blocksize = head->b_size;

442

unsigned blocksize = head->b_size;

443

int err, ret = 0;

443

int err, ret = 0;

444

struct buffer_head *next;

444

struct buffer_head *next;

445

446

for ( bh = head, block_start = 0;

446

for ( bh = head, block_start = 0;

447

ret == 0 && (bh != head || !block_start);

447

ret == 0 && (bh != head || !block_start);

448

block_start = block_end, bh = next)

448

block_start = block_end, bh = next)

449

{

449

{

450

next = bh->b_this_page;

450

next = bh->b_this_page;

451

block_end = block_start + blocksize;

451

block_end = block_start + blocksize;

452

if (block_end <= from || block_start >= to) {

452

if (block_end <= from || block_start >= to) {

453

if (partial && !buffer_uptodate(bh))

453

if (partial && !buffer_uptodate(bh))

454

*partial = 1;

454

*partial = 1;

455

continue;

455

continue;

456

}

456

}

457

err = (*fn)(handle, bh);

457

err = (*fn)(handle, bh);

458

if (!ret)

458

if (!ret)

459

ret = err;

459

ret = err;

460

}

460

}

461

return ret;

461

return ret;

462

}

462

}

463

464

handle_t *ocfs2_start_walk_page_trans(struct inode *inode,

464

handle_t *ocfs2_start_walk_page_trans(struct inode *inode,

465

struct page *page,

465

struct page *page,

466

unsigned from,

466

unsigned from,

467

unsigned to)

467

unsigned to)

468

{

468

{

469

struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

469

struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

470

handle_t *handle = NULL;

470

handle_t *handle;

471

int ret = 0;

471

int ret = 0;

472

473

handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);

473

handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);

474

if (!handle) {

474

if (IS_ERR(handle)) {

475

ret = -ENOMEM;

475

ret = -ENOMEM;

476

mlog_errno(ret);

476

mlog_errno(ret);

477

goto out;

477

goto out;

478

}

478

}

479

480

if (ocfs2_should_order_data(inode)) {

480

if (ocfs2_should_order_data(inode)) {

481

ret = walk_page_buffers(handle,

481

ret = walk_page_buffers(handle,

482

page_buffers(page),

482

page_buffers(page),

483

from, to, NULL,

483

from, to, NULL,

484

ocfs2_journal_dirty_data);

484

ocfs2_journal_dirty_data);

485

if (ret < 0)

485

if (ret < 0)

486

mlog_errno(ret);

486

mlog_errno(ret);

487

}

487

}

488

out:

488

out:

489

if (ret) {

489

if (ret) {

490

if (handle)

490

if (!IS_ERR(handle))

491

ocfs2_commit_trans(osb, handle);

491

ocfs2_commit_trans(osb, handle);

492

handle = ERR_PTR(ret);

492

handle = ERR_PTR(ret);

493

}

493

}

494

return handle;

494

return handle;

495

}

495

}

496

497

/*
 * ->bmap implementation: translate logical block @block of the inode
 * behind @mapping into a physical block number.
 *
 * Returns the physical block number, or 0 if the lookup failed or the
 * cluster lock could not be taken.  Inline-data inodes skip the extent
 * lookup and therefore also report 0.
 */
static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	sector_t result;
	u64 phys = 0;
	int err = 0;

	mlog_entry("(block = %llu)\n", (unsigned long long)block);

	/* We don't need to lock journal system files, since they aren't
	 * accessed concurrently from multiple nodes.
	 */
	if (!INODE_JOURNAL(inode)) {
		err = ocfs2_inode_lock(inode, NULL, 0);
		if (err) {
			if (err != -ENOENT)
				mlog_errno(err);
			goto bail;
		}
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
	}

	if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
		err = ocfs2_extent_map_get_blocks(inode, block, &phys, NULL,
						  NULL);

	/* Drop the locks in the reverse order they were taken. */
	if (!INODE_JOURNAL(inode)) {
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		ocfs2_inode_unlock(inode, 0);
	}

	if (err) {
		mlog(ML_ERROR, "get_blocks() failed, block = %llu\n",
		     (unsigned long long)block);
		mlog_errno(err);
	}

bail:
	/* Errors are reported to the caller as block 0. */
	result = err ? 0 : phys;

	mlog_exit((int)result);

	return result;
}

541

}

542

543

/*

543

/*

544

* TODO: Make this into a generic get_blocks function.

544

* TODO: Make this into a generic get_blocks function.

545

*

545

*

546

* From do_direct_io in direct-io.c:

546

* From do_direct_io in direct-io.c:

547

* "So what we do is to permit the ->get_blocks function to populate

547

* "So what we do is to permit the ->get_blocks function to populate

548

* bh.b_size with the size of IO which is permitted at this offset and

548

* bh.b_size with the size of IO which is permitted at this offset and

549

* this i_blkbits."

549

* this i_blkbits."

550

*

550

*

551

* This function is called directly from get_more_blocks in direct-io.c.

551

* This function is called directly from get_more_blocks in direct-io.c.

552

*

552

*

553

* called like this: dio->get_blocks(dio->inode, fs_startblk,

553

* called like this: dio->get_blocks(dio->inode, fs_startblk,

554

* fs_count, map_bh, dio->rw == WRITE);

554

* fs_count, map_bh, dio->rw == WRITE);

555

*/

555

*/

556

static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,

556

static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,

557

struct buffer_head *bh_result, int create)

557

struct buffer_head *bh_result, int create)

558

{

558

{

559

int ret;

559

int ret;

560

u64 p_blkno, inode_blocks, contig_blocks;

560

u64 p_blkno, inode_blocks, contig_blocks;

561

unsigned int ext_flags;

561

unsigned int ext_flags;

562

unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;

562

unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;

563

unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;

563

unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;

564

565

/* This function won't even be called if the request isn't all

565

/* This function won't even be called if the request isn't all

566

* nicely aligned and of the right size, so there's no need

566

* nicely aligned and of the right size, so there's no need

567

* for us to check any of that. */

567

* for us to check any of that. */

568

569

inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));

569

inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));

570

571

/*

571

/*

572

* Any write past EOF is not allowed because we'd be extending.

572

* Any write past EOF is not allowed because we'd be extending.

573

*/

573

*/

574

if (create && (iblock + max_blocks) > inode_blocks) {

574

if (create && (iblock + max_blocks) > inode_blocks) {

575

ret = -EIO;

575

ret = -EIO;

576

goto bail;

576

goto bail;

577

}

577

}

578

579

/* This figures out the size of the next contiguous block, and

579

/* This figures out the size of the next contiguous block, and

580

* our logical offset */

580

* our logical offset */

581

ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,

581

ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,

582

&contig_blocks, &ext_flags);

582

&contig_blocks, &ext_flags);

583

if (ret) {

583

if (ret) {

584

mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",

584

mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",

585

(unsigned long long)iblock);

585

(unsigned long long)iblock);

586

ret = -EIO;

586

ret = -EIO;

587

goto bail;

587

goto bail;

588

}

588

}

589

590

if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) {

590

if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) {

591

ocfs2_error(inode->i_sb,

591

ocfs2_error(inode->i_sb,

592

"Inode %llu has a hole at block %llu\n",

592

"Inode %llu has a hole at block %llu\n",

593

(unsigned long long)OCFS2_I(inode)->ip_blkno,

593

(unsigned long long)OCFS2_I(inode)->ip_blkno,

594

(unsigned long long)iblock);

594

(unsigned long long)iblock);

595

ret = -EROFS;

595

ret = -EROFS;

596

goto bail;

596

goto bail;

597

}

597

}

598

599

/*

599

/*

600

* get_more_blocks() expects us to describe a hole by clearing

600

* get_more_blocks() expects us to describe a hole by clearing

601

* the mapped bit on bh_result().

601

* the mapped bit on bh_result().

602

*

602

*

603

* Consider an unwritten extent as a hole.

603

* Consider an unwritten extent as a hole.

604

*/

604

*/

605

if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))

605

if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))

606

map_bh(bh_result, inode->i_sb, p_blkno);

606

map_bh(bh_result, inode->i_sb, p_blkno);

607

else {

607

else {

608

/*

608

/*

609

* ocfs2_prepare_inode_for_write() should have caught

609

* ocfs2_prepare_inode_for_write() should have caught

610

* the case where we'd be filling a hole and triggered

610

* the case where we'd be filling a hole and triggered

611

* a buffered write instead.

611

* a buffered write instead.

612

*/

612

*/

613

if (create) {

613

if (create) {

614

ret = -EIO;

614

ret = -EIO;

615

mlog_errno(ret);

615

mlog_errno(ret);

616

goto bail;

616

goto bail;

617

}

617

}

618

619

clear_buffer_mapped(bh_result);

619

clear_buffer_mapped(bh_result);

620

}

620

}

621

622

/* make sure we don't map more than max_blocks blocks here as

622

/* make sure we don't map more than max_blocks blocks here as

623

that's all the kernel will handle at this point. */

623

that's all the kernel will handle at this point. */

624

if (max_blocks < contig_blocks)

624

if (max_blocks < contig_blocks)

625

contig_blocks = max_blocks;

625

contig_blocks = max_blocks;

626

bh_result->b_size = contig_blocks << blocksize_bits;

626

bh_result->b_size = contig_blocks << blocksize_bits;

627

bail:

627

bail:

628

return ret;

628

return ret;

629

}

629

}

630

631

/*

631

/*

632

* ocfs2_dio_end_io is called by the dio core when a dio is finished. We're

632

* ocfs2_dio_end_io is called by the dio core when a dio is finished. We're

633

* particularly interested in the aio/dio case. Like the core uses

633

* particularly interested in the aio/dio case. Like the core uses

634

* i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from

634

* i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from

635

* truncation on another.

635

* truncation on another.

636

*/

636

*/

637

static void ocfs2_dio_end_io(struct kiocb *iocb,

637

static void ocfs2_dio_end_io(struct kiocb *iocb,

638

loff_t offset,

638

loff_t offset,

639

ssize_t bytes,

639

ssize_t bytes,

640

void *private)

640

void *private)

641

{

641

{

642

struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;

642

struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;

643

int level;

643

int level;

644

645

/* this io's submitter should not have unlocked this before we could */

645

/* this io's submitter should not have unlocked this before we could */

646

BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));

646

BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));

647

648

ocfs2_iocb_clear_rw_locked(iocb);

648

ocfs2_iocb_clear_rw_locked(iocb);

649

650

level = ocfs2_iocb_rw_locked_level(iocb);

650

level = ocfs2_iocb_rw_locked_level(iocb);

651

if (!level)

651

if (!level)

652

up_read(&inode->i_alloc_sem);

652

up_read(&inode->i_alloc_sem);

653

ocfs2_rw_unlock(inode, level);

653

ocfs2_rw_unlock(inode, level);

654

}

654

}

655

656

/*

656

/*

657

* ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen

657

* ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen

658

* from ext3. PageChecked() bits have been removed as OCFS2 does not

658

* from ext3. PageChecked() bits have been removed as OCFS2 does not

659

* do journalled data.

659

* do journalled data.

660

*/

660

*/

661

static void ocfs2_invalidatepage(struct page *page, unsigned long offset)

661

static void ocfs2_invalidatepage(struct page *page, unsigned long offset)

662

{

662

{

663

journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;

663

journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;

664

665

journal_invalidatepage(journal, page, offset);

665

journal_invalidatepage(journal, page, offset);

666

}

666

}

667

668

/*
 * Ask the journal layer to free the buffers attached to @page.
 * Returns 0 without calling into the journal when the page has no
 * buffers; otherwise returns whatever journal_try_to_free_buffers()
 * reports.
 */
static int ocfs2_releasepage(struct page *page, gfp_t wait)
{
	struct ocfs2_super *osb = OCFS2_SB(page->mapping->host->i_sb);
	journal_t *jnl = osb->journal->j_journal;

	return page_has_buffers(page) ?
		journal_try_to_free_buffers(jnl, page, wait) : 0;
}

676

677

static ssize_t ocfs2_direct_IO(int rw,

677

static ssize_t ocfs2_direct_IO(int rw,

678

struct kiocb *iocb,

678

struct kiocb *iocb,

679

const struct iovec *iov,

679

const struct iovec *iov,

680

loff_t offset,

680

loff_t offset,

681

unsigned long nr_segs)

681

unsigned long nr_segs)

682

{

682

{

683

struct file *file = iocb->ki_filp;

683

struct file *file = iocb->ki_filp;

684

struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;

684

struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;

685

int ret;

685

int ret;

686

687

mlog_entry_void();

687

mlog_entry_void();

688

689

/*

689

/*

690

* Fallback to buffered I/O if we see an inode without

690

* Fallback to buffered I/O if we see an inode without

691

* extents.

691

* extents.

692

*/

692

*/

693

if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)

693

if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)

694

return 0;

694

return 0;

695

696

ret = blockdev_direct_IO_no_locking(rw, iocb, inode,

696

ret = blockdev_direct_IO_no_locking(rw, iocb, inode,

697

inode->i_sb->s_bdev, iov, offset,

697

inode->i_sb->s_bdev, iov, offset,

698

nr_segs,

698

nr_segs,

699

ocfs2_direct_IO_get_blocks,

699

ocfs2_direct_IO_get_blocks,

700

ocfs2_dio_end_io);

700

ocfs2_dio_end_io);

701

702

mlog_exit(ret);

702

mlog_exit(ret);

703

return ret;

703

return ret;

704

}

704

}

705

706

static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,

706

static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,

707

u32 cpos,

707

u32 cpos,

708

unsigned int *start,

708

unsigned int *start,

709

unsigned int *end)

709

unsigned int *end)

710

{

710

{

711

unsigned int cluster_start = 0, cluster_end = PAGE_CACHE_SIZE;

711

unsigned int cluster_start = 0, cluster_end = PAGE_CACHE_SIZE;

712

713

if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) {

713

if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) {

714

unsigned int cpp;

714

unsigned int cpp;

715

716

cpp = 1 << (PAGE_CACHE_SHIFT - osb->s_clustersize_bits);

716

cpp = 1 << (PAGE_CACHE_SHIFT - osb->s_clustersize_bits);

717

718

cluster_start = cpos % cpp;

718

cluster_start = cpos % cpp;

719

cluster_start = cluster_start << osb->s_clustersize_bits;

719

cluster_start = cluster_start << osb->s_clustersize_bits;

720

721

cluster_end = cluster_start + osb->s_clustersize;

721

cluster_end = cluster_start + osb->s_clustersize;

722

}

722

}

723

724

BUG_ON(cluster_start > PAGE_SIZE);

724

BUG_ON(cluster_start > PAGE_SIZE);

725

BUG_ON(cluster_end > PAGE_SIZE);

725

BUG_ON(cluster_end > PAGE_SIZE);

726

727

if (start)

727

if (start)

728

*start = cluster_start;

728

*start = cluster_start;

729

if (end)

729

if (end)

730

*end = cluster_end;

730

*end = cluster_end;

731

}

731

}

732

733

/*

733

/*

734

* 'from' and 'to' are the region in the page to avoid zeroing.

734

* 'from' and 'to' are the region in the page to avoid zeroing.

735

*

735

*

736

* If pagesize > clustersize, this function will avoid zeroing outside

736

* If pagesize > clustersize, this function will avoid zeroing outside

737

* of the cluster boundary.

737

* of the cluster boundary.

738

*

738

*

739

* from == to == 0 is code for "zero the entire cluster region"

739

* from == to == 0 is code for "zero the entire cluster region"

740

*/

740

*/

741

static void ocfs2_clear_page_regions(struct page *page,

741

static void ocfs2_clear_page_regions(struct page *page,

742

struct ocfs2_super *osb, u32 cpos,

742

struct ocfs2_super *osb, u32 cpos,

743

unsigned from, unsigned to)

743

unsigned from, unsigned to)

744

{

744

{

745

void *kaddr;

745

void *kaddr;

746

unsigned int cluster_start, cluster_end;

746

unsigned int cluster_start, cluster_end;

747

748

ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end);

748

ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end);

749

750

kaddr = kmap_atomic(page, KM_USER0);

750

kaddr = kmap_atomic(page, KM_USER0);

751

752

if (from || to) {

752

if (from || to) {

753

if (from > cluster_start)

753

if (from > cluster_start)

754

memset(kaddr + cluster_start, 0, from - cluster_start);

754

memset(kaddr + cluster_start, 0, from - cluster_start);

755

if (to < cluster_end)

755

if (to < cluster_end)

756

memset(kaddr + to, 0, cluster_end - to);

756

memset(kaddr + to, 0, cluster_end - to);

757

} else {

757

} else {

758

memset(kaddr + cluster_start, 0, cluster_end - cluster_start);

758

memset(kaddr + cluster_start, 0, cluster_end - cluster_start);

759

}

759

}

760

761

kunmap_atomic(kaddr, KM_USER0);

761

kunmap_atomic(kaddr, KM_USER0);

762

}

762

}

763

764

/*

764

/*

765

* Nonsparse file systems fully allocate before we get to the write

765

* Nonsparse file systems fully allocate before we get to the write

766

* code. This prevents ocfs2_write() from tagging the write as an

766

* code. This prevents ocfs2_write() from tagging the write as an

767

* allocating one, which means ocfs2_map_page_blocks() might try to

767

* allocating one, which means ocfs2_map_page_blocks() might try to

768

* read-in the blocks at the tail of our file. Avoid reading them by

768

* read-in the blocks at the tail of our file. Avoid reading them by

769

* testing i_size against each block offset.

769

* testing i_size against each block offset.

770

*/

770

*/

771

static int ocfs2_should_read_blk(struct inode *inode, struct page *page,

771

static int ocfs2_should_read_blk(struct inode *inode, struct page *page,

772

unsigned int block_start)

772

unsigned int block_start)

773

{

773

{

774

u64 offset = page_offset(page) + block_start;

774

u64 offset = page_offset(page) + block_start;

775

776

if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))

776

if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))

777

return 1;

777

return 1;

778

779

if (i_size_read(inode) > offset)

779

if (i_size_read(inode) > offset)

780

return 1;

780

return 1;

781

782

return 0;

782

return 0;

783

}

783

}

784

785

/*

785

/*

786

* Some of this taken from block_prepare_write(). We already have our

786

* Some of this taken from block_prepare_write(). We already have our

787

* mapping by now though, and the entire write will be allocating or

787

* mapping by now though, and the entire write will be allocating or

788

* it won't, so not much need to use BH_New.

788

* it won't, so not much need to use BH_New.

789

*

789

*

790

* This will also skip zeroing, which is handled externally.

790

* This will also skip zeroing, which is handled externally.

791

*/

791

*/

792

int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,

792

int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,

793

struct inode *inode, unsigned int from,

793

struct inode *inode, unsigned int from,

794

unsigned int to, int new)

794

unsigned int to, int new)

795

{

795

{

796

int ret = 0;

796

int ret = 0;

797

struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;

797

struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;

798

unsigned int block_end, block_start;

798

unsigned int block_end, block_start;

799

unsigned int bsize = 1 << inode->i_blkbits;

799

unsigned int bsize = 1 << inode->i_blkbits;

800

801

if (!page_has_buffers(page))

801

if (!page_has_buffers(page))

802

create_empty_buffers(page, bsize, 0);

802

create_empty_buffers(page, bsize, 0);

803

804

head = page_buffers(page);

804

head = page_buffers(page);

805

for (bh = head, block_start = 0; bh != head || !block_start;

805

for (bh = head, block_start = 0; bh != head || !block_start;

806

bh = bh->b_this_page, block_start += bsize) {

806

bh = bh->b_this_page, block_start += bsize) {

807

block_end = block_start + bsize;

807

block_end = block_start + bsize;

808

809

clear_buffer_new(bh);

809

clear_buffer_new(bh);

810

811

/*

811

/*

812

* Ignore blocks outside of our i/o range -

812

* Ignore blocks outside of our i/o range -

813

* they may belong to unallocated clusters.

813

* they may belong to unallocated clusters.

814

*/

814

*/

815

if (block_start >= to || block_end <= from) {

815

if (block_start >= to || block_end <= from) {

816

if (PageUptodate(page))

816

if (PageUptodate(page))

817

set_buffer_uptodate(bh);

817

set_buffer_uptodate(bh);

818

continue;

818

continue;

819

}

819

}

820

821

/*

821

/*

822

* For an allocating write with cluster size >= page

822

* For an allocating write with cluster size >= page

823

* size, we always write the entire page.

823

* size, we always write the entire page.

824

*/

824

*/

825

if (new)

825

if (new)

826

set_buffer_new(bh);

826

set_buffer_new(bh);

827

828

if (!buffer_mapped(bh)) {

828

if (!buffer_mapped(bh)) {

829

map_bh(bh, inode->i_sb, *p_blkno);

829

map_bh(bh, inode->i_sb, *p_blkno);

830

unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);

830

unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);

831

}

831

}

832

833

if (PageUptodate(page)) {

833

if (PageUptodate(page)) {

834

if (!buffer_uptodate(bh))

834

if (!buffer_uptodate(bh))

835

set_buffer_uptodate(bh);

835

set_buffer_uptodate(bh);

836

} else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&

836

} else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&

837

!buffer_new(bh) &&

837

!buffer_new(bh) &&

838

ocfs2_should_read_blk(inode, page, block_start) &&

838

ocfs2_should_read_blk(inode, page, block_start) &&

839

(block_start < from || block_end > to)) {

839

(block_start < from || block_end > to)) {

840

ll_rw_block(READ, 1, &bh);

840

ll_rw_block(READ, 1, &bh);

841

*wait_bh++=bh;

841

*wait_bh++=bh;

842

}

842

}

843

844

*p_blkno = *p_blkno + 1;

844

*p_blkno = *p_blkno + 1;

845

}

845

}

846

847

/*

847

/*

848

* If we issued read requests - let them complete.

848

* If we issued read requests - let them complete.

849

*/

849

*/

850

while(wait_bh > wait) {

850

while(wait_bh > wait) {

851

wait_on_buffer(*--wait_bh);

851

wait_on_buffer(*--wait_bh);

852

if (!buffer_uptodate(*wait_bh))

852

if (!buffer_uptodate(*wait_bh))

853

ret = -EIO;

853

ret = -EIO;

854

}

854

}

855

856

if (ret == 0 || !new)

856

if (ret == 0 || !new)

857

return ret;

857

return ret;

858

859

/*

859

/*

860

* If we get -EIO above, zero out any newly allocated blocks

860

* If we get -EIO above, zero out any newly allocated blocks

861

* to avoid exposing stale data.

861

* to avoid exposing stale data.

862

*/

862

*/

863

bh = head;

863

bh = head;

864

block_start = 0;

864

block_start = 0;

865

do {

865

do {

866

block_end = block_start + bsize;

866

block_end = block_start + bsize;

867

if (block_end <= from)

867

if (block_end <= from)

868

goto next_bh;

868

goto next_bh;

869

if (block_start >= to)

869

if (block_start >= to)

870

break;

870

break;

871

872

zero_user(page, block_start, bh->b_size);

872

zero_user(page, block_start, bh->b_size);

873

set_buffer_uptodate(bh);

873

set_buffer_uptodate(bh);

874

mark_buffer_dirty(bh);

874

mark_buffer_dirty(bh);

875

876

next_bh:

876

next_bh:

877

block_start = block_end;

877

block_start = block_end;

878

bh = bh->b_this_page;

878

bh = bh->b_this_page;

879

} while (bh != head);

879

} while (bh != head);

880

881

return ret;

881

return ret;

882

}

882

}

883

884

/*
 * Upper bound on the number of pages one write context tracks: the
 * number of pages in the largest cluster, or a single page when a page
 * is at least as large as the largest cluster.
 */
#if (PAGE_CACHE_SIZE < OCFS2_MAX_CLUSTERSIZE)
#define OCFS2_MAX_CTXT_PAGES	(OCFS2_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE)
#else
#define OCFS2_MAX_CTXT_PAGES	1
#endif

/* The most clusters that can fit in a single page. */
#define OCFS2_MAX_CLUSTERS_PER_PAGE	(PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE)

891

892

/*

892

/*

893

* Describe the state of a single cluster to be written to.

893

* Describe the state of a single cluster to be written to.

894

*/

894

*/

895

struct ocfs2_write_cluster_desc {

895

struct ocfs2_write_cluster_desc {

896

u32 c_cpos;

896

u32 c_cpos;

897

u32 c_phys;

897

u32 c_phys;

898

/*

898

/*

899

* Give this a unique field because c_phys eventually gets

899

* Give this a unique field because c_phys eventually gets

900

* filled.

900

* filled.

901

*/

901

*/

902

unsigned c_new;

902

unsigned c_new;

903

unsigned c_unwritten;

903

unsigned c_unwritten;

904

};

904

};

905

906

static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)

906

static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)

907

{

907

{

908

return d->c_new || d->c_unwritten;

908

return d->c_new || d->c_unwritten;

909

}

909

}

910

911

struct ocfs2_write_ctxt {

911

struct ocfs2_write_ctxt {

912

/* Logical cluster position / len of write */

912

/* Logical cluster position / len of write */

913

u32 w_cpos;

913

u32 w_cpos;

914

u32 w_clen;

914

u32 w_clen;

915

916

struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];

916

struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];

917

918

/*

918

/*

919

* This is true if page_size > cluster_size.

919

* This is true if page_size > cluster_size.

920

*

920

*

921

* It triggers a set of special cases during write which might

921

* It triggers a set of special cases during write which might

922

* have to deal with allocating writes to partial pages.

922

* have to deal with allocating writes to partial pages.

923

*/

923

*/

924

unsigned int w_large_pages;

924

unsigned int w_large_pages;

925

926

/*

926

/*

927

* Pages involved in this write.

927

* Pages involved in this write.

928

*

928

*

929

* w_target_page is the page being written to by the user.

929

* w_target_page is the page being written to by the user.

930

*

930

*

931

* w_pages is an array of pages which always contains

931

* w_pages is an array of pages which always contains

932

* w_target_page, and in the case of an allocating write with

932

* w_target_page, and in the case of an allocating write with

933

* page_size < cluster size, it will contain zero'd and mapped

933

* page_size < cluster size, it will contain zero'd and mapped

934

* pages adjacent to w_target_page which need to be written

934

* pages adjacent to w_target_page which need to be written

935

* out in so that future reads from that region will get

935

* out in so that future reads from that region will get

936

* zero's.

936

* zero's.

937

*/

937

*/

938

struct page *w_pages[OCFS2_MAX_CTXT_PAGES];

938

struct page *w_pages[OCFS2_MAX_CTXT_PAGES];

939

unsigned int w_num_pages;

939

unsigned int w_num_pages;

940

struct page *w_target_page;

940

struct page *w_target_page;

941

942

/*

942

/*

943

* ocfs2_write_end() uses this to know what the real range to

943

* ocfs2_write_end() uses this to know what the real range to

944

* write in the target should be.

944

* write in the target should be.

945

*/

945

*/

946

unsigned int w_target_from;

946

unsigned int w_target_from;

947

unsigned int w_target_to;

947

unsigned int w_target_to;

948

949

/*

949

/*

950

* We could use journal_current_handle() but this is cleaner,

950

* We could use journal_current_handle() but this is cleaner,

951

* IMHO -Mark

951

* IMHO -Mark

952

*/

952

*/

953

handle_t *w_handle;

953

handle_t *w_handle;

954

955

struct buffer_head *w_di_bh;

955

struct buffer_head *w_di_bh;

956

957

struct ocfs2_cached_dealloc_ctxt w_dealloc;

957

struct ocfs2_cached_dealloc_ctxt w_dealloc;

958

};

958

};

959

960

/*
 * Unlock each page in @pages[0..@num_pages), mark it accessed and drop
 * one page cache reference on it.  NULL slots are skipped.
 */
void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
{
	int idx;

	for (idx = 0; idx < num_pages; idx++) {
		struct page *pg = pages[idx];

		if (!pg)
			continue;

		unlock_page(pg);
		mark_page_accessed(pg);
		page_cache_release(pg);
	}
}

972

973

static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)

973

static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)

974

{

974

{

975

ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);

975

ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);

976

977

brelse(wc->w_di_bh);

977

brelse(wc->w_di_bh);

978

kfree(wc);

978

kfree(wc);

979

}

979

}

980

981

static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,

981

static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,

982

struct ocfs2_super *osb, loff_t pos,

982

struct ocfs2_super *osb, loff_t pos,

983

unsigned len, struct buffer_head *di_bh)

983

unsigned len, struct buffer_head *di_bh)

984

{

984

{

985

u32 cend;

985

u32 cend;

986

struct ocfs2_write_ctxt *wc;

986

struct ocfs2_write_ctxt *wc;

987

988

wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS);

988

wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS);

989

if (!wc)

989

if (!wc)

990

return -ENOMEM;

990

return -ENOMEM;

991

992

wc->w_cpos = pos >> osb->s_clustersize_bits;

992

wc->w_cpos = pos >> osb->s_clustersize_bits;

993

cend = (pos + len - 1) >> osb->s_clustersize_bits;

993

cend = (pos + len - 1) >> osb->s_clustersize_bits;

994

wc->w_clen = cend - wc->w_cpos + 1;

994

wc->w_clen = cend - wc->w_cpos + 1;

995

get_bh(di_bh);

995

get_bh(di_bh);

996

wc->w_di_bh = di_bh;

996

wc->w_di_bh = di_bh;

997

998

if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits))

998

if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits))

999

wc->w_large_pages = 1;

999

wc->w_large_pages = 1;

1000

else

1000

else

1001

wc->w_large_pages = 0;

1001

wc->w_large_pages = 0;

1002

1003

ocfs2_init_dealloc_ctxt(&wc->w_dealloc);

1003

ocfs2_init_dealloc_ctxt(&wc->w_dealloc);

1004

1005

*wcp = wc;

1005

*wcp = wc;

1006

1007

return 0;

1007

return 0;

1008

}

1008

}

1009

1010

/*

1010

/*

1011

* If a page has any new buffers, zero them out here, and mark them uptodate

1011

* If a page has any new buffers, zero them out here, and mark them uptodate

1012

* and dirty so they'll be written out (in order to prevent uninitialised

1012

* and dirty so they'll be written out (in order to prevent uninitialised

1013

* block data from leaking). And clear the new bit.

1013

* block data from leaking). And clear the new bit.

1014

*/

1014

*/

1015

static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to)

1015

static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to)

1016

{

1016

{

1017

unsigned int block_start, block_end;

1017

unsigned int block_start, block_end;

1018

struct buffer_head *head, *bh;

1018

struct buffer_head *head, *bh;

1019

1020

BUG_ON(!PageLocked(page));

1020

BUG_ON(!PageLocked(page));

1021

if (!page_has_buffers(page))

1021

if (!page_has_buffers(page))

1022

return;

1022

return;

1023

1024

bh = head = page_buffers(page);

1024

bh = head = page_buffers(page);

1025

block_start = 0;

1025

block_start = 0;

1026

do {

1026

do {

1027

block_end = block_start + bh->b_size;

1027

block_end = block_start + bh->b_size;

1028

1029

if (buffer_new(bh)) {

1029

if (buffer_new(bh)) {

1030

if (block_end > from && block_start < to) {

1030

if (block_end > from && block_start < to) {

1031

if (!PageUptodate(page)) {

1031

if (!PageUptodate(page)) {

1032

unsigned start, end;

1032

unsigned start, end;

1033

1034

start = max(from, block_start);

1034

start = max(from, block_start);

1035

end = min(to, block_end);

1035

end = min(to, block_end);

1036

1037

zero_user_segment(page, start, end);

1037

zero_user_segment(page, start, end);

1038

set_buffer_uptodate(bh);

1038

set_buffer_uptodate(bh);

1039

}

1039

}

1040

1041

clear_buffer_new(bh);

1041

clear_buffer_new(bh);

1042

mark_buffer_dirty(bh);

1042

mark_buffer_dirty(bh);

1043

}

1043

}

1044

}

1044

}

1045

1046

block_start = block_end;

1046

block_start = block_end;

1047

bh = bh->b_this_page;

1047

bh = bh->b_this_page;

1048

} while (bh != head);

1048

} while (bh != head);

1049

}

1049

}

1050

1051

/*

1051

/*

1052

* Only called when we have a failure during allocating write to write

1052

* Only called when we have a failure during allocating write to write

1053

* zero's to the newly allocated region.

1053

* zero's to the newly allocated region.

1054

*/

1054

*/

1055

static void ocfs2_write_failure(struct inode *inode,

1055

static void ocfs2_write_failure(struct inode *inode,

1056

struct ocfs2_write_ctxt *wc,

1056

struct ocfs2_write_ctxt *wc,

1057

loff_t user_pos, unsigned user_len)

1057

loff_t user_pos, unsigned user_len)

1058

{

1058

{

1059

int i;

1059

int i;

1060

unsigned from = user_pos & (PAGE_CACHE_SIZE - 1),

1060

unsigned from = user_pos & (PAGE_CACHE_SIZE - 1),

1061

to = user_pos + user_len;

1061

to = user_pos + user_len;

1062

struct page *tmppage;

1062

struct page *tmppage;

1063

1064

ocfs2_zero_new_buffers(wc->w_target_page, from, to);

1064

ocfs2_zero_new_buffers(wc->w_target_page, from, to);

1065

1066

for(i = 0; i < wc->w_num_pages; i++) {

1066

for(i = 0; i < wc->w_num_pages; i++) {

1067

tmppage = wc->w_pages[i];

1067

tmppage = wc->w_pages[i];

1068

1069

if (ocfs2_should_order_data(inode))

1069

if (ocfs2_should_order_data(inode))

1070

walk_page_buffers(wc->w_handle, page_buffers(tmppage),

1070

walk_page_buffers(wc->w_handle, page_buffers(tmppage),

1071

from, to, NULL,

1071

from, to, NULL,

1072

ocfs2_journal_dirty_data);

1072

ocfs2_journal_dirty_data);

1073

1074

block_commit_write(tmppage, from, to);

1074

block_commit_write(tmppage, from, to);

1075

}

1075

}

1076

}

1076

}

1077

1078

static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,

1078

static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,

1079

struct ocfs2_write_ctxt *wc,

1079

struct ocfs2_write_ctxt *wc,

1080

struct page *page, u32 cpos,

1080

struct page *page, u32 cpos,

1081

loff_t user_pos, unsigned user_len,

1081

loff_t user_pos, unsigned user_len,

1082

int new)

1082

int new)

1083

{

1083

{

1084

int ret;

1084

int ret;

1085

unsigned int map_from = 0, map_to = 0;

1085

unsigned int map_from = 0, map_to = 0;

1086

unsigned int cluster_start, cluster_end;

1086

unsigned int cluster_start, cluster_end;

1087

unsigned int user_data_from = 0, user_data_to = 0;

1087

unsigned int user_data_from = 0, user_data_to = 0;

1088

1089

ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos,

1089

ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos,

1090

&cluster_start, &cluster_end);

1090

&cluster_start, &cluster_end);

1091

1092

if (page == wc->w_target_page) {

1092

if (page == wc->w_target_page) {

1093

map_from = user_pos & (PAGE_CACHE_SIZE - 1);

1093

map_from = user_pos & (PAGE_CACHE_SIZE - 1);

1094

map_to = map_from + user_len;

1094

map_to = map_from + user_len;

1095

1096

if (new)

1096

if (new)

1097

ret = ocfs2_map_page_blocks(page, p_blkno, inode,

1097

ret = ocfs2_map_page_blocks(page, p_blkno, inode,

1098

cluster_start, cluster_end,

1098

cluster_start, cluster_end,

1099

new);

1099

new);

1100

else

1100

else

1101

ret = ocfs2_map_page_blocks(page, p_blkno, inode,

1101

ret = ocfs2_map_page_blocks(page, p_blkno, inode,

1102

map_from, map_to, new);

1102

map_from, map_to, new);

1103

if (ret) {

1103

if (ret) {

1104

mlog_errno(ret);

1104

mlog_errno(ret);

1105

goto out;

1105

goto out;

1106

}

1106

}

1107

1108

user_data_from = map_from;

1108

user_data_from = map_from;

1109

user_data_to = map_to;

1109

user_data_to = map_to;

1110

if (new) {

1110

if (new) {

1111

map_from = cluster_start;

1111

map_from = cluster_start;

1112

map_to = cluster_end;

1112

map_to = cluster_end;

1113

}

1113

}

1114

} else {

1114

} else {

1115

/*

1115

/*

1116

* If we haven't allocated the new page yet, we

1116

* If we haven't allocated the new page yet, we

1117

* shouldn't be writing it out without copying user

1117

* shouldn't be writing it out without copying user

1118

* data. This is likely a math error from the caller.

1118

* data. This is likely a math error from the caller.

1119

*/

1119

*/

1120

BUG_ON(!new);

1120

BUG_ON(!new);

1121

1122

map_from = cluster_start;

1122

map_from = cluster_start;

1123

map_to = cluster_end;

1123

map_to = cluster_end;

1124

1125

ret = ocfs2_map_page_blocks(page, p_blkno, inode,

1125

ret = ocfs2_map_page_blocks(page, p_blkno, inode,

1126

cluster_start, cluster_end, new);

1126

cluster_start, cluster_end, new);

1127

if (ret) {

1127

if (ret) {

1128

mlog_errno(ret);

1128

mlog_errno(ret);

1129

goto out;

1129

goto out;

1130

}

1130

}

1131

}

1131

}

1132

1133

/*

1133

/*

1134

* Parts of newly allocated pages need to be zero'd.

1134

* Parts of newly allocated pages need to be zero'd.

1135

*

1135

*

1136

* Above, we have also rewritten 'to' and 'from' - as far as

1136

* Above, we have also rewritten 'to' and 'from' - as far as

1137

* the rest of the function is concerned, the entire cluster

1137

* the rest of the function is concerned, the entire cluster

1138

* range inside of a page needs to be written.

1138

* range inside of a page needs to be written.

1139

*

1139

*

1140

* We can skip this if the page is up to date - it's already

1140

* We can skip this if the page is up to date - it's already

1141

* been zero'd from being read in as a hole.

1141

* been zero'd from being read in as a hole.

1142

*/

1142

*/

1143

if (new && !PageUptodate(page))

1143

if (new && !PageUptodate(page))

1144

ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb),

1144

ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb),

1145

cpos, user_data_from, user_data_to);

1145

cpos, user_data_from, user_data_to);

1146

1147

flush_dcache_page(page);

1147

flush_dcache_page(page);

1148

1149

out:

1149

out:

1150

return ret;

1150

return ret;

1151

}

1151

}

1152

1153

/*

1153

/*

1154

* This function will only grab one clusters worth of pages.

1154

* This function will only grab one clusters worth of pages.

1155

*/

1155

*/

1156

static int ocfs2_grab_pages_for_write(struct address_space *mapping,

1156

static int ocfs2_grab_pages_for_write(struct address_space *mapping,

1157

struct ocfs2_write_ctxt *wc,

1157

struct ocfs2_write_ctxt *wc,

1158

u32 cpos, loff_t user_pos, int new,

1158

u32 cpos, loff_t user_pos, int new,

1159

struct page *mmap_page)

1159

struct page *mmap_page)

1160

{

1160

{

1161

int ret = 0, i;

1161

int ret = 0, i;

1162

unsigned long start, target_index, index;

1162

unsigned long start, target_index, index;

1163

struct inode *inode = mapping->host;

1163

struct inode *inode = mapping->host;

1164

1165

target_index = user_pos >> PAGE_CACHE_SHIFT;

1165

target_index = user_pos >> PAGE_CACHE_SHIFT;

1166

1167

/*

1167

/*

1168

* Figure out how many pages we'll be manipulating here. For

1168

* Figure out how many pages we'll be manipulating here. For

1169

* non allocating write, we just change the one

1169

* non allocating write, we just change the one

1170

* page. Otherwise, we'll need a whole clusters worth.

1170

* page. Otherwise, we'll need a whole clusters worth.

1171

*/

1171

*/

1172

if (new) {

1172

if (new) {

1173

wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);

1173

wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);

1174

start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);

1174

start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);

1175

} else {

1175

} else {

1176

wc->w_num_pages = 1;

1176

wc->w_num_pages = 1;

1177

start = target_index;

1177

start = target_index;

1178

}

1178

}

1179

1180

for(i = 0; i < wc->w_num_pages; i++) {

1180

for(i = 0; i < wc->w_num_pages; i++) {

1181

index = start + i;

1181

index = start + i;

1182

1183

if (index == target_index && mmap_page) {

1183

if (index == target_index && mmap_page) {

1184

/*

1184

/*

1185

* ocfs2_pagemkwrite() is a little different

1185

* ocfs2_pagemkwrite() is a little different

1186

* and wants us to directly use the page

1186

* and wants us to directly use the page

1187

* passed in.

1187

* passed in.

1188

*/

1188

*/

1189

lock_page(mmap_page);

1189

lock_page(mmap_page);

1190

1191

if (mmap_page->mapping != mapping) {

1191

if (mmap_page->mapping != mapping) {

1192

unlock_page(mmap_page);

1192

unlock_page(mmap_page);

1193

/*

1193

/*

1194

* Sanity check - the locking in

1194

* Sanity check - the locking in

1195

* ocfs2_pagemkwrite() should ensure

1195

* ocfs2_pagemkwrite() should ensure

1196

* that this code doesn't trigger.

1196

* that this code doesn't trigger.

1197

*/

1197

*/

1198

ret = -EINVAL;

1198

ret = -EINVAL;

1199

mlog_errno(ret);

1199

mlog_errno(ret);

1200

goto out;

1200

goto out;

1201

}

1201

}

1202

1203

page_cache_get(mmap_page);

1203

page_cache_get(mmap_page);

1204

wc->w_pages[i] = mmap_page;

1204

wc->w_pages[i] = mmap_page;

1205

} else {

1205

} else {

1206

wc->w_pages[i] = find_or_create_page(mapping, index,

1206

wc->w_pages[i] = find_or_create_page(mapping, index,

1207

GFP_NOFS);

1207

GFP_NOFS);

1208

if (!wc->w_pages[i]) {

1208

if (!wc->w_pages[i]) {

1209

ret = -ENOMEM;

1209

ret = -ENOMEM;

1210

mlog_errno(ret);

1210

mlog_errno(ret);

1211

goto out;

1211

goto out;

1212

}

1212

}

1213

}

1213

}

1214

1215

if (index == target_index)

1215

if (index == target_index)

1216

wc->w_target_page = wc->w_pages[i];

1216

wc->w_target_page = wc->w_pages[i];

1217

}

1217

}

1218

out:

1218

out:

1219

return ret;

1219

return ret;

1220

}

1220

}

1221

1222

/*

1222

/*

1223

* Prepare a single cluster for write one cluster into the file.

1223

* Prepare a single cluster for write one cluster into the file.

1224

*/

1224

*/

1225

static int ocfs2_write_cluster(struct address_space *mapping,

1225

static int ocfs2_write_cluster(struct address_space *mapping,

1226

u32 phys, unsigned int unwritten,

1226

u32 phys, unsigned int unwritten,

1227

struct ocfs2_alloc_context *data_ac,

1227

struct ocfs2_alloc_context *data_ac,

1228

struct ocfs2_alloc_context *meta_ac,

1228

struct ocfs2_alloc_context *meta_ac,

1229

struct ocfs2_write_ctxt *wc, u32 cpos,

1229

struct ocfs2_write_ctxt *wc, u32 cpos,

1230

loff_t user_pos, unsigned user_len)

1230

loff_t user_pos, unsigned user_len)

1231

{

1231

{

1232

int ret, i, new, should_zero = 0;

1232

int ret, i, new, should_zero = 0;

1233

u64 v_blkno, p_blkno;

1233

u64 v_blkno, p_blkno;

1234

struct inode *inode = mapping->host;

1234

struct inode *inode = mapping->host;

1235

1236

new = phys == 0 ? 1 : 0;

1236

new = phys == 0 ? 1 : 0;

1237

if (new || unwritten)

1237

if (new || unwritten)

1238

should_zero = 1;

1238

should_zero = 1;

1239

1240

if (new) {

1240

if (new) {

1241

u32 tmp_pos;

1241

u32 tmp_pos;

1242

1243

/*

1243

/*

1244

* This is safe to call with the page locks - it won't take

1244

* This is safe to call with the page locks - it won't take

1245

* any additional semaphores or cluster locks.

1245

* any additional semaphores or cluster locks.

1246

*/

1246

*/

1247

tmp_pos = cpos;

1247

tmp_pos = cpos;

1248

ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode,

1248

ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode,

1249

&tmp_pos, 1, 0, wc->w_di_bh,

1249

&tmp_pos, 1, 0, wc->w_di_bh,

1250

wc->w_handle, data_ac,

1250

wc->w_handle, data_ac,

1251

meta_ac, NULL);

1251

meta_ac, NULL);

1252

/*

1252

/*

1253

* This shouldn't happen because we must have already

1253

* This shouldn't happen because we must have already

1254

* calculated the correct meta data allocation required. The

1254

* calculated the correct meta data allocation required. The

1255

* internal tree allocation code should know how to increase

1255

* internal tree allocation code should know how to increase

1256

* transaction credits itself.

1256

* transaction credits itself.

1257

*

1257

*

1258

* If need be, we could handle -EAGAIN for a

1258

* If need be, we could handle -EAGAIN for a

1259

* RESTART_TRANS here.

1259

* RESTART_TRANS here.

1260

*/

1260

*/

1261

mlog_bug_on_msg(ret == -EAGAIN,

1261

mlog_bug_on_msg(ret == -EAGAIN,

1262

"Inode %llu: EAGAIN return during allocation.\n",

1262

"Inode %llu: EAGAIN return during allocation.\n",

1263

(unsigned long long)OCFS2_I(inode)->ip_blkno);

1263

(unsigned long long)OCFS2_I(inode)->ip_blkno);

1264

if (ret < 0) {

1264

if (ret < 0) {

1265

mlog_errno(ret);

1265

mlog_errno(ret);

1266

goto out;

1266

goto out;

1267

}

1267

}

1268

} else if (unwritten) {

1268

} else if (unwritten) {

1269

ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,

1269

ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,

1270

wc->w_handle, cpos, 1, phys,

1270

wc->w_handle, cpos, 1, phys,

1271

meta_ac, &wc->w_dealloc);

1271

meta_ac, &wc->w_dealloc);

1272

if (ret < 0) {

1272

if (ret < 0) {

1273

mlog_errno(ret);

1273

mlog_errno(ret);

1274

goto out;

1274

goto out;

1275

}

1275

}

1276

}

1276

}

1277

1278

if (should_zero)

1278

if (should_zero)

1279

v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);

1279

v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);

1280

else

1280

else

1281

v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;

1281

v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;

1282

1283

/*

1283

/*

1284

* The only reason this should fail is due to an inability to

1284

* The only reason this should fail is due to an inability to

1285

* find the extent added.

1285

* find the extent added.

1286

*/

1286

*/

1287

ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,

1287

ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,

1288

NULL);

1288

NULL);

1289

if (ret < 0) {

1289

if (ret < 0) {

1290

ocfs2_error(inode->i_sb, "Corrupting extend for inode %llu, "

1290

ocfs2_error(inode->i_sb, "Corrupting extend for inode %llu, "

1291

"at logical block %llu",

1291

"at logical block %llu",

1292

(unsigned long long)OCFS2_I(inode)->ip_blkno,

1292

(unsigned long long)OCFS2_I(inode)->ip_blkno,

1293

(unsigned long long)v_blkno);

1293

(unsigned long long)v_blkno);

1294

goto out;

1294

goto out;

1295

}

1295

}

1296

1297

BUG_ON(p_blkno == 0);

1297

BUG_ON(p_blkno == 0);

1298

1299

for(i = 0; i < wc->w_num_pages; i++) {

1299

for(i = 0; i < wc->w_num_pages; i++) {

1300

int tmpret;

1300

int tmpret;

1301

1302

tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,

1302

tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,

1303

wc->w_pages[i], cpos,

1303

wc->w_pages[i], cpos,

1304

user_pos, user_len,

1304

user_pos, user_len,

1305

should_zero);

1305

should_zero);

1306

if (tmpret) {

1306

if (tmpret) {

1307

mlog_errno(tmpret);

1307

mlog_errno(tmpret);

1308

if (ret == 0)

1308

if (ret == 0)

1309

tmpret = ret;

1309

tmpret = ret;

1310

}

1310

}

1311

}

1311

}

1312

1313

/*

1313

/*

1314

* We only have cleanup to do in case of allocating write.

1314

* We only have cleanup to do in case of allocating write.

1315

*/

1315

*/

1316

if (ret && new)

1316

if (ret && new)

1317

ocfs2_write_failure(inode, wc, user_pos, user_len);

1317

ocfs2_write_failure(inode, wc, user_pos, user_len);

1318

1319

out:

1319

out:

1320

1321

return ret;

1321

return ret;

1322

}

1322

}

1323

1324

static int ocfs2_write_cluster_by_desc(struct address_space *mapping,

1324

static int ocfs2_write_cluster_by_desc(struct address_space *mapping,

1325

struct ocfs2_alloc_context *data_ac,

1325

struct ocfs2_alloc_context *data_ac,

1326

struct ocfs2_alloc_context *meta_ac,

1326

struct ocfs2_alloc_context *meta_ac,

1327

struct ocfs2_write_ctxt *wc,

1327

struct ocfs2_write_ctxt *wc,

1328

loff_t pos, unsigned len)

1328

loff_t pos, unsigned len)

1329

{

1329

{

1330

int ret, i;

1330

int ret, i;

1331

loff_t cluster_off;

1331

loff_t cluster_off;

1332

unsigned int local_len = len;

1332

unsigned int local_len = len;

1333

struct ocfs2_write_cluster_desc *desc;

1333

struct ocfs2_write_cluster_desc *desc;

1334

struct ocfs2_super *osb = OCFS2_SB(mapping->host->i_sb);

1334

struct ocfs2_super *osb = OCFS2_SB(mapping->host->i_sb);

1335

1336

for (i = 0; i < wc->w_clen; i++) {

1336

for (i = 0; i < wc->w_clen; i++) {

1337

desc = &wc->w_desc[i];

1337

desc = &wc->w_desc[i];

1338

1339

/*

1339

/*

1340

* We have to make sure that the total write passed in

1340

* We have to make sure that the total write passed in

1341

* doesn't extend past a single cluster.

1341

* doesn't extend past a single cluster.

1342

*/

1342

*/

1343

local_len = len;

1343

local_len = len;

1344

cluster_off = pos & (osb->s_clustersize - 1);

1344

cluster_off = pos & (osb->s_clustersize - 1);

1345

if ((cluster_off + local_len) > osb->s_clustersize)

1345

if ((cluster_off + local_len) > osb->s_clustersize)

1346

local_len = osb->s_clustersize - cluster_off;

1346

local_len = osb->s_clustersize - cluster_off;

1347

1348

ret = ocfs2_write_cluster(mapping, desc->c_phys,

1348

ret = ocfs2_write_cluster(mapping, desc->c_phys,

1349

desc->c_unwritten, data_ac, meta_ac,

1349

desc->c_unwritten, data_ac, meta_ac,

1350

wc, desc->c_cpos, pos, local_len);

1350

wc, desc->c_cpos, pos, local_len);

1351

if (ret) {

1351

if (ret) {

1352

mlog_errno(ret);

1352

mlog_errno(ret);

1353

goto out;

1353

goto out;

1354

}

1354

}

1355

1356

len -= local_len;

1356

len -= local_len;

1357

pos += local_len;

1357

pos += local_len;

1358

}

1358

}

1359

1360

ret = 0;

1360

ret = 0;

1361

out:

1361

out:

1362

return ret;

1362

return ret;

1363

}

1363

}

1364

1365

/*

1365

/*

1366

* ocfs2_write_end() wants to know which parts of the target page it

1366

* ocfs2_write_end() wants to know which parts of the target page it

1367

* should complete the write on. It's easiest to compute them ahead of

1367

* should complete the write on. It's easiest to compute them ahead of

1368

* time when a more complete view of the write is available.

1368

* time when a more complete view of the write is available.

1369

*/

1369

*/

1370

static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,

1370

static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,

1371

struct ocfs2_write_ctxt *wc,

1371

struct ocfs2_write_ctxt *wc,

1372

loff_t pos, unsigned len, int alloc)

1372

loff_t pos, unsigned len, int alloc)

1373

{

1373

{

1374

struct ocfs2_write_cluster_desc *desc;

1374

struct ocfs2_write_cluster_desc *desc;

1375

1376

wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1);

1376

wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1);

1377

wc->w_target_to = wc->w_target_from + len;

1377

wc->w_target_to = wc->w_target_from + len;

1378

1379

if (alloc == 0)

1379

if (alloc == 0)

1380

return;

1380

return;

1381

1382

/*

1382

/*

1383

* Allocating write - we may have different boundaries based

1383

* Allocating write - we may have different boundaries based

1384

* on page size and cluster size.

1384

* on page size and cluster size.

1385

*

1385

*

1386

* NOTE: We can no longer compute one value from the other as

1386

* NOTE: We can no longer compute one value from the other as

1387

* the actual write length and user provided length may be

1387

* the actual write length and user provided length may be

1388

* different.

1388

* different.

1389

*/

1389

*/

1390

1391

if (wc->w_large_pages) {

1391

if (wc->w_large_pages) {

1392

/*

1392

/*

1393

* We only care about the 1st and last cluster within

1393

* We only care about the 1st and last cluster within

1394

* our range and whether they should be zero'd or not. Either

1394

* our range and whether they should be zero'd or not. Either

1395

* value may be extended out to the start/end of a

1395

* value may be extended out to the start/end of a

1396

* newly allocated cluster.

1396

* newly allocated cluster.

1397

*/

1397

*/

1398

desc = &wc->w_desc[0];

1398

desc = &wc->w_desc[0];

1399

if (ocfs2_should_zero_cluster(desc))

1399

if (ocfs2_should_zero_cluster(desc))

1400

ocfs2_figure_cluster_boundaries(osb,

1400

ocfs2_figure_cluster_boundaries(osb,

1401

desc->c_cpos,

1401

desc->c_cpos,

1402

&wc->w_target_from,

1402

&wc->w_target_from,

1403

NULL);

1403

NULL);

1404

1405

desc = &wc->w_desc[wc->w_clen - 1];

1405

desc = &wc->w_desc[wc->w_clen - 1];

1406

if (ocfs2_should_zero_cluster(desc))

1406

if (ocfs2_should_zero_cluster(desc))

1407

ocfs2_figure_cluster_boundaries(osb,

1407

ocfs2_figure_cluster_boundaries(osb,

1408

desc->c_cpos,

1408

desc->c_cpos,

1409

NULL,

1409

NULL,

1410

&wc->w_target_to);

1410

&wc->w_target_to);

1411

} else {

1411

} else {

1412

wc->w_target_from = 0;

1412

wc->w_target_from = 0;

1413

wc->w_target_to = PAGE_CACHE_SIZE;

1413

wc->w_target_to = PAGE_CACHE_SIZE;

1414

}

1414

}

1415

}

1415

}

1416

1417

/*

1417

/*

1418

* Populate each single-cluster write descriptor in the write context

1418

* Populate each single-cluster write descriptor in the write context

1419

* with information about the i/o to be done.

1419

* with information about the i/o to be done.

1420

*

1420

*

1421

* Returns the number of clusters that will have to be allocated, as

1421

* Returns the number of clusters that will have to be allocated, as

1422

* well as a worst case estimate of the number of extent records that

1422

* well as a worst case estimate of the number of extent records that

1423

* would have to be created during a write to an unwritten region.

1423

* would have to be created during a write to an unwritten region.

1424

*/

1424

*/

1425

static int ocfs2_populate_write_desc(struct inode *inode,

1425

static int ocfs2_populate_write_desc(struct inode *inode,

1426

struct ocfs2_write_ctxt *wc,

1426

struct ocfs2_write_ctxt *wc,

1427

unsigned int *clusters_to_alloc,

1427

unsigned int *clusters_to_alloc,

1428

unsigned int *extents_to_split)

1428

unsigned int *extents_to_split)

1429

{

1429

{

1430

int ret;

1430

int ret;

1431

struct ocfs2_write_cluster_desc *desc;

1431

struct ocfs2_write_cluster_desc *desc;

1432

unsigned int num_clusters = 0;

1432

unsigned int num_clusters = 0;

1433

unsigned int ext_flags = 0;

1433

unsigned int ext_flags = 0;

1434

u32 phys = 0;

1434

u32 phys = 0;

1435

int i;

1435

int i;

1436

1437

*clusters_to_alloc = 0;

1437

*clusters_to_alloc = 0;

1438

*extents_to_split = 0;

1438

*extents_to_split = 0;

1439

1440

for (i = 0; i < wc->w_clen; i++) {

1440

for (i = 0; i < wc->w_clen; i++) {

1441

desc = &wc->w_desc[i];

1441

desc = &wc->w_desc[i];

1442

desc->c_cpos = wc->w_cpos + i;

1442

desc->c_cpos = wc->w_cpos + i;

1443

1444

if (num_clusters == 0) {

1444

if (num_clusters == 0) {

1445

/*

1445

/*

1446

* Need to look up the next extent record.

1446

* Need to look up the next extent record.

1447

*/

1447

*/

1448

ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys,

1448

ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys,

1449

&num_clusters, &ext_flags);

1449

&num_clusters, &ext_flags);

1450

if (ret) {

1450

if (ret) {

1451

mlog_errno(ret);

1451

mlog_errno(ret);

1452

goto out;

1452

goto out;

1453

}

1453

}

1454

1455

/*

1455

/*

1456

* Assume worst case - that we're writing in

1456

* Assume worst case - that we're writing in

1457

* the middle of the extent.

1457

* the middle of the extent.

1458

*

1458

*

1459

* We can assume that the write proceeds from

1459

* We can assume that the write proceeds from

1460

* left to right, in which case the extent

1460

* left to right, in which case the extent

1461

* insert code is smart enough to coalesce the

1461

* insert code is smart enough to coalesce the

1462

* next splits into the previous records created.

1462

* next splits into the previous records created.

1463

*/

1463

*/

1464

if (ext_flags & OCFS2_EXT_UNWRITTEN)

1464

if (ext_flags & OCFS2_EXT_UNWRITTEN)

1465

*extents_to_split = *extents_to_split + 2;

1465

*extents_to_split = *extents_to_split + 2;

1466

} else if (phys) {

1466

} else if (phys) {

1467

/*

1467

/*

1468

* Only increment phys if it doesn't describe

1468

* Only increment phys if it doesn't describe

1469

* a hole.

1469

* a hole.

1470

*/

1470

*/

1471

phys++;

1471

phys++;

1472

}

1472

}

1473

1474

desc->c_phys = phys;

1474

desc->c_phys = phys;

1475

if (phys == 0) {

1475

if (phys == 0) {

1476

desc->c_new = 1;

1476

desc->c_new = 1;

1477

*clusters_to_alloc = *clusters_to_alloc + 1;

1477

*clusters_to_alloc = *clusters_to_alloc + 1;

1478

}

1478

}

1479

if (ext_flags & OCFS2_EXT_UNWRITTEN)

1479

if (ext_flags & OCFS2_EXT_UNWRITTEN)

1480

desc->c_unwritten = 1;

1480

desc->c_unwritten = 1;

1481

1482

num_clusters--;

1482

num_clusters--;

1483

}

1483

}

1484

1485

ret = 0;

1485

ret = 0;

1486

out:

1486

out:

1487

return ret;

1487

return ret;

1488

}

1488

}

1489

1490

static int ocfs2_write_begin_inline(struct address_space *mapping,

1490

static int ocfs2_write_begin_inline(struct address_space *mapping,

1491

struct inode *inode,

1491

struct inode *inode,

1492

struct ocfs2_write_ctxt *wc)

1492

struct ocfs2_write_ctxt *wc)

1493

{

1493

{

1494

int ret;

1494

int ret;

1495

struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

1495

struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

1496

struct page *page;

1496

struct page *page;

1497

handle_t *handle;

1497

handle_t *handle;

1498

struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;

1498

struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;

1499

1500

page = find_or_create_page(mapping, 0, GFP_NOFS);

1500

page = find_or_create_page(mapping, 0, GFP_NOFS);

1501

if (!page) {

1501

if (!page) {

1502

ret = -ENOMEM;

1502

ret = -ENOMEM;

1503

mlog_errno(ret);

1503

mlog_errno(ret);

1504

goto out;

1504

goto out;

1505

}

1505

}

1506

/*

1506

/*

1507

* If we don't set w_num_pages then this page won't get unlocked

1507

* If we don't set w_num_pages then this page won't get unlocked

1508

* and freed on cleanup of the write context.

1508

* and freed on cleanup of the write context.

1509

*/

1509

*/

1510

wc->w_pages[0] = wc->w_target_page = page;

1510

wc->w_pages[0] = wc->w_target_page = page;

1511

wc->w_num_pages = 1;

1511

wc->w_num_pages = 1;

1512

1513

handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);

1513

handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);

1514

if (IS_ERR(handle)) {

1514

if (IS_ERR(handle)) {

1515

ret = PTR_ERR(handle);

1515

ret = PTR_ERR(handle);

1516

mlog_errno(ret);

1516

mlog_errno(ret);

1517

goto out;

1517

goto out;

1518

}

1518

}

1519

1520

ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,

1520

ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,

1521

OCFS2_JOURNAL_ACCESS_WRITE);

1521

OCFS2_JOURNAL_ACCESS_WRITE);

1522

if (ret) {

1522

if (ret) {

1523

ocfs2_commit_trans(osb, handle);

1523

ocfs2_commit_trans(osb, handle);

1524

1525

mlog_errno(ret);

1525

mlog_errno(ret);

1526

goto out;

1526

goto out;

1527

}

1527

}

1528

1529

if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))

1529

if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))

1530

ocfs2_set_inode_data_inline(inode, di);

1530

ocfs2_set_inode_data_inline(inode, di);

1531

1532

if (!PageUptodate(page)) {

1532

if (!PageUptodate(page)) {

1533

ret = ocfs2_read_inline_data(inode, page, wc->w_di_bh);

1533

ret = ocfs2_read_inline_data(inode, page, wc->w_di_bh);

1534

if (ret) {

1534

if (ret) {

1535

ocfs2_commit_trans(osb, handle);

1535

ocfs2_commit_trans(osb, handle);

1536

1537

goto out;

1537

goto out;

1538

}

1538

}

1539

}

1539

}

1540

1541

wc->w_handle = handle;

1541

wc->w_handle = handle;

1542

out:

1542

out:

1543

return ret;

1543

return ret;

1544

}

1544

}

1545

1546

/*
 * Report whether @new_size bytes fit into the inline data area of the
 * dinode held in @di_bh, judged against the dinode's id_count field.
 *
 * Returns 1 if it fits, 0 otherwise.
 */
int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size)
{
	struct ocfs2_dinode *dinode = (struct ocfs2_dinode *)di_bh->b_data;
	u16 capacity = le16_to_cpu(dinode->id2.i_data.id_count);

	return (new_size <= capacity) ? 1 : 0;
}

1554

1555

static int ocfs2_try_to_write_inline_data(struct address_space *mapping,

1555

static int ocfs2_try_to_write_inline_data(struct address_space *mapping,

1556

struct inode *inode, loff_t pos,

1556

struct inode *inode, loff_t pos,

1557

unsigned len, struct page *mmap_page,

1557

unsigned len, struct page *mmap_page,

1558

struct ocfs2_write_ctxt *wc)

1558

struct ocfs2_write_ctxt *wc)

1559

{

1559

{

1560

int ret, written = 0;

1560

int ret, written = 0;

1561

loff_t end = pos + len;

1561

loff_t end = pos + len;

1562

struct ocfs2_inode_info *oi = OCFS2_I(inode);

1562

struct ocfs2_inode_info *oi = OCFS2_I(inode);

1563

1564

mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",

1564

mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",

1565

(unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,

1565

(unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,

1566

oi->ip_dyn_features);

1566

oi->ip_dyn_features);

1567

1568

/*

1568

/*

1569

* Handle inodes which already have inline data 1st.

1569

* Handle inodes which already have inline data 1st.

1570

*/

1570

*/

1571

if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {

1571

if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {

1572

if (mmap_page == NULL &&

1572

if (mmap_page == NULL &&

1573

ocfs2_size_fits_inline_data(wc->w_di_bh, end))

1573

ocfs2_size_fits_inline_data(wc->w_di_bh, end))

1574

goto do_inline_write;

1574

goto do_inline_write;

1575

1576

/*

1576

/*

1577

* The write won't fit - we have to give this inode an

1577

* The write won't fit - we have to give this inode an

1578

* inline extent list now.

1578

* inline extent list now.

1579

*/

1579

*/

1580

ret = ocfs2_convert_inline_data_to_extents(inode, wc->w_di_bh);

1580

ret = ocfs2_convert_inline_data_to_extents(inode, wc->w_di_bh);

1581

if (ret)

1581

if (ret)

1582

mlog_errno(ret);

1582

mlog_errno(ret);

1583

goto out;

1583

goto out;

1584

}

1584

}

1585

1586

/*

1586

/*

1587

* Check whether the inode can accept inline data.

1587

* Check whether the inode can accept inline data.

1588

*/

1588

*/

1589

if (oi->ip_clusters != 0 || i_size_read(inode) != 0)

1589

if (oi->ip_clusters != 0 || i_size_read(inode) != 0)

1590

return 0;

1590

return 0;

1591

1592

/*

1592

/*

1593

* Check whether the write can fit.

1593

* Check whether the write can fit.

1594

*/

1594

*/

1595

if (mmap_page || end > ocfs2_max_inline_data(inode->i_sb))

1595

if (mmap_page || end > ocfs2_max_inline_data(inode->i_sb))

1596

return 0;

1596

return 0;

1597

1598

do_inline_write:

1598

do_inline_write:

1599

ret = ocfs2_write_begin_inline(mapping, inode, wc);

1599

ret = ocfs2_write_begin_inline(mapping, inode, wc);

1600

if (ret) {

1600

if (ret) {

1601

mlog_errno(ret);

1601

mlog_errno(ret);

1602

goto out;

1602

goto out;

1603

}

1603

}

1604

1605

/*

1605

/*

1606

* This signals to the caller that the data can be written

1606

* This signals to the caller that the data can be written

1607

* inline.

1607

* inline.

1608

*/

1608

*/

1609

written = 1;

1609

written = 1;

1610

out:

1610

out:

1611

return written ? written : ret;

1611

return written ? written : ret;

1612

}

1612

}

1613

1614

/*

1614

/*

1615

* This function only does anything for file systems which can't

1615

* This function only does anything for file systems which can't

1616

* handle sparse files.

1616

* handle sparse files.

1617

*

1617

*

1618

* What we want to do here is fill in any hole between the current end

1618

* What we want to do here is fill in any hole between the current end

1619

* of allocation and the end of our write. That way the rest of the

1619

* of allocation and the end of our write. That way the rest of the

1620

* write path can treat it as an non-allocating write, which has no

1620

* write path can treat it as an non-allocating write, which has no

1621

* special case code for sparse/nonsparse files.

1621

* special case code for sparse/nonsparse files.

1622

*/

1622

*/

1623

static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,

1623

static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,

1624

unsigned len,

1624

unsigned len,

1625

struct ocfs2_write_ctxt *wc)

1625

struct ocfs2_write_ctxt *wc)

1626

{

1626

{

1627

int ret;

1627

int ret;

1628

struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

1628

struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

1629

loff_t newsize = pos + len;

1629

loff_t newsize = pos + len;

1630

1631

if (ocfs2_sparse_alloc(osb))

1631

if (ocfs2_sparse_alloc(osb))

1632

return 0;

1632

return 0;

1633

1634

if (newsize <= i_size_read(inode))

1634

if (newsize <= i_size_read(inode))

1635

return 0;

1635

return 0;

1636

1637

ret = ocfs2_extend_no_holes(inode, newsize, newsize - len);

1637

ret = ocfs2_extend_no_holes(inode, newsize, newsize - len);

1638

if (ret)

1638

if (ret)

1639

mlog_errno(ret);

1639

mlog_errno(ret);

1640

1641

return ret;

1641

return ret;

1642

}

1642

}

1643

1644

int ocfs2_write_begin_nolock(struct address_space *mapping,

1644

int ocfs2_write_begin_nolock(struct address_space *mapping,

1645

loff_t pos, unsigned len, unsigned flags,

1645

loff_t pos, unsigned len, unsigned flags,

1646

struct page **pagep, void **fsdata,

1646

struct page **pagep, void **fsdata,

1647

struct buffer_head *di_bh, struct page *mmap_page)

1647

struct buffer_head *di_bh, struct page *mmap_page)

1648

{

1648

{

1649

int ret, credits = OCFS2_INODE_UPDATE_CREDITS;

1649

int ret, credits = OCFS2_INODE_UPDATE_CREDITS;

1650

unsigned int clusters_to_alloc, extents_to_split;

1650

unsigned int clusters_to_alloc, extents_to_split;

1651

struct ocfs2_write_ctxt *wc;

1651

struct ocfs2_write_ctxt *wc;

1652

struct inode *inode = mapping->host;

1652

struct inode *inode = mapping->host;

1653

struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

1653

struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

1654

struct ocfs2_dinode *di;

1654

struct ocfs2_dinode *di;

1655

struct ocfs2_alloc_context *data_ac = NULL;

1655

struct ocfs2_alloc_context *data_ac = NULL;

1656

struct ocfs2_alloc_context *meta_ac = NULL;

1656

struct ocfs2_alloc_context *meta_ac = NULL;

1657

handle_t *handle;

1657

handle_t *handle;

1658

1659

ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);

1659

ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);

1660

if (ret) {

1660

if (ret) {

1661

mlog_errno(ret);

1661

mlog_errno(ret);

1662

return ret;

1662

return ret;

1663

}

1663

}

1664

1665

if (ocfs2_supports_inline_data(osb)) {

1665

if (ocfs2_supports_inline_data(osb)) {

1666

ret = ocfs2_try_to_write_inline_data(mapping, inode, pos, len,

1666

ret = ocfs2_try_to_write_inline_data(mapping, inode, pos, len,

1667

mmap_page, wc);

1667

mmap_page, wc);

1668

if (ret == 1) {

1668

if (ret == 1) {

1669

ret = 0;

1669

ret = 0;

1670

goto success;

1670

goto success;

1671

}

1671

}

1672

if (ret < 0) {

1672

if (ret < 0) {

1673

mlog_errno(ret);

1673

mlog_errno(ret);

1674

goto out;

1674

goto out;

1675

}

1675

}

1676

}

1676

}

1677

1678

ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);

1678

ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);

1679

if (ret) {

1679

if (ret) {

1680

mlog_errno(ret);

1680

mlog_errno(ret);

1681

goto out;

1681

goto out;

1682

}

1682

}

1683

1684

ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,

1684

ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,

1685

&extents_to_split);

1685

&extents_to_split);

1686

if (ret) {

1686

if (ret) {

1687

mlog_errno(ret);

1687

mlog_errno(ret);

1688

goto out;

1688

goto out;

1689

}

1689

}

1690

1691

di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;

1691

di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;

1692

1693

/*

1693

/*

1694

* We set w_target_from, w_target_to here so that

1694

* We set w_target_from, w_target_to here so that

1695

* ocfs2_write_end() knows which range in the target page to

1695

* ocfs2_write_end() knows which range in the target page to

1696

* write out. An allocation requires that we write the entire

1696

* write out. An allocation requires that we write the entire

1697

* cluster range.

1697

* cluster range.

1698

*/

1698

*/

1699

if (clusters_to_alloc || extents_to_split) {

1699

if (clusters_to_alloc || extents_to_split) {

1700

/*

1700

/*

1701

* XXX: We are stretching the limits of

1701

* XXX: We are stretching the limits of

1702

* ocfs2_lock_allocators(). It greatly over-estimates

1702

* ocfs2_lock_allocators(). It greatly over-estimates

1703

* the work to be done.

1703

* the work to be done.

1704

*/

1704

*/

1705

ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc,

1705

ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc,

1706

extents_to_split, &data_ac, &meta_ac);

1706

extents_to_split, &data_ac, &meta_ac);

1707

if (ret) {

1707

if (ret) {

1708

mlog_errno(ret);

1708

mlog_errno(ret);

1709

goto out;

1709

goto out;

1710

}

1710

}

1711

1712

credits = ocfs2_calc_extend_credits(inode->i_sb, di,

1712

credits = ocfs2_calc_extend_credits(inode->i_sb, di,

1713

clusters_to_alloc);

1713

clusters_to_alloc);

1714

1715

}

1715

}

1716

1717

ocfs2_set_target_boundaries(osb, wc, pos, len,

1717

ocfs2_set_target_boundaries(osb, wc, pos, len,

1718

clusters_to_alloc + extents_to_split);

1718

clusters_to_alloc + extents_to_split);

1719

1720

handle = ocfs2_start_trans(osb, credits);

1720

handle = ocfs2_start_trans(osb, credits);

1721

if (IS_ERR(handle)) {

1721

if (IS_ERR(handle)) {

1722

ret = PTR_ERR(handle);

1722

ret = PTR_ERR(handle);

1723

mlog_errno(ret);

1723

mlog_errno(ret);

1724

goto out;

1724

goto out;

1725

}

1725

}

1726

1727

wc->w_handle = handle;

1727

wc->w_handle = handle;

1728

1729

/*

1729

/*

1730

* We don't want this to fail in ocfs2_write_end(), so do it

1730

* We don't want this to fail in ocfs2_write_end(), so do it

1731

* here.

1731

* here.

1732

*/

1732

*/

1733

ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,

1733

ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,

1734

OCFS2_JOURNAL_ACCESS_WRITE);

1734

OCFS2_JOURNAL_ACCESS_WRITE);

1735

if (ret) {

1735

if (ret) {

1736

mlog_errno(ret);

1736

mlog_errno(ret);

1737

goto out_commit;

1737

goto out_commit;

1738

}

1738

}

1739

1740

/*

1740

/*

1741

* Fill our page array first. That way we've grabbed enough so

1741

* Fill our page array first. That way we've grabbed enough so

1742

* that we can zero and flush if we error after adding the

1742

* that we can zero and flush if we error after adding the

1743

* extent.

1743

* extent.

1744

*/

1744

*/

1745

ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,

1745

ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,

1746

clusters_to_alloc + extents_to_split,

1746

clusters_to_alloc + extents_to_split,

1747

mmap_page);

1747

mmap_page);

1748

if (ret) {

1748

if (ret) {

1749

mlog_errno(ret);

1749

mlog_errno(ret);

1750

goto out_commit;

1750

goto out_commit;

1751

}

1751

}

1752

1753

ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,

1753

ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,

1754

len);

1754

len);

1755

if (ret) {

1755

if (ret) {

1756

mlog_errno(ret);

1756

mlog_errno(ret);

1757

goto out_commit;

1757

goto out_commit;

1758

}

1758

}

1759

1760

if (data_ac)

1760

if (data_ac)

1761

ocfs2_free_alloc_context(data_ac);

1761

ocfs2_free_alloc_context(data_ac);

1762

if (meta_ac)

1762

if (meta_ac)

1763

ocfs2_free_alloc_context(meta_ac);

1763

ocfs2_free_alloc_context(meta_ac);

1764

1765

success:

1765

success:

1766

*pagep = wc->w_target_page;

1766

*pagep = wc->w_target_page;

1767

*fsdata = wc;

1767

*fsdata = wc;

1768

return 0;

1768

return 0;

1769

out_commit:

1769

out_commit:

1770

ocfs2_commit_trans(osb, handle);

1770

ocfs2_commit_trans(osb, handle);

1771

1772

out:

1772

out:

1773

ocfs2_free_write_ctxt(wc);

1773

ocfs2_free_write_ctxt(wc);

1774

1775

if (data_ac)

1775

if (data_ac)

1776

ocfs2_free_alloc_context(data_ac);

1776

ocfs2_free_alloc_context(data_ac);

1777

if (meta_ac)

1777

if (meta_ac)

1778

ocfs2_free_alloc_context(meta_ac);

1778

ocfs2_free_alloc_context(meta_ac);

1779

return ret;

1779

return ret;

1780

}

1780

}

1781

1782

static int ocfs2_write_begin(struct file *file, struct address_space *mapping,

1782

static int ocfs2_write_begin(struct file *file, struct address_space *mapping,

1783

loff_t pos, unsigned len, unsigned flags,

1783

loff_t pos, unsigned len, unsigned flags,

1784

struct page **pagep, void **fsdata)

1784

struct page **pagep, void **fsdata)

1785

{

1785

{

1786

int ret;

1786

int ret;

1787

struct buffer_head *di_bh = NULL;

1787

struct buffer_head *di_bh = NULL;

1788

struct inode *inode = mapping->host;

1788

struct inode *inode = mapping->host;

1789

1790

ret = ocfs2_inode_lock(inode, &di_bh, 1);

1790

ret = ocfs2_inode_lock(inode, &di_bh, 1);

1791

if (ret) {

1791

if (ret) {

1792

mlog_errno(ret);

1792

mlog_errno(ret);

1793

return ret;

1793

return ret;

1794

}

1794

}

1795

1796

/*

1796

/*

1797

* Take alloc sem here to prevent concurrent lookups. That way

1797

* Take alloc sem here to prevent concurrent lookups. That way

1798

* the mapping, zeroing and tree manipulation within

1798

* the mapping, zeroing and tree manipulation within

1799

* ocfs2_write() will be safe against ->readpage(). This

1799

* ocfs2_write() will be safe against ->readpage(). This

1800

* should also serve to lock out allocation from a shared

1800

* should also serve to lock out allocation from a shared

1801

* writeable region.

1801

* writeable region.

1802

*/

1802

*/

1803

down_write(&OCFS2_I(inode)->ip_alloc_sem);

1803

down_write(&OCFS2_I(inode)->ip_alloc_sem);

1804

1805

ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,

1805

ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,

1806

fsdata, di_bh, NULL);

1806

fsdata, di_bh, NULL);

1807

if (ret) {

1807

if (ret) {

1808

mlog_errno(ret);

1808

mlog_errno(ret);

1809

goto out_fail;

1809

goto out_fail;

1810

}

1810

}

1811

1812

brelse(di_bh);

1812

brelse(di_bh);

1813

1814

return 0;

1814

return 0;

1815

1816

out_fail:

1816

out_fail:

1817

up_write(&OCFS2_I(inode)->ip_alloc_sem);

1817

up_write(&OCFS2_I(inode)->ip_alloc_sem);

1818

1819

brelse(di_bh);

1819

brelse(di_bh);

1820

ocfs2_inode_unlock(inode, 1);

1820

ocfs2_inode_unlock(inode, 1);

1821

1822

return ret;

1822

return ret;

1823

}

1823

}

1824

1825

/*
 * Finish an inline-data write by copying the written bytes from the
 * target page into the inline data area of the dinode.
 *
 * @pos:    offset of the write; used as the offset into both the page
 *          and di->id2.i_data.id_data.
 * @len:    number of bytes the write was set up for.
 * @copied: in/out - number of bytes actually copied into the page. Reset
 *          to 0 (and nothing is copied to the dinode) when the copy was
 *          short and the target page is not uptodate.
 * @di:     dinode whose inline data area receives the bytes.
 * @wc:     write context supplying the target page.
 *
 * @inode is not used here.
 */
static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
				   unsigned len, unsigned *copied,
				   struct ocfs2_dinode *di,
				   struct ocfs2_write_ctxt *wc)
{
	void *kaddr;

	/* A short copy into a page that is not uptodate is discarded. */
	if (unlikely(*copied < len)) {
		if (!PageUptodate(wc->w_target_page)) {
			*copied = 0;
			return;
		}
	}

	kaddr = kmap_atomic(wc->w_target_page, KM_USER0);
	memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
	kunmap_atomic(kaddr, KM_USER0);

	/*
	 * Argument order must follow the format string: offset, id_count,
	 * copied, i_dyn_features.
	 */
	mlog(0, "Data written to inode at offset %llu. "
	     "id_count = %u, copied = %u, i_dyn_features = 0x%x\n",
	     (unsigned long long)pos,
	     le16_to_cpu(di->id2.i_data.id_count), *copied,
	     le16_to_cpu(di->i_dyn_features));
}

1849

1850

int ocfs2_write_end_nolock(struct address_space *mapping,

1850

int ocfs2_write_end_nolock(struct address_space *mapping,

1851

loff_t pos, unsigned len, unsigned copied,

1851

loff_t pos, unsigned len, unsigned copied,

1852

struct page *page, void *fsdata)

1852

struct page *page, void *fsdata)

1853

{

1853

{

1854

int i;

1854

int i;

1855

unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);

1855

unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);

1856

struct inode *inode = mapping->host;

1856

struct inode *inode = mapping->host;

1857

struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

1857

struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

1858

struct ocfs2_write_ctxt *wc = fsdata;

1858

struct ocfs2_write_ctxt *wc = fsdata;

1859

struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;

1859

struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;

1860

handle_t *handle = wc->w_handle;

1860

handle_t *handle = wc->w_handle;

1861

struct page *tmppage;

1861

struct page *tmppage;

1862

1863

if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {

1863

if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {

1864

ocfs2_write_end_inline(inode, pos, len, &copied, di, wc);

1864

ocfs2_write_end_inline(inode, pos, len, &copied, di, wc);

1865

goto out_write_size;

1865

goto out_write_size;

1866

}

1866

}

1867

1868

if (unlikely(copied < len)) {

1868

if (unlikely(copied < len)) {

1869

if (!PageUptodate(wc->w_target_page))

1869

if (!PageUptodate(wc->w_target_page))

1870

copied = 0;

1870

copied = 0;

1871

1872

ocfs2_zero_new_buffers(wc->w_target_page, start+copied,

1872

ocfs2_zero_new_buffers(wc->w_target_page, start+copied,

1873

start+len);

1873

start+len);

1874

}

1874

}

1875

flush_dcache_page(wc->w_target_page);

1875

flush_dcache_page(wc->w_target_page);

1876

1877

for(i = 0; i < wc->w_num_pages; i++) {

1877

for(i = 0; i < wc->w_num_pages; i++) {

1878

tmppage = wc->w_pages[i];

1878

tmppage = wc->w_pages[i];

1879

1880

if (tmppage == wc->w_target_page) {

1880

if (tmppage == wc->w_target_page) {

1881

from = wc->w_target_from;

1881

from = wc->w_target_from;

1882

to = wc->w_target_to;

1882

to = wc->w_target_to;

1883

1884

BUG_ON(from > PAGE_CACHE_SIZE ||

1884

BUG_ON(from > PAGE_CACHE_SIZE ||

1885

to > PAGE_CACHE_SIZE ||

1885

to > PAGE_CACHE_SIZE ||

1886

to < from);

1886

to < from);

1887

} else {

1887

} else {

1888

/*

1888

/*

1889

* Pages adjacent to the target (if any) imply

1889

* Pages adjacent to the target (if any) imply

1890

* a hole-filling write in which case we want

1890

* a hole-filling write in which case we want

1891

* to flush their entire range.

1891

* to flush their entire range.

1892

*/

1892

*/

1893

from = 0;

1893

from = 0;

1894

to = PAGE_CACHE_SIZE;

1894

to = PAGE_CACHE_SIZE;

1895

}

1895

}

1896

1897

if (ocfs2_should_order_data(inode))

1897

if (ocfs2_should_order_data(inode))

1898

walk_page_buffers(wc->w_handle, page_buffers(tmppage),

1898

walk_page_buffers(wc->w_handle, page_buffers(tmppage),

1899

from, to, NULL,

1899

from, to, NULL,

1900

ocfs2_journal_dirty_data);

1900

ocfs2_journal_dirty_data);

1901

1902

block_commit_write(tmppage, from, to);

1902

block_commit_write(tmppage, from, to);

1903

}

1903

}

1904

1905

out_write_size:

1905

out_write_size:

1906

pos += copied;

1906

pos += copied;

1907

if (pos > inode->i_size) {

1907

if (pos > inode->i_size) {

1908

i_size_write(inode, pos);

1908

i_size_write(inode, pos);

1909

mark_inode_dirty(inode);

1909

mark_inode_dirty(inode);

1910

}

1910

}

1911

inode->i_blocks = ocfs2_inode_sector_count(inode);

1911

inode->i_blocks = ocfs2_inode_sector_count(inode);

1912

di->i_size = cpu_to_le64((u64)i_size_read(inode));

1912

di->i_size = cpu_to_le64((u64)i_size_read(inode));

1913

inode->i_mtime = inode->i_ctime = CURRENT_TIME;

1913

inode->i_mtime = inode->i_ctime = CURRENT_TIME;

1914

di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);

1914

di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);

1915

di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);

1915

di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);

1916

ocfs2_journal_dirty(handle, wc->w_di_bh);

1916

ocfs2_journal_dirty(handle, wc->w_di_bh);

1917

1918

ocfs2_commit_trans(osb, handle);

1918

ocfs2_commit_trans(osb, handle);

1919

1920

ocfs2_run_deallocs(osb, &wc->w_dealloc);

1920

ocfs2_run_deallocs(osb, &wc->w_dealloc);

1921

1922

ocfs2_free_write_ctxt(wc);

1922

ocfs2_free_write_ctxt(wc);

1923

1924

return copied;

1924

return copied;

1925

}

1925

}

1926

1927

static int ocfs2_write_end(struct file *file, struct address_space *mapping,

1927

static int ocfs2_write_end(struct file *file, struct address_space *mapping,

1928

loff_t pos, unsigned len, unsigned copied,

1928

loff_t pos, unsigned len, unsigned copied,

1929

struct page *page, void *fsdata)

1929

struct page *page, void *fsdata)

1930

{

1930

{

1931

int ret;

1931

int ret;

1932

struct inode *inode = mapping->host;

1932

struct inode *inode = mapping->host;

1933

1934

ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);

1934

ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);

1935

1936

up_write(&OCFS2_I(inode)->ip_alloc_sem);

1936

up_write(&OCFS2_I(inode)->ip_alloc_sem);

1937

ocfs2_inode_unlock(inode, 1);

1937

ocfs2_inode_unlock(inode, 1);

1938

1939

return ret;

1939

return ret;

1940

}

1940

}

1941

1942

const struct address_space_operations ocfs2_aops = {

1942

const struct address_space_operations ocfs2_aops = {

1943

.readpage = ocfs2_readpage,

1943

.readpage = ocfs2_readpage,

1944

.readpages = ocfs2_readpages,

1944

.readpages = ocfs2_readpages,

1945

.writepage = ocfs2_writepage,

1945

.writepage = ocfs2_writepage,

1946

.write_begin = ocfs2_write_begin,

1946

.write_begin = ocfs2_write_begin,

1947

.write_end = ocfs2_write_end,

1947

.write_end = ocfs2_write_end,

1948

.bmap = ocfs2_bmap,

1948

.bmap = ocfs2_bmap,

1949

.sync_page = block_sync_page,

1949

.sync_page = block_sync_page,

1950

.direct_IO = ocfs2_direct_IO,

1950

.direct_IO = ocfs2_direct_IO,

1951

.invalidatepage = ocfs2_invalidatepage,

1951

.invalidatepage = ocfs2_invalidatepage,

1952

.releasepage = ocfs2_releasepage,

1952

.releasepage = ocfs2_releasepage,

1953

.migratepage = buffer_migrate_page,

1953

.migratepage = buffer_migrate_page,

1954

};

1954

};

1955

GITLAB

fs/ocfs2/aops.c: test for IS_ERR rather than 0