Blame view

net/ceph/osdmap.c 26.9 KB
f24e9980e   Sage Weil   ceph: OSD client
1

3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
2
  #include <linux/ceph/ceph_debug.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
3

3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
4
  #include <linux/module.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
5
  #include <linux/slab.h>
f24e9980e   Sage Weil   ceph: OSD client
6
  #include <asm/div64.h>
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
7
8
9
10
11
  #include <linux/ceph/libceph.h>
  #include <linux/ceph/osdmap.h>
  #include <linux/ceph/decode.h>
  #include <linux/crush/hash.h>
  #include <linux/crush/mapper.h>
f24e9980e   Sage Weil   ceph: OSD client
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
  
  /*
   * Render the CEPH_OSD_EXISTS / CEPH_OSD_UP bits of @state as text.
   *
   * @str:   destination buffer; left untouched when @len is 0
   * @len:   size of @str in bytes
   * @state: osd state bits
   *
   * Writes "doesn't exist" when @state is 0.  Otherwise writes "exists"
   * and/or "up", joined by ", " when both bits are set; a non-zero @state
   * with neither bit set yields an empty string.  Returns @str.
   */
  char *ceph_osdmap_state_str(char *str, int len, int state)
  {
  	int exists = state & CEPH_OSD_EXISTS;
  	int up = state & CEPH_OSD_UP;

  	if (!len)
  		return str;

  	if (!state) {
  		snprintf(str, len, "doesn't exist");
  		return str;
  	}

  	/*
  	 * Emit everything in one call.  Appending with
  	 * snprintf(str, len, "%s...", str, ...) would pass the destination
  	 * as one of its own sources, and copying between overlapping
  	 * objects is undefined.
  	 */
  	snprintf(str, len, "%s%s%s",
  		 exists ? "exists" : "",
  		 (exists && up) ? ", " : "",
  		 up ? "up" : "");
  	return str;
  }
  
  /* maps */
  
  /*
   * Number of bits needed to represent @t: 0 for t == 0, otherwise the
   * position of the highest set bit plus one.
   */
  static int calc_bits_of(unsigned t)
  {
  	int nbits;

  	for (nbits = 0; t != 0; t >>= 1)
  		nbits++;
  	return nbits;
  }
  
  /*
   * the foo_mask is the smallest value 2^n-1 that is >= foo.
   */
  static void calc_pg_masks(struct ceph_pg_pool_info *pi)
  {
  	pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1;
  	pi->pgp_num_mask =
  		(1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1;
  	pi->lpg_num_mask =
  		(1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1;
  	pi->lpgp_num_mask =
  		(1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1;
  }
  
  /*
   * decode crush map
   */
  static int crush_decode_uniform_bucket(void **p, void *end,
  				       struct crush_bucket_uniform *b)
  {
  	dout("crush_decode_uniform_bucket %p to %p
  ", *p, end);
  	ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad);
c89136ea4   Sage Weil   ceph: convert enc...
73
  	b->item_weight = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
  	return 0;
  bad:
  	return -EINVAL;
  }
  
  static int crush_decode_list_bucket(void **p, void *end,
  				    struct crush_bucket_list *b)
  {
  	int j;
  	dout("crush_decode_list_bucket %p to %p
  ", *p, end);
  	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
  	if (b->item_weights == NULL)
  		return -ENOMEM;
  	b->sum_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
  	if (b->sum_weights == NULL)
  		return -ENOMEM;
  	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
  	for (j = 0; j < b->h.size; j++) {
c89136ea4   Sage Weil   ceph: convert enc...
93
94
  		b->item_weights[j] = ceph_decode_32(p);
  		b->sum_weights[j] = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
  	}
  	return 0;
  bad:
  	return -EINVAL;
  }
  
  static int crush_decode_tree_bucket(void **p, void *end,
  				    struct crush_bucket_tree *b)
  {
  	int j;
  	dout("crush_decode_tree_bucket %p to %p
  ", *p, end);
  	ceph_decode_32_safe(p, end, b->num_nodes, bad);
  	b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS);
  	if (b->node_weights == NULL)
  		return -ENOMEM;
  	ceph_decode_need(p, end, b->num_nodes * sizeof(u32), bad);
  	for (j = 0; j < b->num_nodes; j++)
c89136ea4   Sage Weil   ceph: convert enc...
113
  		b->node_weights[j] = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
  	return 0;
  bad:
  	return -EINVAL;
  }
  
  static int crush_decode_straw_bucket(void **p, void *end,
  				     struct crush_bucket_straw *b)
  {
  	int j;
  	dout("crush_decode_straw_bucket %p to %p
  ", *p, end);
  	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
  	if (b->item_weights == NULL)
  		return -ENOMEM;
  	b->straws = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
  	if (b->straws == NULL)
  		return -ENOMEM;
  	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
  	for (j = 0; j < b->h.size; j++) {
c89136ea4   Sage Weil   ceph: convert enc...
133
134
  		b->item_weights[j] = ceph_decode_32(p);
  		b->straws[j] = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
  	}
  	return 0;
  bad:
  	return -EINVAL;
  }
  
  static struct crush_map *crush_decode(void *pbyval, void *end)
  {
  	struct crush_map *c;
  	int err = -EINVAL;
  	int i, j;
  	void **p = &pbyval;
  	void *start = pbyval;
  	u32 magic;
  
  	dout("crush_decode %p to %p len %d
  ", *p, end, (int)(end - *p));
  
  	c = kzalloc(sizeof(*c), GFP_NOFS);
  	if (c == NULL)
  		return ERR_PTR(-ENOMEM);
  
  	ceph_decode_need(p, end, 4*sizeof(u32), bad);
c89136ea4   Sage Weil   ceph: convert enc...
158
  	magic = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
159
160
161
162
163
164
  	if (magic != CRUSH_MAGIC) {
  		pr_err("crush_decode magic %x != current %x
  ",
  		       (unsigned)magic, (unsigned)CRUSH_MAGIC);
  		goto bad;
  	}
c89136ea4   Sage Weil   ceph: convert enc...
165
166
167
  	c->max_buckets = ceph_decode_32(p);
  	c->max_rules = ceph_decode_32(p);
  	c->max_devices = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
  
  	c->device_parents = kcalloc(c->max_devices, sizeof(u32), GFP_NOFS);
  	if (c->device_parents == NULL)
  		goto badmem;
  	c->bucket_parents = kcalloc(c->max_buckets, sizeof(u32), GFP_NOFS);
  	if (c->bucket_parents == NULL)
  		goto badmem;
  
  	c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS);
  	if (c->buckets == NULL)
  		goto badmem;
  	c->rules = kcalloc(c->max_rules, sizeof(*c->rules), GFP_NOFS);
  	if (c->rules == NULL)
  		goto badmem;
  
  	/* buckets */
  	for (i = 0; i < c->max_buckets; i++) {
  		int size = 0;
  		u32 alg;
  		struct crush_bucket *b;
  
  		ceph_decode_32_safe(p, end, alg, bad);
  		if (alg == 0) {
  			c->buckets[i] = NULL;
  			continue;
  		}
  		dout("crush_decode bucket %d off %x %p to %p
  ",
  		     i, (int)(*p-start), *p, end);
  
  		switch (alg) {
  		case CRUSH_BUCKET_UNIFORM:
  			size = sizeof(struct crush_bucket_uniform);
  			break;
  		case CRUSH_BUCKET_LIST:
  			size = sizeof(struct crush_bucket_list);
  			break;
  		case CRUSH_BUCKET_TREE:
  			size = sizeof(struct crush_bucket_tree);
  			break;
  		case CRUSH_BUCKET_STRAW:
  			size = sizeof(struct crush_bucket_straw);
  			break;
  		default:
30dc6381b   Sage Weil   ceph: fix error p...
212
  			err = -EINVAL;
f24e9980e   Sage Weil   ceph: OSD client
213
214
215
216
217
218
219
220
  			goto bad;
  		}
  		BUG_ON(size == 0);
  		b = c->buckets[i] = kzalloc(size, GFP_NOFS);
  		if (b == NULL)
  			goto badmem;
  
  		ceph_decode_need(p, end, 4*sizeof(u32), bad);
c89136ea4   Sage Weil   ceph: convert enc...
221
222
  		b->id = ceph_decode_32(p);
  		b->type = ceph_decode_16(p);
fb690390e   Sage Weil   ceph: make CRUSH ...
223
224
  		b->alg = ceph_decode_8(p);
  		b->hash = ceph_decode_8(p);
c89136ea4   Sage Weil   ceph: convert enc...
225
226
  		b->weight = ceph_decode_32(p);
  		b->size = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
  
  		dout("crush_decode bucket size %d off %x %p to %p
  ",
  		     b->size, (int)(*p-start), *p, end);
  
  		b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS);
  		if (b->items == NULL)
  			goto badmem;
  		b->perm = kcalloc(b->size, sizeof(u32), GFP_NOFS);
  		if (b->perm == NULL)
  			goto badmem;
  		b->perm_n = 0;
  
  		ceph_decode_need(p, end, b->size*sizeof(u32), bad);
  		for (j = 0; j < b->size; j++)
c89136ea4   Sage Weil   ceph: convert enc...
242
  			b->items[j] = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
  
  		switch (b->alg) {
  		case CRUSH_BUCKET_UNIFORM:
  			err = crush_decode_uniform_bucket(p, end,
  				  (struct crush_bucket_uniform *)b);
  			if (err < 0)
  				goto bad;
  			break;
  		case CRUSH_BUCKET_LIST:
  			err = crush_decode_list_bucket(p, end,
  			       (struct crush_bucket_list *)b);
  			if (err < 0)
  				goto bad;
  			break;
  		case CRUSH_BUCKET_TREE:
  			err = crush_decode_tree_bucket(p, end,
  				(struct crush_bucket_tree *)b);
  			if (err < 0)
  				goto bad;
  			break;
  		case CRUSH_BUCKET_STRAW:
  			err = crush_decode_straw_bucket(p, end,
  				(struct crush_bucket_straw *)b);
  			if (err < 0)
  				goto bad;
  			break;
  		}
  	}
  
  	/* rules */
  	dout("rule vec is %p
  ", c->rules);
  	for (i = 0; i < c->max_rules; i++) {
  		u32 yes;
  		struct crush_rule *r;
  
  		ceph_decode_32_safe(p, end, yes, bad);
  		if (!yes) {
  			dout("crush_decode NO rule %d off %x %p to %p
  ",
  			     i, (int)(*p-start), *p, end);
  			c->rules[i] = NULL;
  			continue;
  		}
  
  		dout("crush_decode rule %d off %x %p to %p
  ",
  		     i, (int)(*p-start), *p, end);
  
  		/* len */
  		ceph_decode_32_safe(p, end, yes, bad);
  #if BITS_PER_LONG == 32
30dc6381b   Sage Weil   ceph: fix error p...
295
  		err = -EINVAL;
f24e9980e   Sage Weil   ceph: OSD client
296
297
298
299
300
301
302
303
304
305
306
307
308
309
  		if (yes > ULONG_MAX / sizeof(struct crush_rule_step))
  			goto bad;
  #endif
  		r = c->rules[i] = kmalloc(sizeof(*r) +
  					  yes*sizeof(struct crush_rule_step),
  					  GFP_NOFS);
  		if (r == NULL)
  			goto badmem;
  		dout(" rule %d is at %p
  ", i, r);
  		r->len = yes;
  		ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */
  		ceph_decode_need(p, end, r->len*3*sizeof(u32), bad);
  		for (j = 0; j < r->len; j++) {
c89136ea4   Sage Weil   ceph: convert enc...
310
311
312
  			r->steps[j].op = ceph_decode_32(p);
  			r->steps[j].arg1 = ceph_decode_32(p);
  			r->steps[j].arg2 = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
  		}
  	}
  
  	/* ignore trailing name maps. */
  
  	dout("crush_decode success
  ");
  	return c;
  
  badmem:
  	err = -ENOMEM;
  bad:
  	dout("crush_decode fail %d
  ", err);
  	crush_destroy(c);
  	return ERR_PTR(err);
  }
f24e9980e   Sage Weil   ceph: OSD client
330
  /*
9794b146f   Sage Weil   ceph: fix memory ...
331
332
   * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
   * to a set of osds)
f24e9980e   Sage Weil   ceph: OSD client
333
   */
51042122d   Sage Weil   ceph: fix endian ...
334
335
336
337
338
339
340
341
342
343
344
  static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
  {
  	u64 a = *(u64 *)&l;
  	u64 b = *(u64 *)&r;
  
  	if (a < b)
  		return -1;
  	if (a > b)
  		return 1;
  	return 0;
  }
991abb6ec   Sage Weil   ceph: fail gracef...
345
346
  static int __insert_pg_mapping(struct ceph_pg_mapping *new,
  			       struct rb_root *root)
f24e9980e   Sage Weil   ceph: OSD client
347
348
349
350
  {
  	struct rb_node **p = &root->rb_node;
  	struct rb_node *parent = NULL;
  	struct ceph_pg_mapping *pg = NULL;
51042122d   Sage Weil   ceph: fix endian ...
351
  	int c;
f24e9980e   Sage Weil   ceph: OSD client
352

8adc8b3d7   Sage Weil   libceph: fix pg_t...
353
354
  	dout("__insert_pg_mapping %llx %p
  ", *(u64 *)&new->pgid, new);
f24e9980e   Sage Weil   ceph: OSD client
355
356
357
  	while (*p) {
  		parent = *p;
  		pg = rb_entry(parent, struct ceph_pg_mapping, node);
51042122d   Sage Weil   ceph: fix endian ...
358
359
  		c = pgid_cmp(new->pgid, pg->pgid);
  		if (c < 0)
f24e9980e   Sage Weil   ceph: OSD client
360
  			p = &(*p)->rb_left;
51042122d   Sage Weil   ceph: fix endian ...
361
  		else if (c > 0)
f24e9980e   Sage Weil   ceph: OSD client
362
363
  			p = &(*p)->rb_right;
  		else
991abb6ec   Sage Weil   ceph: fail gracef...
364
  			return -EEXIST;
f24e9980e   Sage Weil   ceph: OSD client
365
366
367
368
  	}
  
  	rb_link_node(&new->node, parent, p);
  	rb_insert_color(&new->node, root);
991abb6ec   Sage Weil   ceph: fail gracef...
369
  	return 0;
f24e9980e   Sage Weil   ceph: OSD client
370
  }
9794b146f   Sage Weil   ceph: fix memory ...
371
372
373
374
375
376
377
378
379
380
  static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
  						   struct ceph_pg pgid)
  {
  	struct rb_node *n = root->rb_node;
  	struct ceph_pg_mapping *pg;
  	int c;
  
  	while (n) {
  		pg = rb_entry(n, struct ceph_pg_mapping, node);
  		c = pgid_cmp(pgid, pg->pgid);
8adc8b3d7   Sage Weil   libceph: fix pg_t...
381
  		if (c < 0) {
9794b146f   Sage Weil   ceph: fix memory ...
382
  			n = n->rb_left;
8adc8b3d7   Sage Weil   libceph: fix pg_t...
383
  		} else if (c > 0) {
9794b146f   Sage Weil   ceph: fix memory ...
384
  			n = n->rb_right;
8adc8b3d7   Sage Weil   libceph: fix pg_t...
385
386
387
388
  		} else {
  			dout("__lookup_pg_mapping %llx got %p
  ",
  			     *(u64 *)&pgid, pg);
9794b146f   Sage Weil   ceph: fix memory ...
389
  			return pg;
8adc8b3d7   Sage Weil   libceph: fix pg_t...
390
  		}
9794b146f   Sage Weil   ceph: fix memory ...
391
392
393
  	}
  	return NULL;
  }
8adc8b3d7   Sage Weil   libceph: fix pg_t...
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
  static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
  {
  	struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
  
  	if (pg) {
  		dout("__remove_pg_mapping %llx %p
  ", *(u64 *)&pgid, pg);
  		rb_erase(&pg->node, root);
  		kfree(pg);
  		return 0;
  	}
  	dout("__remove_pg_mapping %llx dne
  ", *(u64 *)&pgid);
  	return -ENOENT;
  }
f24e9980e   Sage Weil   ceph: OSD client
409
  /*
4fc51be8f   Sage Weil   ceph: use rbtree ...
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
   * rbtree of pg pool info
   */
  /*
   * Link @new into the pool rbtree at @root, ordered by pool id.
   *
   * Returns 0 on success, or -EEXIST (tree unchanged) if a pool with the
   * same id is already present.
   */
  static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
  {
  	struct rb_node **link = &root->rb_node;
  	struct rb_node *parent = NULL;

  	/* walk down to the empty slot where @new belongs */
  	while (*link) {
  		struct ceph_pg_pool_info *cur;

  		parent = *link;
  		cur = rb_entry(parent, struct ceph_pg_pool_info, node);
  		if (new->id < cur->id) {
  			link = &parent->rb_left;
  			continue;
  		}
  		if (new->id > cur->id) {
  			link = &parent->rb_right;
  			continue;
  		}
  		return -EEXIST;
  	}

  	rb_link_node(&new->node, parent, link);
  	rb_insert_color(&new->node, root);
  	return 0;
  }
  
  /*
   * Find the pool with the given @id in the rbtree at @root.
   * Returns the pool info, or NULL if there is none.
   */
  static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
  {
  	struct rb_node *n;

  	for (n = root->rb_node; n; ) {
  		struct ceph_pg_pool_info *cur =
  			rb_entry(n, struct ceph_pg_pool_info, node);

  		if (id == cur->id)
  			return cur;
  		n = (id < cur->id) ? n->rb_left : n->rb_right;
  	}
  	return NULL;
  }
7669a2c95   Yehuda Sadeh   ceph: lookup pool...
450
451
452
453
454
455
456
457
458
459
460
461
  /*
   * Look up a pool id by name.
   *
   * Walks every pool in @map in tree order and returns the id of the
   * first one whose name equals @name; pools with no name set are
   * skipped.  Returns -ENOENT if nothing matches.
   */
  int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
  {
  	struct rb_node *rbp = rb_first(&map->pg_pools);

  	while (rbp) {
  		struct ceph_pg_pool_info *pi =
  			rb_entry(rbp, struct ceph_pg_pool_info, node);

  		if (pi->name && !strcmp(pi->name, name))
  			return pi->id;
  		rbp = rb_next(rbp);
  	}
  	return -ENOENT;
  }
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
462
  EXPORT_SYMBOL(ceph_pg_poolid_by_name);
7669a2c95   Yehuda Sadeh   ceph: lookup pool...
463

2844a76a2   Sage Weil   ceph: decode v5 o...
464
465
466
467
468
469
  /*
   * Unlink @pi from the pool rbtree at @root, then free its name and
   * the pool info itself.
   */
  static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
  {
  	char *pool_name;

  	rb_erase(&pi->node, root);
  	pool_name = pi->name;
  	kfree(pool_name);
  	kfree(pi);
  }
73a7e693f   Sage Weil   ceph: fix decodin...
470
  static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
efd7576b2   Sage Weil   ceph: fix pg pool...
471
  {
73a7e693f   Sage Weil   ceph: fix decodin...
472
  	unsigned n, m;
efd7576b2   Sage Weil   ceph: fix pg pool...
473
474
  	ceph_decode_copy(p, &pi->v, sizeof(pi->v));
  	calc_pg_masks(pi);
73a7e693f   Sage Weil   ceph: fix decodin...
475
476
477
478
479
480
481
482
483
484
485
486
  
  	/* num_snaps * snap_info_t */
  	n = le32_to_cpu(pi->v.num_snaps);
  	while (n--) {
  		ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) +
  				 sizeof(struct ceph_timespec), bad);
  		*p += sizeof(u64) +       /* key */
  			1 + sizeof(u64) + /* u8, snapid */
  			sizeof(struct ceph_timespec);
  		m = ceph_decode_32(p);    /* snap name */
  		*p += m;
  	}
efd7576b2   Sage Weil   ceph: fix pg pool...
487
  	*p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
73a7e693f   Sage Weil   ceph: fix decodin...
488
489
490
491
  	return 0;
  
  bad:
  	return -EINVAL;
efd7576b2   Sage Weil   ceph: fix pg pool...
492
  }
2844a76a2   Sage Weil   ceph: decode v5 o...
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
  static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
  {
  	struct ceph_pg_pool_info *pi;
  	u32 num, len, pool;
  
  	ceph_decode_32_safe(p, end, num, bad);
  	dout(" %d pool names
  ", num);
  	while (num--) {
  		ceph_decode_32_safe(p, end, pool, bad);
  		ceph_decode_32_safe(p, end, len, bad);
  		dout("  pool %d len %d
  ", pool, len);
  		pi = __lookup_pg_pool(&map->pg_pools, pool);
  		if (pi) {
  			kfree(pi->name);
  			pi->name = kmalloc(len + 1, GFP_NOFS);
  			if (pi->name) {
  				memcpy(pi->name, *p, len);
  				pi->name[len] = '\0';
  				dout("  name is %s
  ", pi->name);
  			}
  		}
  		*p += len;
  	}
  	return 0;
  
  bad:
  	return -EINVAL;
  }
  
  /*
   * osd map
   */
  void ceph_osdmap_destroy(struct ceph_osdmap *map)
  {
  	dout("osdmap_destroy %p
  ", map);
  	if (map->crush)
  		crush_destroy(map->crush);
  	while (!RB_EMPTY_ROOT(&map->pg_temp)) {
  		struct ceph_pg_mapping *pg =
  			rb_entry(rb_first(&map->pg_temp),
  				 struct ceph_pg_mapping, node);
  		rb_erase(&pg->node, &map->pg_temp);
  		kfree(pg);
  	}
  	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
  		struct ceph_pg_pool_info *pi =
  			rb_entry(rb_first(&map->pg_pools),
  				 struct ceph_pg_pool_info, node);
  		__remove_pg_pool(&map->pg_pools, pi);
  	}
  	kfree(map->osd_state);
  	kfree(map->osd_weight);
  	kfree(map->osd_addr);
  	kfree(map);
  }
  
  /*
   * adjust max osd value.  reallocate arrays.
   *
   * Allocates zeroed state/addr/weight arrays of @max entries, copies
   * over as many old entries as fit, frees the old arrays and installs
   * the new ones.  When growing, the extra entries stay zeroed; when
   * shrinking, the entries past @max are dropped.
   *
   * Returns 0 on success, or -ENOMEM with @map left unchanged.
   */
  static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
  {
  	u8 *state;
  	struct ceph_entity_addr *addr;
  	u32 *weight;

  	state = kcalloc(max, sizeof(*state), GFP_NOFS);
  	addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
  	weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
  	if (state == NULL || addr == NULL || weight == NULL) {
  		kfree(state);
  		kfree(addr);
  		kfree(weight);
  		return -ENOMEM;
  	}

  	/* copy old? */
  	if (map->osd_state) {
  		/*
  		 * The new arrays hold @max entries, so never copy more
  		 * than that: copying the old max_osd entries when the map
  		 * shrinks would write past the end of the new arrays.
  		 */
  		size_t keep = map->max_osd;

  		if (keep > (size_t)max)
  			keep = max;
  		memcpy(state, map->osd_state, keep*sizeof(*state));
  		memcpy(addr, map->osd_addr, keep*sizeof(*addr));
  		memcpy(weight, map->osd_weight, keep*sizeof(*weight));
  		kfree(map->osd_state);
  		kfree(map->osd_addr);
  		kfree(map->osd_weight);
  	}

  	map->osd_state = state;
  	map->osd_weight = weight;
  	map->osd_addr = addr;
  	map->max_osd = max;
  	return 0;
  }
4fc51be8f   Sage Weil   ceph: use rbtree ...
588
  /*
f24e9980e   Sage Weil   ceph: OSD client
589
590
591
592
593
594
595
   * decode a full map.
   */
  struct ceph_osdmap *osdmap_decode(void **p, void *end)
  {
  	struct ceph_osdmap *map;
  	u16 version;
  	u32 len, max, i;
361be8601   Sage Weil   ceph: precede enc...
596
  	u8 ev;
f24e9980e   Sage Weil   ceph: OSD client
597
598
  	int err = -EINVAL;
  	void *start = *p;
4fc51be8f   Sage Weil   ceph: use rbtree ...
599
  	struct ceph_pg_pool_info *pi;
f24e9980e   Sage Weil   ceph: OSD client
600
601
602
603
604
605
606
607
608
609
  
  	dout("osdmap_decode %p to %p len %d
  ", *p, end, (int)(end - *p));
  
  	map = kzalloc(sizeof(*map), GFP_NOFS);
  	if (map == NULL)
  		return ERR_PTR(-ENOMEM);
  	map->pg_temp = RB_ROOT;
  
  	ceph_decode_16_safe(p, end, version, bad);
02f90c610   Sage Weil   ceph: add uid fie...
610
611
612
613
614
615
  	if (version > CEPH_OSDMAP_VERSION) {
  		pr_warning("got unknown v %d > %d of osdmap
  ", version,
  			   CEPH_OSDMAP_VERSION);
  		goto bad;
  	}
f24e9980e   Sage Weil   ceph: OSD client
616
617
618
  
  	ceph_decode_need(p, end, 2*sizeof(u64)+6*sizeof(u32), bad);
  	ceph_decode_copy(p, &map->fsid, sizeof(map->fsid));
c89136ea4   Sage Weil   ceph: convert enc...
619
  	map->epoch = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
620
621
  	ceph_decode_copy(p, &map->created, sizeof(map->created));
  	ceph_decode_copy(p, &map->modified, sizeof(map->modified));
f24e9980e   Sage Weil   ceph: OSD client
622
623
  	ceph_decode_32_safe(p, end, max, bad);
  	while (max--) {
4fc51be8f   Sage Weil   ceph: use rbtree ...
624
  		ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
2844a76a2   Sage Weil   ceph: decode v5 o...
625
  		pi = kzalloc(sizeof(*pi), GFP_NOFS);
4fc51be8f   Sage Weil   ceph: use rbtree ...
626
  		if (!pi)
f24e9980e   Sage Weil   ceph: OSD client
627
  			goto bad;
4fc51be8f   Sage Weil   ceph: use rbtree ...
628
  		pi->id = ceph_decode_32(p);
361be8601   Sage Weil   ceph: precede enc...
629
  		ev = ceph_decode_8(p); /* encoding version */
02f90c610   Sage Weil   ceph: add uid fie...
630
631
632
633
  		if (ev > CEPH_PG_POOL_VERSION) {
  			pr_warning("got unknown v %d > %d of ceph_pg_pool
  ",
  				   ev, CEPH_PG_POOL_VERSION);
b0bbb0be8   Dan Carpenter   ceph: add kfree()...
634
  			kfree(pi);
02f90c610   Sage Weil   ceph: add uid fie...
635
636
  			goto bad;
  		}
73a7e693f   Sage Weil   ceph: fix decodin...
637
  		err = __decode_pool(p, end, pi);
b0aee3516   Jesper Juhl   ceph: Always free...
638
639
  		if (err < 0) {
  			kfree(pi);
73a7e693f   Sage Weil   ceph: fix decodin...
640
  			goto bad;
b0aee3516   Jesper Juhl   ceph: Always free...
641
  		}
4fc51be8f   Sage Weil   ceph: use rbtree ...
642
  		__insert_pg_pool(&map->pg_pools, pi);
f24e9980e   Sage Weil   ceph: OSD client
643
  	}
2844a76a2   Sage Weil   ceph: decode v5 o...
644
645
646
  
  	if (version >= 5 && __decode_pool_names(p, end, map) < 0)
  		goto bad;
4fc51be8f   Sage Weil   ceph: use rbtree ...
647
  	ceph_decode_32_safe(p, end, map->pool_max, bad);
f24e9980e   Sage Weil   ceph: OSD client
648
649
  
  	ceph_decode_32_safe(p, end, map->flags, bad);
c89136ea4   Sage Weil   ceph: convert enc...
650
  	max = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
  
  	/* (re)alloc osd arrays */
  	err = osdmap_set_max_osd(map, max);
  	if (err < 0)
  		goto bad;
  	dout("osdmap_decode max_osd = %d
  ", map->max_osd);
  
  	/* osds */
  	err = -EINVAL;
  	ceph_decode_need(p, end, 3*sizeof(u32) +
  			 map->max_osd*(1 + sizeof(*map->osd_weight) +
  				       sizeof(*map->osd_addr)), bad);
  	*p += 4; /* skip length field (should match max) */
  	ceph_decode_copy(p, map->osd_state, map->max_osd);
  
  	*p += 4; /* skip length field (should match max) */
  	for (i = 0; i < map->max_osd; i++)
c89136ea4   Sage Weil   ceph: convert enc...
669
  		map->osd_weight[i] = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
670
671
672
  
  	*p += 4; /* skip length field (should match max) */
  	ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr));
63f2d2119   Sage Weil   ceph: use fixed e...
673
674
  	for (i = 0; i < map->max_osd; i++)
  		ceph_decode_addr(&map->osd_addr[i]);
f24e9980e   Sage Weil   ceph: OSD client
675
676
677
678
679
  
  	/* pg_temp */
  	ceph_decode_32_safe(p, end, len, bad);
  	for (i = 0; i < len; i++) {
  		int n, j;
51042122d   Sage Weil   ceph: fix endian ...
680
  		struct ceph_pg pgid;
f24e9980e   Sage Weil   ceph: OSD client
681
682
683
  		struct ceph_pg_mapping *pg;
  
  		ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
51042122d   Sage Weil   ceph: fix endian ...
684
  		ceph_decode_copy(p, &pgid, sizeof(pgid));
c89136ea4   Sage Weil   ceph: convert enc...
685
  		n = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
686
  		ceph_decode_need(p, end, n * sizeof(u32), bad);
30dc6381b   Sage Weil   ceph: fix error p...
687
  		err = -ENOMEM;
f24e9980e   Sage Weil   ceph: OSD client
688
  		pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
30dc6381b   Sage Weil   ceph: fix error p...
689
  		if (!pg)
f24e9980e   Sage Weil   ceph: OSD client
690
  			goto bad;
f24e9980e   Sage Weil   ceph: OSD client
691
692
693
  		pg->pgid = pgid;
  		pg->len = n;
  		for (j = 0; j < n; j++)
c89136ea4   Sage Weil   ceph: convert enc...
694
  			pg->osds[j] = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
695

991abb6ec   Sage Weil   ceph: fail gracef...
696
697
698
  		err = __insert_pg_mapping(pg, &map->pg_temp);
  		if (err)
  			goto bad;
51042122d   Sage Weil   ceph: fix endian ...
699
700
  		dout(" added pg_temp %llx len %d
  ", *(u64 *)&pgid, len);
f24e9980e   Sage Weil   ceph: OSD client
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
  	}
  
  	/* crush */
  	ceph_decode_32_safe(p, end, len, bad);
  	dout("osdmap_decode crush len %d from off 0x%x
  ", len,
  	     (int)(*p - start));
  	ceph_decode_need(p, end, len, bad);
  	map->crush = crush_decode(*p, end);
  	*p += len;
  	if (IS_ERR(map->crush)) {
  		err = PTR_ERR(map->crush);
  		map->crush = NULL;
  		goto bad;
  	}
  
  	/* ignore the rest of the map */
  	*p = end;
  
  	dout("osdmap_decode done %p %p
  ", *p, end);
  	return map;
  
  bad:
  	dout("osdmap_decode fail
  ");
  	ceph_osdmap_destroy(map);
  	return ERR_PTR(err);
  }
  
  /*
   * decode and apply an incremental map update.
   */
  struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
  					     struct ceph_osdmap *map,
  					     struct ceph_messenger *msgr)
  {
f24e9980e   Sage Weil   ceph: OSD client
738
739
740
741
742
  	struct crush_map *newcrush = NULL;
  	struct ceph_fsid fsid;
  	u32 epoch = 0;
  	struct ceph_timespec modified;
  	u32 len, pool;
4fc51be8f   Sage Weil   ceph: use rbtree ...
743
  	__s32 new_pool_max, new_flags, max;
f24e9980e   Sage Weil   ceph: OSD client
744
745
746
  	void *start = *p;
  	int err = -EINVAL;
  	u16 version;
f24e9980e   Sage Weil   ceph: OSD client
747
748
  
  	ceph_decode_16_safe(p, end, version, bad);
02f90c610   Sage Weil   ceph: add uid fie...
749
750
751
752
753
754
  	if (version > CEPH_OSDMAP_INC_VERSION) {
  		pr_warning("got unknown v %d > %d of inc osdmap
  ", version,
  			   CEPH_OSDMAP_INC_VERSION);
  		goto bad;
  	}
f24e9980e   Sage Weil   ceph: OSD client
755
756
757
758
  
  	ceph_decode_need(p, end, sizeof(fsid)+sizeof(modified)+2*sizeof(u32),
  			 bad);
  	ceph_decode_copy(p, &fsid, sizeof(fsid));
c89136ea4   Sage Weil   ceph: convert enc...
759
  	epoch = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
760
761
  	BUG_ON(epoch != map->epoch+1);
  	ceph_decode_copy(p, &modified, sizeof(modified));
4fc51be8f   Sage Weil   ceph: use rbtree ...
762
  	new_pool_max = ceph_decode_32(p);
c89136ea4   Sage Weil   ceph: convert enc...
763
  	new_flags = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
764
765
766
767
768
769
770
  
  	/* full map? */
  	ceph_decode_32_safe(p, end, len, bad);
  	if (len > 0) {
  		dout("apply_incremental full map len %d, %p to %p
  ",
  		     len, *p, end);
30dc6381b   Sage Weil   ceph: fix error p...
771
  		return osdmap_decode(p, min(*p+len, end));
f24e9980e   Sage Weil   ceph: OSD client
772
773
774
775
776
777
778
779
780
781
  	}
  
  	/* new crush? */
  	ceph_decode_32_safe(p, end, len, bad);
  	if (len > 0) {
  		dout("apply_incremental new crush map len %d, %p to %p
  ",
  		     len, *p, end);
  		newcrush = crush_decode(*p, min(*p+len, end));
  		if (IS_ERR(newcrush))
7e34bc524   Julia Lawall   fs/ceph: Use ERR_...
782
  			return ERR_CAST(newcrush);
cebc5be6b   Sage Weil   ceph: fix crush m...
783
  		*p += len;
f24e9980e   Sage Weil   ceph: OSD client
784
785
786
787
788
  	}
  
  	/* new flags? */
  	if (new_flags >= 0)
  		map->flags = new_flags;
4fc51be8f   Sage Weil   ceph: use rbtree ...
789
790
  	if (new_pool_max >= 0)
  		map->pool_max = new_pool_max;
f24e9980e   Sage Weil   ceph: OSD client
791
792
793
794
  
  	ceph_decode_need(p, end, 5*sizeof(u32), bad);
  
  	/* new max? */
c89136ea4   Sage Weil   ceph: convert enc...
795
  	max = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
796
797
798
799
800
801
802
  	if (max >= 0) {
  		err = osdmap_set_max_osd(map, max);
  		if (err < 0)
  			goto bad;
  	}
  
  	map->epoch++;
31456665a   Sage Weil   libceph: fix osdm...
803
  	map->modified = modified;
f24e9980e   Sage Weil   ceph: OSD client
804
805
806
807
808
809
810
811
812
813
  	if (newcrush) {
  		if (map->crush)
  			crush_destroy(map->crush);
  		map->crush = newcrush;
  		newcrush = NULL;
  	}
  
  	/* new_pool */
  	ceph_decode_32_safe(p, end, len, bad);
  	while (len--) {
361be8601   Sage Weil   ceph: precede enc...
814
  		__u8 ev;
4fc51be8f   Sage Weil   ceph: use rbtree ...
815
  		struct ceph_pg_pool_info *pi;
361be8601   Sage Weil   ceph: precede enc...
816

f24e9980e   Sage Weil   ceph: OSD client
817
  		ceph_decode_32_safe(p, end, pool, bad);
4fc51be8f   Sage Weil   ceph: use rbtree ...
818
  		ceph_decode_need(p, end, 1 + sizeof(pi->v), bad);
361be8601   Sage Weil   ceph: precede enc...
819
  		ev = ceph_decode_8(p);  /* encoding version */
02f90c610   Sage Weil   ceph: add uid fie...
820
821
822
823
824
825
  		if (ev > CEPH_PG_POOL_VERSION) {
  			pr_warning("got unknown v %d > %d of ceph_pg_pool
  ",
  				   ev, CEPH_PG_POOL_VERSION);
  			goto bad;
  		}
4fc51be8f   Sage Weil   ceph: use rbtree ...
826
827
  		pi = __lookup_pg_pool(&map->pg_pools, pool);
  		if (!pi) {
2844a76a2   Sage Weil   ceph: decode v5 o...
828
  			pi = kzalloc(sizeof(*pi), GFP_NOFS);
4fc51be8f   Sage Weil   ceph: use rbtree ...
829
830
831
832
833
834
835
  			if (!pi) {
  				err = -ENOMEM;
  				goto bad;
  			}
  			pi->id = pool;
  			__insert_pg_pool(&map->pg_pools, pi);
  		}
73a7e693f   Sage Weil   ceph: fix decodin...
836
837
838
  		err = __decode_pool(p, end, pi);
  		if (err < 0)
  			goto bad;
f24e9980e   Sage Weil   ceph: OSD client
839
  	}
2844a76a2   Sage Weil   ceph: decode v5 o...
840
841
  	if (version >= 5 && __decode_pool_names(p, end, map) < 0)
  		goto bad;
f24e9980e   Sage Weil   ceph: OSD client
842

4fc51be8f   Sage Weil   ceph: use rbtree ...
843
  	/* old_pool */
f24e9980e   Sage Weil   ceph: OSD client
844
  	ceph_decode_32_safe(p, end, len, bad);
4fc51be8f   Sage Weil   ceph: use rbtree ...
845
846
847
848
849
  	while (len--) {
  		struct ceph_pg_pool_info *pi;
  
  		ceph_decode_32_safe(p, end, pool, bad);
  		pi = __lookup_pg_pool(&map->pg_pools, pool);
2844a76a2   Sage Weil   ceph: decode v5 o...
850
851
  		if (pi)
  			__remove_pg_pool(&map->pg_pools, pi);
4fc51be8f   Sage Weil   ceph: use rbtree ...
852
  	}
f24e9980e   Sage Weil   ceph: OSD client
853
854
855
856
857
858
859
860
861
  
  	/* new_up */
  	err = -EINVAL;
  	ceph_decode_32_safe(p, end, len, bad);
  	while (len--) {
  		u32 osd;
  		struct ceph_entity_addr addr;
  		ceph_decode_32_safe(p, end, osd, bad);
  		ceph_decode_copy_safe(p, end, &addr, sizeof(addr), bad);
63f2d2119   Sage Weil   ceph: use fixed e...
862
  		ceph_decode_addr(&addr);
f24e9980e   Sage Weil   ceph: OSD client
863
864
865
866
867
868
  		pr_info("osd%d up
  ", osd);
  		BUG_ON(osd >= map->max_osd);
  		map->osd_state[osd] |= CEPH_OSD_UP;
  		map->osd_addr[osd] = addr;
  	}
7662d8ff5   Sage Weil   libceph: handle n...
869
  	/* new_state */
f24e9980e   Sage Weil   ceph: OSD client
870
871
872
  	ceph_decode_32_safe(p, end, len, bad);
  	while (len--) {
  		u32 osd;
7662d8ff5   Sage Weil   libceph: handle n...
873
  		u8 xorstate;
f24e9980e   Sage Weil   ceph: OSD client
874
  		ceph_decode_32_safe(p, end, osd, bad);
7662d8ff5   Sage Weil   libceph: handle n...
875
  		xorstate = **(u8 **)p;
f24e9980e   Sage Weil   ceph: OSD client
876
  		(*p)++;  /* clean flag */
7662d8ff5   Sage Weil   libceph: handle n...
877
878
879
880
881
  		if (xorstate == 0)
  			xorstate = CEPH_OSD_UP;
  		if (xorstate & CEPH_OSD_UP)
  			pr_info("osd%d down
  ", osd);
f24e9980e   Sage Weil   ceph: OSD client
882
  		if (osd < map->max_osd)
7662d8ff5   Sage Weil   libceph: handle n...
883
  			map->osd_state[osd] ^= xorstate;
f24e9980e   Sage Weil   ceph: OSD client
884
885
886
887
888
889
890
  	}
  
  	/* new_weight */
  	ceph_decode_32_safe(p, end, len, bad);
  	while (len--) {
  		u32 osd, off;
  		ceph_decode_need(p, end, sizeof(u32)*2, bad);
c89136ea4   Sage Weil   ceph: convert enc...
891
892
  		osd = ceph_decode_32(p);
  		off = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
893
894
895
896
897
898
899
900
901
  		pr_info("osd%d weight 0x%x %s
  ", osd, off,
  		     off == CEPH_OSD_IN ? "(in)" :
  		     (off == CEPH_OSD_OUT ? "(out)" : ""));
  		if (osd < map->max_osd)
  			map->osd_weight[osd] = off;
  	}
  
  	/* new_pg_temp */
f24e9980e   Sage Weil   ceph: OSD client
902
903
904
905
  	ceph_decode_32_safe(p, end, len, bad);
  	while (len--) {
  		struct ceph_pg_mapping *pg;
  		int j;
51042122d   Sage Weil   ceph: fix endian ...
906
  		struct ceph_pg pgid;
f24e9980e   Sage Weil   ceph: OSD client
907
908
  		u32 pglen;
  		ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
51042122d   Sage Weil   ceph: fix endian ...
909
  		ceph_decode_copy(p, &pgid, sizeof(pgid));
c89136ea4   Sage Weil   ceph: convert enc...
910
  		pglen = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
911

f24e9980e   Sage Weil   ceph: OSD client
912
913
914
915
916
917
918
919
920
921
  		if (pglen) {
  			/* insert */
  			ceph_decode_need(p, end, pglen*sizeof(u32), bad);
  			pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
  			if (!pg) {
  				err = -ENOMEM;
  				goto bad;
  			}
  			pg->pgid = pgid;
  			pg->len = pglen;
7067f797b   Sage Weil   ceph: fix increme...
922
  			for (j = 0; j < pglen; j++)
c89136ea4   Sage Weil   ceph: convert enc...
923
  				pg->osds[j] = ceph_decode_32(p);
991abb6ec   Sage Weil   ceph: fail gracef...
924
  			err = __insert_pg_mapping(pg, &map->pg_temp);
bc4fdca85   Sage Weil   ceph: fix pg_mapp...
925
926
  			if (err) {
  				kfree(pg);
991abb6ec   Sage Weil   ceph: fail gracef...
927
  				goto bad;
bc4fdca85   Sage Weil   ceph: fix pg_mapp...
928
  			}
51042122d   Sage Weil   ceph: fix endian ...
929
930
931
  			dout(" added pg_temp %llx len %d
  ", *(u64 *)&pgid,
  			     pglen);
8adc8b3d7   Sage Weil   libceph: fix pg_t...
932
933
934
  		} else {
  			/* remove */
  			__remove_pg_mapping(&map->pg_temp, pgid);
f24e9980e   Sage Weil   ceph: OSD client
935
936
  		}
  	}
f24e9980e   Sage Weil   ceph: OSD client
937
938
939
940
941
942
943
944
945
  
  	/* ignore the rest */
  	*p = end;
  	return map;
  
  bad:
  	pr_err("corrupt inc osdmap epoch %d off %d (%p of %p-%p)
  ",
  	       epoch, (int)(*p - start), *p, start, end);
9ec7cab14   Sage Weil   ceph: hex dump co...
946
947
948
  	print_hex_dump(KERN_DEBUG, "osdmap: ",
  		       DUMP_PREFIX_OFFSET, 16, 1,
  		       start, end - start, true);
f24e9980e   Sage Weil   ceph: OSD client
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
  	if (newcrush)
  		crush_destroy(newcrush);
  	return ERR_PTR(err);
  }
  
  
  
  
  /*
   * Map a file extent onto a single object extent.
   *
   * Given a file offset @off and a requested length *@plen, compute from
   * @layout (object size, stripe unit, stripe count):
   *   *@ono   - number of the object that holds @off
   *   *@oxoff - byte offset of the extent inside that object
   *   *@oxlen - length of the extent inside that object
   * *@plen is shortened to *@oxlen, because only the remainder of the
   * current stripe unit is mapped per call (there is no way yet to hand
   * a stride back to the caller).
   *
   * All quotients are kept in 64 bits so that large file offsets are not
   * truncated before the object number is computed.
   *
   * NOTE: the stripe unit, stripe count and object_size / stripe_unit
   * are all used as divisors; the caller must supply a layout in which
   * none of them is zero.
   */
  void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
  				   u64 off, u64 *plen,
  				   u64 *ono,
  				   u64 *oxoff, u64 *oxlen)
  {
  	u32 osize = le32_to_cpu(layout->fl_object_size);
  	u32 su = le32_to_cpu(layout->fl_stripe_unit);
  	u32 sc = le32_to_cpu(layout->fl_stripe_count);
  	u32 su_per_object;
  	u32 su_offset, stripepos, su_index;
  	u64 bl, stripeno, objsetno;
  
  	dout("mapping %llu~%llu  osize %u fl_su %u\n", off, *plen,
  	     osize, su);
  	su_per_object = osize / su;
  	dout("osize %u / su %u = su_per_object %u\n", osize, su,
  	     su_per_object);
  
  	BUG_ON((su & ~PAGE_MASK) != 0);
  
  	/* bl = off / su;  su_offset = off % su (offset within the su) */
  	bl = off;
  	su_offset = do_div(bl, su);
  	dout("off %llu / su %u = bl %llu\n", off, su, bl);
  
  	/* stripeno = bl / sc;  stripepos = bl % sc */
  	stripeno = bl;
  	stripepos = do_div(stripeno, sc);
  
  	/*
  	 * objsetno = stripeno / su_per_object;
  	 * su_index = stripeno % su_per_object (which su inside the object)
  	 */
  	objsetno = stripeno;
  	su_index = do_div(objsetno, su_per_object);
  
  	*ono = objsetno * sc + stripepos;
  	dout("objset %llu * sc %u = ono %llu\n", objsetno, sc, *ono);
  
  	*oxoff = su_offset + (u64)su_index * su;
  
  	/*
  	 * The extent length is the smaller of the requested length and
  	 * what is left of the stripe unit that @off falls into.
  	 */
  	*oxlen = min_t(u64, *plen, su - su_offset);
  	*plen = *oxlen;
  
  	dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
  }
  EXPORT_SYMBOL(ceph_calc_file_object_mapping);
f24e9980e   Sage Weil   ceph: OSD client
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
  
  /*
   * calculate an object layout (i.e. pgid) from an oid,
   * file_layout, and osdmap
   */
  int ceph_calc_object_layout(struct ceph_object_layout *ol,
  			    const char *oid,
  			    struct ceph_file_layout *fl,
  			    struct ceph_osdmap *osdmap)
  {
  	unsigned num, num_mask;
51042122d   Sage Weil   ceph: fix endian ...
1028
  	struct ceph_pg pgid;
f24e9980e   Sage Weil   ceph: OSD client
1029
1030
1031
  	s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred);
  	int poolid = le32_to_cpu(fl->fl_pg_pool);
  	struct ceph_pg_pool_info *pool;
51042122d   Sage Weil   ceph: fix endian ...
1032
  	unsigned ps;
f24e9980e   Sage Weil   ceph: OSD client
1033

30dc6381b   Sage Weil   ceph: fix error p...
1034
  	BUG_ON(!osdmap);
f24e9980e   Sage Weil   ceph: OSD client
1035

4fc51be8f   Sage Weil   ceph: use rbtree ...
1036
1037
1038
  	pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
  	if (!pool)
  		return -EIO;
1654dd0cf   Sage Weil   ceph: make object...
1039
  	ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
f24e9980e   Sage Weil   ceph: OSD client
1040
  	if (preferred >= 0) {
51042122d   Sage Weil   ceph: fix endian ...
1041
  		ps += preferred;
f24e9980e   Sage Weil   ceph: OSD client
1042
1043
1044
1045
1046
1047
  		num = le32_to_cpu(pool->v.lpg_num);
  		num_mask = pool->lpg_num_mask;
  	} else {
  		num = le32_to_cpu(pool->v.pg_num);
  		num_mask = pool->pg_num_mask;
  	}
51042122d   Sage Weil   ceph: fix endian ...
1048
1049
1050
  	pgid.ps = cpu_to_le16(ps);
  	pgid.preferred = cpu_to_le16(preferred);
  	pgid.pool = fl->fl_pg_pool;
f24e9980e   Sage Weil   ceph: OSD client
1051
  	if (preferred >= 0)
51042122d   Sage Weil   ceph: fix endian ...
1052
1053
1054
  		dout("calc_object_layout '%s' pgid %d.%xp%d
  ", oid, poolid, ps,
  		     (int)preferred);
f24e9980e   Sage Weil   ceph: OSD client
1055
  	else
51042122d   Sage Weil   ceph: fix endian ...
1056
1057
  		dout("calc_object_layout '%s' pgid %d.%x
  ", oid, poolid, ps);
f24e9980e   Sage Weil   ceph: OSD client
1058

51042122d   Sage Weil   ceph: fix endian ...
1059
  	ol->ol_pgid = pgid;
f24e9980e   Sage Weil   ceph: OSD client
1060
  	ol->ol_stripe_unit = fl->fl_object_stripe_unit;
f24e9980e   Sage Weil   ceph: OSD client
1061
1062
  	return 0;
  }
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
1063
  EXPORT_SYMBOL(ceph_calc_object_layout);
f24e9980e   Sage Weil   ceph: OSD client
1064
1065
1066
1067
1068
  
  /*
   * Calculate raw osd vector for the given pgid.  Return pointer to osd
   * array, or NULL on failure.
   */
51042122d   Sage Weil   ceph: fix endian ...
1069
  static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
f24e9980e   Sage Weil   ceph: OSD client
1070
1071
  			int *osds, int *num)
  {
f24e9980e   Sage Weil   ceph: OSD client
1072
1073
1074
  	struct ceph_pg_mapping *pg;
  	struct ceph_pg_pool_info *pool;
  	int ruleno;
782e182e9   Sage Weil   libceph: fix pg_t...
1075
  	unsigned poolid, ps, pps, t;
51042122d   Sage Weil   ceph: fix endian ...
1076
  	int preferred;
f24e9980e   Sage Weil   ceph: OSD client
1077

782e182e9   Sage Weil   libceph: fix pg_t...
1078
1079
1080
1081
1082
1083
1084
  	poolid = le32_to_cpu(pgid.pool);
  	ps = le16_to_cpu(pgid.ps);
  	preferred = (s16)le16_to_cpu(pgid.preferred);
  
  	pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
  	if (!pool)
  		return NULL;
f24e9980e   Sage Weil   ceph: OSD client
1085
  	/* pg_temp? */
782e182e9   Sage Weil   libceph: fix pg_t...
1086
1087
1088
1089
1090
1091
1092
  	if (preferred >= 0)
  		t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num),
  				    pool->lpgp_num_mask);
  	else
  		t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
  				    pool->pgp_num_mask);
  	pgid.ps = cpu_to_le16(t);
9794b146f   Sage Weil   ceph: fix memory ...
1093
1094
1095
1096
  	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
  	if (pg) {
  		*num = pg->len;
  		return pg->osds;
f24e9980e   Sage Weil   ceph: OSD client
1097
1098
1099
  	}
  
  	/* crush */
f24e9980e   Sage Weil   ceph: OSD client
1100
1101
1102
  	ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset,
  				 pool->v.type, pool->v.size);
  	if (ruleno < 0) {
effcb9ed4   Sage Weil   ceph: print usefu...
1103
1104
1105
1106
  		pr_err("no crush rule pool %d ruleset %d type %d size %d
  ",
  		       poolid, pool->v.crush_ruleset, pool->v.type,
  		       pool->v.size);
f24e9980e   Sage Weil   ceph: OSD client
1107
1108
  		return NULL;
  	}
782e182e9   Sage Weil   libceph: fix pg_t...
1109
1110
1111
1112
  	/* don't forcefeed bad device ids to crush */
  	if (preferred >= osdmap->max_osd ||
  	    preferred >= osdmap->crush->max_devices)
  		preferred = -1;
51042122d   Sage Weil   ceph: fix endian ...
1113
1114
  	if (preferred >= 0)
  		pps = ceph_stable_mod(ps,
f24e9980e   Sage Weil   ceph: OSD client
1115
1116
1117
  				      le32_to_cpu(pool->v.lpgp_num),
  				      pool->lpgp_num_mask);
  	else
51042122d   Sage Weil   ceph: fix endian ...
1118
  		pps = ceph_stable_mod(ps,
f24e9980e   Sage Weil   ceph: OSD client
1119
1120
  				      le32_to_cpu(pool->v.pgp_num),
  				      pool->pgp_num_mask);
51042122d   Sage Weil   ceph: fix endian ...
1121
  	pps += poolid;
f24e9980e   Sage Weil   ceph: OSD client
1122
1123
  	*num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
  			     min_t(int, pool->v.size, *num),
51042122d   Sage Weil   ceph: fix endian ...
1124
  			     preferred, osdmap->osd_weight);
f24e9980e   Sage Weil   ceph: OSD client
1125
1126
1127
1128
  	return osds;
  }
  
  /*
   * Fill @acting with the acting set for @pgid: every osd of the raw
   * mapping that is currently up, in raw-mapping order.
   *
   * Returns the number of entries stored in @acting, or -1 if no raw
   * mapping could be calculated.
   */
  int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
  			int *acting)
  {
  	int raw[CEPH_PG_MAX_SIZE];
  	int nraw = CEPH_PG_MAX_SIZE;
  	int nacting = 0;
  	int *cand;
  	int idx;
  
  	cand = calc_pg_raw(osdmap, pgid, raw, &nraw);
  	if (!cand)
  		return -1;
  
  	/* keep only the osds that are up, preserving their order */
  	for (idx = 0; idx < nraw; idx++) {
  		if (!ceph_osd_is_up(osdmap, cand[idx]))
  			continue;
  		acting[nacting] = cand[idx];
  		nacting++;
  	}
  	return nacting;
  }
  
  /*
f24e9980e   Sage Weil   ceph: OSD client
1150
1151
   * Return primary osd for given pgid, or -1 if none.
   */
51042122d   Sage Weil   ceph: fix endian ...
1152
  int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
f24e9980e   Sage Weil   ceph: OSD client
1153
  {
d85b70566   Sage Weil   ceph: resubmit re...
1154
1155
  	int rawosds[CEPH_PG_MAX_SIZE], *osds;
  	int i, num = CEPH_PG_MAX_SIZE;
f24e9980e   Sage Weil   ceph: OSD client
1156
1157
1158
1159
1160
1161
1162
  
  	osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
  	if (!osds)
  		return -1;
  
  	/* primary is first up osd */
  	for (i = 0; i < num; i++)
d85b70566   Sage Weil   ceph: resubmit re...
1163
  		if (ceph_osd_is_up(osdmap, osds[i]))
f24e9980e   Sage Weil   ceph: OSD client
1164
  			return osds[i];
f24e9980e   Sage Weil   ceph: OSD client
1165
1166
  	return -1;
  }
3d14c5d2b   Yehuda Sadeh   ceph: factor out ...
1167
  EXPORT_SYMBOL(ceph_calc_pg_primary);