Blame view

fs/ceph/osdmap.c 25.5 KB
f24e9980e   Sage Weil   ceph: OSD client
1

5a0e3ad6a   Tejun Heo   include cleanup: ...
2
3
4
  #include "ceph_debug.h"
  
  #include <linux/slab.h>
f24e9980e   Sage Weil   ceph: OSD client
5
6
7
8
9
10
11
  #include <asm/div64.h>
  
  #include "super.h"
  #include "osdmap.h"
  #include "crush/hash.h"
  #include "crush/mapper.h"
  #include "decode.h"
f24e9980e   Sage Weil   ceph: OSD client
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
  
  /*
   * Format an osd state bitmask as a human-readable string.
   *
   * Writes at most @len bytes (including the terminating NUL) into @str
   * and returns @str.  If @len is 0 the buffer is not touched.
   *
   * A zero @state yields "doesn't exist".  Otherwise the result lists
   * "exists" and/or "up" (separated by ", ") for the CEPH_OSD_EXISTS and
   * CEPH_OSD_UP bits, and is the empty string if neither bit is set.
   */
  char *ceph_osdmap_state_str(char *str, int len, int state)
  {
  	const char *exists, *up, *sep;
  
  	if (!len)
  		return str;
  
  	if (!state) {
  		snprintf(str, len, "doesn't exist");
  		return str;
  	}
  
  	/*
  	 * Assemble the string in one call from constant pieces.  Passing
  	 * str as both the destination and a %s argument is undefined
  	 * behaviour, so the buffer is never read back.
  	 */
  	exists = (state & CEPH_OSD_EXISTS) ? "exists" : "";
  	up = (state & CEPH_OSD_UP) ? "up" : "";
  	sep = (*exists && *up) ? ", " : "";
  	snprintf(str, len, "%s%s%s", exists, sep, up);
  	return str;
  }
  
  /* maps */
  
  /*
   * Return the number of bits needed to represent @t, i.e. the position
   * of its highest set bit counted from 1 (0 when @t is 0).
   */
  static int calc_bits_of(unsigned t)
  {
  	int bits;
  
  	for (bits = 0; t != 0; t >>= 1)
  		bits++;
  	return bits;
  }
  
  /*
   * Fill in the four pg bitmasks of @pi from the little-endian counts in
   * pi->v.  Each mask is 2^n - 1, where n is the number of bits needed
   * to represent (count - 1).
   */
  static void calc_pg_masks(struct ceph_pg_pool_info *pi)
  {
  	int pg_bits = calc_bits_of(le32_to_cpu(pi->v.pg_num) - 1);
  	int pgp_bits = calc_bits_of(le32_to_cpu(pi->v.pgp_num) - 1);
  	int lpg_bits = calc_bits_of(le32_to_cpu(pi->v.lpg_num) - 1);
  	int lpgp_bits = calc_bits_of(le32_to_cpu(pi->v.lpgp_num) - 1);
  
  	pi->pg_num_mask = (1 << pg_bits) - 1;
  	pi->pgp_num_mask = (1 << pgp_bits) - 1;
  	pi->lpg_num_mask = (1 << lpg_bits) - 1;
  	pi->lpgp_num_mask = (1 << lpgp_bits) - 1;
  }
  
  /*
   * decode crush map
   */
  static int crush_decode_uniform_bucket(void **p, void *end,
  				       struct crush_bucket_uniform *b)
  {
  	dout("crush_decode_uniform_bucket %p to %p
  ", *p, end);
  	ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad);
c89136ea4   Sage Weil   ceph: convert enc...
73
  	b->item_weight = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
  	return 0;
  bad:
  	return -EINVAL;
  }
  
  static int crush_decode_list_bucket(void **p, void *end,
  				    struct crush_bucket_list *b)
  {
  	int j;
  	dout("crush_decode_list_bucket %p to %p
  ", *p, end);
  	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
  	if (b->item_weights == NULL)
  		return -ENOMEM;
  	b->sum_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
  	if (b->sum_weights == NULL)
  		return -ENOMEM;
  	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
  	for (j = 0; j < b->h.size; j++) {
c89136ea4   Sage Weil   ceph: convert enc...
93
94
  		b->item_weights[j] = ceph_decode_32(p);
  		b->sum_weights[j] = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
  	}
  	return 0;
  bad:
  	return -EINVAL;
  }
  
  static int crush_decode_tree_bucket(void **p, void *end,
  				    struct crush_bucket_tree *b)
  {
  	int j;
  	dout("crush_decode_tree_bucket %p to %p
  ", *p, end);
  	ceph_decode_32_safe(p, end, b->num_nodes, bad);
  	b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS);
  	if (b->node_weights == NULL)
  		return -ENOMEM;
  	ceph_decode_need(p, end, b->num_nodes * sizeof(u32), bad);
  	for (j = 0; j < b->num_nodes; j++)
c89136ea4   Sage Weil   ceph: convert enc...
113
  		b->node_weights[j] = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
  	return 0;
  bad:
  	return -EINVAL;
  }
  
  static int crush_decode_straw_bucket(void **p, void *end,
  				     struct crush_bucket_straw *b)
  {
  	int j;
  	dout("crush_decode_straw_bucket %p to %p
  ", *p, end);
  	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
  	if (b->item_weights == NULL)
  		return -ENOMEM;
  	b->straws = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
  	if (b->straws == NULL)
  		return -ENOMEM;
  	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
  	for (j = 0; j < b->h.size; j++) {
c89136ea4   Sage Weil   ceph: convert enc...
133
134
  		b->item_weights[j] = ceph_decode_32(p);
  		b->straws[j] = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
  	}
  	return 0;
  bad:
  	return -EINVAL;
  }
  
  static struct crush_map *crush_decode(void *pbyval, void *end)
  {
  	struct crush_map *c;
  	int err = -EINVAL;
  	int i, j;
  	void **p = &pbyval;
  	void *start = pbyval;
  	u32 magic;
  
  	dout("crush_decode %p to %p len %d
  ", *p, end, (int)(end - *p));
  
  	c = kzalloc(sizeof(*c), GFP_NOFS);
  	if (c == NULL)
  		return ERR_PTR(-ENOMEM);
  
  	ceph_decode_need(p, end, 4*sizeof(u32), bad);
c89136ea4   Sage Weil   ceph: convert enc...
158
  	magic = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
159
160
161
162
163
164
  	if (magic != CRUSH_MAGIC) {
  		pr_err("crush_decode magic %x != current %x
  ",
  		       (unsigned)magic, (unsigned)CRUSH_MAGIC);
  		goto bad;
  	}
c89136ea4   Sage Weil   ceph: convert enc...
165
166
167
  	c->max_buckets = ceph_decode_32(p);
  	c->max_rules = ceph_decode_32(p);
  	c->max_devices = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
  
  	c->device_parents = kcalloc(c->max_devices, sizeof(u32), GFP_NOFS);
  	if (c->device_parents == NULL)
  		goto badmem;
  	c->bucket_parents = kcalloc(c->max_buckets, sizeof(u32), GFP_NOFS);
  	if (c->bucket_parents == NULL)
  		goto badmem;
  
  	c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS);
  	if (c->buckets == NULL)
  		goto badmem;
  	c->rules = kcalloc(c->max_rules, sizeof(*c->rules), GFP_NOFS);
  	if (c->rules == NULL)
  		goto badmem;
  
  	/* buckets */
  	for (i = 0; i < c->max_buckets; i++) {
  		int size = 0;
  		u32 alg;
  		struct crush_bucket *b;
  
  		ceph_decode_32_safe(p, end, alg, bad);
  		if (alg == 0) {
  			c->buckets[i] = NULL;
  			continue;
  		}
  		dout("crush_decode bucket %d off %x %p to %p
  ",
  		     i, (int)(*p-start), *p, end);
  
  		switch (alg) {
  		case CRUSH_BUCKET_UNIFORM:
  			size = sizeof(struct crush_bucket_uniform);
  			break;
  		case CRUSH_BUCKET_LIST:
  			size = sizeof(struct crush_bucket_list);
  			break;
  		case CRUSH_BUCKET_TREE:
  			size = sizeof(struct crush_bucket_tree);
  			break;
  		case CRUSH_BUCKET_STRAW:
  			size = sizeof(struct crush_bucket_straw);
  			break;
  		default:
30dc6381b   Sage Weil   ceph: fix error p...
212
  			err = -EINVAL;
f24e9980e   Sage Weil   ceph: OSD client
213
214
215
216
217
218
219
220
  			goto bad;
  		}
  		BUG_ON(size == 0);
  		b = c->buckets[i] = kzalloc(size, GFP_NOFS);
  		if (b == NULL)
  			goto badmem;
  
  		ceph_decode_need(p, end, 4*sizeof(u32), bad);
c89136ea4   Sage Weil   ceph: convert enc...
221
222
  		b->id = ceph_decode_32(p);
  		b->type = ceph_decode_16(p);
fb690390e   Sage Weil   ceph: make CRUSH ...
223
224
  		b->alg = ceph_decode_8(p);
  		b->hash = ceph_decode_8(p);
c89136ea4   Sage Weil   ceph: convert enc...
225
226
  		b->weight = ceph_decode_32(p);
  		b->size = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
  
  		dout("crush_decode bucket size %d off %x %p to %p
  ",
  		     b->size, (int)(*p-start), *p, end);
  
  		b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS);
  		if (b->items == NULL)
  			goto badmem;
  		b->perm = kcalloc(b->size, sizeof(u32), GFP_NOFS);
  		if (b->perm == NULL)
  			goto badmem;
  		b->perm_n = 0;
  
  		ceph_decode_need(p, end, b->size*sizeof(u32), bad);
  		for (j = 0; j < b->size; j++)
c89136ea4   Sage Weil   ceph: convert enc...
242
  			b->items[j] = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
  
  		switch (b->alg) {
  		case CRUSH_BUCKET_UNIFORM:
  			err = crush_decode_uniform_bucket(p, end,
  				  (struct crush_bucket_uniform *)b);
  			if (err < 0)
  				goto bad;
  			break;
  		case CRUSH_BUCKET_LIST:
  			err = crush_decode_list_bucket(p, end,
  			       (struct crush_bucket_list *)b);
  			if (err < 0)
  				goto bad;
  			break;
  		case CRUSH_BUCKET_TREE:
  			err = crush_decode_tree_bucket(p, end,
  				(struct crush_bucket_tree *)b);
  			if (err < 0)
  				goto bad;
  			break;
  		case CRUSH_BUCKET_STRAW:
  			err = crush_decode_straw_bucket(p, end,
  				(struct crush_bucket_straw *)b);
  			if (err < 0)
  				goto bad;
  			break;
  		}
  	}
  
  	/* rules */
  	dout("rule vec is %p
  ", c->rules);
  	for (i = 0; i < c->max_rules; i++) {
  		u32 yes;
  		struct crush_rule *r;
  
  		ceph_decode_32_safe(p, end, yes, bad);
  		if (!yes) {
  			dout("crush_decode NO rule %d off %x %p to %p
  ",
  			     i, (int)(*p-start), *p, end);
  			c->rules[i] = NULL;
  			continue;
  		}
  
  		dout("crush_decode rule %d off %x %p to %p
  ",
  		     i, (int)(*p-start), *p, end);
  
  		/* len */
  		ceph_decode_32_safe(p, end, yes, bad);
  #if BITS_PER_LONG == 32
30dc6381b   Sage Weil   ceph: fix error p...
295
  		err = -EINVAL;
f24e9980e   Sage Weil   ceph: OSD client
296
297
298
299
300
301
302
303
304
305
306
307
308
309
  		if (yes > ULONG_MAX / sizeof(struct crush_rule_step))
  			goto bad;
  #endif
  		r = c->rules[i] = kmalloc(sizeof(*r) +
  					  yes*sizeof(struct crush_rule_step),
  					  GFP_NOFS);
  		if (r == NULL)
  			goto badmem;
  		dout(" rule %d is at %p
  ", i, r);
  		r->len = yes;
  		ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */
  		ceph_decode_need(p, end, r->len*3*sizeof(u32), bad);
  		for (j = 0; j < r->len; j++) {
c89136ea4   Sage Weil   ceph: convert enc...
310
311
312
  			r->steps[j].op = ceph_decode_32(p);
  			r->steps[j].arg1 = ceph_decode_32(p);
  			r->steps[j].arg2 = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
  		}
  	}
  
  	/* ignore trailing name maps. */
  
  	dout("crush_decode success
  ");
  	return c;
  
  badmem:
  	err = -ENOMEM;
  bad:
  	dout("crush_decode fail %d
  ", err);
  	crush_destroy(c);
  	return ERR_PTR(err);
  }
f24e9980e   Sage Weil   ceph: OSD client
330
  /*
   * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
   * to a set of osds)
   */
51042122d   Sage Weil   ceph: fix endian ...
334
335
336
337
338
339
340
341
342
343
344
  static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
  {
  	u64 a = *(u64 *)&l;
  	u64 b = *(u64 *)&r;
  
  	if (a < b)
  		return -1;
  	if (a > b)
  		return 1;
  	return 0;
  }
991abb6ec   Sage Weil   ceph: fail gracef...
345
346
  static int __insert_pg_mapping(struct ceph_pg_mapping *new,
  			       struct rb_root *root)
f24e9980e   Sage Weil   ceph: OSD client
347
348
349
350
  {
  	struct rb_node **p = &root->rb_node;
  	struct rb_node *parent = NULL;
  	struct ceph_pg_mapping *pg = NULL;
51042122d   Sage Weil   ceph: fix endian ...
351
  	int c;
f24e9980e   Sage Weil   ceph: OSD client
352
353
354
355
  
  	while (*p) {
  		parent = *p;
  		pg = rb_entry(parent, struct ceph_pg_mapping, node);
51042122d   Sage Weil   ceph: fix endian ...
356
357
  		c = pgid_cmp(new->pgid, pg->pgid);
  		if (c < 0)
f24e9980e   Sage Weil   ceph: OSD client
358
  			p = &(*p)->rb_left;
51042122d   Sage Weil   ceph: fix endian ...
359
  		else if (c > 0)
f24e9980e   Sage Weil   ceph: OSD client
360
361
  			p = &(*p)->rb_right;
  		else
991abb6ec   Sage Weil   ceph: fail gracef...
362
  			return -EEXIST;
f24e9980e   Sage Weil   ceph: OSD client
363
364
365
366
  	}
  
  	rb_link_node(&new->node, parent, p);
  	rb_insert_color(&new->node, root);
991abb6ec   Sage Weil   ceph: fail gracef...
367
  	return 0;
f24e9980e   Sage Weil   ceph: OSD client
368
  }
9794b146f   Sage Weil   ceph: fix memory ...
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
  /*
   * Find the pg_mapping for @pgid in the rbtree at @root.
   * Returns the entry, or NULL if there is none.
   */
  static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
  						   struct ceph_pg pgid)
  {
  	struct rb_node *cur = root->rb_node;
  
  	while (cur) {
  		struct ceph_pg_mapping *entry =
  			rb_entry(cur, struct ceph_pg_mapping, node);
  		int order = pgid_cmp(pgid, entry->pgid);
  
  		if (order == 0)
  			return entry;
  		cur = (order < 0) ? cur->rb_left : cur->rb_right;
  	}
  	return NULL;
  }
f24e9980e   Sage Weil   ceph: OSD client
388
  /*
   * rbtree of pg pool info
   */
  /*
   * Link @new into the pool rbtree at @root, ordered by pool id.
   * Returns 0, or -EEXIST (tree untouched) if the id is already present.
   */
  static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
  {
  	struct rb_node **link = &root->rb_node;
  	struct rb_node *parent = NULL;
  
  	while (*link) {
  		struct ceph_pg_pool_info *cur;
  
  		parent = *link;
  		cur = rb_entry(parent, struct ceph_pg_pool_info, node);
  		if (new->id == cur->id)
  			return -EEXIST;
  		link = (new->id < cur->id) ? &parent->rb_left
  					   : &parent->rb_right;
  	}
  
  	rb_link_node(&new->node, parent, link);
  	rb_insert_color(&new->node, root);
  	return 0;
  }
  
  /*
   * Find the pool with the given @id in the rbtree at @root.
   * Returns the pool info, or NULL if no such pool exists.
   */
  static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
  {
  	struct rb_node *cur = root->rb_node;
  
  	while (cur) {
  		struct ceph_pg_pool_info *pool =
  			rb_entry(cur, struct ceph_pg_pool_info, node);
  
  		if (id == pool->id)
  			return pool;
  		cur = (id < pool->id) ? cur->rb_left : cur->rb_right;
  	}
  	return NULL;
  }
2844a76a2   Sage Weil   ceph: decode v5 o...
429
430
431
432
433
434
  /*
   * Unlink @pi from the pool rbtree at @root and free it together with
   * its name.
   */
  static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
  {
  	void *name = pi->name;
  
  	rb_erase(&pi->node, root);
  	kfree(name);
  	kfree(pi);
  }
efd7576b2   Sage Weil   ceph: fix pg pool...
435
436
437
438
439
440
441
  void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
  {
  	ceph_decode_copy(p, &pi->v, sizeof(pi->v));
  	calc_pg_masks(pi);
  	*p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
  	*p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
  }
2844a76a2   Sage Weil   ceph: decode v5 o...
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
  static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
  {
  	struct ceph_pg_pool_info *pi;
  	u32 num, len, pool;
  
  	ceph_decode_32_safe(p, end, num, bad);
  	dout(" %d pool names
  ", num);
  	while (num--) {
  		ceph_decode_32_safe(p, end, pool, bad);
  		ceph_decode_32_safe(p, end, len, bad);
  		dout("  pool %d len %d
  ", pool, len);
  		pi = __lookup_pg_pool(&map->pg_pools, pool);
  		if (pi) {
  			kfree(pi->name);
  			pi->name = kmalloc(len + 1, GFP_NOFS);
  			if (pi->name) {
  				memcpy(pi->name, *p, len);
  				pi->name[len] = '\0';
  				dout("  name is %s
  ", pi->name);
  			}
  		}
  		*p += len;
  	}
  	return 0;
  
  bad:
  	return -EINVAL;
  }
  
  /*
   * osd map
   */
  void ceph_osdmap_destroy(struct ceph_osdmap *map)
  {
  	dout("osdmap_destroy %p
  ", map);
  	if (map->crush)
  		crush_destroy(map->crush);
  	while (!RB_EMPTY_ROOT(&map->pg_temp)) {
  		struct ceph_pg_mapping *pg =
  			rb_entry(rb_first(&map->pg_temp),
  				 struct ceph_pg_mapping, node);
  		rb_erase(&pg->node, &map->pg_temp);
  		kfree(pg);
  	}
  	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
  		struct ceph_pg_pool_info *pi =
  			rb_entry(rb_first(&map->pg_pools),
  				 struct ceph_pg_pool_info, node);
  		__remove_pg_pool(&map->pg_pools, pi);
  	}
  	kfree(map->osd_state);
  	kfree(map->osd_weight);
  	kfree(map->osd_addr);
  	kfree(map);
  }
  
  /*
   * adjust max osd value.  reallocate arrays.
   *
   * Allocates fresh zero-filled state/addr/weight arrays with @max
   * entries, carries over as many old entries as fit, frees the old
   * arrays and records @max in map->max_osd.
   *
   * Returns 0 on success, or -ENOMEM with @map left unchanged.
   */
  static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
  {
  	u8 *state;
  	struct ceph_entity_addr *addr;
  	u32 *weight;
  
  	state = kcalloc(max, sizeof(*state), GFP_NOFS);
  	addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
  	weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
  	if (state == NULL || addr == NULL || weight == NULL) {
  		kfree(state);
  		kfree(addr);
  		kfree(weight);
  		return -ENOMEM;
  	}
  
  	/* copy old? */
  	if (map->osd_state) {
  		/*
  		 * When the map shrinks only the first @max entries fit in
  		 * the new arrays; copying the old max_osd entries would
  		 * overrun them.
  		 */
  		u32 to_copy = max;
  
  		if (to_copy > map->max_osd)
  			to_copy = map->max_osd;
  
  		memcpy(state, map->osd_state, to_copy*sizeof(*state));
  		memcpy(addr, map->osd_addr, to_copy*sizeof(*addr));
  		memcpy(weight, map->osd_weight, to_copy*sizeof(*weight));
  		kfree(map->osd_state);
  		kfree(map->osd_addr);
  		kfree(map->osd_weight);
  	}
  
  	map->osd_state = state;
  	map->osd_weight = weight;
  	map->osd_addr = addr;
  	map->max_osd = max;
  	return 0;
  }
4fc51be8f   Sage Weil   ceph: use rbtree ...
537
  /*
f24e9980e   Sage Weil   ceph: OSD client
538
539
540
541
542
543
544
   * decode a full map.
   */
  struct ceph_osdmap *osdmap_decode(void **p, void *end)
  {
  	struct ceph_osdmap *map;
  	u16 version;
  	u32 len, max, i;
361be8601   Sage Weil   ceph: precede enc...
545
  	u8 ev;
f24e9980e   Sage Weil   ceph: OSD client
546
547
  	int err = -EINVAL;
  	void *start = *p;
4fc51be8f   Sage Weil   ceph: use rbtree ...
548
  	struct ceph_pg_pool_info *pi;
f24e9980e   Sage Weil   ceph: OSD client
549
550
551
552
553
554
555
556
557
558
  
  	dout("osdmap_decode %p to %p len %d
  ", *p, end, (int)(end - *p));
  
  	map = kzalloc(sizeof(*map), GFP_NOFS);
  	if (map == NULL)
  		return ERR_PTR(-ENOMEM);
  	map->pg_temp = RB_ROOT;
  
  	ceph_decode_16_safe(p, end, version, bad);
02f90c610   Sage Weil   ceph: add uid fie...
559
560
561
562
563
564
  	if (version > CEPH_OSDMAP_VERSION) {
  		pr_warning("got unknown v %d > %d of osdmap
  ", version,
  			   CEPH_OSDMAP_VERSION);
  		goto bad;
  	}
f24e9980e   Sage Weil   ceph: OSD client
565
566
567
  
  	ceph_decode_need(p, end, 2*sizeof(u64)+6*sizeof(u32), bad);
  	ceph_decode_copy(p, &map->fsid, sizeof(map->fsid));
c89136ea4   Sage Weil   ceph: convert enc...
568
  	map->epoch = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
569
570
  	ceph_decode_copy(p, &map->created, sizeof(map->created));
  	ceph_decode_copy(p, &map->modified, sizeof(map->modified));
f24e9980e   Sage Weil   ceph: OSD client
571
572
  	ceph_decode_32_safe(p, end, max, bad);
  	while (max--) {
4fc51be8f   Sage Weil   ceph: use rbtree ...
573
  		ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
2844a76a2   Sage Weil   ceph: decode v5 o...
574
  		pi = kzalloc(sizeof(*pi), GFP_NOFS);
4fc51be8f   Sage Weil   ceph: use rbtree ...
575
  		if (!pi)
f24e9980e   Sage Weil   ceph: OSD client
576
  			goto bad;
4fc51be8f   Sage Weil   ceph: use rbtree ...
577
  		pi->id = ceph_decode_32(p);
361be8601   Sage Weil   ceph: precede enc...
578
  		ev = ceph_decode_8(p); /* encoding version */
02f90c610   Sage Weil   ceph: add uid fie...
579
580
581
582
  		if (ev > CEPH_PG_POOL_VERSION) {
  			pr_warning("got unknown v %d > %d of ceph_pg_pool
  ",
  				   ev, CEPH_PG_POOL_VERSION);
b0bbb0be8   Dan Carpenter   ceph: add kfree()...
583
  			kfree(pi);
02f90c610   Sage Weil   ceph: add uid fie...
584
585
  			goto bad;
  		}
efd7576b2   Sage Weil   ceph: fix pg pool...
586
  		__decode_pool(p, pi);
4fc51be8f   Sage Weil   ceph: use rbtree ...
587
  		__insert_pg_pool(&map->pg_pools, pi);
f24e9980e   Sage Weil   ceph: OSD client
588
  	}
2844a76a2   Sage Weil   ceph: decode v5 o...
589
590
591
  
  	if (version >= 5 && __decode_pool_names(p, end, map) < 0)
  		goto bad;
4fc51be8f   Sage Weil   ceph: use rbtree ...
592
  	ceph_decode_32_safe(p, end, map->pool_max, bad);
f24e9980e   Sage Weil   ceph: OSD client
593
594
  
  	ceph_decode_32_safe(p, end, map->flags, bad);
c89136ea4   Sage Weil   ceph: convert enc...
595
  	max = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
  
  	/* (re)alloc osd arrays */
  	err = osdmap_set_max_osd(map, max);
  	if (err < 0)
  		goto bad;
  	dout("osdmap_decode max_osd = %d
  ", map->max_osd);
  
  	/* osds */
  	err = -EINVAL;
  	ceph_decode_need(p, end, 3*sizeof(u32) +
  			 map->max_osd*(1 + sizeof(*map->osd_weight) +
  				       sizeof(*map->osd_addr)), bad);
  	*p += 4; /* skip length field (should match max) */
  	ceph_decode_copy(p, map->osd_state, map->max_osd);
  
  	*p += 4; /* skip length field (should match max) */
  	for (i = 0; i < map->max_osd; i++)
c89136ea4   Sage Weil   ceph: convert enc...
614
  		map->osd_weight[i] = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
615
616
617
  
  	*p += 4; /* skip length field (should match max) */
  	ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr));
63f2d2119   Sage Weil   ceph: use fixed e...
618
619
  	for (i = 0; i < map->max_osd; i++)
  		ceph_decode_addr(&map->osd_addr[i]);
f24e9980e   Sage Weil   ceph: OSD client
620
621
622
623
624
  
  	/* pg_temp */
  	ceph_decode_32_safe(p, end, len, bad);
  	for (i = 0; i < len; i++) {
  		int n, j;
51042122d   Sage Weil   ceph: fix endian ...
625
  		struct ceph_pg pgid;
f24e9980e   Sage Weil   ceph: OSD client
626
627
628
  		struct ceph_pg_mapping *pg;
  
  		ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
51042122d   Sage Weil   ceph: fix endian ...
629
  		ceph_decode_copy(p, &pgid, sizeof(pgid));
c89136ea4   Sage Weil   ceph: convert enc...
630
  		n = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
631
  		ceph_decode_need(p, end, n * sizeof(u32), bad);
30dc6381b   Sage Weil   ceph: fix error p...
632
  		err = -ENOMEM;
f24e9980e   Sage Weil   ceph: OSD client
633
  		pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
30dc6381b   Sage Weil   ceph: fix error p...
634
  		if (!pg)
f24e9980e   Sage Weil   ceph: OSD client
635
  			goto bad;
f24e9980e   Sage Weil   ceph: OSD client
636
637
638
  		pg->pgid = pgid;
  		pg->len = n;
  		for (j = 0; j < n; j++)
c89136ea4   Sage Weil   ceph: convert enc...
639
  			pg->osds[j] = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
640

991abb6ec   Sage Weil   ceph: fail gracef...
641
642
643
  		err = __insert_pg_mapping(pg, &map->pg_temp);
  		if (err)
  			goto bad;
51042122d   Sage Weil   ceph: fix endian ...
644
645
  		dout(" added pg_temp %llx len %d
  ", *(u64 *)&pgid, len);
f24e9980e   Sage Weil   ceph: OSD client
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
  	}
  
  	/* crush */
  	ceph_decode_32_safe(p, end, len, bad);
  	dout("osdmap_decode crush len %d from off 0x%x
  ", len,
  	     (int)(*p - start));
  	ceph_decode_need(p, end, len, bad);
  	map->crush = crush_decode(*p, end);
  	*p += len;
  	if (IS_ERR(map->crush)) {
  		err = PTR_ERR(map->crush);
  		map->crush = NULL;
  		goto bad;
  	}
  
  	/* ignore the rest of the map */
  	*p = end;
  
  	dout("osdmap_decode done %p %p
  ", *p, end);
  	return map;
  
  bad:
  	dout("osdmap_decode fail
  ");
  	ceph_osdmap_destroy(map);
  	return ERR_PTR(err);
  }
  
  /*
   * decode and apply an incremental map update.
   */
  struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
  					     struct ceph_osdmap *map,
  					     struct ceph_messenger *msgr)
  {
f24e9980e   Sage Weil   ceph: OSD client
683
684
685
686
687
  	struct crush_map *newcrush = NULL;
  	struct ceph_fsid fsid;
  	u32 epoch = 0;
  	struct ceph_timespec modified;
  	u32 len, pool;
4fc51be8f   Sage Weil   ceph: use rbtree ...
688
  	__s32 new_pool_max, new_flags, max;
f24e9980e   Sage Weil   ceph: OSD client
689
690
691
692
693
694
  	void *start = *p;
  	int err = -EINVAL;
  	u16 version;
  	struct rb_node *rbp;
  
  	ceph_decode_16_safe(p, end, version, bad);
02f90c610   Sage Weil   ceph: add uid fie...
695
696
697
698
699
700
  	if (version > CEPH_OSDMAP_INC_VERSION) {
  		pr_warning("got unknown v %d > %d of inc osdmap
  ", version,
  			   CEPH_OSDMAP_INC_VERSION);
  		goto bad;
  	}
f24e9980e   Sage Weil   ceph: OSD client
701
702
703
704
  
  	ceph_decode_need(p, end, sizeof(fsid)+sizeof(modified)+2*sizeof(u32),
  			 bad);
  	ceph_decode_copy(p, &fsid, sizeof(fsid));
c89136ea4   Sage Weil   ceph: convert enc...
705
  	epoch = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
706
707
  	BUG_ON(epoch != map->epoch+1);
  	ceph_decode_copy(p, &modified, sizeof(modified));
4fc51be8f   Sage Weil   ceph: use rbtree ...
708
  	new_pool_max = ceph_decode_32(p);
c89136ea4   Sage Weil   ceph: convert enc...
709
  	new_flags = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
710
711
712
713
714
715
716
  
  	/* full map? */
  	ceph_decode_32_safe(p, end, len, bad);
  	if (len > 0) {
  		dout("apply_incremental full map len %d, %p to %p
  ",
  		     len, *p, end);
30dc6381b   Sage Weil   ceph: fix error p...
717
  		return osdmap_decode(p, min(*p+len, end));
f24e9980e   Sage Weil   ceph: OSD client
718
719
720
721
722
723
724
725
726
727
  	}
  
  	/* new crush? */
  	ceph_decode_32_safe(p, end, len, bad);
  	if (len > 0) {
  		dout("apply_incremental new crush map len %d, %p to %p
  ",
  		     len, *p, end);
  		newcrush = crush_decode(*p, min(*p+len, end));
  		if (IS_ERR(newcrush))
7e34bc524   Julia Lawall   fs/ceph: Use ERR_...
728
  			return ERR_CAST(newcrush);
cebc5be6b   Sage Weil   ceph: fix crush m...
729
  		*p += len;
f24e9980e   Sage Weil   ceph: OSD client
730
731
732
733
734
  	}
  
  	/* new flags? */
  	if (new_flags >= 0)
  		map->flags = new_flags;
4fc51be8f   Sage Weil   ceph: use rbtree ...
735
736
  	if (new_pool_max >= 0)
  		map->pool_max = new_pool_max;
f24e9980e   Sage Weil   ceph: OSD client
737
738
739
740
  
  	ceph_decode_need(p, end, 5*sizeof(u32), bad);
  
  	/* new max? */
c89136ea4   Sage Weil   ceph: convert enc...
741
  	max = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
  	if (max >= 0) {
  		err = osdmap_set_max_osd(map, max);
  		if (err < 0)
  			goto bad;
  	}
  
  	map->epoch++;
  	map->modified = map->modified;
  	if (newcrush) {
  		if (map->crush)
  			crush_destroy(map->crush);
  		map->crush = newcrush;
  		newcrush = NULL;
  	}
  
  	/* new_pool */
  	ceph_decode_32_safe(p, end, len, bad);
  	while (len--) {
361be8601   Sage Weil   ceph: precede enc...
760
  		__u8 ev;
4fc51be8f   Sage Weil   ceph: use rbtree ...
761
  		struct ceph_pg_pool_info *pi;
361be8601   Sage Weil   ceph: precede enc...
762

f24e9980e   Sage Weil   ceph: OSD client
763
  		ceph_decode_32_safe(p, end, pool, bad);
4fc51be8f   Sage Weil   ceph: use rbtree ...
764
  		ceph_decode_need(p, end, 1 + sizeof(pi->v), bad);
361be8601   Sage Weil   ceph: precede enc...
765
  		ev = ceph_decode_8(p);  /* encoding version */
02f90c610   Sage Weil   ceph: add uid fie...
766
767
768
769
770
771
  		if (ev > CEPH_PG_POOL_VERSION) {
  			pr_warning("got unknown v %d > %d of ceph_pg_pool
  ",
  				   ev, CEPH_PG_POOL_VERSION);
  			goto bad;
  		}
4fc51be8f   Sage Weil   ceph: use rbtree ...
772
773
  		pi = __lookup_pg_pool(&map->pg_pools, pool);
  		if (!pi) {
2844a76a2   Sage Weil   ceph: decode v5 o...
774
  			pi = kzalloc(sizeof(*pi), GFP_NOFS);
4fc51be8f   Sage Weil   ceph: use rbtree ...
775
776
777
778
779
780
781
  			if (!pi) {
  				err = -ENOMEM;
  				goto bad;
  			}
  			pi->id = pool;
  			__insert_pg_pool(&map->pg_pools, pi);
  		}
efd7576b2   Sage Weil   ceph: fix pg pool...
782
  		__decode_pool(p, pi);
f24e9980e   Sage Weil   ceph: OSD client
783
  	}
2844a76a2   Sage Weil   ceph: decode v5 o...
784
785
  	if (version >= 5 && __decode_pool_names(p, end, map) < 0)
  		goto bad;
f24e9980e   Sage Weil   ceph: OSD client
786

4fc51be8f   Sage Weil   ceph: use rbtree ...
787
  	/* old_pool */
f24e9980e   Sage Weil   ceph: OSD client
788
  	ceph_decode_32_safe(p, end, len, bad);
4fc51be8f   Sage Weil   ceph: use rbtree ...
789
790
791
792
793
  	while (len--) {
  		struct ceph_pg_pool_info *pi;
  
  		ceph_decode_32_safe(p, end, pool, bad);
  		pi = __lookup_pg_pool(&map->pg_pools, pool);
2844a76a2   Sage Weil   ceph: decode v5 o...
794
795
  		if (pi)
  			__remove_pg_pool(&map->pg_pools, pi);
4fc51be8f   Sage Weil   ceph: use rbtree ...
796
  	}
f24e9980e   Sage Weil   ceph: OSD client
797
798
799
800
801
802
803
804
805
  
  	/* new_up */
  	err = -EINVAL;
  	ceph_decode_32_safe(p, end, len, bad);
  	while (len--) {
  		u32 osd;
  		struct ceph_entity_addr addr;
  		ceph_decode_32_safe(p, end, osd, bad);
  		ceph_decode_copy_safe(p, end, &addr, sizeof(addr), bad);
63f2d2119   Sage Weil   ceph: use fixed e...
806
  		ceph_decode_addr(&addr);
f24e9980e   Sage Weil   ceph: OSD client
807
808
809
810
811
812
813
814
815
816
817
818
819
  		pr_info("osd%d up
  ", osd);
  		BUG_ON(osd >= map->max_osd);
  		map->osd_state[osd] |= CEPH_OSD_UP;
  		map->osd_addr[osd] = addr;
  	}
  
  	/* new_down */
  	ceph_decode_32_safe(p, end, len, bad);
  	while (len--) {
  		u32 osd;
  		ceph_decode_32_safe(p, end, osd, bad);
  		(*p)++;  /* clean flag */
1bdb70e59   Sage Weil   ceph: clean up 'o...
820
821
  		pr_info("osd%d down
  ", osd);
f24e9980e   Sage Weil   ceph: OSD client
822
823
824
825
826
827
828
829
830
  		if (osd < map->max_osd)
  			map->osd_state[osd] &= ~CEPH_OSD_UP;
  	}
  
  	/* new_weight */
  	ceph_decode_32_safe(p, end, len, bad);
  	while (len--) {
  		u32 osd, off;
  		ceph_decode_need(p, end, sizeof(u32)*2, bad);
c89136ea4   Sage Weil   ceph: convert enc...
831
832
  		osd = ceph_decode_32(p);
  		off = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
833
834
835
836
837
838
839
840
841
842
843
844
845
846
  		pr_info("osd%d weight 0x%x %s
  ", osd, off,
  		     off == CEPH_OSD_IN ? "(in)" :
  		     (off == CEPH_OSD_OUT ? "(out)" : ""));
  		if (osd < map->max_osd)
  			map->osd_weight[osd] = off;
  	}
  
  	/* new_pg_temp */
  	rbp = rb_first(&map->pg_temp);
  	ceph_decode_32_safe(p, end, len, bad);
  	while (len--) {
  		struct ceph_pg_mapping *pg;
  		int j;
51042122d   Sage Weil   ceph: fix endian ...
847
  		struct ceph_pg pgid;
f24e9980e   Sage Weil   ceph: OSD client
848
849
  		u32 pglen;
  		ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
51042122d   Sage Weil   ceph: fix endian ...
850
  		ceph_decode_copy(p, &pgid, sizeof(pgid));
c89136ea4   Sage Weil   ceph: convert enc...
851
  		pglen = ceph_decode_32(p);
f24e9980e   Sage Weil   ceph: OSD client
852
853
  
  		/* remove any? */
51042122d   Sage Weil   ceph: fix endian ...
854
855
  		while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
  						node)->pgid, pgid) <= 0) {
bc4fdca85   Sage Weil   ceph: fix pg_mapp...
856
857
858
  			struct ceph_pg_mapping *cur =
  				rb_entry(rbp, struct ceph_pg_mapping, node);
  			
f24e9980e   Sage Weil   ceph: OSD client
859
  			rbp = rb_next(rbp);
bc4fdca85   Sage Weil   ceph: fix pg_mapp...
860
861
862
863
  			dout(" removed pg_temp %llx
  ", *(u64 *)&cur->pgid);
  			rb_erase(&cur->node, &map->pg_temp);
  			kfree(cur);
f24e9980e   Sage Weil   ceph: OSD client
864
865
866
867
868
869
870
871
872
873
874
875
  		}
  
  		if (pglen) {
  			/* insert */
  			ceph_decode_need(p, end, pglen*sizeof(u32), bad);
  			pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
  			if (!pg) {
  				err = -ENOMEM;
  				goto bad;
  			}
  			pg->pgid = pgid;
  			pg->len = pglen;
7067f797b   Sage Weil   ceph: fix increme...
876
  			for (j = 0; j < pglen; j++)
c89136ea4   Sage Weil   ceph: convert enc...
877
  				pg->osds[j] = ceph_decode_32(p);
991abb6ec   Sage Weil   ceph: fail gracef...
878
  			err = __insert_pg_mapping(pg, &map->pg_temp);
bc4fdca85   Sage Weil   ceph: fix pg_mapp...
879
880
  			if (err) {
  				kfree(pg);
991abb6ec   Sage Weil   ceph: fail gracef...
881
  				goto bad;
bc4fdca85   Sage Weil   ceph: fix pg_mapp...
882
  			}
51042122d   Sage Weil   ceph: fix endian ...
883
884
885
  			dout(" added pg_temp %llx len %d
  ", *(u64 *)&pgid,
  			     pglen);
f24e9980e   Sage Weil   ceph: OSD client
886
887
888
  		}
  	}
  	while (rbp) {
bc4fdca85   Sage Weil   ceph: fix pg_mapp...
889
890
  		struct ceph_pg_mapping *cur =
  			rb_entry(rbp, struct ceph_pg_mapping, node);
f24e9980e   Sage Weil   ceph: OSD client
891
  		rbp = rb_next(rbp);
bc4fdca85   Sage Weil   ceph: fix pg_mapp...
892
893
894
895
  		dout(" removed pg_temp %llx
  ", *(u64 *)&cur->pgid);
  		rb_erase(&cur->node, &map->pg_temp);
  		kfree(cur);
f24e9980e   Sage Weil   ceph: OSD client
896
897
898
899
900
901
902
903
904
905
  	}
  
  	/* ignore the rest */
  	*p = end;
  	return map;
  
  bad:
  	pr_err("corrupt inc osdmap epoch %d off %d (%p of %p-%p)
  ",
  	       epoch, (int)(*p - start), *p, start, end);
9ec7cab14   Sage Weil   ceph: hex dump co...
906
907
908
  	print_hex_dump(KERN_DEBUG, "osdmap: ",
  		       DUMP_PREFIX_OFFSET, 16, 1,
  		       start, end - start, true);
f24e9980e   Sage Weil   ceph: OSD client
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
  	if (newcrush)
  		crush_destroy(newcrush);
  	return ERR_PTR(err);
  }
  
  
  
  
  /*
   * Map a file byte range onto a single object extent.
   *
   * @layout supplies the object size, stripe unit and stripe count.  For the
   * file range starting at @off with length *@plen this stores:
   *   *@ono    - number of the object that holds @off
   *   *@oxoff  - offset of the extent inside that object
   *   *@oxlen  - length of the extent inside that object
   *
   * The extent never extends past the stripe unit that contains @off:
   * *@oxlen is the smaller of *@plen and the bytes left in that stripe unit.
   * *@plen is overwritten with *@oxlen so the caller can see how much of
   * the request was actually mapped.
   */
  void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
  				   u64 off, u64 *plen,
  				   u64 *ono,
  				   u64 *oxoff, u64 *oxlen)
  {
  	u32 object_size = le32_to_cpu(layout->fl_object_size);
  	u32 stripe_unit = le32_to_cpu(layout->fl_stripe_unit);
  	u32 stripe_count = le32_to_cpu(layout->fl_stripe_count);
  	u32 units_per_object;
  	u32 block, stripe, pos_in_stripe, object_set;
  	u64 quot, rem;

  	dout("mapping %llu~%llu  osize %u fl_su %u\n", off, *plen,
  	     object_size, stripe_unit);
  	units_per_object = object_size / stripe_unit;
  	dout("osize %u / su %u = su_per_object %u\n", object_size, stripe_unit,
  	     units_per_object);

  	BUG_ON((stripe_unit & ~PAGE_MASK) != 0);

  	/*
  	 * do_div() leaves the quotient in its first argument and returns
  	 * the remainder, so one division yields both the stripe unit index
  	 * (block) and the byte offset inside that stripe unit (rem).
  	 */
  	quot = off;
  	rem = do_div(quot, stripe_unit);
  	block = quot;
  	dout("off %llu / su %u = bl %u\n", off, stripe_unit, block);

  	stripe = block / stripe_count;
  	pos_in_stripe = block % stripe_count;
  	object_set = stripe / units_per_object;

  	*ono = object_set * stripe_count + pos_in_stripe;
  	dout("objset %u * sc %u = ono %u\n", object_set, stripe_count,
  	     (unsigned)*ono);

  	/* offset in object = offset in su + preceding su's of this object */
  	*oxoff = rem + (stripe % units_per_object) * stripe_unit;

  	/* clamp the extent to the end of the current stripe unit */
  	*oxlen = min_t(u64, *plen, stripe_unit - rem);
  	*plen = *oxlen;

  	dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
  }
  
  /*
   * calculate an object layout (i.e. pgid) from an oid,
   * file_layout, and osdmap
   */
  int ceph_calc_object_layout(struct ceph_object_layout *ol,
  			    const char *oid,
  			    struct ceph_file_layout *fl,
  			    struct ceph_osdmap *osdmap)
  {
  	unsigned num, num_mask;
51042122d   Sage Weil   ceph: fix endian ...
987
  	struct ceph_pg pgid;
f24e9980e   Sage Weil   ceph: OSD client
988
989
990
  	s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred);
  	int poolid = le32_to_cpu(fl->fl_pg_pool);
  	struct ceph_pg_pool_info *pool;
51042122d   Sage Weil   ceph: fix endian ...
991
  	unsigned ps;
f24e9980e   Sage Weil   ceph: OSD client
992

30dc6381b   Sage Weil   ceph: fix error p...
993
  	BUG_ON(!osdmap);
f24e9980e   Sage Weil   ceph: OSD client
994

4fc51be8f   Sage Weil   ceph: use rbtree ...
995
996
997
  	pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
  	if (!pool)
  		return -EIO;
1654dd0cf   Sage Weil   ceph: make object...
998
  	ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
f24e9980e   Sage Weil   ceph: OSD client
999
  	if (preferred >= 0) {
51042122d   Sage Weil   ceph: fix endian ...
1000
  		ps += preferred;
f24e9980e   Sage Weil   ceph: OSD client
1001
1002
1003
1004
1005
1006
  		num = le32_to_cpu(pool->v.lpg_num);
  		num_mask = pool->lpg_num_mask;
  	} else {
  		num = le32_to_cpu(pool->v.pg_num);
  		num_mask = pool->pg_num_mask;
  	}
51042122d   Sage Weil   ceph: fix endian ...
1007
1008
1009
  	pgid.ps = cpu_to_le16(ps);
  	pgid.preferred = cpu_to_le16(preferred);
  	pgid.pool = fl->fl_pg_pool;
f24e9980e   Sage Weil   ceph: OSD client
1010
  	if (preferred >= 0)
51042122d   Sage Weil   ceph: fix endian ...
1011
1012
1013
  		dout("calc_object_layout '%s' pgid %d.%xp%d
  ", oid, poolid, ps,
  		     (int)preferred);
f24e9980e   Sage Weil   ceph: OSD client
1014
  	else
51042122d   Sage Weil   ceph: fix endian ...
1015
1016
  		dout("calc_object_layout '%s' pgid %d.%x
  ", oid, poolid, ps);
f24e9980e   Sage Weil   ceph: OSD client
1017

51042122d   Sage Weil   ceph: fix endian ...
1018
  	ol->ol_pgid = pgid;
f24e9980e   Sage Weil   ceph: OSD client
1019
  	ol->ol_stripe_unit = fl->fl_object_stripe_unit;
f24e9980e   Sage Weil   ceph: OSD client
1020
1021
1022
1023
1024
1025
1026
  	return 0;
  }
  
  /*
   * Calculate raw osd vector for the given pgid.  Return pointer to osd
   * array, or NULL on failure.
   */
51042122d   Sage Weil   ceph: fix endian ...
1027
  static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
f24e9980e   Sage Weil   ceph: OSD client
1028
1029
  			int *osds, int *num)
  {
f24e9980e   Sage Weil   ceph: OSD client
1030
1031
1032
  	struct ceph_pg_mapping *pg;
  	struct ceph_pg_pool_info *pool;
  	int ruleno;
51042122d   Sage Weil   ceph: fix endian ...
1033
1034
  	unsigned poolid, ps, pps;
  	int preferred;
f24e9980e   Sage Weil   ceph: OSD client
1035
1036
  
  	/* pg_temp? */
9794b146f   Sage Weil   ceph: fix memory ...
1037
1038
1039
1040
  	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
  	if (pg) {
  		*num = pg->len;
  		return pg->osds;
f24e9980e   Sage Weil   ceph: OSD client
1041
1042
1043
  	}
  
  	/* crush */
51042122d   Sage Weil   ceph: fix endian ...
1044
1045
1046
  	poolid = le32_to_cpu(pgid.pool);
  	ps = le16_to_cpu(pgid.ps);
  	preferred = (s16)le16_to_cpu(pgid.preferred);
767ea5c33   Sage Weil   ceph: do not feed...
1047
1048
1049
1050
  	/* don't forcefeed bad device ids to crush */
  	if (preferred >= osdmap->max_osd ||
  	    preferred >= osdmap->crush->max_devices)
  		preferred = -1;
4fc51be8f   Sage Weil   ceph: use rbtree ...
1051
1052
  	pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
  	if (!pool)
f24e9980e   Sage Weil   ceph: OSD client
1053
  		return NULL;
f24e9980e   Sage Weil   ceph: OSD client
1054
1055
1056
1057
1058
  	ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset,
  				 pool->v.type, pool->v.size);
  	if (ruleno < 0) {
  		pr_err("no crush rule pool %d type %d size %d
  ",
51042122d   Sage Weil   ceph: fix endian ...
1059
  		       poolid, pool->v.type, pool->v.size);
f24e9980e   Sage Weil   ceph: OSD client
1060
1061
  		return NULL;
  	}
51042122d   Sage Weil   ceph: fix endian ...
1062
1063
  	if (preferred >= 0)
  		pps = ceph_stable_mod(ps,
f24e9980e   Sage Weil   ceph: OSD client
1064
1065
1066
  				      le32_to_cpu(pool->v.lpgp_num),
  				      pool->lpgp_num_mask);
  	else
51042122d   Sage Weil   ceph: fix endian ...
1067
  		pps = ceph_stable_mod(ps,
f24e9980e   Sage Weil   ceph: OSD client
1068
1069
  				      le32_to_cpu(pool->v.pgp_num),
  				      pool->pgp_num_mask);
51042122d   Sage Weil   ceph: fix endian ...
1070
  	pps += poolid;
f24e9980e   Sage Weil   ceph: OSD client
1071
1072
  	*num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
  			     min_t(int, pool->v.size, *num),
51042122d   Sage Weil   ceph: fix endian ...
1073
  			     preferred, osdmap->osd_weight);
f24e9980e   Sage Weil   ceph: OSD client
1074
1075
1076
1077
  	return osds;
  }
  
  /*
d85b70566   Sage Weil   ceph: resubmit re...
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
   * Return acting set for given pgid.
   */
  int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
  			int *acting)
  {
  	int rawosds[CEPH_PG_MAX_SIZE], *osds;
  	int i, o, num = CEPH_PG_MAX_SIZE;
  
  	osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
  	if (!osds)
  		return -1;
  
  	/* primary is first up osd */
  	o = 0;
  	for (i = 0; i < num; i++)
  		if (ceph_osd_is_up(osdmap, osds[i]))
  			acting[o++] = osds[i];
  	return o;
  }
  
  /*
f24e9980e   Sage Weil   ceph: OSD client
1099
1100
   * Return primary osd for given pgid, or -1 if none.
   */
51042122d   Sage Weil   ceph: fix endian ...
1101
  int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
f24e9980e   Sage Weil   ceph: OSD client
1102
  {
d85b70566   Sage Weil   ceph: resubmit re...
1103
1104
  	int rawosds[CEPH_PG_MAX_SIZE], *osds;
  	int i, num = CEPH_PG_MAX_SIZE;
f24e9980e   Sage Weil   ceph: OSD client
1105
1106
1107
1108
1109
1110
1111
  
  	osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
  	if (!osds)
  		return -1;
  
  	/* primary is first up osd */
  	for (i = 0; i < num; i++)
d85b70566   Sage Weil   ceph: resubmit re...
1112
  		if (ceph_osd_is_up(osdmap, osds[i]))
f24e9980e   Sage Weil   ceph: OSD client
1113
  			return osds[i];
f24e9980e   Sage Weil   ceph: OSD client
1114
1115
  	return -1;
  }