| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | #ifndef _FS_CEPH_OSDMAP_H
 | 
					
						
							|  |  |  | #define _FS_CEPH_OSDMAP_H
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <linux/rbtree.h>
 | 
					
						
							| 
									
										
										
										
											2012-10-02 18:01:25 +01:00
										 |  |  | #include <linux/ceph/types.h>
 | 
					
						
							| 
									
										
										
										
											2013-04-01 18:58:26 -05:00
										 |  |  | #include <linux/ceph/decode.h>
 | 
					
						
							| 
									
										
										
										
											2012-10-02 18:01:25 +01:00
										 |  |  | #include <linux/ceph/ceph_fs.h>
 | 
					
						
							| 
									
										
										
										
											2010-04-06 15:14:15 -07:00
										 |  |  | #include <linux/crush/crush.h>
 | 
					
						
							| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * The osd map describes the current membership of the osd cluster and | 
					
						
							|  |  |  |  * specifies the mapping of objects to placement groups and placement | 
					
						
							|  |  |  |  * groups to (sets of) osds.  That is, it completely specifies the | 
					
						
							|  |  |  |  * (desired) distribution of all data objects in the system at some | 
					
						
							|  |  |  |  * point in time. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Each map version is identified by an epoch, which increases monotonically. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The map can be updated either via an incremental map (diff) describing | 
					
						
							|  |  |  |  * the change between two successive epochs, or as a fully encoded map. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2013-02-23 10:38:16 -08:00
										 /*
 * Placement group id: the pool it belongs to plus a seed value.
 * ceph_decode_pgid() fills both fields from the wire encoding.
 */
struct ceph_pg {
	uint64_t pool;	/* pool id */
	uint32_t seed;	/* pg seed */
};
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-02-26 10:39:09 -08:00
										 /* Pool flag bit; presumably tested against ceph_pg_pool_info.flags -- TODO confirm */
#define CEPH_POOL_FLAG_HASHPSPOOL	1
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | struct ceph_pg_pool_info { | 
					
						
							| 
									
										
										
										
											2010-02-16 15:55:03 -08:00
										 |  |  | 	struct rb_node node; | 
					
						
							| 
									
										
										
										
											2013-02-23 10:41:09 -08:00
										 |  |  | 	s64 id; | 
					
						
							|  |  |  | 	u8 type; | 
					
						
							|  |  |  | 	u8 size; | 
					
						
							|  |  |  | 	u8 crush_ruleset; | 
					
						
							|  |  |  | 	u8 object_hash; | 
					
						
							|  |  |  | 	u32 pg_num, pgp_num; | 
					
						
							|  |  |  | 	int pg_num_mask, pgp_num_mask; | 
					
						
							| 
									
										
										
										
											2014-01-27 17:40:19 +02:00
										 |  |  | 	s64 read_tier; | 
					
						
							|  |  |  | 	s64 write_tier; /* wins for read+write ops */ | 
					
						
							| 
									
										
										
										
											2013-02-23 10:41:09 -08:00
										 |  |  | 	u64 flags; | 
					
						
							| 
									
										
										
										
											2010-04-09 15:46:42 -07:00
										 |  |  | 	char *name; | 
					
						
							| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-24 17:12:47 +02:00
										 |  |  | static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	switch (pool->type) { | 
					
						
							|  |  |  | 	case CEPH_POOL_TYPE_REP: | 
					
						
							|  |  |  | 		return true; | 
					
						
							|  |  |  | 	case CEPH_POOL_TYPE_EC: | 
					
						
							|  |  |  | 		return false; | 
					
						
							|  |  |  | 	default: | 
					
						
							|  |  |  | 		BUG_ON(1); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-02-23 10:41:09 -08:00
										 |  |  | struct ceph_object_locator { | 
					
						
							| 
									
										
										
										
											2014-01-27 17:40:18 +02:00
										 |  |  | 	s64 pool; | 
					
						
							| 
									
										
										
										
											2013-02-23 10:41:09 -08:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-27 17:40:18 +02:00
										 /*
 * Longest object name the kernel client supports.
 *
 * (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100)
 */
#define CEPH_MAX_OID_NAME_LEN 100

/*
 * Object id: @name_len bytes of name held in a fixed-size buffer.
 * ceph_oid_set_name() and ceph_oid_copy() copy exactly name_len bytes,
 * so @name is not necessarily NUL-terminated.
 */
struct ceph_object_id {
	char name[CEPH_MAX_OID_NAME_LEN];
	int name_len;	/* number of valid bytes in name */
};
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | struct ceph_pg_mapping { | 
					
						
							|  |  |  | 	struct rb_node node; | 
					
						
							| 
									
										
										
										
											2013-02-23 10:38:16 -08:00
										 |  |  | 	struct ceph_pg pgid; | 
					
						
							| 
									
										
										
										
											2014-03-21 19:05:29 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	union { | 
					
						
							|  |  |  | 		struct { | 
					
						
							|  |  |  | 			int len; | 
					
						
							|  |  |  | 			int osds[]; | 
					
						
							|  |  |  | 		} pg_temp; | 
					
						
							| 
									
										
										
										
											2014-03-21 19:05:29 +02:00
										 |  |  | 		struct { | 
					
						
							|  |  |  | 			int osd; | 
					
						
							|  |  |  | 		} primary_temp; | 
					
						
							| 
									
										
										
										
											2014-03-21 19:05:29 +02:00
										 |  |  | 	}; | 
					
						
							| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct ceph_osdmap { | 
					
						
							|  |  |  | 	struct ceph_fsid fsid; | 
					
						
							|  |  |  | 	u32 epoch; | 
					
						
							|  |  |  | 	u32 mkfs_epoch; | 
					
						
							|  |  |  | 	struct ceph_timespec created, modified; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	u32 flags;         /* CEPH_OSDMAP_* */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	u32 max_osd;       /* size of osd_state, _offload, _addr arrays */ | 
					
						
							|  |  |  | 	u8 *osd_state;     /* CEPH_OSD_* */ | 
					
						
							|  |  |  | 	u32 *osd_weight;   /* 0 = failed, 0x10000 = 100% normal */ | 
					
						
							|  |  |  | 	struct ceph_entity_addr *osd_addr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	struct rb_root pg_temp; | 
					
						
							| 
									
										
										
										
											2014-03-21 19:05:29 +02:00
										 |  |  | 	struct rb_root primary_temp; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-21 19:05:30 +02:00
										 |  |  | 	u32 *osd_primary_affinity; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-16 15:55:03 -08:00
										 |  |  | 	struct rb_root pg_pools; | 
					
						
							|  |  |  | 	u32 pool_max; | 
					
						
							| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* the CRUSH map specifies the mapping of placement groups to
 | 
					
						
							|  |  |  | 	 * the list of osds that store+replicate them. */ | 
					
						
							|  |  |  | 	struct crush_map *crush; | 
					
						
							| 
									
										
										
										
											2014-01-31 17:54:26 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	struct mutex crush_scratch_mutex; | 
					
						
							|  |  |  | 	int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; | 
					
						
							| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-27 17:40:18 +02:00
										 |  |  | static inline void ceph_oid_set_name(struct ceph_object_id *oid, | 
					
						
							|  |  |  | 				     const char *name) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int len; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	len = strlen(name); | 
					
						
							|  |  |  | 	if (len > sizeof(oid->name)) { | 
					
						
							|  |  |  | 		WARN(1, "ceph_oid_set_name '%s' len %d vs %zu, truncating\n", | 
					
						
							|  |  |  | 		     name, len, sizeof(oid->name)); | 
					
						
							|  |  |  | 		len = sizeof(oid->name); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	memcpy(oid->name, name, len); | 
					
						
							|  |  |  | 	oid->name_len = len; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline void ceph_oid_copy(struct ceph_object_id *dest, | 
					
						
							|  |  |  | 				 struct ceph_object_id *src) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	BUG_ON(src->name_len > sizeof(dest->name)); | 
					
						
							|  |  |  | 	memcpy(dest->name, src->name, src->name_len); | 
					
						
							|  |  |  | 	dest->name_len = src->name_len; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-24 17:12:46 +02:00
										 |  |  | static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return osd >= 0 && osd < map->max_osd && | 
					
						
							|  |  |  | 	       (map->osd_state[osd] & CEPH_OSD_EXISTS); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2014-03-24 17:12:46 +02:00
										 |  |  | 	return ceph_osd_exists(map, osd) && | 
					
						
							|  |  |  | 	       (map->osd_state[osd] & CEPH_OSD_UP); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							/*
 * ceph_osd_is_down - logical negation of ceph_osd_is_up()
 * @map: osd map to consult
 * @osd: osd index
 *
 * Returns 1 when ceph_osd_is_up() returns 0 (which includes osds that
 * do not exist or are out of range), and 0 otherwise.
 */
static inline int ceph_osd_is_down(struct ceph_osdmap *map, int osd)
{
	return ceph_osd_is_up(map, osd) == 0;
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return map && (map->flags & flag); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | extern char *ceph_osdmap_state_str(char *str, int len, int state); | 
					
						
							| 
									
										
										
										
											2014-03-21 19:05:30 +02:00
										 |  |  | extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd); | 
					
						
							| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, | 
					
						
							|  |  |  | 						     int osd) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if (osd >= map->max_osd) | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							|  |  |  | 	return &map->osd_addr[osd]; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-04-01 18:58:26 -05:00
										 |  |  | static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	__u8 version; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) { | 
					
						
							|  |  |  | 		pr_warning("incomplete pg encoding"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return -EINVAL; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	version = ceph_decode_8(p); | 
					
						
							|  |  |  | 	if (version > 1) { | 
					
						
							|  |  |  | 		pr_warning("do not understand pg encoding %d > 1", | 
					
						
							|  |  |  | 			(int)version); | 
					
						
							|  |  |  | 		return -EINVAL; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	pgid->pool = ceph_decode_64(p); | 
					
						
							|  |  |  | 	pgid->seed = ceph_decode_32(p); | 
					
						
							|  |  |  | 	*p += 4;	/* skip deprecated preferred value */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-13 16:36:13 +02:00
										 /*
 * Map construction, update and teardown; implemented outside this
 * header.  NOTE(review): ownership and error-return conventions of the
 * returned maps are not visible here -- check the definitions.
 */
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
					     struct ceph_osdmap *map,
					     struct ceph_messenger *msgr);
void ceph_osdmap_destroy(struct ceph_osdmap *map);
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* calculate mapping of a file extent to an object */ | 
					
						
							| 
									
										
										
										
											2012-09-24 20:59:48 -07:00
										 |  |  | extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | 
					
						
							| 
									
										
										
										
											2012-11-14 09:38:19 -06:00
										 |  |  | 					 u64 off, u64 len, | 
					
						
							| 
									
										
										
										
											2012-09-24 20:59:48 -07:00
										 |  |  | 					 u64 *bno, u64 *oxoff, u64 *oxlen); | 
					
						
							| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | 
 | 
					
						
							/*
 * Calculate the mapping of an object (@oloc, @oid) to a placement
 * group; the result is written to @pg_out.
 */
int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
			struct ceph_object_locator *oloc,
			struct ceph_object_id *oid,
			struct ceph_pg *pg_out);
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-01-08 09:15:10 -08:00
										 /*
 * Placement group to osd lookups; implemented outside this header.
 * NOTE(review): required capacity of @osds and the meaning of the
 * return values are not visible here -- check the definitions.
 */
int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
			struct ceph_pg pgid,
			int *osds, int *primary);
int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
			 struct ceph_pg pgid);
					
						
							| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-27 17:40:19 +02:00
										 |  |  | extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, | 
					
						
							|  |  |  | 						    u64 id); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-30 19:40:33 -05:00
										 |  |  | extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); | 
					
						
							| 
									
										
										
										
											2010-05-17 12:31:35 -07:00
										 |  |  | extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-10-06 11:31:10 -07:00
										 |  |  | #endif
 |