Merge branch 'drbd-8.4_ed6' into for-3.8-drivers-drbd-8.4_ed6
This commit is contained in:
		
				commit
				
					
						986836503e
					
				
			
		
					 30 changed files with 12120 additions and 8633 deletions
				
			
		|  | @ -1,5 +1,7 @@ | |||
| drbd-y := drbd_bitmap.o drbd_proc.o | ||||
| drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o | ||||
| drbd-y += drbd_main.o drbd_strings.o drbd_nl.o | ||||
| drbd-y += drbd_interval.o drbd_state.o | ||||
| drbd-y += drbd_nla.o | ||||
| 
 | ||||
| obj-$(CONFIG_BLK_DEV_DRBD)     += drbd.o | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -119,13 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) | |||
| 	if (!__ratelimit(&drbd_ratelimit_state)) | ||||
| 		return; | ||||
| 	dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", | ||||
| 	    current == mdev->receiver.task ? "receiver" : | ||||
| 	    current == mdev->asender.task  ? "asender"  : | ||||
| 	    current == mdev->worker.task   ? "worker"   : current->comm, | ||||
| 		drbd_task_to_thread_name(mdev->tconn, current), | ||||
| 		func, b->bm_why ?: "?", | ||||
| 	    b->bm_task == mdev->receiver.task ? "receiver" : | ||||
| 	    b->bm_task == mdev->asender.task  ? "asender"  : | ||||
| 	    b->bm_task == mdev->worker.task   ? "worker"   : "?"); | ||||
| 		drbd_task_to_thread_name(mdev->tconn, b->bm_task)); | ||||
| } | ||||
| 
 | ||||
| void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) | ||||
|  | @ -142,13 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) | |||
| 
 | ||||
| 	if (trylock_failed) { | ||||
| 		dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", | ||||
| 		    current == mdev->receiver.task ? "receiver" : | ||||
| 		    current == mdev->asender.task  ? "asender"  : | ||||
| 		    current == mdev->worker.task   ? "worker"   : current->comm, | ||||
| 			 drbd_task_to_thread_name(mdev->tconn, current), | ||||
| 			 why, b->bm_why ?: "?", | ||||
| 		    b->bm_task == mdev->receiver.task ? "receiver" : | ||||
| 		    b->bm_task == mdev->asender.task  ? "asender"  : | ||||
| 		    b->bm_task == mdev->worker.task   ? "worker"   : "?"); | ||||
| 			 drbd_task_to_thread_name(mdev->tconn, b->bm_task)); | ||||
| 		mutex_lock(&b->bm_change); | ||||
| 	} | ||||
| 	if (BM_LOCKED_MASK & b->bm_flags) | ||||
|  | @ -196,6 +188,9 @@ void drbd_bm_unlock(struct drbd_conf *mdev) | |||
| /* to mark for lazy writeout once syncer cleared all clearable bits,
 | ||||
|  * we if bits have been cleared since last IO. */ | ||||
| #define BM_PAGE_LAZY_WRITEOUT	28 | ||||
| /* pages marked with this "HINT" will be considered for writeout
 | ||||
|  * on activity log transactions */ | ||||
| #define BM_PAGE_HINT_WRITEOUT	27 | ||||
| 
 | ||||
| /* store_page_idx uses non-atomic assignment. It is only used directly after
 | ||||
|  * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to | ||||
|  | @ -227,8 +222,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) | |||
| { | ||||
| 	struct drbd_bitmap *b = mdev->bitmap; | ||||
| 	void *addr = &page_private(b->bm_pages[page_nr]); | ||||
| 	clear_bit(BM_PAGE_IO_LOCK, addr); | ||||
| 	smp_mb__after_clear_bit(); | ||||
| 	clear_bit_unlock(BM_PAGE_IO_LOCK, addr); | ||||
| 	wake_up(&mdev->bitmap->bm_io_wait); | ||||
| } | ||||
| 
 | ||||
|  | @ -246,6 +240,27 @@ static void bm_set_page_need_writeout(struct page *page) | |||
| 	set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout | ||||
|  * @mdev:	DRBD device. | ||||
|  * @page_nr:	the bitmap page to mark with the "hint" flag | ||||
|  * | ||||
|  * From within an activity log transaction, we mark a few pages with these | ||||
|  * hints, then call drbd_bm_write_hinted(), which will only write out changed | ||||
|  * pages which are flagged with this mark. | ||||
|  */ | ||||
| void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr) | ||||
| { | ||||
| 	struct page *page; | ||||
| 	if (page_nr >= mdev->bitmap->bm_number_of_pages) { | ||||
| 		dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n", | ||||
| 			 page_nr, (int)mdev->bitmap->bm_number_of_pages); | ||||
| 		return; | ||||
| 	} | ||||
| 	page = mdev->bitmap->bm_pages[page_nr]; | ||||
| 	set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)); | ||||
| } | ||||
| 
 | ||||
| static int bm_test_page_unchanged(struct page *page) | ||||
| { | ||||
| 	volatile const unsigned long *addr = &page_private(page); | ||||
|  | @ -376,7 +391,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) | |||
| 	 * GFP_NOIO, as this is called while drbd IO is "suspended", | ||||
| 	 * and during resize or attach on diskless Primary, | ||||
| 	 * we must not block on IO to ourselves. | ||||
| 	 * Context is receiver thread or cqueue thread/dmsetup.  */ | ||||
| 	 * Context is receiver thread or dmsetup. */ | ||||
| 	bytes = sizeof(struct page *)*want; | ||||
| 	new_pages = kzalloc(bytes, GFP_NOIO); | ||||
| 	if (!new_pages) { | ||||
|  | @ -441,7 +456,8 @@ int drbd_bm_init(struct drbd_conf *mdev) | |||
| 
 | ||||
| sector_t drbd_bm_capacity(struct drbd_conf *mdev) | ||||
| { | ||||
| 	ERR_IF(!mdev->bitmap) return 0; | ||||
| 	if (!expect(mdev->bitmap)) | ||||
| 		return 0; | ||||
| 	return mdev->bitmap->bm_dev_capacity; | ||||
| } | ||||
| 
 | ||||
|  | @ -449,7 +465,8 @@ sector_t drbd_bm_capacity(struct drbd_conf *mdev) | |||
|  */ | ||||
| void drbd_bm_cleanup(struct drbd_conf *mdev) | ||||
| { | ||||
| 	ERR_IF (!mdev->bitmap) return; | ||||
| 	if (!expect(mdev->bitmap)) | ||||
| 		return; | ||||
| 	bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); | ||||
| 	bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags)); | ||||
| 	kfree(mdev->bitmap); | ||||
|  | @ -612,7 +629,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | |||
| 	int err = 0, growing; | ||||
| 	int opages_vmalloced; | ||||
| 
 | ||||
| 	ERR_IF(!b) return -ENOMEM; | ||||
| 	if (!expect(b)) | ||||
| 		return -ENOMEM; | ||||
| 
 | ||||
| 	drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK); | ||||
| 
 | ||||
|  | @ -734,8 +752,10 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev) | |||
| 	unsigned long s; | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	ERR_IF(!b) return 0; | ||||
| 	ERR_IF(!b->bm_pages) return 0; | ||||
| 	if (!expect(b)) | ||||
| 		return 0; | ||||
| 	if (!expect(b->bm_pages)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	spin_lock_irqsave(&b->bm_lock, flags); | ||||
| 	s = b->bm_set; | ||||
|  | @ -758,8 +778,10 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev) | |||
| size_t drbd_bm_words(struct drbd_conf *mdev) | ||||
| { | ||||
| 	struct drbd_bitmap *b = mdev->bitmap; | ||||
| 	ERR_IF(!b) return 0; | ||||
| 	ERR_IF(!b->bm_pages) return 0; | ||||
| 	if (!expect(b)) | ||||
| 		return 0; | ||||
| 	if (!expect(b->bm_pages)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	return b->bm_words; | ||||
| } | ||||
|  | @ -767,7 +789,8 @@ size_t drbd_bm_words(struct drbd_conf *mdev) | |||
| unsigned long drbd_bm_bits(struct drbd_conf *mdev) | ||||
| { | ||||
| 	struct drbd_bitmap *b = mdev->bitmap; | ||||
| 	ERR_IF(!b) return 0; | ||||
| 	if (!expect(b)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	return b->bm_bits; | ||||
| } | ||||
|  | @ -788,8 +811,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
| 
 | ||||
| 	end = offset + number; | ||||
| 
 | ||||
| 	ERR_IF(!b) return; | ||||
| 	ERR_IF(!b->bm_pages) return; | ||||
| 	if (!expect(b)) | ||||
| 		return; | ||||
| 	if (!expect(b->bm_pages)) | ||||
| 		return; | ||||
| 	if (number == 0) | ||||
| 		return; | ||||
| 	WARN_ON(offset >= b->bm_words); | ||||
|  | @ -833,8 +858,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
| 
 | ||||
| 	end = offset + number; | ||||
| 
 | ||||
| 	ERR_IF(!b) return; | ||||
| 	ERR_IF(!b->bm_pages) return; | ||||
| 	if (!expect(b)) | ||||
| 		return; | ||||
| 	if (!expect(b->bm_pages)) | ||||
| 		return; | ||||
| 
 | ||||
| 	spin_lock_irq(&b->bm_lock); | ||||
| 	if ((offset >= b->bm_words) || | ||||
|  | @ -862,8 +889,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
| void drbd_bm_set_all(struct drbd_conf *mdev) | ||||
| { | ||||
| 	struct drbd_bitmap *b = mdev->bitmap; | ||||
| 	ERR_IF(!b) return; | ||||
| 	ERR_IF(!b->bm_pages) return; | ||||
| 	if (!expect(b)) | ||||
| 		return; | ||||
| 	if (!expect(b->bm_pages)) | ||||
| 		return; | ||||
| 
 | ||||
| 	spin_lock_irq(&b->bm_lock); | ||||
| 	bm_memset(b, 0, 0xff, b->bm_words); | ||||
|  | @ -876,8 +905,10 @@ void drbd_bm_set_all(struct drbd_conf *mdev) | |||
| void drbd_bm_clear_all(struct drbd_conf *mdev) | ||||
| { | ||||
| 	struct drbd_bitmap *b = mdev->bitmap; | ||||
| 	ERR_IF(!b) return; | ||||
| 	ERR_IF(!b->bm_pages) return; | ||||
| 	if (!expect(b)) | ||||
| 		return; | ||||
| 	if (!expect(b->bm_pages)) | ||||
| 		return; | ||||
| 
 | ||||
| 	spin_lock_irq(&b->bm_lock); | ||||
| 	bm_memset(b, 0, 0, b->bm_words); | ||||
|  | @ -891,7 +922,8 @@ struct bm_aio_ctx { | |||
| 	unsigned int done; | ||||
| 	unsigned flags; | ||||
| #define BM_AIO_COPY_PAGES	1 | ||||
| #define BM_WRITE_ALL_PAGES	2 | ||||
| #define BM_AIO_WRITE_HINTED	2 | ||||
| #define BM_WRITE_ALL_PAGES	4 | ||||
| 	int error; | ||||
| 	struct kref kref; | ||||
| }; | ||||
|  | @ -1062,6 +1094,11 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w | |||
| 		if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) | ||||
| 			break; | ||||
| 		if (rw & WRITE) { | ||||
| 			if ((flags & BM_AIO_WRITE_HINTED) && | ||||
| 			    !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, | ||||
| 				    &page_private(b->bm_pages[i]))) | ||||
| 				continue; | ||||
| 
 | ||||
| 			if (!(flags & BM_WRITE_ALL_PAGES) && | ||||
| 			    bm_test_page_unchanged(b->bm_pages[i])) { | ||||
| 				dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); | ||||
|  | @ -1094,6 +1131,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w | |||
| 	else | ||||
| 		kref_put(&ctx->kref, &bm_aio_ctx_destroy); | ||||
| 
 | ||||
| 	/* summary for global bitmap IO */ | ||||
| 	if (flags == 0) | ||||
| 		dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", | ||||
| 			 rw == WRITE ? "WRITE" : "READ", | ||||
| 			 count, jiffies - now); | ||||
|  | @ -1117,6 +1156,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w | |||
| 	} | ||||
| 	now = b->bm_set; | ||||
| 
 | ||||
| 	if (flags == 0) | ||||
| 		dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", | ||||
| 		     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); | ||||
| 
 | ||||
|  | @ -1181,9 +1221,17 @@ int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local) | |||
| 	return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed. | ||||
|  * @mdev:	DRBD device. | ||||
|  */ | ||||
| int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local) | ||||
| { | ||||
| 	return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap | ||||
|  * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap | ||||
|  * @mdev:	DRBD device. | ||||
|  * @idx:	bitmap page index | ||||
|  * | ||||
|  | @ -1291,8 +1339,10 @@ static unsigned long bm_find_next(struct drbd_conf *mdev, | |||
| 	struct drbd_bitmap *b = mdev->bitmap; | ||||
| 	unsigned long i = DRBD_END_OF_BITMAP; | ||||
| 
 | ||||
| 	ERR_IF(!b) return i; | ||||
| 	ERR_IF(!b->bm_pages) return i; | ||||
| 	if (!expect(b)) | ||||
| 		return i; | ||||
| 	if (!expect(b->bm_pages)) | ||||
| 		return i; | ||||
| 
 | ||||
| 	spin_lock_irq(&b->bm_lock); | ||||
| 	if (BM_DONT_TEST & b->bm_flags) | ||||
|  | @ -1393,8 +1443,10 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | |||
| 	struct drbd_bitmap *b = mdev->bitmap; | ||||
| 	int c = 0; | ||||
| 
 | ||||
| 	ERR_IF(!b) return 1; | ||||
| 	ERR_IF(!b->bm_pages) return 0; | ||||
| 	if (!expect(b)) | ||||
| 		return 1; | ||||
| 	if (!expect(b->bm_pages)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	spin_lock_irqsave(&b->bm_lock, flags); | ||||
| 	if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags) | ||||
|  | @ -1425,13 +1477,21 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, | |||
| { | ||||
| 	int i; | ||||
| 	int bits; | ||||
| 	int changed = 0; | ||||
| 	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]); | ||||
| 	for (i = first_word; i < last_word; i++) { | ||||
| 		bits = hweight_long(paddr[i]); | ||||
| 		paddr[i] = ~0UL; | ||||
| 		b->bm_set += BITS_PER_LONG - bits; | ||||
| 		changed += BITS_PER_LONG - bits; | ||||
| 	} | ||||
| 	kunmap_atomic(paddr); | ||||
| 	if (changed) { | ||||
| 		/* We only need lazy writeout, the information is still in the
 | ||||
| 		 * remote bitmap as well, and is reconstructed during the next | ||||
| 		 * bitmap exchange, if lost locally due to a crash. */ | ||||
| 		bm_set_page_lazy_writeout(b->bm_pages[page_nr]); | ||||
| 		b->bm_set += changed; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /* Same thing as drbd_bm_set_bits,
 | ||||
|  | @ -1526,8 +1586,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) | |||
| 	unsigned long *p_addr; | ||||
| 	int i; | ||||
| 
 | ||||
| 	ERR_IF(!b) return 0; | ||||
| 	ERR_IF(!b->bm_pages) return 0; | ||||
| 	if (!expect(b)) | ||||
| 		return 0; | ||||
| 	if (!expect(b->bm_pages)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	spin_lock_irqsave(&b->bm_lock, flags); | ||||
| 	if (BM_DONT_TEST & b->bm_flags) | ||||
|  | @ -1561,8 +1623,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi | |||
| 	 * robust in case we screwed up elsewhere, in that case pretend there | ||||
| 	 * was one dirty bit in the requested area, so we won't try to do a | ||||
| 	 * local read there (no bitmap probably implies no disk) */ | ||||
| 	ERR_IF(!b) return 1; | ||||
| 	ERR_IF(!b->bm_pages) return 1; | ||||
| 	if (!expect(b)) | ||||
| 		return 1; | ||||
| 	if (!expect(b->bm_pages)) | ||||
| 		return 1; | ||||
| 
 | ||||
| 	spin_lock_irqsave(&b->bm_lock, flags); | ||||
| 	if (BM_DONT_TEST & b->bm_flags) | ||||
|  | @ -1575,11 +1639,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi | |||
| 				bm_unmap(p_addr); | ||||
| 			p_addr = bm_map_pidx(b, idx); | ||||
| 		} | ||||
| 		ERR_IF (bitnr >= b->bm_bits) { | ||||
| 			dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); | ||||
| 		} else { | ||||
| 		if (expect(bitnr < b->bm_bits)) | ||||
| 			c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); | ||||
| 		} | ||||
| 		else | ||||
| 			dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); | ||||
| 	} | ||||
| 	if (p_addr) | ||||
| 		bm_unmap(p_addr); | ||||
|  | @ -1609,8 +1672,10 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) | |||
| 	unsigned long flags; | ||||
| 	unsigned long *p_addr, *bm; | ||||
| 
 | ||||
| 	ERR_IF(!b) return 0; | ||||
| 	ERR_IF(!b->bm_pages) return 0; | ||||
| 	if (!expect(b)) | ||||
| 		return 0; | ||||
| 	if (!expect(b->bm_pages)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	spin_lock_irqsave(&b->bm_lock, flags); | ||||
| 	if (BM_DONT_TEST & b->bm_flags) | ||||
|  | @ -1632,47 +1697,3 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) | |||
| 	spin_unlock_irqrestore(&b->bm_lock, flags); | ||||
| 	return count; | ||||
| } | ||||
| 
 | ||||
| /* Set all bits covered by the AL-extent al_enr.
 | ||||
|  * Returns number of bits changed. */ | ||||
| unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) | ||||
| { | ||||
| 	struct drbd_bitmap *b = mdev->bitmap; | ||||
| 	unsigned long *p_addr, *bm; | ||||
| 	unsigned long weight; | ||||
| 	unsigned long s, e; | ||||
| 	int count, i, do_now; | ||||
| 	ERR_IF(!b) return 0; | ||||
| 	ERR_IF(!b->bm_pages) return 0; | ||||
| 
 | ||||
| 	spin_lock_irq(&b->bm_lock); | ||||
| 	if (BM_DONT_SET & b->bm_flags) | ||||
| 		bm_print_lock_info(mdev); | ||||
| 	weight = b->bm_set; | ||||
| 
 | ||||
| 	s = al_enr * BM_WORDS_PER_AL_EXT; | ||||
| 	e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words); | ||||
| 	/* assert that s and e are on the same page */ | ||||
| 	D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3) | ||||
| 	      ==  s    >> (PAGE_SHIFT - LN2_BPL + 3)); | ||||
| 	count = 0; | ||||
| 	if (s < b->bm_words) { | ||||
| 		i = do_now = e-s; | ||||
| 		p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); | ||||
| 		bm = p_addr + MLPP(s); | ||||
| 		while (i--) { | ||||
| 			count += hweight_long(*bm); | ||||
| 			*bm = -1UL; | ||||
| 			bm++; | ||||
| 		} | ||||
| 		bm_unmap(p_addr); | ||||
| 		b->bm_set += do_now*BITS_PER_LONG - count; | ||||
| 		if (e == b->bm_words) | ||||
| 			b->bm_set -= bm_clear_surplus(b); | ||||
| 	} else { | ||||
| 		dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s); | ||||
| 	} | ||||
| 	weight = b->bm_set - weight; | ||||
| 	spin_unlock_irq(&b->bm_lock); | ||||
| 	return weight; | ||||
| } | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										207
									
								
								drivers/block/drbd/drbd_interval.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										207
									
								
								drivers/block/drbd/drbd_interval.c
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,207 @@ | |||
| #include <asm/bug.h> | ||||
| #include <linux/rbtree_augmented.h> | ||||
| #include "drbd_interval.h" | ||||
| 
 | ||||
| /**
 | ||||
|  * interval_end  -  return end of @node | ||||
|  */ | ||||
| static inline | ||||
| sector_t interval_end(struct rb_node *node) | ||||
| { | ||||
| 	struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb); | ||||
| 	return this->end; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * compute_subtree_last  -  compute end of @node | ||||
|  * | ||||
|  * The end of an interval is the highest (start + (size >> 9)) value of this | ||||
|  * node and of its children.  Called for @node and its parents whenever the end | ||||
|  * may have changed. | ||||
|  */ | ||||
| static inline sector_t | ||||
| compute_subtree_last(struct drbd_interval *node) | ||||
| { | ||||
| 	sector_t max = node->sector + (node->size >> 9); | ||||
| 
 | ||||
| 	if (node->rb.rb_left) { | ||||
| 		sector_t left = interval_end(node->rb.rb_left); | ||||
| 		if (left > max) | ||||
| 			max = left; | ||||
| 	} | ||||
| 	if (node->rb.rb_right) { | ||||
| 		sector_t right = interval_end(node->rb.rb_right); | ||||
| 		if (right > max) | ||||
| 			max = right; | ||||
| 	} | ||||
| 	return max; | ||||
| } | ||||
| 
 | ||||
| static void augment_propagate(struct rb_node *rb, struct rb_node *stop) | ||||
| { | ||||
| 	while (rb != stop) { | ||||
| 		struct drbd_interval *node = rb_entry(rb, struct drbd_interval, rb); | ||||
| 		sector_t subtree_last = compute_subtree_last(node); | ||||
| 		if (node->end == subtree_last) | ||||
| 			break; | ||||
| 		node->end = subtree_last; | ||||
| 		rb = rb_parent(&node->rb); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new) | ||||
| { | ||||
| 	struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb); | ||||
| 	struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb); | ||||
| 
 | ||||
| 	new->end = old->end; | ||||
| } | ||||
| 
 | ||||
| static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new) | ||||
| { | ||||
| 	struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb); | ||||
| 	struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb); | ||||
| 
 | ||||
| 	new->end = old->end; | ||||
| 	old->end = compute_subtree_last(old); | ||||
| } | ||||
| 
 | ||||
| static const struct rb_augment_callbacks augment_callbacks = { | ||||
| 	augment_propagate, | ||||
| 	augment_copy, | ||||
| 	augment_rotate, | ||||
| }; | ||||
| 
 | ||||
| /**
 | ||||
|  * drbd_insert_interval  -  insert a new interval into a tree | ||||
|  */ | ||||
| bool | ||||
| drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) | ||||
| { | ||||
| 	struct rb_node **new = &root->rb_node, *parent = NULL; | ||||
| 
 | ||||
| 	BUG_ON(!IS_ALIGNED(this->size, 512)); | ||||
| 
 | ||||
| 	while (*new) { | ||||
| 		struct drbd_interval *here = | ||||
| 			rb_entry(*new, struct drbd_interval, rb); | ||||
| 
 | ||||
| 		parent = *new; | ||||
| 		if (this->sector < here->sector) | ||||
| 			new = &(*new)->rb_left; | ||||
| 		else if (this->sector > here->sector) | ||||
| 			new = &(*new)->rb_right; | ||||
| 		else if (this < here) | ||||
| 			new = &(*new)->rb_left; | ||||
| 		else if (this > here) | ||||
| 			new = &(*new)->rb_right; | ||||
| 		else | ||||
| 			return false; | ||||
| 	} | ||||
| 
 | ||||
| 	rb_link_node(&this->rb, parent, new); | ||||
| 	rb_insert_augmented(&this->rb, root, &augment_callbacks); | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * drbd_contains_interval  -  check if a tree contains a given interval | ||||
|  * @sector:	start sector of @interval | ||||
|  * @interval:	may not be a valid pointer | ||||
|  * | ||||
|  * Returns if the tree contains the node @interval with start sector @start. | ||||
|  * Does not dereference @interval until @interval is known to be a valid object | ||||
|  * in @tree.  Returns %false if @interval is in the tree but with a different | ||||
|  * sector number. | ||||
|  */ | ||||
| bool | ||||
| drbd_contains_interval(struct rb_root *root, sector_t sector, | ||||
| 		       struct drbd_interval *interval) | ||||
| { | ||||
| 	struct rb_node *node = root->rb_node; | ||||
| 
 | ||||
| 	while (node) { | ||||
| 		struct drbd_interval *here = | ||||
| 			rb_entry(node, struct drbd_interval, rb); | ||||
| 
 | ||||
| 		if (sector < here->sector) | ||||
| 			node = node->rb_left; | ||||
| 		else if (sector > here->sector) | ||||
| 			node = node->rb_right; | ||||
| 		else if (interval < here) | ||||
| 			node = node->rb_left; | ||||
| 		else if (interval > here) | ||||
| 			node = node->rb_right; | ||||
| 		else | ||||
| 			return true; | ||||
| 	} | ||||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * drbd_remove_interval  -  remove an interval from a tree | ||||
|  */ | ||||
| void | ||||
| drbd_remove_interval(struct rb_root *root, struct drbd_interval *this) | ||||
| { | ||||
| 	rb_erase_augmented(&this->rb, root, &augment_callbacks); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * drbd_find_overlap  - search for an interval overlapping with [sector, sector + size) | ||||
|  * @sector:	start sector | ||||
|  * @size:	size, aligned to 512 bytes | ||||
|  * | ||||
|  * Returns an interval overlapping with [sector, sector + size), or NULL if | ||||
|  * there is none.  When there is more than one overlapping interval in the | ||||
|  * tree, the interval with the lowest start sector is returned, and all other | ||||
|  * overlapping intervals will be on the right side of the tree, reachable with | ||||
|  * rb_next(). | ||||
|  */ | ||||
| struct drbd_interval * | ||||
| drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size) | ||||
| { | ||||
| 	struct rb_node *node = root->rb_node; | ||||
| 	struct drbd_interval *overlap = NULL; | ||||
| 	sector_t end = sector + (size >> 9); | ||||
| 
 | ||||
| 	BUG_ON(!IS_ALIGNED(size, 512)); | ||||
| 
 | ||||
| 	while (node) { | ||||
| 		struct drbd_interval *here = | ||||
| 			rb_entry(node, struct drbd_interval, rb); | ||||
| 
 | ||||
| 		if (node->rb_left && | ||||
| 		    sector < interval_end(node->rb_left)) { | ||||
| 			/* Overlap if any must be on left side */ | ||||
| 			node = node->rb_left; | ||||
| 		} else if (here->sector < end && | ||||
| 			   sector < here->sector + (here->size >> 9)) { | ||||
| 			overlap = here; | ||||
| 			break; | ||||
| 		} else if (sector >= here->sector) { | ||||
| 			/* Overlap if any must be on right side */ | ||||
| 			node = node->rb_right; | ||||
| 		} else | ||||
| 			break; | ||||
| 	} | ||||
| 	return overlap; | ||||
| } | ||||
| 
 | ||||
| struct drbd_interval * | ||||
| drbd_next_overlap(struct drbd_interval *i, sector_t sector, unsigned int size) | ||||
| { | ||||
| 	sector_t end = sector + (size >> 9); | ||||
| 	struct rb_node *node; | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		node = rb_next(&i->rb); | ||||
| 		if (!node) | ||||
| 			return NULL; | ||||
| 		i = rb_entry(node, struct drbd_interval, rb); | ||||
| 		if (i->sector >= end) | ||||
| 			return NULL; | ||||
| 		if (sector < i->sector + (i->size >> 9)) | ||||
| 			return i; | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										40
									
								
								drivers/block/drbd/drbd_interval.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								drivers/block/drbd/drbd_interval.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,40 @@ | |||
| #ifndef __DRBD_INTERVAL_H | ||||
| #define __DRBD_INTERVAL_H | ||||
| 
 | ||||
| #include <linux/types.h> | ||||
| #include <linux/rbtree.h> | ||||
| 
 | ||||
| struct drbd_interval { | ||||
| 	struct rb_node rb; | ||||
| 	sector_t sector;	/* start sector of the interval */ | ||||
| 	unsigned int size;	/* size in bytes */ | ||||
| 	sector_t end;		/* highest interval end in subtree */ | ||||
| 	int local:1		/* local or remote request? */; | ||||
| 	int waiting:1; | ||||
| }; | ||||
| 
 | ||||
| static inline void drbd_clear_interval(struct drbd_interval *i) | ||||
| { | ||||
| 	RB_CLEAR_NODE(&i->rb); | ||||
| } | ||||
| 
 | ||||
| static inline bool drbd_interval_empty(struct drbd_interval *i) | ||||
| { | ||||
| 	return RB_EMPTY_NODE(&i->rb); | ||||
| } | ||||
| 
 | ||||
| extern bool drbd_insert_interval(struct rb_root *, struct drbd_interval *); | ||||
| extern bool drbd_contains_interval(struct rb_root *, sector_t, | ||||
| 				   struct drbd_interval *); | ||||
| extern void drbd_remove_interval(struct rb_root *, struct drbd_interval *); | ||||
| extern struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t, | ||||
| 					unsigned int); | ||||
| extern struct drbd_interval *drbd_next_overlap(struct drbd_interval *, sector_t, | ||||
| 					unsigned int); | ||||
| 
 | ||||
| #define drbd_for_each_overlap(i, root, sector, size)		\ | ||||
| 	for (i = drbd_find_overlap(root, sector, size);		\ | ||||
| 	     i;							\ | ||||
| 	     i = drbd_next_overlap(i, sector, size)) | ||||
| 
 | ||||
| #endif  /* __DRBD_INTERVAL_H */ | ||||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										55
									
								
								drivers/block/drbd/drbd_nla.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								drivers/block/drbd/drbd_nla.c
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,55 @@ | |||
| #include "drbd_wrappers.h" | ||||
| #include <linux/kernel.h> | ||||
| #include <net/netlink.h> | ||||
| #include <linux/drbd_genl_api.h> | ||||
| #include "drbd_nla.h" | ||||
| 
 | ||||
| static int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) | ||||
| { | ||||
| 	struct nlattr *head = nla_data(nla); | ||||
| 	int len = nla_len(nla); | ||||
| 	int rem; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * validate_nla (called from nla_parse_nested) ignores attributes | ||||
| 	 * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag. | ||||
| 	 * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY | ||||
| 	 * flag set also, check and remove that flag before calling | ||||
| 	 * nla_parse_nested. | ||||
| 	 */ | ||||
| 
 | ||||
| 	nla_for_each_attr(nla, head, len, rem) { | ||||
| 		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { | ||||
| 			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; | ||||
| 			if (nla_type(nla) > maxtype) | ||||
| 				return -EOPNOTSUPP; | ||||
| 		} | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
/* Like nla_parse_nested(), but first strips/validates the
 * DRBD_GENLA_F_MANDATORY flag on the nested attributes. */
int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
			  const struct nla_policy *policy)
{
	int err = drbd_nla_check_mandatory(maxtype, nla);

	if (err)
		return err;
	return nla_parse_nested(tb, maxtype, nla, policy);
}
| 
 | ||||
/*
 * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and we
 * don't know about that attribute, reject all the nested attributes by
 * returning an ERR_PTR-encoded error instead of searching.
 */
struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype)
{
	int err = drbd_nla_check_mandatory(maxtype, nla);

	if (err)
		return ERR_PTR(err);
	return nla_find_nested(nla, attrtype);
}
							
								
								
									
										8
									
								
								drivers/block/drbd/drbd_nla.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								drivers/block/drbd/drbd_nla.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,8 @@ | |||
#ifndef __DRBD_NLA_H
#define __DRBD_NLA_H

/* Netlink attribute helpers that understand the DRBD_GENLA_F_MANDATORY
 * flag on nested attributes (see drbd_nla.c). */
extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
				 const struct nla_policy *policy);
extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype);

#endif  /* __DRBD_NLA_H */
|  | @ -171,7 +171,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) | |||
| 		if (mdev->state.conn == C_VERIFY_S || | ||||
| 		    mdev->state.conn == C_VERIFY_T) { | ||||
| 			bit_pos = bm_bits - mdev->ov_left; | ||||
| 			if (mdev->agreed_pro_version >= 97) | ||||
| 			if (verify_can_do_stop_sector(mdev)) | ||||
| 				stop_sector = mdev->ov_stop_sector; | ||||
| 		} else | ||||
| 			bit_pos = mdev->bm_resync_fo; | ||||
|  | @ -200,9 +200,11 @@ static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) | |||
| 
 | ||||
| static int drbd_seq_show(struct seq_file *seq, void *v) | ||||
| { | ||||
| 	int i, hole = 0; | ||||
| 	int i, prev_i = -1; | ||||
| 	const char *sn; | ||||
| 	struct drbd_conf *mdev; | ||||
| 	struct net_conf *nc; | ||||
| 	char wp; | ||||
| 
 | ||||
| 	static char write_ordering_chars[] = { | ||||
| 		[WO_none] = 'n', | ||||
|  | @ -233,16 +235,11 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
| 	 oos .. known out-of-sync kB | ||||
| 	*/ | ||||
| 
 | ||||
| 	for (i = 0; i < minor_count; i++) { | ||||
| 		mdev = minor_to_mdev(i); | ||||
| 		if (!mdev) { | ||||
| 			hole = 1; | ||||
| 			continue; | ||||
| 		} | ||||
| 		if (hole) { | ||||
| 			hole = 0; | ||||
| 	rcu_read_lock(); | ||||
| 	idr_for_each_entry(&minors, mdev, i) { | ||||
| 		if (prev_i != i - 1) | ||||
| 			seq_printf(seq, "\n"); | ||||
| 		} | ||||
| 		prev_i = i; | ||||
| 
 | ||||
| 		sn = drbd_conn_str(mdev->state.conn); | ||||
| 
 | ||||
|  | @ -254,6 +251,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
| 			/* reset mdev->congestion_reason */ | ||||
| 			bdi_rw_congested(&mdev->rq_queue->backing_dev_info); | ||||
| 
 | ||||
| 			nc = rcu_dereference(mdev->tconn->net_conf); | ||||
| 			wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' '; | ||||
| 			seq_printf(seq, | ||||
| 			   "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n" | ||||
| 			   "    ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " | ||||
|  | @ -263,14 +262,13 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
| 			   drbd_role_str(mdev->state.peer), | ||||
| 			   drbd_disk_str(mdev->state.disk), | ||||
| 			   drbd_disk_str(mdev->state.pdsk), | ||||
| 			   (mdev->net_conf == NULL ? ' ' : | ||||
| 			    (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')), | ||||
| 			   is_susp(mdev->state) ? 's' : 'r', | ||||
| 			   wp, | ||||
| 			   drbd_suspended(mdev) ? 's' : 'r', | ||||
| 			   mdev->state.aftr_isp ? 'a' : '-', | ||||
| 			   mdev->state.peer_isp ? 'p' : '-', | ||||
| 			   mdev->state.user_isp ? 'u' : '-', | ||||
| 			   mdev->congestion_reason ?: '-', | ||||
| 			   drbd_test_flag(mdev, AL_SUSPENDED) ? 's' : '-', | ||||
| 			   test_bit(AL_SUSPENDED, &mdev->flags) ? 's' : '-', | ||||
| 			   mdev->send_cnt/2, | ||||
| 			   mdev->recv_cnt/2, | ||||
| 			   mdev->writ_cnt/2, | ||||
|  | @ -282,8 +280,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
| 			   atomic_read(&mdev->rs_pending_cnt), | ||||
| 			   atomic_read(&mdev->unacked_cnt), | ||||
| 			   atomic_read(&mdev->ap_bio_cnt), | ||||
| 			   mdev->epochs, | ||||
| 			   write_ordering_chars[mdev->write_ordering] | ||||
| 			   mdev->tconn->epochs, | ||||
| 			   write_ordering_chars[mdev->tconn->write_ordering] | ||||
| 			); | ||||
| 			seq_printf(seq, " oos:%llu\n", | ||||
| 				   Bit2KB((unsigned long long) | ||||
|  | @ -308,6 +306,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	rcu_read_unlock(); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -77,40 +77,41 @@ | |||
|  */ | ||||
| 
 | ||||
| enum drbd_req_event { | ||||
| 	created, | ||||
| 	to_be_send, | ||||
| 	to_be_submitted, | ||||
| 	CREATED, | ||||
| 	TO_BE_SENT, | ||||
| 	TO_BE_SUBMITTED, | ||||
| 
 | ||||
| 	/* XXX yes, now I am inconsistent...
 | ||||
| 	 * these are not "events" but "actions" | ||||
| 	 * oh, well... */ | ||||
| 	queue_for_net_write, | ||||
| 	queue_for_net_read, | ||||
| 	queue_for_send_oos, | ||||
| 	QUEUE_FOR_NET_WRITE, | ||||
| 	QUEUE_FOR_NET_READ, | ||||
| 	QUEUE_FOR_SEND_OOS, | ||||
| 
 | ||||
| 	send_canceled, | ||||
| 	send_failed, | ||||
| 	handed_over_to_network, | ||||
| 	oos_handed_to_network, | ||||
| 	connection_lost_while_pending, | ||||
| 	read_retry_remote_canceled, | ||||
| 	recv_acked_by_peer, | ||||
| 	write_acked_by_peer, | ||||
| 	write_acked_by_peer_and_sis, /* and set_in_sync */ | ||||
| 	conflict_discarded_by_peer, | ||||
| 	neg_acked, | ||||
| 	barrier_acked, /* in protocol A and B */ | ||||
| 	data_received, /* (remote read) */ | ||||
| 	SEND_CANCELED, | ||||
| 	SEND_FAILED, | ||||
| 	HANDED_OVER_TO_NETWORK, | ||||
| 	OOS_HANDED_TO_NETWORK, | ||||
| 	CONNECTION_LOST_WHILE_PENDING, | ||||
| 	READ_RETRY_REMOTE_CANCELED, | ||||
| 	RECV_ACKED_BY_PEER, | ||||
| 	WRITE_ACKED_BY_PEER, | ||||
| 	WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */ | ||||
| 	CONFLICT_RESOLVED, | ||||
| 	POSTPONE_WRITE, | ||||
| 	NEG_ACKED, | ||||
| 	BARRIER_ACKED, /* in protocol A and B */ | ||||
| 	DATA_RECEIVED, /* (remote read) */ | ||||
| 
 | ||||
| 	read_completed_with_error, | ||||
| 	read_ahead_completed_with_error, | ||||
| 	write_completed_with_error, | ||||
| 	abort_disk_io, | ||||
| 	completed_ok, | ||||
| 	resend, | ||||
| 	fail_frozen_disk_io, | ||||
| 	restart_frozen_disk_io, | ||||
| 	nothing, /* for tracing only */ | ||||
| 	READ_COMPLETED_WITH_ERROR, | ||||
| 	READ_AHEAD_COMPLETED_WITH_ERROR, | ||||
| 	WRITE_COMPLETED_WITH_ERROR, | ||||
| 	ABORT_DISK_IO, | ||||
| 	COMPLETED_OK, | ||||
| 	RESEND, | ||||
| 	FAIL_FROZEN_DISK_IO, | ||||
| 	RESTART_FROZEN_DISK_IO, | ||||
| 	NOTHING, | ||||
| }; | ||||
| 
 | ||||
| /* encoding of request states for now.  we don't actually need that many bits.
 | ||||
|  | @ -142,8 +143,8 @@ enum drbd_req_state_bits { | |||
| 	 *        recv_ack (B) or implicit "ack" (A), | ||||
| 	 *        still waiting for the barrier ack. | ||||
| 	 *        master_bio may already be completed and invalidated. | ||||
| 	 * 11100: write_acked (C), | ||||
| 	 *        data_received (for remote read, any protocol) | ||||
| 	 * 11100: write acked (C), | ||||
| 	 *        data received (for remote read, any protocol) | ||||
| 	 *        or finally the barrier ack has arrived (B,A)... | ||||
| 	 *        request can be freed | ||||
| 	 * 01100: neg-acked (write, protocol C) | ||||
|  | @ -198,6 +199,22 @@ enum drbd_req_state_bits { | |||
| 
 | ||||
| 	/* Should call drbd_al_complete_io() for this request... */ | ||||
| 	__RQ_IN_ACT_LOG, | ||||
| 
 | ||||
| 	/* The peer has sent a retry ACK */ | ||||
| 	__RQ_POSTPONED, | ||||
| 
 | ||||
| 	/* would have been completed,
 | ||||
| 	 * but was not, because of drbd_suspended() */ | ||||
| 	__RQ_COMPLETION_SUSP, | ||||
| 
 | ||||
| 	/* We expect a receive ACK (wire proto B) */ | ||||
| 	__RQ_EXP_RECEIVE_ACK, | ||||
| 
 | ||||
| 	/* We expect a write ACK (wite proto C) */ | ||||
| 	__RQ_EXP_WRITE_ACK, | ||||
| 
 | ||||
| 	/* waiting for a barrier ack, did an extra kref_get */ | ||||
| 	__RQ_EXP_BARR_ACK, | ||||
| }; | ||||
| 
 | ||||
| #define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING) | ||||
|  | @ -219,56 +236,16 @@ enum drbd_req_state_bits { | |||
| 
 | ||||
| #define RQ_WRITE           (1UL << __RQ_WRITE) | ||||
| #define RQ_IN_ACT_LOG      (1UL << __RQ_IN_ACT_LOG) | ||||
| #define RQ_POSTPONED	   (1UL << __RQ_POSTPONED) | ||||
| #define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP) | ||||
| #define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK) | ||||
| #define RQ_EXP_WRITE_ACK   (1UL << __RQ_EXP_WRITE_ACK) | ||||
| #define RQ_EXP_BARR_ACK    (1UL << __RQ_EXP_BARR_ACK) | ||||
| 
 | ||||
| /* For waking up the frozen transfer log mod_req() has to return if the request
 | ||||
|    should be counted in the epoch object*/ | ||||
| #define MR_WRITE_SHIFT 0 | ||||
| #define MR_WRITE       (1 << MR_WRITE_SHIFT) | ||||
| #define MR_READ_SHIFT  1 | ||||
| #define MR_READ        (1 << MR_READ_SHIFT) | ||||
| 
 | ||||
| /* epoch entries */ | ||||
| static inline | ||||
| struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector) | ||||
| { | ||||
| 	BUG_ON(mdev->ee_hash_s == 0); | ||||
| 	return mdev->ee_hash + | ||||
| 		((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s); | ||||
| } | ||||
| 
 | ||||
| /* transfer log (drbd_request objects) */ | ||||
| static inline | ||||
| struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector) | ||||
| { | ||||
| 	BUG_ON(mdev->tl_hash_s == 0); | ||||
| 	return mdev->tl_hash + | ||||
| 		((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s); | ||||
| } | ||||
| 
 | ||||
| /* application reads (drbd_request objects) */ | ||||
| static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) | ||||
| { | ||||
| 	return mdev->app_reads_hash | ||||
| 		+ ((unsigned int)(sector) % APP_R_HSIZE); | ||||
| } | ||||
| 
 | ||||
| /* when we receive the answer for a read request,
 | ||||
|  * verify that we actually know about it */ | ||||
| static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev, | ||||
| 	u64 id, sector_t sector) | ||||
| { | ||||
| 	struct hlist_head *slot = ar_hash_slot(mdev, sector); | ||||
| 	struct hlist_node *n; | ||||
| 	struct drbd_request *req; | ||||
| 
 | ||||
| 	hlist_for_each_entry(req, n, slot, collision) { | ||||
| 		if ((unsigned long)req == (unsigned long)id) { | ||||
| 			D_ASSERT(req->sector == sector); | ||||
| 			return req; | ||||
| 		} | ||||
| 	} | ||||
| 	return NULL; | ||||
| } | ||||
| #define MR_WRITE       1 | ||||
| #define MR_READ        2 | ||||
| 
 | ||||
| static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src) | ||||
| { | ||||
|  | @ -278,41 +255,10 @@ static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bi | |||
| 	req->private_bio = bio; | ||||
| 
 | ||||
| 	bio->bi_private  = req; | ||||
| 	bio->bi_end_io   = drbd_endio_pri; | ||||
| 	bio->bi_end_io   = drbd_request_endio; | ||||
| 	bio->bi_next     = NULL; | ||||
| } | ||||
| 
 | ||||
| static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, | ||||
| 	struct bio *bio_src) | ||||
| { | ||||
| 	struct drbd_request *req = | ||||
| 		mempool_alloc(drbd_request_mempool, GFP_NOIO); | ||||
| 	if (likely(req)) { | ||||
| 		drbd_req_make_private_bio(req, bio_src); | ||||
| 
 | ||||
| 		req->rq_state    = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0; | ||||
| 		req->mdev        = mdev; | ||||
| 		req->master_bio  = bio_src; | ||||
| 		req->epoch       = 0; | ||||
| 		req->sector      = bio_src->bi_sector; | ||||
| 		req->size        = bio_src->bi_size; | ||||
| 		INIT_HLIST_NODE(&req->collision); | ||||
| 		INIT_LIST_HEAD(&req->tl_requests); | ||||
| 		INIT_LIST_HEAD(&req->w.list); | ||||
| 	} | ||||
| 	return req; | ||||
| } | ||||
| 
 | ||||
| static inline void drbd_req_free(struct drbd_request *req) | ||||
| { | ||||
| 	mempool_free(req, drbd_request_mempool); | ||||
| } | ||||
| 
 | ||||
| static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) | ||||
| { | ||||
| 	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); | ||||
| } | ||||
| 
 | ||||
| /* Short lived temporary struct on the stack.
 | ||||
|  * We could squirrel the error to be returned into | ||||
|  * bio->bi_size, or similar. But that would be too ugly. */ | ||||
|  | @ -321,6 +267,7 @@ struct bio_and_error { | |||
| 	int error; | ||||
| }; | ||||
| 
 | ||||
| extern void drbd_req_destroy(struct kref *kref); | ||||
| extern void _req_may_be_done(struct drbd_request *req, | ||||
| 		struct bio_and_error *m); | ||||
| extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, | ||||
|  | @ -328,13 +275,17 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
| extern void complete_master_bio(struct drbd_conf *mdev, | ||||
| 		struct bio_and_error *m); | ||||
| extern void request_timer_fn(unsigned long data); | ||||
| extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); | ||||
| extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); | ||||
| extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); | ||||
| 
 | ||||
| /* this is in drbd_main.c */ | ||||
| extern void drbd_restart_request(struct drbd_request *req); | ||||
| 
 | ||||
| /* use this if you don't want to deal with calling complete_master_bio()
 | ||||
|  * outside the spinlock, e.g. when walking some list on cleanup. */ | ||||
| static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) | ||||
| { | ||||
| 	struct drbd_conf *mdev = req->mdev; | ||||
| 	struct drbd_conf *mdev = req->w.mdev; | ||||
| 	struct bio_and_error m; | ||||
| 	int rv; | ||||
| 
 | ||||
|  | @ -354,13 +305,13 @@ static inline int req_mod(struct drbd_request *req, | |||
| 		enum drbd_req_event what) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 	struct drbd_conf *mdev = req->mdev; | ||||
| 	struct drbd_conf *mdev = req->w.mdev; | ||||
| 	struct bio_and_error m; | ||||
| 	int rv; | ||||
| 
 | ||||
| 	spin_lock_irqsave(&mdev->req_lock, flags); | ||||
| 	spin_lock_irqsave(&mdev->tconn->req_lock, flags); | ||||
| 	rv = __req_mod(req, what, &m); | ||||
| 	spin_unlock_irqrestore(&mdev->req_lock, flags); | ||||
| 	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | ||||
| 
 | ||||
| 	if (m.bio) | ||||
| 		complete_master_bio(mdev, &m); | ||||
|  | @ -368,7 +319,7 @@ static inline int req_mod(struct drbd_request *req, | |||
| 	return rv; | ||||
| } | ||||
| 
 | ||||
| static inline bool drbd_should_do_remote(union drbd_state s) | ||||
| static inline bool drbd_should_do_remote(union drbd_dev_state s) | ||||
| { | ||||
| 	return s.pdsk == D_UP_TO_DATE || | ||||
| 		(s.pdsk >= D_INCONSISTENT && | ||||
|  | @ -378,7 +329,7 @@ static inline bool drbd_should_do_remote(union drbd_state s) | |||
| 	   That is equivalent since before 96 IO was frozen in the C_WF_BITMAP* | ||||
| 	   states. */ | ||||
| } | ||||
| static inline bool drbd_should_send_oos(union drbd_state s) | ||||
| static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s) | ||||
| { | ||||
| 	return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S; | ||||
| 	/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
 | ||||
|  |  | |||
							
								
								
									
										1857
									
								
								drivers/block/drbd/drbd_state.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										1857
									
								
								drivers/block/drbd/drbd_state.c
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										161
									
								
								drivers/block/drbd/drbd_state.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										161
									
								
								drivers/block/drbd/drbd_state.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,161 @@ | |||
| #ifndef DRBD_STATE_H | ||||
| #define DRBD_STATE_H | ||||
| 
 | ||||
| struct drbd_conf; | ||||
| struct drbd_tconn; | ||||
| 
 | ||||
| /**
 | ||||
|  * DOC: DRBD State macros | ||||
|  * | ||||
|  * These macros are used to express state changes in easily readable form. | ||||
|  * | ||||
|  * The NS macros expand to a mask and a value, that can be bit ored onto the | ||||
|  * current state as soon as the spinlock (req_lock) was taken. | ||||
|  * | ||||
|  * The _NS macros are used for state functions that get called with the | ||||
|  * spinlock. These macros expand directly to the new state value. | ||||
|  * | ||||
|  * Besides the basic forms NS() and _NS() additional _?NS[23] are defined | ||||
|  * to express state changes that affect more than one aspect of the state. | ||||
|  * | ||||
|  * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY) | ||||
|  * Means that the network connection was established and that the peer | ||||
|  * is in secondary role. | ||||
|  */ | ||||
| #define role_MASK R_MASK | ||||
| #define peer_MASK R_MASK | ||||
| #define disk_MASK D_MASK | ||||
| #define pdsk_MASK D_MASK | ||||
| #define conn_MASK C_MASK | ||||
| #define susp_MASK 1 | ||||
| #define user_isp_MASK 1 | ||||
| #define aftr_isp_MASK 1 | ||||
| #define susp_nod_MASK 1 | ||||
| #define susp_fen_MASK 1 | ||||
| 
 | ||||
| #define NS(T, S) \ | ||||
| 	({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ | ||||
| 	({ union drbd_state val; val.i = 0; val.T = (S); val; }) | ||||
| #define NS2(T1, S1, T2, S2) \ | ||||
| 	({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ | ||||
| 	  mask.T2 = T2##_MASK; mask; }), \ | ||||
| 	({ union drbd_state val; val.i = 0; val.T1 = (S1); \ | ||||
| 	  val.T2 = (S2); val; }) | ||||
| #define NS3(T1, S1, T2, S2, T3, S3) \ | ||||
| 	({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ | ||||
| 	  mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \ | ||||
| 	({ union drbd_state val;  val.i = 0; val.T1 = (S1); \ | ||||
| 	  val.T2 = (S2); val.T3 = (S3); val; }) | ||||
| 
 | ||||
| #define _NS(D, T, S) \ | ||||
| 	D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T = (S); __ns; }) | ||||
| #define _NS2(D, T1, S1, T2, S2) \ | ||||
| 	D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \ | ||||
| 	__ns.T2 = (S2); __ns; }) | ||||
| #define _NS3(D, T1, S1, T2, S2, T3, S3) \ | ||||
| 	D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \ | ||||
| 	__ns.T2 = (S2); __ns.T3 = (S3); __ns; }) | ||||
| 
 | ||||
| enum chg_state_flags { | ||||
| 	CS_HARD	         = 1 << 0, | ||||
| 	CS_VERBOSE       = 1 << 1, | ||||
| 	CS_WAIT_COMPLETE = 1 << 2, | ||||
| 	CS_SERIALIZE     = 1 << 3, | ||||
| 	CS_ORDERED       = CS_WAIT_COMPLETE + CS_SERIALIZE, | ||||
| 	CS_LOCAL_ONLY    = 1 << 4, /* Do not consider a device pair wide state change */ | ||||
| 	CS_DC_ROLE       = 1 << 5, /* DC = display as connection state change */ | ||||
| 	CS_DC_PEER       = 1 << 6, | ||||
| 	CS_DC_CONN       = 1 << 7, | ||||
| 	CS_DC_DISK       = 1 << 8, | ||||
| 	CS_DC_PDSK       = 1 << 9, | ||||
| 	CS_DC_SUSP       = 1 << 10, | ||||
| 	CS_DC_MASK       = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK, | ||||
| 	CS_IGN_OUTD_FAIL = 1 << 11, | ||||
| }; | ||||
| 
 | ||||
| /* drbd_dev_state and drbd_state are different types. This is to stress the
 | ||||
|    small difference. There is no suspended flag (.susp), and no suspended | ||||
|    while fence handler runs flas (susp_fen). */ | ||||
| union drbd_dev_state { | ||||
| 	struct { | ||||
| #if defined(__LITTLE_ENDIAN_BITFIELD) | ||||
| 		unsigned role:2 ;   /* 3/4	 primary/secondary/unknown */ | ||||
| 		unsigned peer:2 ;   /* 3/4	 primary/secondary/unknown */ | ||||
| 		unsigned conn:5 ;   /* 17/32	 cstates */ | ||||
| 		unsigned disk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */ | ||||
| 		unsigned pdsk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */ | ||||
| 		unsigned _unused:1 ; | ||||
| 		unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ | ||||
| 		unsigned peer_isp:1 ; | ||||
| 		unsigned user_isp:1 ; | ||||
| 		unsigned _pad:11;   /* 0	 unused */ | ||||
| #elif defined(__BIG_ENDIAN_BITFIELD) | ||||
| 		unsigned _pad:11; | ||||
| 		unsigned user_isp:1 ; | ||||
| 		unsigned peer_isp:1 ; | ||||
| 		unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ | ||||
| 		unsigned _unused:1 ; | ||||
| 		unsigned pdsk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */ | ||||
| 		unsigned disk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */ | ||||
| 		unsigned conn:5 ;   /* 17/32	 cstates */ | ||||
| 		unsigned peer:2 ;   /* 3/4	 primary/secondary/unknown */ | ||||
| 		unsigned role:2 ;   /* 3/4	 primary/secondary/unknown */ | ||||
| #else | ||||
| # error "this endianess is not supported" | ||||
| #endif | ||||
| 	}; | ||||
| 	unsigned int i; | ||||
| }; | ||||
| 
 | ||||
| extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, | ||||
| 					    enum chg_state_flags f, | ||||
| 					    union drbd_state mask, | ||||
| 					    union drbd_state val); | ||||
| extern void drbd_force_state(struct drbd_conf *, union drbd_state, | ||||
| 			union drbd_state); | ||||
| extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *, | ||||
| 					      union drbd_state, | ||||
| 					      union drbd_state, | ||||
| 					      enum chg_state_flags); | ||||
| extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state, | ||||
| 					   enum chg_state_flags, | ||||
| 					   struct completion *done); | ||||
| extern void print_st_err(struct drbd_conf *, union drbd_state, | ||||
| 			union drbd_state, int); | ||||
| 
 | ||||
| enum drbd_state_rv | ||||
| _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, | ||||
| 		    enum chg_state_flags flags); | ||||
| 
 | ||||
| enum drbd_state_rv | ||||
| conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, | ||||
| 		   enum chg_state_flags flags); | ||||
| 
 | ||||
| extern void drbd_resume_al(struct drbd_conf *mdev); | ||||
| extern bool conn_all_vols_unconf(struct drbd_tconn *tconn); | ||||
| 
 | ||||
| /**
 | ||||
|  * drbd_request_state() - Reqest a state change | ||||
|  * @mdev:	DRBD device. | ||||
|  * @mask:	mask of state bits to change. | ||||
|  * @val:	value of new state bits. | ||||
|  * | ||||
|  * This is the most graceful way of requesting a state change. It is verbose | ||||
|  * quite verbose in case the state change is not possible, and all those | ||||
|  * state changes are globally serialized. | ||||
|  */ | ||||
| static inline int drbd_request_state(struct drbd_conf *mdev, | ||||
| 				     union drbd_state mask, | ||||
| 				     union drbd_state val) | ||||
| { | ||||
| 	return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); | ||||
| } | ||||
| 
 | ||||
| enum drbd_role conn_highest_role(struct drbd_tconn *tconn); | ||||
| enum drbd_role conn_highest_peer(struct drbd_tconn *tconn); | ||||
| enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn); | ||||
| enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn); | ||||
| enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn); | ||||
| enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn); | ||||
| 
 | ||||
| #endif | ||||
|  | @ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = { | |||
| 	[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated", | ||||
| 	[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change", | ||||
| 	[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted", | ||||
| 	[-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config", | ||||
| }; | ||||
| 
 | ||||
| const char *drbd_conn_str(enum drbd_conns s) | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -3,6 +3,7 @@ | |||
| 
 | ||||
| #include <linux/ctype.h> | ||||
| #include <linux/mm.h> | ||||
| #include "drbd_int.h" | ||||
| 
 | ||||
| /* see get_sb_bdev and bd_claim */ | ||||
| extern char *drbd_sec_holder; | ||||
|  | @ -20,8 +21,8 @@ static inline void drbd_set_my_capacity(struct drbd_conf *mdev, | |||
| 
 | ||||
| /* bi_end_io handlers */ | ||||
| extern void drbd_md_io_complete(struct bio *bio, int error); | ||||
| extern void drbd_endio_sec(struct bio *bio, int error); | ||||
| extern void drbd_endio_pri(struct bio *bio, int error); | ||||
| extern void drbd_peer_request_endio(struct bio *bio, int error); | ||||
| extern void drbd_request_endio(struct bio *bio, int error); | ||||
| 
 | ||||
| /*
 | ||||
|  * used to submit our private bio | ||||
|  | @ -45,12 +46,6 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev, | |||
| 		generic_make_request(bio); | ||||
| } | ||||
| 
 | ||||
| static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm) | ||||
| { | ||||
|         return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK) | ||||
|                 == CRYPTO_ALG_TYPE_HASH; | ||||
| } | ||||
| 
 | ||||
| #ifndef __CHECKER__ | ||||
| # undef __cond_lock | ||||
| # define __cond_lock(x,c) (c) | ||||
|  |  | |||
|  | @ -51,12 +51,11 @@ | |||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| extern const char *drbd_buildtag(void); | ||||
| #define REL_VERSION "8.3.14" | ||||
| #define API_VERSION 88 | ||||
| #define REL_VERSION "8.4.2" | ||||
| #define API_VERSION 1 | ||||
| #define PRO_VERSION_MIN 86 | ||||
| #define PRO_VERSION_MAX 97 | ||||
| #define PRO_VERSION_MAX 101 | ||||
| 
 | ||||
| 
 | ||||
| enum drbd_io_error_p { | ||||
|  | @ -66,7 +65,8 @@ enum drbd_io_error_p { | |||
| }; | ||||
| 
 | ||||
| enum drbd_fencing_p { | ||||
| 	FP_DONT_CARE, | ||||
| 	FP_NOT_AVAIL = -1, /* Not a policy */ | ||||
| 	FP_DONT_CARE = 0, | ||||
| 	FP_RESOURCE, | ||||
| 	FP_STONITH | ||||
| }; | ||||
|  | @ -102,6 +102,20 @@ enum drbd_on_congestion { | |||
| 	OC_DISCONNECT, | ||||
| }; | ||||
| 
 | ||||
| enum drbd_read_balancing { | ||||
| 	RB_PREFER_LOCAL, | ||||
| 	RB_PREFER_REMOTE, | ||||
| 	RB_ROUND_ROBIN, | ||||
| 	RB_LEAST_PENDING, | ||||
| 	RB_CONGESTED_REMOTE, | ||||
| 	RB_32K_STRIPING, | ||||
| 	RB_64K_STRIPING, | ||||
| 	RB_128K_STRIPING, | ||||
| 	RB_256K_STRIPING, | ||||
| 	RB_512K_STRIPING, | ||||
| 	RB_1M_STRIPING, | ||||
| }; | ||||
| 
 | ||||
| /* KEEP the order, do not delete or insert. Only append. */ | ||||
| enum drbd_ret_code { | ||||
| 	ERR_CODE_BASE		= 100, | ||||
|  | @ -122,7 +136,7 @@ enum drbd_ret_code { | |||
| 	ERR_AUTH_ALG		= 120, | ||||
| 	ERR_AUTH_ALG_ND		= 121, | ||||
| 	ERR_NOMEM		= 122, | ||||
| 	ERR_DISCARD		= 123, | ||||
| 	ERR_DISCARD_IMPOSSIBLE	= 123, | ||||
| 	ERR_DISK_CONFIGURED	= 124, | ||||
| 	ERR_NET_CONFIGURED	= 125, | ||||
| 	ERR_MANDATORY_TAG	= 126, | ||||
|  | @ -130,8 +144,8 @@ enum drbd_ret_code { | |||
| 	ERR_INTR		= 129, /* EINTR */ | ||||
| 	ERR_RESIZE_RESYNC	= 130, | ||||
| 	ERR_NO_PRIMARY		= 131, | ||||
| 	ERR_SYNC_AFTER		= 132, | ||||
| 	ERR_SYNC_AFTER_CYCLE	= 133, | ||||
| 	ERR_RESYNC_AFTER	= 132, | ||||
| 	ERR_RESYNC_AFTER_CYCLE	= 133, | ||||
| 	ERR_PAUSE_IS_SET	= 134, | ||||
| 	ERR_PAUSE_IS_CLEAR	= 135, | ||||
| 	ERR_PACKET_NR		= 137, | ||||
|  | @ -155,6 +169,14 @@ enum drbd_ret_code { | |||
| 	ERR_CONG_NOT_PROTO_A	= 155, | ||||
| 	ERR_PIC_AFTER_DEP	= 156, | ||||
| 	ERR_PIC_PEER_DEP	= 157, | ||||
| 	ERR_RES_NOT_KNOWN	= 158, | ||||
| 	ERR_RES_IN_USE		= 159, | ||||
| 	ERR_MINOR_CONFIGURED    = 160, | ||||
| 	ERR_MINOR_EXISTS	= 161, | ||||
| 	ERR_INVALID_REQUEST	= 162, | ||||
| 	ERR_NEED_APV_100	= 163, | ||||
| 	ERR_NEED_ALLOW_TWO_PRI  = 164, | ||||
| 	ERR_MD_UNCLEAN          = 165, | ||||
| 
 | ||||
| 	/* insert new ones above this line */ | ||||
| 	AFTER_LAST_ERR_CODE | ||||
|  | @ -296,7 +318,8 @@ enum drbd_state_rv { | |||
| 	SS_NOT_SUPPORTED = -17,      /* drbd-8.2 only */ | ||||
| 	SS_IN_TRANSIENT_STATE = -18,  /* Retry after the next state change */ | ||||
| 	SS_CONCURRENT_ST_CHG = -19,   /* Concurrent cluster side state change! */ | ||||
| 	SS_AFTER_LAST_ERROR = -20,    /* Keep this at bottom */ | ||||
| 	SS_O_VOL_PEER_PRI = -20, | ||||
| 	SS_AFTER_LAST_ERROR = -21,    /* Keep this at bottom */ | ||||
| }; | ||||
| 
 | ||||
| /* from drbd_strings.c */ | ||||
|  | @ -314,6 +337,8 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv); | |||
| #define MDF_WAS_UP_TO_DATE	(1 << 4) | ||||
| #define MDF_PEER_OUT_DATED	(1 << 5) | ||||
| #define MDF_CRASHED_PRIMARY	(1 << 6) | ||||
| #define MDF_AL_CLEAN		(1 << 7) | ||||
| #define MDF_AL_DISABLED		(1 << 8) | ||||
| 
 | ||||
| enum drbd_uuid_index { | ||||
| 	UI_CURRENT, | ||||
|  | @ -333,37 +358,23 @@ enum drbd_timeout_flag { | |||
| 
 | ||||
| #define UUID_JUST_CREATED ((__u64)4) | ||||
| 
 | ||||
| /* magic numbers used in meta data and network packets */ | ||||
| #define DRBD_MAGIC 0x83740267 | ||||
| #define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) | ||||
| #define DRBD_MAGIC_BIG 0x835a | ||||
| #define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG) | ||||
| #define DRBD_MAGIC_100 0x8620ec20 | ||||
| 
 | ||||
| #define DRBD_MD_MAGIC_07   (DRBD_MAGIC+3) | ||||
| #define DRBD_MD_MAGIC_08   (DRBD_MAGIC+4) | ||||
| #define DRBD_MD_MAGIC_84_UNCLEAN	(DRBD_MAGIC+5) | ||||
| 
 | ||||
| 
 | ||||
| /* how I came up with this magic?
 | ||||
|  * base64 decode "actlog==" ;) */ | ||||
| #define DRBD_AL_MAGIC 0x69cb65a2 | ||||
| 
 | ||||
| /* these are of type "int" */ | ||||
| #define DRBD_MD_INDEX_INTERNAL -1 | ||||
| #define DRBD_MD_INDEX_FLEX_EXT -2 | ||||
| #define DRBD_MD_INDEX_FLEX_INT -3 | ||||
| 
 | ||||
| /* Start of the new netlink/connector stuff */ | ||||
| 
 | ||||
| #define DRBD_NL_CREATE_DEVICE 0x01 | ||||
| #define DRBD_NL_SET_DEFAULTS  0x02 | ||||
| 
 | ||||
| 
 | ||||
| /* For searching a vacant cn_idx value */ | ||||
| #define CN_IDX_STEP			6977 | ||||
| 
 | ||||
| struct drbd_nl_cfg_req { | ||||
| 	int packet_type; | ||||
| 	unsigned int drbd_minor; | ||||
| 	int flags; | ||||
| 	unsigned short tag_list[]; | ||||
| }; | ||||
| 
 | ||||
| struct drbd_nl_cfg_reply { | ||||
| 	int packet_type; | ||||
| 	unsigned int minor; | ||||
| 	int ret_code; /* enum ret_code or set_st_err_t */ | ||||
| 	unsigned short tag_list[]; /* only used with get_* calls */ | ||||
| }; | ||||
| 
 | ||||
| #endif | ||||
|  |  | |||
							
								
								
									
										378
									
								
								include/linux/drbd_genl.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										378
									
								
								include/linux/drbd_genl.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,378 @@ | |||
| /*
 | ||||
|  * General overview: | ||||
|  * full generic netlink message: | ||||
|  * |nlmsghdr|genlmsghdr|<payload> | ||||
|  * | ||||
|  * payload: | ||||
|  * |optional fixed size family header|<sequence of netlink attributes> | ||||
|  * | ||||
|  * sequence of netlink attributes: | ||||
|  * I chose to have all "top level" attributes NLA_NESTED, | ||||
|  * corresponding to some real struct. | ||||
|  * So we have a sequence of |tla, len|<nested nla sequence> | ||||
|  * | ||||
|  * nested nla sequence: | ||||
|  * may be empty, or contain a sequence of netlink attributes | ||||
|  * representing the struct fields. | ||||
|  * | ||||
|  * The tag number of any field (regardless of containing struct) | ||||
|  * will be available as T_ ## field_name, | ||||
|  * so you cannot have the same field name in two differnt structs. | ||||
|  * | ||||
|  * The tag numbers themselves are per struct, though, | ||||
|  * so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type, | ||||
|  * which we won't use here). | ||||
|  * The tag numbers are used as index in the respective nla_policy array. | ||||
|  * | ||||
|  * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy | ||||
|  *	genl_magic_struct.h | ||||
|  *		generates the struct declaration, | ||||
|  *		generates an entry in the tla enum, | ||||
|  *	genl_magic_func.h | ||||
|  *		generates an entry in the static tla policy | ||||
|  *		with .type = NLA_NESTED | ||||
|  *		generates the static <struct_name>_nl_policy definition, | ||||
|  *		and static conversion functions | ||||
|  * | ||||
|  *	genl_magic_func.h | ||||
|  * | ||||
|  * GENL_mc_group(group) | ||||
|  *	genl_magic_struct.h | ||||
|  *		does nothing | ||||
|  *	genl_magic_func.h | ||||
|  *		defines and registers the mcast group, | ||||
|  *		and provides a send helper | ||||
|  * | ||||
|  * GENL_notification(op_name, op_num, mcast_group, tla list) | ||||
|  *	These are notifications to userspace. | ||||
|  * | ||||
|  *	genl_magic_struct.h | ||||
|  *		generates an entry in the genl_ops enum, | ||||
|  *	genl_magic_func.h | ||||
|  *		does nothing | ||||
|  * | ||||
|  *	mcast group: the name of the mcast group this notification should be | ||||
|  *	expected on | ||||
|  *	tla list: the list of expected top level attributes, | ||||
|  *	for documentation and sanity checking. | ||||
|  * | ||||
|  * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations" | ||||
|  *	These are requests from userspace. | ||||
|  * | ||||
|  *	_op and _notification share the same "number space", | ||||
|  *	op_nr will be assigned to "genlmsghdr->cmd" | ||||
|  * | ||||
|  *	genl_magic_struct.h | ||||
|  *		generates an entry in the genl_ops enum, | ||||
|  *	genl_magic_func.h | ||||
|  *		generates an entry in the static genl_ops array, | ||||
|  *		and static register/unregister functions to | ||||
|  *		genl_register_family_with_ops(). | ||||
|  * | ||||
|  *	flags and handler: | ||||
|  *		GENL_op_init( .doit = x, .dumpit = y, .flags = something) | ||||
|  *		GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM | ||||
|  *	tla list: the list of expected top level attributes, | ||||
|  *	for documentation and sanity checking. | ||||
|  */ | ||||
| 
 | ||||
| /*
 | ||||
|  * STRUCTS | ||||
|  */ | ||||
| 
 | ||||
| /* this is sent kernel -> userland on various error conditions, and contains
 | ||||
|  * informational textual info, which is supposedly human readable. | ||||
|  * The computer relevant return code is in the drbd_genlmsghdr. | ||||
|  */ | ||||
| GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, | ||||
| 		/* "arbitrary" size strings, nla_policy.len = 0 */ | ||||
| 	__str_field(1, DRBD_GENLA_F_MANDATORY,	info_text, 0) | ||||
| ) | ||||
| 
 | ||||
| /* Configuration requests typically need a context to operate on.
 | ||||
|  * Possible keys are device minor (fits in the drbd_genlmsghdr), | ||||
|  * the replication link (aka connection) name, | ||||
|  * and/or the replication group (aka resource) name, | ||||
|  * and the volume id within the resource. */ | ||||
| GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, | ||||
| 	__u32_field(1, DRBD_GENLA_F_MANDATORY,	ctx_volume) | ||||
| 	__str_field(2, DRBD_GENLA_F_MANDATORY,	ctx_resource_name, 128) | ||||
| 	__bin_field(3, DRBD_GENLA_F_MANDATORY,	ctx_my_addr, 128) | ||||
| 	__bin_field(4, DRBD_GENLA_F_MANDATORY,	ctx_peer_addr, 128) | ||||
| ) | ||||
| 
 | ||||
| GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, | ||||
| 	__str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	backing_dev,	128) | ||||
| 	__str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	meta_dev,	128) | ||||
| 	__s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	meta_dev_idx) | ||||
| 
 | ||||
| 	/* use the resize command to try and change the disk_size */ | ||||
| 	__u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	disk_size) | ||||
| 	/* we could change the max_bio_bvecs,
 | ||||
| 	 * but it won't propagate through the stack */ | ||||
| 	__u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	max_bio_bvecs) | ||||
| 
 | ||||
| 	__u32_field_def(6, DRBD_GENLA_F_MANDATORY,	on_io_error, DRBD_ON_IO_ERROR_DEF) | ||||
| 	__u32_field_def(7, DRBD_GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF) | ||||
| 
 | ||||
| 	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF) | ||||
| 	__s32_field_def(9,	DRBD_GENLA_F_MANDATORY,	resync_after, DRBD_MINOR_NUMBER_DEF) | ||||
| 	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF) | ||||
| 	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) | ||||
| 	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF) | ||||
| 	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	c_fill_target, DRBD_C_FILL_TARGET_DEF) | ||||
| 	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF) | ||||
| 	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF) | ||||
| 
 | ||||
| 	__flg_field_def(16, DRBD_GENLA_F_MANDATORY,	disk_barrier, DRBD_DISK_BARRIER_DEF) | ||||
| 	__flg_field_def(17, DRBD_GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF) | ||||
| 	__flg_field_def(18, DRBD_GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF) | ||||
| 	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF) | ||||
| 	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	disk_timeout, DRBD_DISK_TIMEOUT_DEF) | ||||
| 	__u32_field_def(21,	0 /* OPTIONAL */,       read_balancing, DRBD_READ_BALANCING_DEF) | ||||
| 	/* 9: __u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */ | ||||
| 	__flg_field_def(23,     0 /* OPTIONAL */,	al_updates, DRBD_AL_UPDATES_DEF) | ||||
| ) | ||||
| 
 | ||||
| GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, | ||||
| 	__str_field_def(1,	DRBD_GENLA_F_MANDATORY,	cpu_mask,       32) | ||||
| 	__u32_field_def(2,	DRBD_GENLA_F_MANDATORY,	on_no_data, DRBD_ON_NO_DATA_DEF) | ||||
| ) | ||||
| 
 | ||||
| GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, | ||||
| 	__str_field_def(1,	DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, | ||||
| 						shared_secret,	SHARED_SECRET_MAX) | ||||
| 	__str_field_def(2,	DRBD_GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX) | ||||
| 	__str_field_def(3,	DRBD_GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX) | ||||
| 	__str_field_def(4,	DRBD_GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX) | ||||
| 	__str_field_def(5,	DRBD_GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX) | ||||
| 	__u32_field_def(6,	DRBD_GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF) | ||||
| 	__u32_field_def(7,	DRBD_GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF) | ||||
| 	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF) | ||||
| 	__u32_field_def(9,	DRBD_GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF) | ||||
| 	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF) | ||||
| 	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF) | ||||
| 	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) | ||||
| 	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF) | ||||
| 	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF) | ||||
| 	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) | ||||
| 	__u32_field_def(16,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) | ||||
| 	__u32_field_def(17,	DRBD_GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF) | ||||
| 	__u32_field_def(18,	DRBD_GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF) | ||||
| 	__u32_field_def(19,	DRBD_GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF) | ||||
| 	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF) | ||||
| 	__u32_field_def(21,	DRBD_GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF) | ||||
| 	__u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF) | ||||
| 	__u32_field_def(23,	DRBD_GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF) | ||||
| 	__flg_field_def(24, DRBD_GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) | ||||
| 	__flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	discard_my_data) | ||||
| 	__flg_field_def(26, DRBD_GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF) | ||||
| 	__flg_field_def(27, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF) | ||||
| 	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	tentative) | ||||
| 	__flg_field_def(29,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF) | ||||
| 	/* 9: __u32_field_def(30,	DRBD_GENLA_F_MANDATORY,	fencing_policy, DRBD_FENCING_DEF) */ | ||||
| ) | ||||
| 
 | ||||
| GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, | ||||
| 	__flg_field(1, DRBD_GENLA_F_MANDATORY,	assume_uptodate) | ||||
| ) | ||||
| 
 | ||||
| GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, | ||||
| 	__u64_field(1, DRBD_GENLA_F_MANDATORY,	resize_size) | ||||
| 	__flg_field(2, DRBD_GENLA_F_MANDATORY,	resize_force) | ||||
| 	__flg_field(3, DRBD_GENLA_F_MANDATORY,	no_resync) | ||||
| ) | ||||
| 
 | ||||
| GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, | ||||
| 	/* the reason of the broadcast,
 | ||||
| 	 * if this is an event triggered broadcast. */ | ||||
| 	__u32_field(1, DRBD_GENLA_F_MANDATORY,	sib_reason) | ||||
| 	__u32_field(2, DRBD_F_REQUIRED,	current_state) | ||||
| 	__u64_field(3, DRBD_GENLA_F_MANDATORY,	capacity) | ||||
| 	__u64_field(4, DRBD_GENLA_F_MANDATORY,	ed_uuid) | ||||
| 
 | ||||
| 	/* These are for broadcast from after state change work.
 | ||||
| 	 * prev_state and new_state are from the moment the state change took | ||||
| 	 * place, new_state is not neccessarily the same as current_state, | ||||
| 	 * there may have been more state changes since.  Which will be | ||||
| 	 * broadcasted soon, in their respective after state change work.  */ | ||||
| 	__u32_field(5, DRBD_GENLA_F_MANDATORY,	prev_state) | ||||
| 	__u32_field(6, DRBD_GENLA_F_MANDATORY,	new_state) | ||||
| 
 | ||||
| 	/* if we have a local disk: */ | ||||
| 	__bin_field(7, DRBD_GENLA_F_MANDATORY,	uuids, (UI_SIZE*sizeof(__u64))) | ||||
| 	__u32_field(8, DRBD_GENLA_F_MANDATORY,	disk_flags) | ||||
| 	__u64_field(9, DRBD_GENLA_F_MANDATORY,	bits_total) | ||||
| 	__u64_field(10, DRBD_GENLA_F_MANDATORY,	bits_oos) | ||||
| 	/* and in case resync or online verify is active */ | ||||
| 	__u64_field(11, DRBD_GENLA_F_MANDATORY,	bits_rs_total) | ||||
| 	__u64_field(12, DRBD_GENLA_F_MANDATORY,	bits_rs_failed) | ||||
| 
 | ||||
| 	/* for pre and post notifications of helper execution */ | ||||
| 	__str_field(13, DRBD_GENLA_F_MANDATORY,	helper, 32) | ||||
| 	__u32_field(14, DRBD_GENLA_F_MANDATORY,	helper_exit_code) | ||||
| 
 | ||||
| 	__u64_field(15,                      0, send_cnt) | ||||
| 	__u64_field(16,                      0, recv_cnt) | ||||
| 	__u64_field(17,                      0, read_cnt) | ||||
| 	__u64_field(18,                      0, writ_cnt) | ||||
| 	__u64_field(19,                      0, al_writ_cnt) | ||||
| 	__u64_field(20,                      0, bm_writ_cnt) | ||||
| 	__u32_field(21,                      0, ap_bio_cnt) | ||||
| 	__u32_field(22,                      0, ap_pending_cnt) | ||||
| 	__u32_field(23,                      0, rs_pending_cnt) | ||||
| ) | ||||
| 
 | ||||
| GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, | ||||
| 	__u64_field(1, DRBD_GENLA_F_MANDATORY,	ov_start_sector) | ||||
| 	__u64_field(2, DRBD_GENLA_F_MANDATORY,	ov_stop_sector) | ||||
| ) | ||||
| 
 | ||||
| GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, | ||||
| 	__flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm) | ||||
| ) | ||||
| 
 | ||||
| GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, | ||||
| 	__u32_field(1,	DRBD_F_REQUIRED,	timeout_type) | ||||
| ) | ||||
| 
 | ||||
| GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, | ||||
| 	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_disconnect) | ||||
| ) | ||||
| 
 | ||||
| GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms, | ||||
| 	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_detach) | ||||
| ) | ||||
| 
 | ||||
| /*
 | ||||
|  * Notifications and commands (genlmsghdr->cmd) | ||||
|  */ | ||||
| GENL_mc_group(events) | ||||
| 
 | ||||
| 	/* kernel -> userspace announcement of changes */ | ||||
| GENL_notification( | ||||
| 	DRBD_EVENT, 1, events, | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||||
| 	GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED) | ||||
| 	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY) | ||||
| 	GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY) | ||||
| 	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY) | ||||
| ) | ||||
| 
 | ||||
| 	/* query kernel for specific or all info */ | ||||
| GENL_op( | ||||
| 	DRBD_ADM_GET_STATUS, 2, | ||||
| 	GENL_op_init( | ||||
| 		.doit = drbd_adm_get_status, | ||||
| 		.dumpit = drbd_adm_get_status_all, | ||||
| 		/* anyone may ask for the status,
 | ||||
| 		 * it is broadcasted anyways */ | ||||
| 	), | ||||
| 	/* To select the object .doit.
 | ||||
| 	 * Or a subset of objects in .dumpit. */ | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) | ||||
| ) | ||||
| 
 | ||||
| 	/* add DRBD minor devices as volumes to resources */ | ||||
| GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| 
 | ||||
| 	/* add or delete resources */ | ||||
| GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| 
 | ||||
| GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, | ||||
| 	GENL_doit(drbd_adm_resource_opts), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||||
| 	GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY) | ||||
| ) | ||||
| 
 | ||||
| GENL_op( | ||||
| 	DRBD_ADM_CONNECT, 10, | ||||
| 	GENL_doit(drbd_adm_connect), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||||
| 	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED) | ||||
| ) | ||||
| 
 | ||||
| GENL_op( | ||||
| 	DRBD_ADM_CHG_NET_OPTS, 29, | ||||
| 	GENL_doit(drbd_adm_net_opts), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||||
| 	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED) | ||||
| ) | ||||
| 
 | ||||
| GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| 
 | ||||
| GENL_op(DRBD_ADM_ATTACH, 12, | ||||
| 	GENL_doit(drbd_adm_attach), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||||
| 	GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED) | ||||
| ) | ||||
| 
 | ||||
| GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28, | ||||
| 	GENL_doit(drbd_adm_disk_opts), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||||
| 	GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED) | ||||
| ) | ||||
| 
 | ||||
| GENL_op( | ||||
| 	DRBD_ADM_RESIZE, 13, | ||||
| 	GENL_doit(drbd_adm_resize), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||||
| 	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY) | ||||
| ) | ||||
| 
 | ||||
| GENL_op( | ||||
| 	DRBD_ADM_PRIMARY, 14, | ||||
| 	GENL_doit(drbd_adm_set_role), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||||
| 	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED) | ||||
| ) | ||||
| 
 | ||||
| GENL_op( | ||||
| 	DRBD_ADM_SECONDARY, 15, | ||||
| 	GENL_doit(drbd_adm_set_role), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||||
| 	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED) | ||||
| ) | ||||
| 
 | ||||
| GENL_op( | ||||
| 	DRBD_ADM_NEW_C_UUID, 16, | ||||
| 	GENL_doit(drbd_adm_new_c_uuid), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||||
| 	GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY) | ||||
| ) | ||||
| 
 | ||||
| GENL_op( | ||||
| 	DRBD_ADM_START_OV, 17, | ||||
| 	GENL_doit(drbd_adm_start_ov), | ||||
| 	GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY) | ||||
| ) | ||||
| 
 | ||||
| GENL_op(DRBD_ADM_DETACH,	18, GENL_doit(drbd_adm_detach), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||||
| 	GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY)) | ||||
| 
 | ||||
| GENL_op(DRBD_ADM_INVALIDATE,	19, GENL_doit(drbd_adm_invalidate), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| GENL_op(DRBD_ADM_INVAL_PEER,	20, GENL_doit(drbd_adm_invalidate_peer), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| GENL_op(DRBD_ADM_PAUSE_SYNC,	21, GENL_doit(drbd_adm_pause_sync), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| GENL_op(DRBD_ADM_RESUME_SYNC,	22, GENL_doit(drbd_adm_resume_sync), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| GENL_op(DRBD_ADM_SUSPEND_IO,	23, GENL_doit(drbd_adm_suspend_io), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| GENL_op(DRBD_ADM_RESUME_IO,	24, GENL_doit(drbd_adm_resume_io), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| GENL_op(DRBD_ADM_OUTDATE,	25, GENL_doit(drbd_adm_outdate), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
| GENL_op(DRBD_ADM_DOWN,		27, GENL_doit(drbd_adm_down), | ||||
| 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | ||||
							
								
								
									
										55
									
								
								include/linux/drbd_genl_api.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								include/linux/drbd_genl_api.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,55 @@ | |||
| #ifndef DRBD_GENL_STRUCT_H | ||||
| #define DRBD_GENL_STRUCT_H | ||||
| 
 | ||||
| /**
 | ||||
|  * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests | ||||
|  * @minor: | ||||
|  *     For admin requests (user -> kernel): which minor device to operate on. | ||||
|  *     For (unicast) replies or informational (broadcast) messages | ||||
|  *     (kernel -> user): which minor device the information is about. | ||||
|  *     If we do not operate on minors, but on connections or resources, | ||||
|  *     the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT | ||||
|  *     is used instead. | ||||
|  * @flags: possible operation modifiers (relevant only for user->kernel): | ||||
|  *     DRBD_GENL_F_SET_DEFAULTS | ||||
|  * @volume: | ||||
|  *     When creating a new minor (adding it to a resource), the resource needs | ||||
|  *     to know which volume number within the resource this is supposed to be. | ||||
|  *     The volume number corresponds to the same volume number on the remote side, | ||||
|  *     whereas the minor number on the remote side may be different | ||||
|  *     (union with flags). | ||||
|  * @ret_code: kernel->userland unicast cfg reply return code (union with flags); | ||||
|  */ | ||||
| struct drbd_genlmsghdr { | ||||
| 	__u32 minor; | ||||
| 	union { | ||||
| 	__u32 flags; | ||||
| 	__s32 ret_code; | ||||
| 	}; | ||||
| }; | ||||
| 
 | ||||
| /* To be used in drbd_genlmsghdr.flags */ | ||||
| enum { | ||||
| 	DRBD_GENL_F_SET_DEFAULTS = 1, | ||||
| }; | ||||
| 
 | ||||
| enum drbd_state_info_bcast_reason { | ||||
| 	SIB_GET_STATUS_REPLY = 1, | ||||
| 	SIB_STATE_CHANGE = 2, | ||||
| 	SIB_HELPER_PRE = 3, | ||||
| 	SIB_HELPER_POST = 4, | ||||
| 	SIB_SYNC_PROGRESS = 5, | ||||
| }; | ||||
| 
 | ||||
| /* hack around predefined gcc/cpp "linux=1",
 | ||||
|  * we cannot possibly include <1/drbd_genl.h> */ | ||||
| #undef linux | ||||
| 
 | ||||
| #include <linux/drbd.h> | ||||
| #define GENL_MAGIC_VERSION	API_VERSION | ||||
| #define GENL_MAGIC_FAMILY	drbd | ||||
| #define GENL_MAGIC_FAMILY_HDRSZ	sizeof(struct drbd_genlmsghdr) | ||||
| #define GENL_MAGIC_INCLUDE_FILE <linux/drbd_genl.h> | ||||
| #include <linux/genl_magic_struct.h> | ||||
| 
 | ||||
| #endif | ||||
|  | @ -16,29 +16,37 @@ | |||
| #define DEBUG_RANGE_CHECK 0 | ||||
| 
 | ||||
| #define DRBD_MINOR_COUNT_MIN 1 | ||||
| #define DRBD_MINOR_COUNT_MAX 256 | ||||
| #define DRBD_MINOR_COUNT_MAX 255 | ||||
| #define DRBD_MINOR_COUNT_DEF 32 | ||||
| #define DRBD_MINOR_COUNT_SCALE '1' | ||||
| 
 | ||||
| #define DRBD_VOLUME_MAX 65535 | ||||
| 
 | ||||
| #define DRBD_DIALOG_REFRESH_MIN 0 | ||||
| #define DRBD_DIALOG_REFRESH_MAX 600 | ||||
| #define DRBD_DIALOG_REFRESH_SCALE '1' | ||||
| 
 | ||||
| /* valid port number */ | ||||
| #define DRBD_PORT_MIN 1 | ||||
| #define DRBD_PORT_MAX 0xffff | ||||
| #define DRBD_PORT_SCALE '1' | ||||
| 
 | ||||
| /* startup { */ | ||||
|   /* if you want more than 3.4 days, disable */ | ||||
| #define DRBD_WFC_TIMEOUT_MIN 0 | ||||
| #define DRBD_WFC_TIMEOUT_MAX 300000 | ||||
| #define DRBD_WFC_TIMEOUT_DEF 0 | ||||
| #define DRBD_WFC_TIMEOUT_SCALE '1' | ||||
| 
 | ||||
| #define DRBD_DEGR_WFC_TIMEOUT_MIN 0 | ||||
| #define DRBD_DEGR_WFC_TIMEOUT_MAX 300000 | ||||
| #define DRBD_DEGR_WFC_TIMEOUT_DEF 0 | ||||
| #define DRBD_DEGR_WFC_TIMEOUT_SCALE '1' | ||||
| 
 | ||||
| #define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0 | ||||
| #define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000 | ||||
| #define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0 | ||||
| #define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1' | ||||
| /* }*/ | ||||
| 
 | ||||
| /* net { */ | ||||
|  | @ -47,75 +55,91 @@ | |||
| #define DRBD_TIMEOUT_MIN 1 | ||||
| #define DRBD_TIMEOUT_MAX 600 | ||||
| #define DRBD_TIMEOUT_DEF 60       /* 6 seconds */ | ||||
| #define DRBD_TIMEOUT_SCALE '1' | ||||
| 
 | ||||
|  /* If backing disk takes longer than disk_timeout, mark the disk as failed */ | ||||
| #define DRBD_DISK_TIMEOUT_MIN 0    /* 0 = disabled */ | ||||
| #define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */ | ||||
| #define DRBD_DISK_TIMEOUT_DEF 0    /* disabled */ | ||||
| #define DRBD_DISK_TIMEOUT_SCALE '1' | ||||
| 
 | ||||
|   /* active connection retries when C_WF_CONNECTION */ | ||||
| #define DRBD_CONNECT_INT_MIN 1 | ||||
| #define DRBD_CONNECT_INT_MAX 120 | ||||
| #define DRBD_CONNECT_INT_DEF 10   /* seconds */ | ||||
| #define DRBD_CONNECT_INT_SCALE '1' | ||||
| 
 | ||||
|   /* keep-alive probes when idle */ | ||||
| #define DRBD_PING_INT_MIN 1 | ||||
| #define DRBD_PING_INT_MAX 120 | ||||
| #define DRBD_PING_INT_DEF 10 | ||||
| #define DRBD_PING_INT_SCALE '1' | ||||
| 
 | ||||
|  /* timeout for the ping packets.*/ | ||||
| #define DRBD_PING_TIMEO_MIN  1 | ||||
| #define DRBD_PING_TIMEO_MAX  300 | ||||
| #define DRBD_PING_TIMEO_DEF  5 | ||||
| #define DRBD_PING_TIMEO_SCALE '1' | ||||
| 
 | ||||
|   /* max number of write requests between write barriers */ | ||||
| #define DRBD_MAX_EPOCH_SIZE_MIN 1 | ||||
| #define DRBD_MAX_EPOCH_SIZE_MAX 20000 | ||||
| #define DRBD_MAX_EPOCH_SIZE_DEF 2048 | ||||
| #define DRBD_MAX_EPOCH_SIZE_SCALE '1' | ||||
| 
 | ||||
|   /* I don't think that a tcp send buffer of more than 10M is useful */ | ||||
| #define DRBD_SNDBUF_SIZE_MIN  0 | ||||
| #define DRBD_SNDBUF_SIZE_MAX  (10<<20) | ||||
| #define DRBD_SNDBUF_SIZE_DEF  0 | ||||
| #define DRBD_SNDBUF_SIZE_SCALE '1' | ||||
| 
 | ||||
| #define DRBD_RCVBUF_SIZE_MIN  0 | ||||
| #define DRBD_RCVBUF_SIZE_MAX  (10<<20) | ||||
| #define DRBD_RCVBUF_SIZE_DEF  0 | ||||
| #define DRBD_RCVBUF_SIZE_SCALE '1' | ||||
| 
 | ||||
|   /* @4k PageSize -> 128kB - 512MB */ | ||||
| #define DRBD_MAX_BUFFERS_MIN  32 | ||||
| #define DRBD_MAX_BUFFERS_MAX  131072 | ||||
| #define DRBD_MAX_BUFFERS_DEF  2048 | ||||
| #define DRBD_MAX_BUFFERS_SCALE '1' | ||||
| 
 | ||||
|   /* @4k PageSize -> 4kB - 512MB */ | ||||
| #define DRBD_UNPLUG_WATERMARK_MIN  1 | ||||
| #define DRBD_UNPLUG_WATERMARK_MAX  131072 | ||||
| #define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16) | ||||
| #define DRBD_UNPLUG_WATERMARK_SCALE '1' | ||||
| 
 | ||||
|   /* 0 is disabled.
 | ||||
|    * 200 should be more than enough even for very short timeouts */ | ||||
| #define DRBD_KO_COUNT_MIN  0 | ||||
| #define DRBD_KO_COUNT_MAX  200 | ||||
| #define DRBD_KO_COUNT_DEF  0 | ||||
| #define DRBD_KO_COUNT_DEF  7 | ||||
| #define DRBD_KO_COUNT_SCALE '1' | ||||
| /* } */ | ||||
| 
 | ||||
| /* syncer { */ | ||||
|   /* FIXME allow rate to be zero? */ | ||||
| #define DRBD_RATE_MIN 1 | ||||
| #define DRBD_RESYNC_RATE_MIN 1 | ||||
| /* channel bonding 10 GbE, or other hardware */ | ||||
| #define DRBD_RATE_MAX (4 << 20) | ||||
| #define DRBD_RATE_DEF 250  /* kb/second */ | ||||
| #define DRBD_RESYNC_RATE_MAX (4 << 20) | ||||
| #define DRBD_RESYNC_RATE_DEF 250 | ||||
| #define DRBD_RESYNC_RATE_SCALE 'k'  /* kilobytes */ | ||||
| 
 | ||||
|   /* less than 7 would hit performance unnecessarily.
 | ||||
|    * 3833 is the largest prime that still does fit | ||||
|    * into 64 sectors of activity log */ | ||||
|    * 919 slots context information per transaction, | ||||
|    * 32k activity log, 4k transaction size, | ||||
|    * one transaction in flight: | ||||
|    * 919 * 7 = 6433 */ | ||||
| #define DRBD_AL_EXTENTS_MIN  7 | ||||
| #define DRBD_AL_EXTENTS_MAX  3833 | ||||
| #define DRBD_AL_EXTENTS_DEF  127 | ||||
| #define DRBD_AL_EXTENTS_MAX  6433 | ||||
| #define DRBD_AL_EXTENTS_DEF  1237 | ||||
| #define DRBD_AL_EXTENTS_SCALE '1' | ||||
| 
 | ||||
| #define DRBD_AFTER_MIN  -1 | ||||
| #define DRBD_AFTER_MAX  255 | ||||
| #define DRBD_AFTER_DEF  -1 | ||||
| #define DRBD_MINOR_NUMBER_MIN  -1 | ||||
| #define DRBD_MINOR_NUMBER_MAX  ((1 << 20) - 1) | ||||
| #define DRBD_MINOR_NUMBER_DEF  -1 | ||||
| #define DRBD_MINOR_NUMBER_SCALE '1' | ||||
| 
 | ||||
| /* } */ | ||||
| 
 | ||||
|  | @ -124,11 +148,12 @@ | |||
|  * the upper limit with 64bit kernel, enough ram and flexible meta data | ||||
|  * is 1 PiB, currently. */ | ||||
| /* DRBD_MAX_SECTORS */ | ||||
| #define DRBD_DISK_SIZE_SECT_MIN  0 | ||||
| #define DRBD_DISK_SIZE_SECT_MAX  (1 * (2LLU << 40)) | ||||
| #define DRBD_DISK_SIZE_SECT_DEF  0 /* = disabled = no user size... */ | ||||
| #define DRBD_DISK_SIZE_MIN  0 | ||||
| #define DRBD_DISK_SIZE_MAX  (1 * (2LLU << 40)) | ||||
| #define DRBD_DISK_SIZE_DEF  0 /* = disabled = no user size... */ | ||||
| #define DRBD_DISK_SIZE_SCALE 's'  /* sectors */ | ||||
| 
 | ||||
| #define DRBD_ON_IO_ERROR_DEF EP_PASS_ON | ||||
| #define DRBD_ON_IO_ERROR_DEF EP_DETACH | ||||
| #define DRBD_FENCING_DEF FP_DONT_CARE | ||||
| #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT | ||||
| #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT | ||||
|  | @ -136,38 +161,59 @@ | |||
| #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT | ||||
| #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR | ||||
| #define DRBD_ON_CONGESTION_DEF OC_BLOCK | ||||
| #define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL | ||||
| 
 | ||||
| #define DRBD_MAX_BIO_BVECS_MIN 0 | ||||
| #define DRBD_MAX_BIO_BVECS_MAX 128 | ||||
| #define DRBD_MAX_BIO_BVECS_DEF 0 | ||||
| #define DRBD_MAX_BIO_BVECS_SCALE '1' | ||||
| 
 | ||||
| #define DRBD_C_PLAN_AHEAD_MIN  0 | ||||
| #define DRBD_C_PLAN_AHEAD_MAX  300 | ||||
| #define DRBD_C_PLAN_AHEAD_DEF  0 /* RS rate controller disabled by default */ | ||||
| #define DRBD_C_PLAN_AHEAD_DEF  20 | ||||
| #define DRBD_C_PLAN_AHEAD_SCALE '1' | ||||
| 
 | ||||
| #define DRBD_C_DELAY_TARGET_MIN 1 | ||||
| #define DRBD_C_DELAY_TARGET_MAX 100 | ||||
| #define DRBD_C_DELAY_TARGET_DEF 10 | ||||
| #define DRBD_C_DELAY_TARGET_SCALE '1' | ||||
| 
 | ||||
| #define DRBD_C_FILL_TARGET_MIN 0 | ||||
| #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ | ||||
| #define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */ | ||||
| #define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */ | ||||
| #define DRBD_C_FILL_TARGET_SCALE 's'  /* sectors */ | ||||
| 
 | ||||
| #define DRBD_C_MAX_RATE_MIN     250 /* kByte/sec */ | ||||
| #define DRBD_C_MAX_RATE_MIN     250 | ||||
| #define DRBD_C_MAX_RATE_MAX     (4 << 20) | ||||
| #define DRBD_C_MAX_RATE_DEF     102400 | ||||
| #define DRBD_C_MAX_RATE_SCALE	'k'  /* kilobytes */ | ||||
| 
 | ||||
| #define DRBD_C_MIN_RATE_MIN     0 /* kByte/sec */ | ||||
| #define DRBD_C_MIN_RATE_MIN     0 | ||||
| #define DRBD_C_MIN_RATE_MAX     (4 << 20) | ||||
| #define DRBD_C_MIN_RATE_DEF     4096 | ||||
| #define DRBD_C_MIN_RATE_DEF     250 | ||||
| #define DRBD_C_MIN_RATE_SCALE	'k'  /* kilobytes */ | ||||
| 
 | ||||
| #define DRBD_CONG_FILL_MIN	0 | ||||
| #define DRBD_CONG_FILL_MAX	(10<<21) /* 10GByte in sectors */ | ||||
| #define DRBD_CONG_FILL_DEF	0 | ||||
| #define DRBD_CONG_FILL_SCALE	's'  /* sectors */ | ||||
| 
 | ||||
| #define DRBD_CONG_EXTENTS_MIN	DRBD_AL_EXTENTS_MIN | ||||
| #define DRBD_CONG_EXTENTS_MAX	DRBD_AL_EXTENTS_MAX | ||||
| #define DRBD_CONG_EXTENTS_DEF	DRBD_AL_EXTENTS_DEF | ||||
| #define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE | ||||
| 
 | ||||
| #define DRBD_PROTOCOL_DEF DRBD_PROT_C | ||||
| 
 | ||||
| #define DRBD_DISK_BARRIER_DEF	0 | ||||
| #define DRBD_DISK_FLUSHES_DEF	1 | ||||
| #define DRBD_DISK_DRAIN_DEF	1 | ||||
| #define DRBD_MD_FLUSHES_DEF	1 | ||||
| #define DRBD_TCP_CORK_DEF	1 | ||||
| #define DRBD_AL_UPDATES_DEF     1 | ||||
| 
 | ||||
| #define DRBD_ALLOW_TWO_PRIMARIES_DEF	0 | ||||
| #define DRBD_ALWAYS_ASBP_DEF	0 | ||||
| #define DRBD_USE_RLE_DEF	1 | ||||
| 
 | ||||
| #undef RANGE | ||||
| #endif | ||||
|  |  | |||
|  | @ -1,164 +0,0 @@ | |||
| /*
 | ||||
|    PAKET( name, | ||||
| 	  TYPE ( pn, pr, member ) | ||||
| 	  ... | ||||
|    ) | ||||
| 
 | ||||
|    You may never reissue one of the pn arguments | ||||
| */ | ||||
| 
 | ||||
| #if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64) | ||||
| #error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined" | ||||
| #endif | ||||
| 
 | ||||
| NL_PACKET(primary, 1, | ||||
|        NL_BIT(		1,	T_MAY_IGNORE,	primary_force) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(secondary, 2, ) | ||||
| 
 | ||||
| NL_PACKET(disk_conf, 3, | ||||
| 	NL_INT64(	2,	T_MAY_IGNORE,	disk_size) | ||||
| 	NL_STRING(	3,	T_MANDATORY,	backing_dev,	128) | ||||
| 	NL_STRING(	4,	T_MANDATORY,	meta_dev,	128) | ||||
| 	NL_INTEGER(	5,	T_MANDATORY,	meta_dev_idx) | ||||
| 	NL_INTEGER(	6,	T_MAY_IGNORE,	on_io_error) | ||||
| 	NL_INTEGER(	7,	T_MAY_IGNORE,	fencing) | ||||
| 	NL_BIT(		37,	T_MAY_IGNORE,	use_bmbv) | ||||
| 	NL_BIT(		53,	T_MAY_IGNORE,	no_disk_flush) | ||||
| 	NL_BIT(		54,	T_MAY_IGNORE,	no_md_flush) | ||||
| 	  /*  55 max_bio_size was available in 8.2.6rc2 */ | ||||
| 	NL_INTEGER(	56,	T_MAY_IGNORE,	max_bio_bvecs) | ||||
| 	NL_BIT(		57,	T_MAY_IGNORE,	no_disk_barrier) | ||||
| 	NL_BIT(		58,	T_MAY_IGNORE,	no_disk_drain) | ||||
| 	NL_INTEGER(	89,	T_MAY_IGNORE,	disk_timeout) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(detach, 4, | ||||
| 	NL_BIT(		88,	T_MANDATORY,	detach_force) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(net_conf, 5, | ||||
| 	NL_STRING(	8,	T_MANDATORY,	my_addr,	128) | ||||
| 	NL_STRING(	9,	T_MANDATORY,	peer_addr,	128) | ||||
| 	NL_STRING(	10,	T_MAY_IGNORE,	shared_secret,	SHARED_SECRET_MAX) | ||||
| 	NL_STRING(	11,	T_MAY_IGNORE,	cram_hmac_alg,	SHARED_SECRET_MAX) | ||||
| 	NL_STRING(	44,	T_MAY_IGNORE,	integrity_alg,	SHARED_SECRET_MAX) | ||||
| 	NL_INTEGER(	14,	T_MAY_IGNORE,	timeout) | ||||
| 	NL_INTEGER(	15,	T_MANDATORY,	wire_protocol) | ||||
| 	NL_INTEGER(	16,	T_MAY_IGNORE,	try_connect_int) | ||||
| 	NL_INTEGER(	17,	T_MAY_IGNORE,	ping_int) | ||||
| 	NL_INTEGER(	18,	T_MAY_IGNORE,	max_epoch_size) | ||||
| 	NL_INTEGER(	19,	T_MAY_IGNORE,	max_buffers) | ||||
| 	NL_INTEGER(	20,	T_MAY_IGNORE,	unplug_watermark) | ||||
| 	NL_INTEGER(	21,	T_MAY_IGNORE,	sndbuf_size) | ||||
| 	NL_INTEGER(	22,	T_MAY_IGNORE,	ko_count) | ||||
| 	NL_INTEGER(	24,	T_MAY_IGNORE,	after_sb_0p) | ||||
| 	NL_INTEGER(	25,	T_MAY_IGNORE,	after_sb_1p) | ||||
| 	NL_INTEGER(	26,	T_MAY_IGNORE,	after_sb_2p) | ||||
| 	NL_INTEGER(	39,	T_MAY_IGNORE,	rr_conflict) | ||||
| 	NL_INTEGER(	40,	T_MAY_IGNORE,	ping_timeo) | ||||
| 	NL_INTEGER(	67,	T_MAY_IGNORE,	rcvbuf_size) | ||||
| 	NL_INTEGER(	81,	T_MAY_IGNORE,	on_congestion) | ||||
| 	NL_INTEGER(	82,	T_MAY_IGNORE,	cong_fill) | ||||
| 	NL_INTEGER(	83,	T_MAY_IGNORE,	cong_extents) | ||||
| 	  /* 59 addr_family was available in GIT, never released */ | ||||
| 	NL_BIT(		60,	T_MANDATORY,	mind_af) | ||||
| 	NL_BIT(		27,	T_MAY_IGNORE,	want_lose) | ||||
| 	NL_BIT(		28,	T_MAY_IGNORE,	two_primaries) | ||||
| 	NL_BIT(		41,	T_MAY_IGNORE,	always_asbp) | ||||
| 	NL_BIT(		61,	T_MAY_IGNORE,	no_cork) | ||||
| 	NL_BIT(		62,	T_MANDATORY,	auto_sndbuf_size) | ||||
| 	NL_BIT(		70,	T_MANDATORY,	dry_run) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(disconnect, 6, | ||||
| 	NL_BIT(		84,	T_MAY_IGNORE,	force) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(resize, 7, | ||||
| 	NL_INT64(		29,	T_MAY_IGNORE,	resize_size) | ||||
| 	NL_BIT(			68,	T_MAY_IGNORE,	resize_force) | ||||
| 	NL_BIT(			69,	T_MANDATORY,	no_resync) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(syncer_conf, 8, | ||||
| 	NL_INTEGER(	30,	T_MAY_IGNORE,	rate) | ||||
| 	NL_INTEGER(	31,	T_MAY_IGNORE,	after) | ||||
| 	NL_INTEGER(	32,	T_MAY_IGNORE,	al_extents) | ||||
| /*	NL_INTEGER(     71,	T_MAY_IGNORE,	dp_volume)
 | ||||
|  *	NL_INTEGER(     72,	T_MAY_IGNORE,	dp_interval) | ||||
|  *	NL_INTEGER(     73,	T_MAY_IGNORE,	throttle_th) | ||||
|  *	NL_INTEGER(     74,	T_MAY_IGNORE,	hold_off_th) | ||||
|  * feature will be reimplemented differently with 8.3.9 */ | ||||
| 	NL_STRING(      52,     T_MAY_IGNORE,   verify_alg,     SHARED_SECRET_MAX) | ||||
| 	NL_STRING(      51,     T_MAY_IGNORE,   cpu_mask,       32) | ||||
| 	NL_STRING(	64,	T_MAY_IGNORE,	csums_alg,	SHARED_SECRET_MAX) | ||||
| 	NL_BIT(         65,     T_MAY_IGNORE,   use_rle) | ||||
| 	NL_INTEGER(	75,	T_MAY_IGNORE,	on_no_data) | ||||
| 	NL_INTEGER(	76,	T_MAY_IGNORE,	c_plan_ahead) | ||||
| 	NL_INTEGER(     77,	T_MAY_IGNORE,	c_delay_target) | ||||
| 	NL_INTEGER(     78,	T_MAY_IGNORE,	c_fill_target) | ||||
| 	NL_INTEGER(     79,	T_MAY_IGNORE,	c_max_rate) | ||||
| 	NL_INTEGER(     80,	T_MAY_IGNORE,	c_min_rate) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(invalidate, 9, ) | ||||
| NL_PACKET(invalidate_peer, 10, ) | ||||
| NL_PACKET(pause_sync, 11, ) | ||||
| NL_PACKET(resume_sync, 12, ) | ||||
| NL_PACKET(suspend_io, 13, ) | ||||
| NL_PACKET(resume_io, 14, ) | ||||
| NL_PACKET(outdate, 15, ) | ||||
| NL_PACKET(get_config, 16, ) | ||||
| NL_PACKET(get_state, 17, | ||||
| 	NL_INTEGER(	33,	T_MAY_IGNORE,	state_i) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(get_uuids, 18, | ||||
| 	NL_STRING(	34,	T_MAY_IGNORE,	uuids,	(UI_SIZE*sizeof(__u64))) | ||||
| 	NL_INTEGER(	35,	T_MAY_IGNORE,	uuids_flags) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(get_timeout_flag, 19, | ||||
| 	NL_BIT(		36,	T_MAY_IGNORE,	use_degraded) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(call_helper, 20, | ||||
| 	NL_STRING(	38,	T_MAY_IGNORE,	helper,		32) | ||||
| ) | ||||
| 
 | ||||
| /* Tag nr 42 already allocated in drbd-8.1 development. */ | ||||
| 
 | ||||
| NL_PACKET(sync_progress, 23, | ||||
| 	NL_INTEGER(	43,	T_MAY_IGNORE,	sync_progress) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(dump_ee, 24, | ||||
| 	NL_STRING(	45,	T_MAY_IGNORE,	dump_ee_reason, 32) | ||||
| 	NL_STRING(	46,	T_MAY_IGNORE,	seen_digest, SHARED_SECRET_MAX) | ||||
| 	NL_STRING(	47,	T_MAY_IGNORE,	calc_digest, SHARED_SECRET_MAX) | ||||
| 	NL_INT64(	48,	T_MAY_IGNORE,	ee_sector) | ||||
| 	NL_INT64(	49,	T_MAY_IGNORE,	ee_block_id) | ||||
| 	NL_STRING(	50,	T_MAY_IGNORE,	ee_data,	32 << 10) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(start_ov, 25, | ||||
| 	NL_INT64(	66,	T_MAY_IGNORE,	start_sector) | ||||
| 	NL_INT64(	90,	T_MANDATORY,	stop_sector) | ||||
| ) | ||||
| 
 | ||||
| NL_PACKET(new_c_uuid, 26, | ||||
|        NL_BIT(		63,	T_MANDATORY,	clear_bm) | ||||
| ) | ||||
| 
 | ||||
| #ifdef NL_RESPONSE | ||||
| NL_RESPONSE(return_code_only, 27) | ||||
| #endif | ||||
| 
 | ||||
| #undef NL_PACKET | ||||
| #undef NL_INTEGER | ||||
| #undef NL_INT64 | ||||
| #undef NL_BIT | ||||
| #undef NL_STRING | ||||
| #undef NL_RESPONSE | ||||
|  | @ -1,84 +0,0 @@ | |||
| #ifndef DRBD_TAG_MAGIC_H | ||||
| #define DRBD_TAG_MAGIC_H | ||||
| 
 | ||||
| #define TT_END     0 | ||||
| #define TT_REMOVED 0xE000 | ||||
| 
 | ||||
| /* declare packet_type enums */ | ||||
| enum packet_types { | ||||
| #define NL_PACKET(name, number, fields) P_ ## name = number, | ||||
| #define NL_RESPONSE(name, number) P_ ## name = number, | ||||
| #define NL_INTEGER(pn, pr, member) | ||||
| #define NL_INT64(pn, pr, member) | ||||
| #define NL_BIT(pn, pr, member) | ||||
| #define NL_STRING(pn, pr, member, len) | ||||
| #include <linux/drbd_nl.h> | ||||
| 	P_nl_after_last_packet, | ||||
| }; | ||||
| 
 | ||||
| /* These struct are used to deduce the size of the tag lists: */ | ||||
| #define NL_PACKET(name, number, fields)	\ | ||||
| 	struct name ## _tag_len_struct { fields }; | ||||
| #define NL_INTEGER(pn, pr, member)		\ | ||||
| 	int member; int tag_and_len ## member; | ||||
| #define NL_INT64(pn, pr, member)		\ | ||||
| 	__u64 member; int tag_and_len ## member; | ||||
| #define NL_BIT(pn, pr, member)		\ | ||||
| 	unsigned char member:1; int tag_and_len ## member; | ||||
| #define NL_STRING(pn, pr, member, len)	\ | ||||
| 	unsigned char member[len]; int member ## _len; \ | ||||
| 	int tag_and_len ## member; | ||||
| #include <linux/drbd_nl.h> | ||||
| 
 | ||||
| /* declare tag-list-sizes */ | ||||
| static const int tag_list_sizes[] = { | ||||
| #define NL_PACKET(name, number, fields) 2 fields , | ||||
| #define NL_INTEGER(pn, pr, member)      + 4 + 4 | ||||
| #define NL_INT64(pn, pr, member)        + 4 + 8 | ||||
| #define NL_BIT(pn, pr, member)          + 4 + 1 | ||||
| #define NL_STRING(pn, pr, member, len)  + 4 + (len) | ||||
| #include <linux/drbd_nl.h> | ||||
| }; | ||||
| 
 | ||||
| /* The two highest bits are used for the tag type */ | ||||
| #define TT_MASK      0xC000 | ||||
| #define TT_INTEGER   0x0000 | ||||
| #define TT_INT64     0x4000 | ||||
| #define TT_BIT       0x8000 | ||||
| #define TT_STRING    0xC000 | ||||
| /* The next bit indicates if processing of the tag is mandatory */ | ||||
| #define T_MANDATORY  0x2000 | ||||
| #define T_MAY_IGNORE 0x0000 | ||||
| #define TN_MASK      0x1fff | ||||
| /* The remaining 13 bits are used to enumerate the tags */ | ||||
| 
 | ||||
| #define tag_type(T)   ((T) & TT_MASK) | ||||
| #define tag_number(T) ((T) & TN_MASK) | ||||
| 
 | ||||
| /* declare tag enums */ | ||||
| #define NL_PACKET(name, number, fields) fields | ||||
| enum drbd_tags { | ||||
| #define NL_INTEGER(pn, pr, member)     T_ ## member = pn | TT_INTEGER | pr , | ||||
| #define NL_INT64(pn, pr, member)       T_ ## member = pn | TT_INT64   | pr , | ||||
| #define NL_BIT(pn, pr, member)         T_ ## member = pn | TT_BIT     | pr , | ||||
| #define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING  | pr , | ||||
| #include <linux/drbd_nl.h> | ||||
| }; | ||||
| 
 | ||||
| struct tag { | ||||
| 	const char *name; | ||||
| 	int type_n_flags; | ||||
| 	int max_len; | ||||
| }; | ||||
| 
 | ||||
| /* declare tag names */ | ||||
| #define NL_PACKET(name, number, fields) fields | ||||
| static const struct tag tag_descriptions[] = { | ||||
| #define NL_INTEGER(pn, pr, member)     [ pn ] = { #member, TT_INTEGER | pr, sizeof(int)   }, | ||||
| #define NL_INT64(pn, pr, member)       [ pn ] = { #member, TT_INT64   | pr, sizeof(__u64) }, | ||||
| #define NL_BIT(pn, pr, member)         [ pn ] = { #member, TT_BIT     | pr, sizeof(int)   }, | ||||
| #define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING  | pr, (len)         }, | ||||
| #include <linux/drbd_nl.h> | ||||
| }; | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										422
									
								
								include/linux/genl_magic_func.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										422
									
								
								include/linux/genl_magic_func.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,422 @@ | |||
| #ifndef GENL_MAGIC_FUNC_H | ||||
| #define GENL_MAGIC_FUNC_H | ||||
| 
 | ||||
| #include <linux/genl_magic_struct.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * Magic: declare tla policy						{{{1 | ||||
|  * Magic: declare nested policies | ||||
|  *									{{{2 | ||||
|  */ | ||||
| #undef GENL_mc_group | ||||
| #define GENL_mc_group(group) | ||||
| 
 | ||||
| #undef GENL_notification | ||||
| #define GENL_notification(op_name, op_num, mcast_group, tla_list) | ||||
| 
 | ||||
| #undef GENL_op | ||||
| #define GENL_op(op_name, op_num, handler, tla_list) | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\ | ||||
| 	[tag_name] = { .type = NLA_NESTED }, | ||||
| 
 | ||||
| static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = { | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| }; | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\ | ||||
| static struct nla_policy s_name ## _nl_policy[] __read_mostly =		\ | ||||
| { s_fields }; | ||||
| 
 | ||||
| #undef __field | ||||
| #define __field(attr_nr, attr_flag, name, nla_type, _type, __get,	\ | ||||
| 		 __put, __is_signed)					\ | ||||
| 	[attr_nr] = { .type = nla_type }, | ||||
| 
 | ||||
| #undef __array | ||||
| #define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen,	\ | ||||
| 		__get, __put, __is_signed)				\ | ||||
| 	[attr_nr] = { .type = nla_type,					\ | ||||
| 		      .len = maxlen - (nla_type == NLA_NUL_STRING) }, | ||||
| 
 | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 
 | ||||
| #ifndef __KERNEL__ | ||||
| #ifndef pr_info | ||||
| #define pr_info(args...)	fprintf(stderr, args); | ||||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| #ifdef GENL_MAGIC_DEBUG | ||||
| static void dprint_field(const char *dir, int nla_type, | ||||
| 		const char *name, void *valp) | ||||
| { | ||||
| 	__u64 val = valp ? *(__u32 *)valp : 1; | ||||
| 	switch (nla_type) { | ||||
| 	case NLA_U8:  val = (__u8)val; | ||||
| 	case NLA_U16: val = (__u16)val; | ||||
| 	case NLA_U32: val = (__u32)val; | ||||
| 		pr_info("%s attr %s: %d 0x%08x\n", dir, | ||||
| 			name, (int)val, (unsigned)val); | ||||
| 		break; | ||||
| 	case NLA_U64: | ||||
| 		val = *(__u64*)valp; | ||||
| 		pr_info("%s attr %s: %lld 0x%08llx\n", dir, | ||||
| 			name, (long long)val, (unsigned long long)val); | ||||
| 		break; | ||||
| 	case NLA_FLAG: | ||||
| 		if (val) | ||||
| 			pr_info("%s attr %s: set\n", dir, name); | ||||
| 		break; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void dprint_array(const char *dir, int nla_type, | ||||
| 		const char *name, const char *val, unsigned len) | ||||
| { | ||||
| 	switch (nla_type) { | ||||
| 	case NLA_NUL_STRING: | ||||
| 		if (len && val[len-1] == '\0') | ||||
| 			len--; | ||||
| 		pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val); | ||||
| 		break; | ||||
| 	default: | ||||
| 		/* we can always show 4 byte,
 | ||||
| 		 * thats what nlattr are aligned to. */ | ||||
| 		pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n", | ||||
| 			dir, name, len, val[0], val[1], val[2], val[3]); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| #define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b); | ||||
| 
 | ||||
| /* Name is a member field name of the struct s.
 | ||||
|  * If s is NULL (only parsing, no copy requested in *_from_attrs()), | ||||
|  * nla is supposed to point to the attribute containing the information | ||||
|  * corresponding to that struct member. */ | ||||
| #define DPRINT_FIELD(dir, nla_type, name, s, nla)			\ | ||||
| 	do {								\ | ||||
| 		if (s)							\ | ||||
| 			dprint_field(dir, nla_type, #name, &s->name);	\ | ||||
| 		else if (nla)						\ | ||||
| 			dprint_field(dir, nla_type, #name,		\ | ||||
| 				(nla_type == NLA_FLAG) ? NULL		\ | ||||
| 						: nla_data(nla));	\ | ||||
| 	} while (0) | ||||
| 
 | ||||
| #define	DPRINT_ARRAY(dir, nla_type, name, s, nla)			\ | ||||
| 	do {								\ | ||||
| 		if (s)							\ | ||||
| 			dprint_array(dir, nla_type, #name,		\ | ||||
| 					s->name, s->name ## _len);	\ | ||||
| 		else if (nla)						\ | ||||
| 			dprint_array(dir, nla_type, #name,		\ | ||||
| 					nla_data(nla), nla_len(nla));	\ | ||||
| 	} while (0) | ||||
| #else | ||||
| #define DPRINT_TLA(a, op, b) do {} while (0) | ||||
| #define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0) | ||||
| #define	DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0) | ||||
| #endif | ||||
| 
 | ||||
| /*
 | ||||
|  * Magic: provide conversion functions					{{{1 | ||||
|  * populate struct from attribute table: | ||||
|  *									{{{2 | ||||
|  */ | ||||
| 
 | ||||
| /* processing of generic netlink messages is serialized.
 | ||||
|  * use one static buffer for parsing of nested attributes */ | ||||
| static struct nlattr *nested_attr_tb[128]; | ||||
| 
 | ||||
| #ifndef BUILD_BUG_ON | ||||
| /* Force a compilation error if condition is true */ | ||||
| #define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) | ||||
| /* Force a compilation error if condition is true, but also produce a
 | ||||
|    result (of value 0 and type size_t), so the expression can be used | ||||
|    e.g. in a structure initializer (or where-ever else comma expressions | ||||
|    aren't permitted). */ | ||||
| #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) | ||||
| #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) | ||||
| #endif | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\ | ||||
| /* *_from_attrs functions are static, but potentially unused */		\ | ||||
| static int __ ## s_name ## _from_attrs(struct s_name *s,		\ | ||||
| 		struct genl_info *info, bool exclude_invariants)	\ | ||||
| {									\ | ||||
| 	const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1;		\ | ||||
| 	struct nlattr *tla = info->attrs[tag_number];			\ | ||||
| 	struct nlattr **ntb = nested_attr_tb;				\ | ||||
| 	struct nlattr *nla;						\ | ||||
| 	int err;							\ | ||||
| 	BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb));	\ | ||||
| 	if (!tla)							\ | ||||
| 		return -ENOMSG;						\ | ||||
| 	DPRINT_TLA(#s_name, "<=-", #tag_name);				\ | ||||
| 	err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy);	\ | ||||
| 	if (err)							\ | ||||
| 		return err;						\ | ||||
| 									\ | ||||
| 	s_fields							\ | ||||
| 	return 0;							\ | ||||
| }					__attribute__((unused))		\ | ||||
| static int s_name ## _from_attrs(struct s_name *s,			\ | ||||
| 						struct genl_info *info)	\ | ||||
| {									\ | ||||
| 	return __ ## s_name ## _from_attrs(s, info, false);		\ | ||||
| }					__attribute__((unused))		\ | ||||
| static int s_name ## _from_attrs_for_change(struct s_name *s,		\ | ||||
| 						struct genl_info *info)	\ | ||||
| {									\ | ||||
| 	return __ ## s_name ## _from_attrs(s, info, true);		\ | ||||
| }					__attribute__((unused))		\ | ||||
| 
 | ||||
| #define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...)	\ | ||||
| 		nla = ntb[attr_nr];						\ | ||||
| 		if (nla) {						\ | ||||
| 			if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) {		\ | ||||
| 				pr_info("<< must not change invariant attr: %s\n", #name);	\ | ||||
| 				return -EEXIST;				\ | ||||
| 			}						\ | ||||
| 			assignment;					\ | ||||
| 		} else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) {		\ | ||||
| 			/* attribute missing from payload, */		\ | ||||
| 			/* which was expected */			\ | ||||
| 		} else if ((attr_flag) & DRBD_F_REQUIRED) {		\ | ||||
| 			pr_info("<< missing attr: %s\n", #name);	\ | ||||
| 			return -ENOMSG;					\ | ||||
| 		} | ||||
| 
 | ||||
| #undef __field | ||||
| #define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\ | ||||
| 		__is_signed)						\ | ||||
| 	__assign(attr_nr, attr_flag, name, nla_type, type,		\ | ||||
| 			if (s)						\ | ||||
| 				s->name = __get(nla);			\ | ||||
| 			DPRINT_FIELD("<<", nla_type, name, s, nla)) | ||||
| 
 | ||||
| /* validate_nla() already checked nla_len <= maxlen appropriately. */ | ||||
| #undef __array | ||||
| #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\ | ||||
| 		__get, __put, __is_signed)				\ | ||||
| 	__assign(attr_nr, attr_flag, name, nla_type, type,		\ | ||||
| 			if (s)						\ | ||||
| 				s->name ## _len =			\ | ||||
| 					__get(s->name, nla, maxlen);	\ | ||||
| 			DPRINT_ARRAY("<<", nla_type, name, s, nla)) | ||||
| 
 | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields) | ||||
| 
 | ||||
| /*
 | ||||
|  * Magic: define op number to op name mapping				{{{1 | ||||
|  *									{{{2 | ||||
|  */ | ||||
| const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) | ||||
| { | ||||
| 	switch (cmd) { | ||||
| #undef GENL_op | ||||
| #define GENL_op(op_name, op_num, handler, tla_list)		\ | ||||
| 	case op_num: return #op_name; | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 	default: | ||||
| 		     return "unknown"; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| #ifdef __KERNEL__ | ||||
| #include <linux/stringify.h> | ||||
| /*
 | ||||
|  * Magic: define genl_ops						{{{1 | ||||
|  *									{{{2 | ||||
|  */ | ||||
| 
 | ||||
| #undef GENL_op | ||||
| #define GENL_op(op_name, op_num, handler, tla_list)		\ | ||||
| {								\ | ||||
| 	handler							\ | ||||
| 	.cmd = op_name,						\ | ||||
| 	.policy	= CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy),	\ | ||||
| }, | ||||
| 
 | ||||
| #define ZZZ_genl_ops		CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) | ||||
| static struct genl_ops ZZZ_genl_ops[] __read_mostly = { | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| }; | ||||
| 
 | ||||
| #undef GENL_op | ||||
| #define GENL_op(op_name, op_num, handler, tla_list) | ||||
| 
 | ||||
| /*
 | ||||
|  * Define the genl_family, multicast groups,				{{{1 | ||||
|  * and provide register/unregister functions. | ||||
|  *									{{{2 | ||||
|  */ | ||||
| #define ZZZ_genl_family		CONCAT_(GENL_MAGIC_FAMILY, _genl_family) | ||||
| static struct genl_family ZZZ_genl_family __read_mostly = { | ||||
| 	.id = GENL_ID_GENERATE, | ||||
| 	.name = __stringify(GENL_MAGIC_FAMILY), | ||||
| 	.version = GENL_MAGIC_VERSION, | ||||
| #ifdef GENL_MAGIC_FAMILY_HDRSZ | ||||
| 	.hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), | ||||
| #endif | ||||
| 	.maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Magic: define multicast groups | ||||
|  * Magic: define multicast group registration helper | ||||
|  */ | ||||
| #undef GENL_mc_group | ||||
| #define GENL_mc_group(group)						\ | ||||
| static struct genl_multicast_group					\ | ||||
| CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = {		\ | ||||
| 	.name = #group,							\ | ||||
| };									\ | ||||
| static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)(	\ | ||||
| 	struct sk_buff *skb, gfp_t flags)				\ | ||||
| {									\ | ||||
| 	unsigned int group_id =						\ | ||||
| 		CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id;	\ | ||||
| 	if (!group_id)							\ | ||||
| 		return -EINVAL;						\ | ||||
| 	return genlmsg_multicast(skb, 0, group_id, flags);		\ | ||||
| } | ||||
| 
 | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 
 | ||||
| int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) | ||||
| { | ||||
| 	int err = genl_register_family_with_ops(&ZZZ_genl_family, | ||||
| 		ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops)); | ||||
| 	if (err) | ||||
| 		return err; | ||||
| #undef GENL_mc_group | ||||
| #define GENL_mc_group(group)						\ | ||||
| 	err = genl_register_mc_group(&ZZZ_genl_family,			\ | ||||
| 		&CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group));		\ | ||||
| 	if (err)							\ | ||||
| 		goto fail;						\ | ||||
| 	else								\ | ||||
| 		pr_info("%s: mcg %s: %u\n", #group,			\ | ||||
| 			__stringify(GENL_MAGIC_FAMILY),			\ | ||||
| 			CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id); | ||||
| 
 | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 
 | ||||
| #undef GENL_mc_group | ||||
| #define GENL_mc_group(group) | ||||
| 	return 0; | ||||
| fail: | ||||
| 	genl_unregister_family(&ZZZ_genl_family); | ||||
| 	return err; | ||||
| } | ||||
| 
 | ||||
| void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) | ||||
| { | ||||
| 	genl_unregister_family(&ZZZ_genl_family); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Magic: provide conversion functions					{{{1 | ||||
|  * populate skb from struct. | ||||
|  *									{{{2 | ||||
|  */ | ||||
| 
 | ||||
| #undef GENL_op | ||||
| #define GENL_op(op_name, op_num, handler, tla_list) | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\ | ||||
| static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s,	\ | ||||
| 		const bool exclude_sensitive)				\ | ||||
| {									\ | ||||
| 	struct nlattr *tla = nla_nest_start(skb, tag_number);		\ | ||||
| 	if (!tla)							\ | ||||
| 		goto nla_put_failure;					\ | ||||
| 	DPRINT_TLA(#s_name, "-=>", #tag_name);				\ | ||||
| 	s_fields							\ | ||||
| 	nla_nest_end(skb, tla);						\ | ||||
| 	return 0;							\ | ||||
| 									\ | ||||
| nla_put_failure:							\ | ||||
| 	if (tla)							\ | ||||
| 		nla_nest_cancel(skb, tla);				\ | ||||
|         return -EMSGSIZE;						\ | ||||
| }									\ | ||||
| static inline int s_name ## _to_priv_skb(struct sk_buff *skb,		\ | ||||
| 		struct s_name *s)					\ | ||||
| {									\ | ||||
| 	return s_name ## _to_skb(skb, s, 0);				\ | ||||
| }									\ | ||||
| static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\ | ||||
| 		struct s_name *s)					\ | ||||
| {									\ | ||||
| 	return s_name ## _to_skb(skb, s, 1);				\ | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| #undef __field | ||||
| #define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\ | ||||
| 		__is_signed)						\ | ||||
| 	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\ | ||||
| 		DPRINT_FIELD(">>", nla_type, name, s, NULL);		\ | ||||
| 		if (__put(skb, attr_nr, s->name))			\ | ||||
| 			goto nla_put_failure;				\ | ||||
| 	} | ||||
| 
 | ||||
| #undef __array | ||||
| #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\ | ||||
| 		__get, __put, __is_signed)				\ | ||||
| 	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\ | ||||
| 		DPRINT_ARRAY(">>",nla_type, name, s, NULL);		\ | ||||
| 		if (__put(skb, attr_nr, min_t(int, maxlen,		\ | ||||
| 			s->name ## _len + (nla_type == NLA_NUL_STRING)),\ | ||||
| 						s->name))		\ | ||||
| 			goto nla_put_failure;				\ | ||||
| 	} | ||||
| 
 | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 
 | ||||
| 
 | ||||
| /* Functions for initializing structs to default values.  */ | ||||
| 
 | ||||
| #undef __field | ||||
| #define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\ | ||||
| 		__is_signed) | ||||
| #undef __array | ||||
| #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\ | ||||
| 		__get, __put, __is_signed) | ||||
| #undef __u32_field_def | ||||
| #define __u32_field_def(attr_nr, attr_flag, name, default)		\ | ||||
| 	x->name = default; | ||||
| #undef __s32_field_def | ||||
| #define __s32_field_def(attr_nr, attr_flag, name, default)		\ | ||||
| 	x->name = default; | ||||
| #undef __flg_field_def | ||||
| #define __flg_field_def(attr_nr, attr_flag, name, default)		\ | ||||
| 	x->name = default; | ||||
| #undef __str_field_def | ||||
| #define __str_field_def(attr_nr, attr_flag, name, maxlen)		\ | ||||
| 	memset(x->name, 0, sizeof(x->name));				\ | ||||
| 	x->name ## _len = 0; | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\ | ||||
| static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \ | ||||
| static void set_ ## s_name ## _defaults(struct s_name *x) {	\ | ||||
| s_fields								\ | ||||
| } | ||||
| 
 | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 
 | ||||
| #endif /* __KERNEL__ */ | ||||
| 
 | ||||
| /* }}}1 */ | ||||
| #endif /* GENL_MAGIC_FUNC_H */ | ||||
| /* vim: set foldmethod=marker foldlevel=1 nofoldenable : */ | ||||
							
								
								
									
										277
									
								
								include/linux/genl_magic_struct.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										277
									
								
								include/linux/genl_magic_struct.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,277 @@ | |||
| #ifndef GENL_MAGIC_STRUCT_H | ||||
| #define GENL_MAGIC_STRUCT_H | ||||
| 
 | ||||
| #ifndef GENL_MAGIC_FAMILY | ||||
| # error "you need to define GENL_MAGIC_FAMILY before inclusion" | ||||
| #endif | ||||
| 
 | ||||
| #ifndef GENL_MAGIC_VERSION | ||||
| # error "you need to define GENL_MAGIC_VERSION before inclusion" | ||||
| #endif | ||||
| 
 | ||||
| #ifndef GENL_MAGIC_INCLUDE_FILE | ||||
| # error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion" | ||||
| #endif | ||||
| 
 | ||||
| #include <linux/genetlink.h> | ||||
| #include <linux/types.h> | ||||
| 
 | ||||
| #define CONCAT__(a,b)	a ## b | ||||
| #define CONCAT_(a,b)	CONCAT__(a,b) | ||||
| 
 | ||||
| extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void); | ||||
| extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); | ||||
| 
 | ||||
| /*
 | ||||
|  * Extension of genl attribute validation policies			{{{2 | ||||
|  */ | ||||
| 
 | ||||
| /*
 | ||||
|  * @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not | ||||
|  * know about.  This flag can be set in nlattr->nla_type to indicate that this | ||||
|  * attribute must not be ignored. | ||||
|  * | ||||
|  * We check and remove this flag in drbd_nla_check_mandatory() before | ||||
|  * validating the attribute types and lengths via nla_parse_nested(). | ||||
|  */ | ||||
| #define DRBD_GENLA_F_MANDATORY (1 << 14) | ||||
| 
 | ||||
| /*
 | ||||
|  * Flags specific to drbd and not visible at the netlink layer, used in | ||||
|  * <struct>_from_attrs and <struct>_to_skb: | ||||
|  * | ||||
|  * @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is | ||||
|  * invalid. | ||||
|  * | ||||
|  * @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be | ||||
|  * included in unpriviledged get requests or broadcasts. | ||||
|  * | ||||
|  * @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but | ||||
|  * cannot subsequently be changed. | ||||
|  */ | ||||
| #define DRBD_F_REQUIRED (1 << 0) | ||||
| #define DRBD_F_SENSITIVE (1 << 1) | ||||
| #define DRBD_F_INVARIANT (1 << 2) | ||||
| 
 | ||||
| #define __nla_type(x)	((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY)) | ||||
| 
 | ||||
| /*									}}}1
 | ||||
|  * MAGIC | ||||
|  * multi-include macro expansion magic starts here | ||||
|  */ | ||||
| 
 | ||||
| /* MAGIC helpers							{{{2 */ | ||||
| 
 | ||||
| /* possible field types */ | ||||
| #define __flg_field(attr_nr, attr_flag, name) \ | ||||
| 	__field(attr_nr, attr_flag, name, NLA_U8, char, \ | ||||
| 			nla_get_u8, nla_put_u8, false) | ||||
| #define __u8_field(attr_nr, attr_flag, name)	\ | ||||
| 	__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ | ||||
| 			nla_get_u8, nla_put_u8, false) | ||||
| #define __u16_field(attr_nr, attr_flag, name)	\ | ||||
| 	__field(attr_nr, attr_flag, name, NLA_U16, __u16, \ | ||||
| 			nla_get_u16, nla_put_u16, false) | ||||
| #define __u32_field(attr_nr, attr_flag, name)	\ | ||||
| 	__field(attr_nr, attr_flag, name, NLA_U32, __u32, \ | ||||
| 			nla_get_u32, nla_put_u32, false) | ||||
| #define __s32_field(attr_nr, attr_flag, name)	\ | ||||
| 	__field(attr_nr, attr_flag, name, NLA_U32, __s32, \ | ||||
| 			nla_get_u32, nla_put_u32, true) | ||||
| #define __u64_field(attr_nr, attr_flag, name)	\ | ||||
| 	__field(attr_nr, attr_flag, name, NLA_U64, __u64, \ | ||||
| 			nla_get_u64, nla_put_u64, false) | ||||
| #define __str_field(attr_nr, attr_flag, name, maxlen) \ | ||||
| 	__array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ | ||||
| 			nla_strlcpy, nla_put, false) | ||||
| #define __bin_field(attr_nr, attr_flag, name, maxlen) \ | ||||
| 	__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ | ||||
| 			nla_memcpy, nla_put, false) | ||||
| 
 | ||||
| /* fields with default values */ | ||||
| #define __flg_field_def(attr_nr, attr_flag, name, default) \ | ||||
| 	__flg_field(attr_nr, attr_flag, name) | ||||
| #define __u32_field_def(attr_nr, attr_flag, name, default) \ | ||||
| 	__u32_field(attr_nr, attr_flag, name) | ||||
| #define __s32_field_def(attr_nr, attr_flag, name, default) \ | ||||
| 	__s32_field(attr_nr, attr_flag, name) | ||||
| #define __str_field_def(attr_nr, attr_flag, name, maxlen) \ | ||||
| 	__str_field(attr_nr, attr_flag, name, maxlen) | ||||
| 
 | ||||
| #define GENL_op_init(args...)	args | ||||
| #define GENL_doit(handler)		\ | ||||
| 	.doit = handler,		\ | ||||
| 	.flags = GENL_ADMIN_PERM, | ||||
| #define GENL_dumpit(handler)		\ | ||||
| 	.dumpit = handler,		\ | ||||
| 	.flags = GENL_ADMIN_PERM, | ||||
| 
 | ||||
| /*									}}}1
 | ||||
|  * Magic: define the enum symbols for genl_ops | ||||
|  * Magic: define the enum symbols for top level attributes | ||||
|  * Magic: define the enum symbols for nested attributes | ||||
|  *									{{{2 | ||||
|  */ | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields) | ||||
| 
 | ||||
| #undef GENL_mc_group | ||||
| #define GENL_mc_group(group) | ||||
| 
 | ||||
| #undef GENL_notification | ||||
| #define GENL_notification(op_name, op_num, mcast_group, tla_list)	\ | ||||
| 	op_name = op_num, | ||||
| 
 | ||||
| #undef GENL_op | ||||
| #define GENL_op(op_name, op_num, handler, tla_list)			\ | ||||
| 	op_name = op_num, | ||||
| 
 | ||||
| enum { | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| }; | ||||
| 
 | ||||
| #undef GENL_notification | ||||
| #define GENL_notification(op_name, op_num, mcast_group, tla_list) | ||||
| 
 | ||||
| #undef GENL_op | ||||
| #define GENL_op(op_name, op_num, handler, attr_list) | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields) \ | ||||
| 		tag_name = tag_number, | ||||
| 
 | ||||
| enum { | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| }; | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields)	\ | ||||
| enum {								\ | ||||
| 	s_fields						\ | ||||
| }; | ||||
| 
 | ||||
| #undef __field | ||||
| #define __field(attr_nr, attr_flag, name, nla_type, type,	\ | ||||
| 		__get, __put, __is_signed)			\ | ||||
| 	T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), | ||||
| 
 | ||||
| #undef __array | ||||
| #define __array(attr_nr, attr_flag, name, nla_type, type,	\ | ||||
| 		maxlen, __get, __put, __is_signed)		\ | ||||
| 	T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), | ||||
| 
 | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 
 | ||||
| /*									}}}1
 | ||||
|  * Magic: compile time assert unique numbers for operations | ||||
|  * Magic: -"- unique numbers for top level attributes | ||||
|  * Magic: -"- unique numbers for nested attributes | ||||
|  *									{{{2 | ||||
|  */ | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields) | ||||
| 
 | ||||
| #undef GENL_op | ||||
| #define GENL_op(op_name, op_num, handler, attr_list)	\ | ||||
| 	case op_name: | ||||
| 
 | ||||
| #undef GENL_notification | ||||
| #define GENL_notification(op_name, op_num, mcast_group, tla_list)	\ | ||||
| 	case op_name: | ||||
| 
 | ||||
| static inline void ct_assert_unique_operations(void) | ||||
| { | ||||
| 	switch (0) { | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 		; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| #undef GENL_op | ||||
| #define GENL_op(op_name, op_num, handler, attr_list) | ||||
| 
 | ||||
| #undef GENL_notification | ||||
| #define GENL_notification(op_name, op_num, mcast_group, tla_list) | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\ | ||||
| 		case tag_number: | ||||
| 
 | ||||
| static inline void ct_assert_unique_top_level_attributes(void) | ||||
| { | ||||
| 	switch (0) { | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 		; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\ | ||||
| static inline void ct_assert_unique_ ## s_name ## _attributes(void)	\ | ||||
| {									\ | ||||
| 	switch (0) {							\ | ||||
| 		s_fields						\ | ||||
| 			;						\ | ||||
| 	}								\ | ||||
| } | ||||
| 
 | ||||
| #undef __field | ||||
| #define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\ | ||||
| 		__is_signed)						\ | ||||
| 	case attr_nr: | ||||
| 
 | ||||
| #undef __array | ||||
| #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\ | ||||
| 		__get, __put, __is_signed)				\ | ||||
| 	case attr_nr: | ||||
| 
 | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 
 | ||||
| /*									}}}1
 | ||||
|  * Magic: declare structs | ||||
|  * struct <name> { | ||||
|  *	fields | ||||
|  * }; | ||||
|  *									{{{2 | ||||
|  */ | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\ | ||||
| struct s_name { s_fields }; | ||||
| 
 | ||||
| #undef __field | ||||
| #define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\ | ||||
| 		__is_signed)						\ | ||||
| 	type name; | ||||
| 
 | ||||
| #undef __array | ||||
| #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\ | ||||
| 		__get, __put, __is_signed)				\ | ||||
| 	type name[maxlen];	\ | ||||
| 	__u32 name ## _len; | ||||
| 
 | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 
 | ||||
| #undef GENL_struct | ||||
| #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\ | ||||
| enum {									\ | ||||
| 	s_fields							\ | ||||
| }; | ||||
| 
 | ||||
| #undef __field | ||||
| #define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\ | ||||
| 		is_signed)						\ | ||||
| 	F_ ## name ## _IS_SIGNED = is_signed, | ||||
| 
 | ||||
| #undef __array | ||||
| #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\ | ||||
| 		__get, __put, is_signed)				\ | ||||
| 	F_ ## name ## _IS_SIGNED = is_signed, | ||||
| 
 | ||||
| #include GENL_MAGIC_INCLUDE_FILE | ||||
| 
 | ||||
| /* }}}1 */ | ||||
| #endif /* GENL_MAGIC_STRUCT_H */ | ||||
| /* vim: set foldmethod=marker nofoldenable : */ | ||||
|  | @ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id); | |||
| 
 | ||||
| void __init idr_init_cache(void); | ||||
| 
 | ||||
| /**
 | ||||
|  * idr_for_each_entry - iterate over an idr's elements of a given type | ||||
|  * @idp:     idr handle | ||||
|  * @entry:   the type * to use as cursor | ||||
|  * @id:      id entry's key | ||||
|  */ | ||||
| #define idr_for_each_entry(idp, entry, id)				\ | ||||
| 	for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ | ||||
| 	     entry != NULL;                                             \ | ||||
| 	     ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) | ||||
| 
 | ||||
| #endif /* __IDR_H__ */ | ||||
|  |  | |||
|  | @ -166,9 +166,11 @@ struct lc_element { | |||
| 	/* if we want to track a larger set of objects,
 | ||||
| 	 * it needs to become arch independent u64 */ | ||||
| 	unsigned lc_number; | ||||
| 
 | ||||
| 	/* special label when on free list */ | ||||
| #define LC_FREE (~0U) | ||||
| 
 | ||||
| 	/* for pending changes */ | ||||
| 	unsigned lc_new_number; | ||||
| }; | ||||
| 
 | ||||
| struct lru_cache { | ||||
|  | @ -176,6 +178,7 @@ struct lru_cache { | |||
| 	struct list_head lru; | ||||
| 	struct list_head free; | ||||
| 	struct list_head in_use; | ||||
| 	struct list_head to_be_changed; | ||||
| 
 | ||||
| 	/* the pre-created kmem cache to allocate the objects from */ | ||||
| 	struct kmem_cache *lc_cache; | ||||
|  | @ -194,18 +197,19 @@ struct lru_cache { | |||
| 	 * 8 high bits of .lc_index to be overloaded with flags in the future. */ | ||||
| #define LC_MAX_ACTIVE	(1<<24) | ||||
| 
 | ||||
| 	/* allow to accumulate a few (index:label) changes,
 | ||||
| 	 * but no more than max_pending_changes */ | ||||
| 	unsigned int max_pending_changes; | ||||
| 	/* number of elements currently on to_be_changed list */ | ||||
| 	unsigned int pending_changes; | ||||
| 
 | ||||
| 	/* statistics */ | ||||
| 	unsigned used; /* number of lelements currently on in_use list */ | ||||
| 	unsigned long hits, misses, starving, dirty, changed; | ||||
| 	unsigned used; /* number of elements currently on in_use list */ | ||||
| 	unsigned long hits, misses, starving, locked, changed; | ||||
| 
 | ||||
| 	/* see below: flag-bits for lru_cache */ | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	/* when changing the label of an index element */ | ||||
| 	unsigned int  new_number; | ||||
| 
 | ||||
| 	/* for paranoia when changing the label of an index element */ | ||||
| 	struct lc_element *changing_element; | ||||
| 
 | ||||
| 	void  *lc_private; | ||||
| 	const char *name; | ||||
|  | @ -221,10 +225,15 @@ enum { | |||
| 	/* debugging aid, to catch concurrent access early.
 | ||||
| 	 * user needs to guarantee exclusive access by proper locking! */ | ||||
| 	__LC_PARANOIA, | ||||
| 	/* if we need to change the set, but currently there is a changing
 | ||||
|  * transaction pending, we are "dirty", and must defer further | ||||
| 	 * changing requests */ | ||||
| 
 | ||||
| 	/* annotate that the set is "dirty", possibly accumulating further
 | ||||
| 	 * changes, until a transaction is finally triggered */ | ||||
| 	__LC_DIRTY, | ||||
| 
 | ||||
| 	/* Locked, no further changes allowed.
 | ||||
| 	 * Also used to serialize changing transactions. */ | ||||
| 	__LC_LOCKED, | ||||
| 
 | ||||
| 	/* if we need to change the set, but currently there is no free nor
 | ||||
| 	 * unused element available, we are "starving", and must not give out | ||||
| 	 * further references, to guarantee that eventually some refcnt will | ||||
|  | @ -236,9 +245,11 @@ enum { | |||
| }; | ||||
| #define LC_PARANOIA (1<<__LC_PARANOIA) | ||||
| #define LC_DIRTY    (1<<__LC_DIRTY) | ||||
| #define LC_LOCKED   (1<<__LC_LOCKED) | ||||
| #define LC_STARVING (1<<__LC_STARVING) | ||||
| 
 | ||||
| extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, | ||||
| 		unsigned max_pending_changes, | ||||
| 		unsigned e_count, size_t e_size, size_t e_off); | ||||
| extern void lc_reset(struct lru_cache *lc); | ||||
| extern void lc_destroy(struct lru_cache *lc); | ||||
|  | @ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr); | |||
| extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); | ||||
| extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); | ||||
| extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); | ||||
| extern void lc_changed(struct lru_cache *lc, struct lc_element *e); | ||||
| extern void lc_committed(struct lru_cache *lc); | ||||
| 
 | ||||
| struct seq_file; | ||||
| extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); | ||||
|  | @ -258,16 +269,28 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char | |||
| 				void (*detail) (struct seq_file *, struct lc_element *)); | ||||
| 
 | ||||
| /**
 | ||||
|  * lc_try_lock - can be used to stop lc_get() from changing the tracked set | ||||
|  * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set | ||||
|  * @lc: the lru cache to operate on | ||||
|  * | ||||
|  * Allows (expects) the set to be "dirty".  Note that the reference counts and | ||||
|  * order on the active and lru lists may still change.  Used to serialize | ||||
|  * changing transactions.  Returns true if we acquired the lock. | ||||
|  */ | ||||
| static inline int lc_try_lock_for_transaction(struct lru_cache *lc) | ||||
| { | ||||
| 	return !test_and_set_bit(__LC_LOCKED, &lc->flags); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * lc_try_lock - variant to stop lc_get() from changing the tracked set | ||||
|  * @lc: the lru cache to operate on | ||||
|  * | ||||
|  * Note that the reference counts and order on the active and lru lists may | ||||
|  * still change.  Returns true if we acquired the lock. | ||||
|  * still change.  Only works on a "clean" set.  Returns true if we acquired the | ||||
|  * lock, which means there are no pending changes, and any further attempt to | ||||
|  * change the set will not succeed until the next lc_unlock(). | ||||
|  */ | ||||
| static inline int lc_try_lock(struct lru_cache *lc) | ||||
| { | ||||
| 	return !test_and_set_bit(__LC_DIRTY, &lc->flags); | ||||
| } | ||||
| extern int lc_try_lock(struct lru_cache *lc); | ||||
| 
 | ||||
| /**
 | ||||
|  * lc_unlock - unlock @lc, allow lc_get() to change the set again | ||||
|  | @ -276,14 +299,10 @@ static inline int lc_try_lock(struct lru_cache *lc) | |||
| static inline void lc_unlock(struct lru_cache *lc) | ||||
| { | ||||
| 	clear_bit(__LC_DIRTY, &lc->flags); | ||||
| 	smp_mb__after_clear_bit(); | ||||
| 	clear_bit_unlock(__LC_LOCKED, &lc->flags); | ||||
| } | ||||
| 
 | ||||
| static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) | ||||
| { | ||||
| 	struct lc_element *e = lc_find(lc, enr); | ||||
| 	return e && e->refcnt; | ||||
| } | ||||
| extern bool lc_is_used(struct lru_cache *lc, unsigned int enr); | ||||
| 
 | ||||
| #define lc_entry(ptr, type, member) \ | ||||
| 	container_of(ptr, type, member) | ||||
|  |  | |||
							
								
								
									
										367
									
								
								lib/lru_cache.c
									
										
									
									
									
								
							
							
						
						
									
										367
									
								
								lib/lru_cache.c
									
										
									
									
									
								
							|  | @ -44,8 +44,8 @@ MODULE_LICENSE("GPL"); | |||
| } while (0) | ||||
| 
 | ||||
| #define RETURN(x...)     do { \ | ||||
| 	clear_bit(__LC_PARANOIA, &lc->flags); \ | ||||
| 	smp_mb__after_clear_bit(); return x ; } while (0) | ||||
| 	clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ | ||||
| 	return x ; } while (0) | ||||
| 
 | ||||
| /* BUG() if e is not one of the elements tracked by lc */ | ||||
| #define PARANOIA_LC_ELEMENT(lc, e) do {	\ | ||||
|  | @ -55,9 +55,40 @@ MODULE_LICENSE("GPL"); | |||
| 	BUG_ON(i >= lc_->nr_elements);	\ | ||||
| 	BUG_ON(lc_->lc_element[i] != e_); } while (0) | ||||
| 
 | ||||
| 
 | ||||
| /* We need to atomically
 | ||||
|  *  - try to grab the lock (set LC_LOCKED) | ||||
|  *  - only if there is no pending transaction | ||||
|  *    (neither LC_DIRTY nor LC_STARVING is set) | ||||
|  * Because of PARANOIA_ENTRY() above abusing lc->flags as well, | ||||
|  * it is not sufficient to just say | ||||
|  *	return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED); | ||||
|  */ | ||||
| int lc_try_lock(struct lru_cache *lc) | ||||
| { | ||||
| 	unsigned long val; | ||||
| 	do { | ||||
| 		val = cmpxchg(&lc->flags, 0, LC_LOCKED); | ||||
| 	} while (unlikely (val == LC_PARANOIA)); | ||||
| 	/* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ | ||||
| 	return 0 == val; | ||||
| #if 0 | ||||
| 	/* Alternative approach, spin in case someone enters or leaves a
 | ||||
| 	 * PARANOIA_ENTRY()/RETURN() section. */ | ||||
| 	unsigned long old, new, val; | ||||
| 	do { | ||||
| 		old = lc->flags & LC_PARANOIA; | ||||
| 		new = old | LC_LOCKED; | ||||
| 		val = cmpxchg(&lc->flags, old, new); | ||||
| 	} while (unlikely (val == (old ^ LC_PARANOIA))); | ||||
| 	return old == val; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * lc_create - prepares to track objects in an active set | ||||
|  * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details | ||||
|  * @max_pending_changes: maximum changes to accumulate until a transaction is required | ||||
|  * @e_count: number of elements allowed to be active simultaneously | ||||
|  * @e_size: size of the tracked objects | ||||
|  * @e_off: offset to the &struct lc_element member in a tracked object | ||||
|  | @ -66,6 +97,7 @@ MODULE_LICENSE("GPL"); | |||
|  * or NULL on (allocation) failure. | ||||
|  */ | ||||
| struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, | ||||
| 		unsigned max_pending_changes, | ||||
| 		unsigned e_count, size_t e_size, size_t e_off) | ||||
| { | ||||
| 	struct hlist_head *slot = NULL; | ||||
|  | @ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, | |||
| 	INIT_LIST_HEAD(&lc->in_use); | ||||
| 	INIT_LIST_HEAD(&lc->lru); | ||||
| 	INIT_LIST_HEAD(&lc->free); | ||||
| 	INIT_LIST_HEAD(&lc->to_be_changed); | ||||
| 
 | ||||
| 	lc->name = name; | ||||
| 	lc->element_size = e_size; | ||||
| 	lc->element_off = e_off; | ||||
| 	lc->nr_elements = e_count; | ||||
| 	lc->new_number = LC_FREE; | ||||
| 	lc->max_pending_changes = max_pending_changes; | ||||
| 	lc->lc_cache = cache; | ||||
| 	lc->lc_element = element; | ||||
| 	lc->lc_slot = slot; | ||||
|  | @ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, | |||
| 		e = p + e_off; | ||||
| 		e->lc_index = i; | ||||
| 		e->lc_number = LC_FREE; | ||||
| 		e->lc_new_number = LC_FREE; | ||||
| 		list_add(&e->list, &lc->free); | ||||
| 		element[i] = e; | ||||
| 	} | ||||
|  | @ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc) | |||
| 	INIT_LIST_HEAD(&lc->in_use); | ||||
| 	INIT_LIST_HEAD(&lc->lru); | ||||
| 	INIT_LIST_HEAD(&lc->free); | ||||
| 	INIT_LIST_HEAD(&lc->to_be_changed); | ||||
| 	lc->used = 0; | ||||
| 	lc->hits = 0; | ||||
| 	lc->misses = 0; | ||||
| 	lc->starving = 0; | ||||
| 	lc->dirty = 0; | ||||
| 	lc->locked = 0; | ||||
| 	lc->changed = 0; | ||||
| 	lc->pending_changes = 0; | ||||
| 	lc->flags = 0; | ||||
| 	lc->changing_element = NULL; | ||||
| 	lc->new_number = LC_FREE; | ||||
| 	memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); | ||||
| 
 | ||||
| 	for (i = 0; i < lc->nr_elements; i++) { | ||||
|  | @ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc) | |||
| 		/* re-init it */ | ||||
| 		e->lc_index = i; | ||||
| 		e->lc_number = LC_FREE; | ||||
| 		e->lc_new_number = LC_FREE; | ||||
| 		list_add(&e->list, &lc->free); | ||||
| 	} | ||||
| } | ||||
|  | @ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) | |||
| 	/* NOTE:
 | ||||
| 	 * total calls to lc_get are | ||||
| 	 * (starving + hits + misses) | ||||
| 	 * misses include "dirty" count (update from an other thread in | ||||
| 	 * misses include "locked" count (update from an other thread in | ||||
| 	 * progress) and "changed", when this in fact led to a successful | ||||
| 	 * update of the cache. | ||||
| 	 */ | ||||
| 	return seq_printf(seq, "\t%s: used:%u/%u " | ||||
| 		"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", | ||||
| 		"hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", | ||||
| 		lc->name, lc->used, lc->nr_elements, | ||||
| 		lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); | ||||
| 		lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); | ||||
| } | ||||
| 
 | ||||
| static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) | ||||
|  | @ -224,6 +259,27 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) | |||
| } | ||||
| 
 | ||||
| 
 | ||||
| static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, | ||||
| 		bool include_changing) | ||||
| { | ||||
| 	struct hlist_node *n; | ||||
| 	struct lc_element *e; | ||||
| 
 | ||||
| 	BUG_ON(!lc); | ||||
| 	BUG_ON(!lc->nr_elements); | ||||
| 	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { | ||||
| 		/* "about to be changed" elements, pending transaction commit,
 | ||||
| 		 * are hashed by their "new number". "Normal" elements have | ||||
| 		 * lc_number == lc_new_number. */ | ||||
| 		if (e->lc_new_number != enr) | ||||
| 			continue; | ||||
| 		if (e->lc_new_number == e->lc_number || include_changing) | ||||
| 			return e; | ||||
| 		break; | ||||
| 	} | ||||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * lc_find - find element by label, if present in the hash table | ||||
|  * @lc: The lru_cache object | ||||
|  | @ -232,38 +288,28 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) | |||
|  * Returns the pointer to an element, if the element with the requested | ||||
|  * "label" or element number is present in the hash table, | ||||
|  * or NULL if not found. Does not change the refcnt. | ||||
|  * Ignores elements that are "about to be used", i.e. not yet in the active | ||||
|  * set, but still pending transaction commit. | ||||
|  */ | ||||
| struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) | ||||
| { | ||||
| 	struct hlist_node *n; | ||||
| 	struct lc_element *e; | ||||
| 
 | ||||
| 	BUG_ON(!lc); | ||||
| 	BUG_ON(!lc->nr_elements); | ||||
| 	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { | ||||
| 		if (e->lc_number == enr) | ||||
| 			return e; | ||||
| 	} | ||||
| 	return NULL; | ||||
| 	return __lc_find(lc, enr, 0); | ||||
| } | ||||
| 
 | ||||
| /* returned element will be "recycled" immediately */ | ||||
| static struct lc_element *lc_evict(struct lru_cache *lc) | ||||
| /**
 | ||||
|  * lc_is_used - find element by label | ||||
|  * @lc: The lru_cache object | ||||
|  * @enr: element number | ||||
|  * | ||||
|  * Returns true, if the element with the requested "label" or element number is | ||||
|  * present in the hash table, and is used (refcnt > 0). | ||||
|  * Also finds elements that are not _currently_ used but only "about to be | ||||
|  * used", i.e. on the "to_be_changed" list, pending transaction commit. | ||||
|  */ | ||||
| bool lc_is_used(struct lru_cache *lc, unsigned int enr) | ||||
| { | ||||
| 	struct list_head  *n; | ||||
| 	struct lc_element *e; | ||||
| 
 | ||||
| 	if (list_empty(&lc->lru)) | ||||
| 		return NULL; | ||||
| 
 | ||||
| 	n = lc->lru.prev; | ||||
| 	e = list_entry(n, struct lc_element, list); | ||||
| 
 | ||||
| 	PARANOIA_LC_ELEMENT(lc, e); | ||||
| 
 | ||||
| 	list_del(&e->list); | ||||
| 	hlist_del(&e->colision); | ||||
| 	return e; | ||||
| 	struct lc_element *e = __lc_find(lc, enr, 1); | ||||
| 	return e && e->refcnt; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  | @ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e) | |||
| 	PARANOIA_LC_ELEMENT(lc, e); | ||||
| 	BUG_ON(e->refcnt); | ||||
| 
 | ||||
| 	e->lc_number = LC_FREE; | ||||
| 	e->lc_number = e->lc_new_number = LC_FREE; | ||||
| 	hlist_del_init(&e->colision); | ||||
| 	list_move(&e->list, &lc->free); | ||||
| 	RETURN(); | ||||
| } | ||||
| 
 | ||||
| static struct lc_element *lc_get_unused_element(struct lru_cache *lc) | ||||
| static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number) | ||||
| { | ||||
| 	struct list_head *n; | ||||
| 	struct lc_element *e; | ||||
| 
 | ||||
| 	if (list_empty(&lc->free)) | ||||
| 		return lc_evict(lc); | ||||
| 
 | ||||
| 	if (!list_empty(&lc->free)) | ||||
| 		n = lc->free.next; | ||||
| 	list_del(n); | ||||
| 	return list_entry(n, struct lc_element, list); | ||||
| 	else if (!list_empty(&lc->lru)) | ||||
| 		n = lc->lru.prev; | ||||
| 	else | ||||
| 		return NULL; | ||||
| 
 | ||||
| 	e = list_entry(n, struct lc_element, list); | ||||
| 	PARANOIA_LC_ELEMENT(lc, e); | ||||
| 
 | ||||
| 	e->lc_new_number = new_number; | ||||
| 	if (!hlist_unhashed(&e->colision)) | ||||
| 		__hlist_del(&e->colision); | ||||
| 	hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); | ||||
| 	list_move(&e->list, &lc->to_be_changed); | ||||
| 
 | ||||
| 	return e; | ||||
| } | ||||
| 
 | ||||
| static int lc_unused_element_available(struct lru_cache *lc) | ||||
|  | @ -308,6 +366,75 @@ static int lc_unused_element_available(struct lru_cache *lc) | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change) | ||||
| { | ||||
| 	struct lc_element *e; | ||||
| 
 | ||||
| 	PARANOIA_ENTRY(); | ||||
| 	if (lc->flags & LC_STARVING) { | ||||
| 		++lc->starving; | ||||
| 		RETURN(NULL); | ||||
| 	} | ||||
| 
 | ||||
| 	e = __lc_find(lc, enr, 1); | ||||
| 	/* if lc_new_number != lc_number,
 | ||||
| 	 * this enr is currently being pulled in already, | ||||
| 	 * and will be available once the pending transaction | ||||
| 	 * has been committed. */ | ||||
| 	if (e && e->lc_new_number == e->lc_number) { | ||||
| 		++lc->hits; | ||||
| 		if (e->refcnt++ == 0) | ||||
| 			lc->used++; | ||||
| 		list_move(&e->list, &lc->in_use); /* Not evictable... */ | ||||
| 		RETURN(e); | ||||
| 	} | ||||
| 
 | ||||
| 	++lc->misses; | ||||
| 	if (!may_change) | ||||
| 		RETURN(NULL); | ||||
| 
 | ||||
| 	/* It has been found above, but on the "to_be_changed" list, not yet
 | ||||
| 	 * committed.  Don't pull it in twice, wait for the transaction, then | ||||
| 	 * try again */ | ||||
| 	if (e) | ||||
| 		RETURN(NULL); | ||||
| 
 | ||||
| 	/* To avoid races with lc_try_lock(), first, mark us dirty
 | ||||
| 	 * (using test_and_set_bit, as it implies memory barriers), ... */ | ||||
| 	test_and_set_bit(__LC_DIRTY, &lc->flags); | ||||
| 
 | ||||
| 	/* ... only then check if it is locked anyways. If lc_unlock clears
 | ||||
| 	 * the dirty bit again, that's not a problem, we will come here again. | ||||
| 	 */ | ||||
| 	if (test_bit(__LC_LOCKED, &lc->flags)) { | ||||
| 		++lc->locked; | ||||
| 		RETURN(NULL); | ||||
| 	} | ||||
| 
 | ||||
| 	/* In case there is nothing available and we can not kick out
 | ||||
| 	 * the LRU element, we have to wait ... | ||||
| 	 */ | ||||
| 	if (!lc_unused_element_available(lc)) { | ||||
| 		__set_bit(__LC_STARVING, &lc->flags); | ||||
| 		RETURN(NULL); | ||||
| 	} | ||||
| 
 | ||||
| 	/* It was not present in the active set.  We are going to recycle an
 | ||||
| 	 * unused (or even "free") element, but we won't accumulate more than | ||||
| 	 * max_pending_changes changes.  */ | ||||
| 	if (lc->pending_changes >= lc->max_pending_changes) | ||||
| 		RETURN(NULL); | ||||
| 
 | ||||
| 	e = lc_prepare_for_change(lc, enr); | ||||
| 	BUG_ON(!e); | ||||
| 
 | ||||
| 	clear_bit(__LC_STARVING, &lc->flags); | ||||
| 	BUG_ON(++e->refcnt != 1); | ||||
| 	lc->used++; | ||||
| 	lc->pending_changes++; | ||||
| 
 | ||||
| 	RETURN(e); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * lc_get - get element by label, maybe change the active set | ||||
|  | @ -336,110 +463,65 @@ static int lc_unused_element_available(struct lru_cache *lc) | |||
|  *  pointer to an UNUSED element with some different element number, | ||||
|  *          where that different number may also be %LC_FREE. | ||||
|  * | ||||
|  *          In this case, the cache is marked %LC_DIRTY (blocking further changes), | ||||
|  *          and the returned element pointer is removed from the lru list and | ||||
|  *          hash collision chains.  The user now should do whatever housekeeping | ||||
|  *          is necessary. | ||||
|  *          Then he must call lc_changed(lc,element_pointer), to finish | ||||
|  *          the change. | ||||
|  *          In this case, the cache is marked %LC_DIRTY, | ||||
|  *          so lc_try_lock() will no longer succeed. | ||||
|  *          The returned element pointer is moved to the "to_be_changed" list, | ||||
|  *          and registered with the new element number on the hash collision chains, | ||||
|  *          so it is possible to pick it up from lc_is_used(). | ||||
|  *          Up to "max_pending_changes" (see lc_create()) can be accumulated. | ||||
|  *          The user now should do whatever housekeeping is necessary, | ||||
|  *          typically serialize on lc_try_lock_for_transaction(), then call | ||||
|  *          lc_committed(lc) and lc_unlock(), to finish the change. | ||||
|  * | ||||
|  * NOTE: The user needs to check the lc_number on EACH use, so he recognizes | ||||
|  *       any cache set change. | ||||
|  */ | ||||
| struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) | ||||
| { | ||||
| 	struct lc_element *e; | ||||
| 
 | ||||
| 	PARANOIA_ENTRY(); | ||||
| 	if (lc->flags & LC_STARVING) { | ||||
| 		++lc->starving; | ||||
| 		RETURN(NULL); | ||||
| 	} | ||||
| 
 | ||||
| 	e = lc_find(lc, enr); | ||||
| 	if (e) { | ||||
| 		++lc->hits; | ||||
| 		if (e->refcnt++ == 0) | ||||
| 			lc->used++; | ||||
| 		list_move(&e->list, &lc->in_use); /* Not evictable... */ | ||||
| 		RETURN(e); | ||||
| 	} | ||||
| 
 | ||||
| 	++lc->misses; | ||||
| 
 | ||||
| 	/* In case there is nothing available and we can not kick out
 | ||||
| 	 * the LRU element, we have to wait ... | ||||
| 	 */ | ||||
| 	if (!lc_unused_element_available(lc)) { | ||||
| 		__set_bit(__LC_STARVING, &lc->flags); | ||||
| 		RETURN(NULL); | ||||
| 	} | ||||
| 
 | ||||
| 	/* it was not present in the active set.
 | ||||
| 	 * we are going to recycle an unused (or even "free") element. | ||||
| 	 * user may need to commit a transaction to record that change. | ||||
| 	 * we serialize on flags & TF_DIRTY */ | ||||
| 	if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { | ||||
| 		++lc->dirty; | ||||
| 		RETURN(NULL); | ||||
| 	} | ||||
| 
 | ||||
| 	e = lc_get_unused_element(lc); | ||||
| 	BUG_ON(!e); | ||||
| 
 | ||||
| 	clear_bit(__LC_STARVING, &lc->flags); | ||||
| 	BUG_ON(++e->refcnt != 1); | ||||
| 	lc->used++; | ||||
| 
 | ||||
| 	lc->changing_element = e; | ||||
| 	lc->new_number = enr; | ||||
| 
 | ||||
| 	RETURN(e); | ||||
| } | ||||
| 
 | ||||
| /* similar to lc_get,
 | ||||
|  * but only gets a new reference on an existing element. | ||||
|  * you either get the requested element, or NULL. | ||||
|  * will be consolidated into one function. | ||||
|  */ | ||||
| struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) | ||||
| { | ||||
| 	struct lc_element *e; | ||||
| 
 | ||||
| 	PARANOIA_ENTRY(); | ||||
| 	if (lc->flags & LC_STARVING) { | ||||
| 		++lc->starving; | ||||
| 		RETURN(NULL); | ||||
| 	} | ||||
| 
 | ||||
| 	e = lc_find(lc, enr); | ||||
| 	if (e) { | ||||
| 		++lc->hits; | ||||
| 		if (e->refcnt++ == 0) | ||||
| 			lc->used++; | ||||
| 		list_move(&e->list, &lc->in_use); /* Not evictable... */ | ||||
| 	} | ||||
| 	RETURN(e); | ||||
| 	return __lc_get(lc, enr, 1); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * lc_changed - tell @lc that the change has been recorded | ||||
|  * lc_try_get - get element by label, if present; do not change the active set | ||||
|  * @lc: the lru cache to operate on | ||||
|  * @e: the element pending label change | ||||
|  * @enr: the label to look up | ||||
|  * | ||||
|  * Finds an element in the cache, increases its usage count, | ||||
|  * "touches" and returns it. | ||||
|  * | ||||
|  * Return values: | ||||
|  *  NULL | ||||
|  *     The cache was marked %LC_STARVING, | ||||
|  *     or the requested label was not in the active set | ||||
|  * | ||||
|  *  pointer to the element with the REQUESTED element number. | ||||
|  *     In this case, it can be used right away | ||||
|  */ | ||||
| void lc_changed(struct lru_cache *lc, struct lc_element *e) | ||||
| struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) | ||||
| { | ||||
| 	return __lc_get(lc, enr, 0); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * lc_committed - tell @lc that pending changes have been recorded | ||||
|  * @lc: the lru cache to operate on | ||||
|  * | ||||
|  * User is expected to serialize on explicit lc_try_lock_for_transaction() | ||||
|  * before the transaction is started, and later needs to lc_unlock() explicitly | ||||
|  * as well. | ||||
|  */ | ||||
| void lc_committed(struct lru_cache *lc) | ||||
| { | ||||
| 	struct lc_element *e, *tmp; | ||||
| 
 | ||||
| 	PARANOIA_ENTRY(); | ||||
| 	BUG_ON(e != lc->changing_element); | ||||
| 	PARANOIA_LC_ELEMENT(lc, e); | ||||
| 	list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) { | ||||
| 		/* count number of changes, not number of transactions */ | ||||
| 		++lc->changed; | ||||
| 	e->lc_number = lc->new_number; | ||||
| 	list_add(&e->list, &lc->in_use); | ||||
| 	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); | ||||
| 	lc->changing_element = NULL; | ||||
| 	lc->new_number = LC_FREE; | ||||
| 	clear_bit(__LC_DIRTY, &lc->flags); | ||||
| 	smp_mb__after_clear_bit(); | ||||
| 		e->lc_number = e->lc_new_number; | ||||
| 		list_move(&e->list, &lc->in_use); | ||||
| 	} | ||||
| 	lc->pending_changes = 0; | ||||
| 	RETURN(); | ||||
| } | ||||
| 
 | ||||
|  | @ -458,13 +540,12 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) | |||
| 	PARANOIA_ENTRY(); | ||||
| 	PARANOIA_LC_ELEMENT(lc, e); | ||||
| 	BUG_ON(e->refcnt == 0); | ||||
| 	BUG_ON(e == lc->changing_element); | ||||
| 	BUG_ON(e->lc_number != e->lc_new_number); | ||||
| 	if (--e->refcnt == 0) { | ||||
| 		/* move it to the front of LRU. */ | ||||
| 		list_move(&e->list, &lc->lru); | ||||
| 		lc->used--; | ||||
| 		clear_bit(__LC_STARVING, &lc->flags); | ||||
| 		smp_mb__after_clear_bit(); | ||||
| 		clear_bit_unlock(__LC_STARVING, &lc->flags); | ||||
| 	} | ||||
| 	RETURN(e->refcnt); | ||||
| } | ||||
|  | @ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) | |||
| void lc_set(struct lru_cache *lc, unsigned int enr, int index) | ||||
| { | ||||
| 	struct lc_element *e; | ||||
| 	struct list_head *lh; | ||||
| 
 | ||||
| 	if (index < 0 || index >= lc->nr_elements) | ||||
| 		return; | ||||
| 
 | ||||
| 	e = lc_element_by_index(lc, index); | ||||
| 	e->lc_number = enr; | ||||
| 	BUG_ON(e->lc_number != e->lc_new_number); | ||||
| 	BUG_ON(e->refcnt != 0); | ||||
| 
 | ||||
| 	e->lc_number = e->lc_new_number = enr; | ||||
| 	hlist_del_init(&e->colision); | ||||
| 	if (enr == LC_FREE) | ||||
| 		lh = &lc->free; | ||||
| 	else { | ||||
| 		hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); | ||||
| 	list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); | ||||
| 		lh = &lc->lru; | ||||
| 	} | ||||
| 	list_move(&e->list, lh); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  | @ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get); | |||
| EXPORT_SYMBOL(lc_find); | ||||
| EXPORT_SYMBOL(lc_get); | ||||
| EXPORT_SYMBOL(lc_put); | ||||
| EXPORT_SYMBOL(lc_changed); | ||||
| EXPORT_SYMBOL(lc_committed); | ||||
| EXPORT_SYMBOL(lc_element_by_index); | ||||
| EXPORT_SYMBOL(lc_index_of); | ||||
| EXPORT_SYMBOL(lc_seq_printf_stats); | ||||
| EXPORT_SYMBOL(lc_seq_dump_details); | ||||
| EXPORT_SYMBOL(lc_try_lock); | ||||
| EXPORT_SYMBOL(lc_is_used); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Philipp Reisner
				Philipp Reisner