| 
									
										
										
										
											2011-07-30 20:52:41 -04:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  *  linux/fs/nfs/blocklayout/blocklayout.h | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  Module for the NFSv4.1 pNFS block layout driver. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  Copyright (c) 2006 The Regents of the University of Michigan. | 
					
						
							|  |  |  |  *  All rights reserved. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  Andy Adamson <andros@citi.umich.edu> | 
					
						
							|  |  |  |  *  Fred Isaman <iisaman@umich.edu> | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * permission is granted to use, copy, create derivative works and | 
					
						
							|  |  |  |  * redistribute this software and such derivative works for any purpose, | 
					
						
							|  |  |  |  * so long as the name of the university of michigan is not used in | 
					
						
							|  |  |  |  * any advertising or publicity pertaining to the use or distribution | 
					
						
							|  |  |  |  * of this software without specific, written prior authorization.  if | 
					
						
							|  |  |  |  * the above copyright notice or any other identification of the | 
					
						
							|  |  |  |  * university of michigan is included in any copy of any portion of | 
					
						
							|  |  |  |  * this software, then the disclaimer below must also be included. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * this software is provided as is, without representation from the | 
					
						
							|  |  |  |  * university of michigan as to its fitness for any purpose, and without | 
					
						
							|  |  |  |  * warranty by the university of michigan of any kind, either express | 
					
						
							|  |  |  |  * or implied, including without limitation the implied warranties of | 
					
						
							|  |  |  |  * merchantability and fitness for a particular purpose.  the regents | 
					
						
							|  |  |  |  * of the university of michigan shall not be liable for any damages, | 
					
						
							|  |  |  |  * including special, indirect, incidental, or consequential damages, | 
					
						
							|  |  |  |  * with respect to any claim arising out or in connection with the use | 
					
						
							|  |  |  |  * of the software, even if it has been or is hereafter advised of the | 
					
						
							|  |  |  |  * possibility of such damages. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "blocklayout.h"
 | 
					
						
							|  |  |  | #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:49 -04:00
										 /*
 * Bit numbers.  Each is used as a shift count (1 << tag) into the
 * it_tags word of a tracking entry.
 */
#define EXTENT_INITIALIZED 0
#define EXTENT_WRITTEN     1
#define EXTENT_IN_COMMIT   2
/* Bit number that _preload_range() sets on the entries it creates */
#define INTERNAL_EXISTS    MY_MAX_TAGS
/* Mask selecting every bit below INTERNAL_EXISTS */
#define INTERNAL_MASK      ((1 << INTERNAL_EXISTS) - 1)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Returns largest t<=s s.t. t%base==0 */ | 
					
						
							|  |  |  | static inline sector_t normalize(sector_t s, int base) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	sector_t tmp = s; /* Since do_div modifies its argument */ | 
					
						
							|  |  |  | 	return s - do_div(tmp, base); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline sector_t normalize_up(sector_t s, int base) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return normalize(s + base - 1, base); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							/* Complete stub implementation using a list, until the wanted API is determined */
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Returns tags, or negative */ | 
					
						
							|  |  |  | static int32_t _find_entry(struct my_tree *tree, u64 s) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct pnfs_inval_tracking *pos; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s(%llu) enter\n", __func__, s); | 
					
						
							|  |  |  | 	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | 
					
						
							|  |  |  | 		if (pos->it_sector > s) | 
					
						
							|  |  |  | 			continue; | 
					
						
							|  |  |  | 		else if (pos->it_sector == s) | 
					
						
							|  |  |  | 			return pos->it_tags & INTERNAL_MASK; | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return -ENOENT; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline | 
					
						
							|  |  |  | int _has_tag(struct my_tree *tree, u64 s, int32_t tag) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int32_t tags; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s(%llu, %i) enter\n", __func__, s, tag); | 
					
						
							|  |  |  | 	s = normalize(s, tree->mtt_step_size); | 
					
						
							|  |  |  | 	tags = _find_entry(tree, s); | 
					
						
							|  |  |  | 	if ((tags < 0) || !(tags & (1 << tag))) | 
					
						
							|  |  |  | 		return 0; | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 		return 1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Creates entry with tag, or if entry already exists, unions tag to it.
 | 
					
						
							|  |  |  |  * If storage is not NULL, newly created entry will use it. | 
					
						
							|  |  |  |  * Returns number of entries added, or negative on error. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static int _add_entry(struct my_tree *tree, u64 s, int32_t tag, | 
					
						
							|  |  |  | 		      struct pnfs_inval_tracking *storage) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int found = 0; | 
					
						
							|  |  |  | 	struct pnfs_inval_tracking *pos; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage); | 
					
						
							|  |  |  | 	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | 
					
						
							|  |  |  | 		if (pos->it_sector > s) | 
					
						
							|  |  |  | 			continue; | 
					
						
							|  |  |  | 		else if (pos->it_sector == s) { | 
					
						
							|  |  |  | 			found = 1; | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		} else | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if (found) { | 
					
						
							|  |  |  | 		pos->it_tags |= (1 << tag); | 
					
						
							|  |  |  | 		return 0; | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		struct pnfs_inval_tracking *new; | 
					
						
							|  |  |  | 		if (storage) | 
					
						
							|  |  |  | 			new = storage; | 
					
						
							|  |  |  | 		else { | 
					
						
							|  |  |  | 			new = kmalloc(sizeof(*new), GFP_NOFS); | 
					
						
							|  |  |  | 			if (!new) | 
					
						
							|  |  |  | 				return -ENOMEM; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		new->it_sector = s; | 
					
						
							|  |  |  | 		new->it_tags = (1 << tag); | 
					
						
							|  |  |  | 		list_add(&new->it_link, &pos->it_link); | 
					
						
							|  |  |  | 		return 1; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* XXXX Really want option to not create */ | 
					
						
							|  |  |  | /* Over range, unions tag with existing entries, else creates entry with tag */ | 
					
						
							|  |  |  | static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	u64 i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length); | 
					
						
							|  |  |  | 	for (i = normalize(s, tree->mtt_step_size); i < s + length; | 
					
						
							|  |  |  | 	     i += tree->mtt_step_size) | 
					
						
							|  |  |  | 		if (_add_entry(tree, i, tag, NULL)) | 
					
						
							|  |  |  | 			return -ENOMEM; | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Ensure that future operations on given range of tree will not malloc */ | 
					
						
							|  |  |  | static int _preload_range(struct my_tree *tree, u64 offset, u64 length) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	u64 start, end, s; | 
					
						
							|  |  |  | 	int count, i, used = 0, status = -ENOMEM; | 
					
						
							|  |  |  | 	struct pnfs_inval_tracking **storage; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s(%llu, %llu) enter\n", __func__, offset, length); | 
					
						
							|  |  |  | 	start = normalize(offset, tree->mtt_step_size); | 
					
						
							|  |  |  | 	end = normalize_up(offset + length, tree->mtt_step_size); | 
					
						
							|  |  |  | 	count = (int)(end - start) / (int)tree->mtt_step_size; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Pre-malloc what memory we might need */ | 
					
						
							|  |  |  | 	storage = kmalloc(sizeof(*storage) * count, GFP_NOFS); | 
					
						
							|  |  |  | 	if (!storage) | 
					
						
							|  |  |  | 		return -ENOMEM; | 
					
						
							|  |  |  | 	for (i = 0; i < count; i++) { | 
					
						
							|  |  |  | 		storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking), | 
					
						
							|  |  |  | 				     GFP_NOFS); | 
					
						
							|  |  |  | 		if (!storage[i]) | 
					
						
							|  |  |  | 			goto out_cleanup; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Now need lock - HOW??? */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for (s = start; s < end; s += tree->mtt_step_size) | 
					
						
							|  |  |  | 		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Unlock - HOW??? */ | 
					
						
							|  |  |  | 	status = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  out_cleanup: | 
					
						
							|  |  |  | 	for (i = used; i < count; i++) { | 
					
						
							|  |  |  | 		if (!storage[i]) | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		kfree(storage[i]); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	kfree(storage); | 
					
						
							|  |  |  | 	return status; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void set_needs_init(sector_t *array, sector_t offset) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	sector_t *p = array; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s enter\n", __func__); | 
					
						
							|  |  |  | 	if (!p) | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	while (*p < offset) | 
					
						
							|  |  |  | 		p++; | 
					
						
							|  |  |  | 	if (*p == offset) | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	else if (*p == ~0) { | 
					
						
							|  |  |  | 		*p++ = offset; | 
					
						
							|  |  |  | 		*p = ~0; | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		sector_t *save = p; | 
					
						
							|  |  |  | 		dprintk("%s Adding %llu\n", __func__, (u64)offset); | 
					
						
							|  |  |  | 		while (*p != ~0) | 
					
						
							|  |  |  | 			p++; | 
					
						
							|  |  |  | 		p++; | 
					
						
							|  |  |  | 		memmove(save + 1, save, (char *)p - (char *)save); | 
					
						
							|  |  |  | 		*save = offset; | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* We are relying on page lock to serialize this */ | 
					
						
							|  |  |  | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int rv; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	spin_lock(&marks->im_lock); | 
					
						
							|  |  |  | 	rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED); | 
					
						
							|  |  |  | 	spin_unlock(&marks->im_lock); | 
					
						
							|  |  |  | 	return rv; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:55 -04:00
										 |  |  | /* Assume start, end already sector aligned */ | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | _range_has_tag(struct my_tree *tree, u64 start, u64 end, int32_t tag) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct pnfs_inval_tracking *pos; | 
					
						
							|  |  |  | 	u64 expect = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag); | 
					
						
							|  |  |  | 	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | 
					
						
							|  |  |  | 		if (pos->it_sector >= end) | 
					
						
							|  |  |  | 			continue; | 
					
						
							|  |  |  | 		if (!expect) { | 
					
						
							|  |  |  | 			if ((pos->it_sector == end - tree->mtt_step_size) && | 
					
						
							|  |  |  | 			    (pos->it_tags & (1 << tag))) { | 
					
						
							|  |  |  | 				expect = pos->it_sector - tree->mtt_step_size; | 
					
						
							|  |  |  | 				if (pos->it_sector < tree->mtt_step_size || expect < start) | 
					
						
							|  |  |  | 					return 1; | 
					
						
							|  |  |  | 				continue; | 
					
						
							|  |  |  | 			} else { | 
					
						
							|  |  |  | 				return 0; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if (pos->it_sector != expect || !(pos->it_tags & (1 << tag))) | 
					
						
							|  |  |  | 			return 0; | 
					
						
							|  |  |  | 		expect -= tree->mtt_step_size; | 
					
						
							|  |  |  | 		if (expect < start) | 
					
						
							|  |  |  | 			return 1; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int is_range_written(struct pnfs_inval_markings *marks, | 
					
						
							|  |  |  | 			    sector_t start, sector_t end) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int rv; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	spin_lock(&marks->im_lock); | 
					
						
							|  |  |  | 	rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); | 
					
						
							|  |  |  | 	spin_unlock(&marks->im_lock); | 
					
						
							|  |  |  | 	return rv; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:49 -04:00
										 |  |  | /* Marks sectors in [offest, offset_length) as having been initialized.
 | 
					
						
							|  |  |  |  * All lengths are step-aligned, where step is min(pagesize, blocksize). | 
					
						
							|  |  |  |  * Notes where partial block is initialized, and helps prepare it for | 
					
						
							|  |  |  |  * complete initialization later. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | /* Currently assumes offset is page-aligned */ | 
					
						
							|  |  |  | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | 
					
						
							|  |  |  | 			     sector_t offset, sector_t length, | 
					
						
							|  |  |  | 			     sector_t **pages) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	sector_t s, start, end; | 
					
						
							|  |  |  | 	sector_t *array = NULL; /* Pages to mark */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s(offset=%llu,len=%llu) enter\n", | 
					
						
							|  |  |  | 		__func__, (u64)offset, (u64)length); | 
					
						
							|  |  |  | 	s = max((sector_t) 3, | 
					
						
							|  |  |  | 		2 * (marks->im_block_size / (PAGE_CACHE_SECTORS))); | 
					
						
							|  |  |  | 	dprintk("%s set max=%llu\n", __func__, (u64)s); | 
					
						
							|  |  |  | 	if (pages) { | 
					
						
							|  |  |  | 		array = kmalloc(s * sizeof(sector_t), GFP_NOFS); | 
					
						
							|  |  |  | 		if (!array) | 
					
						
							|  |  |  | 			goto outerr; | 
					
						
							|  |  |  | 		array[0] = ~0; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	start = normalize(offset, marks->im_block_size); | 
					
						
							|  |  |  | 	end = normalize_up(offset + length, marks->im_block_size); | 
					
						
							|  |  |  | 	if (_preload_range(&marks->im_tree, start, end - start)) | 
					
						
							|  |  |  | 		goto outerr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	spin_lock(&marks->im_lock); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for (s = normalize_up(start, PAGE_CACHE_SECTORS); | 
					
						
							|  |  |  | 	     s < offset; s += PAGE_CACHE_SECTORS) { | 
					
						
							|  |  |  | 		dprintk("%s pre-area pages\n", __func__); | 
					
						
							|  |  |  | 		/* Portion of used block is not initialized */ | 
					
						
							|  |  |  | 		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED)) | 
					
						
							|  |  |  | 			set_needs_init(array, s); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length)) | 
					
						
							|  |  |  | 		goto out_unlock; | 
					
						
							|  |  |  | 	for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS); | 
					
						
							|  |  |  | 	     s < end; s += PAGE_CACHE_SECTORS) { | 
					
						
							|  |  |  | 		dprintk("%s post-area pages\n", __func__); | 
					
						
							|  |  |  | 		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED)) | 
					
						
							|  |  |  | 			set_needs_init(array, s); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	spin_unlock(&marks->im_lock); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (pages) { | 
					
						
							|  |  |  | 		if (array[0] == ~0) { | 
					
						
							|  |  |  | 			kfree(array); | 
					
						
							|  |  |  | 			*pages = NULL; | 
					
						
							|  |  |  | 		} else | 
					
						
							|  |  |  | 			*pages = array; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  out_unlock: | 
					
						
							|  |  |  | 	spin_unlock(&marks->im_lock); | 
					
						
							|  |  |  |  outerr: | 
					
						
							|  |  |  | 	if (pages) { | 
					
						
							|  |  |  | 		kfree(array); | 
					
						
							|  |  |  | 		*pages = NULL; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return -ENOMEM; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:51 -04:00
										 |  |  | /* Marks sectors in [offest, offset+length) as having been written to disk.
 | 
					
						
							|  |  |  |  * All lengths should be block aligned. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static int mark_written_sectors(struct pnfs_inval_markings *marks, | 
					
						
							|  |  |  | 				sector_t offset, sector_t length) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int status; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s(offset=%llu,len=%llu) enter\n", __func__, | 
					
						
							|  |  |  | 		(u64)offset, (u64)length); | 
					
						
							|  |  |  | 	spin_lock(&marks->im_lock); | 
					
						
							|  |  |  | 	status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length); | 
					
						
							|  |  |  | 	spin_unlock(&marks->im_lock); | 
					
						
							|  |  |  | 	return status; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void print_short_extent(struct pnfs_block_short_extent *be) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	dprintk("PRINT SHORT EXTENT extent %p\n", be); | 
					
						
							|  |  |  | 	if (be) { | 
					
						
							|  |  |  | 		dprintk("        be_f_offset %llu\n", (u64)be->bse_f_offset); | 
					
						
							|  |  |  | 		dprintk("        be_length   %llu\n", (u64)be->bse_length); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void print_clist(struct list_head *list, unsigned int count) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct pnfs_block_short_extent *be; | 
					
						
							|  |  |  | 	unsigned int i = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	ifdebug(FACILITY) { | 
					
						
							|  |  |  | 		printk(KERN_DEBUG "****************\n"); | 
					
						
							|  |  |  | 		printk(KERN_DEBUG "Extent list looks like:\n"); | 
					
						
							|  |  |  | 		list_for_each_entry(be, list, bse_node) { | 
					
						
							|  |  |  | 			i++; | 
					
						
							|  |  |  | 			print_short_extent(be); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if (i != count) | 
					
						
							|  |  |  | 			printk(KERN_DEBUG "\n\nExpected %u entries\n\n\n", count); | 
					
						
							|  |  |  | 		printk(KERN_DEBUG "****************\n"); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:52 -04:00
										 |  |  | /* Note: In theory, we should do more checking that devid's match between
 | 
					
						
							|  |  |  |  * old and new, but if they don't, the lists are too corrupt to salvage anyway. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | /* Note this is very similar to bl_add_merge_extent */ | 
					
						
							|  |  |  | static void add_to_commitlist(struct pnfs_block_layout *bl, | 
					
						
							|  |  |  | 			      struct pnfs_block_short_extent *new) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct list_head *clist = &bl->bl_commit; | 
					
						
							|  |  |  | 	struct pnfs_block_short_extent *old, *save; | 
					
						
							|  |  |  | 	sector_t end = new->bse_f_offset + new->bse_length; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s enter\n", __func__); | 
					
						
							|  |  |  | 	print_short_extent(new); | 
					
						
							|  |  |  | 	print_clist(clist, bl->bl_count); | 
					
						
							|  |  |  | 	bl->bl_count++; | 
					
						
							|  |  |  | 	/* Scan for proper place to insert, extending new to the left
 | 
					
						
							|  |  |  | 	 * as much as possible. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	list_for_each_entry_safe(old, save, clist, bse_node) { | 
					
						
							|  |  |  | 		if (new->bse_f_offset < old->bse_f_offset) | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		if (end <= old->bse_f_offset + old->bse_length) { | 
					
						
							|  |  |  | 			/* Range is already in list */ | 
					
						
							|  |  |  | 			bl->bl_count--; | 
					
						
							|  |  |  | 			kfree(new); | 
					
						
							|  |  |  | 			return; | 
					
						
							|  |  |  | 		} else if (new->bse_f_offset <= | 
					
						
							|  |  |  | 				old->bse_f_offset + old->bse_length) { | 
					
						
							|  |  |  | 			/* new overlaps or abuts existing be */ | 
					
						
							|  |  |  | 			if (new->bse_mdev == old->bse_mdev) { | 
					
						
							|  |  |  | 				/* extend new to fully replace old */ | 
					
						
							|  |  |  | 				new->bse_length += new->bse_f_offset - | 
					
						
							|  |  |  | 						old->bse_f_offset; | 
					
						
							|  |  |  | 				new->bse_f_offset = old->bse_f_offset; | 
					
						
							|  |  |  | 				list_del(&old->bse_node); | 
					
						
							|  |  |  | 				bl->bl_count--; | 
					
						
							|  |  |  | 				kfree(old); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	/* Note that if we never hit the above break, old will not point to a
 | 
					
						
							|  |  |  | 	 * valid extent.  However, in that case &old->bse_node==list. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	list_add_tail(&new->bse_node, &old->bse_node); | 
					
						
							|  |  |  | 	/* Scan forward for overlaps.  If we find any, extend new and
 | 
					
						
							|  |  |  | 	 * remove the overlapped extent. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	old = list_prepare_entry(new, clist, bse_node); | 
					
						
							|  |  |  | 	list_for_each_entry_safe_continue(old, save, clist, bse_node) { | 
					
						
							|  |  |  | 		if (end < old->bse_f_offset) | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		/* new overlaps or abuts old */ | 
					
						
							|  |  |  | 		if (new->bse_mdev == old->bse_mdev) { | 
					
						
							|  |  |  | 			if (end < old->bse_f_offset + old->bse_length) { | 
					
						
							|  |  |  | 				/* extend new to fully cover old */ | 
					
						
							|  |  |  | 				end = old->bse_f_offset + old->bse_length; | 
					
						
							|  |  |  | 				new->bse_length = end - new->bse_f_offset; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			list_del(&old->bse_node); | 
					
						
							|  |  |  | 			bl->bl_count--; | 
					
						
							|  |  |  | 			kfree(old); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	dprintk("%s: after merging\n", __func__); | 
					
						
							|  |  |  | 	print_clist(clist, bl->bl_count); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:55 -04:00
										 |  |  | /* Note the range described by offset, length is guaranteed to be contained
 | 
					
						
							|  |  |  |  * within be. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int bl_mark_for_commit(struct pnfs_block_extent *be, | 
					
						
							|  |  |  | 		    sector_t offset, sector_t length) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	sector_t new_end, end = offset + length; | 
					
						
							|  |  |  | 	struct pnfs_block_short_extent *new; | 
					
						
							|  |  |  | 	struct pnfs_block_layout *bl = container_of(be->be_inval, | 
					
						
							|  |  |  | 						    struct pnfs_block_layout, | 
					
						
							|  |  |  | 						    bl_inval); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	new = kmalloc(sizeof(*new), GFP_NOFS); | 
					
						
							|  |  |  | 	if (!new) | 
					
						
							|  |  |  | 		return -ENOMEM; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	mark_written_sectors(be->be_inval, offset, length); | 
					
						
							|  |  |  | 	/* We want to add the range to commit list, but it must be
 | 
					
						
							|  |  |  | 	 * block-normalized, and verified that the normalized range has | 
					
						
							|  |  |  | 	 * been entirely written to disk. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	new->bse_f_offset = offset; | 
					
						
							|  |  |  | 	offset = normalize(offset, bl->bl_blocksize); | 
					
						
							|  |  |  | 	if (offset < new->bse_f_offset) { | 
					
						
							|  |  |  | 		if (is_range_written(be->be_inval, offset, new->bse_f_offset)) | 
					
						
							|  |  |  | 			new->bse_f_offset = offset; | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 			new->bse_f_offset = offset + bl->bl_blocksize; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	new_end = normalize_up(end, bl->bl_blocksize); | 
					
						
							|  |  |  | 	if (end < new_end) { | 
					
						
							|  |  |  | 		if (is_range_written(be->be_inval, end, new_end)) | 
					
						
							|  |  |  | 			end = new_end; | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 			end = new_end - bl->bl_blocksize; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if (end <= new->bse_f_offset) { | 
					
						
							|  |  |  | 		kfree(new); | 
					
						
							|  |  |  | 		return 0; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	new->bse_length = end - new->bse_f_offset; | 
					
						
							|  |  |  | 	new->bse_devid = be->be_devid; | 
					
						
							|  |  |  | 	new->bse_mdev = be->be_mdev; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	spin_lock(&bl->bl_ext_lock); | 
					
						
							|  |  |  | 	/* new will be freed, either by add_to_commitlist if it decides not
 | 
					
						
							|  |  |  | 	 * to use it, or after LAYOUTCOMMIT uses it in the commitlist. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	add_to_commitlist(bl, new); | 
					
						
							|  |  |  | 	spin_unlock(&bl->bl_ext_lock); | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:41 -04:00
										 |  |  | static void print_bl_extent(struct pnfs_block_extent *be) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	dprintk("PRINT EXTENT extent %p\n", be); | 
					
						
							|  |  |  | 	if (be) { | 
					
						
							|  |  |  | 		dprintk("        be_f_offset %llu\n", (u64)be->be_f_offset); | 
					
						
							|  |  |  | 		dprintk("        be_length   %llu\n", (u64)be->be_length); | 
					
						
							|  |  |  | 		dprintk("        be_v_offset %llu\n", (u64)be->be_v_offset); | 
					
						
							|  |  |  | 		dprintk("        be_state    %d\n", be->be_state); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | destroy_extent(struct kref *kref) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct pnfs_block_extent *be; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	be = container_of(kref, struct pnfs_block_extent, be_refcnt); | 
					
						
							|  |  |  | 	dprintk("%s be=%p\n", __func__, be); | 
					
						
							|  |  |  | 	kfree(be); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | bl_put_extent(struct pnfs_block_extent *be) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if (be) { | 
					
						
							|  |  |  | 		dprintk("%s enter %p (%i)\n", __func__, be, | 
					
						
							|  |  |  | 			atomic_read(&be->be_refcnt.refcount)); | 
					
						
							|  |  |  | 		kref_put(&be->be_refcnt, destroy_extent); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct pnfs_block_extent *bl_alloc_extent(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct pnfs_block_extent *be; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	be = kmalloc(sizeof(struct pnfs_block_extent), GFP_NOFS); | 
					
						
							|  |  |  | 	if (!be) | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							|  |  |  | 	INIT_LIST_HEAD(&be->be_node); | 
					
						
							|  |  |  | 	kref_init(&be->be_refcnt); | 
					
						
							|  |  |  | 	be->be_inval = NULL; | 
					
						
							|  |  |  | 	return be; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void print_elist(struct list_head *list) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct pnfs_block_extent *be; | 
					
						
							|  |  |  | 	dprintk("****************\n"); | 
					
						
							|  |  |  | 	dprintk("Extent list looks like:\n"); | 
					
						
							|  |  |  | 	list_for_each_entry(be, list, be_node) { | 
					
						
							|  |  |  | 		print_bl_extent(be); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	dprintk("****************\n"); | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:45 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | static inline int | 
					
						
							|  |  |  | extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	/* Note this assumes new->be_f_offset >= old->be_f_offset */ | 
					
						
							|  |  |  | 	return (new->be_state == old->be_state) && | 
					
						
							|  |  |  | 		((new->be_state == PNFS_BLOCK_NONE_DATA) || | 
					
						
							|  |  |  | 		 ((new->be_v_offset - old->be_v_offset == | 
					
						
							|  |  |  | 		   new->be_f_offset - old->be_f_offset) && | 
					
						
							|  |  |  | 		  new->be_mdev == old->be_mdev)); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Adds new to appropriate list in bl, modifying new and removing existing
 | 
					
						
							|  |  |  |  * extents as appropriate to deal with overlaps. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * See bl_find_get_extent for list constraints. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Refcount on new is already set.  If end up not using it, or error out, | 
					
						
							|  |  |  |  * need to put the reference. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * bl->bl_ext_lock is held by caller. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | bl_add_merge_extent(struct pnfs_block_layout *bl, | 
					
						
							|  |  |  | 		     struct pnfs_block_extent *new) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct pnfs_block_extent *be, *tmp; | 
					
						
							|  |  |  | 	sector_t end = new->be_f_offset + new->be_length; | 
					
						
							|  |  |  | 	struct list_head *list; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s enter with be=%p\n", __func__, new); | 
					
						
							|  |  |  | 	print_bl_extent(new); | 
					
						
							|  |  |  | 	list = &bl->bl_extents[bl_choose_list(new->be_state)]; | 
					
						
							|  |  |  | 	print_elist(list); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Scan for proper place to insert, extending new to the left
 | 
					
						
							|  |  |  | 	 * as much as possible. | 
					
						
							|  |  |  | 	 */ | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:51 -04:00
										 |  |  | 	list_for_each_entry_safe_reverse(be, tmp, list, be_node) { | 
					
						
							|  |  |  | 		if (new->be_f_offset >= be->be_f_offset + be->be_length) | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:45 -04:00
										 |  |  | 			break; | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:51 -04:00
										 |  |  | 		if (new->be_f_offset >= be->be_f_offset) { | 
					
						
							|  |  |  | 			if (end <= be->be_f_offset + be->be_length) { | 
					
						
							|  |  |  | 				/* new is a subset of existing be*/ | 
					
						
							|  |  |  | 				if (extents_consistent(be, new)) { | 
					
						
							|  |  |  | 					dprintk("%s: new is subset, ignoring\n", | 
					
						
							|  |  |  | 						__func__); | 
					
						
							|  |  |  | 					bl_put_extent(new); | 
					
						
							|  |  |  | 					return 0; | 
					
						
							|  |  |  | 				} else { | 
					
						
							|  |  |  | 					goto out_err; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} else { | 
					
						
							|  |  |  | 				/* |<--   be   -->|
 | 
					
						
							|  |  |  | 				 *          |<--   new   -->| */ | 
					
						
							|  |  |  | 				if (extents_consistent(be, new)) { | 
					
						
							|  |  |  | 					/* extend new to fully replace be */ | 
					
						
							|  |  |  | 					new->be_length += new->be_f_offset - | 
					
						
							|  |  |  | 						be->be_f_offset; | 
					
						
							|  |  |  | 					new->be_f_offset = be->be_f_offset; | 
					
						
							|  |  |  | 					new->be_v_offset = be->be_v_offset; | 
					
						
							|  |  |  | 					dprintk("%s: removing %p\n", __func__, be); | 
					
						
							|  |  |  | 					list_del(&be->be_node); | 
					
						
							|  |  |  | 					bl_put_extent(be); | 
					
						
							|  |  |  | 				} else { | 
					
						
							|  |  |  | 					goto out_err; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} else if (end >= be->be_f_offset + be->be_length) { | 
					
						
							|  |  |  | 			/* new extent overlap existing be */ | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:45 -04:00
										 |  |  | 			if (extents_consistent(be, new)) { | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:51 -04:00
										 |  |  | 				/* extend new to fully replace be */ | 
					
						
							|  |  |  | 				dprintk("%s: removing %p\n", __func__, be); | 
					
						
							|  |  |  | 				list_del(&be->be_node); | 
					
						
							|  |  |  | 				bl_put_extent(be); | 
					
						
							|  |  |  | 			} else { | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:45 -04:00
										 |  |  | 				goto out_err; | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:51 -04:00
										 |  |  | 			} | 
					
						
							|  |  |  | 		} else if (end > be->be_f_offset) { | 
					
						
							|  |  |  | 			/*           |<--   be   -->|
 | 
					
						
							|  |  |  | 			 *|<--   new   -->| */ | 
					
						
							|  |  |  | 			if (extents_consistent(new, be)) { | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:45 -04:00
										 |  |  | 				/* extend new to fully replace be */ | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:51 -04:00
										 |  |  | 				new->be_length += be->be_f_offset + be->be_length - | 
					
						
							|  |  |  | 					new->be_f_offset - new->be_length; | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:45 -04:00
										 |  |  | 				dprintk("%s: removing %p\n", __func__, be); | 
					
						
							|  |  |  | 				list_del(&be->be_node); | 
					
						
							|  |  |  | 				bl_put_extent(be); | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:51 -04:00
										 |  |  | 			} else { | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:45 -04:00
										 |  |  | 				goto out_err; | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:51 -04:00
										 |  |  | 			} | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:45 -04:00
										 |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	/* Note that if we never hit the above break, be will not point to a
 | 
					
						
							|  |  |  | 	 * valid extent.  However, in that case &be->be_node==list. | 
					
						
							|  |  |  | 	 */ | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:51 -04:00
										 |  |  | 	list_add(&new->be_node, &be->be_node); | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:45 -04:00
										 |  |  | 	dprintk("%s: inserting new\n", __func__); | 
					
						
							|  |  |  | 	print_elist(list); | 
					
						
							|  |  |  | 	/* FIXME - The per-list consistency checks have all been done,
 | 
					
						
							|  |  |  | 	 * should now check cross-list consistency. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  out_err: | 
					
						
							|  |  |  | 	bl_put_extent(new); | 
					
						
							|  |  |  | 	return -EIO; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:48 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | /* Returns extent, or NULL.  If a second READ extent exists, it is returned
 | 
					
						
							|  |  |  |  * in cow_read, if given. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The extents are kept in two seperate ordered lists, one for READ and NONE, | 
					
						
							|  |  |  |  * one for READWRITE and INVALID.  Within each list, we assume: | 
					
						
							|  |  |  |  * 1. Extents are ordered by file offset. | 
					
						
							|  |  |  |  * 2. For any given isect, there is at most one extents that matches. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct pnfs_block_extent * | 
					
						
							|  |  |  | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, | 
					
						
							|  |  |  | 	    struct pnfs_block_extent **cow_read) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct pnfs_block_extent *be, *cow, *ret; | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s enter with isect %llu\n", __func__, (u64)isect); | 
					
						
							|  |  |  | 	cow = ret = NULL; | 
					
						
							|  |  |  | 	spin_lock(&bl->bl_ext_lock); | 
					
						
							|  |  |  | 	for (i = 0; i < EXTENT_LISTS; i++) { | 
					
						
							|  |  |  | 		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { | 
					
						
							|  |  |  | 			if (isect >= be->be_f_offset + be->be_length) | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			if (isect >= be->be_f_offset) { | 
					
						
							|  |  |  | 				/* We have found an extent */ | 
					
						
							|  |  |  | 				dprintk("%s Get %p (%i)\n", __func__, be, | 
					
						
							|  |  |  | 					atomic_read(&be->be_refcnt.refcount)); | 
					
						
							|  |  |  | 				kref_get(&be->be_refcnt); | 
					
						
							|  |  |  | 				if (!ret) | 
					
						
							|  |  |  | 					ret = be; | 
					
						
							|  |  |  | 				else if (be->be_state != PNFS_BLOCK_READ_DATA) | 
					
						
							|  |  |  | 					bl_put_extent(be); | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 					cow = be; | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if (ret && | 
					
						
							|  |  |  | 		    (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA)) | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	spin_unlock(&bl->bl_ext_lock); | 
					
						
							|  |  |  | 	if (cow_read) | 
					
						
							|  |  |  | 		*cow_read = cow; | 
					
						
							|  |  |  | 	print_bl_extent(ret); | 
					
						
							|  |  |  | 	return ret; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:50 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:52 -04:00
										 |  |  | /* Similar to bl_find_get_extent, but called with lock held, and ignores cow */ | 
					
						
							|  |  |  | static struct pnfs_block_extent * | 
					
						
							|  |  |  | bl_find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct pnfs_block_extent *be, *ret = NULL; | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s enter with isect %llu\n", __func__, (u64)isect); | 
					
						
							|  |  |  | 	for (i = 0; i < EXTENT_LISTS; i++) { | 
					
						
							|  |  |  | 		if (ret) | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { | 
					
						
							|  |  |  | 			if (isect >= be->be_f_offset + be->be_length) | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			if (isect >= be->be_f_offset) { | 
					
						
							|  |  |  | 				/* We have found an extent */ | 
					
						
							|  |  |  | 				dprintk("%s Get %p (%i)\n", __func__, be, | 
					
						
							|  |  |  | 					atomic_read(&be->be_refcnt.refcount)); | 
					
						
							|  |  |  | 				kref_get(&be->be_refcnt); | 
					
						
							|  |  |  | 				ret = be; | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	print_bl_extent(ret); | 
					
						
							|  |  |  | 	return ret; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:51 -04:00
										 |  |  | int | 
					
						
							|  |  |  | encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | 
					
						
							|  |  |  | 			       struct xdr_stream *xdr, | 
					
						
							|  |  |  | 			       const struct nfs4_layoutcommit_args *arg) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct pnfs_block_short_extent *lce, *save; | 
					
						
							|  |  |  | 	unsigned int count = 0; | 
					
						
							|  |  |  | 	__be32 *p, *xdr_start; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s enter\n", __func__); | 
					
						
							|  |  |  | 	/* BUG - creation of bl_commit is buggy - need to wait for
 | 
					
						
							|  |  |  | 	 * entire block to be marked WRITTEN before it can be added. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	spin_lock(&bl->bl_ext_lock); | 
					
						
							|  |  |  | 	/* Want to adjust for possible truncate */ | 
					
						
							|  |  |  | 	/* We now want to adjust argument range */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* XDR encode the ranges found */ | 
					
						
							|  |  |  | 	xdr_start = xdr_reserve_space(xdr, 8); | 
					
						
							|  |  |  | 	if (!xdr_start) | 
					
						
							|  |  |  | 		goto out; | 
					
						
							|  |  |  | 	list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) { | 
					
						
							|  |  |  | 		p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data)); | 
					
						
							|  |  |  | 		if (!p) | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		p = xdr_encode_opaque_fixed(p, lce->bse_devid.data, NFS4_DEVICEID4_SIZE); | 
					
						
							|  |  |  | 		p = xdr_encode_hyper(p, lce->bse_f_offset << SECTOR_SHIFT); | 
					
						
							|  |  |  | 		p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT); | 
					
						
							|  |  |  | 		p = xdr_encode_hyper(p, 0LL); | 
					
						
							|  |  |  | 		*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); | 
					
						
							|  |  |  | 		list_del(&lce->bse_node); | 
					
						
							|  |  |  | 		list_add_tail(&lce->bse_node, &bl->bl_committing); | 
					
						
							|  |  |  | 		bl->bl_count--; | 
					
						
							|  |  |  | 		count++; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4); | 
					
						
							|  |  |  | 	xdr_start[1] = cpu_to_be32(count); | 
					
						
							|  |  |  | out: | 
					
						
							|  |  |  | 	spin_unlock(&bl->bl_ext_lock); | 
					
						
							|  |  |  | 	dprintk("%s found %i ranges\n", __func__, count); | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:50 -04:00
										 |  |  | /* Helper function to set_to_rw that initialize a new extent */ | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | _prep_new_extent(struct pnfs_block_extent *new, | 
					
						
							|  |  |  | 		 struct pnfs_block_extent *orig, | 
					
						
							|  |  |  | 		 sector_t offset, sector_t length, int state) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	kref_init(&new->be_refcnt); | 
					
						
							|  |  |  | 	/* don't need to INIT_LIST_HEAD(&new->be_node) */ | 
					
						
							|  |  |  | 	memcpy(&new->be_devid, &orig->be_devid, sizeof(struct nfs4_deviceid)); | 
					
						
							|  |  |  | 	new->be_mdev = orig->be_mdev; | 
					
						
							|  |  |  | 	new->be_f_offset = offset; | 
					
						
							|  |  |  | 	new->be_length = length; | 
					
						
							|  |  |  | 	new->be_v_offset = orig->be_v_offset - orig->be_f_offset + offset; | 
					
						
							|  |  |  | 	new->be_state = state; | 
					
						
							|  |  |  | 	new->be_inval = orig->be_inval; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Tries to merge be with extent in front of it in list.
 | 
					
						
							|  |  |  |  * Frees storage if not used. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static struct pnfs_block_extent * | 
					
						
							|  |  |  | _front_merge(struct pnfs_block_extent *be, struct list_head *head, | 
					
						
							|  |  |  | 	     struct pnfs_block_extent *storage) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct pnfs_block_extent *prev; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (!storage) | 
					
						
							|  |  |  | 		goto no_merge; | 
					
						
							|  |  |  | 	if (&be->be_node == head || be->be_node.prev == head) | 
					
						
							|  |  |  | 		goto no_merge; | 
					
						
							|  |  |  | 	prev = list_entry(be->be_node.prev, struct pnfs_block_extent, be_node); | 
					
						
							|  |  |  | 	if ((prev->be_f_offset + prev->be_length != be->be_f_offset) || | 
					
						
							|  |  |  | 	    !extents_consistent(prev, be)) | 
					
						
							|  |  |  | 		goto no_merge; | 
					
						
							|  |  |  | 	_prep_new_extent(storage, prev, prev->be_f_offset, | 
					
						
							|  |  |  | 			 prev->be_length + be->be_length, prev->be_state); | 
					
						
							|  |  |  | 	list_replace(&prev->be_node, &storage->be_node); | 
					
						
							|  |  |  | 	bl_put_extent(prev); | 
					
						
							|  |  |  | 	list_del(&be->be_node); | 
					
						
							|  |  |  | 	bl_put_extent(be); | 
					
						
							|  |  |  | 	return storage; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  no_merge: | 
					
						
							|  |  |  | 	kfree(storage); | 
					
						
							|  |  |  | 	return be; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:52 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | static u64 | 
					
						
							|  |  |  | set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	u64 rv = offset + length; | 
					
						
							|  |  |  | 	struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old; | 
					
						
							|  |  |  | 	struct pnfs_block_extent *children[3]; | 
					
						
							|  |  |  | 	struct pnfs_block_extent *merge1 = NULL, *merge2 = NULL; | 
					
						
							|  |  |  | 	int i = 0, j; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s(%llu, %llu)\n", __func__, offset, length); | 
					
						
							|  |  |  | 	/* Create storage for up to three new extents e1, e2, e3 */ | 
					
						
							|  |  |  | 	e1 = kmalloc(sizeof(*e1), GFP_ATOMIC); | 
					
						
							|  |  |  | 	e2 = kmalloc(sizeof(*e2), GFP_ATOMIC); | 
					
						
							|  |  |  | 	e3 = kmalloc(sizeof(*e3), GFP_ATOMIC); | 
					
						
							|  |  |  | 	/* BUG - we are ignoring any failure */ | 
					
						
							|  |  |  | 	if (!e1 || !e2 || !e3) | 
					
						
							|  |  |  | 		goto out_nosplit; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	spin_lock(&bl->bl_ext_lock); | 
					
						
							|  |  |  | 	be = bl_find_get_extent_locked(bl, offset); | 
					
						
							|  |  |  | 	rv = be->be_f_offset + be->be_length; | 
					
						
							|  |  |  | 	if (be->be_state != PNFS_BLOCK_INVALID_DATA) { | 
					
						
							|  |  |  | 		spin_unlock(&bl->bl_ext_lock); | 
					
						
							|  |  |  | 		goto out_nosplit; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	/* Add e* to children, bumping e*'s krefs */ | 
					
						
							|  |  |  | 	if (be->be_f_offset != offset) { | 
					
						
							|  |  |  | 		_prep_new_extent(e1, be, be->be_f_offset, | 
					
						
							|  |  |  | 				 offset - be->be_f_offset, | 
					
						
							|  |  |  | 				 PNFS_BLOCK_INVALID_DATA); | 
					
						
							|  |  |  | 		children[i++] = e1; | 
					
						
							|  |  |  | 		print_bl_extent(e1); | 
					
						
							|  |  |  | 	} else | 
					
						
							|  |  |  | 		merge1 = e1; | 
					
						
							|  |  |  | 	_prep_new_extent(e2, be, offset, | 
					
						
							|  |  |  | 			 min(length, be->be_f_offset + be->be_length - offset), | 
					
						
							|  |  |  | 			 PNFS_BLOCK_READWRITE_DATA); | 
					
						
							|  |  |  | 	children[i++] = e2; | 
					
						
							|  |  |  | 	print_bl_extent(e2); | 
					
						
							|  |  |  | 	if (offset + length < be->be_f_offset + be->be_length) { | 
					
						
							|  |  |  | 		_prep_new_extent(e3, be, e2->be_f_offset + e2->be_length, | 
					
						
							|  |  |  | 				 be->be_f_offset + be->be_length - | 
					
						
							|  |  |  | 				 offset - length, | 
					
						
							|  |  |  | 				 PNFS_BLOCK_INVALID_DATA); | 
					
						
							|  |  |  | 		children[i++] = e3; | 
					
						
							|  |  |  | 		print_bl_extent(e3); | 
					
						
							|  |  |  | 	} else | 
					
						
							|  |  |  | 		merge2 = e3; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Remove be from list, and insert the e* */ | 
					
						
							|  |  |  | 	/* We don't get refs on e*, since this list is the base reference
 | 
					
						
							|  |  |  | 	 * set when init'ed. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	if (i < 3) | 
					
						
							|  |  |  | 		children[i] = NULL; | 
					
						
							|  |  |  | 	new = children[0]; | 
					
						
							|  |  |  | 	list_replace(&be->be_node, &new->be_node); | 
					
						
							|  |  |  | 	bl_put_extent(be); | 
					
						
							|  |  |  | 	new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1); | 
					
						
							|  |  |  | 	for (j = 1; j < i; j++) { | 
					
						
							|  |  |  | 		old = new; | 
					
						
							|  |  |  | 		new = children[j]; | 
					
						
							|  |  |  | 		list_add(&new->be_node, &old->be_node); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if (merge2) { | 
					
						
							|  |  |  | 		/* This is a HACK, should just create a _back_merge function */ | 
					
						
							|  |  |  | 		new = list_entry(new->be_node.next, | 
					
						
							|  |  |  | 				 struct pnfs_block_extent, be_node); | 
					
						
							|  |  |  | 		new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	spin_unlock(&bl->bl_ext_lock); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Since we removed the base reference above, be is now scheduled for
 | 
					
						
							|  |  |  | 	 * destruction. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	bl_put_extent(be); | 
					
						
							|  |  |  | 	dprintk("%s returns %llu after split\n", __func__, rv); | 
					
						
							|  |  |  | 	return rv; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  out_nosplit: | 
					
						
							|  |  |  | 	kfree(e1); | 
					
						
							|  |  |  | 	kfree(e2); | 
					
						
							|  |  |  | 	kfree(e3); | 
					
						
							|  |  |  | 	dprintk("%s returns %llu without splitting\n", __func__, rv); | 
					
						
							|  |  |  | 	return rv; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | 
					
						
							|  |  |  | 			      const struct nfs4_layoutcommit_args *arg, | 
					
						
							|  |  |  | 			      int status) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct pnfs_block_short_extent *lce, *save; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dprintk("%s status %d\n", __func__, status); | 
					
						
							|  |  |  | 	list_for_each_entry_safe(lce, save, &bl->bl_committing, bse_node) { | 
					
						
							|  |  |  | 		if (likely(!status)) { | 
					
						
							|  |  |  | 			u64 offset = lce->bse_f_offset; | 
					
						
							|  |  |  | 			u64 end = offset + lce->bse_length; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			do { | 
					
						
							|  |  |  | 				offset = set_to_rw(bl, offset, end - offset); | 
					
						
							|  |  |  | 			} while (offset < end); | 
					
						
							|  |  |  | 			list_del(&lce->bse_node); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			kfree(lce); | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			list_del(&lce->bse_node); | 
					
						
							|  |  |  | 			spin_lock(&bl->bl_ext_lock); | 
					
						
							|  |  |  | 			add_to_commitlist(bl, lce); | 
					
						
							|  |  |  | 			spin_unlock(&bl->bl_ext_lock); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } |