[PATCH] fix nr_unused accounting, and avoid recursing in iput with I_WILL_FREE set
list_move(&inode->i_list, &inode_in_use);
 		} else {
 			list_move(&inode->i_list, &inode_unused);
+			inodes_stat.nr_unused++;
 		}
 	}
 	wake_up_inode(inode);
Are you sure the above diff is correct? It was added somewhere between
2.6.5 and 2.6.8. I think it's wrong.
The only way I can imagine the i_count to be zero in the above path, is
that I_WILL_FREE is set.  And if I_WILL_FREE is set, then we must not
increase nr_unused.  So I believe the above change is buggy and it will
definitely overstate the number of unused inodes and it should be backed
out.
Note that __writeback_single_inode before calling __sync_single_inode, can
drop the spinlock and we can have both the dirty and locked bitflags clear
here:
		spin_unlock(&inode_lock);
		__wait_on_inode(inode);
		iput(inode);
XXXXXXX
		spin_lock(&inode_lock);
	}
	use inode again here
a construct like the above makes zero sense from a reference counting
standpoint.
Either we don't ever use the inode again after the iput, or the
inode_lock should be taken _before_ executing the iput (i.e. a __iput
would be required). Taking the inode_lock after iput means the iget was
useless if we keep using the inode after the iput.
So the only chance the 2.6 was safe to call __writeback_single_inode
with the i_count == 0, is that I_WILL_FREE is set (I_WILL_FREE will
prevent the VM to free the inode in XXXXX).
Potentially calling the above iput with I_WILL_FREE was also wrong
because it would recurse in iput_final (the second mainline bug).
The below (untested) patch fixes the nr_unused accounting, avoids recursing
in iput when I_WILL_FREE is set and makes sure (with the BUG_ON) that we
don't corrupt memory and that all holders that don't set I_WILL_FREE, keeps
a reference on the inode!
Signed-off-by: Andrea Arcangeli <andrea@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
	
	
This commit is contained in:
		
					parent
					
						
							
								52303e8b5f
							
						
					
				
			
			
				commit
				
					
						7f04c26d71
					
				
			
		
					 2 changed files with 17 additions and 12 deletions
				
			
		|  | @ -230,7 +230,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 			 * The inode is clean, unused | ||||
| 			 */ | ||||
| 			list_move(&inode->i_list, &inode_unused); | ||||
| 			inodes_stat.nr_unused++; | ||||
| 		} | ||||
| 	} | ||||
| 	wake_up_inode(inode); | ||||
|  | @ -238,14 +237,20 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Write out an inode's dirty pages.  Called under inode_lock. | ||||
|  * Write out an inode's dirty pages.  Called under inode_lock.  Either the | ||||
|  * caller has ref on the inode (either via __iget or via syscall against an fd) | ||||
|  * or the inode has I_WILL_FREE set (via generic_forget_inode) | ||||
|  */ | ||||
| static int | ||||
| __writeback_single_inode(struct inode *inode, | ||||
| 			struct writeback_control *wbc) | ||||
| __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | ||||
| { | ||||
| 	wait_queue_head_t *wqh; | ||||
| 
 | ||||
| 	if (!atomic_read(&inode->i_count)) | ||||
| 		WARN_ON(!(inode->i_state & I_WILL_FREE)); | ||||
| 	else | ||||
| 		WARN_ON(inode->i_state & I_WILL_FREE); | ||||
| 
 | ||||
| 	if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_LOCK)) { | ||||
| 		list_move(&inode->i_list, &inode->i_sb->s_dirty); | ||||
| 		return 0; | ||||
|  | @ -259,11 +264,9 @@ __writeback_single_inode(struct inode *inode, | |||
| 
 | ||||
| 		wqh = bit_waitqueue(&inode->i_state, __I_LOCK); | ||||
| 		do { | ||||
| 			__iget(inode); | ||||
| 			spin_unlock(&inode_lock); | ||||
| 			__wait_on_bit(wqh, &wq, inode_wait, | ||||
| 							TASK_UNINTERRUPTIBLE); | ||||
| 			iput(inode); | ||||
| 			spin_lock(&inode_lock); | ||||
| 		} while (inode->i_state & I_LOCK); | ||||
| 	} | ||||
|  | @ -541,14 +544,15 @@ void sync_inodes(int wait) | |||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  *	write_inode_now	-	write an inode to disk | ||||
|  *	@inode: inode to write to disk | ||||
|  *	@sync: whether the write should be synchronous or not | ||||
|  * write_inode_now	-	write an inode to disk | ||||
|  * @inode: inode to write to disk | ||||
|  * @sync: whether the write should be synchronous or not | ||||
|  * | ||||
|  *	This function commits an inode to disk immediately if it is | ||||
|  *	dirty. This is primarily needed by knfsd. | ||||
|  * This function commits an inode to disk immediately if it is dirty. This is | ||||
|  * primarily needed by knfsd. | ||||
|  * | ||||
|  * The caller must either have a ref on the inode or must have set I_WILL_FREE. | ||||
|  */ | ||||
|   | ||||
| int write_inode_now(struct inode *inode, int sync) | ||||
| { | ||||
| 	int ret; | ||||
|  |  | |||
|  | @ -1088,6 +1088,7 @@ static void generic_forget_inode(struct inode *inode) | |||
| 	if (inode->i_data.nrpages) | ||||
| 		truncate_inode_pages(&inode->i_data, 0); | ||||
| 	clear_inode(inode); | ||||
| 	wake_up_inode(inode); | ||||
| 	destroy_inode(inode); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Andrea Arcangeli
				Andrea Arcangeli