fs: kill i_alloc_sem
i_alloc_sem is a rather special rw_semaphore. It's the last one that may be released by a non-owner, and its write side is always mirrored by real exclusion. Its intended use is to wait for all pending direct I/O requests to finish before starting a truncate. Replace it with a hand-grown construct: - exclusion for truncates is already guaranteed by i_mutex, so it can simply fall away - the reader side is replaced by an i_dio_count member in struct inode that counts the number of pending direct I/O requests. Truncate can't proceed as long as it's non-zero - when i_dio_count reaches zero we wake up a pending truncate using wake_up_bit on a new bit in i_state - new references to i_dio_count can't appear while we are waiting for it to read zero because the direct I/O count always needs i_mutex (or an equivalent like XFS's i_iolock) for starting a new operation. This scheme is much simpler, and saves the space of a spinlock_t and a struct list_head in struct inode (typically 160 bits on a non-debug 64-bit system). Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
		
					parent
					
						
							
								f9b5570d7f
							
						
					
				
			
			
				commit
				
					
						bd5fe6c5eb
					
				
			
		
					 13 changed files with 78 additions and 53 deletions
				
			
		| 
						 | 
					@ -233,16 +233,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 | 
				
			||||||
		return error;
 | 
							return error;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (ia_valid & ATTR_SIZE)
 | 
						if (ia_valid & ATTR_SIZE)
 | 
				
			||||||
		down_write(&dentry->d_inode->i_alloc_sem);
 | 
							inode_dio_wait(inode);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (inode->i_op->setattr)
 | 
						if (inode->i_op->setattr)
 | 
				
			||||||
		error = inode->i_op->setattr(dentry, attr);
 | 
							error = inode->i_op->setattr(dentry, attr);
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
		error = simple_setattr(dentry, attr);
 | 
							error = simple_setattr(dentry, attr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (ia_valid & ATTR_SIZE)
 | 
					 | 
				
			||||||
		up_write(&dentry->d_inode->i_alloc_sem);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (!error)
 | 
						if (!error)
 | 
				
			||||||
		fsnotify_change(dentry, ia_valid);
 | 
							fsnotify_change(dentry, ia_valid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -135,6 +135,50 @@ struct dio {
 | 
				
			||||||
	struct page *pages[DIO_PAGES];	/* page buffer */
 | 
						struct page *pages[DIO_PAGES];	/* page buffer */
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void __inode_dio_wait(struct inode *inode)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
 | 
				
			||||||
 | 
						DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						do {
 | 
				
			||||||
 | 
							prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
 | 
				
			||||||
 | 
							if (atomic_read(&inode->i_dio_count))
 | 
				
			||||||
 | 
								schedule();
 | 
				
			||||||
 | 
						} while (atomic_read(&inode->i_dio_count));
 | 
				
			||||||
 | 
						finish_wait(wq, &q.wait);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
 | 
				
			||||||
 | 
					 * inode_dio_wait - wait for outstanding DIO requests to finish
 | 
				
			||||||
 | 
					 * @inode: inode to wait for
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Waits for all pending direct I/O requests to finish so that we can
 | 
				
			||||||
 | 
					 * proceed with a truncate or equivalent operation.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Must be called under a lock that serializes taking new references
 | 
				
			||||||
 | 
					 * to i_dio_count, usually by inode->i_mutex.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					void inode_dio_wait(struct inode *inode)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (atomic_read(&inode->i_dio_count))
 | 
				
			||||||
 | 
							__inode_dio_wait(inode);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(inode_dio_wait);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * inode_dio_done - signal finish of a direct I/O request
 | 
				
			||||||
 | 
					 * @inode: inode the direct I/O happens on
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * This is called once we've finished processing a direct I/O request,
 | 
				
			||||||
 | 
					 * and is used to wake up callers waiting for direct I/O to be quiesced.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					void inode_dio_done(struct inode *inode)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (atomic_dec_and_test(&inode->i_dio_count))
 | 
				
			||||||
 | 
							wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(inode_dio_done);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * How many pages are in the queue?
 | 
					 * How many pages are in the queue?
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
| 
						 | 
					@ -254,9 +298,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (dio->flags & DIO_LOCKING)
 | 
						if (dio->flags & DIO_LOCKING)
 | 
				
			||||||
		/* lockdep: non-owner release */
 | 
							inode_dio_done(dio->inode);
 | 
				
			||||||
		up_read_non_owner(&dio->inode->i_alloc_sem);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -980,9 +1022,6 @@ out:
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * Releases both i_mutex and i_alloc_sem
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
static ssize_t
 | 
					static ssize_t
 | 
				
			||||||
direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 
 | 
					direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 
 | 
				
			||||||
	const struct iovec *iov, loff_t offset, unsigned long nr_segs, 
 | 
						const struct iovec *iov, loff_t offset, unsigned long nr_segs, 
 | 
				
			||||||
| 
						 | 
					@ -1146,15 +1185,14 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 | 
				
			||||||
 *    For writes this function is called under i_mutex and returns with
 | 
					 *    For writes this function is called under i_mutex and returns with
 | 
				
			||||||
 *    i_mutex held, for reads, i_mutex is not held on entry, but it is
 | 
					 *    i_mutex held, for reads, i_mutex is not held on entry, but it is
 | 
				
			||||||
 *    taken and dropped again before returning.
 | 
					 *    taken and dropped again before returning.
 | 
				
			||||||
 *    For reads and writes i_alloc_sem is taken in shared mode and released
 | 
					 *    The i_dio_count counter keeps track of the number of outstanding
 | 
				
			||||||
 *    on I/O completion (which may happen asynchronously after returning to
 | 
					 *    direct I/O requests, and truncate waits for it to reach zero.
 | 
				
			||||||
 *    the caller).
 | 
					 *    New references to i_dio_count must only be grabbed with i_mutex
 | 
				
			||||||
 | 
					 *    held.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *  - if the flags value does NOT contain DIO_LOCKING we don't use any
 | 
					 *  - if the flags value does NOT contain DIO_LOCKING we don't use any
 | 
				
			||||||
 *    internal locking but rather rely on the filesystem to synchronize
 | 
					 *    internal locking but rather rely on the filesystem to synchronize
 | 
				
			||||||
 *    direct I/O reads/writes versus each other and truncate.
 | 
					 *    direct I/O reads/writes versus each other and truncate.
 | 
				
			||||||
 *    For reads and writes both i_mutex and i_alloc_sem are not held on
 | 
					 | 
				
			||||||
 *    entry and are never taken.
 | 
					 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
ssize_t
 | 
					ssize_t
 | 
				
			||||||
__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 | 
					__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 | 
				
			||||||
| 
						 | 
					@ -1234,10 +1272,9 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * Will be released at I/O completion, possibly in a
 | 
							 * Will be decremented at I/O completion time.
 | 
				
			||||||
		 * different thread.
 | 
					 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		down_read_non_owner(&inode->i_alloc_sem);
 | 
							atomic_inc(&inode->i_dio_count);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -168,8 +168,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 | 
				
			||||||
	mutex_init(&inode->i_mutex);
 | 
						mutex_init(&inode->i_mutex);
 | 
				
			||||||
	lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
 | 
						lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	init_rwsem(&inode->i_alloc_sem);
 | 
						atomic_set(&inode->i_dio_count, 0);
 | 
				
			||||||
	lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mapping->a_ops = &empty_aops;
 | 
						mapping->a_ops = &empty_aops;
 | 
				
			||||||
	mapping->host = inode;
 | 
						mapping->host = inode;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1832,9 +1832,8 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 | 
				
			||||||
	 * fails again.
 | 
						 * fails again.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (unlikely(NInoTruncateFailed(ni))) {
 | 
						if (unlikely(NInoTruncateFailed(ni))) {
 | 
				
			||||||
		down_write(&vi->i_alloc_sem);
 | 
							inode_dio_wait(vi);
 | 
				
			||||||
		err = ntfs_truncate(vi);
 | 
							err = ntfs_truncate(vi);
 | 
				
			||||||
		up_write(&vi->i_alloc_sem);
 | 
					 | 
				
			||||||
		if (err || NInoTruncateFailed(ni)) {
 | 
							if (err || NInoTruncateFailed(ni)) {
 | 
				
			||||||
			if (!err)
 | 
								if (!err)
 | 
				
			||||||
				err = -EIO;
 | 
									err = -EIO;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2357,12 +2357,7 @@ static const char *es = "  Leaving inconsistent metadata.  Unmount and run "
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * Returns 0 on success or -errno on error.
 | 
					 * Returns 0 on success or -errno on error.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * Called with ->i_mutex held.  In all but one case ->i_alloc_sem is held for
 | 
					 * Called with ->i_mutex held.
 | 
				
			||||||
 * writing.  The only case in the kernel where ->i_alloc_sem is not held is
 | 
					 | 
				
			||||||
 * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
 | 
					 | 
				
			||||||
 * with the current i_size as the offset.  The analogous place in NTFS is in
 | 
					 | 
				
			||||||
 * fs/ntfs/file.c::ntfs_file_buffered_write() where we call vmtruncate() again
 | 
					 | 
				
			||||||
 * without holding ->i_alloc_sem.
 | 
					 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
int ntfs_truncate(struct inode *vi)
 | 
					int ntfs_truncate(struct inode *vi)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -2887,8 +2882,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
 | 
				
			||||||
 * We also abort all changes of user, group, and mode as we do not implement
 | 
					 * We also abort all changes of user, group, and mode as we do not implement
 | 
				
			||||||
 * the NTFS ACLs yet.
 | 
					 * the NTFS ACLs yet.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * Called with ->i_mutex held.  For the ATTR_SIZE (i.e. ->truncate) case, also
 | 
					 * Called with ->i_mutex held.
 | 
				
			||||||
 * called with ->i_alloc_sem held for writing.
 | 
					 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
 | 
					int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -551,9 +551,8 @@ bail:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * ocfs2_dio_end_io is called by the dio core when a dio is finished.  We're
 | 
					 * ocfs2_dio_end_io is called by the dio core when a dio is finished.  We're
 | 
				
			||||||
 * particularly interested in the aio/dio case.  Like the core uses
 | 
					 * particularly interested in the aio/dio case.  We use the rw_lock DLM lock
 | 
				
			||||||
 * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from
 | 
					 * to protect io on one node from truncation on another.
 | 
				
			||||||
 * truncation on another.
 | 
					 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static void ocfs2_dio_end_io(struct kiocb *iocb,
 | 
					static void ocfs2_dio_end_io(struct kiocb *iocb,
 | 
				
			||||||
			     loff_t offset,
 | 
								     loff_t offset,
 | 
				
			||||||
| 
						 | 
					@ -569,7 +568,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 | 
				
			||||||
	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
 | 
						BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (ocfs2_iocb_is_sem_locked(iocb)) {
 | 
						if (ocfs2_iocb_is_sem_locked(iocb)) {
 | 
				
			||||||
		up_read(&inode->i_alloc_sem);
 | 
							inode_dio_done(inode);
 | 
				
			||||||
		ocfs2_iocb_clear_sem_locked(iocb);
 | 
							ocfs2_iocb_clear_sem_locked(iocb);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2236,9 +2236,9 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 | 
				
			||||||
	ocfs2_iocb_clear_sem_locked(iocb);
 | 
						ocfs2_iocb_clear_sem_locked(iocb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
relock:
 | 
					relock:
 | 
				
			||||||
	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
 | 
						/* to match setattr's i_mutex -> rw_lock ordering */
 | 
				
			||||||
	if (direct_io) {
 | 
						if (direct_io) {
 | 
				
			||||||
		down_read(&inode->i_alloc_sem);
 | 
							atomic_inc(&inode->i_dio_count);
 | 
				
			||||||
		have_alloc_sem = 1;
 | 
							have_alloc_sem = 1;
 | 
				
			||||||
		/* communicate with ocfs2_dio_end_io */
 | 
							/* communicate with ocfs2_dio_end_io */
 | 
				
			||||||
		ocfs2_iocb_set_sem_locked(iocb);
 | 
							ocfs2_iocb_set_sem_locked(iocb);
 | 
				
			||||||
| 
						 | 
					@ -2290,7 +2290,7 @@ relock:
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (direct_io && !can_do_direct) {
 | 
						if (direct_io && !can_do_direct) {
 | 
				
			||||||
		ocfs2_rw_unlock(inode, rw_level);
 | 
							ocfs2_rw_unlock(inode, rw_level);
 | 
				
			||||||
		up_read(&inode->i_alloc_sem);
 | 
							inode_dio_done(inode);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		have_alloc_sem = 0;
 | 
							have_alloc_sem = 0;
 | 
				
			||||||
		rw_level = -1;
 | 
							rw_level = -1;
 | 
				
			||||||
| 
						 | 
					@ -2361,8 +2361,7 @@ out_dio:
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
 | 
						 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
 | 
				
			||||||
	 * function pointer which is called when o_direct io completes so that
 | 
						 * function pointer which is called when o_direct io completes so that
 | 
				
			||||||
	 * it can unlock our rw lock.  (it's the clustered equivalent of
 | 
						 * it can unlock our rw lock.
 | 
				
			||||||
	 * i_alloc_sem; protects truncate from racing with pending ios).
 | 
					 | 
				
			||||||
	 * Unfortunately there are error cases which call end_io and others
 | 
						 * Unfortunately there are error cases which call end_io and others
 | 
				
			||||||
	 * that don't.  so we don't have to unlock the rw_lock if either an
 | 
						 * that don't.  so we don't have to unlock the rw_lock if either an
 | 
				
			||||||
	 * async dio is going to do it in the future or an end_io after an
 | 
						 * async dio is going to do it in the future or an end_io after an
 | 
				
			||||||
| 
						 | 
					@ -2379,7 +2378,7 @@ out:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out_sems:
 | 
					out_sems:
 | 
				
			||||||
	if (have_alloc_sem) {
 | 
						if (have_alloc_sem) {
 | 
				
			||||||
		up_read(&inode->i_alloc_sem);
 | 
							inode_dio_done(inode);
 | 
				
			||||||
		ocfs2_iocb_clear_sem_locked(iocb);
 | 
							ocfs2_iocb_clear_sem_locked(iocb);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2531,8 +2530,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 | 
				
			||||||
	 * need locks to protect pending reads from racing with truncate.
 | 
						 * need locks to protect pending reads from racing with truncate.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (filp->f_flags & O_DIRECT) {
 | 
						if (filp->f_flags & O_DIRECT) {
 | 
				
			||||||
		down_read(&inode->i_alloc_sem);
 | 
					 | 
				
			||||||
		have_alloc_sem = 1;
 | 
							have_alloc_sem = 1;
 | 
				
			||||||
 | 
							atomic_inc(&inode->i_dio_count);
 | 
				
			||||||
		ocfs2_iocb_set_sem_locked(iocb);
 | 
							ocfs2_iocb_set_sem_locked(iocb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		ret = ocfs2_rw_lock(inode, 0);
 | 
							ret = ocfs2_rw_lock(inode, 0);
 | 
				
			||||||
| 
						 | 
					@ -2575,7 +2574,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bail:
 | 
					bail:
 | 
				
			||||||
	if (have_alloc_sem) {
 | 
						if (have_alloc_sem) {
 | 
				
			||||||
		up_read(&inode->i_alloc_sem);
 | 
							inode_dio_done(inode);
 | 
				
			||||||
		ocfs2_iocb_clear_sem_locked(iocb);
 | 
							ocfs2_iocb_clear_sem_locked(iocb);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (rw_level != -1)
 | 
						if (rw_level != -1)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -555,11 +555,10 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		reiserfs_write_unlock(inode->i_sb);
 | 
							reiserfs_write_unlock(inode->i_sb);
 | 
				
			||||||
		mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
 | 
							mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
 | 
				
			||||||
		down_write(&dentry->d_inode->i_alloc_sem);
 | 
							inode_dio_wait(dentry->d_inode);
 | 
				
			||||||
		reiserfs_write_lock(inode->i_sb);
 | 
							reiserfs_write_lock(inode->i_sb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		err = reiserfs_setattr(dentry, &newattrs);
 | 
							err = reiserfs_setattr(dentry, &newattrs);
 | 
				
			||||||
		up_write(&dentry->d_inode->i_alloc_sem);
 | 
					 | 
				
			||||||
		mutex_unlock(&dentry->d_inode->i_mutex);
 | 
							mutex_unlock(&dentry->d_inode->i_mutex);
 | 
				
			||||||
	} else
 | 
						} else
 | 
				
			||||||
		update_ctime(inode);
 | 
							update_ctime(inode);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -779,7 +779,7 @@ struct inode {
 | 
				
			||||||
	struct timespec		i_ctime;
 | 
						struct timespec		i_ctime;
 | 
				
			||||||
	blkcnt_t		i_blocks;
 | 
						blkcnt_t		i_blocks;
 | 
				
			||||||
	unsigned short          i_bytes;
 | 
						unsigned short          i_bytes;
 | 
				
			||||||
	struct rw_semaphore	i_alloc_sem;
 | 
						atomic_t		i_dio_count;
 | 
				
			||||||
	const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
 | 
						const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
 | 
				
			||||||
	struct file_lock	*i_flock;
 | 
						struct file_lock	*i_flock;
 | 
				
			||||||
	struct address_space	*i_mapping;
 | 
						struct address_space	*i_mapping;
 | 
				
			||||||
| 
						 | 
					@ -1705,6 +1705,10 @@ struct super_operations {
 | 
				
			||||||
 *			set during data writeback, and cleared with a wakeup
 | 
					 *			set during data writeback, and cleared with a wakeup
 | 
				
			||||||
 *			on the bit address once it is done.
 | 
					 *			on the bit address once it is done.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 | 
					 * I_REFERENCED		Marks the inode as recently referenced on the LRU list.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * I_DIO_WAKEUP		Never set.  Only used as a key for wait_on_bit().
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 * Q: What is the difference between I_WILL_FREE and I_FREEING?
 | 
					 * Q: What is the difference between I_WILL_FREE and I_FREEING?
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#define I_DIRTY_SYNC		(1 << 0)
 | 
					#define I_DIRTY_SYNC		(1 << 0)
 | 
				
			||||||
| 
						 | 
					@ -1718,6 +1722,8 @@ struct super_operations {
 | 
				
			||||||
#define __I_SYNC		7
 | 
					#define __I_SYNC		7
 | 
				
			||||||
#define I_SYNC			(1 << __I_SYNC)
 | 
					#define I_SYNC			(1 << __I_SYNC)
 | 
				
			||||||
#define I_REFERENCED		(1 << 8)
 | 
					#define I_REFERENCED		(1 << 8)
 | 
				
			||||||
 | 
					#define __I_DIO_WAKEUP		9
 | 
				
			||||||
 | 
					#define I_DIO_WAKEUP		(1 << __I_DIO_WAKEUP)	/* mask for bit number __I_DIO_WAKEUP */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
 | 
					#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1828,7 +1834,6 @@ struct file_system_type {
 | 
				
			||||||
	struct lock_class_key i_lock_key;
 | 
						struct lock_class_key i_lock_key;
 | 
				
			||||||
	struct lock_class_key i_mutex_key;
 | 
						struct lock_class_key i_mutex_key;
 | 
				
			||||||
	struct lock_class_key i_mutex_dir_key;
 | 
						struct lock_class_key i_mutex_dir_key;
 | 
				
			||||||
	struct lock_class_key i_alloc_sem_key;
 | 
					 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
 | 
					extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
 | 
				
			||||||
| 
						 | 
					@ -2404,6 +2409,8 @@ enum {
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void dio_end_io(struct bio *bio, int error);
 | 
					void dio_end_io(struct bio *bio, int error);
 | 
				
			||||||
 | 
					void inode_dio_wait(struct inode *inode);
 | 
				
			||||||
 | 
					void inode_dio_done(struct inode *inode);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 | 
					ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 | 
				
			||||||
	struct block_device *bdev, const struct iovec *iov, loff_t offset,
 | 
						struct block_device *bdev, const struct iovec *iov, loff_t offset,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -78,9 +78,6 @@
 | 
				
			||||||
 *  ->i_mutex			(generic_file_buffered_write)
 | 
					 *  ->i_mutex			(generic_file_buffered_write)
 | 
				
			||||||
 *    ->mmap_sem		(fault_in_pages_readable->do_page_fault)
 | 
					 *    ->mmap_sem		(fault_in_pages_readable->do_page_fault)
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *  ->i_mutex
 | 
					 | 
				
			||||||
 *    ->i_alloc_sem             (various)
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *  inode_wb_list_lock
 | 
					 *  inode_wb_list_lock
 | 
				
			||||||
 *    sb_lock			(fs/fs-writeback.c)
 | 
					 *    sb_lock			(fs/fs-writeback.c)
 | 
				
			||||||
 *    ->mapping->tree_lock	(__sync_single_inode)
 | 
					 *    ->mapping->tree_lock	(__sync_single_inode)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -218,7 +218,7 @@ static long madvise_remove(struct vm_area_struct *vma,
 | 
				
			||||||
	endoff = (loff_t)(end - vma->vm_start - 1)
 | 
						endoff = (loff_t)(end - vma->vm_start - 1)
 | 
				
			||||||
			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 | 
								+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* vmtruncate_range needs to take i_mutex and i_alloc_sem */
 | 
						/* vmtruncate_range needs to take i_mutex */
 | 
				
			||||||
	up_read(&current->mm->mmap_sem);
 | 
						up_read(&current->mm->mmap_sem);
 | 
				
			||||||
	error = vmtruncate_range(mapping->host, offset, endoff);
 | 
						error = vmtruncate_range(mapping->host, offset, endoff);
 | 
				
			||||||
	down_read(&current->mm->mmap_sem);
 | 
						down_read(&current->mm->mmap_sem);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -21,7 +21,6 @@
 | 
				
			||||||
 * Lock ordering in mm:
 | 
					 * Lock ordering in mm:
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * inode->i_mutex	(while writing or truncating, not reading or faulting)
 | 
					 * inode->i_mutex	(while writing or truncating, not reading or faulting)
 | 
				
			||||||
 *   inode->i_alloc_sem (vmtruncate_range)
 | 
					 | 
				
			||||||
 *   mm->mmap_sem
 | 
					 *   mm->mmap_sem
 | 
				
			||||||
 *     page->flags PG_locked (lock_page)
 | 
					 *     page->flags PG_locked (lock_page)
 | 
				
			||||||
 *       mapping->i_mmap_mutex
 | 
					 *       mapping->i_mmap_mutex
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -622,12 +622,11 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 | 
				
			||||||
		return -ENOSYS;
 | 
							return -ENOSYS;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mutex_lock(&inode->i_mutex);
 | 
						mutex_lock(&inode->i_mutex);
 | 
				
			||||||
	down_write(&inode->i_alloc_sem);
 | 
						inode_dio_wait(inode);
 | 
				
			||||||
	unmap_mapping_range(mapping, offset, (end - offset), 1);
 | 
						unmap_mapping_range(mapping, offset, (end - offset), 1);
 | 
				
			||||||
	inode->i_op->truncate_range(inode, offset, end);
 | 
						inode->i_op->truncate_range(inode, offset, end);
 | 
				
			||||||
	/* unmap again to remove racily COWed private pages */
 | 
						/* unmap again to remove racily COWed private pages */
 | 
				
			||||||
	unmap_mapping_range(mapping, offset, (end - offset), 1);
 | 
						unmap_mapping_range(mapping, offset, (end - offset), 1);
 | 
				
			||||||
	up_write(&inode->i_alloc_sem);
 | 
					 | 
				
			||||||
	mutex_unlock(&inode->i_mutex);
 | 
						mutex_unlock(&inode->i_mutex);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue