 7ef3ff2fea
			
		
	
	
	7ef3ff2fea
	
	
	
		
			
			Nilfs2 eventually hangs in a stress test with fsstress program.  This
issue was caused by the following deadlock over I_SYNC flag between
nilfs_segctor_thread() and writeback_sb_inodes():
  nilfs_segctor_thread()
    nilfs_segctor_thread_construct()
      nilfs_segctor_unlock()
        nilfs_dispose_list()
          iput()
            iput_final()
              evict()
                inode_wait_for_writeback()  * wait for I_SYNC flag
  writeback_sb_inodes()
     * set I_SYNC flag on inode->i_state
    __writeback_single_inode()
      do_writepages()
        nilfs_writepages()
          nilfs_construct_dsync_segment()
            nilfs_segctor_sync()
               * wait for completion of segment constructor
    inode_sync_complete()
       * clear I_SYNC flag after __writeback_single_inode() completed
writeback_sb_inodes() calls do_writepages() for dirty inodes after
setting I_SYNC flag on inode->i_state.  do_writepages() in turn calls
nilfs_writepages(), which can run segment constructor and wait for its
completion.  On the other hand, segment constructor calls iput(), which
can call evict() and wait for the I_SYNC flag on
inode_wait_for_writeback().
Since segment constructor doesn't know when I_SYNC will be set, it
cannot know whether iput() will block or not unless inode->i_nlink has a
non-zero count.  We can prevent evict() from being called in iput() by
implementing sop->drop_inode(), but it's not preferable to leave inodes
with i_nlink == 0 for long periods because it even defers file
truncation and inode deallocation.  So, this instead resolves the
deadlock by calling iput() asynchronously with a workqueue for inodes
with i_nlink == 0.
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Tested-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
	
			
		
			
				
	
	
		
			251 lines
		
	
	
	
		
			8.3 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			251 lines
		
	
	
	
		
			8.3 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * segment.h - NILFS Segment constructor prototypes and definitions
 | |
|  *
 | |
|  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or modify
 | |
|  * it under the terms of the GNU General Public License as published by
 | |
|  * the Free Software Foundation; either version 2 of the License, or
 | |
|  * (at your option) any later version.
 | |
|  *
 | |
|  * This program is distributed in the hope that it will be useful,
 | |
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|  * GNU General Public License for more details.
 | |
|  *
 | |
|  * You should have received a copy of the GNU General Public License
 | |
|  * along with this program; if not, write to the Free Software
 | |
|  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 | |
|  *
 | |
|  * Written by Ryusuke Konishi <ryusuke@osrg.net>
 | |
|  *
 | |
|  */
 | |
| #ifndef _NILFS_SEGMENT_H
 | |
| #define _NILFS_SEGMENT_H
 | |
| 
 | |
| #include <linux/types.h>
 | |
| #include <linux/fs.h>
 | |
| #include <linux/buffer_head.h>
 | |
| #include <linux/workqueue.h>
 | |
| #include <linux/nilfs2_fs.h>
 | |
| #include "nilfs.h"
 | |
| 
 | |
| struct nilfs_root;
 | |
| 
 | |
| /**
 | |
|  * struct nilfs_recovery_info - Recovery information
 | |
|  * @ri_need_recovery: Recovery status (one of the NILFS_RECOVERY_* values)
 | |
|  * @ri_super_root: Block number of the last super root
 | |
|  * @ri_cno: Number of the last checkpoint
 | |
|  * @ri_lsegs_start: Region for roll-forwarding (start block number)
 | |
|  * @ri_lsegs_end: Region for roll-forwarding (end block number)
 | |
|  * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start
 | |
|  * @ri_used_segments: List of segments to be marked active
 | |
|  * @ri_pseg_start: Block number of the last partial segment
 | |
|  * @ri_seq: Sequence number on the last partial segment
 | |
|  * @ri_segnum: Segment number on the last partial segment
 | |
|  * @ri_nextnum: Next segment number on the last partial segment
 | |
|  */
 | |
| struct nilfs_recovery_info {
 | |
| 	int			ri_need_recovery;
 | |
| 	sector_t		ri_super_root;
 | |
| 	__u64			ri_cno;
 | |
| 
 | |
| 	sector_t		ri_lsegs_start;
 | |
| 	sector_t		ri_lsegs_end;
 | |
| 	u64			ri_lsegs_start_seq;
 | |
| 	struct list_head	ri_used_segments;
 | |
| 	sector_t		ri_pseg_start;
 | |
| 	u64			ri_seq;
 | |
| 	__u64			ri_segnum;
 | |
| 	__u64			ri_nextnum;
 | |
| };
 | |
| 
 | |
| /* ri_need_recovery */
 | |
| #define NILFS_RECOVERY_SR_UPDATED	 1  /* The super root was updated */
 | |
| #define NILFS_RECOVERY_ROLLFORWARD_DONE	 2  /* Rollforward was carried out */
 | |
| 
 | |
| /**
 | |
|  * struct nilfs_cstage - Context of collection stage
 | |
|  * @scnt: Stage count
 | |
|  * @flags: State flags
 | |
|  * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file
 | |
|  * @gc_inode_ptr: Pointer on the list of gc-inodes
 | |
|  *
 | |
|  * Embedded in struct nilfs_sc_info as the sc_stage member.
 | |
|  */
 | |
| struct nilfs_cstage {
 | |
| 	int			scnt;
 | |
| 	unsigned		flags;
 | |
| 	struct nilfs_inode_info *dirty_file_ptr;
 | |
| 	struct nilfs_inode_info *gc_inode_ptr;
 | |
| };
 | |
| 
 | |
| struct nilfs_segment_buffer;
 | |
| 
 | |
| struct nilfs_segsum_pointer {	/* buffer + byte offset pair; type of sc_finfo_ptr and sc_binfo_ptr in struct nilfs_sc_info */
 | |
| 	struct buffer_head     *bh;	/* presumably the buffer that @offset is relative to -- confirm in segment.c */
 | |
| 	unsigned		offset; /* offset in bytes */
 | |
| };
 | |
| 
 | |
| /**
 | |
|  * struct nilfs_sc_info - Segment constructor information
 | |
|  * @sc_super: Back pointer to super_block struct
 | |
|  * @sc_root: root object of the current filesystem tree
 | |
|  * @sc_nblk_inc: Block count of current generation
 | |
|  * @sc_dirty_files: List of files to be written
 | |
|  * @sc_gc_inodes: List of GC inodes having blocks to be written
 | |
|  * @sc_iput_queue: list of inodes for which iput should be done
 | |
|  * @sc_iput_work: work struct to defer iput call
 | |
|  * @sc_freesegs: array of segment numbers to be freed
 | |
|  * @sc_nfreesegs: number of segments on @sc_freesegs
 | |
|  * @sc_dsync_inode: inode whose data pages are written for a sync operation
 | |
|  * @sc_dsync_start: start byte offset of data pages
 | |
|  * @sc_dsync_end: end byte offset of data pages (inclusive)
 | |
|  * @sc_segbufs: List of segment buffers
 | |
|  * @sc_write_logs: List of segment buffers to hold logs under writing
 | |
|  * @sc_segbuf_nblocks: Number of available blocks in segment buffers.
 | |
|  * @sc_curseg: Current segment buffer
 | |
|  * @sc_stage: Collection stage
 | |
|  * @sc_finfo_ptr: pointer to the current finfo struct in the segment summary
 | |
|  * @sc_binfo_ptr: pointer to the current binfo struct in the segment summary
 | |
|  * @sc_blk_cnt:	Block count of a file
 | |
|  * @sc_datablk_cnt: Data block count of a file
 | |
|  * @sc_nblk_this_inc: Number of blocks included in the current logical segment
 | |
|  * @sc_seg_ctime: Creation time
 | |
|  * @sc_cno: checkpoint number of current log
 | |
|  * @sc_flags: Internal flags
 | |
|  * @sc_state_lock: spinlock for sc_state and so on
 | |
|  * @sc_state: Segctord state flags
 | |
|  * @sc_flush_request: inode bitmap of metadata files to be flushed
 | |
|  * @sc_wait_request: Client request queue
 | |
|  * @sc_wait_daemon: Daemon wait queue
 | |
|  * @sc_wait_task: Start/end wait queue to control segctord task
 | |
|  * @sc_seq_request: Request counter
 | |
|  * @sc_seq_accepted: Accepted request count
 | |
|  * @sc_seq_done: Completion counter
 | |
|  * @sc_sync: Request of explicit sync operation
 | |
|  * @sc_interval: Timeout value of background construction
 | |
|  * @sc_mjcp_freq: Frequency of creating checkpoints
 | |
|  * @sc_lseg_stime: Start time of the latest logical segment
 | |
|  * @sc_watermark: Watermark for the number of dirty buffers
 | |
|  * @sc_timer: Timer for segctord
 | |
|  * @sc_task: current thread of segctord
 | |
|  */
 | |
| struct nilfs_sc_info {	/* segment constructor information; members are described in the kernel-doc comment preceding this struct */
 | |
| 	struct super_block     *sc_super;
 | |
| 	struct nilfs_root      *sc_root;
 | |
| 
 | |
| 	unsigned long		sc_nblk_inc;
 | |
| 
 | |
| 	struct list_head	sc_dirty_files;
 | |
| 	struct list_head	sc_gc_inodes;
 | |
| 	struct list_head	sc_iput_queue;
 | |
| 	struct work_struct	sc_iput_work;
 | |
| 
 | |
| 	__u64		       *sc_freesegs;
 | |
| 	size_t			sc_nfreesegs;
 | |
| 
 | |
| 	struct nilfs_inode_info *sc_dsync_inode;
 | |
| 	loff_t			sc_dsync_start;
 | |
| 	loff_t			sc_dsync_end;
 | |
| 
 | |
| 	/* Segment buffers */
 | |
| 	struct list_head	sc_segbufs;
 | |
| 	struct list_head	sc_write_logs;
 | |
| 	unsigned long		sc_segbuf_nblocks;
 | |
| 	struct nilfs_segment_buffer *sc_curseg;
 | |
| 
 | |
| 	struct nilfs_cstage	sc_stage;	/* embedded by value, not a pointer */
 | |
| 
 | |
| 	struct nilfs_segsum_pointer sc_finfo_ptr;
 | |
| 	struct nilfs_segsum_pointer sc_binfo_ptr;
 | |
| 	unsigned long		sc_blk_cnt;
 | |
| 	unsigned long		sc_datablk_cnt;
 | |
| 	unsigned long		sc_nblk_this_inc;
 | |
| 	time_t			sc_seg_ctime;
 | |
| 	__u64			sc_cno;
 | |
| 	unsigned long		sc_flags;	/* NILFS_SC_* values from the enum below; presumably used as bit numbers -- confirm */
 | |
| 
 | |
| 	spinlock_t		sc_state_lock;
 | |
| 	unsigned long		sc_state;	/* NILFS_SEGCTOR_* flags defined below */
 | |
| 	unsigned long		sc_flush_request;
 | |
| 
 | |
| 	wait_queue_head_t	sc_wait_request;
 | |
| 	wait_queue_head_t	sc_wait_daemon;
 | |
| 	wait_queue_head_t	sc_wait_task;
 | |
| 
 | |
| 	__u32			sc_seq_request;
 | |
| 	__u32			sc_seq_accepted;	/* accepted request count */
 | |
| 	__u32			sc_seq_done;
 | |
| 
 | |
| 	int			sc_sync;
 | |
| 	unsigned long		sc_interval;
 | |
| 	unsigned long		sc_mjcp_freq;
 | |
| 	unsigned long		sc_lseg_stime;	/* in 1/HZ seconds */
 | |
| 	unsigned long		sc_watermark;
 | |
| 
 | |
| 	struct timer_list	sc_timer;
 | |
| 	struct task_struct     *sc_task;
 | |
| };
 | |
| 
 | |
| /* sc_flags: flags for the sc_flags member of struct nilfs_sc_info */
 | |
| enum {
 | |
| 	NILFS_SC_DIRTY,		/* One or more dirty meta-data blocks exist */
 | |
| 	NILFS_SC_UNCLOSED,	/* Logical segment is not closed */
 | |
| 	NILFS_SC_SUPER_ROOT,	/* The latest segment has a super root */
 | |
| 	NILFS_SC_PRIOR_FLUSH,	/* Requesting immediate flush without making a
 | |
| 				   checkpoint */
 | |
| 	NILFS_SC_HAVE_DELTA,	/* Next checkpoint will have update of files
 | |
| 				   other than DAT, cpfile, sufile, or files
 | |
| 				   moved by GC */
 | |
| };
 | |
| 
 | |
| /* sc_state */
 | |
| #define NILFS_SEGCTOR_QUIT	    0x0001  /* segctord is being destroyed */
 | |
| #define NILFS_SEGCTOR_COMMIT	    0x0004  /* committed transaction exists */
 | |
| 
 | |
| /*
 | |
|  * Constant parameters
 | |
|  */
 | |
| #define NILFS_SC_CLEANUP_RETRY	    3  /* Retry count of construction when
 | |
| 					  destroying segctord */
 | |
| 
 | |
| /*
 | |
|  * Default values of timeout, in seconds.
 | |
|  */
 | |
| #define NILFS_SC_DEFAULT_TIMEOUT    5   /* Timeout value of dirty blocks.
 | |
| 					   It triggers construction of a
 | |
| 					   logical segment with a super root */
 | |
| #define NILFS_SC_DEFAULT_SR_FREQ    30  /* Maximum frequency of super root
 | |
| 					   creation */
 | |
| 
 | |
| /*
 | |
|  * The default threshold amount of data, in block counts.
 | |
|  */
 | |
| #define NILFS_SC_DEFAULT_WATERMARK  3600
 | |
| 
 | |
| /* super.c */
 | |
| extern struct kmem_cache *nilfs_transaction_cachep;
 | |
| 
 | |
| /* segment.c */
 | |
| extern void nilfs_relax_pressure_in_lock(struct super_block *);
 | |
| 
 | |
| extern int nilfs_construct_segment(struct super_block *);
 | |
| extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *,
 | |
| 					 loff_t, loff_t);
 | |
| extern void nilfs_flush_segment(struct super_block *, ino_t);
 | |
| extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
 | |
| 				void **);
 | |
| 
 | |
| int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root);
 | |
| void nilfs_detach_log_writer(struct super_block *sb);
 | |
| 
 | |
| /* recovery.c */
 | |
| extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t,
 | |
| 				       struct buffer_head **, int);
 | |
| extern int nilfs_search_super_root(struct the_nilfs *,
 | |
| 				   struct nilfs_recovery_info *);
 | |
| int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, struct super_block *sb,
 | |
| 			      struct nilfs_recovery_info *ri);
 | |
| extern void nilfs_dispose_segment_list(struct list_head *);
 | |
| 
 | |
| #endif /* _NILFS_SEGMENT_H */
 |