ext4: restructure writeback path
There are two issues with the current writeback path in ext4. For one, we don't necessarily map complete pages when blocksize < pagesize and thus do not necessarily do any writeback in one iteration. We always map some blocks, though, so we will eventually finish mapping the page. However, if writeback races with other operations on the file, forward progress is not really guaranteed. The second problem is that the current code structure makes it hard to associate all the bios for some range of pages with one io_end structure so that unwritten extents can be converted after all the bios are finished. This will be especially difficult later, when io_end will be associated with a reserved transaction handle. We restructure the writeback path into a relatively simple loop which first prepares an extent of pages, then maps one or more extents so that no page is partially mapped, and once a page is fully mapped it is submitted for IO. We keep all the mapping and IO submission information in the mpage_da_data structure to somewhat reduce stack usage. The resulting code is somewhat shorter than the old one and hopefully also easier to read. Reviewed-by: Zheng Liu <wenqing.lz@taobao.com> Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
		
					parent
					
						
							
								fffb273997
							
						
					
				
			
			
				commit
				
					
						4e7ea81db5
					
				
			
		
					 4 changed files with 526 additions and 566 deletions
				
			
		|  | @ -176,21 +176,6 @@ struct ext4_map_blocks { | |||
| 	unsigned int m_flags; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * For delayed allocation tracking | ||||
|  */ | ||||
| struct mpage_da_data { | ||||
| 	struct inode *inode; | ||||
| 	sector_t b_blocknr;		/* start block number of extent */ | ||||
| 	size_t b_size;			/* size of extent */ | ||||
| 	unsigned long b_state;		/* state of the extent */ | ||||
| 	unsigned long first_page, next_page;	/* extent of pages */ | ||||
| 	struct writeback_control *wbc; | ||||
| 	int io_done; | ||||
| 	int pages_written; | ||||
| 	int retval; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Flags for ext4_io_end->flags | ||||
|  */ | ||||
|  |  | |||
							
								
								
									
										1009
									
								
								fs/ext4/inode.c
									
										
									
									
									
								
							
							
						
						
									
										1009
									
								
								fs/ext4/inode.c
									
										
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -360,9 +360,6 @@ static int io_submit_init_bio(struct ext4_io_submit *io, | |||
| 	bio->bi_bdev = bh->b_bdev; | ||||
| 	bio->bi_end_io = ext4_end_bio; | ||||
| 	bio->bi_private = ext4_get_io_end(io->io_end); | ||||
| 	if (!io->io_end->size) | ||||
| 		io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT) | ||||
| 				     + bh_offset(bh); | ||||
| 	io->io_bio = bio; | ||||
| 	io->io_next_block = bh->b_blocknr; | ||||
| 	return 0; | ||||
|  | @ -390,7 +387,6 @@ submit_and_retry: | |||
| 	io_end = io->io_end; | ||||
| 	if (test_clear_buffer_uninit(bh)) | ||||
| 		ext4_set_io_unwritten_flag(inode, io_end); | ||||
| 	io_end->size += bh->b_size; | ||||
| 	io->io_next_block++; | ||||
| 	return 0; | ||||
| } | ||||
|  |  | |||
|  | @ -324,43 +324,59 @@ TRACE_EVENT(ext4_da_writepages, | |||
| ); | ||||
| 
 | ||||
| TRACE_EVENT(ext4_da_write_pages, | ||||
| 	TP_PROTO(struct inode *inode, struct mpage_da_data *mpd), | ||||
| 	TP_PROTO(struct inode *inode, pgoff_t first_page, | ||||
| 		 struct writeback_control *wbc), | ||||
| 
 | ||||
| 	TP_ARGS(inode, mpd), | ||||
| 	TP_ARGS(inode, first_page, wbc), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(	dev_t,	dev			) | ||||
| 		__field(	ino_t,	ino			) | ||||
| 		__field(	__u64,	b_blocknr		) | ||||
| 		__field(	__u32,	b_size			) | ||||
| 		__field(	__u32,	b_state			) | ||||
| 		__field(	unsigned long,	first_page	) | ||||
| 		__field(	int,	io_done			) | ||||
| 		__field(	int,	pages_written		) | ||||
| 		__field(	int,	sync_mode		) | ||||
| 		__field(      pgoff_t,	first_page		) | ||||
| 		__field(	 long,	nr_to_write		) | ||||
| 		__field(	  int,	sync_mode		) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->dev		= inode->i_sb->s_dev; | ||||
| 		__entry->ino		= inode->i_ino; | ||||
| 		__entry->b_blocknr	= mpd->b_blocknr; | ||||
| 		__entry->b_size		= mpd->b_size; | ||||
| 		__entry->b_state	= mpd->b_state; | ||||
| 		__entry->first_page	= mpd->first_page; | ||||
| 		__entry->io_done	= mpd->io_done; | ||||
| 		__entry->pages_written	= mpd->pages_written; | ||||
| 		__entry->sync_mode	= mpd->wbc->sync_mode; | ||||
| 		__entry->first_page	= first_page; | ||||
| 		__entry->nr_to_write	= wbc->nr_to_write; | ||||
| 		__entry->sync_mode	= wbc->sync_mode; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x " | ||||
| 		  "first_page %lu io_done %d pages_written %d sync_mode %d", | ||||
| 	TP_printk("dev %d,%d ino %lu first_page %lu nr_to_write %ld " | ||||
| 		  "sync_mode %d", | ||||
| 		  MAJOR(__entry->dev), MINOR(__entry->dev), | ||||
| 		  (unsigned long) __entry->ino, | ||||
| 		  __entry->b_blocknr, __entry->b_size, | ||||
| 		  __entry->b_state, __entry->first_page, | ||||
| 		  __entry->io_done, __entry->pages_written, | ||||
| 		  __entry->sync_mode | ||||
|                   ) | ||||
| 		  (unsigned long) __entry->ino, __entry->first_page, | ||||
| 		  __entry->nr_to_write, __entry->sync_mode) | ||||
| ); | ||||
| 
 | ||||
| TRACE_EVENT(ext4_da_write_pages_extent, | ||||
| 	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map), | ||||
| 
 | ||||
| 	TP_ARGS(inode, map), | ||||
| 
 | ||||
| 	TP_STRUCT__entry( | ||||
| 		__field(	dev_t,	dev			) | ||||
| 		__field(	ino_t,	ino			) | ||||
| 		__field(	__u64,	lblk			) | ||||
| 		__field(	__u32,	len			) | ||||
| 		__field(	__u32,	flags			) | ||||
| 	), | ||||
| 
 | ||||
| 	TP_fast_assign( | ||||
| 		__entry->dev		= inode->i_sb->s_dev; | ||||
| 		__entry->ino		= inode->i_ino; | ||||
| 		__entry->lblk		= map->m_lblk; | ||||
| 		__entry->len		= map->m_len; | ||||
| 		__entry->flags		= map->m_flags; | ||||
| 	), | ||||
| 
 | ||||
| 	TP_printk("dev %d,%d ino %lu lblk %llu len %u flags 0x%04x", | ||||
| 		  MAJOR(__entry->dev), MINOR(__entry->dev), | ||||
| 		  (unsigned long) __entry->ino, __entry->lblk, __entry->len, | ||||
| 		  __entry->flags) | ||||
| ); | ||||
| 
 | ||||
| TRACE_EVENT(ext4_da_writepages_result, | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Jan Kara
				Jan Kara