This patch introduces very limited functionality for atomic write support. In order to support atomic writes, this patch adds two ioctls: o F2FS_IOC_START_ATOMIC_WRITE o F2FS_IOC_COMMIT_ATOMIC_WRITE The database engine should be aware of the following sequence. 1. open -> ioctl(F2FS_IOC_START_ATOMIC_WRITE); 2. writes : all the written data will be treated as atomic pages. 3. commit -> ioctl(F2FS_IOC_COMMIT_ATOMIC_WRITE); : this flushes all the data blocks to the disk, which the f2fs recovery procedure will then expose as all or nothing. 4. repeat from step 2. The IO patterns should be: ,- START_ATOMIC_WRITE ,- COMMIT_ATOMIC_WRITE CP | D D D D D D | FSYNC | D D D D | FSYNC ... `- COMMIT_ATOMIC_WRITE Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
		
			
				
	
	
		
			259 lines
		
	
	
	
		
			6.2 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			259 lines
		
	
	
	
		
			6.2 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * fs/f2fs/inline.c
 | 
						|
 * Copyright (c) 2013, Intel Corporation
 | 
						|
 * Authors: Huajun Li <huajun.li@intel.com>
 | 
						|
 *          Haicheng Li <haicheng.li@intel.com>
 | 
						|
 * This program is free software; you can redistribute it and/or modify
 | 
						|
 * it under the terms of the GNU General Public License version 2 as
 | 
						|
 * published by the Free Software Foundation.
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/fs.h>
 | 
						|
#include <linux/f2fs_fs.h>
 | 
						|
 | 
						|
#include "f2fs.h"
 | 
						|
 | 
						|
bool f2fs_may_inline(struct inode *inode)
 | 
						|
{
 | 
						|
	block_t nr_blocks;
 | 
						|
	loff_t i_size;
 | 
						|
 | 
						|
	if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
 | 
						|
		return false;
 | 
						|
 | 
						|
	if (f2fs_is_atomic_file(inode))
 | 
						|
		return false;
 | 
						|
 | 
						|
	nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
 | 
						|
	if (inode->i_blocks > nr_blocks)
 | 
						|
		return false;
 | 
						|
 | 
						|
	i_size = i_size_read(inode);
 | 
						|
	if (i_size > MAX_INLINE_DATA)
 | 
						|
		return false;
 | 
						|
 | 
						|
	return true;
 | 
						|
}
 | 
						|
 | 
						|
int f2fs_read_inline_data(struct inode *inode, struct page *page)
 | 
						|
{
 | 
						|
	struct page *ipage;
 | 
						|
	void *src_addr, *dst_addr;
 | 
						|
 | 
						|
	if (page->index) {
 | 
						|
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
 | 
						|
		goto out;
 | 
						|
	}
 | 
						|
 | 
						|
	ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
 | 
						|
	if (IS_ERR(ipage)) {
 | 
						|
		unlock_page(page);
 | 
						|
		return PTR_ERR(ipage);
 | 
						|
	}
 | 
						|
 | 
						|
	zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
 | 
						|
 | 
						|
	/* Copy the whole inline data block */
 | 
						|
	src_addr = inline_data_addr(ipage);
 | 
						|
	dst_addr = kmap(page);
 | 
						|
	memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
 | 
						|
	kunmap(page);
 | 
						|
	f2fs_put_page(ipage, 1);
 | 
						|
 | 
						|
out:
 | 
						|
	SetPageUptodate(page);
 | 
						|
	unlock_page(page);
 | 
						|
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
 | 
						|
{
 | 
						|
	int err = 0;
 | 
						|
	struct page *ipage;
 | 
						|
	struct dnode_of_data dn;
 | 
						|
	void *src_addr, *dst_addr;
 | 
						|
	block_t new_blk_addr;
 | 
						|
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 | 
						|
	struct f2fs_io_info fio = {
 | 
						|
		.type = DATA,
 | 
						|
		.rw = WRITE_SYNC | REQ_PRIO,
 | 
						|
	};
 | 
						|
 | 
						|
	f2fs_lock_op(sbi);
 | 
						|
	ipage = get_node_page(sbi, inode->i_ino);
 | 
						|
	if (IS_ERR(ipage)) {
 | 
						|
		err = PTR_ERR(ipage);
 | 
						|
		goto out;
 | 
						|
	}
 | 
						|
 | 
						|
	/* someone else converted inline_data already */
 | 
						|
	if (!f2fs_has_inline_data(inode))
 | 
						|
		goto out;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * i_addr[0] is not used for inline data,
 | 
						|
	 * so reserving new block will not destroy inline data
 | 
						|
	 */
 | 
						|
	set_new_dnode(&dn, inode, ipage, NULL, 0);
 | 
						|
	err = f2fs_reserve_block(&dn, 0);
 | 
						|
	if (err)
 | 
						|
		goto out;
 | 
						|
 | 
						|
	f2fs_wait_on_page_writeback(page, DATA);
 | 
						|
	zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
 | 
						|
 | 
						|
	/* Copy the whole inline data block */
 | 
						|
	src_addr = inline_data_addr(ipage);
 | 
						|
	dst_addr = kmap(page);
 | 
						|
	memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
 | 
						|
	kunmap(page);
 | 
						|
	SetPageUptodate(page);
 | 
						|
 | 
						|
	/* write data page to try to make data consistent */
 | 
						|
	set_page_writeback(page);
 | 
						|
	write_data_page(page, &dn, &new_blk_addr, &fio);
 | 
						|
	update_extent_cache(new_blk_addr, &dn);
 | 
						|
	f2fs_wait_on_page_writeback(page, DATA);
 | 
						|
 | 
						|
	/* clear inline data and flag after data writeback */
 | 
						|
	zero_user_segment(ipage, INLINE_DATA_OFFSET,
 | 
						|
				 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
 | 
						|
	clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
 | 
						|
	stat_dec_inline_inode(inode);
 | 
						|
 | 
						|
	sync_inode_page(&dn);
 | 
						|
	f2fs_put_dnode(&dn);
 | 
						|
out:
 | 
						|
	f2fs_unlock_op(sbi);
 | 
						|
	return err;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Convert @inode away from inline data when it is about to grow past the
 * inline capacity.
 *
 * @inode:   inode to convert
 * @to_size: size the file is going to reach
 * @page:    caller's page; used directly only when it is page index 0,
 *           may be NULL
 *
 * Nothing is done when the inode has no inline data or when @to_size
 * still fits in MAX_INLINE_DATA.  If the caller did not supply page 0,
 * page 0 is grabbed from the inode's mapping for the conversion and
 * released again afterwards.
 *
 * Returns 0 when no conversion is needed, -ENOMEM when page 0 cannot be
 * grabbed, otherwise the result of __f2fs_convert_inline_data().
 */
int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size,
						struct page *page)
{
	struct page *target = page;
	bool grabbed = false;
	int ret;

	if (!f2fs_has_inline_data(inode) || to_size <= MAX_INLINE_DATA)
		return 0;

	if (!page || page->index != 0) {
		target = grab_cache_page(inode->i_mapping, 0);
		if (!target)
			return -ENOMEM;
		grabbed = true;
	}

	ret = __f2fs_convert_inline_data(inode, target);

	/* only drop the page we grabbed ourselves, never the caller's */
	if (grabbed)
		f2fs_put_page(target, 1);

	return ret;
}
 | 
						|
 | 
						|
int f2fs_write_inline_data(struct inode *inode,
 | 
						|
				struct page *page, unsigned size)
 | 
						|
{
 | 
						|
	void *src_addr, *dst_addr;
 | 
						|
	struct page *ipage;
 | 
						|
	struct dnode_of_data dn;
 | 
						|
	int err;
 | 
						|
 | 
						|
	set_new_dnode(&dn, inode, NULL, NULL, 0);
 | 
						|
	err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
 | 
						|
	if (err)
 | 
						|
		return err;
 | 
						|
	ipage = dn.inode_page;
 | 
						|
 | 
						|
	f2fs_wait_on_page_writeback(ipage, NODE);
 | 
						|
	zero_user_segment(ipage, INLINE_DATA_OFFSET,
 | 
						|
				 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
 | 
						|
	src_addr = kmap(page);
 | 
						|
	dst_addr = inline_data_addr(ipage);
 | 
						|
	memcpy(dst_addr, src_addr, size);
 | 
						|
	kunmap(page);
 | 
						|
 | 
						|
	/* Release the first data block if it is allocated */
 | 
						|
	if (!f2fs_has_inline_data(inode)) {
 | 
						|
		truncate_data_blocks_range(&dn, 1);
 | 
						|
		set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
 | 
						|
		stat_inc_inline_inode(inode);
 | 
						|
	}
 | 
						|
 | 
						|
	set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
 | 
						|
	sync_inode_page(&dn);
 | 
						|
	f2fs_put_dnode(&dn);
 | 
						|
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Zero the inline data of @inode from byte offset @from up to the end of
 * the inline area and mark the node page dirty.
 *
 * Does nothing when @from lies at or beyond MAX_INLINE_DATA.  A failure
 * to get the node page is silently ignored, since the function has no
 * way to report it.
 */
void truncate_inline_data(struct inode *inode, u64 from)
{
	struct page *node_pg;

	if (from < MAX_INLINE_DATA) {
		node_pg = get_node_page(F2FS_I_SB(inode), inode->i_ino);
		if (!IS_ERR(node_pg)) {
			f2fs_wait_on_page_writeback(node_pg, NODE);

			zero_user_segment(node_pg, INLINE_DATA_OFFSET + from,
					INLINE_DATA_OFFSET + MAX_INLINE_DATA);
			set_page_dirty(node_pg);
			f2fs_put_page(node_pg, 1);
		}
	}
}
 | 
						|
 | 
						|
bool recover_inline_data(struct inode *inode, struct page *npage)
 | 
						|
{
 | 
						|
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 | 
						|
	struct f2fs_inode *ri = NULL;
 | 
						|
	void *src_addr, *dst_addr;
 | 
						|
	struct page *ipage;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * The inline_data recovery policy is as follows.
 | 
						|
	 * [prev.] [next] of inline_data flag
 | 
						|
	 *    o       o  -> recover inline_data
 | 
						|
	 *    o       x  -> remove inline_data, and then recover data blocks
 | 
						|
	 *    x       o  -> remove inline_data, and then recover inline_data
 | 
						|
	 *    x       x  -> recover data blocks
 | 
						|
	 */
 | 
						|
	if (IS_INODE(npage))
 | 
						|
		ri = F2FS_INODE(npage);
 | 
						|
 | 
						|
	if (f2fs_has_inline_data(inode) &&
 | 
						|
			ri && (ri->i_inline & F2FS_INLINE_DATA)) {
 | 
						|
process_inline:
 | 
						|
		ipage = get_node_page(sbi, inode->i_ino);
 | 
						|
		f2fs_bug_on(sbi, IS_ERR(ipage));
 | 
						|
 | 
						|
		f2fs_wait_on_page_writeback(ipage, NODE);
 | 
						|
 | 
						|
		src_addr = inline_data_addr(npage);
 | 
						|
		dst_addr = inline_data_addr(ipage);
 | 
						|
		memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
 | 
						|
		update_inode(inode, ipage);
 | 
						|
		f2fs_put_page(ipage, 1);
 | 
						|
		return true;
 | 
						|
	}
 | 
						|
 | 
						|
	if (f2fs_has_inline_data(inode)) {
 | 
						|
		ipage = get_node_page(sbi, inode->i_ino);
 | 
						|
		f2fs_bug_on(sbi, IS_ERR(ipage));
 | 
						|
		f2fs_wait_on_page_writeback(ipage, NODE);
 | 
						|
		zero_user_segment(ipage, INLINE_DATA_OFFSET,
 | 
						|
				 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
 | 
						|
		clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
 | 
						|
		update_inode(inode, ipage);
 | 
						|
		f2fs_put_page(ipage, 1);
 | 
						|
	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
 | 
						|
		truncate_blocks(inode, 0, false);
 | 
						|
		set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
 | 
						|
		goto process_inline;
 | 
						|
	}
 | 
						|
	return false;
 | 
						|
}
 |