Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This pull is mostly cleanups and fixes:

   - The raid5/6 cleanups from Zhao Lei fixup some long standing warts
     in the code and add improvements on top of the scrubbing support
     from 3.19.

   - Josef has round one of our ENOSPC fixes coming from large btrfs
     clusters here at FB.

   - Dave Sterba continues a long series of cleanups (thanks Dave), and
     Filipe continues hammering on corner cases in fsync and others

  This all was held up a little trying to track down a use-after-free in
  btrfs raid5/6.  It's not clear yet if this is just made easier to
  trigger with this pull or if its a new bug from the raid5/6 cleanups.
  Dave Sterba is the only one to trigger it so far, but he has a
  consistent way to reproduce, so we'll get it nailed shortly"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (68 commits)
  Btrfs: don't remove extents and xattrs when logging new names
  Btrfs: fix fsync data loss after adding hard link to inode
  Btrfs: fix BUG_ON in btrfs_orphan_add() when delete unused block group
  Btrfs: account for large extents with enospc
  Btrfs: don't set and clear delalloc for O_DIRECT writes
  Btrfs: only adjust outstanding_extents when we do a short write
  btrfs: Fix out-of-space bug
  Btrfs: scrub, fix sleep in atomic context
  Btrfs: fix scheduler warning when syncing log
  Btrfs: Remove unnecessary placeholder in btrfs_err_code
  btrfs: cleanup init for list in free-space-cache
  btrfs: delete chunk allocation attemp when setting block group ro
  btrfs: clear bio reference after submit_one_bio()
  Btrfs: fix scrub race leading to use-after-free
  Btrfs: add missing cleanup on sysfs init failure
  Btrfs: fix race between transaction commit and empty block group removal
  btrfs: add more checks to btrfs_read_sys_array
  btrfs: cleanup, rename a few variables in btrfs_read_sys_array
  btrfs: add checks for sys_chunk_array sizes
  btrfs: more superblock checks, lower bounds on devices and sectorsize/nodesize
  ...
This commit is contained in:
Linus Torvalds 2015-02-19 14:36:00 -08:00
commit 2b9fb532d4
34 changed files with 1065 additions and 863 deletions

View file

@ -318,7 +318,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
memcpy(&found, result, csum_size);
read_extent_buffer(buf, &val, 0, csum_size);
printk_ratelimited(KERN_INFO
printk_ratelimited(KERN_WARNING
"BTRFS: %s checksum verify failed on %llu wanted %X found %X "
"level %d\n",
root->fs_info->sb->s_id, buf->start,
@ -367,7 +367,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
ret = 0;
goto out;
}
printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
printk_ratelimited(KERN_ERR
"BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n",
eb->fs_info->sb->s_id, eb->start,
parent_transid, btrfs_header_generation(eb));
ret = 1;
@ -633,21 +634,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start "
printk_ratelimited(KERN_ERR "BTRFS (device %s): bad tree block start "
"%llu %llu\n",
eb->fs_info->sb->s_id, found_start, eb->start);
ret = -EIO;
goto err;
}
if (check_tree_block_fsid(root, eb)) {
printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n",
printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
eb->fs_info->sb->s_id, eb->start);
ret = -EIO;
goto err;
}
found_level = btrfs_header_level(eb);
if (found_level >= BTRFS_MAX_LEVEL) {
btrfs_info(root->fs_info, "bad tree block level %d",
btrfs_err(root->fs_info, "bad tree block level %d",
(int)btrfs_header_level(eb));
ret = -EIO;
goto err;
@ -1073,12 +1074,12 @@ static const struct address_space_operations btree_aops = {
.set_page_dirty = btree_set_page_dirty,
};
void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
{
struct extent_buffer *buf = NULL;
struct inode *btree_inode = root->fs_info->btree_inode;
buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
return;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
@ -1086,7 +1087,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
free_extent_buffer(buf);
}
int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
int mirror_num, struct extent_buffer **eb)
{
struct extent_buffer *buf = NULL;
@ -1094,7 +1095,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
int ret;
buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
return 0;
@ -1125,12 +1126,11 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
}
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize)
u64 bytenr)
{
if (btrfs_test_is_dummy_root(root))
return alloc_test_extent_buffer(root->fs_info, bytenr,
blocksize);
return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
return alloc_test_extent_buffer(root->fs_info, bytenr);
return alloc_extent_buffer(root->fs_info, bytenr);
}
@ -1152,7 +1152,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
struct extent_buffer *buf = NULL;
int ret;
buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize);
buf = btrfs_find_create_tree_block(root, bytenr);
if (!buf)
return NULL;
@ -1275,12 +1275,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
memset(&root->root_kobj, 0, sizeof(root->root_kobj));
if (fs_info)
root->defrag_trans_start = fs_info->generation;
else
root->defrag_trans_start = 0;
init_completion(&root->kobj_unregister);
root->root_key.objectid = objectid;
root->anon_dev = 0;
@ -1630,6 +1628,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
bool check_ref)
{
struct btrfs_root *root;
struct btrfs_path *path;
struct btrfs_key key;
int ret;
if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
@ -1669,8 +1669,17 @@ again:
if (ret)
goto fail;
ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID,
location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL);
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto fail;
}
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = location->objectid;
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
btrfs_free_path(path);
if (ret < 0)
goto fail;
if (ret == 0)
@ -2232,6 +2241,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
mutex_init(&fs_info->unused_bg_unpin_mutex);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
@ -2496,7 +2506,7 @@ int open_ctree(struct super_block *sb,
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
printk(KERN_ERR "BTRFS: has skinny extents\n");
printk(KERN_INFO "BTRFS: has skinny extents\n");
/*
* flag our filesystem as having big metadata blocks if
@ -2520,7 +2530,7 @@ int open_ctree(struct super_block *sb,
*/
if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
(sectorsize != nodesize)) {
printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes "
"are not allowed for mixed block groups on %s\n",
sb->s_id);
goto fail_alloc;
@ -2628,12 +2638,12 @@ int open_ctree(struct super_block *sb,
sb->s_blocksize_bits = blksize_bits(sectorsize);
if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id);
printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
goto fail_sb_buffer;
}
if (sectorsize != PAGE_SIZE) {
printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) "
printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
"found on %s\n", (unsigned long)sectorsize, sb->s_id);
goto fail_sb_buffer;
}
@ -2642,7 +2652,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_read_sys_array(tree_root);
mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
printk(KERN_WARNING "BTRFS: failed to read the system "
printk(KERN_ERR "BTRFS: failed to read the system "
"array on %s\n", sb->s_id);
goto fail_sb_buffer;
}
@ -2657,7 +2667,7 @@ int open_ctree(struct super_block *sb,
generation);
if (!chunk_root->node ||
!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
sb->s_id);
goto fail_tree_roots;
}
@ -2669,7 +2679,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_read_chunk_tree(chunk_root);
if (ret) {
printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n",
printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n",
sb->s_id);
goto fail_tree_roots;
}
@ -2681,7 +2691,7 @@ int open_ctree(struct super_block *sb,
btrfs_close_extra_devices(fs_info, fs_devices, 0);
if (!fs_devices->latest_bdev) {
printk(KERN_CRIT "BTRFS: failed to read devices on %s\n",
printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
sb->s_id);
goto fail_tree_roots;
}
@ -2765,7 +2775,7 @@ retry_root_backup:
ret = btrfs_recover_balance(fs_info);
if (ret) {
printk(KERN_WARNING "BTRFS: failed to recover balance\n");
printk(KERN_ERR "BTRFS: failed to recover balance\n");
goto fail_block_groups;
}
@ -3860,6 +3870,21 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
btrfs_super_log_root(sb));
/*
* Check the lower bound, the alignment and other constraints are
* checked later.
*/
if (btrfs_super_nodesize(sb) < 4096) {
printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
btrfs_super_nodesize(sb));
ret = -EINVAL;
}
if (btrfs_super_sectorsize(sb) < 4096) {
printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
btrfs_super_sectorsize(sb));
ret = -EINVAL;
}
if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
fs_info->fsid, sb->dev_item.fsid);
@ -3873,6 +3898,10 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
if (btrfs_super_num_devices(sb) > (1UL << 31))
printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
btrfs_super_num_devices(sb));
if (btrfs_super_num_devices(sb) == 0) {
printk(KERN_ERR "BTRFS: number of devices is 0\n");
ret = -EINVAL;
}
if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
@ -3880,6 +3909,25 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
/*
* Obvious sys_chunk_array corruptions, it must hold at least one key
* and one chunk
*/
if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
printk(KERN_ERR "BTRFS: system chunk array too big %u > %u\n",
btrfs_super_sys_array_size(sb),
BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
ret = -EINVAL;
}
if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+ sizeof(struct btrfs_chunk)) {
printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n",
btrfs_super_sys_array_size(sb),
sizeof(struct btrfs_disk_key)
+ sizeof(struct btrfs_chunk));
ret = -EINVAL;
}
/*
* The generation is a global counter, we'll trust it more than the others
* but it's still possible that it's the one that's wrong.