Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (61 commits)
  ext4: Documention update for new ordered mode and delayed allocation
  ext4: do not set extents feature from the kernel
  ext4: Don't allow nonextenst mount option for large filesystem
  ext4: Enable delalloc by default.
  ext4: delayed allocation i_blocks fix for stat
  ext4: fix delalloc i_disksize early update issue
  ext4: Handle page without buffers in ext4_*_writepage()
  ext4: Add ordered mode support for delalloc
  ext4: Invert lock ordering of page_lock and transaction start in delalloc
  mm: Add range_cont mode for writeback
  ext4: delayed allocation ENOSPC handling
  percpu_counter: new function percpu_counter_sum_and_set
  ext4: Add delayed allocation support in data=writeback mode
  vfs: add hooks for ext4's delayed allocation support
  jbd2: Remove data=ordered mode support using jbd buffer heads
  ext4: Use new framework for data=ordered mode in JBD2
  jbd2: Implement data=ordered mode handling via inodes
  vfs: export filemap_fdatawrite_range()
  ext4: Fix lock inversion in ext4_ext_truncate()
  ext4: Invert the locking order of page_lock and transaction start
  ...

commit 8d2567a620
35 changed files with 2820 additions and 1057 deletions
Documentation/filesystems/ext4.txt

@@ -13,72 +13,93 @@ Mailing list: linux-ext4@vger.kernel.org
 1. Quick usage instructions:
 ===========================
 
-  - Grab updated e2fsprogs from
-    ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs-interim/
-    This is a patchset on top of e2fsprogs-1.39, which can be found at
+  - Compile and install the latest version of e2fsprogs (as of this
+    writing version 1.41) from:
+
+    http://sourceforge.net/project/showfiles.php?group_id=2406
+
+    or
+
     ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/
 
-  - It's still mke2fs -j /dev/hda1
+    or grab the latest git repository from:
 
-  - mount /dev/hda1 /wherever -t ext4dev
+    git://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git
 
-  - To enable extents,
+  - Create a new filesystem using the ext4dev filesystem type:
 
-        mount /dev/hda1 /wherever -t ext4dev -o extents
+        # mke2fs -t ext4dev /dev/hda1
 
-  - The filesystem is compatible with the ext3 driver until you add a file
-    which has extents (ie: `mount -o extents', then create a file).
+    Or configure an existing ext3 filesystem to support extents and set
+    the test_fs flag to indicate that it's ok for an in-development
+    filesystem to touch this filesystem:
 
-  NOTE: The "extents" mount flag is temporary.  It will soon go away and
-  extents will be enabled by the "-o extents" flag to mke2fs or tune2fs
+        # tune2fs -O extents -E test_fs /dev/hda1
+
+    If the filesystem was created with 128 byte inodes, it can be
+    converted to use 256 byte for greater efficiency via:
+
+        # tune2fs -I 256 /dev/hda1
+
+    (Note: we currently do not have tools to convert an ext4dev
+    filesystem back to ext3; so please do not do try this on production
+    filesystems.)
+
+  - Mounting:
+
+        # mount -t ext4dev /dev/hda1 /wherever
 
   - When comparing performance with other filesystems, remember that
-    ext3/4 by default offers higher data integrity guarantees than most.  So
-    when comparing with a metadata-only journalling filesystem, use `mount -o
-    data=writeback'.  And you might as well use `mount -o nobh' too along
-    with it.  Making the journal larger than the mke2fs default often helps
-    performance with metadata-intensive workloads.
+    ext3/4 by default offers higher data integrity guarantees than most.
+    So when comparing with a metadata-only journalling filesystem, such
+    as ext3, use `mount -o data=writeback'.  And you might as well use
+    `mount -o nobh' too along with it.  Making the journal larger than
+    the mke2fs default often helps performance with metadata-intensive
+    workloads.
 
 2. Features
 ===========
 
 2.1 Currently available
 
-* ability to use filesystems > 16TB
+* ability to use filesystems > 16TB (e2fsprogs support not available yet)
 * extent format reduces metadata overhead (RAM, IO for access, transactions)
 * extent format more robust in face of on-disk corruption due to magics,
 * internal redunancy in tree
-
-2.1 Previously available, soon to be enabled by default by "mkefs.ext4":
-
-* dir_index and resize inode will be on by default
-* large inodes will be used by default for fast EAs, nsec timestamps, etc
+* improved file allocation (multi-block alloc)
+* fix 32000 subdirectory limit
+* nsec timestamps for mtime, atime, ctime, create time
+* inode version field on disk (NFSv4, Lustre)
+* reduced e2fsck time via uninit_bg feature
+* journal checksumming for robustness, performance
+* persistent file preallocation (e.g for streaming media, databases)
+* ability to pack bitmaps and inode tables into larger virtual groups via the
+  flex_bg feature
+* large file support
+* Inode allocation using large virtual block groups via flex_bg
+* delayed allocation
+* large block (up to pagesize) support
+* efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force
+  the ordering)
 
 2.2 Candidate features for future inclusion
 
-There are several under discussion, whether they all make it in is
-partly a function of how much time everyone has to work on them:
+* Online defrag (patches available but not well tested)
+* reduced mke2fs time via lazy itable initialization in conjuction with
+  the uninit_bg feature (capability to do this is available in e2fsprogs
+  but a kernel thread to do lazy zeroing of unused inode table blocks
+  after filesystem is first mounted is required for safety)
 
-* improved file allocation (multi-block alloc, delayed alloc; basically done)
-* fix 32000 subdirectory limit (patch exists, needs some e2fsck work)
-* nsec timestamps for mtime, atime, ctime, create time (patch exists,
-  needs some e2fsck work)
-* inode version field on disk (NFSv4, Lustre; prototype exists)
-* reduced mke2fs/e2fsck time via uninitialized groups (prototype exists)
-* journal checksumming for robustness, performance (prototype exists)
-* persistent file preallocation (e.g for streaming media, databases)
+There are several others under discussion, whether they all make it in is
+partly a function of how much time everyone has to work on them. Features like
+metadata checksumming have been discussed and planned for a bit but no patches
+exist yet so I'm not sure they're in the near-term roadmap.
 
-Features like metadata checksumming have been discussed and planned for
-a bit but no patches exist yet so I'm not sure they're in the near-term
-roadmap.
+The big performance win will come with mballoc, delalloc and flex_bg
+grouping of bitmaps and inode tables. Some test results available here:
 
-The big performance win will come with mballoc and delalloc.  CFS has
-been using mballoc for a few years already with Lustre, and IBM + Bull
-did a lot of benchmarking on it.  The reason it isn't in the first set of
-patches is partly a manageability issue, and partly because it doesn't
-directly affect the on-disk format (outside of much better allocation)
-so it isn't critical to get into the first round of changes.  I believe
-Alex is working on a new set of patches right now.
+ - http://www.bullopensource.org/ext4/20080530/ffsb-write-2.6.26-rc2.html
+ - http://www.bullopensource.org/ext4/20080530/ffsb-readwrite-2.6.26-rc2.html
 
 3. Options
 ==========

@@ -222,9 +243,11 @@ stripe=n	Number of filesystem blocks that mballoc will try
                         to use for allocation size and alignment. For RAID5/6
                         systems this should be the number of data
                         disks * RAID chunk size in file system blocks.
+delalloc	(*)     Deferring block allocation until write-out time.
+nodelalloc              Disable delayed allocation. Blocks are allocation
+                        when data is copied from user to page cache.
 
 Data Mode
----------
+=========
 There are 3 different data modes:
 
 * writeback mode

@@ -236,10 +259,10 @@ typically provide the best ext4 performance.
 
 * ordered mode
 In data=ordered mode, ext4 only officially journals metadata, but it logically
-groups metadata and data blocks into a single unit called a transaction.  When
-it's time to write the new metadata out to disk, the associated data blocks
-are written first.  In general, this mode performs slightly slower than
-writeback but significantly faster than journal mode.
+groups metadata information related to data changes with the data blocks into a
+single unit called a transaction.  When it's time to write the new metadata
+out to disk, the associated data blocks are written first.  In general,
+this mode performs slightly slower than writeback but significantly faster than journal mode.
 
 * journal mode
 data=journal mode provides full data and metadata journaling.  All new data is

@@ -247,7 +270,8 @@ written to the journal first, and then to its final location.
 In the event of a crash, the journal can be replayed, bringing both data and
 metadata into a consistent state.  This mode is the slowest except when data
 needs to be read from and written to disk at the same time where it
-outperforms all others modes.
+outperforms all others modes.  Curently ext4 does not have delayed
+allocation support if this data journalling mode is selected.
 
 References
 ==========

@@ -256,7 +280,8 @@ kernel source:	<file:fs/ext4/>
                 <file:fs/jbd2/>
 
 programs:       http://e2fsprogs.sourceforge.net/
-                http://ext2resize.sourceforge.net
 
 useful links:   http://fedoraproject.org/wiki/ext3-devel
                 http://www.bullopensource.org/ext4/
+                http://ext4.wiki.kernel.org/index.php/Main_Page
+                http://fedoraproject.org/wiki/Features/Ext4
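The quick-usage hunks above reduce to three shell steps (mke2fs, tune2fs, mount). As a minimal sketch of the same mount step done programmatically via mount(2), using the benchmarking options the documentation recommends — the device and mount point are placeholder names, not taken from this patch:

/*
 * Mount an ext4dev filesystem with data=writeback,nobh from C.
 * Equivalent to: mount -t ext4dev -o data=writeback,nobh /dev/hda1 /mnt
 * Build with: cc -o mnt mnt.c   (must run as root)
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        /* the last argument carries the filesystem-specific options */
        if (mount("/dev/hda1", "/mnt", "ext4dev", 0,
                  "data=writeback,nobh") != 0) {
                perror("mount");
                return 1;
        }
        puts("mounted with data=writeback,nobh");
        return 0;
}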
fs/buffer.c (19 lines changed)

@@ -1691,11 +1691,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
                         */
                        clear_buffer_dirty(bh);
                        set_buffer_uptodate(bh);
-               } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
+               } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
+                          buffer_dirty(bh)) {
                        WARN_ON(bh->b_size != blocksize);
                        err = get_block(inode, block, bh, 1);
                        if (err)
                                goto recover;
+                       clear_buffer_delay(bh);
                        if (buffer_new(bh)) {
                                /* blockdev mappings never come here */
                                clear_buffer_new(bh);

@@ -1774,7 +1776,8 @@ recover:
        bh = head;
        /* Recovery: lock and submit the mapped buffers */
        do {
-               if (buffer_mapped(bh) && buffer_dirty(bh)) {
+               if (buffer_mapped(bh) && buffer_dirty(bh) &&
+                   !buffer_delay(bh)) {
                        lock_buffer(bh);
                        mark_buffer_async_write(bh);
                } else {

@@ -2061,6 +2064,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
                        struct page *page, void *fsdata)
 {
        struct inode *inode = mapping->host;
+       int i_size_changed = 0;
 
        copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
 
@@ -2073,12 +2077,21 @@ int generic_write_end(struct file *file, struct address_space *mapping,
         */
        if (pos+copied > inode->i_size) {
                i_size_write(inode, pos+copied);
-               mark_inode_dirty(inode);
+               i_size_changed = 1;
        }
 
        unlock_page(page);
        page_cache_release(page);
 
+       /*
+        * Don't mark the inode dirty under page lock. First, it unnecessarily
+        * makes the holding time of page lock longer. Second, it forces lock
+        * ordering of page lock and transaction start for journaling
+        * filesystems.
+        */
+       if (i_size_changed)
+               mark_inode_dirty(inode);
+
        return copied;
 }
 EXPORT_SYMBOL(generic_write_end);
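The generic_write_end() hunk above records the i_size change under the page lock but calls mark_inode_dirty() only after the lock is dropped, so the dirtying path (which may start a journal transaction) never nests inside the page lock. A userspace sketch of that "note under the lock, act after release" pattern, assuming a pthread mutex stands in for the page lock:

/*
 * Illustrative analogue of the patched generic_write_end(): defer the
 * side effect until after the lock is released so no lock-ordering
 * constraint is created between the lock and the callback.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
static long i_size;

static void mark_inode_dirty(void)
{
        /* in the kernel this may start a journal transaction */
        printf("inode marked dirty, i_size=%ld\n", i_size);
}

static void write_end(long pos, long copied)
{
        int i_size_changed = 0;

        pthread_mutex_lock(&page_lock);
        if (pos + copied > i_size) {
                i_size = pos + copied;
                i_size_changed = 1;     /* just record the fact */
        }
        pthread_mutex_unlock(&page_lock);

        /* act on it outside the lock, as the patch does */
        if (i_size_changed)
                mark_inode_dirty();
}

int main(void)
{
        write_end(0, 4096);
        write_end(2048, 1024);  /* no growth: nothing to dirty */
        return 0;
}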
fs/ext4/balloc.c (211 lines changed)

@@ -47,7 +47,7 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
                        ext4_group_t block_group)
 {
        ext4_group_t actual_group;
-       ext4_get_group_no_and_offset(sb, block, &actual_group, 0);
+       ext4_get_group_no_and_offset(sb, block, &actual_group, NULL);
        if (actual_group == block_group)
                return 1;
        return 0;

@@ -121,12 +121,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
                                le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
                }
        } else { /* For META_BG_BLOCK_GROUPS */
-               int group_rel = (block_group -
-                                le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
-                               EXT4_DESC_PER_BLOCK(sb);
-               if (group_rel == 0 || group_rel == 1 ||
-                   (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
-                       bit_max += 1;
+               bit_max += ext4_bg_num_gdb(sb, block_group);
        }
 
        if (block_group == sbi->s_groups_count - 1) {

@@ -295,7 +290,7 @@ err_out:
        return 0;
 }
 /**
- * read_block_bitmap()
+ * ext4_read_block_bitmap()
  * @sb:                 super block
  * @block_group:        given block group
  *

@@ -305,7 +300,7 @@ err_out:
  * Return buffer_head on success or NULL in case of failure.
  */
 struct buffer_head *
-read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
+ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 {
        struct ext4_group_desc * desc;
        struct buffer_head * bh = NULL;

@@ -409,8 +404,7 @@ restart:
                prev = rsv;
        }
        printk("Window map complete.\n");
-       if (bad)
-               BUG();
+       BUG_ON(bad);
 }
 #define rsv_window_dump(root, verbose) \
        __rsv_window_dump((root), (verbose), __func__)

@@ -694,7 +688,7 @@ do_more:
                count -= overflow;
        }
        brelse(bitmap_bh);
-       bitmap_bh = read_block_bitmap(sb, block_group);
+       bitmap_bh = ext4_read_block_bitmap(sb, block_group);
        if (!bitmap_bh)
                goto error_return;
        desc = ext4_get_group_desc (sb, block_group, &gd_bh);

@@ -810,6 +804,13 @@ do_more:
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
+       if (sbi->s_log_groups_per_flex) {
+               ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
+               spin_lock(sb_bgl_lock(sbi, flex_group));
+               sbi->s_flex_groups[flex_group].free_blocks += count;
+               spin_unlock(sb_bgl_lock(sbi, flex_group));
+       }
+
        /* We dirtied the bitmap block */
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
        err = ext4_journal_dirty_metadata(handle, bitmap_bh);

@@ -1599,22 +1600,34 @@ out:
 /**
  * ext4_has_free_blocks()
  * @sbi:        in-core super block structure.
+ * @nblocks:    number of neeed blocks
  *
- * Check if filesystem has at least 1 free block available for allocation.
+ * Check if filesystem has free blocks available for allocation.
+ * Return the number of blocks avaible for allocation for this request
+ * On success, return nblocks
  */
-static int ext4_has_free_blocks(struct ext4_sb_info *sbi)
+ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
+                                               ext4_fsblk_t nblocks)
 {
-       ext4_fsblk_t free_blocks, root_blocks;
+       ext4_fsblk_t free_blocks;
+       ext4_fsblk_t root_blocks = 0;
 
        free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
-       root_blocks = ext4_r_blocks_count(sbi->s_es);
-       if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
+
+       if (!capable(CAP_SYS_RESOURCE) &&
                sbi->s_resuid != current->fsuid &&
-               (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
-               return 0;
-       }
-       return 1;
+               (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
+               root_blocks = ext4_r_blocks_count(sbi->s_es);
+#ifdef CONFIG_SMP
+       if (free_blocks - root_blocks < FBC_BATCH)
+               free_blocks =
+                       percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
+#endif
+       if (free_blocks - root_blocks < nblocks)
+               return free_blocks - root_blocks;
+       return nblocks;
 }
 
 /**
  * ext4_should_retry_alloc()

@@ -1630,7 +1643,7 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi)
  */
 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
-       if (!ext4_has_free_blocks(EXT4_SB(sb)) || (*retries)++ > 3)
+       if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3)
                return 0;
 
        jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);

@@ -1639,20 +1652,24 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 }
 
 /**
- * ext4_new_blocks_old() -- core block(s) allocation function
+ * ext4_old_new_blocks() -- core block bitmap based block allocation function
+ *
  * @handle:             handle to this transaction
  * @inode:              file inode
  * @goal:               given target block(filesystem wide)
  * @count:              target number of blocks to allocate
  * @errp:               error code
  *
- * ext4_new_blocks uses a goal block to assist allocation.  It tries to
- * allocate block(s) from the block group contains the goal block first. If that
- * fails, it will try to allocate block(s) from other block groups without
- * any specific goal block.
+ * ext4_old_new_blocks uses a goal block to assist allocation and look up
+ * the block bitmap directly to do block allocation.  It tries to
+ * allocate block(s) from the block group contains the goal block first. If
+ * that fails, it will try to allocate block(s) from other block groups
+ * without any specific goal block.
+ *
+ * This function is called when -o nomballoc mount option is enabled
  *
  */
-ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
+ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t goal, unsigned long *count, int *errp)
 {
        struct buffer_head *bitmap_bh = NULL;

@@ -1676,13 +1693,26 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
        ext4_group_t ngroups;
        unsigned long num = *count;
 
-       *errp = -ENOSPC;
        sb = inode->i_sb;
        if (!sb) {
+               *errp = -ENODEV;
                printk("ext4_new_block: nonexistent device");
                return 0;
        }
 
+       sbi = EXT4_SB(sb);
+       if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
+               /*
+                * With delalloc we already reserved the blocks
+                */
+               *count = ext4_has_free_blocks(sbi, *count);
+       }
+       if (*count == 0) {
+               *errp = -ENOSPC;
+               return 0;       /*return with ENOSPC error */
+       }
+       num = *count;
+
        /*
         * Check quota for allocation of this block.
         */

@@ -1706,11 +1736,6 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
        if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
                my_rsv = &block_i->rsv_window_node;
 
-       if (!ext4_has_free_blocks(sbi)) {
-               *errp = -ENOSPC;
-               goto out;
-       }
-
        /*
         * First, test whether the goal block is free.
         */

@@ -1734,7 +1759,7 @@ retry_alloc:
                my_rsv = NULL;
 
        if (free_blocks > 0) {
-               bitmap_bh = read_block_bitmap(sb, group_no);
+               bitmap_bh = ext4_read_block_bitmap(sb, group_no);
                if (!bitmap_bh)
                        goto io_error;
                grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,

@@ -1770,7 +1795,7 @@ retry_alloc:
                        continue;
 
                brelse(bitmap_bh);
-               bitmap_bh = read_block_bitmap(sb, group_no);
+               bitmap_bh = ext4_read_block_bitmap(sb, group_no);
                if (!bitmap_bh)
                        goto io_error;
                /*

@@ -1882,8 +1907,16 @@ allocated:
        le16_add_cpu(&gdp->bg_free_blocks_count, -num);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
        spin_unlock(sb_bgl_lock(sbi, group_no));
+       if (!EXT4_I(inode)->i_delalloc_reserved_flag)
                percpu_counter_sub(&sbi->s_freeblocks_counter, num);
 
+       if (sbi->s_log_groups_per_flex) {
+               ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
+               spin_lock(sb_bgl_lock(sbi, flex_group));
+               sbi->s_flex_groups[flex_group].free_blocks -= num;
+               spin_unlock(sb_bgl_lock(sbi, flex_group));
+       }
+
        BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
        err = ext4_journal_dirty_metadata(handle, gdp_bh);
        if (!fatal)

@@ -1915,46 +1948,104 @@ out:
        return 0;
 }
 
-ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t goal, int *errp)
+#define EXT4_META_BLOCK 0x1
+
+static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
+                               ext4_lblk_t iblock, ext4_fsblk_t goal,
+                               unsigned long *count, int *errp, int flags)
 {
        struct ext4_allocation_request ar;
        ext4_fsblk_t ret;
 
        if (!test_opt(inode->i_sb, MBALLOC)) {
-               unsigned long count = 1;
-               ret = ext4_new_blocks_old(handle, inode, goal, &count, errp);
-               return ret;
+               return ext4_old_new_blocks(handle, inode, goal, count, errp);
        }
 
        memset(&ar, 0, sizeof(ar));
-       ar.inode = inode;
-       ar.goal = goal;
-       ar.len = 1;
-       ret = ext4_mb_new_blocks(handle, &ar, errp);
-       return ret;
-}
-
-ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t goal, unsigned long *count, int *errp)
-{
-       struct ext4_allocation_request ar;
-       ext4_fsblk_t ret;
-
-       if (!test_opt(inode->i_sb, MBALLOC)) {
-               ret = ext4_new_blocks_old(handle, inode, goal, count, errp);
-               return ret;
-       }
-
-       memset(&ar, 0, sizeof(ar));
+       /* Fill with neighbour allocated blocks */
+
        ar.inode = inode;
        ar.goal = goal;
        ar.len = *count;
+       ar.logical = iblock;
+
+       if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK))
+               /* enable in-core preallocation for data block allocation */
+               ar.flags = EXT4_MB_HINT_DATA;
+       else
+               /* disable in-core preallocation for non-regular files */
+               ar.flags = 0;
+
        ret = ext4_mb_new_blocks(handle, &ar, errp);
        *count = ar.len;
        return ret;
 }
+
+/*
+ * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
+ *
+ * @handle:             handle to this transaction
+ * @inode:              file inode
+ * @goal:               given target block(filesystem wide)
+ * @count:              total number of blocks need
+ * @errp:               error code
+ *
+ * Return 1st allocated block numberon success, *count stores total account
+ * error stores in errp pointer
+ */
+ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
+               ext4_fsblk_t goal, unsigned long *count, int *errp)
+{
+       ext4_fsblk_t ret;
+       ret = do_blk_alloc(handle, inode, 0, goal,
+                               count, errp, EXT4_META_BLOCK);
+       /*
+        * Account for the allocated meta blocks
+        */
+       if (!(*errp)) {
+               spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+               EXT4_I(inode)->i_allocated_meta_blocks += *count;
+               spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+       }
+       return ret;
+}
+
+/*
+ * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks
+ *
+ * @handle:             handle to this transaction
+ * @inode:              file inode
+ * @goal:               given target block(filesystem wide)
+ * @errp:               error code
+ *
+ * Return allocated block number on success
+ */
+ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
+               ext4_fsblk_t goal, int *errp)
+{
+       unsigned long count = 1;
+       return ext4_new_meta_blocks(handle, inode, goal, &count, errp);
+}
+
+/*
+ * ext4_new_blocks() -- allocate data blocks
+ *
+ * @handle:             handle to this transaction
+ * @inode:              file inode
+ * @goal:               given target block(filesystem wide)
+ * @count:              total number of blocks need
+ * @errp:               error code
+ *
+ * Return 1st allocated block numberon success, *count stores total account
+ * error stores in errp pointer
+ */
+
+ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
+                               ext4_lblk_t iblock, ext4_fsblk_t goal,
+                               unsigned long *count, int *errp)
+{
+       return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0);
+}
 
 /**
  * ext4_count_free_blocks() -- count filesystem free blocks

@@ -1986,7 +2077,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
                        continue;
                desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
                brelse(bitmap_bh);
-               bitmap_bh = read_block_bitmap(sb, i);
+               bitmap_bh = ext4_read_block_bitmap(sb, i);
                if (bitmap_bh == NULL)
                        continue;
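The new ext4_has_free_blocks() above trusts the cheap approximate per-CPU counter read unless the answer falls within the counter's error margin (FBC_BATCH), and only then pays for the exact sum via percpu_counter_sum_and_set(). A standalone sketch of that idea, with made-up values standing in for the per-CPU deltas:

/*
 * Approximate counter with an exact-sum fallback near the threshold,
 * modelling the ext4_has_free_blocks() logic in the hunk above.
 */
#include <stdio.h>

#define NCPU      4
#define FBC_BATCH 32

static long global_count = 900;                 /* possibly stale total */
static long percpu_delta[NCPU] = { 10, -5, 7, 3 };

static long counter_read_approx(void)
{
        return global_count;                    /* cheap, may be off */
}

static long counter_sum_exact(void)
{
        long sum = global_count;                /* expensive: fold deltas */
        for (int i = 0; i < NCPU; i++)
                sum += percpu_delta[i];
        return sum;
}

/* return how many of nblocks can actually be granted */
static long has_free_blocks(long nblocks, long root_blocks)
{
        long free_blocks = counter_read_approx();

        if (free_blocks - root_blocks < FBC_BATCH)
                free_blocks = counter_sum_exact();
        if (free_blocks - root_blocks < nblocks)
                return free_blocks - root_blocks;
        return nblocks;
}

int main(void)
{
        printf("grant %ld of 16\n", has_free_blocks(16, 905)); /* tight */
        printf("grant %ld of 16\n", has_free_blocks(16, 100)); /* plenty */
        return 0;
}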
fs/ext4/dir.c

@@ -129,7 +129,8 @@ static int ext4_readdir(struct file * filp,
                struct buffer_head *bh = NULL;
 
                map_bh.b_state = 0;
-               err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0);
+               err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
+                                               0, 0, 0);
                if (err > 0) {
                        pgoff_t index = map_bh.b_blocknr >>
                                        (PAGE_CACHE_SHIFT - inode->i_blkbits);

@@ -272,7 +273,7 @@ static void free_rb_tree_fname(struct rb_root *root)
 
        while (n) {
                /* Do the node's children first */
-               if ((n)->rb_left) {
+               if (n->rb_left) {
                        n = n->rb_left;
                        continue;
                }

@@ -301,24 +302,18 @@ static void free_rb_tree_fname(struct rb_root *root)
                        parent->rb_right = NULL;
                n = parent;
        }
-       root->rb_node = NULL;
 }
 
 
-static struct dir_private_info *create_dir_info(loff_t pos)
+static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos)
 {
        struct dir_private_info *p;
 
-       p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
+       p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
        if (!p)
                return NULL;
-       p->root.rb_node = NULL;
-       p->curr_node = NULL;
-       p->extra_fname = NULL;
-       p->last_pos = 0;
        p->curr_hash = pos2maj_hash(pos);
        p->curr_minor_hash = pos2min_hash(pos);
-       p->next_hash = 0;
        return p;
 }
 

@@ -433,7 +428,7 @@ static int ext4_dx_readdir(struct file * filp,
        int     ret;
 
        if (!info) {
-               info = create_dir_info(filp->f_pos);
+               info = ext4_htree_create_dir_info(filp->f_pos);
                if (!info)
                        return -ENOMEM;
                filp->private_data = info;
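The ext4_htree_create_dir_info() hunk above swaps kmalloc() plus a string of explicit NULL/0 assignments for kzalloc(), which hands back already-zeroed memory; only the fields that need non-zero values are then set. The userspace analogue is calloc() — a small sketch, with a simplified stand-in for the kernel's dir_private_info layout:

/*
 * calloc() plays the role of kzalloc(): every field starts as 0/NULL,
 * so the manual initialisation lines the patch deletes become redundant.
 */
#include <stdlib.h>

struct dir_private_info {
        void *root;
        void *curr_node;
        void *extra_fname;
        long long last_pos;
        unsigned curr_hash;
        unsigned curr_minor_hash;
        unsigned next_hash;
};

static struct dir_private_info *create_dir_info(long long pos)
{
        struct dir_private_info *p = calloc(1, sizeof(*p));

        if (!p)
                return NULL;
        p->curr_hash = (unsigned)(pos >> 32);   /* stand-in for pos2maj_hash */
        p->curr_minor_hash = (unsigned)pos;     /* stand-in for pos2min_hash */
        return p;
}

int main(void)
{
        struct dir_private_info *p = create_dir_info(0x123456789abcdefLL);
        free(p);
        return 0;
}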
fs/ext4/ext4.h

@@ -22,7 +22,7 @@
 #include "ext4_i.h"
 
 /*
- * The second extended filesystem constants/structures
+ * The fourth extended filesystem constants/structures
  */
 
 /*

@@ -45,7 +45,7 @@
 #define ext4_debug(f, a...)                                            \
        do {                                                            \
                printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:",       \
-                       __FILE__, __LINE__, __FUNCTION__);              \
+                       __FILE__, __LINE__, __func__);                  \
                printk (KERN_DEBUG f, ## a);                            \
        } while (0)
 #else

@@ -74,6 +74,9 @@
 #define EXT4_MB_HINT_GOAL_ONLY         256
 /* goal is meaningful */
 #define EXT4_MB_HINT_TRY_GOAL          512
+/* blocks already pre-reserved by delayed allocation */
+#define EXT4_MB_DELALLOC_RESERVED      1024
+
 
 struct ext4_allocation_request {
        /* target inode for block we're allocating */

@@ -170,6 +173,15 @@ struct ext4_group_desc
        __u32   bg_reserved2[3];
 };
 
+/*
+ * Structure of a flex block group info
+ */
+
+struct flex_groups {
+       __u32 free_inodes;
+       __u32 free_blocks;
+};
+
 #define EXT4_BG_INODE_UNINIT   0x0001 /* Inode table/bitmap not in use */
 #define EXT4_BG_BLOCK_UNINIT   0x0002 /* Block bitmap not in use */
 #define EXT4_BG_INODE_ZEROED   0x0004 /* On-disk itable initialized to zero */

@@ -527,6 +539,7 @@ do {                                                                        \
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT        0x1000000 /* Journal Async Commit */
 #define EXT4_MOUNT_I_VERSION           0x2000000 /* i_version support */
 #define EXT4_MOUNT_MBALLOC             0x4000000 /* Buddy allocation support */
+#define EXT4_MOUNT_DELALLOC            0x8000000 /* Delalloc support */
 /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
 #define clear_opt(o, opt)              o &= ~EXT4_MOUNT_##opt

@@ -647,7 +660,10 @@ struct ext4_super_block {
        __le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
        __le64  s_mmp_block;            /* Block for multi-mount protection */
        __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
-       __u32   s_reserved[163];        /* Padding to the end of the block */
+       __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
+       __u8    s_reserved_char_pad2;
+       __le16  s_reserved_pad;
+       __u32   s_reserved[162];        /* Padding to the end of the block */
 };
 
 #ifdef __KERNEL__

@@ -958,12 +974,17 @@ extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
 extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
 extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
                        ext4_group_t group);
-extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode,
+extern ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t goal, int *errp);
-extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
+extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t goal, unsigned long *count, int *errp);
-extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
+extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
+                                       ext4_lblk_t iblock, ext4_fsblk_t goal,
+                                       unsigned long *count, int *errp);
+extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t goal, unsigned long *count, int *errp);
+extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
+                                               ext4_fsblk_t nblocks);
 extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
                        ext4_fsblk_t block, unsigned long count, int metadata);
 extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,

@@ -1016,9 +1037,14 @@ extern int __init init_ext4_mballoc(void);
 extern void exit_ext4_mballoc(void);
 extern void ext4_mb_free_blocks(handle_t *, struct inode *,
                unsigned long, unsigned long, int, unsigned long *);
+extern int ext4_mb_add_more_groupinfo(struct super_block *sb,
+                       ext4_group_t i, struct ext4_group_desc *desc);
+extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
+                                               ext4_grpblk_t add);
+
 
 /* inode.c */
+void ext4_da_release_space(struct inode *inode, int used, int to_free);
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
                struct buffer_head *bh, ext4_fsblk_t blocknr);
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,

@@ -1033,19 +1059,23 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
 extern struct inode *ext4_iget(struct super_block *, unsigned long);
 extern int  ext4_write_inode (struct inode *, int);
 extern int  ext4_setattr (struct dentry *, struct iattr *);
+extern int  ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
+                               struct kstat *stat);
 extern void ext4_delete_inode (struct inode *);
 extern int  ext4_sync_inode (handle_t *, struct inode *);
 extern void ext4_discard_reservation (struct inode *);
 extern void ext4_dirty_inode(struct inode *);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
+extern int ext4_can_truncate(struct inode *inode);
 extern void ext4_truncate (struct inode *);
 extern void ext4_set_inode_flags(struct inode *);
 extern void ext4_get_inode_flags(struct ext4_inode_info *);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
-extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
+extern int ext4_block_truncate_page(handle_t *handle,
                struct address_space *mapping, loff_t from);
+extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
 
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);

@@ -1159,10 +1189,21 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
 }
 
 
+static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
+                                            ext4_group_t block_group)
+{
+       return block_group >> sbi->s_log_groups_per_flex;
+}
+
+static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
+{
+       return 1 << sbi->s_log_groups_per_flex;
+}
+
 #define ext4_std_error(sb, errno)                              \
 do {                                                           \
        if ((errno))                                            \
-               __ext4_std_error((sb), __FUNCTION__, (errno));  \
+               __ext4_std_error((sb), __func__, (errno));      \
 } while (0)
 
 /*

@@ -1191,7 +1232,7 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        ext4_lblk_t iblock,
                        unsigned long max_blocks, struct buffer_head *bh_result,
                        int create, int extend_disksize);
-extern void ext4_ext_truncate(struct inode *, struct page *);
+extern void ext4_ext_truncate(struct inode *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,

@@ -1199,7 +1240,7 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
 extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
                        sector_t block, unsigned long max_blocks,
                        struct buffer_head *bh, int create,
-                       int extend_disksize);
+                       int extend_disksize, int flag);
 #endif /* __KERNEL__ */
 
 #endif /* _EXT4_H */
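The flex_bg helpers added above map a block group to its "flex group" with a plain shift by s_log_groups_per_flex. A quick standalone demonstration of the arithmetic — the value 4 and the sample group numbers are examples, not taken from this patch:

/*
 * With log_groups_per_flex = 4, each flex group aggregates 2^4 = 16
 * ordinary block groups; membership is just a right shift.
 */
#include <stdio.h>

static unsigned log_groups_per_flex = 4;

static unsigned ext4_flex_group(unsigned block_group)
{
        return block_group >> log_groups_per_flex;
}

static unsigned ext4_flex_bg_size(void)
{
        return 1u << log_groups_per_flex;
}

int main(void)
{
        printf("flex group size: %u block groups\n", ext4_flex_bg_size());
        for (unsigned bg = 0; bg < 40; bg += 13)
                printf("block group %2u -> flex group %u\n",
                       bg, ext4_flex_group(bg));
        return 0;
}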
fs/ext4/ext4_extents.h

@@ -212,6 +212,7 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
                (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
 }
 
+extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
fs/ext4/ext4_i.h

@@ -79,7 +79,7 @@ struct ext4_ext_cache {
 };
 
 /*
- * third extended file system inode data in memory
+ * fourth extended file system inode data in memory
  */
 struct ext4_inode_info {
        __le32  i_data[15];     /* unconverted */

@@ -150,6 +150,7 @@ struct ext4_inode_info {
         */
        struct rw_semaphore i_data_sem;
        struct inode vfs_inode;
+       struct jbd2_inode jinode;
 
        unsigned long i_ext_generation;
        struct ext4_ext_cache i_cached_extent;

@@ -162,6 +163,13 @@ struct ext4_inode_info {
        /* mballoc */
        struct list_head i_prealloc_list;
        spinlock_t i_prealloc_lock;
+
+       /* allocation reservation info for delalloc */
+       unsigned long i_reserved_data_blocks;
+       unsigned long i_reserved_meta_blocks;
+       unsigned long i_allocated_meta_blocks;
+       unsigned short i_delalloc_reserved_flag;
+       spinlock_t i_block_reservation_lock;
 };
 
 #endif /* _EXT4_I */
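The reservation fields added to ext4_inode_info above carry the delalloc bookkeeping: a buffered write reserves worst-case data and metadata blocks up front under i_block_reservation_lock, and writeback later converts the reservation into a real allocation. A schematic userspace model of that accounting, assuming a pthread mutex plays the spinlock's role — this is an illustration of the intent, not the kernel's actual accounting code:

#include <pthread.h>
#include <stdio.h>

struct inode_reservation {
        unsigned long reserved_data_blocks;
        unsigned long reserved_meta_blocks;
        unsigned long allocated_meta_blocks;
        pthread_mutex_t lock;
};

static void reserve(struct inode_reservation *r,
                    unsigned long data, unsigned long meta)
{
        pthread_mutex_lock(&r->lock);
        r->reserved_data_blocks += data;        /* write path: reserve */
        r->reserved_meta_blocks += meta;
        pthread_mutex_unlock(&r->lock);
}

static void writeback(struct inode_reservation *r,
                      unsigned long data, unsigned long meta)
{
        pthread_mutex_lock(&r->lock);
        r->reserved_data_blocks -= data;        /* now really allocated */
        r->reserved_meta_blocks -= meta;
        r->allocated_meta_blocks += meta;
        pthread_mutex_unlock(&r->lock);
}

int main(void)
{
        struct inode_reservation r = { 0, 0, 0, PTHREAD_MUTEX_INITIALIZER };

        reserve(&r, 64, 2);     /* buffered write: reserve up front */
        writeback(&r, 64, 2);   /* writepages: convert to allocation */
        printf("meta blocks actually allocated: %lu\n",
               r.allocated_meta_blocks);
        return 0;
}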
fs/ext4/ext4_jbd2.h

@@ -142,19 +142,17 @@ int __ext4_journal_dirty_metadata(const char *where,
                                handle_t *handle, struct buffer_head *bh);
 
 #define ext4_journal_get_undo_access(handle, bh) \
-       __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh))
+       __ext4_journal_get_undo_access(__func__, (handle), (bh))
 #define ext4_journal_get_write_access(handle, bh) \
-       __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh))
+       __ext4_journal_get_write_access(__func__, (handle), (bh))
 #define ext4_journal_revoke(handle, blocknr, bh) \
-       __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh))
+       __ext4_journal_revoke(__func__, (handle), (blocknr), (bh))
 #define ext4_journal_get_create_access(handle, bh) \
-       __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh))
+       __ext4_journal_get_create_access(__func__, (handle), (bh))
 #define ext4_journal_dirty_metadata(handle, bh) \
-       __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh))
+       __ext4_journal_dirty_metadata(__func__, (handle), (bh))
 #define ext4_journal_forget(handle, bh) \
-       __ext4_journal_forget(__FUNCTION__, (handle), (bh))
-
-int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
+       __ext4_journal_forget(__func__, (handle), (bh))
 
 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
 int __ext4_journal_stop(const char *where, handle_t *handle);

@@ -165,7 +163,7 @@ static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
 }
 
 #define ext4_journal_stop(handle) \
-       __ext4_journal_stop(__FUNCTION__, (handle))
+       __ext4_journal_stop(__func__, (handle))
 
 static inline handle_t *ext4_journal_current_handle(void)
 {

@@ -192,6 +190,11 @@ static inline int ext4_journal_force_commit(journal_t *journal)
        return jbd2_journal_force_commit(journal);
 }
 
+static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
+{
+       return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+}
+
 /* super.c */
 int ext4_force_commit(struct super_block *sb);
fs/ext4/ext4_sb.h

@@ -25,7 +25,7 @@
 #include <linux/rbtree.h>
 
 /*
- * third extended-fs super-block data in memory
+ * fourth extended-fs super-block data in memory
  */
 struct ext4_sb_info {
        unsigned long s_desc_size;      /* Size of a group descriptor in bytes */

@@ -143,6 +143,9 @@ struct ext4_sb_info {
 
        /* locality groups */
        struct ext4_locality_group *s_locality_groups;
+
+       unsigned int s_log_groups_per_flex;
+       struct flex_groups *s_flex_groups;
 };
 
 #endif /* _EXT4_SB */
@ -92,17 +92,16 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
|
||||||
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
|
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
|
||||||
}
|
}
|
||||||
|
|
||||||
static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed)
|
static int ext4_ext_journal_restart(handle_t *handle, int needed)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
if (handle->h_buffer_credits > needed)
|
if (handle->h_buffer_credits > needed)
|
||||||
return handle;
|
return 0;
|
||||||
if (!ext4_journal_extend(handle, needed))
|
err = ext4_journal_extend(handle, needed);
|
||||||
return handle;
|
if (err)
|
||||||
err = ext4_journal_restart(handle, needed);
|
return err;
|
||||||
|
return ext4_journal_restart(handle, needed);
|
||||||
return handle;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -180,15 +179,18 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
|
||||||
return bg_start + colour + block;
|
return bg_start + colour + block;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocation for a meta data block
|
||||||
|
*/
|
||||||
static ext4_fsblk_t
|
static ext4_fsblk_t
|
||||||
ext4_ext_new_block(handle_t *handle, struct inode *inode,
|
ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
|
||||||
struct ext4_ext_path *path,
|
struct ext4_ext_path *path,
|
||||||
struct ext4_extent *ex, int *err)
|
struct ext4_extent *ex, int *err)
|
||||||
{
|
{
|
||||||
ext4_fsblk_t goal, newblock;
|
ext4_fsblk_t goal, newblock;
|
||||||
|
|
||||||
goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
|
goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
|
||||||
newblock = ext4_new_block(handle, inode, goal, err);
|
newblock = ext4_new_meta_block(handle, inode, goal, err);
|
||||||
return newblock;
|
return newblock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -246,6 +248,36 @@ static int ext4_ext_space_root_idx(struct inode *inode)
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Calculate the number of metadata blocks needed
|
||||||
|
* to allocate @blocks
|
||||||
|
* Worse case is one block per extent
|
||||||
|
*/
|
||||||
|
int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
|
||||||
|
{
|
||||||
|
int lcap, icap, rcap, leafs, idxs, num;
|
||||||
|
int newextents = blocks;
|
||||||
|
|
||||||
|
rcap = ext4_ext_space_root_idx(inode);
|
||||||
|
lcap = ext4_ext_space_block(inode);
|
||||||
|
icap = ext4_ext_space_block_idx(inode);
|
||||||
|
|
||||||
|
/* number of new leaf blocks needed */
|
||||||
|
num = leafs = (newextents + lcap - 1) / lcap;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Worse case, we need separate index block(s)
|
||||||
|
* to link all new leaf blocks
|
||||||
|
*/
|
||||||
|
idxs = (leafs + icap - 1) / icap;
|
||||||
|
do {
|
||||||
|
num += idxs;
|
||||||
|
idxs = (idxs + icap - 1) / icap;
|
||||||
|
} while (idxs > rcap);
|
||||||
|
|
||||||
|
return num;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
ext4_ext_max_entries(struct inode *inode, int depth)
|
ext4_ext_max_entries(struct inode *inode, int depth)
|
||||||
{
|
{
|
||||||
|
@ -524,6 +556,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
|
||||||
alloc = 1;
|
alloc = 1;
|
||||||
}
|
}
|
||||||
path[0].p_hdr = eh;
|
path[0].p_hdr = eh;
|
||||||
|
path[0].p_bh = NULL;
|
||||||
|
|
||||||
i = depth;
|
i = depth;
|
||||||
/* walk through the tree */
|
/* walk through the tree */
|
||||||
|
@@ -552,12 +585,14 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 	}
 
 	path[ppos].p_depth = i;
-	path[ppos].p_hdr = eh;
 	path[ppos].p_ext = NULL;
 	path[ppos].p_idx = NULL;
 
 	/* find extent */
 	ext4_ext_binsearch(inode, path + ppos, block);
+	/* if not an empty leaf */
+	if (path[ppos].p_ext)
+		path[ppos].p_block = ext_pblock(path[ppos].p_ext);
 
 	ext4_ext_show_path(inode, path);
 
@@ -688,7 +723,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	/* allocate all needed blocks */
 	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
 	for (a = 0; a < depth - at; a++) {
-		newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+		newblock = ext4_ext_new_meta_block(handle, inode, path,
+						   newext, &err);
 		if (newblock == 0)
 			goto cleanup;
 		ablocks[a] = newblock;
@@ -884,7 +920,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	ext4_fsblk_t newblock;
 	int err = 0;
 
-	newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+	newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
 	if (newblock == 0)
 		return err;
 
@@ -981,6 +1017,8 @@ repeat:
 		/* if we found index with free entry, then use that
 		 * entry: create all needed subtree and add new leaf */
 		err = ext4_ext_split(handle, inode, path, newext, i);
+		if (err)
+			goto out;
 
 		/* refill path */
 		ext4_ext_drop_refs(path);
@@ -1883,11 +1921,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 			credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
 #endif
 
-		handle = ext4_ext_journal_restart(handle, credits);
-		if (IS_ERR(handle)) {
-			err = PTR_ERR(handle);
+		err = ext4_ext_journal_restart(handle, credits);
+		if (err)
 			goto out;
-		}
 
 		err = ext4_ext_get_access(handle, inode, path + depth);
 		if (err)
@@ -2529,6 +2565,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	int err = 0, depth, ret;
 	unsigned long allocated = 0;
 	struct ext4_allocation_request ar;
+	loff_t disksize;
 
 	__clear_bit(BH_New, &bh_result->b_state);
 	ext_debug("blocks %u/%lu requested for inode %u\n",
@@ -2616,8 +2653,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 		 */
 		if (allocated > max_blocks)
 			allocated = max_blocks;
-		/* mark the buffer unwritten */
-		__set_bit(BH_Unwritten, &bh_result->b_state);
+		set_buffer_unwritten(bh_result);
 		goto out2;
 	}
 
@@ -2716,14 +2752,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			goto out2;
 	}
 
-	if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize)
-		EXT4_I(inode)->i_disksize = inode->i_size;
-
 	/* previous routine could use block we allocated */
 	newblock = ext_pblock(&newex);
 	allocated = ext4_ext_get_actual_len(&newex);
 outnew:
-	__set_bit(BH_New, &bh_result->b_state);
+	if (extend_disksize) {
+		disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
+		if (disksize > i_size_read(inode))
+			disksize = i_size_read(inode);
+		if (disksize > EXT4_I(inode)->i_disksize)
+			EXT4_I(inode)->i_disksize = disksize;
+	}
+
+	set_buffer_new(bh_result);
 
 	/* Cache only when it is _not_ an uninitialized extent */
 	if (create != EXT4_CREATE_UNINITIALIZED_EXT)
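The hunk above replaces the early i_disksize update with a clamp computed after allocation: the on-disk size may grow to cover the newly mapped range, but never beyond the in-core i_size. A standalone model of the clamp (all values assumed for illustration):

#include <stdio.h>

static long long new_disksize(long long iblock, long long len, int blkbits,
			      long long i_size, long long i_disksize)
{
	long long disksize = (iblock + len) << blkbits;

	if (disksize > i_size)
		disksize = i_size;	/* never past the in-core size */
	if (disksize > i_disksize)
		i_disksize = disksize;	/* only ever grows */
	return i_disksize;
}

int main(void)
{
	/* 4KiB blocks (blkbits 12): blocks 0..9 newly mapped, i_size 38000 */
	printf("%lld\n", new_disksize(0, 10, 12, 38000, 16384)); /* 38000 */
	return 0;
}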
@@ -2733,7 +2774,7 @@ out:
 	if (allocated > max_blocks)
 		allocated = max_blocks;
 	ext4_ext_show_leaf(inode, path);
-	__set_bit(BH_Mapped, &bh_result->b_state);
+	set_buffer_mapped(bh_result);
 	bh_result->b_bdev = inode->i_sb->s_bdev;
 	bh_result->b_blocknr = newblock;
 out2:
@@ -2744,7 +2785,7 @@ out2:
 	return err ? err : allocated;
 }
 
-void ext4_ext_truncate(struct inode * inode, struct page *page)
+void ext4_ext_truncate(struct inode *inode)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct super_block *sb = inode->i_sb;
@@ -2757,18 +2798,14 @@ void ext4_ext_truncate(struct inode *inode)
 	 */
 	err = ext4_writepage_trans_blocks(inode) + 3;
 	handle = ext4_journal_start(inode, err);
-	if (IS_ERR(handle)) {
-		if (page) {
-			clear_highpage(page);
-			flush_dcache_page(page);
-			unlock_page(page);
-			page_cache_release(page);
-		}
+	if (IS_ERR(handle))
 		return;
-	}
 
-	if (page)
-		ext4_block_truncate_page(handle, page, mapping, inode->i_size);
+	if (inode->i_size & (sb->s_blocksize - 1))
+		ext4_block_truncate_page(handle, mapping, inode->i_size);
 
+	if (ext4_orphan_add(handle, inode))
+		goto out_stop;
+
 	down_write(&EXT4_I(inode)->i_data_sem);
 	ext4_ext_invalidate_cache(inode);
@@ -2780,8 +2817,6 @@ void ext4_ext_truncate(struct inode *inode)
 	 * Probably we need not scan at all,
 	 * because page truncation is enough.
 	 */
-	if (ext4_orphan_add(handle, inode))
-		goto out_stop;
 
 	/* we have to know where to truncate from in crash case */
 	EXT4_I(inode)->i_disksize = inode->i_size;
@@ -2798,6 +2833,7 @@ void ext4_ext_truncate(struct inode *inode)
 		handle->h_sync = 1;
 
 out_stop:
+	up_write(&EXT4_I(inode)->i_data_sem);
 	/*
 	 * If this was a simple ftruncate() and the file will remain alive,
 	 * then we need to clear up the orphan record which we created above.
@@ -2808,7 +2844,6 @@ out_stop:
 	if (inode->i_nlink)
 		ext4_orphan_del(handle, inode);
 
-	up_write(&EXT4_I(inode)->i_data_sem);
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
 	ext4_journal_stop(handle);
@@ -2911,7 +2946,7 @@ retry:
 		}
 		ret = ext4_get_blocks_wrap(handle, inode, block,
 					  max_blocks, &map_bh,
-					  EXT4_CREATE_UNINITIALIZED_EXT, 0);
+					  EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
 		if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
 			WARN_ON(ret <= 0);

@@ -123,6 +123,23 @@ force_commit:
 	return ret;
 }
 
+static struct vm_operations_struct ext4_file_vm_ops = {
+	.fault		= filemap_fault,
+	.page_mkwrite	= ext4_page_mkwrite,
+};
+
+static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct address_space *mapping = file->f_mapping;
+
+	if (!mapping->a_ops->readpage)
+		return -ENOEXEC;
+	file_accessed(file);
+	vma->vm_ops = &ext4_file_vm_ops;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
+	return 0;
+}
+
 const struct file_operations ext4_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -133,7 +150,7 @@ const struct file_operations ext4_file_operations = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext4_compat_ioctl,
 #endif
-	.mmap		= generic_file_mmap,
+	.mmap		= ext4_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext4_release_file,
 	.fsync		= ext4_sync_file,
@@ -144,6 +161,7 @@ const struct file_operations ext4_file_operations = {
 const struct inode_operations ext4_file_inode_operations = {
 	.truncate	= ext4_truncate,
 	.setattr	= ext4_setattr,
+	.getattr	= ext4_getattr,
 #ifdef CONFIG_EXT4DEV_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,

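The new ext4_file_mmap() above exists so that writes through shared mappings go through ->page_mkwrite, which the delayed-allocation work in this merge needs in order to reserve blocks at fault time. From userspace the path is exercised by any shared writable mapping; a minimal example (filename invented):

#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("testfile", O_RDWR | O_CREAT, 0644);

	if (fd < 0 || ftruncate(fd, 4096) != 0)
		return 1;
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	strcpy(p, "dirty me");	/* write fault -> ->page_mkwrite on ext4 */
	munmap(p, 4096);
	close(fd);
	return 0;
}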
@@ -27,6 +27,7 @@
 #include <linux/sched.h>
 #include <linux/writeback.h>
 #include <linux/jbd2.h>
+#include <linux/blkdev.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 
@@ -45,6 +46,7 @@
 int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
 {
 	struct inode *inode = dentry->d_inode;
+	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
 	int ret = 0;
 
 	J_ASSERT(ext4_journal_current_handle() == NULL);
@@ -85,6 +87,8 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
 			.nr_to_write = 0, /* sys_fsync did this */
 		};
 		ret = sync_inode(inode, &wbc);
+		if (journal && (journal->j_flags & JBD2_BARRIER))
+			blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 	}
 out:
 	return ret;

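The fsync hunks above add a blkdev_issue_flush() after sync_inode() when the journal is mounted with barriers, so fsync() also forces the drive's volatile write cache. The userspace contract is unchanged; a minimal caller for reference (filename invented):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("testfile", O_WRONLY | O_CREAT, 0644);

	if (fd < 0)
		return 1;
	if (write(fd, "data\n", 5) != 5)
		return 1;
	if (fsync(fd) != 0)	/* now also flushes the disk write cache */
		perror("fsync");
	close(fd);
	return 0;
}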
@@ -13,7 +13,7 @@ extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
 				   struct ext4_group_desc *gdp);
 extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
 					struct ext4_group_desc *gdp);
-struct buffer_head *read_block_bitmap(struct super_block *sb,
+struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
 				      ext4_group_t block_group);
 extern unsigned ext4_init_block_bitmap(struct super_block *sb,
 				       struct buffer_head *bh,

fs/ext4/ialloc.c (113 lines changed)
@@ -157,6 +157,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
 	struct ext4_super_block * es;
 	struct ext4_sb_info *sbi;
 	int fatal = 0, err;
+	ext4_group_t flex_group;
 
 	if (atomic_read(&inode->i_count) > 1) {
 		printk ("ext4_free_inode: inode has count=%d\n",
@@ -232,6 +233,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
 		if (is_directory)
 			percpu_counter_dec(&sbi->s_dirs_counter);
 
+		if (sbi->s_log_groups_per_flex) {
+			flex_group = ext4_flex_group(sbi, block_group);
+			spin_lock(sb_bgl_lock(sbi, flex_group));
+			sbi->s_flex_groups[flex_group].free_inodes++;
+			spin_unlock(sb_bgl_lock(sbi, flex_group));
+		}
 	}
 	BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
 	err = ext4_journal_dirty_metadata(handle, bh2);
@@ -286,6 +293,80 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
 	return ret;
 }
 
+#define free_block_ratio 10
+
+static int find_group_flex(struct super_block *sb, struct inode *parent,
+			   ext4_group_t *best_group)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_desc *desc;
+	struct buffer_head *bh;
+	struct flex_groups *flex_group = sbi->s_flex_groups;
+	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
+	ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
+	ext4_group_t ngroups = sbi->s_groups_count;
+	int flex_size = ext4_flex_bg_size(sbi);
+	ext4_group_t best_flex = parent_fbg_group;
+	int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
+	int flexbg_free_blocks;
+	int flex_freeb_ratio;
+	ext4_group_t n_fbg_groups;
+	ext4_group_t i;
+
+	n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
+		sbi->s_log_groups_per_flex;
+
+find_close_to_parent:
+	flexbg_free_blocks = flex_group[best_flex].free_blocks;
+	flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
+	if (flex_group[best_flex].free_inodes &&
+	    flex_freeb_ratio > free_block_ratio)
+		goto found_flexbg;
+
+	if (best_flex && best_flex == parent_fbg_group) {
+		best_flex--;
+		goto find_close_to_parent;
+	}
+
+	for (i = 0; i < n_fbg_groups; i++) {
+		if (i == parent_fbg_group || i == parent_fbg_group - 1)
+			continue;
+
+		flexbg_free_blocks = flex_group[i].free_blocks;
+		flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
+
+		if (flex_freeb_ratio > free_block_ratio &&
+		    flex_group[i].free_inodes) {
+			best_flex = i;
+			goto found_flexbg;
+		}
+
+		if (best_flex < 0 ||
+		    (flex_group[i].free_blocks >
+		     flex_group[best_flex].free_blocks &&
+		     flex_group[i].free_inodes))
+			best_flex = i;
+	}
+
+	if (!flex_group[best_flex].free_inodes ||
+	    !flex_group[best_flex].free_blocks)
+		return -1;
+
+found_flexbg:
+	for (i = best_flex * flex_size; i < ngroups &&
+		     i < (best_flex + 1) * flex_size; i++) {
+		desc = ext4_get_group_desc(sb, i, &bh);
+		if (le16_to_cpu(desc->bg_free_inodes_count)) {
+			*best_group = i;
+			goto out;
+		}
+	}
+
+	return -1;
+out:
+	return 0;
+}
+
 /*
  * Orlov's allocator for directories.
  *
|
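find_group_flex() above prefers the parent's flex group while it still has free inodes and more than free_block_ratio (10%) of its blocks free, and otherwise scans for a better candidate. The threshold test reduced to its arithmetic, with group geometry assumed for illustration (16 groups per flex group, 32768 blocks per group):

#include <stdio.h>

#define FREE_BLOCK_RATIO 10	/* "free_block_ratio" in the diff */

int main(void)
{
	int blocks_per_flex = 32768 * 16;
	int flexbg_free_blocks = 90000;
	int ratio = flexbg_free_blocks * 100 / blocks_per_flex;

	printf("free ratio %d%% -> %s\n", ratio,
	       ratio > FREE_BLOCK_RATIO ? "use this flex group"
					: "keep scanning");
	return 0;
}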
@@ -501,6 +582,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
 	struct inode *ret;
 	ext4_group_t i;
 	int free = 0;
+	ext4_group_t flex_group;
 
 	/* Cannot create files in a deleted directory */
 	if (!dir || !dir->i_nlink)
@@ -514,6 +596,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
 
 	sbi = EXT4_SB(sb);
 	es = sbi->s_es;
+
+	if (sbi->s_log_groups_per_flex) {
+		ret2 = find_group_flex(sb, dir, &group);
+		goto got_group;
+	}
+
 	if (S_ISDIR(mode)) {
 		if (test_opt (sb, OLDALLOC))
 			ret2 = find_group_dir(sb, dir, &group);
@@ -522,6 +610,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
 	} else
 		ret2 = find_group_other(sb, dir, &group);
 
+got_group:
 	err = -ENOSPC;
 	if (ret2 == -1)
 		goto out;
@@ -600,7 +689,7 @@ got:
 	/* We may have to initialize the block bitmap if it isn't already */
 	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
 	    gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-		struct buffer_head *block_bh = read_block_bitmap(sb, group);
+		struct buffer_head *block_bh = ext4_read_block_bitmap(sb, group);
 
 		BUFFER_TRACE(block_bh, "get block bitmap access");
 		err = ext4_journal_get_write_access(handle, block_bh);
@@ -676,6 +765,13 @@ got:
 		percpu_counter_inc(&sbi->s_dirs_counter);
 	sb->s_dirt = 1;
 
+	if (sbi->s_log_groups_per_flex) {
+		flex_group = ext4_flex_group(sbi, group);
+		spin_lock(sb_bgl_lock(sbi, flex_group));
+		sbi->s_flex_groups[flex_group].free_inodes--;
+		spin_unlock(sb_bgl_lock(sbi, flex_group));
+	}
+
 	inode->i_uid = current->fsuid;
 	if (test_opt (sb, GRPID))
 		inode->i_gid = dir->i_gid;
@@ -740,14 +836,10 @@ got:
 		goto fail_free_drop;
 
 	if (test_opt(sb, EXTENTS)) {
-		/* set extent flag only for diretory, file and normal symlink*/
+		/* set extent flag only for directory, file and normal symlink*/
 		if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
 			EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
 			ext4_ext_tree_init(handle, inode);
-			err = ext4_update_incompat_feature(handle, sb,
-					EXT4_FEATURE_INCOMPAT_EXTENTS);
-			if (err)
-				goto fail_free_drop;
 		}
 	}
 
@@ -817,6 +909,14 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
 	if (IS_ERR(inode))
 		goto iget_failed;
 
+	/*
+	 * If the orphans has i_nlinks > 0 then it should be able to be
+	 * truncated, otherwise it won't be removed from the orphan list
+	 * during processing and an infinite loop will result.
+	 */
+	if (inode->i_nlink && !ext4_can_truncate(inode))
+		goto bad_orphan;
+
 	if (NEXT_ORPHAN(inode) > max_ino)
 		goto bad_orphan;
 	brelse(bitmap_bh);
@@ -838,6 +938,7 @@ bad_orphan:
 		printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
 		       NEXT_ORPHAN(inode));
 		printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+		printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
 		/* Avoid freeing blocks if we got a bad deleted inode */
 		if (inode->i_nlink == 0)
 			inode->i_blocks = 0;

fs/ext4/inode.c (1599 lines changed; diff suppressed because it is too large)
@@ -381,22 +381,28 @@ static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
 
 static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 {
-	int fix = 0;
+	int fix = 0, ret, tmpmax;
 	addr = mb_correct_addr_and_bit(&fix, addr);
-	max += fix;
+	tmpmax = max + fix;
 	start += fix;
 
-	return ext4_find_next_zero_bit(addr, max, start) - fix;
+	ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
+	if (ret > max)
+		return max;
+	return ret;
 }
 
 static inline int mb_find_next_bit(void *addr, int max, int start)
 {
-	int fix = 0;
+	int fix = 0, ret, tmpmax;
 	addr = mb_correct_addr_and_bit(&fix, addr);
-	max += fix;
+	tmpmax = max + fix;
 	start += fix;
 
-	return ext4_find_next_bit(addr, max, start) - fix;
+	ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
+	if (ret > max)
+		return max;
+	return ret;
 }
 
 static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
|
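The mb_find_next_zero_bit()/mb_find_next_bit() fix above guards against bitmap searches that scan to max + fix (the alignment fixup) and, on some architectures' find-next-bit implementations, report "not found" as a word-rounded value past the requested size. A standalone model of the failure and the clamp (word size, data, and helper names assumed):

#include <stdio.h>
#include <string.h>

/* Model of the underlying search: on a miss it returns the limit
 * rounded up to an 8-bit "word", which can exceed the limit itself. */
static int find_next_zero_bit_model(const unsigned char *bits, int limit,
				    int start)
{
	int i;

	for (i = start; i < limit; i++)
		if (!bits[i])
			return i;
	return (limit + 7) & ~7;	/* miss: may be > limit */
}

/* The fixed wrapper: search to tmpmax = max + fix, then clamp to max. */
static int mb_find_next_zero_bit_model(const unsigned char *bits,
				       int max, int start, int fix)
{
	int tmpmax = max + fix;
	int ret = find_next_zero_bit_model(bits, tmpmax, start + fix) - fix;

	if (ret > max)
		return max;
	return ret;
}

int main(void)
{
	unsigned char bits[16];

	memset(bits, 1, sizeof(bits));	/* no zero bits anywhere */
	/* unclamped result would be ((6+7)&~7) - 2 = 6 > max; prints 4 */
	printf("%d\n", mb_find_next_zero_bit_model(bits, 4, 0, 2));
	return 0;
}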
@@ -803,6 +809,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 		if (!buffer_uptodate(bh[i]))
 			goto out;
 
+	err = 0;
 	first_block = page->index * blocks_per_page;
 	for (i = 0; i < blocks_per_page; i++) {
 		int group;
@@ -883,6 +890,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	int pnum;
 	int poff;
 	struct page *page;
+	int ret;
 
 	mb_debug("load group %lu\n", group);
 
@@ -914,15 +922,21 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 		if (page) {
 			BUG_ON(page->mapping != inode->i_mapping);
 			if (!PageUptodate(page)) {
-				ext4_mb_init_cache(page, NULL);
+				ret = ext4_mb_init_cache(page, NULL);
+				if (ret) {
+					unlock_page(page);
+					goto err;
+				}
 				mb_cmp_bitmaps(e4b, page_address(page) +
 					       (poff * sb->s_blocksize));
 			}
 			unlock_page(page);
 		}
 	}
-	if (page == NULL || !PageUptodate(page))
+	if (page == NULL || !PageUptodate(page)) {
+		ret = -EIO;
 		goto err;
+	}
 	e4b->bd_bitmap_page = page;
 	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
 	mark_page_accessed(page);
@@ -938,14 +952,20 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
 		if (page) {
 			BUG_ON(page->mapping != inode->i_mapping);
-			if (!PageUptodate(page))
-				ext4_mb_init_cache(page, e4b->bd_bitmap);
+			if (!PageUptodate(page)) {
+				ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+				if (ret) {
+					unlock_page(page);
+					goto err;
+				}
+			}
 			unlock_page(page);
 		}
 	}
-	if (page == NULL || !PageUptodate(page))
+	if (page == NULL || !PageUptodate(page)) {
+		ret = -EIO;
 		goto err;
+	}
 	e4b->bd_buddy_page = page;
 	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
 	mark_page_accessed(page);
@@ -962,7 +982,7 @@ err:
 		page_cache_release(e4b->bd_buddy_page);
 	e4b->bd_buddy = NULL;
 	e4b->bd_bitmap = NULL;
-	return -EIO;
+	return ret;
 }
 
 static void ext4_mb_release_desc(struct ext4_buddy *e4b)
@@ -1031,7 +1051,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
 	}
 }
 
-static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
+static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 			  int first, int count)
 {
 	int block = 0;
@@ -1071,11 +1091,12 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 			blocknr += block;
 			blocknr +=
 			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+			ext4_unlock_group(sb, e4b->bd_group);
 			ext4_error(sb, __func__, "double-free of inode"
 				   " %lu's block %llu(bit %u in group %lu)\n",
 				   inode ? inode->i_ino : 0, blocknr, block,
 				   e4b->bd_group);
+			ext4_lock_group(sb, e4b->bd_group);
 		}
 		mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
 		e4b->bd_info->bb_counters[order]++;
@@ -1113,8 +1134,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 		} while (1);
 	}
 	mb_check_buddy(e4b);
-
-	return 0;
 }
 
 static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
@@ -1730,10 +1749,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
 		spin_unlock(&sbi->s_md_lock);
 	}
-
-	/* searching for the right group start from the goal value specified */
-	group = ac->ac_g_ex.fe_group;
-
 	/* Let's just scan groups to find more-less suitable blocks */
 	cr = ac->ac_2order ? 0 : 1;
 	/*
@@ -1743,6 +1758,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 repeat:
 	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
 		ac->ac_criteria = cr;
+		/*
+		 * searching for the right group start
+		 * from the goal value specified
+		 */
+		group = ac->ac_g_ex.fe_group;
+
 		for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
 			struct ext4_group_info *grp;
 			struct ext4_group_desc *desc;
@@ -1963,6 +1984,8 @@ static int ext4_mb_seq_history_open(struct inode *inode, struct file *file)
 	int rc;
 	int size;
 
+	if (unlikely(sbi->s_mb_history == NULL))
+		return -ENOMEM;
 	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (s == NULL)
 		return -ENOMEM;
@@ -2165,9 +2188,7 @@ static void ext4_mb_history_init(struct super_block *sb)
 	sbi->s_mb_history_cur = 0;
 	spin_lock_init(&sbi->s_mb_history_lock);
 	i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
-	sbi->s_mb_history = kmalloc(i, GFP_KERNEL);
-	if (likely(sbi->s_mb_history != NULL))
-		memset(sbi->s_mb_history, 0, i);
+	sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
 	/* if we can't allocate history, then we simple won't use it */
 }
 
@@ -2215,21 +2236,192 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
 #define ext4_mb_history_init(sb)
 #endif
 
+
+/* Create and initialize ext4_group_info data for the given group. */
+int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
+			  struct ext4_group_desc *desc)
+{
+	int i, len;
+	int metalen = 0;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_info **meta_group_info;
+
+	/*
+	 * First check if this group is the first of a reserved block.
+	 * If it's true, we have to allocate a new table of pointers
+	 * to ext4_group_info structures
+	 */
+	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
+		metalen = sizeof(*meta_group_info) <<
+			EXT4_DESC_PER_BLOCK_BITS(sb);
+		meta_group_info = kmalloc(metalen, GFP_KERNEL);
+		if (meta_group_info == NULL) {
+			printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
+			       "buddy group\n");
+			goto exit_meta_group_info;
+		}
+		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
+			meta_group_info;
+	}
+
+	/*
+	 * calculate needed size. if change bb_counters size,
+	 * don't forget about ext4_mb_generate_buddy()
+	 */
+	len = offsetof(typeof(**meta_group_info),
+		       bb_counters[sb->s_blocksize_bits + 2]);
+
+	meta_group_info =
+		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
+	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
+
+	meta_group_info[i] = kzalloc(len, GFP_KERNEL);
+	if (meta_group_info[i] == NULL) {
+		printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
+		goto exit_group_info;
+	}
+	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
+		&(meta_group_info[i]->bb_state));
+
+	/*
+	 * initialize bb_free to be able to skip
+	 * empty groups without initialization
+	 */
+	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+		meta_group_info[i]->bb_free =
+			ext4_free_blocks_after_init(sb, group, desc);
+	} else {
+		meta_group_info[i]->bb_free =
+			le16_to_cpu(desc->bg_free_blocks_count);
+	}
+
+	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+
+#ifdef DOUBLE_CHECK
+	{
+		struct buffer_head *bh;
+		meta_group_info[i]->bb_bitmap =
+			kmalloc(sb->s_blocksize, GFP_KERNEL);
+		BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
+		bh = ext4_read_block_bitmap(sb, group);
+		BUG_ON(bh == NULL);
+		memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
+		       sb->s_blocksize);
+		put_bh(bh);
+	}
+#endif
+
+	return 0;
+
+exit_group_info:
+	/* If a meta_group_info table has been allocated, release it now */
+	if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
+		kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
+exit_meta_group_info:
+	return -ENOMEM;
+} /* ext4_mb_add_groupinfo */
+
+/*
+ * Add a group to the existing groups.
+ * This function is used for online resize
+ */
+int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
+			       struct ext4_group_desc *desc)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct inode *inode = sbi->s_buddy_cache;
+	int blocks_per_page;
+	int block;
+	int pnum;
+	struct page *page;
+	int err;
+
+	/* Add group based on group descriptor*/
+	err = ext4_mb_add_groupinfo(sb, group, desc);
+	if (err)
+		return err;
+
+	/*
+	 * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
+	 * datas) are set not up to date so that they will be re-initilaized
+	 * during the next call to ext4_mb_load_buddy
+	 */
+
+	/* Set buddy page as not up to date */
+	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	block = group * 2;
+	pnum = block / blocks_per_page;
+	page = find_get_page(inode->i_mapping, pnum);
+	if (page != NULL) {
+		ClearPageUptodate(page);
+		page_cache_release(page);
+	}
+
+	/* Set bitmap page as not up to date */
+	block++;
+	pnum = block / blocks_per_page;
+	page = find_get_page(inode->i_mapping, pnum);
+	if (page != NULL) {
+		ClearPageUptodate(page);
+		page_cache_release(page);
+	}
+
+	return 0;
+}
+
+/*
+ * Update an existing group.
+ * This function is used for online resize
+ */
+void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
+{
+	grp->bb_free += add;
+}
+
 static int ext4_mb_init_backend(struct super_block *sb)
 {
 	ext4_group_t i;
-	int j, len, metalen;
+	int metalen;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	int num_meta_group_infos =
-		(sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) >>
-			EXT4_DESC_PER_BLOCK_BITS(sb);
+	struct ext4_super_block *es = sbi->s_es;
+	int num_meta_group_infos;
+	int num_meta_group_infos_max;
+	int array_size;
 	struct ext4_group_info **meta_group_info;
+	struct ext4_group_desc *desc;
+
+	/* This is the number of blocks used by GDT */
+	num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
+				1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
+
+	/*
+	 * This is the total number of blocks used by GDT including
+	 * the number of reserved blocks for GDT.
+	 * The s_group_info array is allocated with this value
+	 * to allow a clean online resize without a complex
+	 * manipulation of pointer.
+	 * The drawback is the unused memory when no resize
+	 * occurs but it's very low in terms of pages
+	 * (see comments below)
+	 * Need to handle this properly when META_BG resizing is allowed
+	 */
+	num_meta_group_infos_max = num_meta_group_infos +
+		le16_to_cpu(es->s_reserved_gdt_blocks);
+
+	/*
+	 * array_size is the size of s_group_info array. We round it
+	 * to the next power of two because this approximation is done
+	 * internally by kmalloc so we can have some more memory
+	 * for free here (e.g. may be used for META_BG resize).
+	 */
+	array_size = 1;
+	while (array_size < sizeof(*sbi->s_group_info) *
+	       num_meta_group_infos_max)
+		array_size = array_size << 1;
 	/* An 8TB filesystem with 64-bit pointers requires a 4096 byte
 	 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
 	 * So a two level scheme suffices for now. */
-	sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) *
-				    num_meta_group_infos, GFP_KERNEL);
+	sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
 	if (sbi->s_group_info == NULL) {
 		printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
 		return -ENOMEM;
|
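The rewritten ext4_mb_init_backend() above sizes s_group_info for the current groups plus every reserved GDT block, then rounds the allocation up to a power of two on the argument that kmalloc rounds internally anyway, so the slack costs nothing and a later META_BG resize can use it. The rounding loop in isolation (sample numbers assumed):

#include <stdio.h>

static unsigned long round_up_pow2(unsigned long needed)
{
	unsigned long array_size = 1;

	while (array_size < needed)
		array_size = array_size << 1;
	return array_size;
}

int main(void)
{
	/* e.g. 100 group-info pointers of 8 bytes -> 800 -> 1024 bytes */
	printf("%lu\n", round_up_pow2(100 * 8));
	return 0;
}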
@@ -2256,63 +2448,15 @@ static int ext4_mb_init_backend(struct super_block *sb)
 		sbi->s_group_info[i] = meta_group_info;
 	}
 
-	/*
-	 * calculate needed size. if change bb_counters size,
-	 * don't forget about ext4_mb_generate_buddy()
-	 */
-	len = sizeof(struct ext4_group_info);
-	len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2);
 	for (i = 0; i < sbi->s_groups_count; i++) {
-		struct ext4_group_desc *desc;
-
-		meta_group_info =
-			sbi->s_group_info[i >> EXT4_DESC_PER_BLOCK_BITS(sb)];
-		j = i & (EXT4_DESC_PER_BLOCK(sb) - 1);
-
-		meta_group_info[j] = kzalloc(len, GFP_KERNEL);
-		if (meta_group_info[j] == NULL) {
-			printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
-			goto err_freebuddy;
-		}
 		desc = ext4_get_group_desc(sb, i, NULL);
 		if (desc == NULL) {
 			printk(KERN_ERR
 				"EXT4-fs: can't read descriptor %lu\n", i);
-			i++;
 			goto err_freebuddy;
 		}
-		memset(meta_group_info[j], 0, len);
-		set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
-			&(meta_group_info[j]->bb_state));
-
-		/*
-		 * initialize bb_free to be able to skip
-		 * empty groups without initialization
-		 */
-		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-			meta_group_info[j]->bb_free =
-				ext4_free_blocks_after_init(sb, i, desc);
-		} else {
-			meta_group_info[j]->bb_free =
-				le16_to_cpu(desc->bg_free_blocks_count);
-		}
-
-		INIT_LIST_HEAD(&meta_group_info[j]->bb_prealloc_list);
-
-#ifdef DOUBLE_CHECK
-		{
-			struct buffer_head *bh;
-			meta_group_info[j]->bb_bitmap =
-				kmalloc(sb->s_blocksize, GFP_KERNEL);
-			BUG_ON(meta_group_info[j]->bb_bitmap == NULL);
-			bh = read_block_bitmap(sb, i);
-			BUG_ON(bh == NULL);
-			memcpy(meta_group_info[j]->bb_bitmap, bh->b_data,
-			       sb->s_blocksize);
-			put_bh(bh);
-		}
-#endif
-
+		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
+			goto err_freebuddy;
 	}
 
 	return 0;
@@ -2336,6 +2480,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	unsigned i;
 	unsigned offset;
 	unsigned max;
+	int ret;
 
 	if (!test_opt(sb, MBALLOC))
 		return 0;

@@ -2370,12 +2515,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	} while (i <= sb->s_blocksize_bits + 1);
 
 	/* init file for buddy data */
-	i = ext4_mb_init_backend(sb);
-	if (i) {
+	ret = ext4_mb_init_backend(sb);
+	if (ret != 0) {
 		clear_opt(sbi->s_mount_opt, MBALLOC);
 		kfree(sbi->s_mb_offsets);
 		kfree(sbi->s_mb_maxs);
-		return i;
+		return ret;
 	}
 
 	spin_lock_init(&sbi->s_md_lock);
@@ -2548,8 +2693,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
 		ext4_lock_group(sb, md->group);
 		for (i = 0; i < md->num; i++) {
 			mb_debug(" %u", md->blocks[i]);
-			err = mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
-			BUG_ON(err != 0);
+			mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
 		}
 		mb_debug("\n");
 		ext4_unlock_group(sb, md->group);

@@ -2575,25 +2719,24 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
 
 
 
-#define MB_PROC_VALUE_READ(name)				\
-static int ext4_mb_read_##name(char *page, char **start,	\
-		off_t off, int count, int *eof, void *data)	\
+#define MB_PROC_FOPS(name)					\
+static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v)	\
 {								\
-	struct ext4_sb_info *sbi = data;			\
-	int len;						\
-	*eof = 1;						\
-	if (off != 0)						\
-		return 0;					\
-	len = sprintf(page, "%ld\n", sbi->s_mb_##name);		\
-	*start = page;						\
-	return len;						\
-}
-
-#define MB_PROC_VALUE_WRITE(name)				\
-static int ext4_mb_write_##name(struct file *file,		\
-		const char __user *buf, unsigned long cnt, void *data)	\
+	struct ext4_sb_info *sbi = m->private;			\
+								\
+	seq_printf(m, "%ld\n", sbi->s_mb_##name);		\
+	return 0;						\
+}								\
+								\
+static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\
+{								\
+	return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\
+}								\
+								\
+static ssize_t ext4_mb_##name##_proc_write(struct file *file,	\
+		const char __user *buf, size_t cnt, loff_t *ppos)	\
 {								\
-	struct ext4_sb_info *sbi = data;			\
+	struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\
 	char str[32];						\
 	long value;						\
 	if (cnt >= sizeof(str))					\
@@ -2605,31 +2748,32 @@ static int ext4_mb_write_##name(struct file *file, \
 		return -ERANGE;					\
 	sbi->s_mb_##name = value;				\
 	return cnt;						\
-}
+}								\
+								\
+static const struct file_operations ext4_mb_##name##_proc_fops = {	\
+	.owner		= THIS_MODULE,				\
+	.open		= ext4_mb_##name##_proc_open,		\
+	.read		= seq_read,				\
+	.llseek		= seq_lseek,				\
+	.release	= single_release,			\
+	.write		= ext4_mb_##name##_proc_write,		\
+};
 
-MB_PROC_VALUE_READ(stats);
-MB_PROC_VALUE_WRITE(stats);
-MB_PROC_VALUE_READ(max_to_scan);
-MB_PROC_VALUE_WRITE(max_to_scan);
-MB_PROC_VALUE_READ(min_to_scan);
-MB_PROC_VALUE_WRITE(min_to_scan);
-MB_PROC_VALUE_READ(order2_reqs);
-MB_PROC_VALUE_WRITE(order2_reqs);
-MB_PROC_VALUE_READ(stream_request);
-MB_PROC_VALUE_WRITE(stream_request);
-MB_PROC_VALUE_READ(group_prealloc);
-MB_PROC_VALUE_WRITE(group_prealloc);
+MB_PROC_FOPS(stats);
+MB_PROC_FOPS(max_to_scan);
+MB_PROC_FOPS(min_to_scan);
+MB_PROC_FOPS(order2_reqs);
+MB_PROC_FOPS(stream_request);
+MB_PROC_FOPS(group_prealloc);
 
 #define MB_PROC_HANDLER(name, var)				\
 do {								\
-	proc = create_proc_entry(name, mode, sbi->s_mb_proc);	\
+	proc = proc_create_data(name, mode, sbi->s_mb_proc,	\
+				&ext4_mb_##var##_proc_fops, sbi);	\
 	if (proc == NULL) {					\
 		printk(KERN_ERR "EXT4-fs: can't to create %s\n", name);	\
 		goto err_out;					\
 	}							\
-	proc->data = sbi;					\
-	proc->read_proc = ext4_mb_read_##var ;			\
-	proc->write_proc = ext4_mb_write_##var;			\
 } while (0)
 
 static int ext4_mb_init_per_dev_proc(struct super_block *sb)
|
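The MB_PROC_FOPS conversion above moves the per-device mballoc tunables from the legacy read_proc/write_proc callbacks to the seq_file single_open pattern. The same pattern outside the macro, as a minimal sketch against the 2.6.26-era proc API ("foo" and its value are invented for illustration):

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static long foo_value = 42;	/* stands in for sbi->s_mb_<name> */

static int foo_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%ld\n", *(long *)m->private);
	return 0;
}

static int foo_proc_open(struct inode *inode, struct file *file)
{
	/* PDE(inode)->data carries the per-entry payload set at creation */
	return single_open(file, foo_proc_show, PDE(inode)->data);
}

static const struct file_operations foo_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = foo_proc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static int __init foo_init(void)
{
	proc_create_data("foo", 0444, NULL, &foo_proc_fops, &foo_value);
	return 0;
}

static void __exit foo_exit(void)
{
	remove_proc_entry("foo", NULL);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");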
@@ -2639,6 +2783,10 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
 	struct proc_dir_entry *proc;
 	char devname[64];
 
+	if (proc_root_ext4 == NULL) {
+		sbi->s_mb_proc = NULL;
+		return -EINVAL;
+	}
 	bdevname(sb->s_bdev, devname);
 	sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
 
@@ -2747,7 +2895,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 
 
 	err = -EIO;
-	bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group);
+	bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
 	if (!bitmap_bh)
 		goto out_err;
 
@ -2816,7 +2964,23 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
|
||||||
le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
|
le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
|
||||||
gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
|
gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
|
||||||
spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
|
spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
|
||||||
percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
|
|
||||||
|
/*
|
||||||
|
* free blocks account has already be reduced/reserved
|
||||||
|
* at write_begin() time for delayed allocation
|
||||||
|
* do not double accounting
|
||||||
|
*/
|
||||||
|
if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
|
||||||
|
percpu_counter_sub(&sbi->s_freeblocks_counter,
|
||||||
|
ac->ac_b_ex.fe_len);
|
||||||
|
|
||||||
|
if (sbi->s_log_groups_per_flex) {
|
||||||
|
ext4_group_t flex_group = ext4_flex_group(sbi,
|
||||||
|
ac->ac_b_ex.fe_group);
|
||||||
|
spin_lock(sb_bgl_lock(sbi, flex_group));
|
||||||
|
sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len;
|
||||||
|
spin_unlock(sb_bgl_lock(sbi, flex_group));
|
||||||
|
}
|
||||||
|
|
||||||
err = ext4_journal_dirty_metadata(handle, bitmap_bh);
|
err = ext4_journal_dirty_metadata(handle, bitmap_bh);
|
||||||
if (err)
|
if (err)
|
||||||
|
@ -3473,8 +3637,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
|
||||||
if (bit >= end)
|
if (bit >= end)
|
||||||
break;
|
break;
|
||||||
next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
|
next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
|
||||||
if (next > end)
|
|
||||||
next = end;
|
|
||||||
start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
|
start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
|
||||||
le32_to_cpu(sbi->s_es->s_first_data_block);
|
le32_to_cpu(sbi->s_es->s_first_data_block);
|
||||||
mb_debug(" free preallocated %u/%u in group %u\n",
|
mb_debug(" free preallocated %u/%u in group %u\n",
|
||||||
|
@ -3569,7 +3731,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
|
||||||
if (list_empty(&grp->bb_prealloc_list))
|
if (list_empty(&grp->bb_prealloc_list))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
bitmap_bh = read_block_bitmap(sb, group);
|
bitmap_bh = ext4_read_block_bitmap(sb, group);
|
||||||
if (bitmap_bh == NULL) {
|
if (bitmap_bh == NULL) {
|
||||||
/* error handling here */
|
/* error handling here */
|
||||||
ext4_mb_release_desc(&e4b);
|
ext4_mb_release_desc(&e4b);
|
||||||
|
@ -3743,7 +3905,7 @@ repeat:
|
||||||
err = ext4_mb_load_buddy(sb, group, &e4b);
|
err = ext4_mb_load_buddy(sb, group, &e4b);
|
||||||
BUG_ON(err != 0); /* error handling here */
|
BUG_ON(err != 0); /* error handling here */
|
||||||
|
|
||||||
bitmap_bh = read_block_bitmap(sb, group);
|
bitmap_bh = ext4_read_block_bitmap(sb, group);
|
||||||
if (bitmap_bh == NULL) {
|
if (bitmap_bh == NULL) {
|
||||||
/* error handling here */
|
/* error handling here */
|
||||||
ext4_mb_release_desc(&e4b);
|
ext4_mb_release_desc(&e4b);
|
||||||
|
@ -4011,10 +4173,21 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
|
||||||
sbi = EXT4_SB(sb);
|
sbi = EXT4_SB(sb);
|
||||||
|
|
||||||
if (!test_opt(sb, MBALLOC)) {
|
if (!test_opt(sb, MBALLOC)) {
|
||||||
block = ext4_new_blocks_old(handle, ar->inode, ar->goal,
|
block = ext4_old_new_blocks(handle, ar->inode, ar->goal,
|
||||||
&(ar->len), errp);
|
&(ar->len), errp);
|
||||||
return block;
|
return block;
|
||||||
}
|
}
|
||||||
|
if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
|
||||||
|
/*
|
||||||
|
* With delalloc we already reserved the blocks
|
||||||
|
*/
|
||||||
|
ar->len = ext4_has_free_blocks(sbi, ar->len);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ar->len == 0) {
|
||||||
|
*errp = -ENOSPC;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
|
while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
|
||||||
ar->flags |= EXT4_MB_HINT_NOPREALLOC;
|
ar->flags |= EXT4_MB_HINT_NOPREALLOC;
|
||||||
|
@ -4026,10 +4199,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
|
||||||
}
|
}
|
 		inquota = ar->len;
 
+	if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
 
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 	if (!ac) {
+		ar->len = 0;
 		*errp = -ENOMEM;
-		return 0;
+		goto out1;
 	}
 
 	ext4_mb_poll_new_transaction(sb, handle);

@@ -4037,12 +4214,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	*errp = ext4_mb_initialize_context(ac, ar);
 	if (*errp) {
 		ar->len = 0;
-		goto out;
+		goto out2;
 	}
 
 	ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
 	if (!ext4_mb_use_preallocated(ac)) {
 
 		ac->ac_op = EXT4_MB_HISTORY_ALLOC;
 		ext4_mb_normalize_request(ac, ar);
 repeat:

@@ -4085,11 +4261,12 @@ repeat:
 
 	ext4_mb_release_context(ac);
 
-out:
+out2:
+	kmem_cache_free(ext4_ac_cachep, ac);
+out1:
 	if (ar->len < inquota)
 		DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
 
-	kmem_cache_free(ext4_ac_cachep, ac);
 	return block;
 }
 
 static void ext4_mb_poll_new_transaction(struct super_block *sb,

@@ -4242,7 +4419,7 @@ do_more:
 		overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
 		count -= overflow;
 	}
-	bitmap_bh = read_block_bitmap(sb, block_group);
+	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
 	if (!bitmap_bh)
 		goto error_return;
 	gdp = ext4_get_group_desc(sb, block_group, &gd_bh);

@@ -4309,10 +4486,9 @@ do_more:
 		ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
 	} else {
 		ext4_lock_group(sb, block_group);
-		err = mb_free_blocks(inode, &e4b, bit, count);
+		mb_free_blocks(inode, &e4b, bit, count);
 		ext4_mb_return_to_preallocation(inode, &e4b, block, count);
 		ext4_unlock_group(sb, block_group);
-		BUG_ON(err != 0);
 	}
 
 	spin_lock(sb_bgl_lock(sbi, block_group));

@@ -4321,6 +4497,13 @@ do_more:
 	spin_unlock(sb_bgl_lock(sbi, block_group));
 	percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
+	if (sbi->s_log_groups_per_flex) {
+		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
+		spin_lock(sb_bgl_lock(sbi, flex_group));
+		sbi->s_flex_groups[flex_group].free_blocks += count;
+		spin_unlock(sb_bgl_lock(sbi, flex_group));
+	}
+
 	ext4_mb_release_desc(&e4b);
 
 	*freed += count;
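The relabelled exit paths in ext4_mb_new_blocks() above are the usual kernel goto-cleanup idiom: each failure jumps to the label that unwinds exactly the state established so far (out2 frees the allocation context, out1 then returns any unused quota). A minimal user-space sketch of the same pattern, with hypothetical names rather than the ext4 code itself:

#include <stdlib.h>

/* Staged goto cleanup: each failure jumps to the label that undoes
 * only what was set up before the failure point. */
static int do_alloc(size_t len)
{
	char *ctx, *buf;
	int err = 0;

	ctx = malloc(64);		/* analogue of the ac context */
	if (!ctx) {
		err = -1;
		goto out1;		/* nothing to undo yet */
	}

	buf = malloc(len);		/* analogue of the block request */
	if (!buf) {
		err = -1;
		goto out2;		/* ctx must still be freed */
	}

	/* ... use buf ... */
	free(buf);
out2:
	free(ctx);
out1:
	return err;
}

int main(void)
{
	return do_alloc(16) ? 1 : 0;
}

The payoff of ordering the labels this way is that there is exactly one copy of each release call, so a path can never double-free or leak as new early returns are added.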
fs/ext4/namei.c

@@ -182,6 +182,16 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
 static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 			     struct inode *inode);
 
+/*
+ * p is at least 6 bytes before the end of page
+ */
+static inline struct ext4_dir_entry_2 *
+ext4_next_entry(struct ext4_dir_entry_2 *p)
+{
+	return (struct ext4_dir_entry_2 *)((char *)p +
+		ext4_rec_len_from_disk(p->rec_len));
+}
+
 /*
  * Future: use high four bits of block for coalesce-on-delete flags
  * Mask them off for now.

@@ -231,13 +241,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
 {
 	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
 		EXT4_DIR_REC_LEN(2) - infosize;
-	return 0? 20: entry_space / sizeof(struct dx_entry);
+	return entry_space / sizeof(struct dx_entry);
 }
 
 static inline unsigned dx_node_limit (struct inode *dir)
 {
 	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
-	return 0? 22: entry_space / sizeof(struct dx_entry);
+	return entry_space / sizeof(struct dx_entry);
 }
 
 /*

@@ -553,15 +563,6 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 }
 
-
-/*
- * p is at least 6 bytes before the end of page
- */
-static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p)
-{
-	return (struct ext4_dir_entry_2 *)((char *)p +
-		ext4_rec_len_from_disk(p->rec_len));
-}
-
 /*
  * This function fills a red-black tree with information from a
  * directory block. It returns the number directory entries loaded

@@ -993,20 +994,22 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
 		de = (struct ext4_dir_entry_2 *) bh->b_data;
 		top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
 				       EXT4_DIR_REC_LEN(0));
-		for (; de < top; de = ext4_next_entry(de))
-		if (ext4_match (namelen, name, de)) {
-			if (!ext4_check_dir_entry("ext4_find_entry",
-						  dir, de, bh,
-				(block<<EXT4_BLOCK_SIZE_BITS(sb))
-					+((char *)de - bh->b_data))) {
-				brelse (bh);
+		for (; de < top; de = ext4_next_entry(de)) {
+			int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
+				  + ((char *) de - bh->b_data);
+
+			if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) {
+				brelse(bh);
 				*err = ERR_BAD_DX_DIR;
 				goto errout;
 			}
+
+			if (ext4_match(namelen, name, de)) {
 				*res_dir = de;
-			dx_release (frames);
+				dx_release(frames);
 				return bh;
 			}
+		}
 		brelse (bh);
 		/* Check to see if we should continue to search */
 		retval = ext4_htree_next_block(dir, hash, frame,
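The ext4_next_entry() helper that moved above encapsulates how ext4 walks a directory block: each on-disk entry stores rec_len, the byte distance to the next entry, so iteration is pointer arithmetic rather than fixed-size indexing, and deleted entries disappear by being absorbed into a neighbour's rec_len. A self-contained sketch of that walk over a cut-down entry layout (the real code goes through ext4_rec_len_from_disk() so block sizes over 64KB still fit the 16-bit field):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Cut-down ext4_dir_entry_2: rec_len chains entries within a block. */
struct dir_entry {
	uint32_t inode;
	uint16_t rec_len;	/* bytes from this entry to the next */
	char     name[26];	/* variable length on disk; fixed here */
};

static struct dir_entry *next_entry(struct dir_entry *p)
{
	/* the moved helper does this via ext4_rec_len_from_disk() */
	return (struct dir_entry *)((char *)p + p->rec_len);
}

int main(void)
{
	char block[64] = {0};
	struct dir_entry *de = (struct dir_entry *)block;
	char *top = block + sizeof(block);

	de->inode = 11; de->rec_len = 32; strcpy(de->name, "a");
	next_entry(de)->inode = 12;
	next_entry(de)->rec_len = 32;
	strcpy(next_entry(de)->name, "b");

	for (de = (struct dir_entry *)block;
	     (char *)de < top && de->rec_len; de = next_entry(de))
		printf("inode %u name %s\n", de->inode, de->name);
	return 0;
}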
fs/ext4/resize.c

@@ -865,6 +865,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
 
+	/*
+	 * We can allocate memory for mb_alloc based on the new group
+	 * descriptor
+	 */
+	if (test_opt(sb, MBALLOC)) {
+		err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
+		if (err)
+			goto exit_journal;
+	}
 	/*
 	 * Make the new blocks and inodes valid next. We do this before
 	 * increasing the group count so that once the group is enabled,

@@ -957,6 +966,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 	handle_t *handle;
 	int err;
 	unsigned long freed_blocks;
+	ext4_group_t group;
+	struct ext4_group_info *grp;
 
 	/* We don't need to worry about locking wrt other resizers just
 	 * yet: we're going to revalidate es->s_blocks_count after

@@ -988,7 +999,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 	}
 
 	/* Handle the remaining blocks in the last group only. */
-	ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
+	ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
 
 	if (last == 0) {
 		ext4_warning(sb, __func__,

@@ -1060,6 +1071,45 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 		   o_blocks_count + add);
 	if ((err = ext4_journal_stop(handle)))
 		goto exit_put;
+
+	/*
+	 * Mark mballoc pages as not up to date so that they will be updated
+	 * next time they are loaded by ext4_mb_load_buddy.
+	 */
+	if (test_opt(sb, MBALLOC)) {
+		struct ext4_sb_info *sbi = EXT4_SB(sb);
+		struct inode *inode = sbi->s_buddy_cache;
+		int blocks_per_page;
+		int block;
+		int pnum;
+		struct page *page;
+
+		/* Set buddy page as not up to date */
+		blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+		block = group * 2;
+		pnum = block / blocks_per_page;
+		page = find_get_page(inode->i_mapping, pnum);
+		if (page != NULL) {
+			ClearPageUptodate(page);
+			page_cache_release(page);
+		}
+
+		/* Set bitmap page as not up to date */
+		block++;
+		pnum = block / blocks_per_page;
+		page = find_get_page(inode->i_mapping, pnum);
+		if (page != NULL) {
+			ClearPageUptodate(page);
+			page_cache_release(page);
+		}
+
+		/* Get the info on the last group */
+		grp = ext4_get_group_info(sb, group);
+
+		/* Update free blocks in group info */
+		ext4_mb_update_group_info(grp, add);
+	}
+
 	if (test_opt(sb, DEBUG))
 		printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
 		       ext4_blocks_count(es));
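The page-invalidation block added to ext4_group_extend() depends on mballoc's buddy-cache layout: group g keeps its two management blocks at logical blocks 2g and 2g + 1 of the s_buddy_cache inode, so the page covering either block is just the block number divided by blocks-per-page. A standalone sketch of that arithmetic, with example sizes assumed:

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096, blocksize = 1024;	/* example */
	unsigned long group = 5;

	unsigned long blocks_per_page = page_size / blocksize;
	unsigned long buddy_block = group * 2;		/* first of the pair */
	unsigned long bitmap_block = buddy_block + 1;

	printf("buddy page %lu, bitmap page %lu\n",
	       buddy_block / blocks_per_page,
	       bitmap_block / blocks_per_page);
	return 0;
}

With a 1KB block size both lookups land on the same page (page 2 for group 5), which is why the hunk computes the page number twice rather than assuming the two blocks share a page.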
fs/ext4/super.c (140 lines changed)

@@ -506,6 +506,7 @@ static void ext4_put_super (struct super_block * sb)
 	ext4_ext_release(sb);
 	ext4_xattr_put_super(sb);
 	jbd2_journal_destroy(sbi->s_journal);
+	sbi->s_journal = NULL;
 	if (!(sb->s_flags & MS_RDONLY)) {
 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 		es->s_state = cpu_to_le16(sbi->s_mount_state);

@@ -517,6 +518,7 @@ static void ext4_put_super (struct super_block * sb)
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
+	kfree(sbi->s_flex_groups);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);

@@ -571,6 +573,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
 	INIT_LIST_HEAD(&ei->i_prealloc_list);
 	spin_lock_init(&ei->i_prealloc_lock);
+	jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
+	ei->i_reserved_data_blocks = 0;
+	ei->i_reserved_meta_blocks = 0;
+	ei->i_allocated_meta_blocks = 0;
+	ei->i_delalloc_reserved_flag = 0;
+	spin_lock_init(&(ei->i_block_reservation_lock));
 	return &ei->vfs_inode;
 }

@@ -635,6 +643,8 @@ static void ext4_clear_inode(struct inode *inode)
 	EXT4_I(inode)->i_block_alloc_info = NULL;
 	if (unlikely(rsv))
 		kfree(rsv);
+	jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
+				       &EXT4_I(inode)->jinode);
 }
 
 static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb)

@@ -671,7 +681,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	unsigned long def_mount_opts;
 	struct super_block *sb = vfs->mnt_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	journal_t *journal = sbi->s_journal;
 	struct ext4_super_block *es = sbi->s_es;
 
 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);

@@ -747,6 +756,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 		seq_puts(seq, ",nomballoc");
 	if (test_opt(sb, I_VERSION))
 		seq_puts(seq, ",i_version");
+	if (!test_opt(sb, DELALLOC))
+		seq_puts(seq, ",nodelalloc");
+
 
 	if (sbi->s_stripe)
 		seq_printf(seq, ",stripe=%lu", sbi->s_stripe);

@@ -894,7 +906,7 @@ enum {
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
 	Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
-	Opt_mballoc, Opt_nomballoc, Opt_stripe,
+	Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
 };
 
 static match_table_t tokens = {

@@ -953,6 +965,8 @@ static match_table_t tokens = {
 	{Opt_nomballoc, "nomballoc"},
 	{Opt_stripe, "stripe=%u"},
 	{Opt_resize, "resize"},
+	{Opt_delalloc, "delalloc"},
+	{Opt_nodelalloc, "nodelalloc"},
 	{Opt_err, NULL},
 };

@@ -990,6 +1004,7 @@ static int parse_options (char *options, struct super_block *sb,
 	int qtype, qfmt;
 	char *qname;
 #endif
+	ext4_fsblk_t last_block;
 
 	if (!options)
 		return 1;

@@ -1309,15 +1324,39 @@ set_qf_format:
 			clear_opt(sbi->s_mount_opt, NOBH);
 			break;
 		case Opt_extents:
+			if (!EXT4_HAS_INCOMPAT_FEATURE(sb,
+					EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+				ext4_warning(sb, __func__,
+					"extents feature not enabled "
+					"on this filesystem, use tune2fs\n");
+				return 0;
+			}
 			set_opt (sbi->s_mount_opt, EXTENTS);
 			break;
 		case Opt_noextents:
+			/*
+			 * When e2fsprogs support resizing an already existing
+			 * ext3 file system to greater than 2**32 we need to
+			 * add support to block allocator to handle growing
+			 * already existing block mapped inode so that blocks
+			 * allocated for them fall within 2**32
+			 */
+			last_block = ext4_blocks_count(sbi->s_es) - 1;
+			if (last_block > 0xffffffffULL) {
+				printk(KERN_ERR "EXT4-fs: Filesystem too "
+						"large to mount with "
+						"-o noextents options\n");
+				return 0;
+			}
 			clear_opt (sbi->s_mount_opt, EXTENTS);
 			break;
 		case Opt_i_version:
 			set_opt(sbi->s_mount_opt, I_VERSION);
 			sb->s_flags |= MS_I_VERSION;
 			break;
+		case Opt_nodelalloc:
+			clear_opt(sbi->s_mount_opt, DELALLOC);
+			break;
 		case Opt_mballoc:
 			set_opt(sbi->s_mount_opt, MBALLOC);
 			break;

@@ -1331,6 +1370,9 @@ set_qf_format:
 				return 0;
 			sbi->s_stripe = option;
 			break;
+		case Opt_delalloc:
+			set_opt(sbi->s_mount_opt, DELALLOC);
+			break;
 		default:
 			printk (KERN_ERR
 				"EXT4-fs: Unrecognized mount option \"%s\" "

@@ -1443,6 +1485,54 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 	return res;
 }
 
+static int ext4_fill_flex_info(struct super_block *sb)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_desc *gdp = NULL;
+	struct buffer_head *bh;
+	ext4_group_t flex_group_count;
+	ext4_group_t flex_group;
+	int groups_per_flex = 0;
+	__u64 block_bitmap = 0;
+	int i;
+
+	if (!sbi->s_es->s_log_groups_per_flex) {
+		sbi->s_log_groups_per_flex = 0;
+		return 1;
+	}
+
+	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
+	groups_per_flex = 1 << sbi->s_log_groups_per_flex;
+
+	flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
+		groups_per_flex;
+	sbi->s_flex_groups = kmalloc(flex_group_count *
+				     sizeof(struct flex_groups), GFP_KERNEL);
+	if (sbi->s_flex_groups == NULL) {
+		printk(KERN_ERR "EXT4-fs: not enough memory\n");
+		goto failed;
+	}
+	memset(sbi->s_flex_groups, 0, flex_group_count *
+	       sizeof(struct flex_groups));
+
+	gdp = ext4_get_group_desc(sb, 1, &bh);
+	block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
+
+	for (i = 0; i < sbi->s_groups_count; i++) {
+		gdp = ext4_get_group_desc(sb, i, &bh);
+
+		flex_group = ext4_flex_group(sbi, i);
+		sbi->s_flex_groups[flex_group].free_inodes +=
+			le16_to_cpu(gdp->bg_free_inodes_count);
+		sbi->s_flex_groups[flex_group].free_blocks +=
+			le16_to_cpu(gdp->bg_free_blocks_count);
+	}
+
+	return 1;
+failed:
+	return 0;
+}
+
 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
 			    struct ext4_group_desc *gdp)
 {

@@ -1810,8 +1900,8 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
 }
 
 static int ext4_fill_super (struct super_block *sb, void *data, int silent)
-				__releases(kernel_sem)
-				__acquires(kernel_sem)
+				__releases(kernel_lock)
+				__acquires(kernel_lock)
 
 {
 	struct buffer_head * bh;

@@ -1851,11 +1941,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		goto out_fail;
 	}
 
-	if (!sb_set_blocksize(sb, blocksize)) {
-		printk(KERN_ERR "EXT4-fs: bad blocksize %d.\n", blocksize);
-		goto out_fail;
-	}
-
 	/*
 	 * The ext4 superblock will not be buffer aligned for other than 1kB
 	 * block sizes. We need to calculate the offset from buffer start.

@@ -1919,15 +2004,28 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 
 	/*
 	 * turn on extents feature by default in ext4 filesystem
-	 * User -o noextents to turn it off
+	 * only if feature flag already set by mkfs or tune2fs.
+	 * Use -o noextents to turn it off
 	 */
-	set_opt(sbi->s_mount_opt, EXTENTS);
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
+		set_opt(sbi->s_mount_opt, EXTENTS);
+	else
+		ext4_warning(sb, __func__,
+			"extents feature not enabled on this filesystem, "
+			"use tune2fs.\n");
 	/*
-	 * turn on mballoc feature by default in ext4 filesystem
-	 * User -o nomballoc to turn it off
+	 * turn on mballoc code by default in ext4 filesystem
+	 * Use -o nomballoc to turn it off
 	 */
 	set_opt(sbi->s_mount_opt, MBALLOC);
 
+	/*
+	 * enable delayed allocation by default
+	 * Use -o nodelalloc to turn it off
+	 */
+	set_opt(sbi->s_mount_opt, DELALLOC);
+
+
 	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
 			    NULL, 0))
 		goto failed_mount;

@@ -2138,6 +2236,14 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
 		goto failed_mount2;
 	}
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+		if (!ext4_fill_flex_info(sb)) {
+			printk(KERN_ERR
+			       "EXT4-fs: unable to initialize "
+			       "flex_bg meta info!\n");
+			goto failed_mount2;
+		}
+
 	sbi->s_gdb_count = db_count;
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	spin_lock_init(&sbi->s_next_gen_lock);

@@ -2358,6 +2464,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
 		"writeback");
 
+	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+		printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
+				"requested data journaling mode\n");
+		clear_opt(sbi->s_mount_opt, DELALLOC);
+	} else if (test_opt(sb, DELALLOC))
+		printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
+
 	ext4_ext_init(sb);
 	ext4_mb_init(sb, needs_recovery);

@@ -2372,6 +2485,7 @@ cantfind_ext4:
 
 failed_mount4:
 	jbd2_journal_destroy(sbi->s_journal);
+	sbi->s_journal = NULL;
 failed_mount3:
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);

@@ -3325,7 +3439,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 		err = ext4_journal_dirty_metadata(handle, bh);
 	else {
 		/* Always do at least ordered writes for quotas */
-		err = ext4_journal_dirty_data(handle, bh);
+		err = ext4_jbd2_file_inode(handle, inode);
 		mark_buffer_dirty(bh);
 	}
 	brelse(bh);
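The delalloc/nodelalloc plumbing above follows the kernel's token-table pattern for mount options: an enum of tokens, a table mapping option strings to tokens, and one switch that flips the corresponding flag. A user-space sketch of the same shape (simplified; the kernel's match_token() from <linux/parser.h> also handles %u-style arguments like "stripe=%u"):

#include <stdio.h>
#include <string.h>

/* Every recognised option maps to an enum value; one switch then sets
 * or clears the matching mount flag. */
enum { Opt_delalloc, Opt_nodelalloc, Opt_err };

static const struct { int token; const char *pattern; } tokens[] = {
	{ Opt_delalloc,   "delalloc"   },
	{ Opt_nodelalloc, "nodelalloc" },
	{ Opt_err,        NULL         },
};

static int match_option(const char *opt)
{
	for (int i = 0; tokens[i].pattern; i++)
		if (strcmp(opt, tokens[i].pattern) == 0)
			return tokens[i].token;
	return Opt_err;
}

int main(void)
{
	unsigned mount_opt = 1;		/* bit 0: DELALLOC, on by default */

	switch (match_option("nodelalloc")) {
	case Opt_delalloc:
		mount_opt |= 1;
		break;
	case Opt_nodelalloc:
		mount_opt &= ~1u;
		break;
	default:
		fprintf(stderr, "Unrecognized mount option\n");
	}
	printf("delalloc %s\n", (mount_opt & 1) ? "on" : "off");
	return 0;
}

Defaulting the flag on and letting the parser clear it is exactly why ext4_fill_super() calls set_opt(..., DELALLOC) before parse_options() runs.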
fs/ext4/xattr.c

@@ -810,7 +810,7 @@ inserted:
 			/* We need to allocate a new block */
 			ext4_fsblk_t goal = ext4_group_first_block_no(sb,
 						EXT4_I(inode)->i_block_group);
-			ext4_fsblk_t block = ext4_new_block(handle, inode,
+			ext4_fsblk_t block = ext4_new_meta_block(handle, inode,
 							goal, &error);
 			if (error)
 				goto cleanup;
fs/ext4/xattr_trusted.c

@@ -13,13 +13,11 @@
 #include "ext4.h"
 #include "xattr.h"
 
-#define XATTR_TRUSTED_PREFIX "trusted."
-
 static size_t
 ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
 			const char *name, size_t name_len)
 {
-	const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+	const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
 	if (!capable(CAP_SYS_ADMIN))
fs/ext4/xattr_user.c

@@ -12,13 +12,11 @@
 #include "ext4.h"
 #include "xattr.h"
 
-#define XATTR_USER_PREFIX "user."
-
 static size_t
 ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
 		     const char *name, size_t name_len)
 {
-	const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
 	if (!test_opt(inode->i_sb, XATTR_USER))
fs/jbd2/checkpoint.c

@@ -688,7 +688,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 
 	J_ASSERT(transaction->t_state == T_FINISHED);
 	J_ASSERT(transaction->t_buffers == NULL);
-	J_ASSERT(transaction->t_sync_datalist == NULL);
 	J_ASSERT(transaction->t_forget == NULL);
 	J_ASSERT(transaction->t_iobuf_list == NULL);
 	J_ASSERT(transaction->t_shadow_list == NULL);
fs/jbd2/commit.c (294 lines changed)

@@ -22,6 +22,8 @@
 #include <linux/pagemap.h>
 #include <linux/jiffies.h>
 #include <linux/crc32.h>
+#include <linux/writeback.h>
+#include <linux/backing-dev.h>
 
 /*
  * Default IO end handler for temporary BJ_IO buffer_heads.

@@ -37,8 +39,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 }
 
 /*
- * When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * When an ext4 file is truncated, it is possible that some pages are not
+ * successfully freed, because they are attached to a committing transaction.
  * After the transaction commits, these pages are left on the LRU, with no
  * ->mapping, and with attached buffers. These pages are trivially reclaimable
  * by the VM, but their apparent absence upsets the VM accounting, and it makes

@@ -79,21 +81,6 @@ nope:
 	__brelse(bh);
 }
 
-/*
- * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
- * held. For ranking reasons we must trylock. If we lose, schedule away and
- * return 0. j_list_lock is dropped in this case.
- */
-static int inverted_lock(journal_t *journal, struct buffer_head *bh)
-{
-	if (!jbd_trylock_bh_state(bh)) {
-		spin_unlock(&journal->j_list_lock);
-		schedule();
-		return 0;
-	}
-	return 1;
-}
-
 /*
  * Done it all: now submit the commit record. We should have
  * cleaned up our previous buffers by now, so if we are in abort

@@ -112,6 +99,7 @@ static int journal_submit_commit_record(journal_t *journal,
 	struct buffer_head *bh;
 	int ret;
 	int barrier_done = 0;
+	struct timespec now = current_kernel_time();
 
 	if (is_journal_aborted(journal))
 		return 0;

@@ -126,6 +114,8 @@ static int journal_submit_commit_record(journal_t *journal,
 	tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
 	tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
 	tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
+	tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
+	tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
 
 	if (JBD2_HAS_COMPAT_FEATURE(journal,
 				    JBD2_FEATURE_COMPAT_CHECKSUM)) {

@@ -197,159 +187,104 @@ static int journal_wait_on_commit_record(struct buffer_head *bh)
 }
 
 /*
- * Wait for all submitted IO to complete.
+ * write the filemap data using writepage() address_space_operations.
+ * We don't do block allocation here even for delalloc. We don't
+ * use writepages() because with dealyed allocation we may be doing
+ * block allocation in writepages().
  */
-static int journal_wait_on_locked_list(journal_t *journal,
-				transaction_t *commit_transaction)
+static int journal_submit_inode_data_buffers(struct address_space *mapping)
 {
-	int ret = 0;
-	struct journal_head *jh;
-
-	while (commit_transaction->t_locked_list) {
-		struct buffer_head *bh;
-
-		jh = commit_transaction->t_locked_list->b_tprev;
-		bh = jh2bh(jh);
-		get_bh(bh);
-		if (buffer_locked(bh)) {
-			spin_unlock(&journal->j_list_lock);
-			wait_on_buffer(bh);
-			if (unlikely(!buffer_uptodate(bh)))
-				ret = -EIO;
-			spin_lock(&journal->j_list_lock);
-		}
-		if (!inverted_lock(journal, bh)) {
-			put_bh(bh);
-			spin_lock(&journal->j_list_lock);
-			continue;
-		}
-		if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
-			__jbd2_journal_unfile_buffer(jh);
-			jbd_unlock_bh_state(bh);
-			jbd2_journal_remove_journal_head(bh);
-			put_bh(bh);
-		} else {
-			jbd_unlock_bh_state(bh);
-		}
-		put_bh(bh);
-		cond_resched_lock(&journal->j_list_lock);
-	}
-	return ret;
-}
+	int ret;
+	struct writeback_control wbc = {
+		.sync_mode =  WB_SYNC_ALL,
+		.nr_to_write = mapping->nrpages * 2,
+		.range_start = 0,
+		.range_end = i_size_read(mapping->host),
+		.for_writepages = 1,
+	};
 
-static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
-{
-	int i;
-
-	for (i = 0; i < bufs; i++) {
-		wbuf[i]->b_end_io = end_buffer_write_sync;
-		/* We use-up our safety reference in submit_bh() */
-		submit_bh(WRITE, wbuf[i]);
-	}
+	ret = generic_writepages(mapping, &wbc);
+	return ret;
 }
 
 /*
- * Submit all the data buffers to disk
+ * Submit all the data buffers of inode associated with the transaction to
+ * disk.
+ *
+ * We are in a committing transaction. Therefore no new inode can be added to
+ * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
+ * operate on from being released while we write out pages.
  */
-static void journal_submit_data_buffers(journal_t *journal,
+static int journal_submit_data_buffers(journal_t *journal,
 				transaction_t *commit_transaction)
 {
-	struct journal_head *jh;
-	struct buffer_head *bh;
-	int locked;
-	int bufs = 0;
-	struct buffer_head **wbuf = journal->j_wbuf;
+	struct jbd2_inode *jinode;
+	int err, ret = 0;
+	struct address_space *mapping;
 
-	/*
-	 * Whenever we unlock the journal and sleep, things can get added
-	 * onto ->t_sync_datalist, so we have to keep looping back to
-	 * write_out_data until we *know* that the list is empty.
-	 *
-	 * Cleanup any flushed data buffers from the data list. Even in
-	 * abort mode, we want to flush this out as soon as possible.
-	 */
-write_out_data:
-	cond_resched();
-	spin_lock(&journal->j_list_lock);
-
-	while (commit_transaction->t_sync_datalist) {
-		jh = commit_transaction->t_sync_datalist;
-		bh = jh2bh(jh);
-		locked = 0;
-
-		/* Get reference just to make sure buffer does not disappear
-		 * when we are forced to drop various locks */
-		get_bh(bh);
-		/* If the buffer is dirty, we need to submit IO and hence
-		 * we need the buffer lock. We try to lock the buffer without
-		 * blocking. If we fail, we need to drop j_list_lock and do
-		 * blocking lock_buffer().
-		 */
-		if (buffer_dirty(bh)) {
-			if (test_set_buffer_locked(bh)) {
-				BUFFER_TRACE(bh, "needs blocking lock");
-				spin_unlock(&journal->j_list_lock);
-				/* Write out all data to prevent deadlocks */
-				journal_do_submit_data(wbuf, bufs);
-				bufs = 0;
-				lock_buffer(bh);
-				spin_lock(&journal->j_list_lock);
-			}
-			locked = 1;
-		}
-		/* We have to get bh_state lock. Again out of order, sigh. */
-		if (!inverted_lock(journal, bh)) {
-			jbd_lock_bh_state(bh);
-			spin_lock(&journal->j_list_lock);
-		}
-		/* Someone already cleaned up the buffer? */
-		if (!buffer_jbd(bh)
-			|| jh->b_transaction != commit_transaction
-			|| jh->b_jlist != BJ_SyncData) {
-			jbd_unlock_bh_state(bh);
-			if (locked)
-				unlock_buffer(bh);
-			BUFFER_TRACE(bh, "already cleaned up");
-			put_bh(bh);
-			continue;
-		}
-		if (locked && test_clear_buffer_dirty(bh)) {
-			BUFFER_TRACE(bh, "needs writeout, adding to array");
-			wbuf[bufs++] = bh;
-			__jbd2_journal_file_buffer(jh, commit_transaction,
-						BJ_Locked);
-			jbd_unlock_bh_state(bh);
-			if (bufs == journal->j_wbufsize) {
-				spin_unlock(&journal->j_list_lock);
-				journal_do_submit_data(wbuf, bufs);
-				bufs = 0;
-				goto write_out_data;
-			}
-		} else if (!locked && buffer_locked(bh)) {
-			__jbd2_journal_file_buffer(jh, commit_transaction,
-						BJ_Locked);
-			jbd_unlock_bh_state(bh);
-			put_bh(bh);
-		} else {
-			BUFFER_TRACE(bh, "writeout complete: unfile");
-			__jbd2_journal_unfile_buffer(jh);
-			jbd_unlock_bh_state(bh);
-			if (locked)
-				unlock_buffer(bh);
-			jbd2_journal_remove_journal_head(bh);
-			/* Once for our safety reference, once for
-			 * jbd2_journal_remove_journal_head() */
-			put_bh(bh);
-			put_bh(bh);
-		}
-
-		if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
-			spin_unlock(&journal->j_list_lock);
-			goto write_out_data;
+	spin_lock(&journal->j_list_lock);
+	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+		mapping = jinode->i_vfs_inode->i_mapping;
+		jinode->i_flags |= JI_COMMIT_RUNNING;
+		spin_unlock(&journal->j_list_lock);
+		/*
+		 * submit the inode data buffers. We use writepage
+		 * instead of writepages. Because writepages can do
+		 * block allocation with delalloc. We need to write
+		 * only allocated blocks here.
+		 */
+		err = journal_submit_inode_data_buffers(mapping);
+		if (!ret)
+			ret = err;
+		spin_lock(&journal->j_list_lock);
+		J_ASSERT(jinode->i_transaction == commit_transaction);
+		jinode->i_flags &= ~JI_COMMIT_RUNNING;
+		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
+	}
+	spin_unlock(&journal->j_list_lock);
+	return ret;
+}
+
+/*
+ * Wait for data submitted for writeout, refile inodes to proper
+ * transaction if needed.
+ */
+static int journal_finish_inode_data_buffers(journal_t *journal,
+		transaction_t *commit_transaction)
+{
+	struct jbd2_inode *jinode, *next_i;
+	int err, ret = 0;
+
+	/* For locking, see the comment in journal_submit_data_buffers() */
+	spin_lock(&journal->j_list_lock);
+	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+		jinode->i_flags |= JI_COMMIT_RUNNING;
+		spin_unlock(&journal->j_list_lock);
+		err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
+		if (!ret)
+			ret = err;
+		spin_lock(&journal->j_list_lock);
+		jinode->i_flags &= ~JI_COMMIT_RUNNING;
+		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
+	}
+
+	/* Now refile inode to proper lists */
+	list_for_each_entry_safe(jinode, next_i,
+				 &commit_transaction->t_inode_list, i_list) {
+		list_del(&jinode->i_list);
+		if (jinode->i_next_transaction) {
+			jinode->i_transaction = jinode->i_next_transaction;
+			jinode->i_next_transaction = NULL;
+			list_add(&jinode->i_list,
+				&jinode->i_transaction->t_inode_list);
+		} else {
+			jinode->i_transaction = NULL;
 		}
 	}
 	spin_unlock(&journal->j_list_lock);
-	journal_do_submit_data(wbuf, bufs);
+
+	return ret;
 }
 
 static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)

@@ -524,21 +459,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	 * Now start flushing things to disk, in the order they appear
 	 * on the transaction lists. Data blocks go first.
 	 */
-	err = 0;
-	journal_submit_data_buffers(journal, commit_transaction);
-
-	/*
-	 * Wait for all previously submitted IO to complete if commit
-	 * record is to be written synchronously.
-	 */
-	spin_lock(&journal->j_list_lock);
-	if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
-		JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
-		err = journal_wait_on_locked_list(journal,
-						commit_transaction);
-
-	spin_unlock(&journal->j_list_lock);
-
+	err = journal_submit_data_buffers(journal, commit_transaction);
 	if (err)
 		jbd2_journal_abort(journal, err);

@@ -546,16 +467,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
 	jbd_debug(3, "JBD: commit phase 2\n");
 
-	/*
-	 * If we found any dirty or locked buffers, then we should have
-	 * looped back up to the write_out_data label. If there weren't
-	 * any then journal_clean_data_list should have wiped the list
-	 * clean by now, so check that it is in fact empty.
-	 */
-	J_ASSERT (commit_transaction->t_sync_datalist == NULL);
-
-	jbd_debug (3, "JBD: commit phase 3\n");
-
 	/*
 	 * Way to go: we have now written out all of the data for a
 	 * transaction! Now comes the tricky part: we need to write out

@@ -574,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	J_ASSERT(commit_transaction->t_nr_buffers <=
 		 commit_transaction->t_outstanding_credits);
 
+	err = 0;
 	descriptor = NULL;
 	bufs = 0;
 	while (commit_transaction->t_buffers) {

@@ -748,15 +660,19 @@ start_journal_io:
 						 &cbh, crc32_sum);
 		if (err)
 			__jbd2_journal_abort_hard(journal);
-
-		spin_lock(&journal->j_list_lock);
-		err = journal_wait_on_locked_list(journal,
-						commit_transaction);
-		spin_unlock(&journal->j_list_lock);
-		if (err)
-			__jbd2_journal_abort_hard(journal);
 	}
 
+	/*
+	 * This is the right place to wait for data buffers both for ASYNC
+	 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
+	 * the commit block went to disk (which happens above). If commit is
+	 * SYNC, we need to wait for data buffers before we start writing
+	 * commit block, which happens below in such setting.
+	 */
+	err = journal_finish_inode_data_buffers(journal, commit_transaction);
+	if (err)
+		jbd2_journal_abort(journal, err);
+
 	/* Lo and behold: we have just managed to send a transaction to
 	   the log. Before we can commit it, wait for the IO so far to
 	   complete. Control buffers being written are on the

@@ -768,7 +684,7 @@ start_journal_io:
 	   so we incur less scheduling load.
 	*/
 
-	jbd_debug(3, "JBD: commit phase 4\n");
+	jbd_debug(3, "JBD: commit phase 3\n");
 
 	/*
 	 * akpm: these are BJ_IO, and j_list_lock is not needed.

@@ -827,7 +743,7 @@ wait_for_iobuf:
 
 	J_ASSERT (commit_transaction->t_shadow_list == NULL);
 
-	jbd_debug(3, "JBD: commit phase 5\n");
+	jbd_debug(3, "JBD: commit phase 4\n");
 
 	/* Here we wait for the revoke record and descriptor record buffers */
 wait_for_ctlbuf:

@@ -854,7 +770,7 @@ wait_for_iobuf:
 		/* AKPM: bforget here */
 	}
 
-	jbd_debug(3, "JBD: commit phase 6\n");
+	jbd_debug(3, "JBD: commit phase 5\n");
 
 	if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
 		JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {

@@ -874,9 +790,9 @@ wait_for_iobuf:
 	   transaction can be removed from any checkpoint list it was on
 	   before. */
 
-	jbd_debug(3, "JBD: commit phase 7\n");
+	jbd_debug(3, "JBD: commit phase 6\n");
 
-	J_ASSERT(commit_transaction->t_sync_datalist == NULL);
+	J_ASSERT(list_empty(&commit_transaction->t_inode_list));
 	J_ASSERT(commit_transaction->t_buffers == NULL);
 	J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
 	J_ASSERT(commit_transaction->t_iobuf_list == NULL);

@@ -997,7 +913,7 @@ restart_loop:
 
 	/* Done with this transaction! */
 
-	jbd_debug(3, "JBD: commit phase 8\n");
+	jbd_debug(3, "JBD: commit phase 7\n");
 
 	J_ASSERT(commit_transaction->t_state == T_COMMIT);
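The new journal_submit_inode_data_buffers() drives data writeout through an on-stack struct writeback_control instead of filing individual buffer_heads. A minimal sketch of that idiom in kernel C of this vintage (hypothetical helper name; the wbc fields mirror the hunk above, and generic_writepages() is used deliberately so the filesystem's own ->writepages() cannot start delalloc block allocation here):

#include <linux/fs.h>
#include <linux/writeback.h>

/* Synchronously write the dirty, already-allocated pages of @mapping. */
static int flush_mapping_sketch(struct address_space *mapping)
{
	struct writeback_control wbc = {
		.sync_mode      = WB_SYNC_ALL,		/* wait, don't skip */
		.nr_to_write    = mapping->nrpages * 2,	/* cover everything */
		.range_start    = 0,
		.range_end      = i_size_read(mapping->host),
		.for_writepages = 1,
	};

	return generic_writepages(mapping, &wbc);
}

Submission and waiting are split: this runs first for every inode on t_inode_list, and journal_finish_inode_data_buffers() later calls filemap_fdatawait() to collect the I/O results, so all inodes' writes overlap instead of completing one inode at a time.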
fs/jbd2/journal.c

@@ -50,7 +50,6 @@ EXPORT_SYMBOL(jbd2_journal_unlock_updates);
 EXPORT_SYMBOL(jbd2_journal_get_write_access);
 EXPORT_SYMBOL(jbd2_journal_get_create_access);
 EXPORT_SYMBOL(jbd2_journal_get_undo_access);
-EXPORT_SYMBOL(jbd2_journal_dirty_data);
 EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
 EXPORT_SYMBOL(jbd2_journal_release_buffer);
 EXPORT_SYMBOL(jbd2_journal_forget);

@@ -82,6 +81,10 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
 EXPORT_SYMBOL(jbd2_journal_invalidatepage);
 EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
 EXPORT_SYMBOL(jbd2_journal_force_commit);
+EXPORT_SYMBOL(jbd2_journal_file_inode);
+EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
+EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
+EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);

@@ -2194,6 +2197,54 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
 	jbd_unlock_bh_journal_head(bh);
 }
 
+/*
+ * Initialize jbd inode head
+ */
+void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
+{
+	jinode->i_transaction = NULL;
+	jinode->i_next_transaction = NULL;
+	jinode->i_vfs_inode = inode;
+	jinode->i_flags = 0;
+	INIT_LIST_HEAD(&jinode->i_list);
+}
+
+/*
+ * Function to be called before we start removing inode from memory (i.e.,
+ * clear_inode() is a fine place to be called from). It removes inode from
+ * transaction's lists.
+ */
+void jbd2_journal_release_jbd_inode(journal_t *journal,
+				    struct jbd2_inode *jinode)
+{
+	int writeout = 0;
+
+	if (!journal)
+		return;
+restart:
+	spin_lock(&journal->j_list_lock);
+	/* Is commit writing out inode - we have to wait */
+	if (jinode->i_flags & JI_COMMIT_RUNNING) {
+		wait_queue_head_t *wq;
+		DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
+		wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
+		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+		spin_unlock(&journal->j_list_lock);
+		schedule();
+		finish_wait(wq, &wait.wait);
+		goto restart;
+	}
+
+	/* Do we need to wait for data writeback? */
+	if (journal->j_committing_transaction == jinode->i_transaction)
+		writeout = 1;
+	if (jinode->i_transaction) {
+		list_del(&jinode->i_list);
+		jinode->i_transaction = NULL;
+	}
+	spin_unlock(&journal->j_list_lock);
+}
+
 /*
  * debugfs tunables
 */
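The JI_COMMIT_RUNNING handshake added above is the open-coded wait-on-bit pattern: the releasing path re-checks the flag under j_list_lock, parks itself on the bit's hashed waitqueue, and the commit code wakes it with wake_up_bit() after clearing the flag. A stripped-down sketch of just the waiting side (illustrative names; in jbd2 the flag word is jinode->i_flags):

#include <linux/bitops.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

/* Sketch: sleep until bit @nr clears in *@flags; @lock serialises the
 * flag against the path that sets and clears it. */
static void wait_for_bit_clear(unsigned long *flags, int nr, spinlock_t *lock)
{
	DEFINE_WAIT_BIT(wait, flags, nr);
	wait_queue_head_t *wq = bit_waitqueue(flags, nr);

restart:
	spin_lock(lock);
	if (test_bit(nr, flags)) {
		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(lock);
		schedule();		/* woken by wake_up_bit() */
		finish_wait(wq, &wait.wait);
		goto restart;
	}
	spin_unlock(lock);
}

Re-taking the lock and looping back to restart closes the race where the committer sets the flag again between the wakeup and the waiter's re-check.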
@ -41,7 +41,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
|
||||||
* new transaction and we can't block without protecting against other
|
* new transaction and we can't block without protecting against other
|
||||||
* processes trying to touch the journal while it is in transition.
|
* processes trying to touch the journal while it is in transition.
|
||||||
*
|
*
|
||||||
* Called under j_state_lock
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static transaction_t *
|
static transaction_t *
|
||||||
|
@ -52,6 +51,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
|
||||||
transaction->t_tid = journal->j_transaction_sequence++;
|
transaction->t_tid = journal->j_transaction_sequence++;
|
||||||
transaction->t_expires = jiffies + journal->j_commit_interval;
|
transaction->t_expires = jiffies + journal->j_commit_interval;
|
||||||
spin_lock_init(&transaction->t_handle_lock);
|
spin_lock_init(&transaction->t_handle_lock);
|
||||||
|
INIT_LIST_HEAD(&transaction->t_inode_list);
|
||||||
|
|
||||||
/* Set up the commit timer for the new transaction. */
|
/* Set up the commit timer for the new transaction. */
|
||||||
journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
|
journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
|
||||||
|
@ -942,183 +942,6 @@ out:
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* int jbd2_journal_dirty_data() - mark a buffer as containing dirty data which
|
|
||||||
* needs to be flushed before we can commit the
|
|
||||||
* current transaction.
|
|
||||||
* @handle: transaction
|
|
||||||
* @bh: bufferhead to mark
|
|
||||||
*
|
|
||||||
* The buffer is placed on the transaction's data list and is marked as
|
|
||||||
* belonging to the transaction.
|
|
||||||
*
|
|
||||||
* Returns error number or 0 on success.
|
|
||||||
*
|
|
||||||
* jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage
|
|
||||||
* by kswapd.
|
|
||||||
*/
|
|
||||||
int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
|
|
||||||
{
|
|
||||||
journal_t *journal = handle->h_transaction->t_journal;
|
|
||||||
int need_brelse = 0;
|
|
||||||
struct journal_head *jh;
|
|
||||||
|
|
||||||
if (is_handle_aborted(handle))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
jh = jbd2_journal_add_journal_head(bh);
|
|
||||||
JBUFFER_TRACE(jh, "entry");
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The buffer could *already* be dirty. Writeout can start
|
|
||||||
* at any time.
|
|
||||||
*/
|
|
||||||
jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* What if the buffer is already part of a running transaction?
|
|
||||||
*
|
|
||||||
* There are two cases:
|
|
||||||
* 1) It is part of the current running transaction. Refile it,
|
|
||||||
* just in case we have allocated it as metadata, deallocated
|
|
||||||
* it, then reallocated it as data.
|
|
||||||
* 2) It is part of the previous, still-committing transaction.
|
|
||||||
* If all we want to do is to guarantee that the buffer will be
|
|
||||||
* written to disk before this new transaction commits, then
|
|
||||||
* being sure that the *previous* transaction has this same
|
|
||||||
* property is sufficient for us! Just leave it on its old
|
|
||||||
* transaction.
|
|
||||||
*
|
|
||||||
* In case (2), the buffer must not already exist as metadata
|
|
||||||
* --- that would violate write ordering (a transaction is free
|
|
||||||
* to write its data at any point, even before the previous
|
|
||||||
* committing transaction has committed). The caller must
|
|
||||||
* never, ever allow this to happen: there's nothing we can do
|
|
||||||
* about it in this layer.
|
|
||||||
*/
|
|
||||||
jbd_lock_bh_state(bh);
|
|
||||||
spin_lock(&journal->j_list_lock);
|
|
||||||
|
|
||||||
/* Now that we have bh_state locked, are we really still mapped? */
|
|
||||||
if (!buffer_mapped(bh)) {
|
|
||||||
JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
|
|
||||||
goto no_journal;
|
|
||||||
}
|
|
||||||
|
|
||||||
-	if (jh->b_transaction) {
-		JBUFFER_TRACE(jh, "has transaction");
-		if (jh->b_transaction != handle->h_transaction) {
-			JBUFFER_TRACE(jh, "belongs to older transaction");
-			J_ASSERT_JH(jh, jh->b_transaction ==
-					journal->j_committing_transaction);
-
-			/* @@@ IS THIS TRUE ? */
-			/*
-			 * Not any more.  Scenario: someone does a write()
-			 * in data=journal mode.  The buffer's transaction has
-			 * moved into commit.  Then someone does another
-			 * write() to the file.  We do the frozen data copyout
-			 * and set b_next_transaction to point to j_running_t.
-			 * And while we're in that state, someone does a
-			 * writepage() in an attempt to pageout the same area
-			 * of the file via a shared mapping.  At present that
-			 * calls jbd2_journal_dirty_data(), and we get right here.
-			 * It may be too late to journal the data.  Simply
-			 * falling through to the next test will suffice: the
-			 * data will be dirty and wil be checkpointed.  The
-			 * ordering comments in the next comment block still
-			 * apply.
-			 */
-			//J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-
-			/*
-			 * If we're journalling data, and this buffer was
-			 * subject to a write(), it could be metadata, forget
-			 * or shadow against the committing transaction.  Now,
-			 * someone has dirtied the same darn page via a mapping
-			 * and it is being writepage()'d.
-			 * We *could* just steal the page from commit, with some
-			 * fancy locking there.  Instead, we just skip it -
-			 * don't tie the page's buffers to the new transaction
-			 * at all.
-			 * Implication: if we crash before the writepage() data
-			 * is written into the filesystem, recovery will replay
-			 * the write() data.
-			 */
-			if (jh->b_jlist != BJ_None &&
-					jh->b_jlist != BJ_SyncData &&
-					jh->b_jlist != BJ_Locked) {
-				JBUFFER_TRACE(jh, "Not stealing");
-				goto no_journal;
-			}
-
-			/*
-			 * This buffer may be undergoing writeout in commit.  We
-			 * can't return from here and let the caller dirty it
-			 * again because that can cause the write-out loop in
-			 * commit to never terminate.
-			 */
-			if (buffer_dirty(bh)) {
-				get_bh(bh);
-				spin_unlock(&journal->j_list_lock);
-				jbd_unlock_bh_state(bh);
-				need_brelse = 1;
-				sync_dirty_buffer(bh);
-				jbd_lock_bh_state(bh);
-				spin_lock(&journal->j_list_lock);
-				/* Since we dropped the lock... */
-				if (!buffer_mapped(bh)) {
-					JBUFFER_TRACE(jh, "buffer got unmapped");
-					goto no_journal;
-				}
-				/* The buffer may become locked again at any
-				   time if it is redirtied */
-			}
-
-			/* journal_clean_data_list() may have got there first */
-			if (jh->b_transaction != NULL) {
-				JBUFFER_TRACE(jh, "unfile from commit");
-				__jbd2_journal_temp_unlink_buffer(jh);
-				/* It still points to the committing
-				 * transaction; move it to this one so
-				 * that the refile assert checks are
-				 * happy. */
-				jh->b_transaction = handle->h_transaction;
-			}
-			/* The buffer will be refiled below */
-
-		}
-		/*
-		 * Special case --- the buffer might actually have been
-		 * allocated and then immediately deallocated in the previous,
-		 * committing transaction, so might still be left on that
-		 * transaction's metadata lists.
-		 */
-		if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
-			JBUFFER_TRACE(jh, "not on correct data list: unfile");
-			J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
-			__jbd2_journal_temp_unlink_buffer(jh);
-			jh->b_transaction = handle->h_transaction;
-			JBUFFER_TRACE(jh, "file as data");
-			__jbd2_journal_file_buffer(jh, handle->h_transaction,
-						BJ_SyncData);
-		}
-	} else {
-		JBUFFER_TRACE(jh, "not on a transaction");
-		__jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);
-	}
-no_journal:
-	spin_unlock(&journal->j_list_lock);
-	jbd_unlock_bh_state(bh);
-	if (need_brelse) {
-		BUFFER_TRACE(bh, "brelse");
-		__brelse(bh);
-	}
-	JBUFFER_TRACE(jh, "exit");
-	jbd2_journal_put_journal_head(jh);
-	return 0;
-}
-
 /**
  * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
@@ -1541,10 +1364,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
  * Remove a buffer from the appropriate transaction list.
  *
  * Note that this function can *change* the value of
- * bh->b_transaction->t_sync_datalist, t_buffers, t_forget,
- * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list.  If the caller
- * is holding onto a copy of one of thee pointers, it could go bad.
- * Generally the caller needs to re-read the pointer from the transaction_t.
+ * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list,
+ * t_log_list or t_reserved_list.  If the caller is holding onto a copy of one
+ * of these pointers, it could go bad.  Generally the caller needs to re-read
+ * the pointer from the transaction_t.
  *
  * Called under j_list_lock.  The journal may not be locked.
  */
@@ -1566,9 +1389,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
 	switch (jh->b_jlist) {
 	case BJ_None:
 		return;
-	case BJ_SyncData:
-		list = &transaction->t_sync_datalist;
-		break;
 	case BJ_Metadata:
 		transaction->t_nr_buffers--;
 		J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
@@ -1589,9 +1409,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
 	case BJ_Reserved:
 		list = &transaction->t_reserved_list;
 		break;
-	case BJ_Locked:
-		list = &transaction->t_locked_list;
-		break;
 	}

 	__blist_del_buffer(list, jh);
@@ -1634,15 +1451,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
 		goto out;

 	spin_lock(&journal->j_list_lock);
-	if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) {
-		if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
-			/* A written-back ordered data buffer */
-			JBUFFER_TRACE(jh, "release data");
-			__jbd2_journal_unfile_buffer(jh);
-			jbd2_journal_remove_journal_head(bh);
-			__brelse(bh);
-		}
-	} else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
+	if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
 		/* written-back checkpointed metadata buffer */
 		if (jh->b_jlist == BJ_None) {
 			JBUFFER_TRACE(jh, "remove from checkpoint list");
@@ -1656,12 +1465,43 @@ out:
 	return;
 }

+/*
+ * jbd2_journal_try_to_free_buffers() could race with
+ * jbd2_journal_commit_transaction(). The later might still hold the
+ * reference count to the buffers when inspecting them on
+ * t_syncdata_list or t_locked_list.
+ *
+ * jbd2_journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * try to free that buffer.
+ *
+ * Called with journal->j_state_lock hold.
+ */
+static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+	transaction_t *transaction;
+	tid_t tid;
+
+	spin_lock(&journal->j_state_lock);
+	transaction = journal->j_committing_transaction;
+
+	if (!transaction) {
+		spin_unlock(&journal->j_state_lock);
+		return;
+	}
+
+	tid = transaction->t_tid;
+	spin_unlock(&journal->j_state_lock);
+	jbd2_log_wait_commit(journal, tid);
+}
+
 /**
  * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
  * @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard should we try to release
+ * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
+ * release the buffers.
 *
 *
 * For all the buffers on this page,
@@ -1690,9 +1530,11 @@ out:
 * journal_try_to_free_buffer() is changing its state.  But that
 * cannot happen because we never reallocate freed data as metadata
 * while the data is part of a transaction.  Yes?
+ *
+ * Return 0 on failure, 1 on success
 */
 int jbd2_journal_try_to_free_buffers(journal_t *journal,
-				struct page *page, gfp_t unused_gfp_mask)
+				struct page *page, gfp_t gfp_mask)
 {
 	struct buffer_head *head;
 	struct buffer_head *bh;
@@ -1708,7 +1550,8 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
 		/*
 		 * We take our own ref against the journal_head here to avoid
 		 * having to add tons of locking around each instance of
-		 * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head().
+		 * jbd2_journal_remove_journal_head() and
+		 * jbd2_journal_put_journal_head().
 		 */
 		jh = jbd2_journal_grab_journal_head(bh);
 		if (!jh)
@@ -1721,7 +1564,28 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
 		if (buffer_jbd(bh))
 			goto busy;
 	} while ((bh = bh->b_this_page) != head);

 	ret = try_to_free_buffers(page);
+
+	/*
+	 * There are a number of places where jbd2_journal_try_to_free_buffers()
+	 * could race with jbd2_journal_commit_transaction(), the later still
+	 * holds the reference to the buffers to free while processing them.
+	 * try_to_free_buffers() failed to free those buffers. Some of the
+	 * caller of releasepage() request page buffers to be dropped, otherwise
+	 * treat the fail-to-free as errors (such as generic_file_direct_IO())
+	 *
+	 * So, if the caller of try_to_release_page() wants the synchronous
+	 * behaviour(i.e make sure buffers are dropped upon return),
+	 * let's wait for the current transaction to finish flush of
+	 * dirty data buffers, then try to free those buffers again,
+	 * with the journal locked.
+	 */
+	if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+		jbd2_journal_wait_for_transaction_sync_data(journal);
+		ret = try_to_free_buffers(page);
+	}
+
 busy:
 	return ret;
 }
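Note: the __GFP_WAIT/__GFP_FS test above gives callers of releasepage() an opt-in synchronous mode. A minimal sketch of the calling side, assuming a hypothetical caller that must have the buffers gone before it returns (only the gfp flags matter here):

static int example_drop_page_buffers(journal_t *journal, struct page *page)
{
	/* GFP_KERNEL includes __GFP_WAIT and __GFP_FS, so jbd2 may block in
	 * jbd2_journal_wait_for_transaction_sync_data() and retry the free. */
	if (jbd2_journal_try_to_free_buffers(journal, page, GFP_KERNEL))
		return 0;		/* buffers are gone */

	/* With GFP_NOFS the __GFP_FS test fails: jbd2 returns without
	 * waiting, and a 0 result simply means "still busy". */
	return -EBUSY;
}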
@@ -1823,6 +1687,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 	if (!buffer_jbd(bh))
 		goto zap_buffer_unlocked;

+	/* OK, we have data buffer in journaled mode */
 	spin_lock(&journal->j_state_lock);
 	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
@@ -1886,15 +1751,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
 		}
 	} else if (transaction == journal->j_committing_transaction) {
 		JBUFFER_TRACE(jh, "on committing transaction");
-		if (jh->b_jlist == BJ_Locked) {
-			/*
-			 * The buffer is on the committing transaction's locked
-			 * list.  We have the buffer locked, so I/O has
-			 * completed.  So we can nail the buffer now.
-			 */
-			may_free = __dispose_buffer(jh, transaction);
-			goto zap_buffer;
-		}
 		/*
 		 * If it is committing, we simply cannot touch it.  We
 		 * can remove it's next_transaction pointer from the
@@ -2027,9 +1883,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
 		J_ASSERT_JH(jh, !jh->b_committed_data);
 		J_ASSERT_JH(jh, !jh->b_frozen_data);
 		return;
-	case BJ_SyncData:
-		list = &transaction->t_sync_datalist;
-		break;
 	case BJ_Metadata:
 		transaction->t_nr_buffers++;
 		list = &transaction->t_buffers;
@@ -2049,9 +1902,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
 	case BJ_Reserved:
 		list = &transaction->t_reserved_list;
 		break;
-	case BJ_Locked:
-		list = &transaction->t_locked_list;
-		break;
 	}

 	__blist_add_buffer(list, jh);
@@ -2141,3 +1991,88 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
 	spin_unlock(&journal->j_list_lock);
 	__brelse(bh);
 }
+
+/*
+ * File inode in the inode list of the handle's transaction
+ */
+int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
+{
+	transaction_t *transaction = handle->h_transaction;
+	journal_t *journal = transaction->t_journal;
+
+	if (is_handle_aborted(handle))
+		return -EIO;
+
+	jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
+			transaction->t_tid);
+
+	/*
+	 * First check whether inode isn't already on the transaction's
+	 * lists without taking the lock. Note that this check is safe
+	 * without the lock as we cannot race with somebody removing inode
+	 * from the transaction. The reason is that we remove inode from the
+	 * transaction only in journal_release_jbd_inode() and when we commit
+	 * the transaction. We are guarded from the first case by holding
+	 * a reference to the inode. We are safe against the second case
+	 * because if jinode->i_transaction == transaction, commit code
+	 * cannot touch the transaction because we hold reference to it,
+	 * and if jinode->i_next_transaction == transaction, commit code
+	 * will only file the inode where we want it.
+	 */
+	if (jinode->i_transaction == transaction ||
+	    jinode->i_next_transaction == transaction)
+		return 0;
+
+	spin_lock(&journal->j_list_lock);
+
+	if (jinode->i_transaction == transaction ||
+	    jinode->i_next_transaction == transaction)
+		goto done;
+
+	/* On some different transaction's list - should be
+	 * the committing one */
+	if (jinode->i_transaction) {
+		J_ASSERT(jinode->i_next_transaction == NULL);
+		J_ASSERT(jinode->i_transaction ==
+					journal->j_committing_transaction);
+		jinode->i_next_transaction = transaction;
+		goto done;
+	}
+	/* Not on any transaction list... */
+	J_ASSERT(!jinode->i_next_transaction);
+	jinode->i_transaction = transaction;
+	list_add(&jinode->i_list, &transaction->t_inode_list);
+done:
+	spin_unlock(&journal->j_list_lock);
+
+	return 0;
+}
+
+/*
+ * This function must be called when inode is journaled in ordered mode
+ * before truncation happens. It starts writeout of truncated part in
+ * case it is in the committing transaction so that we stand to ordered
+ * mode consistency guarantees.
+ */
+int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
+					loff_t new_size)
+{
+	journal_t *journal;
+	transaction_t *commit_trans;
+	int ret = 0;
+
+	if (!inode->i_transaction && !inode->i_next_transaction)
+		goto out;
+	journal = inode->i_transaction->t_journal;
+	spin_lock(&journal->j_state_lock);
+	commit_trans = journal->j_committing_transaction;
+	spin_unlock(&journal->j_state_lock);
+	if (inode->i_transaction == commit_trans) {
+		ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
+			new_size, LLONG_MAX);
+		if (ret)
+			jbd2_journal_abort(journal, ret);
+	}
+out:
+	return ret;
+}
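Note: jbd2_journal_file_inode() and jbd2_journal_begin_ordered_truncate() above, together with jbd2_journal_init_jbd_inode() and jbd2_journal_release_jbd_inode() declared later in include/linux/jbd2.h, form the new inode-based data=ordered API. An illustrative sketch of how a client filesystem might wire it up; every myfs_* name is hypothetical (ext4's real hookup lives elsewhere in this series):

struct myfs_inode_info {
	struct jbd2_inode jinode;	/* per-inode jbd2 ordered-mode state */
	struct inode vfs_inode;
};

/* Once, when the in-core inode is created: */
static void myfs_init_inode(struct myfs_inode_info *ei)
{
	jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
}

/* Whenever a handle dirties the inode's data, file the inode with the
 * running transaction so commit flushes the data before the metadata: */
static int myfs_dirty_data(handle_t *handle, struct myfs_inode_info *ei)
{
	return jbd2_journal_file_inode(handle, &ei->jinode);
}

/* Before shrinking i_size, so the committing transaction's ordered
 * guarantees still hold for the truncated tail: */
static int myfs_begin_truncate(struct myfs_inode_info *ei, loff_t new_size)
{
	return jbd2_journal_begin_ordered_truncate(&ei->jinode, new_size);
}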
fs/mpage.c (12 lines changed):

@@ -82,7 +82,7 @@ static void mpage_end_io_write(struct bio *bio, int err)
 	bio_put(bio);
 }

-static struct bio *mpage_bio_submit(int rw, struct bio *bio)
+struct bio *mpage_bio_submit(int rw, struct bio *bio)
 {
 	bio->bi_end_io = mpage_end_io_read;
 	if (rw == WRITE)
@@ -90,6 +90,7 @@ static struct bio *mpage_bio_submit(int rw, struct bio *bio)
 	submit_bio(rw, bio);
 	return NULL;
 }
+EXPORT_SYMBOL(mpage_bio_submit);

 static struct bio *
 mpage_alloc(struct block_device *bdev,
@@ -435,14 +436,8 @@ EXPORT_SYMBOL(mpage_readpage);
  * written, so it can intelligently allocate a suitably-sized BIO.  For now,
  * just allocate full-size (16-page) BIOs.
  */
-struct mpage_data {
-	struct bio *bio;
-	sector_t last_block_in_bio;
-	get_block_t *get_block;
-	unsigned use_writepage;
-};
-
-static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
+int __mpage_writepage(struct page *page, struct writeback_control *wbc,
 		      void *data)
 {
 	struct mpage_data *mpd = data;
@@ -651,6 +646,7 @@ out:
 	mpd->bio = bio;
 	return ret;
 }
+EXPORT_SYMBOL(__mpage_writepage);

 /**
  * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
include/linux/fs.h:

@@ -1740,6 +1740,8 @@ extern int wait_on_page_writeback_range(struct address_space *mapping,
 				pgoff_t start, pgoff_t end);
 extern int __filemap_fdatawrite_range(struct address_space *mapping,
 				loff_t start, loff_t end, int sync_mode);
+extern int filemap_fdatawrite_range(struct address_space *mapping,
+				loff_t start, loff_t end);

 extern long do_fsync(struct file *file, int datasync);
 extern void sync_supers(void);
include/linux/jbd2.h:

@@ -168,6 +168,8 @@ struct commit_header {
 	unsigned char	h_chksum_size;
 	unsigned char	h_padding[2];
 	__be32		h_chksum[JBD2_CHECKSUM_BYTES];
+	__be64		h_commit_sec;
+	__be32		h_commit_nsec;
 };

 /*
@@ -379,6 +381,38 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
 	bit_spin_unlock(BH_JournalHead, &bh->b_state);
 }

+/* Flags in jbd_inode->i_flags */
+#define __JI_COMMIT_RUNNING 0
+/* Commit of the inode data in progress. We use this flag to protect us from
+ * concurrent deletion of inode. We cannot use reference to inode for this
+ * since we cannot afford doing last iput() on behalf of kjournald
+ */
+#define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING)
+
+/**
+ * struct jbd_inode is the structure linking inodes in ordered mode
+ *   present in a transaction so that we can sync them during commit.
+ */
+struct jbd2_inode {
+	/* Which transaction does this inode belong to? Either the running
+	 * transaction or the committing one. [j_list_lock] */
+	transaction_t *i_transaction;
+
+	/* Pointer to the running transaction modifying inode's data in case
+	 * there is already a committing transaction touching it. [j_list_lock] */
+	transaction_t *i_next_transaction;
+
+	/* List of inodes in the i_transaction [j_list_lock] */
+	struct list_head i_list;
+
+	/* VFS inode this inode belongs to [constant during the lifetime
+	 * of the structure] */
+	struct inode *i_vfs_inode;
+
+	/* Flags of inode [j_list_lock] */
+	unsigned int i_flags;
+};
+
 struct jbd2_revoke_table_s;

 /**
@@ -508,24 +542,12 @@ struct transaction_s
 	 */
 	struct journal_head *t_reserved_list;

-	/*
-	 * Doubly-linked circular list of all buffers under writeout during
-	 * commit [j_list_lock]
-	 */
-	struct journal_head *t_locked_list;
-
 	/*
 	 * Doubly-linked circular list of all metadata buffers owned by this
 	 * transaction [j_list_lock]
 	 */
 	struct journal_head *t_buffers;

-	/*
-	 * Doubly-linked circular list of all data buffers still to be
-	 * flushed before this transaction can be committed [j_list_lock]
-	 */
-	struct journal_head *t_sync_datalist;
-
 	/*
 	 * Doubly-linked circular list of all forget buffers (superseded
 	 * buffers which we can un-checkpoint once this transaction commits)
@@ -564,6 +586,12 @@ struct transaction_s
 	 */
 	struct journal_head *t_log_list;

+	/*
+	 * List of inodes whose data we've modified in data=ordered mode.
+	 * [j_list_lock]
+	 */
+	struct list_head t_inode_list;
+
 	/*
 	 * Protects info related to handles
 	 */
@@ -1004,7 +1032,6 @@ extern int jbd2_journal_extend (handle_t *, int nblocks);
 extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
 extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
 extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
-extern int jbd2_journal_dirty_data (handle_t *, struct buffer_head *);
 extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
 extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
 extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
@@ -1044,6 +1071,10 @@ extern void jbd2_journal_ack_err (journal_t *);
 extern int jbd2_journal_clear_err (journal_t *);
 extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
 extern int jbd2_journal_force_commit(journal_t *);
+extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
+extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size);
+extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
+extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);

 /*
  * journal_head management
@@ -1179,15 +1210,13 @@ static inline int jbd_space_needed(journal_t *journal)

 /* journaling buffer types */
 #define BJ_None		0	/* Not journaled */
-#define BJ_SyncData	1	/* Normal data: flush before commit */
-#define BJ_Metadata	2	/* Normal journaled metadata */
-#define BJ_Forget	3	/* Buffer superseded by this transaction */
-#define BJ_IO		4	/* Buffer is for temporary IO use */
-#define BJ_Shadow	5	/* Buffer contents being shadowed to the log */
-#define BJ_LogCtl	6	/* Buffer contains log descriptors */
-#define BJ_Reserved	7	/* Buffer is reserved for access by journal */
-#define BJ_Locked	8	/* Locked for I/O during commit */
-#define BJ_Types	9
+#define BJ_Metadata	1	/* Normal journaled metadata */
+#define BJ_Forget	2	/* Buffer superseded by this transaction */
+#define BJ_IO		3	/* Buffer is for temporary IO use */
+#define BJ_Shadow	4	/* Buffer contents being shadowed to the log */
+#define BJ_LogCtl	5	/* Buffer contains log descriptors */
+#define BJ_Reserved	6	/* Buffer is reserved for access by journal */
+#define BJ_Types	7

 extern int jbd_blocks_per_page(struct inode *inode);
include/linux/mpage.h:

@@ -11,11 +11,21 @@
  */
 #ifdef CONFIG_BLOCK

+struct mpage_data {
+	struct bio *bio;
+	sector_t last_block_in_bio;
+	get_block_t *get_block;
+	unsigned use_writepage;
+};
+
 struct writeback_control;

+struct bio *mpage_bio_submit(int rw, struct bio *bio);
 int mpage_readpages(struct address_space *mapping, struct list_head *pages,
 				unsigned nr_pages, get_block_t get_block);
 int mpage_readpage(struct page *page, get_block_t get_block);
+int __mpage_writepage(struct page *page, struct writeback_control *wbc,
+		      void *data);
 int mpage_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, get_block_t get_block);
 int mpage_writepage(struct page *page, get_block_t *get_block,
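Note: with struct mpage_data made public and mpage_bio_submit()/__mpage_writepage() exported (see the fs/mpage.c hunks earlier), a filesystem can drive the generic per-page writer from its own writepages method instead of calling mpage_writepages(), which is what the delalloc patches in this series rely on. A sketch under that assumption; myfs_get_block and myfs_writepages are hypothetical:

static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);

static int myfs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct mpage_data mpd = {
		.bio			= NULL,
		.last_block_in_bio	= 0,
		.get_block		= myfs_get_block,
		.use_writepage		= 1,
	};
	int ret;

	/* __mpage_writepage() runs once per dirty page, chaining
	 * contiguous blocks into mpd.bio... */
	ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);

	/* ...so any bio still open when the walk ends must be submitted. */
	if (mpd.bio)
		mpage_bio_submit(WRITE, mpd.bio);
	return ret;
}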
include/linux/percpu_counter.h:

@@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
 void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
-s64 __percpu_counter_sum(struct percpu_counter *fbc);
+s64 __percpu_counter_sum(struct percpu_counter *fbc, int set);

 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -44,13 +44,19 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)

 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
 {
-	s64 ret = __percpu_counter_sum(fbc);
+	s64 ret = __percpu_counter_sum(fbc, 0);
 	return ret < 0 ? 0 : ret;
 }

+static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc)
+{
+	return __percpu_counter_sum(fbc, 1);
+}
+
+
 static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
 {
-	return __percpu_counter_sum(fbc);
+	return __percpu_counter_sum(fbc, 0);
 }

 static inline s64 percpu_counter_read(struct percpu_counter *fbc)
include/linux/writeback.h:

@@ -63,6 +63,7 @@ struct writeback_control {
 	unsigned for_writepages:1;	/* This is a writepages() call */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
 	unsigned more_io:1;		/* more io to be dispatched */
+	unsigned range_cont:1;
 };

 /*
lib/percpu_counter.c:

@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add);
  * Add up all the per-cpu counts, return the result.  This is a more accurate
  * but much slower version of percpu_counter_read_positive()
  */
-s64 __percpu_counter_sum(struct percpu_counter *fbc)
+s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
 {
 	s64 ret;
 	int cpu;
@@ -62,7 +62,12 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
 	for_each_online_cpu(cpu) {
 		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
 		ret += *pcount;
+		if (set)
+			*pcount = 0;
 	}
+	if (set)
+		fbc->count = ret;
+
 	spin_unlock(&fbc->lock);
 	return ret;
 }
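Note: the set argument folds the precise total back into fbc->count and zeroes every per-cpu delta, all under fbc->lock, so a subsequent cheap percpu_counter_read() starts from the exact value; per the commit list, ext4's delalloc ENOSPC handling is the intended user. A sketch of the difference; the counter name is hypothetical:

static void example_sum_vs_sum_and_set(struct percpu_counter *free_blocks)
{
	s64 precise;

	/* Read-only: adds up the per-cpu deltas and leaves them in place. */
	precise = percpu_counter_sum(free_blocks);

	/* Sum and collapse: the per-cpu deltas are zeroed and folded into
	 * fbc->count, so percpu_counter_read() now returns this same
	 * precise value until new per-cpu updates accumulate again. */
	precise = percpu_counter_sum_and_set(free_blocks);
}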
mm/filemap.c:

@@ -236,11 +236,12 @@ int filemap_fdatawrite(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_fdatawrite);

-static int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
+int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 				loff_t end)
 {
 	return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
 }
+EXPORT_SYMBOL(filemap_fdatawrite_range);

 /**
  * filemap_flush - mostly a non-blocking flush
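Note: exporting this wrapper is what lets jbd2_journal_begin_ordered_truncate() (earlier in this diff) start write-out on just the truncated tail of a file. A usage sketch, names hypothetical:

static int example_flush_byte_range(struct inode *inode, loff_t pos, loff_t len)
{
	/* Starts WB_SYNC_ALL write-out for every dirty page overlapping
	 * [pos, pos + len - 1]; it does not wait for the I/O to complete
	 * (pair with wait_on_page_writeback_range() for that). */
	return filemap_fdatawrite_range(inode->i_mapping, pos, pos + len - 1);
}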
mm/page-writeback.c:

@@ -960,6 +960,9 @@ retry:
 	}
 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
 		mapping->writeback_index = index;
+
+	if (wbc->range_cont)
+		wbc->range_start = index << PAGE_CACHE_SHIFT;
 	return ret;
 }
 EXPORT_SYMBOL(write_cache_pages);
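Note: range_cont makes write_cache_pages() store the index it stopped at back into wbc->range_start, so a caller can resume a bounded writeback pass without rescanning pages it has already written (the "mm: Add range_cont mode for writeback" patch in this series exists for ext4's delalloc path). A sketch under that assumption; the callback, batch size and function name are hypothetical:

static void example_ranged_writeback(struct address_space *mapping,
				     loff_t start, loff_t end,
				     writepage_t writepage, void *data)
{
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_NONE,
		.range_start	= start,
		.range_end	= end,
		.range_cont	= 1,	/* advance range_start on return */
	};
	loff_t prev;

	while (wbc.range_start < wbc.range_end) {
		prev = wbc.range_start;
		wbc.nr_to_write = 1024;		/* per-pass batch */
		if (write_cache_pages(mapping, &wbc, writepage, data))
			break;
		if (wbc.range_start == prev)
			break;	/* no forward progress: range is clean */
	}
}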