Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: extent macros cleanup
  Fix compilation with EXT_DEBUG, also fix leXX_to_cpu conversions.
  ext4: remove extra IS_RDONLY() check
  ext4: Use is_power_of_2()
  Use zero_user_page() in ext4 where possible
  ext4: Remove 65000 subdirectory limit
  ext4: Expand extra_inodes space per the s_{want,min}_extra_isize fields 
  ext4: Add nanosecond timestamps
  jbd2: Move jbd2-debug file to debugfs
  jbd2: Fix CONFIG_JBD_DEBUG ifdef to be CONFIG_JBD2_DEBUG
  ext4: Set the journal JBD2_FEATURE_INCOMPAT_64BIT on large devices
  ext4: Make extents code sanely handle on-disk corruption
  ext4: copy i_flags to inode flags on write
  ext4: Enable extents by default
  Change on-disk format to support 2^15 uninitialized extents
  write support for preallocated blocks
  fallocate support in ext4
  sys_fallocate() implementation on i386, x86_64 and powerpc
This commit is contained in:
Linus Torvalds 2007-07-18 10:32:00 -07:00
commit d756d10e24
30 changed files with 1332 additions and 263 deletions

View file

@ -71,7 +71,7 @@
/*
* Maximal count of links to a file
*/
#define EXT4_LINK_MAX 32000
#define EXT4_LINK_MAX 65000
/*
* Macro-instructions used to manage several block sizes
@ -102,6 +102,7 @@
EXT4_GOOD_OLD_FIRST_INO : \
(s)->s_first_ino)
#endif
#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
/*
* Macro-instructions used to manage fragments
@ -201,6 +202,7 @@ struct ext4_group_desc
#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
/* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input {
@ -225,6 +227,11 @@ struct ext4_new_group_data {
__u32 free_blocks_count;
};
/*
* Following is used by preallocation code to tell get_blocks() that we
* want uninitialzed extents.
*/
#define EXT4_CREATE_UNINITIALIZED_EXT 2
/*
* ioctl commands
@ -237,7 +244,7 @@ struct ext4_new_group_data {
#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
#endif
#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
@ -253,7 +260,7 @@ struct ext4_new_group_data {
#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
#endif
#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
@ -282,7 +289,7 @@ struct ext4_inode {
__le16 i_uid; /* Low 16 bits of Owner Uid */
__le32 i_size; /* Size in bytes */
__le32 i_atime; /* Access time */
__le32 i_ctime; /* Creation time */
__le32 i_ctime; /* Inode Change time */
__le32 i_mtime; /* Modification time */
__le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */
@ -331,10 +338,85 @@ struct ext4_inode {
} osd2; /* OS dependent 2 */
__le16 i_extra_isize;
__le16 i_pad1;
__le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
__le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
__le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
__le32 i_crtime; /* File Creation time */
__le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
};
#define i_size_high i_dir_acl
#define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
/*
* Extended fields will fit into an inode if the filesystem was formatted
* with large inodes (-I 256 or larger) and there are not currently any EAs
* consuming all of the available space. For new inodes we always reserve
* enough space for the kernel's known extended fields, but for inodes
* created with an old kernel this might not have been the case. None of
* the extended inode fields is critical for correct filesystem operation.
* This macro checks if a certain field fits in the inode. Note that
* inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
*/
#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
((offsetof(typeof(*ext4_inode), field) + \
sizeof((ext4_inode)->field)) \
<= (EXT4_GOOD_OLD_INODE_SIZE + \
(einode)->i_extra_isize)) \
static inline __le32 ext4_encode_extra_time(struct timespec *time)
{
return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
time->tv_sec >> 32 : 0) |
((time->tv_nsec << 2) & EXT4_NSEC_MASK));
}
static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
{
if (sizeof(time->tv_sec) > 4)
time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
<< 32;
time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
}
#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
do { \
(raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
(raw_inode)->xtime ## _extra = \
ext4_encode_extra_time(&(inode)->xtime); \
} while (0)
#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
(raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
(raw_inode)->xtime ## _extra = \
ext4_encode_extra_time(&(einode)->xtime); \
} while (0)
#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
do { \
(inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
ext4_decode_extra_time(&(inode)->xtime, \
raw_inode->xtime ## _extra); \
} while (0)
#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
(einode)->xtime.tv_sec = \
(signed)le32_to_cpu((raw_inode)->xtime); \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
ext4_decode_extra_time(&(einode)->xtime, \
raw_inode->xtime ## _extra); \
} while (0)
#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1 osd1.linux1.l_i_reserved1
#define i_frag osd2.linux2.l_i_frag
@ -533,6 +615,13 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
return container_of(inode, struct ext4_inode_info, vfs_inode);
}
static inline struct timespec ext4_current_time(struct inode *inode)
{
return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
}
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
@ -603,6 +692,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@ -620,6 +711,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
EXT4_FEATURE_INCOMPAT_64BIT)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
/*
@ -862,6 +955,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern void ext4_truncate (struct inode *);
extern void ext4_set_inode_flags(struct inode *);
extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
@ -983,6 +1077,8 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
extern void ext4_ext_truncate(struct inode *, struct page *);
extern void ext4_ext_init(struct super_block *);
extern void ext4_ext_release(struct super_block *);
extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
loff_t len);
static inline int
ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
unsigned long max_blocks, struct buffer_head *bh,

View file

@ -141,7 +141,25 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
#define EXT_MAX_BLOCK 0xffffffff
#define EXT_MAX_LEN ((1UL << 15) - 1)
/*
* EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
* initialized extent. This is 2^15 and not (2^16 - 1), since we use the
* MSB of ee_len field in the extent datastructure to signify if this
* particular extent is an initialized extent or an uninitialized (i.e.
* preallocated).
* EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
* uninitialized extent.
* If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
* uninitialized one. In other words, if MSB of ee_len is set, it is an
* uninitialized extent with only one special scenario when ee_len = 0x8000.
* In this case we can not have an uninitialized extent of zero length and
* thus we make it as a special case of initialized extent with 0x8000 length.
* This way we get better extent-to-group alignment for initialized extents.
* Hence, the maximum number of blocks we can have in an *initialized*
* extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
*/
#define EXT_INIT_MAX_LEN (1UL << 15)
#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
#define EXT_FIRST_EXTENT(__hdr__) \
@ -188,8 +206,31 @@ ext4_ext_invalidate_cache(struct inode *inode)
EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
}
static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
{
/* We can not have an uninitialized extent of zero length! */
BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
}
static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
{
/* Extent with ee_len of 0x8000 is treated as an initialized extent */
return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
}
static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
{
return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
le16_to_cpu(ext->ee_len) :
(le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
}
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
extern int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *);
extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);

View file

@ -153,6 +153,11 @@ struct ext4_inode_info {
unsigned long i_ext_generation;
struct ext4_ext_cache i_cached_extent;
/*
* File creation time. Its function is same as that of
* struct timespec i_{a,c,m}time in the generic inode.
*/
struct timespec i_crtime;
};
#endif /* _LINUX_EXT4_FS_I */

View file

@ -73,7 +73,7 @@ struct ext4_sb_info {
struct list_head s_orphan;
unsigned long s_commit_interval;
struct block_device *journal_bdev;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
#endif
@ -81,6 +81,7 @@ struct ext4_sb_info {
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
#ifdef EXTENTS_STATS
/* ext4 extents stats */

6
include/linux/falloc.h Normal file
View file

@ -0,0 +1,6 @@
#ifndef _FALLOC_H_
#define _FALLOC_H_
#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
#endif /* _FALLOC_H_ */

View file

@ -1147,6 +1147,8 @@ struct inode_operations {
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
void (*truncate_range)(struct inode *, loff_t, loff_t);
long (*fallocate)(struct inode *inode, int mode, loff_t offset,
loff_t len);
};
struct seq_file;

View file

@ -50,14 +50,14 @@
*/
#define JBD_DEFAULT_MAX_COMMIT_AGE 5
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
/*
* Define JBD_EXPENSIVE_CHECKING to enable more expensive internal
* consistency checks. By default we don't do this unless
* CONFIG_JBD_DEBUG is on.
* CONFIG_JBD2_DEBUG is on.
*/
#define JBD_EXPENSIVE_CHECKING
extern int jbd2_journal_enable_debug;
extern u8 jbd2_journal_enable_debug;
#define jbd_debug(n, f, a...) \
do { \

View file

@ -610,6 +610,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
const struct itimerspec __user *utmr);
asmlinkage long sys_eventfd(unsigned int count);
asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);