| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2006-06-14 15:32:57 -04:00
										 |  |  |  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved. | 
					
						
							|  |  |  |  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This copyrighted material is made available to anyone wishing to use, | 
					
						
							|  |  |  |  * modify, copy, or redistribute it subject to the terms and conditions | 
					
						
							|  |  |  |  * of the GNU General Public License v.2. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | #ifndef __GFS2_ONDISK_DOT_H__
 | 
					
						
							|  |  |  | #define __GFS2_ONDISK_DOT_H__
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-30 21:12:38 +05:30
										 |  |  | #include <linux/types.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | #define GFS2_MAGIC		0x01161970
 | 
					
						
							|  |  |  | #define GFS2_BASIC_BLOCK	512
 | 
					
						
							|  |  |  | #define GFS2_BASIC_BLOCK_SHIFT	9
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Lock numbers of the LM_TYPE_NONDISK type */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define GFS2_MOUNT_LOCK		0
 | 
					
						
							|  |  |  | #define GFS2_LIVE_LOCK		1
 | 
					
						
							| 
									
										
											  
											
												GFS2: remove transaction glock
GFS2 has a transaction glock, which must be grabbed for every
transaction, whose purpose is to deal with freezing the filesystem.
Aside from this involving a large amount of locking, it is very easy to
make the current fsfreeze code hang on unfreezing.
This patch rewrites how gfs2 handles freezing the filesystem. The
transaction glock is removed. In its place is a freeze glock, which is
cached (but not held) in a shared state by every node in the cluster
when the filesystem is mounted. This lock only needs to be grabbed on
freezing, and actions which need to be safe from freezing, like
recovery.
When a node wants to freeze the filesystem, it grabs this glock
exclusively.  When the freeze glock state changes on the nodes (either
from shared to unlocked, or shared to exclusive), the filesystem does a
special log flush.  gfs2_log_flush() does all the work for flushing out
and shutting down the incore log, and then it tries to grab the
freeze glock in a shared state again.  Since the filesystem is stuck in
gfs2_log_flush, no new transaction can start, and nothing can be written
to disk. Unfreezing the filesystem simply involves dropping the freeze
glock, allowing gfs2_log_flush() to grab and then release the shared
lock, so it is cached for next time.
However, in order for the unfreezing ioctl to occur, gfs2 needs to get a
shared lock on the filesystem root directory inode to check permissions.
If that glock has already been grabbed exclusively, fsfreeze will be
unable to get the shared lock and unfreeze the filesystem.
In order to allow the unfreeze, this patch makes gfs2 grab a shared lock
on the filesystem root directory during the freeze, and hold it until it
unfreezes the filesystem.  The functions which need to grab a shared
lock in order to allow the unfreeze ioctl to be issued now use the lock
grabbed by the freeze code instead.
The freeze and unfreeze code take care to make sure that this shared
lock will not be dropped while another process is using it.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
											
										 
											2014-05-01 22:26:55 -05:00
										 |  |  | #define GFS2_FREEZE_LOCK	2
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | #define GFS2_RENAME_LOCK	3
 | 
					
						
							| 
									
										
										
										
											2012-01-09 17:18:05 -05:00
										 |  |  | #define GFS2_CONTROL_LOCK	4
 | 
					
						
							|  |  |  | #define GFS2_MOUNTED_LOCK	5
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | /* Format numbers for various metadata types */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_NONE	0
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_SB		100
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_RG		200
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_RB		300
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_DI		400
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_IN		500
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_LF		600
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_JD		700
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_LH		800
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_LD		900
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_LB		1000
 | 
					
						
							| 
									
										
										
										
											2006-02-13 16:21:47 +00:00
										 |  |  | #define GFS2_FORMAT_EA		1600
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_ED		1700
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | #define GFS2_FORMAT_QC		1400
 | 
					
						
							|  |  |  | /* These are format numbers for entities contained in files */ | 
					
						
							| 
									
										
										
										
											2006-02-13 16:21:47 +00:00
										 |  |  | #define GFS2_FORMAT_RI		1100
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_DE		1200
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_QU		1500
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | /* These are part of the superblock */ | 
					
						
							|  |  |  | #define GFS2_FORMAT_FS		1801
 | 
					
						
							|  |  |  | #define GFS2_FORMAT_MULTI	1900
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * An on-disk inode number | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct gfs2_inum { | 
					
						
							|  |  |  | 	__be64 no_formal_ino; | 
					
						
							|  |  |  | 	__be64 no_addr; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Generic metadata head structure | 
					
						
							|  |  |  |  * Every inplace buffer logged in the journal must start with this. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define GFS2_METATYPE_NONE	0
 | 
					
						
							|  |  |  | #define GFS2_METATYPE_SB	1
 | 
					
						
							|  |  |  | #define GFS2_METATYPE_RG	2
 | 
					
						
							|  |  |  | #define GFS2_METATYPE_RB	3
 | 
					
						
							|  |  |  | #define GFS2_METATYPE_DI	4
 | 
					
						
							|  |  |  | #define GFS2_METATYPE_IN	5
 | 
					
						
							|  |  |  | #define GFS2_METATYPE_LF	6
 | 
					
						
							|  |  |  | #define GFS2_METATYPE_JD	7
 | 
					
						
							|  |  |  | #define GFS2_METATYPE_LH	8
 | 
					
						
							|  |  |  | #define GFS2_METATYPE_LD	9
 | 
					
						
							| 
									
										
										
										
											2006-02-13 16:21:47 +00:00
										 |  |  | #define GFS2_METATYPE_LB	12
 | 
					
						
							|  |  |  | #define GFS2_METATYPE_EA	10
 | 
					
						
							|  |  |  | #define GFS2_METATYPE_ED	11
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | #define GFS2_METATYPE_QC	14
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct gfs2_meta_header { | 
					
						
							|  |  |  | 	__be32 mh_magic; | 
					
						
							|  |  |  | 	__be32 mh_type; | 
					
						
							|  |  |  | 	__be64 __pad0;		/* Was generation number in gfs1 */ | 
					
						
							|  |  |  | 	__be32 mh_format; | 
					
						
							| 
									
										
											  
											
												GFS2: Tag all metadata with jid
There are two spare field in the header common to all GFS2
metadata. One is just the right size to fit a journal id
in it, and this patch updates the journal code so that each
time a metadata block is modified, we tag it with the journal
id of the node which is performing the modification.
The reason for this is that it should make it much easier to
debug issues which arise if we can tell which node was the
last to modify a particular metadata block.
Since the field is updated before the block is written into
the journal, each journal should only contain metadata which
is tagged with its own journal id. The one exception to this
is the journal header block, which might have a different node's
id in it, if that journal was recovered by another node in the
cluster.
Thus each journal will contain a record of which nodes recovered
it, via the journal header.
The other field in the metadata header could potentially be
used to hold information about what kind of operation was
performed, but for the time being we just zero it on each
transaction so that if we use it for that in future, we'll
know that the information (where it exists) is reliable.
I did consider using the other field to hold the journal
sequence number, however since in GFS2's journaling we write
the modified data into the journal and not the original
data, this gives no information as to what action caused the
modification, so I think we can probably come up with a better
use for those 64 bits in the future.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
											
										 
											2009-11-06 16:20:51 +00:00
										 |  |  | 	/* This union is to keep userspace happy */ | 
					
						
							|  |  |  | 	union { | 
					
						
							|  |  |  | 		__be32 mh_jid;		/* Was incarnation number in gfs1 */ | 
					
						
							|  |  |  | 		__be32 __pad1; | 
					
						
							|  |  |  | 	}; | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * super-block structure | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * It's probably good if SIZEOF_SB <= GFS2_BASIC_BLOCK (512 bytes) | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Order is important, need to be able to read old superblocks to do on-disk | 
					
						
							|  |  |  |  * version upgrades. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Address of superblock in GFS2 basic blocks */ | 
					
						
							|  |  |  | #define GFS2_SB_ADDR		128
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* The lock number for the superblock (must be zero) */ | 
					
						
							|  |  |  | #define GFS2_SB_LOCK		0
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Requirement:  GFS2_LOCKNAME_LEN % 8 == 0
 | 
					
						
							|  |  |  |    Includes: the fencing zero at the end */ | 
					
						
							|  |  |  | #define GFS2_LOCKNAME_LEN	64
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct gfs2_sb { | 
					
						
							|  |  |  | 	struct gfs2_meta_header sb_header; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	__be32 sb_fs_format; | 
					
						
							|  |  |  | 	__be32 sb_multihost_format; | 
					
						
							|  |  |  | 	__u32  __pad0;	/* Was superblock flags in gfs1 */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	__be32 sb_bsize; | 
					
						
							|  |  |  | 	__be32 sb_bsize_shift; | 
					
						
							|  |  |  | 	__u32 __pad1;	/* Was journal segment size in gfs1 */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	struct gfs2_inum sb_master_dir; /* Was jindex dinode in gfs1 */ | 
					
						
							|  |  |  | 	struct gfs2_inum __pad2; /* Was rindex dinode in gfs1 */ | 
					
						
							|  |  |  | 	struct gfs2_inum sb_root_dir; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	char sb_lockproto[GFS2_LOCKNAME_LEN]; | 
					
						
							|  |  |  | 	char sb_locktable[GFS2_LOCKNAME_LEN]; | 
					
						
							| 
									
										
										
										
											2008-09-22 07:29:31 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	struct gfs2_inum __pad3; /* Was quota inode in gfs1 */ | 
					
						
							|  |  |  | 	struct gfs2_inum __pad4; /* Was licence inode in gfs1 */ | 
					
						
							|  |  |  | #define GFS2_HAS_UUID 1
 | 
					
						
							|  |  |  | 	__u8 sb_uuid[16]; /* The UUID, maybe 0 for backwards compat */ | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * resource index structure | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct gfs2_rindex { | 
					
						
							|  |  |  | 	__be64 ri_addr;	/* grp block disk address */ | 
					
						
							|  |  |  | 	__be32 ri_length;	/* length of rgrp header in fs blocks */ | 
					
						
							|  |  |  | 	__u32 __pad; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	__be64 ri_data0;	/* first data location */ | 
					
						
							|  |  |  | 	__be32 ri_data;	/* num of data blocks in rgrp */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	__be32 ri_bitbytes;	/* number of bytes in data bitmaps */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	__u8 ri_reserved[64]; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * resource group header structure | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Number of blocks per byte in rgrp */ | 
					
						
							|  |  |  | #define GFS2_NBBY		4
 | 
					
						
							|  |  |  | #define GFS2_BIT_SIZE		2
 | 
					
						
							|  |  |  | #define GFS2_BIT_MASK		0x00000003
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define GFS2_BLKST_FREE		0
 | 
					
						
							|  |  |  | #define GFS2_BLKST_USED		1
 | 
					
						
							| 
									
										
										
										
											2006-06-14 15:32:57 -04:00
										 |  |  | #define GFS2_BLKST_UNLINKED	2
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | #define GFS2_BLKST_DINODE	3
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define GFS2_RGF_JOURNAL	0x00000001
 | 
					
						
							|  |  |  | #define GFS2_RGF_METAONLY	0x00000002
 | 
					
						
							|  |  |  | #define GFS2_RGF_DATAONLY	0x00000004
 | 
					
						
							|  |  |  | #define GFS2_RGF_NOALLOC	0x00000008
 | 
					
						
							| 
									
										
										
										
											2012-02-08 12:58:32 +00:00
										 |  |  | #define GFS2_RGF_TRIMMED	0x00000010
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												GFS2: Use lvbs for storing rgrp information with mount option
Instead of reading in the resource groups when gfs2 is checking
for free space to allocate from, gfs2 can store the necessary information
in the resource group's lvb.  Also, instead of searching for unlinked
inodes in every resource group that's checked for free space, gfs2 can
store the number of unlinked inodes in the lvb, and only check for
unlinked inodes if it will find some.
The first time a resource group is locked, the lvb must be initialized.
Since this involves counting the unlinked inodes in the resource group,
this takes a little extra time.  But after that, if the resource group
is locked with GL_SKIP, the buffer head won't be read in unless it's
actually needed.
Enabling the resource groups lvbs is done via the rgrplvb mount option.  If
this option isn't set, the lvbs will still be set and updated, but they won't
be verified or used by the filesystem.  To safely turn on this option, all of
the nodes mounting the filesystem must be running code with this patch, and
the filesystem must have been completely unmounted since they were updated.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
											
										 
											2012-05-29 23:01:09 -05:00
										 |  |  | struct gfs2_rgrp_lvb { | 
					
						
							|  |  |  | 	__be32 rl_magic; | 
					
						
							|  |  |  | 	__be32 rl_flags; | 
					
						
							|  |  |  | 	__be32 rl_free; | 
					
						
							|  |  |  | 	__be32 rl_dinodes; | 
					
						
							|  |  |  | 	__be64 rl_igeneration; | 
					
						
							|  |  |  | 	__be32 rl_unlinked; | 
					
						
							|  |  |  | 	__be32 __pad; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | struct gfs2_rgrp { | 
					
						
							|  |  |  | 	struct gfs2_meta_header rg_header; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	__be32 rg_flags; | 
					
						
							|  |  |  | 	__be32 rg_free; | 
					
						
							|  |  |  | 	__be32 rg_dinodes; | 
					
						
							| 
									
										
										
										
											2006-07-11 09:46:33 -04:00
										 |  |  | 	__be32 __pad; | 
					
						
							|  |  |  | 	__be64 rg_igeneration; | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-07-11 09:46:33 -04:00
										 |  |  | 	__u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */ | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * quota structure | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct gfs2_quota { | 
					
						
							|  |  |  | 	__be64 qu_limit; | 
					
						
							|  |  |  | 	__be64 qu_warn; | 
					
						
							|  |  |  | 	__be64 qu_value; | 
					
						
							| 
									
										
										
										
											2009-12-10 18:52:54 -05:00
										 |  |  | 	__u8 qu_reserved[64]; | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * dinode structure | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define GFS2_MAX_META_HEIGHT	10
 | 
					
						
							|  |  |  | #define GFS2_DIR_MAX_DEPTH	17
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define DT2IF(dt) (((dt) << 12) & S_IFMT)
 | 
					
						
							|  |  |  | #define IF2DT(sif) (((sif) & S_IFMT) >> 12)
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-03-31 15:01:28 -05:00
										 |  |  | enum { | 
					
						
							|  |  |  | 	gfs2fl_Jdata		= 0, | 
					
						
							|  |  |  | 	gfs2fl_ExHash		= 1, | 
					
						
							|  |  |  | 	gfs2fl_Unused		= 2, | 
					
						
							|  |  |  | 	gfs2fl_EaIndirect	= 3, | 
					
						
							|  |  |  | 	gfs2fl_Directio		= 4, | 
					
						
							|  |  |  | 	gfs2fl_Immutable	= 5, | 
					
						
							|  |  |  | 	gfs2fl_AppendOnly	= 6, | 
					
						
							|  |  |  | 	gfs2fl_NoAtime		= 7, | 
					
						
							|  |  |  | 	gfs2fl_Sync		= 8, | 
					
						
							|  |  |  | 	gfs2fl_System		= 9, | 
					
						
							| 
									
										
										
										
											2012-05-28 15:26:56 +01:00
										 |  |  | 	gfs2fl_TopLevel		= 10, | 
					
						
							| 
									
										
										
										
											2006-03-31 15:01:28 -05:00
										 |  |  | 	gfs2fl_TruncInProg	= 29, | 
					
						
							|  |  |  | 	gfs2fl_InheritDirectio	= 30, | 
					
						
							|  |  |  | 	gfs2fl_InheritJdata	= 31, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | /* Dinode flags */ | 
					
						
							|  |  |  | #define GFS2_DIF_JDATA			0x00000001
 | 
					
						
							|  |  |  | #define GFS2_DIF_EXHASH			0x00000002
 | 
					
						
							|  |  |  | #define GFS2_DIF_UNUSED			0x00000004  /* only in gfs1 */
 | 
					
						
							|  |  |  | #define GFS2_DIF_EA_INDIRECT		0x00000008
 | 
					
						
							|  |  |  | #define GFS2_DIF_DIRECTIO		0x00000010
 | 
					
						
							|  |  |  | #define GFS2_DIF_IMMUTABLE		0x00000020
 | 
					
						
							|  |  |  | #define GFS2_DIF_APPENDONLY		0x00000040
 | 
					
						
							|  |  |  | #define GFS2_DIF_NOATIME		0x00000080
 | 
					
						
							|  |  |  | #define GFS2_DIF_SYNC			0x00000100
 | 
					
						
							|  |  |  | #define GFS2_DIF_SYSTEM			0x00000200 /* New in gfs2 */
 | 
					
						
							| 
									
										
										
										
											2012-05-28 15:26:56 +01:00
										 |  |  | #define GFS2_DIF_TOPDIR			0x00000400 /* New in gfs2 */
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | #define GFS2_DIF_TRUNC_IN_PROG		0x20000000 /* New in gfs2 */
 | 
					
						
							| 
									
										
										
										
											2012-05-28 15:26:56 +01:00
										 |  |  | #define GFS2_DIF_INHERIT_DIRECTIO	0x40000000 /* only in gfs1 */
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | #define GFS2_DIF_INHERIT_JDATA		0x80000000
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct gfs2_dinode { | 
					
						
							|  |  |  | 	struct gfs2_meta_header di_header; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	struct gfs2_inum di_num; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	__be32 di_mode;	/* mode of file */ | 
					
						
							|  |  |  | 	__be32 di_uid;	/* owner's user id */ | 
					
						
							|  |  |  | 	__be32 di_gid;	/* owner's group id */ | 
					
						
							|  |  |  | 	__be32 di_nlink;	/* number of links to this file */ | 
					
						
							|  |  |  | 	__be64 di_size;	/* number of bytes in file */ | 
					
						
							|  |  |  | 	__be64 di_blocks;	/* number of blocks in file */ | 
					
						
							|  |  |  | 	__be64 di_atime;	/* time last accessed */ | 
					
						
							|  |  |  | 	__be64 di_mtime;	/* time last modified */ | 
					
						
							|  |  |  | 	__be64 di_ctime;	/* time last changed */ | 
					
						
							|  |  |  | 	__be32 di_major;	/* device major number */ | 
					
						
							|  |  |  | 	__be32 di_minor;	/* device minor number */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* This section varies from gfs1. Padding added to align with
 | 
					
						
							|  |  |  |          * remainder of dinode | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	__be64 di_goal_meta;	/* rgrp to alloc from next */ | 
					
						
							|  |  |  | 	__be64 di_goal_data;	/* data block goal */ | 
					
						
							| 
									
										
										
										
											2006-07-11 09:46:33 -04:00
										 |  |  | 	__be64 di_generation;	/* generation number for NFS */ | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	__be32 di_flags;	/* GFS2_DIF_... */ | 
					
						
							|  |  |  | 	__be32 di_payload_format;  /* GFS2_FORMAT_... */ | 
					
						
							|  |  |  | 	__u16 __pad1;	/* Was ditype in gfs1 */ | 
					
						
							|  |  |  | 	__be16 di_height;	/* height of metadata */ | 
					
						
							|  |  |  | 	__u32 __pad2;	/* Unused incarnation number from gfs1 */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* These only apply to directories  */ | 
					
						
							|  |  |  | 	__u16 __pad3;	/* Padding */ | 
					
						
							|  |  |  | 	__be16 di_depth;	/* Number of bits in the table */ | 
					
						
							|  |  |  | 	__be32 di_entries;	/* The number of entries in the directory */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	struct gfs2_inum __pad4; /* Unused even in current gfs1 */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	__be64 di_eattr;	/* extended attribute block number */ | 
					
						
							| 
									
										
										
										
											2007-06-05 09:39:18 +01:00
										 |  |  | 	__be32 di_atime_nsec;   /* nsec portion of atime */ | 
					
						
							|  |  |  | 	__be32 di_mtime_nsec;   /* nsec portion of mtime */ | 
					
						
							|  |  |  | 	__be32 di_ctime_nsec;   /* nsec portion of ctime */ | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-05 09:39:18 +01:00
										 |  |  | 	__u8 di_reserved[44]; | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * directory structure - many of these per directory file | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define GFS2_FNAMESIZE		255
 | 
					
						
							|  |  |  | #define GFS2_DIRENT_SIZE(name_len) ((sizeof(struct gfs2_dirent) + (name_len) + 7) & ~7)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct gfs2_dirent { | 
					
						
							|  |  |  | 	struct gfs2_inum de_inum; | 
					
						
							|  |  |  | 	__be32 de_hash; | 
					
						
							| 
									
										
										
										
											2006-02-13 16:21:47 +00:00
										 |  |  | 	__be16 de_rec_len; | 
					
						
							|  |  |  | 	__be16 de_name_len; | 
					
						
							|  |  |  | 	__be16 de_type; | 
					
						
							| 
									
										
											  
											
												GFS2: Add meta readahead field in directory entries
The intent of this new field in the directory entry is to
allow a subsequent lookup to know how many blocks, which
are contiguous with the inode, contain metadata which relates
to the inode. This will then allow the issuing of a single
read to read these blocks, rather than reading the inode
first, and then issuing a second read for the metadata.
This only works under some fairly strict conditions, since
we do not have back pointers from inodes to directory entries
we must ensure that the blocks referenced in this way will
always belong to the inode.
This rules out being able to use this system for indirect
blocks, as these can change as a result of truncate/rewrite.
So the idea here is to restrict this to xattr blocks only
for the time being. For most inodes, that means only a
single block. Also, when using ACLs and/or SELinux or
other LSMs, these will be added at inode creation time
so that they will be contiguous with the inode on disk and
also will almost always be needed when we read the inode in
for permissions checks.
Once an xattr block for an inode is allocated, it will never
change until the inode is deallocated.
This patch adds the new field, a further patch will add the
readahead in due course.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
											
										 
											2014-02-07 11:23:22 +00:00
										 |  |  | 	union { | 
					
						
							|  |  |  | 		__u8 __pad[14]; | 
					
						
							|  |  |  | 		struct { | 
					
						
							|  |  |  | 			__be16 de_rahead; | 
					
						
							|  |  |  | 			__u8 pad2[12]; | 
					
						
							|  |  |  | 		}; | 
					
						
							|  |  |  | 	}; | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Header of leaf directory nodes | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct gfs2_leaf { | 
					
						
							|  |  |  | 	struct gfs2_meta_header lf_header; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	__be16 lf_depth;		/* Depth of leaf */ | 
					
						
							|  |  |  | 	__be16 lf_entries;		/* Number of dirents in leaf */ | 
					
						
							|  |  |  | 	__be32 lf_dirent_format;	/* Format of the dirents */ | 
					
						
							|  |  |  | 	__be64 lf_next;			/* Next leaf, if overflow */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-08 12:14:57 +00:00
										 |  |  | 	union { | 
					
						
							|  |  |  | 		__u8 lf_reserved[64]; | 
					
						
							|  |  |  | 		struct { | 
					
						
							|  |  |  | 			__be64 lf_inode;	/* Dir inode number */ | 
					
						
							|  |  |  | 			__be32 lf_dist;		/* Dist from inode on chain */ | 
					
						
							|  |  |  | 			__be32 lf_nsec;		/* Last ins/del usecs */ | 
					
						
							|  |  |  | 			__be64 lf_sec;		/* Last ins/del in secs */ | 
					
						
							|  |  |  | 			__u8 lf_reserved2[40]; | 
					
						
							|  |  |  | 		}; | 
					
						
							|  |  |  | 	}; | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Extended attribute header format | 
					
						
							| 
									
										
										
										
											2009-08-25 13:44:04 +01:00
										 |  |  |  * | 
					
						
							|  |  |  |  * This works in a similar way to dirents. There is a fixed size header | 
					
						
							|  |  |  |  * followed by a variable length section made up of the name and the | 
					
						
							|  |  |  |  * associated data. In the case of a "stuffed" entry, the value is | 
					
						
							|  |  |  |  * inline directly after the name, the ea_num_ptrs entry will be | 
					
						
							|  |  |  |  * zero in that case. For non-"stuffed" entries, there will be | 
					
						
							|  |  |  |  * a set of pointers (aligned to 8 byte boundary) to the block(s) | 
					
						
							|  |  |  |  * containing the value. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The blocks containing the values and the blocks containing the | 
					
						
							|  |  |  |  * extended attribute headers themselves all start with the common | 
					
						
							|  |  |  |  * metadata header. Each inode, if it has extended attributes, will | 
					
						
							|  |  |  |  * have either a single block containing the extended attribute headers | 
					
						
							|  |  |  |  * or a single indirect block pointing to blocks containing the | 
					
						
							| 
									
										
										
										
											2014-02-04 15:45:11 +00:00
										 |  |  |  * extended attribute headers. | 
					
						
							| 
									
										
										
										
											2009-08-25 13:44:04 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-02-04 15:45:11 +00:00
										 |  |  |  * The maximum size of the data part of an extended attribute is 64k | 
					
						
							| 
									
										
										
										
											2009-08-25 13:44:04 +01:00
										 |  |  |  * so the number of blocks required depends upon block size. Since the | 
					
						
							|  |  |  |  * block size also determines the number of pointers in an indirect | 
					
						
							|  |  |  |  * block, it's a fairly complicated calculation to work out the maximum | 
					
						
							|  |  |  |  * number of blocks that an inode may have relating to extended attributes. | 
					
						
							|  |  |  |  * | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							/* Upper bounds on an extended attribute's name length and data size (64k). */
							#define GFS2_EA_MAX_NAME_LEN	255
							#define GFS2_EA_MAX_DATA_LEN	65536

							/* Values stored in gfs2_ea_header.ea_type. */
							#define GFS2_EATYPE_UNUSED	0
							#define GFS2_EATYPE_USR		1
							#define GFS2_EATYPE_SYS		2
							#define GFS2_EATYPE_SECURITY	3

							/* Highest defined GFS2_EATYPE_ value; update when a new type is added. */
							#define GFS2_EATYPE_LAST	3
							/* True when x does not exceed GFS2_EATYPE_LAST (GFS2_EATYPE_UNUSED passes). */
							#define GFS2_EATYPE_VALID(x)	((x) <= GFS2_EATYPE_LAST)

							/* Bits stored in gfs2_ea_header.ea_flags. */
							#define GFS2_EAFLAG_LAST	0x01	/* last ea in block */
 | 
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Fixed-size header that starts each extended attribute record.
							 * The name follows it directly; after the name comes either the
							 * inline value ("stuffed" entry) or the block pointers.
							 */
							struct gfs2_ea_header {
								__be32 ea_rec_len;
								__be32 ea_data_len;
								__u8 ea_name_len;	/* name is not NUL-terminated */
								__u8 ea_type;		/* GFS2_EATYPE_... */
								__u8 ea_flags;		/* GFS2_EAFLAG_... */
								__u8 ea_num_ptrs;	/* zero for a "stuffed" entry */
								__u32 __pad;
							};
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Log header structure | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							/* Bit stored in gfs2_log_header.lh_flags. */
							#define GFS2_LOG_HEAD_UNMOUNT	0x00000001	/* log is clean */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct gfs2_log_header { | 
					
						
							|  |  |  | 	struct gfs2_meta_header lh_header; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	__be64 lh_sequence;	/* Sequence number of this transaction */ | 
					
						
							|  |  |  | 	__be32 lh_flags;	/* GFS2_LOG_HEAD_... */ | 
					
						
							|  |  |  | 	__be32 lh_tail;		/* Block number of log tail */ | 
					
						
							|  |  |  | 	__be32 lh_blkno; | 
					
						
							|  |  |  | 	__be32 lh_hash; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Log type descriptor | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Values stored in gfs2_log_descriptor.ld_type. The comment after
							 * each one states how ld_data1 and ld_data2 are used for that type.
							 */

							#define GFS2_LOG_DESC_METADATA	300
							/* ld_data1 is the number of metadata blocks in the descriptor.
							   ld_data2 is unused. */

							#define GFS2_LOG_DESC_REVOKE	301
							/* ld_data1 is the number of revoke blocks in the descriptor.
							   ld_data2 is unused. */

							#define GFS2_LOG_DESC_JDATA	302
							/* ld_data1 is the number of data blocks in the descriptor.
							   ld_data2 is unused. */
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | struct gfs2_log_descriptor { | 
					
						
							|  |  |  | 	struct gfs2_meta_header ld_header; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	__be32 ld_type;		/* GFS2_LOG_DESC_... */ | 
					
						
							|  |  |  | 	__be32 ld_length;	/* Number of buffers in this chunk */ | 
					
						
							|  |  |  | 	__be32 ld_data1;	/* descriptor-specific field */ | 
					
						
							|  |  |  | 	__be32 ld_data2;	/* descriptor-specific field */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	__u8 ld_reserved[32]; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Inum Range | 
					
						
							|  |  |  |  * Describe a range of formal inode numbers allocated to | 
					
						
							|  |  |  |  * one machine to assign to inodes. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * 1048576 == 2^20.
							 * NOTE(review): presumably the count of formal inode numbers handed
							 * out per range - confirm against the allocator.
							 */
							#define GFS2_INUM_QUANTUM	1048576
 | 
					
						
							|  |  |  | 
 | 
					
						
							/* One range of formal inode numbers, stored as two big-endian 64-bit fields. */
							struct gfs2_inum_range {
								__be64 ir_start;	/* start of the range */
								__be64 ir_length;	/* length of the range */
							};
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Statfs change | 
					
						
							|  |  |  |  * Describes a change to the pool of free and allocated | 
					
						
							|  |  |  |  * blocks. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Statfs change record: three big-endian 64-bit counters.
							 * NOTE(review): presumably signed deltas to the total block, free
							 * block and dinode counts - confirm against the statfs code.
							 */
							struct gfs2_statfs_change {
								__be64 sc_total;
								__be64 sc_free;
								__be64 sc_dinodes;
							};
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Quota change | 
					
						
							|  |  |  |  * Describes an allocation change for a particular | 
					
						
							|  |  |  |  * user or group. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Bit stored in gfs2_quota_change.qc_flags.
							 * NOTE(review): presumably set when qc_id names a user rather than
							 * a group - confirm against the quota code.
							 */
							#define GFS2_QCF_USER		0x00000001
 | 
					
						
							|  |  |  | 
 | 
					
						
							/* Quota change record: the change amount, its flags and the id it applies to. */
							struct gfs2_quota_change {
								__be64 qc_change;
								__be32 qc_flags;	/* GFS2_QCF_... */
								__be32 qc_id;
							};
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Quota values: a magic number, explicit padding so the 64-bit
							 * fields that follow start on an 8-byte boundary, then the limit,
							 * warning threshold and current usage.
							 */
							struct gfs2_quota_lvb {
								__be32 qb_magic;
								__u32 __pad;
								__be64 qb_limit;	/* Hard limit of # blocks to alloc */
								__be64 qb_warn;		/* Warn user when alloc is above this # */
								__be64 qb_value;	/* Current # blocks allocated */
							};
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-01-16 16:50:04 +00:00
										 |  |  | #endif /* __GFS2_ONDISK_DOT_H__ */
 |