| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2005-11-02 14:58:39 +11:00
										 |  |  |  * Copyright (c) 2000-2005 Silicon Graphics, Inc. | 
					
						
							|  |  |  |  * All Rights Reserved. | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2005-11-02 14:58:39 +11:00
										 |  |  |  * This program is free software; you can redistribute it and/or | 
					
						
							|  |  |  |  * modify it under the terms of the GNU General Public License as | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  * published by the Free Software Foundation. | 
					
						
							|  |  |  |  * | 
					
						
							| 
									
										
										
										
											2005-11-02 14:58:39 +11:00
										 |  |  |  * This program is distributed in the hope that it would be useful, | 
					
						
							|  |  |  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  |  * GNU General Public License for more details. | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2005-11-02 14:58:39 +11:00
										 |  |  |  * You should have received a copy of the GNU General Public License | 
					
						
							|  |  |  |  * along with this program; if not, write the Free Software Foundation, | 
					
						
							|  |  |  |  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  */ | 
					
						
							|  |  |  | #ifndef __XFS_MOUNT_H__
 | 
					
						
							|  |  |  | #define	__XFS_MOUNT_H__
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-08-12 20:49:32 +10:00
										 |  |  | #ifdef __KERNEL__
 | 
					
						
							| 
									
										
										
										
											2008-10-30 17:05:38 +11:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-06-14 09:22:15 -05:00
										 |  |  | struct xlog; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | struct xfs_inode; | 
					
						
							| 
									
										
										
										
											2007-07-11 11:09:12 +10:00
										 |  |  | struct xfs_mru_cache; | 
					
						
							| 
									
										
										
										
											2008-05-21 16:41:01 +10:00
										 |  |  | struct xfs_nameops; | 
					
						
							| 
									
										
										
										
											2008-10-30 17:38:26 +11:00
										 |  |  | struct xfs_ail; | 
					
						
							| 
									
										
										
										
											2009-06-08 15:33:32 +02:00
										 |  |  | struct xfs_quotainfo; | 
					
						
							| 
									
										
										
										
											2013-10-29 22:11:46 +11:00
										 |  |  | struct xfs_dir_ops; | 
					
						
							| 
									
										
										
										
											2009-06-08 15:33:32 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-03-14 13:13:09 +11:00
										 |  |  | #ifdef HAVE_PERCPU_SB
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Valid per-cpu incore superblock counters. Note that if you add new counters, | 
					
						
							|  |  |  |  * you may need to define new counter disabled bit field descriptors as there | 
					
						
							|  |  |  |  * are more possible fields in the superblock than can fit in a bitfield on a | 
					
						
							|  |  |  |  * 32 bit platform. The XFS_SBS_* values for the current counters just | 
					
						
							|  |  |  |  * fit. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | typedef struct xfs_icsb_cnts { | 
					
						
							|  |  |  | 	uint64_t	icsb_fdblocks; | 
					
						
							|  |  |  | 	uint64_t	icsb_ifree; | 
					
						
							|  |  |  | 	uint64_t	icsb_icount; | 
					
						
							| 
									
										
										
										
											2006-03-14 13:29:16 +11:00
										 |  |  | 	unsigned long	icsb_flags; | 
					
						
							| 
									
										
										
										
											2006-03-14 13:13:09 +11:00
										 |  |  | } xfs_icsb_cnts_t; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-03-14 13:29:16 +11:00
										 |  |  | #define XFS_ICSB_FLAG_LOCK	(1 << 0)	/* counter lock bit */
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-03-14 13:13:09 +11:00
										 |  |  | #define XFS_ICSB_LAZY_COUNT	(1 << 1)	/* accuracy not needed */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | extern int	xfs_icsb_init_counters(struct xfs_mount *); | 
					
						
							| 
									
										
										
										
											2007-02-10 18:36:29 +11:00
										 |  |  | extern void	xfs_icsb_reinit_counters(struct xfs_mount *); | 
					
						
							| 
									
										
										
										
											2008-05-20 15:10:52 +10:00
										 |  |  | extern void	xfs_icsb_destroy_counters(struct xfs_mount *); | 
					
						
							| 
									
										
										
										
											2008-04-22 17:34:37 +10:00
										 |  |  | extern void	xfs_icsb_sync_counters(struct xfs_mount *, int); | 
					
						
							|  |  |  | extern void	xfs_icsb_sync_counters_locked(struct xfs_mount *, int); | 
					
						
							| 
									
										
										
										
											2010-09-30 02:25:55 +00:00
										 |  |  | extern int	xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t, | 
					
						
							|  |  |  | 						int64_t, int); | 
					
						
							| 
									
										
										
										
											2006-03-14 13:13:09 +11:00
										 |  |  | 
 | 
					
						
							|  |  |  | #else
 | 
					
						
							| 
									
										
										
										
											2008-05-20 15:10:52 +10:00
										 |  |  | #define xfs_icsb_init_counters(mp)		(0)
 | 
					
						
							|  |  |  | #define xfs_icsb_destroy_counters(mp)		do { } while (0)
 | 
					
						
							|  |  |  | #define xfs_icsb_reinit_counters(mp)		do { } while (0)
 | 
					
						
							| 
									
										
										
										
											2008-04-22 17:34:37 +10:00
										 |  |  | #define xfs_icsb_sync_counters(mp, flags)	do { } while (0)
 | 
					
						
							| 
									
										
										
										
											2008-04-29 12:53:00 +10:00
										 |  |  | #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
 | 
					
						
							| 
									
										
										
										
											2010-09-30 02:25:55 +00:00
										 |  |  | #define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
 | 
					
						
							|  |  |  | 	xfs_mod_incore_sb(mp, field, delta, rsvd) | 
					
						
							| 
									
										
										
										
											2006-03-14 13:13:09 +11:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-01-04 11:35:03 +11:00
										 |  |  | /* dynamic preallocation free space thresholds, 5% down to 1% */ | 
					
						
							|  |  |  | enum { | 
					
						
							|  |  |  | 	XFS_LOWSP_1_PCNT = 0, | 
					
						
							|  |  |  | 	XFS_LOWSP_2_PCNT, | 
					
						
							|  |  |  | 	XFS_LOWSP_3_PCNT, | 
					
						
							|  |  |  | 	XFS_LOWSP_4_PCNT, | 
					
						
							|  |  |  | 	XFS_LOWSP_5_PCNT, | 
					
						
							|  |  |  | 	XFS_LOWSP_MAX, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | typedef struct xfs_mount { | 
					
						
							| 
									
										
										
										
											2007-08-30 17:21:30 +10:00
										 |  |  | 	struct super_block	*m_super; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	xfs_tid_t		m_tid;		/* next unused tid for fs */ | 
					
						
							| 
									
										
										
										
											2008-10-30 17:38:26 +11:00
										 |  |  | 	struct xfs_ail		*m_ail;		/* fs active log item list */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	xfs_sb_t		m_sb;		/* copy of fs superblock */ | 
					
						
							| 
									
										
										
										
											2007-10-11 17:42:32 +10:00
										 |  |  | 	spinlock_t		m_sb_lock;	/* sb counter lock */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	struct xfs_buf		*m_sb_bp;	/* buffer for superblock */ | 
					
						
							|  |  |  | 	char			*m_fsname;	/* filesystem name */ | 
					
						
							|  |  |  | 	int			m_fsname_len;	/* strlen of fs name */ | 
					
						
							| 
									
										
										
										
											2005-11-02 11:44:33 +11:00
										 |  |  | 	char			*m_rtname;	/* realtime device name */ | 
					
						
							|  |  |  | 	char			*m_logname;	/* external log device name */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int			m_bsize;	/* fs logical block size */ | 
					
						
							|  |  |  | 	xfs_agnumber_t		m_agfrotor;	/* last ag where space found */ | 
					
						
							|  |  |  | 	xfs_agnumber_t		m_agirotor;	/* last ag dir inode alloced */ | 
					
						
							| 
									
										
										
										
											2007-10-11 17:43:43 +10:00
										 |  |  | 	spinlock_t		m_agirotor_lock;/* .. and lock protecting it */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	xfs_agnumber_t		m_maxagi;	/* highest inode alloc group */ | 
					
						
							|  |  |  | 	uint			m_readio_log;	/* min read size log bytes */ | 
					
						
							|  |  |  | 	uint			m_readio_blocks; /* min read size blocks */ | 
					
						
							|  |  |  | 	uint			m_writeio_log;	/* min write size log bytes */ | 
					
						
							|  |  |  | 	uint			m_writeio_blocks; /* min write size blocks */ | 
					
						
							| 
									
										
										
										
											2012-06-14 09:22:15 -05:00
										 |  |  | 	struct xlog		*m_log;		/* log specific stuff */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int			m_logbufs;	/* number of log buffers */ | 
					
						
							|  |  |  | 	int			m_logbsize;	/* size of each log buffer */ | 
					
						
							|  |  |  | 	uint			m_rsumlevels;	/* rt summary levels */ | 
					
						
							|  |  |  | 	uint			m_rsumsize;	/* size of rt summary, bytes */ | 
					
						
							|  |  |  | 	struct xfs_inode	*m_rbmip;	/* pointer to bitmap inode */ | 
					
						
							|  |  |  | 	struct xfs_inode	*m_rsumip;	/* pointer to summary inode */ | 
					
						
							|  |  |  | 	struct xfs_inode	*m_rootip;	/* pointer to root directory */ | 
					
						
							|  |  |  | 	struct xfs_quotainfo	*m_quotainfo;	/* disk quota information */ | 
					
						
							|  |  |  | 	xfs_buftarg_t		*m_ddev_targp;	/* saves taking the address */ | 
					
						
							|  |  |  | 	xfs_buftarg_t		*m_logdev_targp;/* ptr to log device */ | 
					
						
							|  |  |  | 	xfs_buftarg_t		*m_rtdev_targp;	/* ptr to rt device */ | 
					
						
							|  |  |  | 	__uint8_t		m_blkbit_log;	/* blocklog + NBBY */ | 
					
						
							|  |  |  | 	__uint8_t		m_blkbb_log;	/* blocklog - BBSHIFT */ | 
					
						
							|  |  |  | 	__uint8_t		m_agno_log;	/* log #ag's */ | 
					
						
							|  |  |  | 	__uint8_t		m_agino_log;	/* #bits for agino in inum */ | 
					
						
							| 
									
										
											  
											
												xfs: increase inode cluster size for v5 filesystems
v5 filesystems use 512 byte inodes as a minimum, so read inodes in
clusters that are effectively half the size of a v4 filesystem with
256 byte inodes. For v5 filesystems, scale the inode cluster size
with the size of the inode so that we keep a constant 32 inodes per
cluster ratio for all inode IO.
This only works if mkfs.xfs sets the inode alignment appropriately
for larger inode clusters, so this functionality is made conditional
on mkfs doing the right thing. xfs_repair needs to know about
the inode alignment changes, too.
Wall time:
	create	bulkstat	find+stat	ls -R	unlink
v4	237s	161s		173s		201s	299s
v5	235s	163s		205s		 31s	356s
patched	234s	160s		182s		 29s	317s
System time:
	create	bulkstat	find+stat	ls -R	unlink
v4	2601s	2490s		1653s		1656s	2960s
v5	2637s	2497s		1681s		  20s	3216s
patched	2613s	2451s		1658s		  20s	3007s
So, wall time same or down across the board, system time same or
down across the board, and cache hit rates all improve except for
the ls -R case which is a pure cold cache directory read workload
on v5 filesystems...
So, this patch removes most of the performance and CPU usage
differential between v4 and v5 filesystems on traversal related
workloads.
Note: while this patch is currently for v5 filesystems only, there
is no reason it can't be ported back to v4 filesystems.  This hasn't
been done here because bringing the code back to v4 requires
forwards and backwards kernel compatibility testing.  i.e. to
determine if older kernels(*) do the right thing with larger inode
alignments but still only using 8k inode cluster sizes. None of this
testing and validation on v4 filesystems has been done, so for the
moment larger inode clusters is limited to v5 superblocks.
(*) a current default config v4 filesystem should mount just fine on
2.6.23 (when lazy-count support was introduced), and so if we change
the alignment emitted by mkfs without a feature bit then we have to
make sure it works properly on all kernels since 2.6.23. And if we
allow it to be changed when the lazy-count bit is not set, then it's
all kernels since v2 logs were introduced that need to be tested for
compatibility...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
											
										 
											2013-11-01 15:27:20 +11:00
										 |  |  | 	uint			m_inode_cluster_size;/* min inode buf size */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	uint			m_blockmask;	/* sb_blocksize-1 */ | 
					
						
							|  |  |  | 	uint			m_blockwsize;	/* sb_blocksize in words */ | 
					
						
							|  |  |  | 	uint			m_blockwmask;	/* blockwsize-1 */ | 
					
						
							| 
									
										
										
										
											2008-10-30 17:11:19 +11:00
										 |  |  | 	uint			m_alloc_mxr[2];	/* max alloc btree records */ | 
					
						
							|  |  |  | 	uint			m_alloc_mnr[2];	/* min alloc btree records */ | 
					
						
							|  |  |  | 	uint			m_bmap_dmxr[2];	/* max bmap btree records */ | 
					
						
							|  |  |  | 	uint			m_bmap_dmnr[2];	/* min bmap btree records */ | 
					
						
							|  |  |  | 	uint			m_inobt_mxr[2];	/* max inobt btree records */ | 
					
						
							|  |  |  | 	uint			m_inobt_mnr[2];	/* min inobt btree records */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	uint			m_ag_maxlevels;	/* XFS_AG_MAXLEVELS */ | 
					
						
							|  |  |  | 	uint			m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ | 
					
						
							| 
									
										
										
										
											2009-02-09 08:37:14 +01:00
										 |  |  | 	uint			m_in_maxlevels;	/* max inobt btree levels. */ | 
					
						
							| 
									
										
										
										
											2010-01-11 11:47:44 +00:00
										 |  |  | 	struct radix_tree_root	m_perag_tree;	/* per-ag accounting info */ | 
					
						
							|  |  |  | 	spinlock_t		m_perag_lock;	/* lock for m_perag_tree */ | 
					
						
							| 
									
										
										
										
											2007-08-30 17:21:54 +10:00
										 |  |  | 	struct mutex		m_growlock;	/* growfs mutex */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int			m_fixedfsid[2];	/* unchanged for life of FS */ | 
					
						
							|  |  |  | 	uint			m_dmevmask;	/* DMI events for this FS */ | 
					
						
							| 
									
										
										
										
											2005-11-02 15:09:22 +11:00
										 |  |  | 	__uint64_t		m_flags;	/* global mount flags */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	uint			m_dir_node_ents; /* #entries in a dir danode */ | 
					
						
							|  |  |  | 	uint			m_attr_node_ents; /* #entries in attr danode */ | 
					
						
							|  |  |  | 	int			m_ialloc_inos;	/* inodes in inode allocation */ | 
					
						
							|  |  |  | 	int			m_ialloc_blks;	/* blocks in inode allocation */ | 
					
						
							|  |  |  | 	int			m_inoalign_mask;/* mask sb_inoalignmt if used */ | 
					
						
							|  |  |  | 	uint			m_qflags;	/* quota status flags */ | 
					
						
							| 
									
										
										
										
											2013-08-12 20:49:56 +10:00
										 |  |  | 	struct xfs_trans_resv	m_resv;		/* precomputed res values */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	__uint64_t		m_maxicount;	/* maximum inode count */ | 
					
						
							|  |  |  | 	__uint64_t		m_resblks;	/* total reserved blocks */ | 
					
						
							|  |  |  | 	__uint64_t		m_resblks_avail;/* available reserved blocks */ | 
					
						
							| 
									
										
										
											
												xfs: don't hold onto reserved blocks on remount,ro
If we hold onto reserved blocks when doing a remount,ro we end
up writing the blocks used count to disk that includes the reserved
blocks. Reserved blocks are not actually used, so this results in
the values in the superblock being incorrect.
Hence if we run xfs_check or xfs_repair -n while the filesystem is
mounted remount,ro we end up with an inconsistent filesystem being
reported. Also, running xfs_copy on the remount,ro filesystem will
result in an inconsistent image being generated.
To fix this, unreserve the blocks when doing the remount,ro, and
reserve them again on remount,rw. This way a remount,ro filesystem
will appear consistent on disk to all utilities.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
											
										 
											2010-01-26 15:08:49 +11:00
										 |  |  | 	__uint64_t		m_resblks_save;	/* reserved blks @ remount,ro */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int			m_dalign;	/* stripe unit */ | 
					
						
							|  |  |  | 	int			m_swidth;	/* stripe width */ | 
					
						
							| 
									
										
										
										
											2006-03-29 08:55:14 +10:00
										 |  |  | 	int			m_sinoalign;	/* stripe unit inode alignment */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int			m_attr_magicpct;/* 37% of the blocksize */ | 
					
						
							|  |  |  | 	int			m_dir_magicpct;	/* 37% of the dir blocksize */ | 
					
						
							|  |  |  | 	__uint8_t		m_sectbb_log;	/* sectlog - BBSHIFT */ | 
					
						
							| 
									
										
										
										
											2008-05-21 16:41:01 +10:00
										 |  |  | 	const struct xfs_nameops *m_dirnameops;	/* vector of dir name ops */ | 
					
						
							| 
									
										
										
										
											2013-10-29 22:11:46 +11:00
										 |  |  | 	const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ | 
					
						
							| 
									
										
										
										
											2013-10-29 22:11:51 +11:00
										 |  |  | 	const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int			m_dirblksize;	/* directory block sz--bytes */ | 
					
						
							|  |  |  | 	int			m_dirblkfsbs;	/* directory block sz--fsbs */ | 
					
						
							|  |  |  | 	xfs_dablk_t		m_dirdatablk;	/* blockno of dir data v2 */ | 
					
						
							|  |  |  | 	xfs_dablk_t		m_dirleafblk;	/* blockno of dir non-data v2 */ | 
					
						
							|  |  |  | 	xfs_dablk_t		m_dirfreeblk;	/* blockno of dirfreeindex v2 */ | 
					
						
							|  |  |  | 	uint			m_chsize;	/* size of next field */ | 
					
						
							|  |  |  | 	atomic_t		m_active_trans;	/* number trans frozen */ | 
					
						
							| 
									
										
										
										
											2006-03-14 13:13:09 +11:00
										 |  |  | #ifdef HAVE_PERCPU_SB
 | 
					
						
							| 
									
										
										
										
											2010-02-02 14:39:01 +09:00
										 |  |  | 	xfs_icsb_cnts_t __percpu *m_sb_cnts;	/* per-cpu superblock counters */ | 
					
						
							| 
									
										
										
										
											2006-03-14 13:13:09 +11:00
										 |  |  | 	unsigned long		m_icsb_counters; /* disabled per-cpu counters */ | 
					
						
							| 
									
										
										
										
											2006-03-14 13:23:52 +11:00
										 |  |  | 	struct notifier_block	m_icsb_notifier; /* hotplug cpu notifier */ | 
					
						
							| 
									
										
										
										
											2007-02-10 18:35:09 +11:00
										 |  |  | 	struct mutex		m_icsb_mutex;	/* balancer sync lock */ | 
					
						
							| 
									
										
										
										
											2006-03-14 13:13:09 +11:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2007-07-11 11:09:12 +10:00
										 |  |  | 	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */ | 
					
						
							| 
									
										
										
										
											2011-04-08 12:45:07 +10:00
										 |  |  | 	struct delayed_work	m_reclaim_work;	/* background inode reclaim */ | 
					
						
							| 
									
										
										
										
											2012-11-06 09:50:47 -05:00
										 |  |  | 	struct delayed_work	m_eofblocks_work; /* background eof blocks
 | 
					
						
							|  |  |  | 						     trimming */ | 
					
						
							| 
									
										
										
										
											2009-01-19 02:04:07 +01:00
										 |  |  | 	__int64_t		m_update_flags;	/* sb flags we need to update
 | 
					
						
							|  |  |  | 						   on the next remount,rw */ | 
					
						
							| 
									
										
										
										
											2011-01-04 11:35:03 +11:00
										 |  |  | 	int64_t			m_low_space[XFS_LOWSP_MAX]; | 
					
						
							|  |  |  | 						/* low free space thresholds */ | 
					
						
							| 
									
										
										
										
											2012-02-29 09:53:48 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	struct workqueue_struct	*m_data_workqueue; | 
					
						
							|  |  |  | 	struct workqueue_struct	*m_unwritten_workqueue; | 
					
						
							| 
									
										
										
										
											2012-04-23 17:54:32 +10:00
										 |  |  | 	struct workqueue_struct	*m_cil_workqueue; | 
					
						
							| 
									
										
										
										
											2012-10-08 21:56:05 +11:00
										 |  |  | 	struct workqueue_struct	*m_reclaim_workqueue; | 
					
						
							|  |  |  | 	struct workqueue_struct	*m_log_workqueue; | 
					
						
							| 
									
										
										
										
											2012-11-06 09:50:47 -05:00
										 |  |  | 	struct workqueue_struct *m_eofblocks_workqueue; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | } xfs_mount_t; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Flags for m_flags. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
											  
											
												[XFS] Lazy Superblock Counters
When we have a couple of hundred transactions on the fly at once, they all
typically modify the on disk superblock in some way.
create/unlink/mkdir/rmdir modify inode counts, allocation/freeing modify
free block counts.
When these counts are modified in a transaction, they must eventually lock
the superblock buffer and apply the mods. The buffer then remains locked
until the transaction is committed into the incore log buffer. The result
of this is that with enough transactions on the fly the incore superblock
buffer becomes a bottleneck.
The result of contention on the incore superblock buffer is that
transaction rates fall - the more pressure that is put on the superblock
buffer, the slower things go.
The key to removing the contention is to not require the superblock fields
in question to be locked. We do that by not marking the superblock dirty
in the transaction. IOWs, we modify the incore superblock but do not
modify the cached superblock buffer. In short, we do not log superblock
modifications to critical fields in the superblock on every transaction.
In fact we only do it just before we write the superblock to disk every
sync period or just before unmount.
This creates an interesting problem - if we don't log or write out the
fields in every transaction, then how do the values get recovered after a
crash? the answer is simple - we keep enough duplicate, logged information
in other structures that we can reconstruct the correct count after log
recovery has been performed.
It is the AGF and AGI structures that contain the duplicate information;
after recovery, we walk every AGI and AGF and sum their individual
counters to get the correct value, and we do a transaction into the log to
correct them. An optimisation of this is that if we have a clean unmount
record, we know the value in the superblock is correct, so we can avoid
the summation walk under normal conditions and so mount/recovery times do
not change under normal operation.
One wrinkle that was discovered during development was that the blocks
used in the freespace btrees are never accounted for in the AGF counters.
This was once a valid optimisation to make; when the filesystem is full,
the free space btrees are empty and consume no space. Hence when it
matters, the "accounting" is correct. But that means that when we do the
AGF summations, we would not have a correct count and xfs_check would
complain. Hence a new counter was added to track the number of blocks used
by the free space btrees. This is an *on-disk format change*.
As a result of this, lazy superblock counters are a mkfs option and at the
moment on linux there is no way to convert an old filesystem. This is
possible - xfs_db can be used to twiddle the right bits and then
xfs_repair will do the format conversion for you. Similarly, you can
convert backwards as well. At some point we'll add functionality to
xfs_admin to do the bit twiddling easily....
SGI-PV: 964999
SGI-Modid: xfs-linux-melb:xfs-kern:28652a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
											
										 
											2007-05-24 15:26:31 +10:00
										 |  |  | #define XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 						   must be synchronous except | 
					
						
							|  |  |  | 						   for space allocations */ | 
					
						
							| 
									
										
											  
											
												[XFS] Lazy Superblock Counters
When we have a couple of hundred transactions on the fly at once, they all
typically modify the on disk superblock in some way.
create/unlink/mkdir/rmdir modify inode counts, allocation/freeing modify
free block counts.
When these counts are modified in a transaction, they must eventually lock
the superblock buffer and apply the mods. The buffer then remains locked
until the transaction is committed into the incore log buffer. The result
of this is that with enough transactions on the fly the incore superblock
buffer becomes a bottleneck.
The result of contention on the incore superblock buffer is that
transaction rates fall - the more pressure that is put on the superblock
buffer, the slower things go.
The key to removing the contention is to not require the superblock fields
in question to be locked. We do that by not marking the superblock dirty
in the transaction. IOWs, we modify the incore superblock but do not
modify the cached superblock buffer. In short, we do not log superblock
modifications to critical fields in the superblock on every transaction.
In fact we only do it just before we write the superblock to disk every
sync period or just before unmount.
This creates an interesting problem - if we don't log or write out the
fields in every transaction, then how do the values get recovered after a
crash? the answer is simple - we keep enough duplicate, logged information
in other structures that we can reconstruct the correct count after log
recovery has been performed.
It is the AGF and AGI structures that contain the duplicate information;
after recovery, we walk every AGI and AGF and sum their individual
counters to get the correct value, and we do a transaction into the log to
correct them. An optimisation of this is that if we have a clean unmount
record, we know the value in the superblock is correct, so we can avoid
the summation walk under normal conditions and so mount/recovery times do
not change under normal operation.
One wrinkle that was discovered during development was that the blocks
used in the freespace btrees are never accounted for in the AGF counters.
This was once a valid optimisation to make; when the filesystem is full,
the free space btrees are empty and consume no space. Hence when it
matters, the "accounting" is correct. But that means that when we do the
AGF summations, we would not have a correct count and xfs_check would
complain. Hence a new counter was added to track the number of blocks used
by the free space btrees. This is an *on-disk format change*.
As a result of this, lazy superblock counters are a mkfs option and at the
moment on linux there is no way to convert an old filesystem. This is
possible - xfs_db can be used to twiddle the right bits and then
xfs_repair will do the format conversion for you. Similarly, you can
convert backwards as well. At some point we'll add functionality to
xfs_admin to do the bit twiddling easily....
SGI-PV: 964999
SGI-Modid: xfs-linux-melb:xfs-kern:28652a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
											
										 
											2007-05-24 15:26:31 +10:00
										 |  |  | #define XFS_MOUNT_WAS_CLEAN	(1ULL << 3)
 | 
					
						
							| 
									
										
										
										
											2005-11-02 15:09:22 +11:00
										 |  |  | #define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 						   operations, typically for | 
					
						
							|  |  |  | 						   disk errors in metadata */ | 
					
						
							| 
									
										
										
										
											2011-05-20 13:45:32 +00:00
										 |  |  | #define XFS_MOUNT_DISCARD	(1ULL << 5)	/* discard unused blocks */
 | 
					
						
							| 
									
										
										
										
											2005-11-02 15:09:22 +11:00
										 |  |  | #define XFS_MOUNT_NOALIGN	(1ULL << 7)	/* turn off stripe alignment
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 						   allocations */ | 
					
						
							| 
									
										
										
										
											2006-01-11 15:32:01 +11:00
										 |  |  | #define XFS_MOUNT_ATTR2		(1ULL << 8)	/* allow use of attr2 format */
 | 
					
						
							| 
									
										
										
										
											2007-08-30 17:21:12 +10:00
										 |  |  | #define XFS_MOUNT_GRPID		(1ULL << 9)	/* group-ID assigned from directory */
 | 
					
						
							| 
									
										
										
										
											2005-11-02 15:09:22 +11:00
										 |  |  | #define XFS_MOUNT_NORECOVERY	(1ULL << 10)	/* no recovery - dirty fs */
 | 
					
						
							|  |  |  | #define XFS_MOUNT_DFLT_IOSIZE	(1ULL << 12)	/* set default i/o size */
 | 
					
						
							|  |  |  | #define XFS_MOUNT_32BITINODES	(1ULL << 14)	/* do not create inodes above
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 						 * 32 bits in size */ | 
					
						
							| 
									
										
										
										
											2007-08-30 17:21:12 +10:00
										 |  |  | #define XFS_MOUNT_SMALL_INUMS	(1ULL << 15)	/* users wants 32bit inodes */
 | 
					
						
							| 
									
										
										
										
											2005-11-02 15:09:22 +11:00
										 |  |  | #define XFS_MOUNT_NOUUID	(1ULL << 16)	/* ignore uuid during mount */
 | 
					
						
							|  |  |  | #define XFS_MOUNT_BARRIER	(1ULL << 17)
 | 
					
						
							| 
									
										
										
										
											2008-02-29 13:58:40 +11:00
										 |  |  | #define XFS_MOUNT_IKEEP		(1ULL << 18)	/* keep empty inode clusters*/
 | 
					
						
							| 
									
										
										
										
											2005-11-02 15:09:22 +11:00
										 |  |  | #define XFS_MOUNT_SWALLOC	(1ULL << 19)	/* turn on stripe width
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 						 * allocation */ | 
					
						
							| 
									
										
										
										
											2007-08-30 17:21:12 +10:00
										 |  |  | #define XFS_MOUNT_RDONLY	(1ULL << 20)	/* read-only fs */
 | 
					
						
							| 
									
										
										
										
											2005-11-02 15:09:22 +11:00
										 |  |  | #define XFS_MOUNT_DIRSYNC	(1ULL << 21)	/* synchronous directory ops */
 | 
					
						
							|  |  |  | #define XFS_MOUNT_COMPAT_IOSIZE	(1ULL << 22)	/* don't report large preferred
 | 
					
						
							| 
									
										
										
										
											2005-11-02 10:33:05 +11:00
										 |  |  | 						 * I/O size in stat() */ | 
					
						
							| 
									
										
										
										
											2007-07-11 11:09:12 +10:00
										 |  |  | #define XFS_MOUNT_FILESTREAMS	(1ULL << 24)	/* enable the filestreams
 | 
					
						
							|  |  |  | 						   allocator */ | 
					
						
							| 
									
										
										
										
											2008-04-30 18:15:28 +10:00
										 |  |  | #define XFS_MOUNT_NOATTR2	(1ULL << 25)	/* disable use of attr2 format */
 | 
					
						
							| 
									
										
										
										
											2005-11-02 10:33:05 +11:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Default minimum read and write sizes. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | #define XFS_READIO_LOG_LARGE	16
 | 
					
						
							|  |  |  | #define XFS_WRITEIO_LOG_LARGE	16
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2005-05-05 13:28:29 -07:00
										 |  |  |  * Max and min values for mount-option defined I/O | 
					
						
							|  |  |  |  * preallocation sizes. | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  */ | 
					
						
							| 
									
										
										
										
											2005-05-05 13:28:29 -07:00
										 |  |  | #define XFS_MAX_IO_LOG		30	/* 1G */
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #define XFS_MIN_IO_LOG		PAGE_SHIFT
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Synchronous read and write sizes.  This should be | 
					
						
							|  |  |  |  * better for NFSv2 wsync filesystems. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2009-03-29 09:55:42 +02:00
										 |  |  | #define	XFS_WSYNC_READIO_LOG	15	/* 32k */
 | 
					
						
							|  |  |  | #define	XFS_WSYNC_WRITEIO_LOG	14	/* 16k */
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-11-02 10:33:05 +11:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Allow large block sizes to be reported to userspace programs if the | 
					
						
							| 
									
										
										
										
											2008-04-10 12:22:07 +10:00
										 |  |  |  * "largeio" mount option is used. | 
					
						
							| 
									
										
										
										
											2005-11-02 10:33:05 +11:00
										 |  |  |  * | 
					
						
							|  |  |  |  * If compatibility mode is specified, simply return the basic unit of caching | 
					
						
							|  |  |  |  * so that we don't get inefficient read/modify/write I/O from user apps. | 
					
						
							|  |  |  |  * Otherwise.... | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * If the underlying volume is a stripe, then return the stripe width in bytes | 
					
						
							|  |  |  |  * as the recommended I/O size. It is not a stripe and we've set a default | 
					
						
							|  |  |  |  * buffered I/O size, return that, otherwise return the compat default. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline unsigned long | 
					
						
							|  |  |  | xfs_preferred_iosize(xfs_mount_t *mp) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE) | 
					
						
							|  |  |  | 		return PAGE_CACHE_SIZE; | 
					
						
							|  |  |  | 	return (mp->m_swidth ? | 
					
						
							|  |  |  | 		(mp->m_swidth << mp->m_sb.sb_blocklog) : | 
					
						
							|  |  |  | 		((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ? | 
					
						
							|  |  |  | 			(1 << (int)MAX(mp->m_readio_log, mp->m_writeio_log)) : | 
					
						
							|  |  |  | 			PAGE_CACHE_SIZE)); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												[XFS] Lazy Superblock Counters
When we have a couple of hundred transactions on the fly at once, they all
typically modify the on disk superblock in some way.
create/unlink/mkdir/rmdir modify inode counts, allocation/freeing modify
free block counts.
When these counts are modified in a transaction, they must eventually lock
the superblock buffer and apply the mods. The buffer then remains locked
until the transaction is committed into the incore log buffer. The result
of this is that with enough transactions on the fly the incore superblock
buffer becomes a bottleneck.
The result of contention on the incore superblock buffer is that
transaction rates fall - the more pressure that is put on the superblock
buffer, the slower things go.
The key to removing the contention is to not require the superblock fields
in question to be locked. We do that by not marking the superblock dirty
in the transaction. IOWs, we modify the incore superblock but do not
modify the cached superblock buffer. In short, we do not log superblock
modifications to critical fields in the superblock on every transaction.
In fact we only do it just before we write the superblock to disk every
sync period or just before unmount.
This creates an interesting problem - if we don't log or write out the
fields in every transaction, then how do the values get recovered after a
crash? The answer is simple - we keep enough duplicate, logged information
in other structures that we can reconstruct the correct count after log
recovery has been performed.
It is the AGF and AGI structures that contain the duplicate information;
after recovery, we walk every AGI and AGF and sum their individual
counters to get the correct value, and we do a transaction into the log to
correct them. An optimisation of this is that if we have a clean unmount
record, we know the value in the superblock is correct, so we can avoid
the summation walk under normal conditions and so mount/recovery times do
not change under normal operation.
One wrinkle that was discovered during development was that the blocks
used in the freespace btrees are never accounted for in the AGF counters.
This was once a valid optimisation to make; when the filesystem is full,
the free space btrees are empty and consume no space. Hence when it
matters, the "accounting" is correct. But that means that when we do the
AGF summations, we would not have a correct count and xfs_check would
complain. Hence a new counter was added to track the number of blocks used
by the free space btrees. This is an *on-disk format change*.
As a result of this, lazy superblock counters are a mkfs option and at the
moment on linux there is no way to convert an old filesystem. This is
possible - xfs_db can be used to twiddle the right bits and then
xfs_repair will do the format conversion for you. Similarly, you can
convert backwards as well. At some point we'll add functionality to
xfs_admin to do the bit twiddling easily....
SGI-PV: 964999
SGI-Modid: xfs-linux-melb:xfs-kern:28652a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
											
										 
											2007-05-24 15:26:31 +10:00
										 |  |  | #define XFS_LAST_UNMOUNT_WAS_CLEAN(mp)	\
 | 
					
						
							|  |  |  | 				((mp)->m_flags & XFS_MOUNT_WAS_CLEAN) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #define XFS_FORCED_SHUTDOWN(mp)	((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
 | 
					
						
							| 
									
										
										
										
											2007-08-30 17:20:39 +10:00
										 |  |  | void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, | 
					
						
							|  |  |  | 		int lnnum); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #define xfs_force_shutdown(m,f)	\
 | 
					
						
							| 
									
										
										
										
											2007-08-30 17:20:39 +10:00
										 |  |  | 	xfs_do_force_shutdown(m, f, __FILE__, __LINE__) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-11-28 14:23:36 +11:00
										 |  |  | #define SHUTDOWN_META_IO_ERROR	0x0001	/* write attempt to metadata failed */
 | 
					
						
							|  |  |  | #define SHUTDOWN_LOG_IO_ERROR	0x0002	/* write attempt to the log failed */
 | 
					
						
							|  |  |  | #define SHUTDOWN_FORCE_UMOUNT	0x0004	/* shutdown from a forced unmount */
 | 
					
						
							|  |  |  | #define SHUTDOWN_CORRUPT_INCORE	0x0008	/* corrupt in-memory data structures */
 | 
					
						
							|  |  |  | #define SHUTDOWN_REMOTE_REQ	0x0010	/* shutdown came from remote cell */
 | 
					
						
							|  |  |  | #define SHUTDOWN_DEVICE_REQ	0x0020	/* failed all paths to the device */
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Flags for xfs_mountfs | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2006-03-31 13:04:17 +10:00
										 |  |  | #define XFS_MFSI_QUIET		0x40	/* Be silent if mount errors found */
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-11-02 14:38:42 +11:00
										 |  |  | static inline xfs_agnumber_t | 
					
						
							|  |  |  | xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2005-11-02 14:38:42 +11:00
										 |  |  | 	xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d); | 
					
						
							|  |  |  | 	do_div(ld, mp->m_sb.sb_agblocks); | 
					
						
							|  |  |  | 	return (xfs_agnumber_t) ld; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-11-02 14:38:42 +11:00
										 |  |  | static inline xfs_agblock_t | 
					
						
							|  |  |  | xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2005-11-02 14:38:42 +11:00
										 |  |  | 	xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d); | 
					
						
							|  |  |  | 	return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-02-10 18:35:15 +11:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Per-cpu superblock locking functions | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | #ifdef HAVE_PERCPU_SB
 | 
					
						
							| 
									
										
										
										
											2009-11-14 16:17:22 +00:00
										 |  |  | static inline void | 
					
						
							| 
									
										
										
										
											2007-02-10 18:35:15 +11:00
										 |  |  | xfs_icsb_lock(xfs_mount_t *mp) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	mutex_lock(&mp->m_icsb_mutex); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-11-14 16:17:22 +00:00
										 |  |  | static inline void | 
					
						
							| 
									
										
										
										
											2007-02-10 18:35:15 +11:00
										 |  |  | xfs_icsb_unlock(xfs_mount_t *mp) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	mutex_unlock(&mp->m_icsb_mutex); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | #define xfs_icsb_lock(mp)
 | 
					
						
							|  |  |  | #define xfs_icsb_unlock(mp)
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * This structure is for use by the xfs_mod_incore_sb_batch() routine. | 
					
						
							| 
									
										
										
										
											2007-02-10 18:36:10 +11:00
										 |  |  |  * xfs_growfs can specify a few fields which are more than int limit | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  */ | 
					
						
							|  |  |  | typedef struct xfs_mod_sb { | 
					
						
							|  |  |  | 	xfs_sb_field_t	msb_field;	/* Field to modify, see below */ | 
					
						
							| 
									
										
										
										
											2007-02-10 18:36:10 +11:00
										 |  |  | 	int64_t		msb_delta;	/* Change to make to specified field */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | } xfs_mod_sb_t; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-08-12 20:49:55 +10:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Per-ag incore structure, copies of information in agf and agi, to improve the | 
					
						
							|  |  |  |  * performance of allocation group selection. This is defined for the kernel | 
					
						
							|  |  |  |  * only, and hence is defined here instead of in xfs_ag.h. You need the struct | 
					
						
							|  |  |  |  * xfs_mount to be defined to look up a xfs_perag anyway (via mp->m_perag_tree), | 
					
						
							|  |  |  |  * so this doesn't introduce any strange header file dependencies. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | typedef struct xfs_perag { | 
					
						
							|  |  |  | 	struct xfs_mount *pag_mount;	/* owner filesystem */ | 
					
						
							|  |  |  | 	xfs_agnumber_t	pag_agno;	/* AG this structure belongs to */ | 
					
						
							|  |  |  | 	atomic_t	pag_ref;	/* perag reference count */ | 
					
						
							|  |  |  | 	char		pagf_init;	/* this agf's entry is initialized */ | 
					
						
							|  |  |  | 	char		pagi_init;	/* this agi's entry is initialized */ | 
					
						
							|  |  |  | 	char		pagf_metadata;	/* the agf is preferred to be metadata */ | 
					
						
							|  |  |  | 	char		pagi_inodeok;	/* The agi is ok for inodes */ | 
					
						
							|  |  |  | 	__uint8_t	pagf_levels[XFS_BTNUM_AGF]; | 
					
						
							|  |  |  | 					/* # of levels in bno & cnt btree */ | 
					
						
							|  |  |  | 	__uint32_t	pagf_flcount;	/* count of blocks in freelist */ | 
					
						
							|  |  |  | 	xfs_extlen_t	pagf_freeblks;	/* total free blocks */ | 
					
						
							|  |  |  | 	xfs_extlen_t	pagf_longest;	/* longest free space */ | 
					
						
							|  |  |  | 	__uint32_t	pagf_btreeblks;	/* # of blocks held in AGF btrees */ | 
					
						
							|  |  |  | 	xfs_agino_t	pagi_freecount;	/* number of free inodes */ | 
					
						
							|  |  |  | 	xfs_agino_t	pagi_count;	/* number of allocated inodes */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/*
 | 
					
						
							|  |  |  | 	 * Inode allocation search lookup optimisation. | 
					
						
							|  |  |  | 	 * If the pagino matches, the search for new inodes | 
					
						
							|  |  |  | 	 * doesn't need to search the near ones again straight away | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	xfs_agino_t	pagl_pagino; | 
					
						
							|  |  |  | 	xfs_agino_t	pagl_leftrec; | 
					
						
							|  |  |  | 	xfs_agino_t	pagl_rightrec; | 
					
						
							|  |  |  | 	spinlock_t	pagb_lock;	/* lock for pagb_tree */ | 
					
						
							|  |  |  | 	struct rb_root	pagb_tree;	/* ordered tree of busy extents */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	atomic_t        pagf_fstrms;    /* # of filestreams active in this AG */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	spinlock_t	pag_ici_lock;	/* incore inode cache lock */ | 
					
						
							|  |  |  | 	struct radix_tree_root pag_ici_root;	/* incore inode cache root */ | 
					
						
							|  |  |  | 	int		pag_ici_reclaimable;	/* reclaimable inodes */ | 
					
						
							|  |  |  | 	struct mutex	pag_ici_reclaim_lock;	/* serialisation point */ | 
					
						
							|  |  |  | 	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* buffer cache index */ | 
					
						
							|  |  |  | 	spinlock_t	pag_buf_lock;	/* lock for pag_buf_tree */ | 
					
						
							|  |  |  | 	struct rb_root	pag_buf_tree;	/* ordered tree of active buffers */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* for rcu-safe freeing */ | 
					
						
							|  |  |  | 	struct rcu_head	rcu_head; | 
					
						
							|  |  |  | 	int		pagb_count;	/* pagb slots in use */ | 
					
						
							|  |  |  | } xfs_perag_t; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-06-29 22:10:14 +00:00
										 |  |  | extern int	xfs_log_sbcount(xfs_mount_t *); | 
					
						
							| 
									
										
										
										
											2010-02-05 22:59:53 +00:00
										 |  |  | extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); | 
					
						
							| 
									
										
										
										
											2008-08-13 16:49:32 +10:00
										 |  |  | extern int	xfs_mountfs(xfs_mount_t *mp); | 
					
						
							| 
									
										
										
										
											2013-08-12 20:49:41 +10:00
										 |  |  | extern int	xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount, | 
					
						
							|  |  |  | 				     xfs_agnumber_t *maxagi); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-08-13 16:49:57 +10:00
										 |  |  | extern void	xfs_unmountfs(xfs_mount_t *); | 
					
						
							| 
									
										
										
										
											2007-02-10 18:36:10 +11:00
										 |  |  | extern int	xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | extern int	xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, | 
					
						
							|  |  |  | 			uint, int); | 
					
						
							| 
									
										
										
										
											2009-01-19 02:04:07 +01:00
										 |  |  | extern int	xfs_mount_log_sb(xfs_mount_t *, __int64_t); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); | 
					
						
							| 
									
										
										
										
											2006-03-31 13:04:17 +10:00
										 |  |  | extern int	xfs_readsb(xfs_mount_t *, int); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | extern void	xfs_freesb(xfs_mount_t *); | 
					
						
							| 
									
										
											  
											
												[XFS] Lazy Superblock Counters
When we have a couple of hundred transactions on the fly at once, they all
typically modify the on disk superblock in some way.
create/unlink/mkdir/rmdir modify inode counts, allocation/freeing modify
free block counts.
When these counts are modified in a transaction, they must eventually lock
the superblock buffer and apply the mods. The buffer then remains locked
until the transaction is committed into the incore log buffer. The result
of this is that with enough transactions on the fly the incore superblock
buffer becomes a bottleneck.
The result of contention on the incore superblock buffer is that
transaction rates fall - the more pressure that is put on the superblock
buffer, the slower things go.
The key to removing the contention is to not require the superblock fields
in question to be locked. We do that by not marking the superblock dirty
in the transaction. IOWs, we modify the incore superblock but do not
modify the cached superblock buffer. In short, we do not log superblock
modifications to critical fields in the superblock on every transaction.
In fact we only do it just before we write the superblock to disk every
sync period or just before unmount.
This creates an interesting problem - if we don't log or write out the
fields in every transaction, then how do the values get recovered after a
crash? The answer is simple - we keep enough duplicate, logged information
in other structures that we can reconstruct the correct count after log
recovery has been performed.
It is the AGF and AGI structures that contain the duplicate information;
after recovery, we walk every AGI and AGF and sum their individual
counters to get the correct value, and we do a transaction into the log to
correct them. An optimisation of this is that if we have a clean unmount
record, we know the value in the superblock is correct, so we can avoid
the summation walk under normal conditions and so mount/recovery times do
not change under normal operation.
One wrinkle that was discovered during development was that the blocks
used in the freespace btrees are never accounted for in the AGF counters.
This was once a valid optimisation to make; when the filesystem is full,
the free space btrees are empty and consume no space. Hence when it
matters, the "accounting" is correct. But that means that when we do the
AGF summations, we would not have a correct count and xfs_check would
complain. Hence a new counter was added to track the number of blocks used
by the free space btrees. This is an *on-disk format change*.
As a result of this, lazy superblock counters are a mkfs option and at the
moment on linux there is no way to convert an old filesystem. This is
possible - xfs_db can be used to twiddle the right bits and then
xfs_repair will do the format conversion for you. Similarly, you can
convert backwards as well. At some point we'll add functionality to
xfs_admin to do the bit twiddling easily....
SGI-PV: 964999
SGI-Modid: xfs-linux-melb:xfs-kern:28652a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
											
										 
											2007-05-24 15:26:31 +10:00
										 |  |  | extern int	xfs_fs_writable(xfs_mount_t *); | 
					
						
							| 
									
										
										
										
											2007-05-14 18:24:02 +10:00
										 |  |  | extern int	xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-17 19:36:13 +00:00
										 |  |  | extern int	xfs_dev_is_read_only(struct xfs_mount *, char *); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-01-04 11:35:03 +11:00
										 |  |  | extern void	xfs_set_low_space_thresholds(struct xfs_mount *); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #endif	/* __KERNEL__ */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #endif	/* __XFS_MOUNT_H__ */
 |