| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #ifndef _NFS_FS_SB
 | 
					
						
							|  |  |  | #define _NFS_FS_SB
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <linux/list.h>
 | 
					
						
							|  |  |  | #include <linux/backing-dev.h>
 | 
					
						
							| 
									
										
										
										
											2012-01-17 22:04:24 -05:00
										 |  |  | #include <linux/idr.h>
 | 
					
						
							| 
									
										
										
										
											2007-11-08 04:05:04 -05:00
										 |  |  | #include <linux/wait.h>
 | 
					
						
							| 
									
										
										
										
											2009-04-01 09:21:53 -04:00
										 |  |  | #include <linux/nfs_xdr.h>
 | 
					
						
							|  |  |  | #include <linux/sunrpc/xprt.h>
 | 
					
						
							| 
									
										
										
										
											2007-11-08 04:05:04 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-26 16:09:06 -07:00
										 |  |  | #include <linux/atomic.h>
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-01 09:21:53 -04:00
										 |  |  | struct nfs4_session; | 
					
						
							| 
									
										
										
										
											2006-03-20 13:44:13 -05:00
										 |  |  | struct nfs_iostats; | 
					
						
							| 
									
										
										
										
											2008-01-11 17:09:52 -05:00
										 |  |  | struct nlm_host; | 
					
						
							| 
									
										
										
										
											2009-04-01 09:22:03 -04:00
										 |  |  | struct nfs4_sequence_args; | 
					
						
							|  |  |  | struct nfs4_sequence_res; | 
					
						
							|  |  |  | struct nfs_server; | 
					
						
							| 
									
										
										
										
											2010-06-16 09:52:26 -04:00
										 |  |  | struct nfs4_minor_version_ops; | 
					
						
							| 
									
										
										
										
											2012-05-21 22:44:31 -04:00
										 |  |  | struct nfs41_server_scope; | 
					
						
							| 
									
										
										
										
											2012-02-17 15:20:26 -05:00
										 |  |  | struct nfs41_impl_id; | 
					
						
							| 
									
										
										
										
											2006-03-20 13:44:13 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:10 -04:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * The nfs_client identifies our client state to the server. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct nfs_client { | 
					
						
							|  |  |  | 	atomic_t		cl_count; | 
					
						
							| 
									
										
										
										
											2012-06-14 13:08:38 -04:00
										 |  |  | 	atomic_t		cl_mds_count; | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:10 -04:00
										 |  |  | 	int			cl_cons_state;	/* current construction state (-ve: init error) */ | 
					
						
							|  |  |  | #define NFS_CS_READY		0		/* ready to be used */
 | 
					
						
							|  |  |  | #define NFS_CS_INITING		1		/* busy initialising */
 | 
					
						
							| 
									
										
										
										
											2009-04-01 09:22:38 -04:00
										 |  |  | #define NFS_CS_SESSION_INITING	2		/* busy initialising  session */
 | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:10 -04:00
										 |  |  | 	unsigned long		cl_res_state;	/* NFS resources state */ | 
					
						
							|  |  |  | #define NFS_CS_CALLBACK		1		/* - callback started */
 | 
					
						
							|  |  |  | #define NFS_CS_IDMAP		2		/* - idmap started */
 | 
					
						
							| 
									
										
										
										
											2006-08-24 01:03:05 -04:00
										 |  |  | #define NFS_CS_RENEWD		3		/* - renewd started */
 | 
					
						
							| 
									
										
										
										
											2011-03-01 01:34:10 +00:00
										 |  |  | #define NFS_CS_STOP_RENEW	4		/* no more state to renew */
 | 
					
						
							| 
									
										
										
										
											2011-03-01 01:34:11 +00:00
										 |  |  | #define NFS_CS_CHECK_LEASE_TIME	5		/* need to check lease time */
 | 
					
						
							| 
									
										
										
										
											2012-05-21 22:46:07 -04:00
										 |  |  | 	unsigned long		cl_flags;	/* behavior switches */ | 
					
						
							|  |  |  | #define NFS_CS_NORESVPORT	0		/* - use ephemeral src port */
 | 
					
						
							|  |  |  | #define NFS_CS_DISCRTRY		1		/* - disconnect on RPC retry */
 | 
					
						
							| 
									
										
										
										
											2012-09-14 17:24:11 -04:00
										 |  |  | #define NFS_CS_MIGRATION	2		/* - transparent state migr */
 | 
					
						
							| 
									
										
										
										
											2013-04-14 11:49:51 -04:00
										 |  |  | #define NFS_CS_INFINITE_SLOTS	3		/* - don't limit TCP slots */
 | 
					
						
							| 
									
										
										
										
											2013-09-24 12:06:07 -04:00
										 |  |  | #define NFS_CS_NO_RETRANS_TIMEOUT	4	/* - Disable retransmit timeouts */
 | 
					
						
							| 
									
										
										
										
											2007-12-10 14:58:15 -05:00
										 |  |  | 	struct sockaddr_storage	cl_addr;	/* server identifier */ | 
					
						
							|  |  |  | 	size_t			cl_addrlen; | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:10 -04:00
										 |  |  | 	char *			cl_hostname;	/* hostname of server */ | 
					
						
							| 
									
										
										
										
											2014-06-21 20:52:17 -04:00
										 |  |  | 	char *			cl_acceptor;	/* GSSAPI acceptor name */ | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:10 -04:00
										 |  |  | 	struct list_head	cl_share_link;	/* link in global client list */ | 
					
						
							|  |  |  | 	struct list_head	cl_superblocks;	/* List of nfs_server structs */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	struct rpc_clnt *	cl_rpcclient; | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:12 -04:00
										 |  |  | 	const struct nfs_rpc_ops *rpc_ops;	/* NFS protocol vector */ | 
					
						
							| 
									
										
										
										
											2008-01-03 16:29:06 -05:00
										 |  |  | 	int			cl_proto;	/* Network transport protocol */ | 
					
						
							| 
									
										
										
										
											2012-07-30 16:05:16 -04:00
										 |  |  | 	struct nfs_subversion *	cl_nfs_mod;	/* pointer to nfs version module */ | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:10 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-01 09:21:49 -04:00
										 |  |  | 	u32			cl_minorversion;/* NFSv4 minorversion */ | 
					
						
							| 
									
										
										
										
											2008-04-07 20:50:11 -04:00
										 |  |  | 	struct rpc_cred		*cl_machine_cred; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-07-30 16:05:25 -04:00
										 |  |  | #if IS_ENABLED(CONFIG_NFS_V4)
 | 
					
						
							| 
									
										
										
										
											2013-09-06 14:14:00 -04:00
										 |  |  | 	struct list_head	cl_ds_clients; /* auth flavor data servers */ | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:10 -04:00
										 |  |  | 	u64			cl_clientid;	/* constant */ | 
					
						
							| 
									
										
										
										
											2011-04-24 14:28:18 -04:00
										 |  |  | 	nfs4_verifier		cl_confirm;	/* Clientid verifier */ | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:10 -04:00
										 |  |  | 	unsigned long		cl_state; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	spinlock_t		cl_lock; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	unsigned long		cl_lease_time; | 
					
						
							|  |  |  | 	unsigned long		cl_last_renewal; | 
					
						
							| 
									
										
										
										
											2006-11-22 14:54:01 +00:00
										 |  |  | 	struct delayed_work	cl_renewd; | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:10 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	struct rpc_wait_queue	cl_rpcwaitq; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* idmapper */ | 
					
						
							|  |  |  | 	struct idmap *		cl_idmap; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Our own IP address, as a null-terminated string.
 | 
					
						
							| 
									
										
										
										
											2012-07-11 16:30:50 -04:00
										 |  |  | 	 * This is used to generate the mv0 callback address. | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:10 -04:00
										 |  |  | 	 */ | 
					
						
							| 
									
										
										
										
											2007-12-10 14:57:01 -05:00
										 |  |  | 	char			cl_ipaddr[48]; | 
					
						
							| 
									
										
										
										
											2011-01-06 02:04:30 +00:00
										 |  |  | 	u32			cl_cb_ident;	/* v4.0 callback identifier */ | 
					
						
							| 
									
										
										
										
											2010-06-16 09:52:26 -04:00
										 |  |  | 	const struct nfs4_minor_version_ops *cl_mvops; | 
					
						
							| 
									
										
										
										
											2013-10-17 14:13:02 -04:00
										 |  |  | 	unsigned long		cl_mig_gen; | 
					
						
							| 
									
										
										
										
											2009-04-03 16:42:42 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-08-09 12:49:11 -04:00
										 |  |  | 	/* NFSv4.0 transport blocking */ | 
					
						
							|  |  |  | 	struct nfs4_slot_table	*cl_slot_tbl; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-01 09:22:29 -04:00
										 |  |  | 	/* The sequence id to use for the next CREATE_SESSION */ | 
					
						
							|  |  |  | 	u32			cl_seqid; | 
					
						
							|  |  |  | 	/* The flags used for obtaining the clientid during EXCHANGE_ID */ | 
					
						
							|  |  |  | 	u32			cl_exchange_flags; | 
					
						
							| 
									
										
										
										
											2012-05-21 22:44:22 -04:00
										 |  |  | 	struct nfs4_session	*cl_session;	/* shared session */ | 
					
						
							| 
									
										
										
										
											2012-09-14 17:24:32 -04:00
										 |  |  | 	bool			cl_preserve_clid; | 
					
						
							| 
									
										
										
										
											2012-05-21 22:46:16 -04:00
										 |  |  | 	struct nfs41_server_owner *cl_serverowner; | 
					
						
							| 
									
										
										
										
											2012-05-21 22:44:31 -04:00
										 |  |  | 	struct nfs41_server_scope *cl_serverscope; | 
					
						
							| 
									
										
										
										
											2012-05-21 22:44:41 -04:00
										 |  |  | 	struct nfs41_impl_id	*cl_implid; | 
					
						
							| 
									
										
										
										
											2013-08-13 16:37:32 -04:00
										 |  |  | 	/* nfs 4.1+ state protection modes: */ | 
					
						
							|  |  |  | 	unsigned long		cl_sp4_flags; | 
					
						
							|  |  |  | #define NFS_SP4_MACH_CRED_MINIMAL  1	/* Minimal sp4_mach_cred - state ops
 | 
					
						
							|  |  |  | 					 * must use machine cred */ | 
					
						
							| 
									
										
										
										
											2013-08-13 16:37:34 -04:00
										 |  |  | #define NFS_SP4_MACH_CRED_CLEANUP  2	/* CLOSE and LOCKU */
 | 
					
						
							| 
									
										
										
										
											2013-08-13 16:37:35 -04:00
										 |  |  | #define NFS_SP4_MACH_CRED_SECINFO  3	/* SECINFO and SECINFO_NO_NAME */
 | 
					
						
							| 
									
										
										
										
											2013-08-13 16:37:36 -04:00
										 |  |  | #define NFS_SP4_MACH_CRED_STATEID  4	/* TEST_STATEID and FREE_STATEID */
 | 
					
						
							| 
									
										
										
										
											2013-08-13 16:37:37 -04:00
										 |  |  | #define NFS_SP4_MACH_CRED_WRITE    5	/* WRITE */
 | 
					
						
							|  |  |  | #define NFS_SP4_MACH_CRED_COMMIT   6	/* COMMIT */
 | 
					
						
							| 
									
										
										
										
											2011-03-09 16:00:53 -05:00
										 |  |  | #endif /* CONFIG_NFS_V4 */
 | 
					
						
							| 
									
										
										
										
											2009-04-01 09:21:53 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-03 16:42:42 +01:00
										 |  |  | #ifdef CONFIG_NFS_FSCACHE
 | 
					
						
							|  |  |  | 	struct fscache_cookie	*fscache;	/* client index cache cookie */ | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2011-05-31 19:05:47 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-05-21 22:44:50 -04:00
										 |  |  | 	struct net		*cl_net; | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:10 -04:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * NFS client parameters stored in the superblock. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | struct nfs_server { | 
					
						
							| 
									
										
										
										
											2006-08-22 20:06:11 -04:00
										 |  |  | 	struct nfs_client *	nfs_client;	/* shared client and NFS4 state */ | 
					
						
							| 
									
										
											  
											
												NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
 (1) The root and intervening nodes may not be accessible to the client.
     With NFS2 and NFS3, for instance, mountd is called on the server to get
     the filehandle for the tip of a path. mountd won't give us handles for
     anything we don't have permission to access, and so we can't set up NFS
     inodes for such nodes, and so can't easily set up dentries (we'd have to
     have ghost inodes or something).
     With this patch we don't actually create dentries until we get handles
     from the server that we can use to set up their inodes, and we don't
     actually bind them into the tree until we know for sure where they go.
 (2) Inaccessible symbolic links.
     If we're asked to mount two exports from the server, eg:
	mount warthog:/warthog/aaa/xxx /mmm
	mount warthog:/warthog/bbb/yyy /nnn
     We may not be able to access anything nearer the root than xxx and yyy,
     but we may find out later that /mmm/www/yyy, say, is actually the same
     directory as the one mounted on /nnn. What we might then find out, for
     example, is that /warthog/bbb was actually a symbolic link to
     /warthog/aaa/xxx/www, but we can't actually determine that by talking to
     the server until /warthog is made available by NFS.
     This would lead to having constructed an erroneous dentry tree which we
     can't easily fix. We can end up with a dentry marked as a directory when
     it should actually be a symlink, or we could end up with an apparently
     hardlinked directory.
     With this patch we need not make assumptions about the type of a dentry
     for which we can't retrieve information, nor need we assume we know its
     place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
 (1) The server record construction/destruction has been abstracted out into
     its own set of functions to make things easier to get right.  These have
     been moved into fs/nfs/client.c.
     All the code in fs/nfs/client.c has to do with the management of
     connections to servers, and doesn't touch superblocks in any way; the
     remaining code in fs/nfs/super.c has to do with VFS superblock management.
 (2) The sequence of events undertaken by NFS mount is now reordered:
     (a) A volume representation (struct nfs_server) is allocated.
     (b) A server representation (struct nfs_client) is acquired.  This may be
     	 allocated or shared, and is keyed on server address, port and NFS
     	 version.
     (c) If allocated, the client representation is initialised.  The state
     	 member variable of nfs_client is used to prevent a race during
     	 initialisation from two mounts.
     (d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
     	 the root filehandle for the mount (fs/nfs/getroot.c).  For NFS2/3 we
     	 are given the root FH in advance.
     (e) The volume FSID is probed for on the root FH.
     (f) The volume representation is initialised from the FSINFO record
     	 retrieved on the root FH.
     (g) sget() is called to acquire a superblock.  This may be allocated or
     	 shared, keyed on client pointer and FSID.
     (h) If allocated, the superblock is initialised.
     (i) If the superblock is shared, then the new nfs_server record is
     	 discarded.
     (j) The root dentry for this mount is looked up from the root FH.
     (k) The root dentry for this mount is assigned to the vfsmount.
 (3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
     returns; this function now attaches disconnected trees from alternate
     roots that happen to be discovered attached to a directory being read (in
     the same way nfs_lookup() is made to do for lookup ops).
     The new d_materialise_unique() function is now used to do this, thus
     permitting the whole thing to be done under one set of locks, and thus
     avoiding any race between mount and lookup operations on the same
     directory.
 (4) The client management code uses a new debug facility: NFSDBG_CLIENT which
     is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
 (5) Clone mounts are now called xdev mounts.
 (6) Use the dentry passed to the statfs() op as the handle for retrieving fs
     statistics rather than the root dentry of the superblock (which is now a
     dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
											
										 
											2006-08-22 20:06:13 -04:00
										 |  |  | 	struct list_head	client_link;	/* List of other nfs_server structs
 | 
					
						
							|  |  |  | 						 * that share the same client | 
					
						
							|  |  |  | 						 */ | 
					
						
							|  |  |  | 	struct list_head	master_link;	/* link in master servers list */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	struct rpc_clnt *	client;		/* RPC client handle */ | 
					
						
							| 
									
										
										
										
											2005-06-22 17:16:27 +00:00
										 |  |  | 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */ | 
					
						
							| 
									
										
										
										
											2008-01-11 17:09:52 -05:00
										 |  |  | 	struct nlm_host		*nlm_host;	/* NLM client handle */ | 
					
						
							| 
									
										
										
										
											2010-02-02 14:39:01 +09:00
										 |  |  | 	struct nfs_iostats __percpu *io_stats;	/* I/O statistics */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	struct backing_dev_info	backing_dev_info; | 
					
						
							| 
									
										
										
										
											2007-05-08 00:35:12 -07:00
										 |  |  | 	atomic_long_t		writeback;	/* number of writeback pages */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int			flags;		/* various flags */ | 
					
						
							|  |  |  | 	unsigned int		caps;		/* server capabilities */ | 
					
						
							|  |  |  | 	unsigned int		rsize;		/* read size */ | 
					
						
							|  |  |  | 	unsigned int		rpages;		/* read size (in pages) */ | 
					
						
							|  |  |  | 	unsigned int		wsize;		/* write size */ | 
					
						
							|  |  |  | 	unsigned int		wpages;		/* write size (in pages) */ | 
					
						
							|  |  |  | 	unsigned int		wtmult;		/* server disk block size */ | 
					
						
							|  |  |  | 	unsigned int		dtsize;		/* readdir size */ | 
					
						
							| 
									
										
										
										
											2008-03-14 14:10:22 -04:00
										 |  |  | 	unsigned short		port;		/* "port=" setting */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	unsigned int		bsize;		/* server block size */ | 
					
						
							|  |  |  | 	unsigned int		acregmin;	/* attr cache timeouts */ | 
					
						
							|  |  |  | 	unsigned int		acregmax; | 
					
						
							|  |  |  | 	unsigned int		acdirmin; | 
					
						
							|  |  |  | 	unsigned int		acdirmax; | 
					
						
							|  |  |  | 	unsigned int		namelen; | 
					
						
							| 
									
										
										
										
											2009-04-03 16:42:42 +01:00
										 |  |  | 	unsigned int		options;	/* extra options enabled by mount */ | 
					
						
							|  |  |  | #define NFS_OPTION_FSCACHE	0x00000001	/* - local caching enabled */
 | 
					
						
							| 
									
										
										
										
											2012-09-14 17:24:11 -04:00
										 |  |  | #define NFS_OPTION_MIGRATION	0x00000002	/* - NFSv4 migration enabled */
 | 
					
						
							| 
									
										
											  
											
												NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
 (1) The root and intervening nodes may not be accessible to the client.
     With NFS2 and NFS3, for instance, mountd is called on the server to get
     the filehandle for the tip of a path. mountd won't give us handles for
     anything we don't have permission to access, and so we can't set up NFS
     inodes for such nodes, and so can't easily set up dentries (we'd have to
     have ghost inodes or something).
     With this patch we don't actually create dentries until we get handles
     from the server that we can use to set up their inodes, and we don't
     actually bind them into the tree until we know for sure where they go.
 (2) Inaccessible symbolic links.
     If we're asked to mount two exports from the server, eg:
	mount warthog:/warthog/aaa/xxx /mmm
	mount warthog:/warthog/bbb/yyy /nnn
     We may not be able to access anything nearer the root than xxx and yyy,
     but we may find out later that /mmm/www/yyy, say, is actually the same
     directory as the one mounted on /nnn. What we might then find out, for
     example, is that /warthog/bbb was actually a symbolic link to
     /warthog/aaa/xxx/www, but we can't actually determine that by talking to
     the server until /warthog is made available by NFS.
     This would lead to having constructed an erroneous dentry tree which we
     can't easily fix. We can end up with a dentry marked as a directory when
     it should actually be a symlink, or we could end up with an apparently
     hardlinked directory.
     With this patch we need not make assumptions about the type of a dentry
     for which we can't retrieve information, nor need we assume we know its
     place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
 (1) The server record construction/destruction has been abstracted out into
     its own set of functions to make things easier to get right.  These have
     been moved into fs/nfs/client.c.
     All the code in fs/nfs/client.c has to do with the management of
     connections to servers, and doesn't touch superblocks in any way; the
     remaining code in fs/nfs/super.c has to do with VFS superblock management.
 (2) The sequence of events undertaken by NFS mount is now reordered:
     (a) A volume representation (struct nfs_server) is allocated.
     (b) A server representation (struct nfs_client) is acquired.  This may be
     	 allocated or shared, and is keyed on server address, port and NFS
     	 version.
     (c) If allocated, the client representation is initialised.  The state
     	 member variable of nfs_client is used to prevent a race during
     	 initialisation from two mounts.
     (d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
     	 the root filehandle for the mount (fs/nfs/getroot.c).  For NFS2/3 we
     	 are given the root FH in advance.
     (e) The volume FSID is probed for on the root FH.
     (f) The volume representation is initialised from the FSINFO record
     	 retrieved on the root FH.
     (g) sget() is called to acquire a superblock.  This may be allocated or
     	 shared, keyed on client pointer and FSID.
     (h) If allocated, the superblock is initialised.
     (i) If the superblock is shared, then the new nfs_server record is
     	 discarded.
     (j) The root dentry for this mount is looked up from the root FH.
     (k) The root dentry for this mount is assigned to the vfsmount.
 (3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
     returns; this function now attaches disconnected trees from alternate
     roots that happen to be discovered attached to a directory being read (in
     the same way nfs_lookup() is made to do for lookup ops).
     The new d_materialise_unique() function is now used to do this, thus
     permitting the whole thing to be done under one set of locks, and thus
     avoiding any race between mount and lookup operations on the same
     directory.
 (4) The client management code uses a new debug facility: NFSDBG_CLIENT which
     is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
 (5) Clone mounts are now called xdev mounts.
 (6) Use the dentry passed to the statfs() op as the handle for retrieving fs
     statistics rather than the root dentry of the superblock (which is now a
     dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
											
										 
											2006-08-22 20:06:13 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-06-09 09:34:19 -04:00
										 |  |  | 	struct nfs_fsid		fsid; | 
					
						
							| 
									
										
											  
											
												NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
 (1) The root and intervening nodes may not be accessible to the client.
     With NFS2 and NFS3, for instance, mountd is called on the server to get
     the filehandle for the tip of a path. mountd won't give us handles for
     anything we don't have permission to access, and so we can't set up NFS
     inodes for such nodes, and so can't easily set up dentries (we'd have to
     have ghost inodes or something).
     With this patch we don't actually create dentries until we get handles
     from the server that we can use to set up their inodes, and we don't
     actually bind them into the tree until we know for sure where they go.
 (2) Inaccessible symbolic links.
     If we're asked to mount two exports from the server, eg:
	mount warthog:/warthog/aaa/xxx /mmm
	mount warthog:/warthog/bbb/yyy /nnn
     We may not be able to access anything nearer the root than xxx and yyy,
     but we may find out later that /mmm/www/yyy, say, is actually the same
     directory as the one mounted on /nnn. What we might then find out, for
     example, is that /warthog/bbb was actually a symbolic link to
     /warthog/aaa/xxx/www, but we can't actually determine that by talking to
     the server until /warthog is made available by NFS.
     This would lead to having constructed an erroneous dentry tree which we
     can't easily fix. We can end up with a dentry marked as a directory when
     it should actually be a symlink, or we could end up with an apparently
     hardlinked directory.
     With this patch we need not make assumptions about the type of a dentry
     for which we can't retrieve information, nor need we assume we know its
     place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
 (1) The server record construction/destruction has been abstracted out into
     its own set of functions to make things easier to get right.  These have
     been moved into fs/nfs/client.c.
     All the code in fs/nfs/client.c has to do with the management of
     connections to servers, and doesn't touch superblocks in any way; the
     remaining code in fs/nfs/super.c has to do with VFS superblock management.
 (2) The sequence of events undertaken by NFS mount is now reordered:
     (a) A volume representation (struct nfs_server) is allocated.
     (b) A server representation (struct nfs_client) is acquired.  This may be
     	 allocated or shared, and is keyed on server address, port and NFS
     	 version.
     (c) If allocated, the client representation is initialised.  The state
     	 member variable of nfs_client is used to prevent a race during
     	 initialisation from two mounts.
     (d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
     	 the root filehandle for the mount (fs/nfs/getroot.c).  For NFS2/3 we
     	 are given the root FH in advance.
     (e) The volume FSID is probed for on the root FH.
     (f) The volume representation is initialised from the FSINFO record
     	 retrieved on the root FH.
     (g) sget() is called to acquire a superblock.  This may be allocated or
     	 shared, keyed on client pointer and FSID.
     (h) If allocated, the superblock is initialised.
     (i) If the superblock is shared, then the new nfs_server record is
     	 discarded.
     (j) The root dentry for this mount is looked up from the root FH.
     (k) The root dentry for this mount is assigned to the vfsmount.
 (3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
     returns; this function now attaches disconnected trees from alternate
     roots that happen to be discovered attached to a directory being read (in
     the same way nfs_lookup() is made to do for lookup ops).
     The new d_materialise_unique() function is now used to do this, thus
     permitting the whole thing to be done under one set of locks, and thus
     avoiding any race between mount and lookup operations on the same
     directory.
 (4) The client management code uses a new debug facility: NFSDBG_CLIENT which
     is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
 (5) Clone mounts are now called xdev mounts.
 (6) Use the dentry passed to the statfs() op as the handle for retrieving fs
     statistics rather than the root dentry of the superblock (which is now a
     dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
											
										 
											2006-08-22 20:06:13 -04:00
										 |  |  | 	__u64			maxfilesize;	/* maximum file size */ | 
					
						
							| 
									
										
										
										
											2010-10-12 16:30:05 -07:00
										 |  |  | 	struct timespec		time_delta;	/* smallest time granularity */ | 
					
						
							| 
									
										
										
										
											2006-03-20 13:44:15 -05:00
										 |  |  | 	unsigned long		mount_time;	/* when this fs was mounted */ | 
					
						
							| 
									
										
										
										
											2013-10-17 14:12:56 -04:00
										 |  |  | 	struct super_block	*super;		/* VFS super block */ | 
					
						
							| 
									
										
											  
											
												NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
 (1) The root and intervening nodes may not be accessible to the client.
     With NFS2 and NFS3, for instance, mountd is called on the server to get
     the filehandle for the tip of a path. mountd won't give us handles for
     anything we don't have permission to access, and so we can't set up NFS
     inodes for such nodes, and so can't easily set up dentries (we'd have to
     have ghost inodes or something).
     With this patch we don't actually create dentries until we get handles
     from the server that we can use to set up their inodes, and we don't
     actually bind them into the tree until we know for sure where they go.
 (2) Inaccessible symbolic links.
     If we're asked to mount two exports from the server, eg:
	mount warthog:/warthog/aaa/xxx /mmm
	mount warthog:/warthog/bbb/yyy /nnn
     We may not be able to access anything nearer the root than xxx and yyy,
     but we may find out later that /mmm/www/yyy, say, is actually the same
     directory as the one mounted on /nnn. What we might then find out, for
     example, is that /warthog/bbb was actually a symbolic link to
     /warthog/aaa/xxx/www, but we can't actually determine that by talking to
     the server until /warthog is made available by NFS.
     This would lead to having constructed an erroneous dentry tree which we
     can't easily fix. We can end up with a dentry marked as a directory when
     it should actually be a symlink, or we could end up with an apparently
     hardlinked directory.
     With this patch we need not make assumptions about the type of a dentry
     for which we can't retrieve information, nor need we assume we know its
     place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
 (1) The server record construction/destruction has been abstracted out into
     its own set of functions to make things easier to get right.  These have
     been moved into fs/nfs/client.c.
     All the code in fs/nfs/client.c has to do with the management of
     connections to servers, and doesn't touch superblocks in any way; the
     remaining code in fs/nfs/super.c has to do with VFS superblock management.
 (2) The sequence of events undertaken by NFS mount is now reordered:
     (a) A volume representation (struct nfs_server) is allocated.
     (b) A server representation (struct nfs_client) is acquired.  This may be
     	 allocated or shared, and is keyed on server address, port and NFS
     	 version.
     (c) If allocated, the client representation is initialised.  The state
     	 member variable of nfs_client is used to prevent a race during
     	 initialisation from two mounts.
     (d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
     	 the root filehandle for the mount (fs/nfs/getroot.c).  For NFS2/3 we
     	 are given the root FH in advance.
     (e) The volume FSID is probed for on the root FH.
     (f) The volume representation is initialised from the FSINFO record
     	 retrieved on the root FH.
     (g) sget() is called to acquire a superblock.  This may be allocated or
     	 shared, keyed on client pointer and FSID.
     (h) If allocated, the superblock is initialised.
     (i) If the superblock is shared, then the new nfs_server record is
     	 discarded.
     (j) The root dentry for this mount is looked up from the root FH.
     (k) The root dentry for this mount is assigned to the vfsmount.
 (3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
     returns; this function now attaches disconnected trees from alternate
     roots that happen to be discovered attached to a directory being read (in
     the same way nfs_lookup() is made to do for lookup ops).
     The new d_materialise_unique() function is now used to do this, thus
     permitting the whole thing to be done under one set of locks, and thus
     avoiding any race between mount and lookup operations on the same
     directory.
 (4) The client management code uses a new debug facility: NFSDBG_CLIENT which
     is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
 (5) Clone mounts are now called xdev mounts.
 (6) Use the dentry passed to the statfs() op as the handle for retrieving fs
     statistics rather than the root dentry of the superblock (which is now a
     dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
											
										 
											2006-08-22 20:06:13 -04:00
										 |  |  | 	dev_t			s_dev;		/* superblock dev numbers */ | 
					
						
							| 
									
										
										
										
											2013-10-18 15:15:17 -04:00
										 |  |  | 	struct nfs_auth_info	auth_info;	/* parsed auth flavors */ | 
					
						
							| 
									
										
											  
											
												NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
 (1) The root and intervening nodes may not be accessible to the client.
     With NFS2 and NFS3, for instance, mountd is called on the server to get
     the filehandle for the tip of a path. mountd won't give us handles for
     anything we don't have permission to access, and so we can't set up NFS
     inodes for such nodes, and so can't easily set up dentries (we'd have to
     have ghost inodes or something).
     With this patch we don't actually create dentries until we get handles
     from the server that we can use to set up their inodes, and we don't
     actually bind them into the tree until we know for sure where they go.
 (2) Inaccessible symbolic links.
     If we're asked to mount two exports from the server, eg:
	mount warthog:/warthog/aaa/xxx /mmm
	mount warthog:/warthog/bbb/yyy /nnn
     We may not be able to access anything nearer the root than xxx and yyy,
     but we may find out later that /mmm/www/yyy, say, is actually the same
     directory as the one mounted on /nnn. What we might then find out, for
     example, is that /warthog/bbb was actually a symbolic link to
     /warthog/aaa/xxx/www, but we can't actually determine that by talking to
     the server until /warthog is made available by NFS.
     This would lead to having constructed an erroneous dentry tree which we
     can't easily fix. We can end up with a dentry marked as a directory when
     it should actually be a symlink, or we could end up with an apparently
     hardlinked directory.
     With this patch we need not make assumptions about the type of a dentry
     for which we can't retrieve information, nor need we assume we know its
     place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
 (1) The server record construction/destruction has been abstracted out into
     its own set of functions to make things easier to get right.  These have
     been moved into fs/nfs/client.c.
     All the code in fs/nfs/client.c has to do with the management of
     connections to servers, and doesn't touch superblocks in any way; the
     remaining code in fs/nfs/super.c has to do with VFS superblock management.
 (2) The sequence of events undertaken by NFS mount is now reordered:
     (a) A volume representation (struct nfs_server) is allocated.
     (b) A server representation (struct nfs_client) is acquired.  This may be
     	 allocated or shared, and is keyed on server address, port and NFS
     	 version.
     (c) If allocated, the client representation is initialised.  The state
     	 member variable of nfs_client is used to prevent a race during
     	 initialisation from two mounts.
     (d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
     	 the root filehandle for the mount (fs/nfs/getroot.c).  For NFS2/3 we
     	 are given the root FH in advance.
     (e) The volume FSID is probed for on the root FH.
     (f) The volume representation is initialised from the FSINFO record
     	 retrieved on the root FH.
     (g) sget() is called to acquire a superblock.  This may be allocated or
     	 shared, keyed on client pointer and FSID.
     (h) If allocated, the superblock is initialised.
     (i) If the superblock is shared, then the new nfs_server record is
     	 discarded.
     (j) The root dentry for this mount is looked up from the root FH.
     (k) The root dentry for this mount is assigned to the vfsmount.
 (3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
     returns; this function now attaches disconnected trees from alternate
     roots that happen to be discovered attached to a directory being read (in
     the same way nfs_lookup() is made to do for lookup ops).
     The new d_materialise_unique() function is now used to do this, thus
     permitting the whole thing to be done under one set of locks, and thus
     avoiding any race between mount and lookup operations on the same
     directory.
 (4) The client management code uses a new debug facility: NFSDBG_CLIENT which
     is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
 (5) Clone mounts are now called xdev mounts.
 (6) Use the dentry passed to the statfs() op as the handle for retrieving fs
     statistics rather than the root dentry of the superblock (which is now a
     dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
											
										 
											2006-08-22 20:06:13 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-03 16:42:42 +01:00
										 |  |  | #ifdef CONFIG_NFS_FSCACHE
 | 
					
						
							|  |  |  | 	struct nfs_fscache_key	*fscache_key;	/* unique key for superblock */ | 
					
						
							|  |  |  | 	struct fscache_cookie	*fscache;	/* superblock cookie */ | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-07-31 16:39:04 -04:00
										 |  |  | 	u32			pnfs_blksize;	/* layout_blksize attr */ | 
					
						
							| 
									
										
										
										
											2012-07-30 16:05:25 -04:00
										 |  |  | #if IS_ENABLED(CONFIG_NFS_V4)
 | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:37 -04:00
										 |  |  | 	u32			attr_bitmask[3];/* V4 bitmask representing the set
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 						   of attributes supported on this | 
					
						
							|  |  |  | 						   filesystem */ | 
					
						
							| 
									
										
										
										
											2013-05-22 12:50:44 -04:00
										 |  |  | 	u32			attr_bitmask_nl[3]; | 
					
						
							|  |  |  | 						/* V4 bitmask representing the
 | 
					
						
							|  |  |  | 						   set of attributes supported | 
					
						
							|  |  |  | 						   on this filesystem excluding | 
					
						
							|  |  |  | 						   the label support bit. */ | 
					
						
							| 
									
										
										
										
											2013-05-22 12:50:41 -04:00
										 |  |  | 	u32			cache_consistency_bitmask[3]; | 
					
						
							| 
									
										
										
										
											2009-03-11 14:10:28 -04:00
										 |  |  | 						/* V4 bitmask representing the subset
 | 
					
						
							|  |  |  | 						   of change attribute, size, ctime | 
					
						
							|  |  |  | 						   and mtime attributes supported by | 
					
						
							|  |  |  | 						   the server */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	u32			acl_bitmask;	/* V4 bitmask representing the ACEs
 | 
					
						
							|  |  |  | 						   that are supported on this | 
					
						
							|  |  |  | 						   filesystem */ | 
					
						
							| 
									
										
										
										
											2012-03-01 17:02:05 -05:00
										 |  |  | 	u32			fh_expire_type;	/* V4 bitmask representing file
 | 
					
						
							|  |  |  | 						   handle volatility type for | 
					
						
							|  |  |  | 						   this filesystem */ | 
					
						
							| 
									
										
										
										
											2010-10-20 00:17:58 -04:00
										 |  |  | 	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */ | 
					
						
							| 
									
										
										
										
											2011-01-06 11:36:32 +00:00
										 |  |  | 	struct rpc_wait_queue	roc_rpcwaitq; | 
					
						
							| 
									
										
										
										
											2011-07-30 20:52:46 -04:00
										 |  |  | 	void			*pnfs_ld_data;	/* per mount point data */ | 
					
						
							| 
									
										
										
										
											2010-12-24 01:32:43 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* the following fields are protected by nfs_client->cl_lock */ | 
					
						
							|  |  |  | 	struct rb_root		state_owners; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2012-01-17 22:04:24 -05:00
										 |  |  | 	struct ida		openowner_id; | 
					
						
							| 
									
										
										
										
											2012-01-17 22:04:25 -05:00
										 |  |  | 	struct ida		lockowner_id; | 
					
						
							| 
									
										
											  
											
												NFS: Cache state owners after files are closed
Servers have a finite amount of memory to store NFSv4 open and lock
owners.  Moreover, servers may have a difficult time determining when
they can reap their state owner table, thanks to gray areas in the
NFSv4 protocol specification.  Thus clients should be careful to reuse
state owners when possible.
Currently Linux is not too careful.  When a user has closed all her
files on one mount point, the state owner's reference count goes to
zero, and it is released.  The next OPEN allocates a new one.  A
workload that serially opens and closes files can run through a large
number of open owners this way.
When a state owner's reference count goes to zero, slap it onto a free
list for that nfs_server, with an expiry time.  Garbage collect before
looking for a state owner.  This makes state owners for active users
available for re-use.
Now that there can be unused state owners remaining at umount time,
purge the state owner free list when a server is destroyed.  Also be
sure not to reclaim unused state owners during state recovery.
This change has benefits for the client as well.  For some workloads,
this approach drops the number of OPEN_CONFIRM calls from the same as
the number of OPEN calls, down to just one.  This reduces wire traffic
and thus open(2) latency.  Before this patch, untarring a kernel
source tarball shows the OPEN_CONFIRM call counter steadily increasing
through the test.  With the patch, the OPEN_CONFIRM count remains at 1
throughout the entire untar.
As long as the expiry time is kept short, I don't think garbage
collection should be terribly expensive, although it does bounce the
clp->cl_lock around a bit.
[ At some point we should rationalize the use of the nfs_server
->destroy method. ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[Trond: Fixed a garbage collection race and a few efficiency issues]
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
											
										 
											2011-12-06 16:13:48 -05:00
										 |  |  | 	struct list_head	state_owners_lru; | 
					
						
							| 
									
										
										
										
											2011-06-01 16:44:44 -04:00
										 |  |  | 	struct list_head	layouts; | 
					
						
							| 
									
										
										
										
											2010-12-24 01:33:04 +00:00
										 |  |  | 	struct list_head	delegations; | 
					
						
							| 
									
										
										
										
											2013-10-17 14:13:02 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	unsigned long		mig_gen; | 
					
						
							|  |  |  | 	unsigned long		mig_status; | 
					
						
							|  |  |  | #define NFS_MIG_IN_TRANSITION		(1)
 | 
					
						
							|  |  |  | #define NFS_MIG_FAILED			(2)
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
 (1) The root and intervening nodes may not be accessible to the client.
     With NFS2 and NFS3, for instance, mountd is called on the server to get
     the filehandle for the tip of a path. mountd won't give us handles for
     anything we don't have permission to access, and so we can't set up NFS
     inodes for such nodes, and so can't easily set up dentries (we'd have to
     have ghost inodes or something).
     With this patch we don't actually create dentries until we get handles
     from the server that we can use to set up their inodes, and we don't
     actually bind them into the tree until we know for sure where they go.
 (2) Inaccessible symbolic links.
     If we're asked to mount two exports from the server, eg:
	mount warthog:/warthog/aaa/xxx /mmm
	mount warthog:/warthog/bbb/yyy /nnn
     We may not be able to access anything nearer the root than xxx and yyy,
     but we may find out later that /mmm/www/yyy, say, is actually the same
     directory as the one mounted on /nnn. What we might then find out, for
     example, is that /warthog/bbb was actually a symbolic link to
     /warthog/aaa/xxx/www, but we can't actually determine that by talking to
     the server until /warthog is made available by NFS.
     This would lead to having constructed an erroneous dentry tree which we
     can't easily fix. We can end up with a dentry marked as a directory when
     it should actually be a symlink, or we could end up with an apparently
     hardlinked directory.
     With this patch we need not make assumptions about the type of a dentry
     for which we can't retrieve information, nor need we assume we know its
     place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
 (1) The server record construction/destruction has been abstracted out into
     its own set of functions to make things easier to get right.  These have
     been moved into fs/nfs/client.c.
     All the code in fs/nfs/client.c has to do with the management of
     connections to servers, and doesn't touch superblocks in any way; the
     remaining code in fs/nfs/super.c has to do with VFS superblock management.
 (2) The sequence of events undertaken by NFS mount is now reordered:
     (a) A volume representation (struct nfs_server) is allocated.
     (b) A server representation (struct nfs_client) is acquired.  This may be
     	 allocated or shared, and is keyed on server address, port and NFS
     	 version.
     (c) If allocated, the client representation is initialised.  The state
     	 member variable of nfs_client is used to prevent a race during
     	 initialisation from two mounts.
     (d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
     	 the root filehandle for the mount (fs/nfs/getroot.c).  For NFS2/3 we
     	 are given the root FH in advance.
     (e) The volume FSID is probed for on the root FH.
     (f) The volume representation is initialised from the FSINFO record
     	 retrieved on the root FH.
     (g) sget() is called to acquire a superblock.  This may be allocated or
     	 shared, keyed on client pointer and FSID.
     (h) If allocated, the superblock is initialised.
     (i) If the superblock is shared, then the new nfs_server record is
     	 discarded.
     (j) The root dentry for this mount is looked up from the root FH.
     (k) The root dentry for this mount is assigned to the vfsmount.
 (3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
     returns; this function now attaches disconnected trees from alternate
     roots that happen to be discovered attached to a directory being read (in
     the same way nfs_lookup() is made to do for lookup ops).
     The new d_materialise_unique() function is now used to do this, thus
     permitting the whole thing to be done under one set of locks, and thus
     avoiding any race between mount and lookup operations on the same
     directory.
 (4) The client management code uses a new debug facility: NFSDBG_CLIENT which
     is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
 (5) Clone mounts are now called xdev mounts.
 (6) Use the dentry passed to the statfs() op as the handle for retrieving fs
     statistics rather than the root dentry of the superblock (which is now a
     dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
											
										 
											2006-08-22 20:06:13 -04:00
										 |  |  | 	void (*destroy)(struct nfs_server *); | 
					
						
							| 
									
										
										
										
											2007-11-08 04:05:04 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	atomic_t active; /* Keep trace of any activity to this server */ | 
					
						
							| 
									
										
										
										
											2008-03-14 14:10:30 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* mountd-related mount options */ | 
					
						
							|  |  |  | 	struct sockaddr_storage	mountd_address; | 
					
						
							|  |  |  | 	size_t			mountd_addrlen; | 
					
						
							|  |  |  | 	u32			mountd_version; | 
					
						
							|  |  |  | 	unsigned short		mountd_port; | 
					
						
							|  |  |  | 	unsigned short		mountd_protocol; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Server capabilities */ | 
					
						
							|  |  |  | #define NFS_CAP_READDIRPLUS	(1U << 0)
 | 
					
						
							|  |  |  | #define NFS_CAP_HARDLINKS	(1U << 1)
 | 
					
						
							|  |  |  | #define NFS_CAP_SYMLINKS	(1U << 2)
 | 
					
						
							|  |  |  | #define NFS_CAP_ACLS		(1U << 3)
 | 
					
						
							|  |  |  | #define NFS_CAP_ATOMIC_OPEN	(1U << 4)
 | 
					
						
							| 
									
										
										
										
											2009-08-09 15:06:19 -04:00
										 |  |  | #define NFS_CAP_CHANGE_ATTR	(1U << 5)
 | 
					
						
							|  |  |  | #define NFS_CAP_FILEID		(1U << 6)
 | 
					
						
							|  |  |  | #define NFS_CAP_MODE		(1U << 7)
 | 
					
						
							|  |  |  | #define NFS_CAP_NLINK		(1U << 8)
 | 
					
						
							|  |  |  | #define NFS_CAP_OWNER		(1U << 9)
 | 
					
						
							|  |  |  | #define NFS_CAP_OWNER_GROUP	(1U << 10)
 | 
					
						
							|  |  |  | #define NFS_CAP_ATIME		(1U << 11)
 | 
					
						
							|  |  |  | #define NFS_CAP_CTIME		(1U << 12)
 | 
					
						
							|  |  |  | #define NFS_CAP_MTIME		(1U << 13)
 | 
					
						
							| 
									
										
										
										
											2010-04-11 16:48:44 -04:00
										 |  |  | #define NFS_CAP_POSIX_LOCK	(1U << 14)
 | 
					
						
							| 
									
										
										
										
											2011-02-22 15:44:32 -08:00
										 |  |  | #define NFS_CAP_UIDGID_NOMAP	(1U << 15)
 | 
					
						
							| 
									
										
										
										
											2013-03-17 15:31:15 -04:00
										 |  |  | #define NFS_CAP_STATEID_NFSV41	(1U << 16)
 | 
					
						
							| 
									
										
										
										
											2013-03-15 16:44:28 -04:00
										 |  |  | #define NFS_CAP_ATOMIC_OPEN_V1	(1U << 17)
 | 
					
						
							| 
									
										
										
										
											2013-05-22 12:50:39 -04:00
										 |  |  | #define NFS_CAP_SECURITY_LABEL	(1U << 18)
 | 
					
						
							| 
									
										
										
										
											2014-09-26 13:58:48 -04:00
										 |  |  | #define NFS_CAP_SEEK		(1U << 19)
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | #endif
 |