nfsd4: allow large readdirs
Currently we limit readdir results to a single page. This can result in a performance regression compared to NFSv3 when reading large directories. Signed-off-by: J. Bruce Fields <bfields@redhat.com>
This commit is contained in:
		
					parent
					
						
							
								32aaa62ede
							
						
					
				
			
			
				commit
				
					
						561f0ed498
					
				
			
		
					 3 changed files with 82 additions and 69 deletions
				
			
		|  | @ -1500,13 +1500,14 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||||||
| 
 | 
 | ||||||
| static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||||||
| { | { | ||||||
|  | 	u32 maxcount = svc_max_payload(rqstp); | ||||||
| 	u32 rlen = op->u.readdir.rd_maxcount; | 	u32 rlen = op->u.readdir.rd_maxcount; | ||||||
| 
 | 
 | ||||||
| 	if (rlen > PAGE_SIZE) | 	if (rlen > maxcount) | ||||||
| 		rlen = PAGE_SIZE; | 		rlen = maxcount; | ||||||
| 
 | 
 | ||||||
| 	return (op_encode_hdr_size + op_encode_verifier_maxsz) | 	return (op_encode_hdr_size + op_encode_verifier_maxsz + | ||||||
| 		 * sizeof(__be32) + rlen; | 		XDR_QUADLEN(rlen)) * sizeof(__be32); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | ||||||
|  |  | ||||||
|  | @ -2575,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static __be32 | static __be32 | ||||||
| nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, | nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd, | ||||||
| 		const char *name, int namlen, __be32 **p, int buflen) | 			const char *name, int namlen) | ||||||
| { | { | ||||||
| 	struct svc_export *exp = cd->rd_fhp->fh_export; | 	struct svc_export *exp = cd->rd_fhp->fh_export; | ||||||
| 	struct dentry *dentry; | 	struct dentry *dentry; | ||||||
|  | @ -2628,8 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, | ||||||
| 
 | 
 | ||||||
| 	} | 	} | ||||||
| out_encode: | out_encode: | ||||||
| 	nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry, | 	nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval, | ||||||
| 					cd->rd_bmval, |  | ||||||
| 					cd->rd_rqstp, ignore_crossmnt); | 					cd->rd_rqstp, ignore_crossmnt); | ||||||
| out_put: | out_put: | ||||||
| 	dput(dentry); | 	dput(dentry); | ||||||
|  | @ -2638,9 +2637,12 @@ out_put: | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static __be32 * | static __be32 * | ||||||
| nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr) | nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr) | ||||||
| { | { | ||||||
| 	if (buflen < 6) | 	__be32 *p; | ||||||
|  | 
 | ||||||
|  | 	p = xdr_reserve_space(xdr, 6); | ||||||
|  | 	if (!p) | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	*p++ = htonl(2); | 	*p++ = htonl(2); | ||||||
| 	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ | 	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ | ||||||
|  | @ -2657,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | ||||||
| { | { | ||||||
| 	struct readdir_cd *ccd = ccdv; | 	struct readdir_cd *ccd = ccdv; | ||||||
| 	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); | 	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); | ||||||
| 	int buflen; | 	struct xdr_stream *xdr = cd->xdr; | ||||||
| 	__be32 *p = cd->buffer; | 	int start_offset = xdr->buf->len; | ||||||
| 	__be32 *cookiep; | 	int cookie_offset; | ||||||
|  | 	int entry_bytes; | ||||||
| 	__be32 nfserr = nfserr_toosmall; | 	__be32 nfserr = nfserr_toosmall; | ||||||
|  | 	__be64 wire_offset; | ||||||
|  | 	__be32 *p; | ||||||
| 
 | 
 | ||||||
| 	/* In nfsv4, "." and ".." never make it onto the wire.. */ | 	/* In nfsv4, "." and ".." never make it onto the wire.. */ | ||||||
| 	if (name && isdotent(name, namlen)) { | 	if (name && isdotent(name, namlen)) { | ||||||
|  | @ -2668,19 +2673,24 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | ||||||
| 		return 0; | 		return 0; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (cd->offset) | 	if (cd->cookie_offset) { | ||||||
| 		xdr_encode_hyper(cd->offset, (u64) offset); | 		wire_offset = cpu_to_be64(offset); | ||||||
|  | 		write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset, | ||||||
|  | 							&wire_offset, 8); | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	buflen = cd->buflen - 4 - XDR_QUADLEN(namlen); | 	p = xdr_reserve_space(xdr, 4); | ||||||
| 	if (buflen < 0) | 	if (!p) | ||||||
| 		goto fail; | 		goto fail; | ||||||
| 
 |  | ||||||
| 	*p++ = xdr_one;                             /* mark entry present */ | 	*p++ = xdr_one;                             /* mark entry present */ | ||||||
| 	cookiep = p; | 	cookie_offset = xdr->buf->len; | ||||||
|  | 	p = xdr_reserve_space(xdr, 3*4 + namlen); | ||||||
|  | 	if (!p) | ||||||
|  | 		goto fail; | ||||||
| 	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */ | 	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */ | ||||||
| 	p = xdr_encode_array(p, name, namlen);      /* name length & name */ | 	p = xdr_encode_array(p, name, namlen);      /* name length & name */ | ||||||
| 
 | 
 | ||||||
| 	nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen); | 	nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen); | ||||||
| 	switch (nfserr) { | 	switch (nfserr) { | ||||||
| 	case nfs_ok: | 	case nfs_ok: | ||||||
| 		break; | 		break; | ||||||
|  | @ -2699,19 +2709,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | ||||||
| 		 */ | 		 */ | ||||||
| 		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) | 		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) | ||||||
| 			goto fail; | 			goto fail; | ||||||
| 		p = nfsd4_encode_rdattr_error(p, buflen, nfserr); | 		p = nfsd4_encode_rdattr_error(xdr, nfserr); | ||||||
| 		if (p == NULL) { | 		if (p == NULL) { | ||||||
| 			nfserr = nfserr_toosmall; | 			nfserr = nfserr_toosmall; | ||||||
| 			goto fail; | 			goto fail; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	cd->buflen -= (p - cd->buffer); | 	nfserr = nfserr_toosmall; | ||||||
| 	cd->buffer = p; | 	entry_bytes = xdr->buf->len - start_offset; | ||||||
| 	cd->offset = cookiep; | 	if (entry_bytes > cd->rd_maxcount) | ||||||
|  | 		goto fail; | ||||||
|  | 	cd->rd_maxcount -= entry_bytes; | ||||||
|  | 	cd->cookie_offset = cookie_offset; | ||||||
| skip_entry: | skip_entry: | ||||||
| 	cd->common.err = nfs_ok; | 	cd->common.err = nfs_ok; | ||||||
| 	return 0; | 	return 0; | ||||||
| fail: | fail: | ||||||
|  | 	xdr_truncate_encode(xdr, start_offset); | ||||||
| 	cd->common.err = nfserr; | 	cd->common.err = nfserr; | ||||||
| 	return -EINVAL; | 	return -EINVAL; | ||||||
| } | } | ||||||
|  | @ -3206,10 +3220,11 @@ static __be32 | ||||||
| nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) | nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) | ||||||
| { | { | ||||||
| 	int maxcount; | 	int maxcount; | ||||||
|  | 	int bytes_left; | ||||||
| 	loff_t offset; | 	loff_t offset; | ||||||
|  | 	__be64 wire_offset; | ||||||
| 	struct xdr_stream *xdr = &resp->xdr; | 	struct xdr_stream *xdr = &resp->xdr; | ||||||
| 	int starting_len = xdr->buf->len; | 	int starting_len = xdr->buf->len; | ||||||
| 	__be32 *page, *tailbase; |  | ||||||
| 	__be32 *p; | 	__be32 *p; | ||||||
| 
 | 
 | ||||||
| 	if (nfserr) | 	if (nfserr) | ||||||
|  | @ -3219,38 +3234,38 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 | ||||||
| 	if (!p) | 	if (!p) | ||||||
| 		return nfserr_resource; | 		return nfserr_resource; | ||||||
| 
 | 
 | ||||||
| 	if (resp->xdr.buf->page_len) |  | ||||||
| 		return nfserr_resource; |  | ||||||
| 	if (!*resp->rqstp->rq_next_page) |  | ||||||
| 		return nfserr_resource; |  | ||||||
| 
 |  | ||||||
| 	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ | 	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ | ||||||
| 	WRITE32(0); | 	WRITE32(0); | ||||||
| 	WRITE32(0); | 	WRITE32(0); | ||||||
| 	resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) | 	resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) | ||||||
| 				- (char *)resp->xdr.buf->head[0].iov_base; | 				- (char *)resp->xdr.buf->head[0].iov_base; | ||||||
| 	tailbase = p; |  | ||||||
| 
 |  | ||||||
| 	maxcount = PAGE_SIZE; |  | ||||||
| 	if (maxcount > readdir->rd_maxcount) |  | ||||||
| 		maxcount = readdir->rd_maxcount; |  | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * Convert from bytes to words, account for the two words already | 	 * Number of bytes left for directory entries allowing for the | ||||||
| 	 * written, make sure to leave two words at the end for the next | 	 * final 8 bytes of the readdir and a following failed op: | ||||||
| 	 * pointer and eof field. |  | ||||||
| 	 */ | 	 */ | ||||||
| 	maxcount = (maxcount >> 2) - 4; | 	bytes_left = xdr->buf->buflen - xdr->buf->len | ||||||
| 	if (maxcount < 0) { | 			- COMPOUND_ERR_SLACK_SPACE - 8; | ||||||
|  | 	if (bytes_left < 0) { | ||||||
|  | 		nfserr = nfserr_resource; | ||||||
|  | 		goto err_no_verf; | ||||||
|  | 	} | ||||||
|  | 	maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX); | ||||||
|  | 	/*
 | ||||||
|  | 	 * Note the rfc defines rd_maxcount as the size of the | ||||||
|  | 	 * READDIR4resok structure, which includes the verifier above | ||||||
|  | 	 * and the 8 bytes encoded at the end of this function: | ||||||
|  | 	 */ | ||||||
|  | 	if (maxcount < 16) { | ||||||
| 		nfserr = nfserr_toosmall; | 		nfserr = nfserr_toosmall; | ||||||
| 		goto err_no_verf; | 		goto err_no_verf; | ||||||
| 	} | 	} | ||||||
|  | 	maxcount = min_t(int, maxcount-16, bytes_left); | ||||||
| 
 | 
 | ||||||
| 	page = page_address(*(resp->rqstp->rq_next_page++)); | 	readdir->xdr = xdr; | ||||||
|  | 	readdir->rd_maxcount = maxcount; | ||||||
| 	readdir->common.err = 0; | 	readdir->common.err = 0; | ||||||
| 	readdir->buflen = maxcount; | 	readdir->cookie_offset = 0; | ||||||
| 	readdir->buffer = page; |  | ||||||
| 	readdir->offset = NULL; |  | ||||||
| 
 | 
 | ||||||
| 	offset = readdir->rd_cookie; | 	offset = readdir->rd_cookie; | ||||||
| 	nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, | 	nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, | ||||||
|  | @ -3258,33 +3273,31 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 | ||||||
| 			      &readdir->common, nfsd4_encode_dirent); | 			      &readdir->common, nfsd4_encode_dirent); | ||||||
| 	if (nfserr == nfs_ok && | 	if (nfserr == nfs_ok && | ||||||
| 	    readdir->common.err == nfserr_toosmall && | 	    readdir->common.err == nfserr_toosmall && | ||||||
| 	    readdir->buffer == page)  | 	    xdr->buf->len == starting_len + 8) { | ||||||
|  | 		/* nothing encoded; which limit did we hit?: */ | ||||||
|  | 		if (maxcount - 16 < bytes_left) | ||||||
|  | 			/* It was the fault of rd_maxcount: */ | ||||||
| 			nfserr = nfserr_toosmall; | 			nfserr = nfserr_toosmall; | ||||||
|  | 		else | ||||||
|  | 			/* We ran out of buffer space: */ | ||||||
|  | 			nfserr = nfserr_resource; | ||||||
|  | 	} | ||||||
| 	if (nfserr) | 	if (nfserr) | ||||||
| 		goto err_no_verf; | 		goto err_no_verf; | ||||||
| 
 | 
 | ||||||
| 	if (readdir->offset) | 	if (readdir->cookie_offset) { | ||||||
| 		xdr_encode_hyper(readdir->offset, offset); | 		wire_offset = cpu_to_be64(offset); | ||||||
|  | 		write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset, | ||||||
|  | 							&wire_offset, 8); | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	p = readdir->buffer; | 	p = xdr_reserve_space(xdr, 8); | ||||||
|  | 	if (!p) { | ||||||
|  | 		WARN_ON_ONCE(1); | ||||||
|  | 		goto err_no_verf; | ||||||
|  | 	} | ||||||
| 	*p++ = 0;	/* no more entries */ | 	*p++ = 0;	/* no more entries */ | ||||||
| 	*p++ = htonl(readdir->common.err == nfserr_eof); | 	*p++ = htonl(readdir->common.err == nfserr_eof); | ||||||
| 	resp->xdr.buf->page_len = ((char *)p) - |  | ||||||
| 		(char*)page_address(*(resp->rqstp->rq_next_page-1)); |  | ||||||
| 	xdr->buf->len += xdr->buf->page_len; |  | ||||||
| 
 |  | ||||||
| 	xdr->iov = xdr->buf->tail; |  | ||||||
| 
 |  | ||||||
| 	xdr->page_ptr++; |  | ||||||
| 	xdr->buf->buflen -= PAGE_SIZE; |  | ||||||
| 	xdr->iov = xdr->buf->tail; |  | ||||||
| 
 |  | ||||||
| 	/* Use rest of head for padding and remaining ops: */ |  | ||||||
| 	resp->xdr.buf->tail[0].iov_base = tailbase; |  | ||||||
| 	resp->xdr.buf->tail[0].iov_len = 0; |  | ||||||
| 	resp->xdr.p = resp->xdr.buf->tail[0].iov_base; |  | ||||||
| 	resp->xdr.end = resp->xdr.p + |  | ||||||
| 			(PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4; |  | ||||||
| 
 | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
| err_no_verf: | err_no_verf: | ||||||
|  |  | ||||||
|  | @ -287,9 +287,8 @@ struct nfsd4_readdir { | ||||||
| 	struct svc_fh * rd_fhp;             /* response */ | 	struct svc_fh * rd_fhp;             /* response */ | ||||||
| 
 | 
 | ||||||
| 	struct readdir_cd	common; | 	struct readdir_cd	common; | ||||||
| 	__be32 *		buffer; | 	struct xdr_stream	*xdr; | ||||||
| 	int			buflen; | 	int			cookie_offset; | ||||||
| 	__be32 *		offset; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct nfsd4_release_lockowner { | struct nfsd4_release_lockowner { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 J. Bruce Fields
				J. Bruce Fields