Before, when creating a new inode, we'd set the sb->s_dirt flag, and sometime later the system would write out s_nextid as part of the sb_info. Also on inode sync we would force the sb sync as well. Define the s_nextid as a new partition attribute and set it every time we create a new object. At mount we read it from its new place. We now never set sb->s_dirt anywhere in exofs. write_super is actually never called. The call to exofs_write_super from exofs_put_super is also removed because the VFS always calls ->sync_fs before calling ->put_super twice. To stay backward-and-forward compatible we also write the old s_nextid in the super_block object at unmount, and support zero length attribute on mount. This also fixes a BUG where, in layouts where group_width was not a divisor of EXOFS_SUPER_ID (0x10000), the s_nextid was not read from the device it was written to. This happened because of the sliding window layout trick, and because the read was always done from the 0 device but the write was done via the raid engine, which might slide the device view. Now we read and write through the raid engine. Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
		
			
				
	
	
		
			308 lines
		
	
	
	
		
			9.1 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			308 lines
		
	
	
	
		
			9.1 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 * Copyright (C) 2005, 2006
 * Avishay Traeger (avishay@gmail.com)
 * Copyright (C) 2008, 2009
 * Boaz Harrosh <bharrosh@panasas.com>
 *
 * Copyrights for code taken from ext2:
 *     Copyright (C) 1992, 1993, 1994, 1995
 *     Remy Card (card@masi.ibp.fr)
 *     Laboratoire MASI - Institut Blaise Pascal
 *     Universite Pierre et Marie Curie (Paris VI)
 *     from
 *     linux/fs/minix/inode.c
 *     Copyright (C) 1991, 1992  Linus Torvalds
 *
 * This file is part of exofs.
 *
 * exofs is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation.  Since it is based on ext2, and the only
 * valid version of GPL for the Linux kernel is version 2, the only valid
 * version of GPL for exofs is version 2.
 *
 * exofs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with exofs; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#ifndef __EXOFS_H__
 | 
						|
#define __EXOFS_H__
 | 
						|
 | 
						|
#include <linux/fs.h>
 | 
						|
#include <linux/time.h>
 | 
						|
#include <linux/backing-dev.h>
 | 
						|
#include "common.h"
 | 
						|
 | 
						|
/* FIXME: Remove once pnfs hits mainline
 | 
						|
 * #include <linux/exportfs/pnfs_osd_xdr.h>
 | 
						|
 */
 | 
						|
#include "pnfs.h"
 | 
						|
 | 
						|
/* Report an error through printk at KERN_ERR level, prefixed with "exofs: ". */
#define EXOFS_ERR(fmt, ...) \
	printk(KERN_ERR "exofs: " fmt, ##__VA_ARGS__)
 | 
						|
/*
 * Debug tracing.
 *
 * With CONFIG_EXOFS_DEBUG the message goes out at KERN_NOTICE level, tagged
 * with the calling function name and line number. Without it the printk call
 * sits under "if (0)": the compiler still sees it, but it is never executed.
 */
#ifdef CONFIG_EXOFS_DEBUG
#define EXOFS_DBGMSG(fmt, ...) \
	printk(KERN_NOTICE "exofs @%s:%d: " fmt, __func__, __LINE__, \
	       ##__VA_ARGS__)
#else
#define EXOFS_DBGMSG(fmt, ...) \
	do { \
		if (0) \
			printk(fmt, ##__VA_ARGS__); \
	} while (0)
#endif
 | 
						|
/*
 * u64 has problems with printk; this casts the value to unsigned long long.
 *
 * The whole expansion is parenthesized so that the cast is a single unit in
 * whatever expression surrounds it (for example as the operand of sizeof).
 */
#define _LLU(x) ((unsigned long long)(x))
 | 
						|
struct exofs_layout {
 | 
						|
	osd_id		s_pid;			/* partition ID of file system*/
 | 
						|
 | 
						|
	/* Our way of looking at the data_map */
 | 
						|
	unsigned stripe_unit;
 | 
						|
	unsigned mirrors_p1;
 | 
						|
 | 
						|
	unsigned group_width;
 | 
						|
	u64	 group_depth;
 | 
						|
	unsigned group_count;
 | 
						|
 | 
						|
	enum exofs_inode_layout_gen_functions lay_func;
 | 
						|
 | 
						|
	unsigned	s_numdevs;		/* Num of devices in array    */
 | 
						|
	struct osd_dev	*s_ods[0];		/* Variable length            */
 | 
						|
};
 | 
						|
 | 
						|
/*
 | 
						|
 * our extension to the in-memory superblock
 | 
						|
 */
 | 
						|
struct exofs_sb_info {
 | 
						|
	struct exofs_sb_stats s_ess;		/* Written often, pre-allocate*/
 | 
						|
	int		s_timeout;		/* timeout for OSD operations */
 | 
						|
	uint64_t	s_nextid;		/* highest object ID used     */
 | 
						|
	uint32_t	s_numfiles;		/* number of files on fs      */
 | 
						|
	spinlock_t	s_next_gen_lock;	/* spinlock for gen # update  */
 | 
						|
	u32		s_next_generation;	/* next gen # to use          */
 | 
						|
	atomic_t	s_curr_pending;		/* number of pending commands */
 | 
						|
	uint8_t		s_cred[OSD_CAP_LEN];	/* credential for the fscb    */
 | 
						|
	struct 		backing_dev_info bdi;	/* register our bdi with VFS  */
 | 
						|
 | 
						|
	struct pnfs_osd_data_map data_map;	/* Default raid to use
 | 
						|
						 * FIXME: Needed ?
 | 
						|
						 */
 | 
						|
/*	struct exofs_layout	dir_layout;*/	/* Default dir layout */
 | 
						|
	struct exofs_layout	layout;		/* Default files layout,
 | 
						|
						 * contains the variable osd_dev
 | 
						|
						 * array. Keep last */
 | 
						|
	struct osd_dev	*_min_one_dev[1];	/* Place holder for one dev   */
 | 
						|
};
 | 
						|
 | 
						|
/*
 | 
						|
 * our extension to the in-memory inode
 | 
						|
 */
 | 
						|
struct exofs_i_info {
 | 
						|
	struct inode   vfs_inode;          /* normal in-memory inode          */
 | 
						|
	wait_queue_head_t i_wq;            /* wait queue for inode            */
 | 
						|
	unsigned long  i_flags;            /* various atomic flags            */
 | 
						|
	uint32_t       i_data[EXOFS_IDATA];/*short symlink names and device #s*/
 | 
						|
	uint32_t       i_dir_start_lookup; /* which page to start lookup      */
 | 
						|
	uint64_t       i_commit_size;      /* the object's written length     */
 | 
						|
	uint8_t        i_cred[OSD_CAP_LEN];/* all-powerful credential         */
 | 
						|
};
 | 
						|
 | 
						|
static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
 | 
						|
{
 | 
						|
	return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
 | 
						|
}
 | 
						|
 | 
						|
struct exofs_io_state;

/* Callback type: receives an io_state and the caller's private pointer. */
typedef void (*exofs_io_done_fn)(struct exofs_io_state *ios, void *private);
 | 
						|
struct exofs_io_state {
 | 
						|
	struct kref		kref;
 | 
						|
 | 
						|
	void			*private;
 | 
						|
	exofs_io_done_fn	done;
 | 
						|
 | 
						|
	struct exofs_layout	*layout;
 | 
						|
	struct osd_obj_id	obj;
 | 
						|
	u8			*cred;
 | 
						|
 | 
						|
	/* Global read/write IO*/
 | 
						|
	loff_t			offset;
 | 
						|
	unsigned long		length;
 | 
						|
	void			*kern_buff;
 | 
						|
 | 
						|
	struct page		**pages;
 | 
						|
	unsigned		nr_pages;
 | 
						|
	unsigned		pgbase;
 | 
						|
	unsigned		pages_consumed;
 | 
						|
 | 
						|
	/* Attributes */
 | 
						|
	unsigned		in_attr_len;
 | 
						|
	struct osd_attr 	*in_attr;
 | 
						|
	unsigned		out_attr_len;
 | 
						|
	struct osd_attr 	*out_attr;
 | 
						|
 | 
						|
	/* Variable array of size numdevs */
 | 
						|
	unsigned numdevs;
 | 
						|
	struct exofs_per_dev_state {
 | 
						|
		struct osd_request *or;
 | 
						|
		struct bio *bio;
 | 
						|
		loff_t offset;
 | 
						|
		unsigned length;
 | 
						|
		unsigned dev;
 | 
						|
	} per_dev[];
 | 
						|
};
 | 
						|
 | 
						|
static inline unsigned exofs_io_state_size(unsigned numdevs)
 | 
						|
{
 | 
						|
	return sizeof(struct exofs_io_state) +
 | 
						|
		sizeof(struct exofs_per_dev_state) * numdevs;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Bit numbers of our inode flags
 */
#define OBJ_2BCREATED	0	/* the object will be created soon */
#define OBJ_CREATED	1	/* the object has been created on the osd */
 | 
						|
static inline int obj_2bcreated(struct exofs_i_info *oi)
 | 
						|
{
 | 
						|
	return test_bit(OBJ_2BCREATED, &oi->i_flags);
 | 
						|
}
 | 
						|
 | 
						|
static inline void set_obj_2bcreated(struct exofs_i_info *oi)
 | 
						|
{
 | 
						|
	set_bit(OBJ_2BCREATED, &oi->i_flags);
 | 
						|
}
 | 
						|
 | 
						|
static inline int obj_created(struct exofs_i_info *oi)
 | 
						|
{
 | 
						|
	return test_bit(OBJ_CREATED, &oi->i_flags);
 | 
						|
}
 | 
						|
 | 
						|
static inline void set_obj_created(struct exofs_i_info *oi)
 | 
						|
{
 | 
						|
	set_bit(OBJ_CREATED, &oi->i_flags);
 | 
						|
}
 | 
						|
 | 
						|
int __exofs_wait_obj_created(struct exofs_i_info *oi);

/*
 * Returns 0 straight away when OBJ_CREATED is already set (the expected
 * case); otherwise hands over to __exofs_wait_obj_created() and returns its
 * result.
 */
static inline int wait_obj_created(struct exofs_i_info *oi)
{
	if (unlikely(!obj_created(oi)))
		return __exofs_wait_obj_created(oi);

	return 0;
}
 | 
						|
/*
 | 
						|
 * get to our inode from the vfs inode
 | 
						|
 */
 | 
						|
static inline struct exofs_i_info *exofs_i(struct inode *inode)
 | 
						|
{
 | 
						|
	return container_of(inode, struct exofs_i_info, vfs_inode);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Given a layout, object_number and stripe_index return the associated global
 | 
						|
 * dev_index
 | 
						|
 */
 | 
						|
unsigned exofs_layout_od_id(struct exofs_layout *layout,
 | 
						|
			    osd_id obj_no, unsigned layout_index);
 | 
						|
/*
 * Upper bound on the number of links to a file
 */
#define EXOFS_LINK_MAX	32000
 | 
						|
/*************************
 | 
						|
 * function declarations *
 | 
						|
 *************************/
 | 
						|
 | 
						|
/* ios.c */
 | 
						|
void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
 | 
						|
			   const struct osd_obj_id *obj);
 | 
						|
int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
 | 
						|
		    u64 offset, void *p, unsigned length);
 | 
						|
 | 
						|
int  exofs_get_io_state(struct exofs_layout *layout,
 | 
						|
			struct exofs_io_state **ios);
 | 
						|
void exofs_put_io_state(struct exofs_io_state *ios);
 | 
						|
 | 
						|
int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
 | 
						|
 | 
						|
int exofs_sbi_create(struct exofs_io_state *ios);
 | 
						|
int exofs_sbi_remove(struct exofs_io_state *ios);
 | 
						|
int exofs_sbi_write(struct exofs_io_state *ios);
 | 
						|
int exofs_sbi_read(struct exofs_io_state *ios);
 | 
						|
 | 
						|
int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
 | 
						|
 | 
						|
int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
 | 
						|
static inline int exofs_oi_write(struct exofs_i_info *oi,
 | 
						|
				 struct exofs_io_state *ios)
 | 
						|
{
 | 
						|
	ios->obj.id = exofs_oi_objno(oi);
 | 
						|
	ios->cred = oi->i_cred;
 | 
						|
	return exofs_sbi_write(ios);
 | 
						|
}
 | 
						|
 | 
						|
static inline int exofs_oi_read(struct exofs_i_info *oi,
 | 
						|
				struct exofs_io_state *ios)
 | 
						|
{
 | 
						|
	ios->obj.id = exofs_oi_objno(oi);
 | 
						|
	ios->cred = oi->i_cred;
 | 
						|
	return exofs_sbi_read(ios);
 | 
						|
}
 | 
						|
 | 
						|
/* inode.c               */
 | 
						|
unsigned exofs_max_io_pages(struct exofs_layout *layout,
 | 
						|
			    unsigned expected_pages);
 | 
						|
int exofs_setattr(struct dentry *, struct iattr *);
 | 
						|
int exofs_write_begin(struct file *file, struct address_space *mapping,
 | 
						|
		loff_t pos, unsigned len, unsigned flags,
 | 
						|
		struct page **pagep, void **fsdata);
 | 
						|
extern struct inode *exofs_iget(struct super_block *, unsigned long);
 | 
						|
struct inode *exofs_new_inode(struct inode *, int);
 | 
						|
extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
 | 
						|
extern void exofs_evict_inode(struct inode *);
 | 
						|
 | 
						|
/* dir.c:                */
 | 
						|
int exofs_add_link(struct dentry *, struct inode *);
 | 
						|
ino_t exofs_inode_by_name(struct inode *, struct dentry *);
 | 
						|
int exofs_delete_entry(struct exofs_dir_entry *, struct page *);
 | 
						|
int exofs_make_empty(struct inode *, struct inode *);
 | 
						|
struct exofs_dir_entry *exofs_find_entry(struct inode *, struct dentry *,
 | 
						|
					 struct page **);
 | 
						|
int exofs_empty_dir(struct inode *);
 | 
						|
struct exofs_dir_entry *exofs_dotdot(struct inode *, struct page **);
 | 
						|
ino_t exofs_parent_ino(struct dentry *child);
 | 
						|
int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
 | 
						|
		    struct inode *);
 | 
						|
 | 
						|
/* super.c */
int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
 | 
						|
/*********************
 * operation vectors *
 *********************/

/* dir.c */
extern const struct file_operations exofs_dir_operations;

/* file.c */
extern const struct inode_operations exofs_file_inode_operations;
extern const struct file_operations exofs_file_operations;

/* inode.c */
extern const struct address_space_operations exofs_aops;
extern const struct osd_attr g_attr_logical_length;

/* namei.c */
extern const struct inode_operations exofs_dir_inode_operations;
extern const struct inode_operations exofs_special_inode_operations;

/* symlink.c */
extern const struct inode_operations exofs_symlink_inode_operations;
extern const struct inode_operations exofs_fast_symlink_inode_operations;
 | 
						|
#endif
 |