ceph: factor out libceph from Ceph file system

This factors out protocol and low-level storage parts of ceph into a
separate libceph module living in net/ceph and include/linux/ceph.  This
is mostly a matter of moving files around.  However, a few key pieces
of the interface change as well:

 - ceph_client becomes ceph_fs_client and ceph_client, where the latter
   captures the mon and osd clients, and the fs_client gets the mds client
   and file system specific pieces.
 - Mount option parsing and debugfs setup are correspondingly broken into
   two pieces.
 - The mon client gets a generic handler callback for otherwise unknown
   messages (mds map, in this case).
 - The basic supported/required feature bits can be expanded (and are by
   ceph_fs_client).

No functional change, aside from some subtle error handling cases that got
cleaned up in the refactoring process.

Signed-off-by: Sage Weil <sage@newdream.net>
This commit is contained in:
Yehuda Sadeh 2010-04-06 15:14:15 -07:00 committed by Sage Weil
parent ae1533b62b
commit 3d14c5d2b6
73 changed files with 2590 additions and 1862 deletions

View file

@ -1,5 +1,6 @@
#include "ceph_debug.h"
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/file.h>
@ -38,8 +39,8 @@
static struct ceph_mds_request *
prepare_open_request(struct super_block *sb, int flags, int create_mode)
{
struct ceph_client *client = ceph_sb_to_client(sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
int want_auth = USE_ANY_MDS;
int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN;
@ -117,8 +118,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
int ceph_open(struct inode *inode, struct file *file)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_client *client = ceph_sb_to_client(inode->i_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
struct ceph_file_info *cf = file->private_data;
struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
@ -216,8 +217,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
struct nameidata *nd, int mode,
int locked_dir)
{
struct ceph_client *client = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct file *file = nd->intent.open.file;
struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
struct ceph_mds_request *req;
@ -269,163 +270,6 @@ int ceph_release(struct inode *inode, struct file *file)
return 0;
}
/*
 * build a vector of user pages
 *
 * Pin the num_pages user pages starting at @data and return them in a
 * freshly allocated array.  Returns an ERR_PTR on failure: -ENOMEM if the
 * array cannot be allocated, the get_user_pages() error if pinning fails
 * outright, or -EFAULT if only part of the range could be pinned.
 *
 * @off and @len are currently unused.
 *
 * NOTE(review): get_user_pages() is called with write=0 even though the
 * O_DIRECT read path fills these pages with data -- confirm whether the
 * write flag should depend on the I/O direction.
 */
static struct page **get_direct_page_vector(const char __user *data,
					    int num_pages,
					    loff_t off, size_t len)
{
	struct page **pages;
	int rc;
	int i;

	pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
	if (!pages)
		return ERR_PTR(-ENOMEM);

	down_read(&current->mm->mmap_sem);
	rc = get_user_pages(current, current->mm, (unsigned long)data,
			    num_pages, 0, 0, pages, NULL);
	up_read(&current->mm->mmap_sem);

	/*
	 * get_user_pages() may pin fewer pages than requested.  Treat a
	 * short count as a failure: callers drop a reference on all
	 * num_pages entries, so the vector must be fully populated.
	 */
	if (rc < num_pages)
		goto fail;
	return pages;

fail:
	/* release whatever subset was pinned (none when rc < 0) */
	for (i = 0; i < rc; i++)
		put_page(pages[i]);
	kfree(pages);
	return ERR_PTR(rc < 0 ? rc : -EFAULT);
}
/*
 * Drop one reference on each of the first num_pages entries of @pages,
 * then free the array itself.
 */
static void put_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages) {
		put_page(pages[idx]);
		idx++;
	}
	kfree(pages);
}
/*
 * Free each of the first num_pages order-0 pages in @pages, then free
 * the array itself.
 */
void ceph_release_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages) {
		__free_pages(pages[idx], 0);
		idx++;
	}
	kfree(pages);
}
/*
 * allocate a vector of new pages
 *
 * Returns an array of num_pages freshly allocated pages, or
 * ERR_PTR(-ENOMEM) if either the array or any page cannot be allocated.
 */
static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
{
	struct page **vec;
	int n;

	vec = kmalloc(sizeof(*vec) * num_pages, flags);
	if (!vec)
		return ERR_PTR(-ENOMEM);

	for (n = 0; n < num_pages; n++) {
		vec[n] = __page_cache_alloc(flags);
		if (vec[n] != NULL)
			continue;

		/* unwind the n pages obtained so far, plus the array */
		ceph_release_page_vector(vec, n);
		return ERR_PTR(-ENOMEM);
	}
	return vec;
}
/*
 * copy user data into a page vector
 *
 * The first byte lands at the in-page offset of @off within pages[0].
 * Returns @len on success, or -EFAULT if a copy_from_user() call makes
 * no progress at all.
 */
static int copy_user_to_page_vector(struct page **pages,
				    const char __user *data,
				    loff_t off, size_t len)
{
	int idx = 0;
	int page_off = off & ~PAGE_CACHE_MASK;
	int remaining = len;

	while (remaining > 0) {
		int chunk = min_t(int, PAGE_CACHE_SIZE-page_off, remaining);
		int uncopied = copy_from_user(page_address(pages[idx]) + page_off,
					      data, chunk);
		int done = chunk - uncopied;

		if (done == 0)
			return -EFAULT;

		/* advance by however much was actually copied */
		data += done;
		remaining -= done;
		page_off += done;
		if (page_off == PAGE_CACHE_SIZE) {
			page_off = 0;
			idx++;
		}
	}
	return len;
}
/*
 * copy user data from a page vector into a user pointer
 *
 * The first byte is taken from the in-page offset of @off within
 * pages[0].  Returns @len on success, or -EFAULT if a copy_to_user()
 * call makes no progress at all.
 */
static int copy_page_vector_to_user(struct page **pages, char __user *data,
				    loff_t off, size_t len)
{
	int i = 0;
	int po = off & ~PAGE_CACHE_MASK;
	int left = len;
	int l, bad;

	while (left > 0) {
		l = min_t(int, left, PAGE_CACHE_SIZE-po);
		bad = copy_to_user(data, page_address(pages[i]) + po, l);
		if (bad == l)
			return -EFAULT;
		data += l - bad;
		left -= l - bad;
		/*
		 * Always track the in-page offset and only step to the next
		 * page once the current one is exhausted.  After a partial
		 * copy the retry must resume at the same page and offset,
		 * not skip ahead to the next page.
		 */
		po += l - bad;
		if (po == PAGE_CACHE_SIZE) {
			po = 0;
			i++;
		}
	}
	return len;
}
/*
 * Zero an extent within a page vector.  Offset is relative to the
 * start of the first page.
 *
 * The extent is handled in three pieces: an optional partial head page,
 * a run of whole pages, and an optional partial tail page.
 */
static void zero_page_vector_range(int off, int len, struct page **pages)
{
	int idx = off >> PAGE_CACHE_SHIFT;

	/* from here on, off is the offset within pages[idx] */
	off &= ~PAGE_CACHE_MASK;

	dout("zero_page_vector_page %u~%u\n", off, len);

	/* leading partial page? */
	if (off) {
		int stop = off + len;

		if (stop > (int)PAGE_CACHE_SIZE)
			stop = (int)PAGE_CACHE_SIZE;
		dout("zeroing %d %p head from %d\n", idx, pages[idx], off);
		zero_user_segment(pages[idx], off, stop);
		len -= stop - off;
		idx++;
	}

	/* whole pages in the middle */
	for (; len >= PAGE_CACHE_SIZE; len -= PAGE_CACHE_SIZE, idx++) {
		dout("zeroing %d %p len=%d\n", idx, pages[idx], len);
		zero_user_segment(pages[idx], 0, PAGE_CACHE_SIZE);
	}

	/* trailing partial page? */
	if (len) {
		dout("zeroing %d %p tail to %d\n", idx, pages[idx], len);
		zero_user_segment(pages[idx], 0, len);
	}
}
/*
* Read a range of bytes striped over one or more objects. Iterate over
* objects we stripe over. (That's not atomic, but good enough for now.)
@ -438,7 +282,7 @@ static int striped_read(struct inode *inode,
struct page **pages, int num_pages,
int *checkeof)
{
struct ceph_client *client = ceph_inode_to_client(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
u64 pos, this_len;
int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
@ -459,7 +303,7 @@ static int striped_read(struct inode *inode,
more:
this_len = left;
ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode),
ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, pos, &this_len,
ci->i_truncate_seq,
ci->i_truncate_size,
@ -477,8 +321,8 @@ more:
if (read < pos - off) {
dout(" zero gap %llu to %llu\n", off + read, pos);
zero_page_vector_range(page_off + read,
pos - off - read, pages);
ceph_zero_page_vector_range(page_off + read,
pos - off - read, pages);
}
pos += ret;
read = pos - off;
@ -495,8 +339,8 @@ more:
/* was original extent fully inside i_size? */
if (pos + left <= inode->i_size) {
dout("zero tail\n");
zero_page_vector_range(page_off + read, len - read,
pages);
ceph_zero_page_vector_range(page_off + read, len - read,
pages);
read = len;
goto out;
}
@ -531,7 +375,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
(file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
if (file->f_flags & O_DIRECT) {
pages = get_direct_page_vector(data, num_pages, off, len);
pages = ceph_get_direct_page_vector(data, num_pages, off, len);
/*
* flush any page cache pages in this range. this
@ -552,13 +396,13 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
ret = striped_read(inode, off, len, pages, num_pages, checkeof);
if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
ret = copy_page_vector_to_user(pages, data, off, ret);
ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
if (ret >= 0)
*poff = off + ret;
done:
if (file->f_flags & O_DIRECT)
put_page_vector(pages, num_pages);
ceph_put_page_vector(pages, num_pages);
else
ceph_release_page_vector(pages, num_pages);
dout("sync_read result %d\n", ret);
@ -594,7 +438,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
{
struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_client *client = ceph_inode_to_client(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_request *req;
struct page **pages;
int num_pages;
@ -642,7 +486,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
*/
more:
len = left;
req = ceph_osdc_new_request(&client->osdc, &ci->i_layout,
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), pos, &len,
CEPH_OSD_OP_WRITE, flags,
ci->i_snap_realm->cached_context,
@ -655,7 +499,7 @@ more:
num_pages = calc_pages_for(pos, len);
if (file->f_flags & O_DIRECT) {
pages = get_direct_page_vector(data, num_pages, pos, len);
pages = ceph_get_direct_page_vector(data, num_pages, pos, len);
if (IS_ERR(pages)) {
ret = PTR_ERR(pages);
goto out;
@ -673,7 +517,7 @@ more:
ret = PTR_ERR(pages);
goto out;
}
ret = copy_user_to_page_vector(pages, data, pos, len);
ret = ceph_copy_user_to_page_vector(pages, data, pos, len);
if (ret < 0) {
ceph_release_page_vector(pages, num_pages);
goto out;
@ -689,7 +533,7 @@ more:
req->r_num_pages = num_pages;
req->r_inode = inode;
ret = ceph_osdc_start_request(&client->osdc, req, false);
ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!ret) {
if (req->r_safe_callback) {
/*
@ -701,11 +545,11 @@ more:
spin_unlock(&ci->i_unsafe_lock);
ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
}
ret = ceph_osdc_wait_request(&client->osdc, req);
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
}
if (file->f_flags & O_DIRECT)
put_page_vector(pages, num_pages);
ceph_put_page_vector(pages, num_pages);
else if (file->f_flags & O_SYNC)
ceph_release_page_vector(pages, num_pages);
@ -814,7 +658,8 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
struct ceph_file_info *fi = file->private_data;
struct inode *inode = file->f_dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
struct ceph_osd_client *osdc =
&ceph_sb_to_client(inode->i_sb)->client->osdc;
loff_t endoff = pos + iov->iov_len;
int want, got = 0;
int ret, err;