mirror of
https://github.com/followmsi/android_kernel_google_msm.git
synced 2024-11-06 23:17:41 +00:00
[PATCH] FUSE - device functions
This adds the FUSE device handling functions. This contains the following files: o dev.c - fuse device operations (read, write, release, poll) - registers misc device - support for sending requests to userspace Signed-off-by: Miklos Szeredi <miklos@szeredi.hu> Signed-off-by: Adrian Bunk <bunk@stusta.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
d8a5ba4545
commit
334f485df8
6 changed files with 1537 additions and 7 deletions
341
Documentation/filesystems/fuse.txt
Normal file
341
Documentation/filesystems/fuse.txt
Normal file
|
@ -0,0 +1,341 @@
|
|||
Definitions
|
||||
~~~~~~~~~~~
|
||||
|
||||
Userspace filesystem:
|
||||
|
||||
A filesystem in which data and metadata are provided by an ordinary
|
||||
userspace process. The filesystem can be accessed normally through
|
||||
the kernel interface.
|
||||
|
||||
Filesystem daemon:
|
||||
|
||||
The process(es) providing the data and metadata of the filesystem.
|
||||
|
||||
Non-privileged mount (or user mount):
|
||||
|
||||
A userspace filesystem mounted by a non-privileged (non-root) user.
|
||||
The filesystem daemon is running with the privileges of the mounting
|
||||
user. NOTE: this is not the same as mounts allowed with the "user"
|
||||
option in /etc/fstab, which is not discussed here.
|
||||
|
||||
Mount owner:
|
||||
|
||||
The user who does the mounting.
|
||||
|
||||
User:
|
||||
|
||||
The user who is performing filesystem operations.
|
||||
|
||||
What is FUSE?
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
FUSE is a userspace filesystem framework. It consists of a kernel
|
||||
module (fuse.ko), a userspace library (libfuse.*) and a mount utility
|
||||
(fusermount).
|
||||
|
||||
One of the most important features of FUSE is allowing secure,
|
||||
non-privileged mounts. This opens up new possibilities for the use of
|
||||
filesystems. A good example is sshfs: a secure network filesystem
|
||||
using the sftp protocol.
|
||||
|
||||
The userspace library and utilities are available from the FUSE
|
||||
homepage:
|
||||
|
||||
http://fuse.sourceforge.net/
|
||||
|
||||
Mount options
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
'fd=N'
|
||||
|
||||
The file descriptor to use for communication between the userspace
|
||||
filesystem and the kernel. The file descriptor must have been
|
||||
obtained by opening the FUSE device ('/dev/fuse').
|
||||
|
||||
'rootmode=M'
|
||||
|
||||
The file mode of the filesystem's root in octal representation.
|
||||
|
||||
'user_id=N'
|
||||
|
||||
The numeric user id of the mount owner.
|
||||
|
||||
'group_id=N'
|
||||
|
||||
The numeric group id of the mount owner.
|
||||
|
||||
'default_permissions'
|
||||
|
||||
By default FUSE doesn't check file access permissions, the
|
||||
filesystem is free to implement its access policy or leave it to
|
||||
the underlying file access mechanism (e.g. in case of network
|
||||
filesystems). This option enables permission checking, restricting
|
||||
access based on file mode. This option is usually useful
|
||||
together with the 'allow_other' mount option.
|
||||
|
||||
'allow_other'
|
||||
|
||||
This option overrides the security measure restricting file access
|
||||
to the user mounting the filesystem. This option is by default only
|
||||
allowed to root, but this restriction can be removed with a
|
||||
(userspace) configuration option.
|
||||
|
||||
'kernel_cache'
|
||||
|
||||
This option disables flushing the cache of the file contents on
|
||||
every open(). This should only be enabled on filesystems, where the
|
||||
file data is never changed externally (not through the mounted FUSE
|
||||
filesystem). Thus it is not suitable for network filesystems and
|
||||
other "intermediate" filesystems.
|
||||
|
||||
NOTE: if this option is not specified (and neither 'direct_io') data
|
||||
is still cached after the open(), so a read() system call will not
|
||||
always initiate a read operation.
|
||||
|
||||
'direct_io'
|
||||
|
||||
This option disables the use of page cache (file content cache) in
|
||||
the kernel for this filesystem. This has several effects:
|
||||
|
||||
- Each read() or write() system call will initiate one or more
|
||||
read or write operations, data will not be cached in the
|
||||
kernel.
|
||||
|
||||
- The return value of the read() and write() system calls will
|
||||
correspond to the return values of the read and write
|
||||
operations. This is useful for example if the file size is not
|
||||
known in advance (before reading it).
|
||||
|
||||
'max_read=N'
|
||||
|
||||
With this option the maximum size of read operations can be set.
|
||||
The default is infinite. Note that the size of read requests is
|
||||
limited anyway to 32 pages (which is 128kbyte on i386).
|
||||
|
||||
How do non-privileged mounts work?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Since the mount() system call is a privileged operation, a helper
|
||||
program (fusermount) is needed, which is installed setuid root.
|
||||
|
||||
The implication of providing non-privileged mounts is that the mount
|
||||
owner must not be able to use this capability to compromise the
|
||||
system. Obvious requirements arising from this are:
|
||||
|
||||
A) mount owner should not be able to get elevated privileges with the
|
||||
help of the mounted filesystem
|
||||
|
||||
B) mount owner should not get illegitimate access to information from
|
||||
other users' and the super user's processes
|
||||
|
||||
C) mount owner should not be able to induce undesired behavior in
|
||||
other users' or the super user's processes
|
||||
|
||||
How are requirements fulfilled?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
A) The mount owner could gain elevated privileges by either:
|
||||
|
||||
1) creating a filesystem containing a device file, then opening
|
||||
this device
|
||||
|
||||
2) creating a filesystem containing a suid or sgid application,
|
||||
then executing this application
|
||||
|
||||
The solution is not to allow opening device files and ignore
|
||||
setuid and setgid bits when executing programs. To ensure this
|
||||
fusermount always adds "nosuid" and "nodev" to the mount options
|
||||
for non-privileged mounts.
|
||||
|
||||
B) If another user is accessing files or directories in the
|
||||
filesystem, the filesystem daemon serving requests can record the
|
||||
exact sequence and timing of operations performed. This
|
||||
information is otherwise inaccessible to the mount owner, so this
|
||||
counts as an information leak.
|
||||
|
||||
The solution to this problem will be presented in point 2) of C).
|
||||
|
||||
C) There are several ways in which the mount owner can induce
|
||||
undesired behavior in other users' processes, such as:
|
||||
|
||||
1) mounting a filesystem over a file or directory which the mount
|
||||
owner could otherwise not be able to modify (or could only
|
||||
make limited modifications).
|
||||
|
||||
This is solved in fusermount, by checking the access
|
||||
permissions on the mountpoint and only allowing the mount if
|
||||
the mount owner can do unlimited modification (has write
|
||||
access to the mountpoint, and mountpoint is not a "sticky"
|
||||
directory)
|
||||
|
||||
2) Even if 1) is solved the mount owner can change the behavior
|
||||
of other users' processes.
|
||||
|
||||
i) It can slow down or indefinitely delay the execution of a
|
||||
filesystem operation creating a DoS against the user or the
|
||||
whole system. For example a suid application locking a
|
||||
system file, and then accessing a file on the mount owner's
|
||||
filesystem could be stopped, and thus causing the system
|
||||
file to be locked forever.
|
||||
|
||||
ii) It can present files or directories of unlimited length, or
|
||||
directory structures of unlimited depth, possibly causing a
|
||||
system process to eat up diskspace, memory or other
|
||||
resources, again causing DoS.
|
||||
|
||||
The solution to this as well as B) is not to allow processes
|
||||
to access the filesystem, which could otherwise not be
|
||||
monitored or manipulated by the mount owner. Since if the
|
||||
mount owner can ptrace a process, it can do all of the above
|
||||
without using a FUSE mount, the same criteria as used in
|
||||
ptrace can be used to check if a process is allowed to access
|
||||
the filesystem or not.
|
||||
|
||||
Note that the ptrace check is not strictly necessary to
|
||||
prevent C/2/i, it is enough to check if mount owner has enough
|
||||
privilege to send signal to the process accessing the
|
||||
filesystem, since SIGSTOP can be used to get a similar effect.
|
||||
|
||||
I think these limitations are unacceptable?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
If a sysadmin trusts the users enough, or can ensure through other
|
||||
measures, that system processes will never enter non-privileged
|
||||
mounts, it can relax the last limitation with a "user_allow_other"
|
||||
config option. If this config option is set, the mounting user can
|
||||
add the "allow_other" mount option which disables the check for other
|
||||
users' processes.
|
||||
|
||||
Kernel - userspace interface
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The following diagram shows how a filesystem operation (in this
|
||||
example unlink) is performed in FUSE.
|
||||
|
||||
NOTE: everything in this description is greatly simplified
|
||||
|
||||
| "rm /mnt/fuse/file" | FUSE filesystem daemon
|
||||
| |
|
||||
| | >sys_read()
|
||||
| | >fuse_dev_read()
|
||||
| | >request_wait()
|
||||
| | [sleep on fc->waitq]
|
||||
| |
|
||||
| >sys_unlink() |
|
||||
| >fuse_unlink() |
|
||||
| [get request from |
|
||||
| fc->unused_list] |
|
||||
| >request_send() |
|
||||
| [queue req on fc->pending] |
|
||||
| [wake up fc->waitq] | [woken up]
|
||||
| >request_wait_answer() |
|
||||
| [sleep on req->waitq] |
|
||||
| | <request_wait()
|
||||
| | [remove req from fc->pending]
|
||||
| | [copy req to read buffer]
|
||||
| | [add req to fc->processing]
|
||||
| | <fuse_dev_read()
|
||||
| | <sys_read()
|
||||
| |
|
||||
| | [perform unlink]
|
||||
| |
|
||||
| | >sys_write()
|
||||
| | >fuse_dev_write()
|
||||
| | [look up req in fc->processing]
|
||||
| | [remove from fc->processing]
|
||||
| | [copy write buffer to req]
|
||||
| [woken up] | [wake up req->waitq]
|
||||
| | <fuse_dev_write()
|
||||
| | <sys_write()
|
||||
| <request_wait_answer() |
|
||||
| <request_send() |
|
||||
| [add request to |
|
||||
| fc->unused_list] |
|
||||
| <fuse_unlink() |
|
||||
| <sys_unlink() |
|
||||
|
||||
There are a couple of ways in which to deadlock a FUSE filesystem.
|
||||
Since we are talking about unprivileged userspace programs,
|
||||
something must be done about these.
|
||||
|
||||
Scenario 1 - Simple deadlock
|
||||
-----------------------------
|
||||
|
||||
| "rm /mnt/fuse/file" | FUSE filesystem daemon
|
||||
| |
|
||||
| >sys_unlink("/mnt/fuse/file") |
|
||||
| [acquire inode semaphore |
|
||||
| for "file"] |
|
||||
| >fuse_unlink() |
|
||||
| [sleep on req->waitq] |
|
||||
| | <sys_read()
|
||||
| | >sys_unlink("/mnt/fuse/file")
|
||||
| | [acquire inode semaphore
|
||||
| | for "file"]
|
||||
| | *DEADLOCK*
|
||||
|
||||
The solution for this is to allow requests to be interrupted while
|
||||
they are in userspace:
|
||||
|
||||
| [interrupted by signal] |
|
||||
| <fuse_unlink() |
|
||||
| [release semaphore] | [semaphore acquired]
|
||||
| <sys_unlink() |
|
||||
| | >fuse_unlink()
|
||||
| | [queue req on fc->pending]
|
||||
| | [wake up fc->waitq]
|
||||
| | [sleep on req->waitq]
|
||||
|
||||
If the filesystem daemon was single threaded, this will stop here,
|
||||
since there's no other thread to dequeue and execute the request.
|
||||
In this case the solution is to kill the FUSE daemon as well. If
|
||||
there are multiple serving threads, you just have to kill them as
|
||||
long as any remain.
|
||||
|
||||
Moral: a filesystem which deadlocks, can soon find itself dead.
|
||||
|
||||
Scenario 2 - Tricky deadlock
|
||||
----------------------------
|
||||
|
||||
This one needs a carefully crafted filesystem. It's a variation on
|
||||
the above, only the call back to the filesystem is not explicit,
|
||||
but is caused by a pagefault.
|
||||
|
||||
| Kamikaze filesystem thread 1 | Kamikaze filesystem thread 2
|
||||
| |
|
||||
| [fd = open("/mnt/fuse/file")] | [request served normally]
|
||||
| [mmap fd to 'addr'] |
|
||||
| [close fd] | [FLUSH triggers 'magic' flag]
|
||||
| [read a byte from addr] |
|
||||
| >do_page_fault() |
|
||||
| [find or create page] |
|
||||
| [lock page] |
|
||||
| >fuse_readpage() |
|
||||
| [queue READ request] |
|
||||
| [sleep on req->waitq] |
|
||||
| | [read request to buffer]
|
||||
| | [create reply header before addr]
|
||||
| | >sys_write(addr - headerlength)
|
||||
| | >fuse_dev_write()
|
||||
| | [look up req in fc->processing]
|
||||
| | [remove from fc->processing]
|
||||
| | [copy write buffer to req]
|
||||
| | >do_page_fault()
|
||||
| | [find or create page]
|
||||
| | [lock page]
|
||||
| | * DEADLOCK *
|
||||
|
||||
Solution is again to let the request be interrupted (not
|
||||
elaborated further).
|
||||
|
||||
An additional problem is that while the write buffer is being
|
||||
copied to the request, the request must not be interrupted. This
|
||||
is because the destination address of the copy may not be valid
|
||||
after the request is interrupted.
|
||||
|
||||
This is solved with doing the copy atomically, and allowing
|
||||
interruption while the page(s) belonging to the write buffer are
|
||||
faulted with get_user_pages(). The 'req->locked' flag indicates
|
||||
when the copy is taking place, and interruption is delayed until
|
||||
this flag is unset.
|
||||
|
|
@ -4,4 +4,4 @@
|
|||
|
||||
obj-$(CONFIG_FUSE_FS) += fuse.o
|
||||
|
||||
fuse-objs := inode.o
|
||||
fuse-objs := dev.o inode.o
|
||||
|
|
884
fs/fuse/dev.c
Normal file
884
fs/fuse/dev.c
Normal file
|
@ -0,0 +1,884 @@
|
|||
/*
|
||||
FUSE: Filesystem in Userspace
|
||||
Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
|
||||
|
||||
This program can be distributed under the terms of the GNU GPL.
|
||||
See the file COPYING.
|
||||
*/
|
||||
|
||||
#include "fuse_i.h"
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
|
||||
|
||||
static kmem_cache_t *fuse_req_cachep;
|
||||
|
||||
static inline struct fuse_conn *fuse_get_conn(struct file *file)
|
||||
{
|
||||
struct fuse_conn *fc;
|
||||
spin_lock(&fuse_lock);
|
||||
fc = file->private_data;
|
||||
if (fc && !fc->sb)
|
||||
fc = NULL;
|
||||
spin_unlock(&fuse_lock);
|
||||
return fc;
|
||||
}
|
||||
|
||||
static inline void fuse_request_init(struct fuse_req *req)
|
||||
{
|
||||
memset(req, 0, sizeof(*req));
|
||||
INIT_LIST_HEAD(&req->list);
|
||||
init_waitqueue_head(&req->waitq);
|
||||
atomic_set(&req->count, 1);
|
||||
}
|
||||
|
||||
struct fuse_req *fuse_request_alloc(void)
|
||||
{
|
||||
struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
|
||||
if (req)
|
||||
fuse_request_init(req);
|
||||
return req;
|
||||
}
|
||||
|
||||
void fuse_request_free(struct fuse_req *req)
|
||||
{
|
||||
kmem_cache_free(fuse_req_cachep, req);
|
||||
}
|
||||
|
||||
/*
 * Block every signal except SIGKILL for the current task.  The
 * previous signal mask is stored in *oldset so that restore_sigs()
 * can reinstate it.
 */
static inline void block_sigs(sigset_t *oldset)
{
	sigset_t all_but_kill;

	siginitsetinv(&all_but_kill, sigmask(SIGKILL));
	sigprocmask(SIG_BLOCK, &all_but_kill, oldset);
}
|
||||
|
||||
/* Reinstate the signal mask previously saved in *oldset. */
static inline void restore_sigs(sigset_t *oldset)
{
	sigprocmask(SIG_SETMASK, oldset, NULL);
}
|
||||
|
||||
void fuse_reset_request(struct fuse_req *req)
|
||||
{
|
||||
int preallocated = req->preallocated;
|
||||
BUG_ON(atomic_read(&req->count) != 1);
|
||||
fuse_request_init(req);
|
||||
req->preallocated = preallocated;
|
||||
}
|
||||
|
||||
static void __fuse_get_request(struct fuse_req *req)
|
||||
{
|
||||
atomic_inc(&req->count);
|
||||
}
|
||||
|
||||
/* Must be called with > 1 refcount */
|
||||
static void __fuse_put_request(struct fuse_req *req)
|
||||
{
|
||||
BUG_ON(atomic_read(&req->count) < 2);
|
||||
atomic_dec(&req->count);
|
||||
}
|
||||
|
||||
static struct fuse_req *do_get_request(struct fuse_conn *fc)
|
||||
{
|
||||
struct fuse_req *req;
|
||||
|
||||
spin_lock(&fuse_lock);
|
||||
BUG_ON(list_empty(&fc->unused_list));
|
||||
req = list_entry(fc->unused_list.next, struct fuse_req, list);
|
||||
list_del_init(&req->list);
|
||||
spin_unlock(&fuse_lock);
|
||||
fuse_request_init(req);
|
||||
req->preallocated = 1;
|
||||
req->in.h.uid = current->fsuid;
|
||||
req->in.h.gid = current->fsgid;
|
||||
req->in.h.pid = current->pid;
|
||||
return req;
|
||||
}
|
||||
|
||||
struct fuse_req *fuse_get_request(struct fuse_conn *fc)
|
||||
{
|
||||
if (down_interruptible(&fc->outstanding_sem))
|
||||
return NULL;
|
||||
return do_get_request(fc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Non-interruptible version of the above function is for operations
|
||||
* which can't legally return -ERESTART{SYS,NOINTR}. This can still
|
||||
* return NULL, but only in case the signal is SIGKILL.
|
||||
*/
|
||||
struct fuse_req *fuse_get_request_nonint(struct fuse_conn *fc)
|
||||
{
|
||||
int intr;
|
||||
sigset_t oldset;
|
||||
|
||||
block_sigs(&oldset);
|
||||
intr = down_interruptible(&fc->outstanding_sem);
|
||||
restore_sigs(&oldset);
|
||||
return intr ? NULL : do_get_request(fc);
|
||||
}
|
||||
|
||||
static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
|
||||
{
|
||||
spin_lock(&fuse_lock);
|
||||
if (req->preallocated)
|
||||
list_add(&req->list, &fc->unused_list);
|
||||
else
|
||||
fuse_request_free(req);
|
||||
|
||||
/* If we are in debt decrease that first */
|
||||
if (fc->outstanding_debt)
|
||||
fc->outstanding_debt--;
|
||||
else
|
||||
up(&fc->outstanding_sem);
|
||||
spin_unlock(&fuse_lock);
|
||||
}
|
||||
|
||||
void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
|
||||
{
|
||||
if (atomic_dec_and_test(&req->count))
|
||||
fuse_putback_request(fc, req);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called when a request is finished. Either a reply
|
||||
* has arrived or it was interrupted (and not yet sent) or some error
|
||||
* occured during communication with userspace, or the device file was
|
||||
* closed. It decreases the referece count for the request. In case
|
||||
* of a background request the referece to the stored objects are
|
||||
* released. The requester thread is woken up (if still waiting), and
|
||||
* finally the request is either freed or put on the unused_list
|
||||
*
|
||||
* Called with fuse_lock, unlocks it
|
||||
*/
|
||||
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
|
||||
{
|
||||
int putback;
|
||||
req->finished = 1;
|
||||
putback = atomic_dec_and_test(&req->count);
|
||||
spin_unlock(&fuse_lock);
|
||||
if (req->background) {
|
||||
if (req->inode)
|
||||
iput(req->inode);
|
||||
if (req->inode2)
|
||||
iput(req->inode2);
|
||||
if (req->file)
|
||||
fput(req->file);
|
||||
}
|
||||
wake_up(&req->waitq);
|
||||
if (req->in.h.opcode == FUSE_INIT) {
|
||||
int i;
|
||||
|
||||
if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
|
||||
fc->conn_error = 1;
|
||||
|
||||
/* After INIT reply is received other requests can go
|
||||
out. So do (FUSE_MAX_OUTSTANDING - 1) number of
|
||||
up()s on outstanding_sem. The last up() is done in
|
||||
fuse_putback_request() */
|
||||
for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
|
||||
up(&fc->outstanding_sem);
|
||||
}
|
||||
if (putback)
|
||||
fuse_putback_request(fc, req);
|
||||
}
|
||||
|
||||
static void background_request(struct fuse_req *req)
|
||||
{
|
||||
/* Need to get hold of the inode(s) and/or file used in the
|
||||
request, so FORGET and RELEASE are not sent too early */
|
||||
req->background = 1;
|
||||
if (req->inode)
|
||||
req->inode = igrab(req->inode);
|
||||
if (req->inode2)
|
||||
req->inode2 = igrab(req->inode2);
|
||||
if (req->file)
|
||||
get_file(req->file);
|
||||
}
|
||||
|
||||
static int request_wait_answer_nonint(struct fuse_req *req)
|
||||
{
|
||||
int err;
|
||||
sigset_t oldset;
|
||||
block_sigs(&oldset);
|
||||
err = wait_event_interruptible(req->waitq, req->finished);
|
||||
restore_sigs(&oldset);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Called with fuse_lock held. Releases, and then reacquires it. */
|
||||
static void request_wait_answer(struct fuse_req *req, int interruptible)
|
||||
{
|
||||
int intr;
|
||||
|
||||
spin_unlock(&fuse_lock);
|
||||
if (interruptible)
|
||||
intr = wait_event_interruptible(req->waitq, req->finished);
|
||||
else
|
||||
intr = request_wait_answer_nonint(req);
|
||||
spin_lock(&fuse_lock);
|
||||
if (intr && interruptible && req->sent) {
|
||||
/* If request is already in userspace, only allow KILL
|
||||
signal to interrupt */
|
||||
spin_unlock(&fuse_lock);
|
||||
intr = request_wait_answer_nonint(req);
|
||||
spin_lock(&fuse_lock);
|
||||
}
|
||||
if (!intr)
|
||||
return;
|
||||
|
||||
if (!interruptible || req->sent)
|
||||
req->out.h.error = -EINTR;
|
||||
else
|
||||
req->out.h.error = -ERESTARTNOINTR;
|
||||
|
||||
req->interrupted = 1;
|
||||
if (req->locked) {
|
||||
/* This is uninterruptible sleep, because data is
|
||||
being copied to/from the buffers of req. During
|
||||
locked state, there mustn't be any filesystem
|
||||
operation (e.g. page fault), since that could lead
|
||||
to deadlock */
|
||||
spin_unlock(&fuse_lock);
|
||||
wait_event(req->waitq, !req->locked);
|
||||
spin_lock(&fuse_lock);
|
||||
}
|
||||
if (!req->sent && !list_empty(&req->list)) {
|
||||
list_del(&req->list);
|
||||
__fuse_put_request(req);
|
||||
} else if (!req->finished && req->sent)
|
||||
background_request(req);
|
||||
}
|
||||
|
||||
static unsigned len_args(unsigned numargs, struct fuse_arg *args)
|
||||
{
|
||||
unsigned nbytes = 0;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < numargs; i++)
|
||||
nbytes += args[i].size;
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
|
||||
{
|
||||
fc->reqctr++;
|
||||
/* zero is special */
|
||||
if (fc->reqctr == 0)
|
||||
fc->reqctr = 1;
|
||||
req->in.h.unique = fc->reqctr;
|
||||
req->in.h.len = sizeof(struct fuse_in_header) +
|
||||
len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
|
||||
if (!req->preallocated) {
|
||||
/* If request is not preallocated (either FORGET or
|
||||
RELEASE), then still decrease outstanding_sem, so
|
||||
user can't open infinite number of files while not
|
||||
processing the RELEASE requests. However for
|
||||
efficiency do it without blocking, so if down()
|
||||
would block, just increase the debt instead */
|
||||
if (down_trylock(&fc->outstanding_sem))
|
||||
fc->outstanding_debt++;
|
||||
}
|
||||
list_add_tail(&req->list, &fc->pending);
|
||||
wake_up(&fc->waitq);
|
||||
}
|
||||
|
||||
/*
 * Send a request that expects a reply and wait for it to finish.
 *
 * If the connection has no file, or is flagged with conn_error, the
 * request is not queued and req->out.h.error is set to -ENOTCONN or
 * -ECONNREFUSED respectively.
 */
static void request_send_wait(struct fuse_conn *fc, struct fuse_req *req,
			      int interruptible)
{
	req->isreply = 1;

	spin_lock(&fuse_lock);
	if (!fc->file) {
		req->out.h.error = -ENOTCONN;
	} else if (fc->conn_error) {
		req->out.h.error = -ECONNREFUSED;
	} else {
		queue_request(fc, req);
		/* acquire extra reference, since request is still needed
		   after request_end() */
		__fuse_get_request(req);

		request_wait_answer(req, interruptible);
	}
	spin_unlock(&fuse_lock);
}
|
||||
|
||||
/* Send a request and wait for its reply; the wait is interruptible. */
void request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	request_send_wait(fc, req, 1);
}
|
||||
|
||||
/*
 * Non-interruptible counterpart of request_send(), for operations
 * which can't legally return -ERESTART{SYS,NOINTR}.  This can still
 * be interrupted but only with SIGKILL.
 */
void request_send_nonint(struct fuse_conn *fc, struct fuse_req *req)
{
	request_send_wait(fc, req, 0);
}
|
||||
|
||||
static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
|
||||
{
|
||||
spin_lock(&fuse_lock);
|
||||
if (fc->file) {
|
||||
queue_request(fc, req);
|
||||
spin_unlock(&fuse_lock);
|
||||
} else {
|
||||
req->out.h.error = -ENOTCONN;
|
||||
request_end(fc, req);
|
||||
}
|
||||
}
|
||||
|
||||
void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
|
||||
{
|
||||
req->isreply = 0;
|
||||
request_send_nowait(fc, req);
|
||||
}
|
||||
|
||||
void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
|
||||
{
|
||||
req->isreply = 1;
|
||||
background_request(req);
|
||||
request_send_nowait(fc, req);
|
||||
}
|
||||
|
||||
void fuse_send_init(struct fuse_conn *fc)
|
||||
{
|
||||
/* This is called from fuse_read_super() so there's guaranteed
|
||||
to be a request available */
|
||||
struct fuse_req *req = do_get_request(fc);
|
||||
struct fuse_init_in_out *arg = &req->misc.init_in_out;
|
||||
arg->major = FUSE_KERNEL_VERSION;
|
||||
arg->minor = FUSE_KERNEL_MINOR_VERSION;
|
||||
req->in.h.opcode = FUSE_INIT;
|
||||
req->in.numargs = 1;
|
||||
req->in.args[0].size = sizeof(*arg);
|
||||
req->in.args[0].value = arg;
|
||||
req->out.numargs = 1;
|
||||
req->out.args[0].size = sizeof(*arg);
|
||||
req->out.args[0].value = arg;
|
||||
request_send_background(fc, req);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock the request. Up to the next unlock_request() there mustn't be
|
||||
* anything that could cause a page-fault. If the request was already
|
||||
* interrupted bail out.
|
||||
*/
|
||||
static inline int lock_request(struct fuse_req *req)
|
||||
{
|
||||
int err = 0;
|
||||
if (req) {
|
||||
spin_lock(&fuse_lock);
|
||||
if (req->interrupted)
|
||||
err = -ENOENT;
|
||||
else
|
||||
req->locked = 1;
|
||||
spin_unlock(&fuse_lock);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unlock request. If it was interrupted during being locked, the
|
||||
* requester thread is currently waiting for it to be unlocked, so
|
||||
* wake it up.
|
||||
*/
|
||||
static inline void unlock_request(struct fuse_req *req)
|
||||
{
|
||||
if (req) {
|
||||
spin_lock(&fuse_lock);
|
||||
req->locked = 0;
|
||||
if (req->interrupted)
|
||||
wake_up(&req->waitq);
|
||||
spin_unlock(&fuse_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/* State of a copy between a request and a userspace iovec buffer. */
struct fuse_copy_state {
	int write;			/* non-zero: copy into the user buffer */
	struct fuse_req *req;		/* request being copied; may be NULL */
	const struct iovec *iov;	/* next iovec segment to consume */
	unsigned long nr_segs;		/* segments left in iov */
	unsigned long seglen;		/* bytes left in the current segment */
	unsigned long addr;		/* user address of the next byte */
	struct page *pg;		/* user page obtained by get_user_pages() */
	void *mapaddr;			/* kmap_atomic() mapping of pg, or NULL */
	void *buf;			/* current position inside the mapping */
	unsigned len;			/* bytes still usable at buf */
};
|
||||
|
||||
static void fuse_copy_init(struct fuse_copy_state *cs, int write,
|
||||
struct fuse_req *req, const struct iovec *iov,
|
||||
unsigned long nr_segs)
|
||||
{
|
||||
memset(cs, 0, sizeof(*cs));
|
||||
cs->write = write;
|
||||
cs->req = req;
|
||||
cs->iov = iov;
|
||||
cs->nr_segs = nr_segs;
|
||||
}
|
||||
|
||||
/* Unmap and put previous page of userspace buffer */
|
||||
static inline void fuse_copy_finish(struct fuse_copy_state *cs)
|
||||
{
|
||||
if (cs->mapaddr) {
|
||||
kunmap_atomic(cs->mapaddr, KM_USER0);
|
||||
if (cs->write) {
|
||||
flush_dcache_page(cs->pg);
|
||||
set_page_dirty_lock(cs->pg);
|
||||
}
|
||||
put_page(cs->pg);
|
||||
cs->mapaddr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Get another pagefull of userspace buffer, and map it to kernel
|
||||
* address space, and lock request
|
||||
*/
|
||||
static int fuse_copy_fill(struct fuse_copy_state *cs)
|
||||
{
|
||||
unsigned long offset;
|
||||
int err;
|
||||
|
||||
unlock_request(cs->req);
|
||||
fuse_copy_finish(cs);
|
||||
if (!cs->seglen) {
|
||||
BUG_ON(!cs->nr_segs);
|
||||
cs->seglen = cs->iov[0].iov_len;
|
||||
cs->addr = (unsigned long) cs->iov[0].iov_base;
|
||||
cs->iov ++;
|
||||
cs->nr_segs --;
|
||||
}
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
|
||||
&cs->pg, NULL);
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
if (err < 0)
|
||||
return err;
|
||||
BUG_ON(err != 1);
|
||||
offset = cs->addr % PAGE_SIZE;
|
||||
cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
|
||||
cs->buf = cs->mapaddr + offset;
|
||||
cs->len = min(PAGE_SIZE - offset, cs->seglen);
|
||||
cs->seglen -= cs->len;
|
||||
cs->addr += cs->len;
|
||||
|
||||
return lock_request(cs->req);
|
||||
}
|
||||
|
||||
/* Do as much copy to/from userspace buffer as we can */
|
||||
static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
|
||||
unsigned *size)
|
||||
{
|
||||
unsigned ncpy = min(*size, cs->len);
|
||||
if (val) {
|
||||
if (cs->write)
|
||||
memcpy(cs->buf, *val, ncpy);
|
||||
else
|
||||
memcpy(*val, cs->buf, ncpy);
|
||||
*val += ncpy;
|
||||
}
|
||||
*size -= ncpy;
|
||||
cs->len -= ncpy;
|
||||
cs->buf += ncpy;
|
||||
return ncpy;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy a page in the request to/from the userspace buffer. Must be
|
||||
* done atomically
|
||||
*/
|
||||
static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
|
||||
unsigned offset, unsigned count, int zeroing)
|
||||
{
|
||||
if (page && zeroing && count < PAGE_SIZE) {
|
||||
void *mapaddr = kmap_atomic(page, KM_USER1);
|
||||
memset(mapaddr, 0, PAGE_SIZE);
|
||||
kunmap_atomic(mapaddr, KM_USER1);
|
||||
}
|
||||
while (count) {
|
||||
int err;
|
||||
if (!cs->len && (err = fuse_copy_fill(cs)))
|
||||
return err;
|
||||
if (page) {
|
||||
void *mapaddr = kmap_atomic(page, KM_USER1);
|
||||
void *buf = mapaddr + offset;
|
||||
offset += fuse_copy_do(cs, &buf, &count);
|
||||
kunmap_atomic(mapaddr, KM_USER1);
|
||||
} else
|
||||
offset += fuse_copy_do(cs, NULL, &count);
|
||||
}
|
||||
if (page && !cs->write)
|
||||
flush_dcache_page(page);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Copy pages in the request to/from userspace buffer */
|
||||
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
|
||||
int zeroing)
|
||||
{
|
||||
unsigned i;
|
||||
struct fuse_req *req = cs->req;
|
||||
unsigned offset = req->page_offset;
|
||||
unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
|
||||
|
||||
for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
|
||||
struct page *page = req->pages[i];
|
||||
int err = fuse_copy_page(cs, page, offset, count, zeroing);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
nbytes -= count;
|
||||
count = min(nbytes, (unsigned) PAGE_SIZE);
|
||||
offset = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Copy a single argument in the request to/from userspace buffer */
|
||||
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
|
||||
{
|
||||
while (size) {
|
||||
int err;
|
||||
if (!cs->len && (err = fuse_copy_fill(cs)))
|
||||
return err;
|
||||
fuse_copy_do(cs, &val, &size);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Copy request arguments to/from userspace buffer */
|
||||
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
|
||||
unsigned argpages, struct fuse_arg *args,
|
||||
int zeroing)
|
||||
{
|
||||
int err = 0;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; !err && i < numargs; i++) {
|
||||
struct fuse_arg *arg = &args[i];
|
||||
if (i == numargs - 1 && argpages)
|
||||
err = fuse_copy_pages(cs, arg->size, zeroing);
|
||||
else
|
||||
err = fuse_copy_one(cs, arg->value, arg->size);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Wait until a request is available on the pending list */
|
||||
static void request_wait(struct fuse_conn *fc)
|
||||
{
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
|
||||
add_wait_queue_exclusive(&fc->waitq, &wait);
|
||||
while (fc->sb && list_empty(&fc->pending)) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
if (signal_pending(current))
|
||||
break;
|
||||
|
||||
spin_unlock(&fuse_lock);
|
||||
schedule();
|
||||
spin_lock(&fuse_lock);
|
||||
}
|
||||
set_current_state(TASK_RUNNING);
|
||||
remove_wait_queue(&fc->waitq, &wait);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read a single request into the userspace filesystem's buffer. This
|
||||
* function waits until a request is available, then removes it from
|
||||
* the pending list and copies request data to userspace buffer. If
|
||||
* no reply is needed (FORGET) or request has been interrupted or
|
||||
* there was an error during the copying then it's finished by calling
|
||||
* request_end(). Otherwise add it to the processing list, and set
|
||||
* the 'sent' flag.
|
||||
*/
|
||||
static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t *off)
|
||||
{
|
||||
int err;
|
||||
struct fuse_conn *fc;
|
||||
struct fuse_req *req;
|
||||
struct fuse_in *in;
|
||||
struct fuse_copy_state cs;
|
||||
unsigned reqsize;
|
||||
|
||||
spin_lock(&fuse_lock);
|
||||
fc = file->private_data;
|
||||
err = -EPERM;
|
||||
if (!fc)
|
||||
goto err_unlock;
|
||||
request_wait(fc);
|
||||
err = -ENODEV;
|
||||
if (!fc->sb)
|
||||
goto err_unlock;
|
||||
err = -ERESTARTSYS;
|
||||
if (list_empty(&fc->pending))
|
||||
goto err_unlock;
|
||||
|
||||
req = list_entry(fc->pending.next, struct fuse_req, list);
|
||||
list_del_init(&req->list);
|
||||
spin_unlock(&fuse_lock);
|
||||
|
||||
in = &req->in;
|
||||
reqsize = req->in.h.len;
|
||||
fuse_copy_init(&cs, 1, req, iov, nr_segs);
|
||||
err = -EINVAL;
|
||||
if (iov_length(iov, nr_segs) >= reqsize) {
|
||||
err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
|
||||
if (!err)
|
||||
err = fuse_copy_args(&cs, in->numargs, in->argpages,
|
||||
(struct fuse_arg *) in->args, 0);
|
||||
}
|
||||
fuse_copy_finish(&cs);
|
||||
|
||||
spin_lock(&fuse_lock);
|
||||
req->locked = 0;
|
||||
if (!err && req->interrupted)
|
||||
err = -ENOENT;
|
||||
if (err) {
|
||||
if (!req->interrupted)
|
||||
req->out.h.error = -EIO;
|
||||
request_end(fc, req);
|
||||
return err;
|
||||
}
|
||||
if (!req->isreply)
|
||||
request_end(fc, req);
|
||||
else {
|
||||
req->sent = 1;
|
||||
list_add_tail(&req->list, &fc->processing);
|
||||
spin_unlock(&fuse_lock);
|
||||
}
|
||||
return reqsize;
|
||||
|
||||
err_unlock:
|
||||
spin_unlock(&fuse_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* read() entry point: wrap the flat buffer in a one-element iovec */
static ssize_t fuse_dev_read(struct file *file, char __user *buf,
			     size_t nbytes, loff_t *off)
{
	struct iovec iov = {
		.iov_base = buf,
		.iov_len = nbytes,
	};

	return fuse_dev_readv(file, &iov, 1, off);
}
|
||||
|
||||
/* Look up request on processing list by unique ID */
|
||||
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
|
||||
{
|
||||
struct list_head *entry;
|
||||
|
||||
list_for_each(entry, &fc->processing) {
|
||||
struct fuse_req *req;
|
||||
req = list_entry(entry, struct fuse_req, list);
|
||||
if (req->in.h.unique == unique)
|
||||
return req;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
|
||||
unsigned nbytes)
|
||||
{
|
||||
unsigned reqsize = sizeof(struct fuse_out_header);
|
||||
|
||||
if (out->h.error)
|
||||
return nbytes != reqsize ? -EINVAL : 0;
|
||||
|
||||
reqsize += len_args(out->numargs, out->args);
|
||||
|
||||
if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
|
||||
return -EINVAL;
|
||||
else if (reqsize > nbytes) {
|
||||
struct fuse_arg *lastarg = &out->args[out->numargs-1];
|
||||
unsigned diffsize = reqsize - nbytes;
|
||||
if (diffsize > lastarg->size)
|
||||
return -EINVAL;
|
||||
lastarg->size -= diffsize;
|
||||
}
|
||||
return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
|
||||
out->page_zeroing);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write a single reply to a request. First the header is copied from
|
||||
* the write buffer. The request is then searched on the processing
|
||||
* list by the unique ID found in the header. If found, then remove
|
||||
* it from the list and copy the rest of the buffer to the request.
|
||||
* The request is finished by calling request_end()
|
||||
*/
|
||||
static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t *off)
|
||||
{
|
||||
int err;
|
||||
unsigned nbytes = iov_length(iov, nr_segs);
|
||||
struct fuse_req *req;
|
||||
struct fuse_out_header oh;
|
||||
struct fuse_copy_state cs;
|
||||
struct fuse_conn *fc = fuse_get_conn(file);
|
||||
if (!fc)
|
||||
return -ENODEV;
|
||||
|
||||
fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
|
||||
if (nbytes < sizeof(struct fuse_out_header))
|
||||
return -EINVAL;
|
||||
|
||||
err = fuse_copy_one(&cs, &oh, sizeof(oh));
|
||||
if (err)
|
||||
goto err_finish;
|
||||
err = -EINVAL;
|
||||
if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
|
||||
oh.len != nbytes)
|
||||
goto err_finish;
|
||||
|
||||
spin_lock(&fuse_lock);
|
||||
req = request_find(fc, oh.unique);
|
||||
err = -EINVAL;
|
||||
if (!req)
|
||||
goto err_unlock;
|
||||
|
||||
list_del_init(&req->list);
|
||||
if (req->interrupted) {
|
||||
request_end(fc, req);
|
||||
fuse_copy_finish(&cs);
|
||||
return -ENOENT;
|
||||
}
|
||||
req->out.h = oh;
|
||||
req->locked = 1;
|
||||
cs.req = req;
|
||||
spin_unlock(&fuse_lock);
|
||||
|
||||
err = copy_out_args(&cs, &req->out, nbytes);
|
||||
fuse_copy_finish(&cs);
|
||||
|
||||
spin_lock(&fuse_lock);
|
||||
req->locked = 0;
|
||||
if (!err) {
|
||||
if (req->interrupted)
|
||||
err = -ENOENT;
|
||||
} else if (!req->interrupted)
|
||||
req->out.h.error = -EIO;
|
||||
request_end(fc, req);
|
||||
|
||||
return err ? err : nbytes;
|
||||
|
||||
err_unlock:
|
||||
spin_unlock(&fuse_lock);
|
||||
err_finish:
|
||||
fuse_copy_finish(&cs);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* write() entry point: wrap the flat buffer in a one-element iovec */
static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
			      size_t nbytes, loff_t *off)
{
	struct iovec iov = {
		.iov_base = (char __user *) buf,
		.iov_len = nbytes,
	};

	return fuse_dev_writev(file, &iov, 1, off);
}
|
||||
|
||||
/*
 * poll() on the FUSE device.
 *
 * The device is always reported writable; it is reported readable when
 * at least one request is on the pending list.
 *
 * The return value is a mask of POLL* bits, not an errno.  If the file
 * has no connection, POLLERR is reported; returning a negative errno
 * here would be read by the caller as an arbitrary set of event bits.
 */
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
	struct fuse_conn *fc = fuse_get_conn(file);
	unsigned mask = POLLOUT | POLLWRNORM;

	if (!fc)
		return POLLERR;

	poll_wait(file, &fc->waitq, wait);

	spin_lock(&fuse_lock);
	if (!list_empty(&fc->pending))
		mask |= POLLIN | POLLRDNORM;
	spin_unlock(&fuse_lock);

	return mask;
}
|
||||
|
||||
/* Abort all requests on the given list (pending or processing) */
|
||||
static void end_requests(struct fuse_conn *fc, struct list_head *head)
|
||||
{
|
||||
while (!list_empty(head)) {
|
||||
struct fuse_req *req;
|
||||
req = list_entry(head->next, struct fuse_req, list);
|
||||
list_del_init(&req->list);
|
||||
req->out.h.error = -ECONNABORTED;
|
||||
request_end(fc, req);
|
||||
spin_lock(&fuse_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static int fuse_dev_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct fuse_conn *fc;
|
||||
|
||||
spin_lock(&fuse_lock);
|
||||
fc = file->private_data;
|
||||
if (fc) {
|
||||
fc->file = NULL;
|
||||
end_requests(fc, &fc->pending);
|
||||
end_requests(fc, &fc->processing);
|
||||
fuse_release_conn(fc);
|
||||
}
|
||||
spin_unlock(&fuse_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct file_operations fuse_dev_operations = {
|
||||
.owner = THIS_MODULE,
|
||||
.llseek = no_llseek,
|
||||
.read = fuse_dev_read,
|
||||
.readv = fuse_dev_readv,
|
||||
.write = fuse_dev_write,
|
||||
.writev = fuse_dev_writev,
|
||||
.poll = fuse_dev_poll,
|
||||
.release = fuse_dev_release,
|
||||
};
|
||||
|
||||
static struct miscdevice fuse_miscdevice = {
|
||||
.minor = FUSE_MINOR,
|
||||
.name = "fuse",
|
||||
.fops = &fuse_dev_operations,
|
||||
};
|
||||
|
||||
/*
 * Set up the device side of FUSE: create the "fuse_request" slab cache
 * for struct fuse_req and register the misc device.
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created, or the
 * error from misc_register() (the cache is destroyed again in that
 * case).
 */
int __init fuse_dev_init(void)
{
	int err;

	fuse_req_cachep = kmem_cache_create("fuse_request",
					    sizeof(struct fuse_req),
					    0, 0, NULL, NULL);
	if (!fuse_req_cachep)
		return -ENOMEM;

	err = misc_register(&fuse_miscdevice);
	if (err) {
		kmem_cache_destroy(fuse_req_cachep);
		return err;
	}
	return 0;
}
|
||||
|
||||
void fuse_dev_cleanup(void)
|
||||
{
|
||||
misc_deregister(&fuse_miscdevice);
|
||||
kmem_cache_destroy(fuse_req_cachep);
|
||||
}
|
223
fs/fuse/fuse_i.h
223
fs/fuse/fuse_i.h
|
@ -15,6 +15,12 @@
|
|||
#include <linux/backing-dev.h>
|
||||
#include <asm/semaphore.h>
|
||||
|
||||
/** Max number of pages that can be used in a single read request */
|
||||
#define FUSE_MAX_PAGES_PER_REQ 32
|
||||
|
||||
/** If more requests are outstanding, then the operation will block */
|
||||
#define FUSE_MAX_OUTSTANDING 10
|
||||
|
||||
/** FUSE inode */
|
||||
struct fuse_inode {
|
||||
/** Inode data */
|
||||
|
@ -28,6 +34,123 @@ struct fuse_inode {
|
|||
unsigned long i_time;
|
||||
};
|
||||
|
||||
/** One input argument of a request */
|
||||
struct fuse_in_arg {
|
||||
unsigned size;
|
||||
const void *value;
|
||||
};
|
||||
|
||||
/** The request input */
|
||||
struct fuse_in {
|
||||
/** The request header */
|
||||
struct fuse_in_header h;
|
||||
|
||||
/** True if the data for the last argument is in req->pages */
|
||||
unsigned argpages:1;
|
||||
|
||||
/** Number of arguments */
|
||||
unsigned numargs;
|
||||
|
||||
/** Array of arguments */
|
||||
struct fuse_in_arg args[3];
|
||||
};
|
||||
|
||||
/** One output argument of a request */
|
||||
struct fuse_arg {
|
||||
unsigned size;
|
||||
void *value;
|
||||
};
|
||||
|
||||
/** The request output */
|
||||
struct fuse_out {
|
||||
/** Header returned from userspace */
|
||||
struct fuse_out_header h;
|
||||
|
||||
/** Last argument is variable length (can be shorter than
|
||||
arg->size) */
|
||||
unsigned argvar:1;
|
||||
|
||||
/** Last argument is a list of pages to copy data to */
|
||||
unsigned argpages:1;
|
||||
|
||||
/** Zero partially or not copied pages */
|
||||
unsigned page_zeroing:1;
|
||||
|
||||
/** Number of arguments */
|
||||
unsigned numargs;
|
||||
|
||||
/** Array of arguments */
|
||||
struct fuse_arg args[3];
|
||||
};
|
||||
|
||||
struct fuse_req;
|
||||
struct fuse_conn;
|
||||
|
||||
/**
|
||||
* A request to the client
|
||||
*/
|
||||
struct fuse_req {
|
||||
/** This can be on either unused_list, pending or processing
|
||||
lists in fuse_conn */
|
||||
struct list_head list;
|
||||
|
||||
/** refcount */
|
||||
atomic_t count;
|
||||
|
||||
/** True if the request has reply */
|
||||
unsigned isreply:1;
|
||||
|
||||
/** The request is preallocated */
|
||||
unsigned preallocated:1;
|
||||
|
||||
/** The request was interrupted */
|
||||
unsigned interrupted:1;
|
||||
|
||||
/** Request is sent in the background */
|
||||
unsigned background:1;
|
||||
|
||||
/** Data is being copied to/from the request */
|
||||
unsigned locked:1;
|
||||
|
||||
/** Request has been sent to userspace */
|
||||
unsigned sent:1;
|
||||
|
||||
/** The request is finished */
|
||||
unsigned finished:1;
|
||||
|
||||
/** The request input */
|
||||
struct fuse_in in;
|
||||
|
||||
/** The request output */
|
||||
struct fuse_out out;
|
||||
|
||||
/** Used to wake up the task waiting for completion of request*/
|
||||
wait_queue_head_t waitq;
|
||||
|
||||
/** Data for asynchronous requests */
|
||||
union {
|
||||
struct fuse_init_in_out init_in_out;
|
||||
} misc;
|
||||
|
||||
/** page vector */
|
||||
struct page *pages[FUSE_MAX_PAGES_PER_REQ];
|
||||
|
||||
/** number of pages in vector */
|
||||
unsigned num_pages;
|
||||
|
||||
/** offset of data on first page */
|
||||
unsigned page_offset;
|
||||
|
||||
/** Inode used in the request */
|
||||
struct inode *inode;
|
||||
|
||||
/** Second inode used in the request (or NULL) */
|
||||
struct inode *inode2;
|
||||
|
||||
/** File used in the request (or NULL) */
|
||||
struct file *file;
|
||||
};
|
||||
|
||||
/**
|
||||
* A Fuse connection.
|
||||
*
|
||||
|
@ -39,9 +162,37 @@ struct fuse_conn {
|
|||
/** The superblock of the mounted filesystem */
|
||||
struct super_block *sb;
|
||||
|
||||
/** The opened client device */
|
||||
struct file *file;
|
||||
|
||||
/** The user id for this mount */
|
||||
uid_t user_id;
|
||||
|
||||
/** Readers of the connection are waiting on this */
|
||||
wait_queue_head_t waitq;
|
||||
|
||||
/** The list of pending requests */
|
||||
struct list_head pending;
|
||||
|
||||
/** The list of requests being processed */
|
||||
struct list_head processing;
|
||||
|
||||
/** Controls the maximum number of outstanding requests */
|
||||
struct semaphore outstanding_sem;
|
||||
|
||||
/** This counts the number of outstanding requests if
|
||||
outstanding_sem would go negative */
|
||||
unsigned outstanding_debt;
|
||||
|
||||
/** The list of unused requests */
|
||||
struct list_head unused_list;
|
||||
|
||||
/** The next unique request id */
|
||||
u64 reqctr;
|
||||
|
||||
/** Connection failed (version mismatch) */
|
||||
unsigned conn_error : 1;
|
||||
|
||||
/** Backing dev info */
|
||||
struct backing_dev_info bdi;
|
||||
};
|
||||
|
@ -71,13 +222,20 @@ static inline u64 get_node_id(struct inode *inode)
|
|||
return get_fuse_inode(inode)->nodeid;
|
||||
}
|
||||
|
||||
/** Device operations */
|
||||
extern struct file_operations fuse_dev_operations;
|
||||
|
||||
/**
|
||||
* This is the single global spinlock which protects FUSE's structures
|
||||
*
|
||||
* The following data is protected by this lock:
|
||||
*
|
||||
* - the private_data field of the device file
|
||||
* - the s_fs_info field of the super block
|
||||
* - unused_list, pending, processing lists in fuse_conn
|
||||
* - the unique request ID counter reqctr in fuse_conn
|
||||
* - the sb (super_block) field in fuse_conn
|
||||
* - the file (device file) field in fuse_conn
|
||||
*/
|
||||
extern spinlock_t fuse_lock;
|
||||
|
||||
|
@ -87,3 +245,68 @@ extern spinlock_t fuse_lock;
|
|||
*/
|
||||
void fuse_release_conn(struct fuse_conn *fc);
|
||||
|
||||
/**
|
||||
* Initialize the client device
|
||||
*/
|
||||
int fuse_dev_init(void);
|
||||
|
||||
/**
|
||||
* Cleanup the client device
|
||||
*/
|
||||
void fuse_dev_cleanup(void);
|
||||
|
||||
/**
|
||||
* Allocate a request
|
||||
*/
|
||||
struct fuse_req *fuse_request_alloc(void);
|
||||
|
||||
/**
|
||||
* Free a request
|
||||
*/
|
||||
void fuse_request_free(struct fuse_req *req);
|
||||
|
||||
/**
|
||||
* Reinitialize a request, the preallocated flag is left unmodified
|
||||
*/
|
||||
void fuse_reset_request(struct fuse_req *req);
|
||||
|
||||
/**
|
||||
* Reserve a preallocated request
|
||||
*/
|
||||
struct fuse_req *fuse_get_request(struct fuse_conn *fc);
|
||||
|
||||
/**
|
||||
* Reserve a preallocated request, only interruptible by SIGKILL
|
||||
*/
|
||||
struct fuse_req *fuse_get_request_nonint(struct fuse_conn *fc);
|
||||
|
||||
/**
|
||||
* Decrement reference count of a request. If count goes to zero put
|
||||
* on unused list (preallocated) or free request (not preallocated).
|
||||
*/
|
||||
void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
|
||||
|
||||
/**
|
||||
* Send a request (synchronous, interruptible)
|
||||
*/
|
||||
void request_send(struct fuse_conn *fc, struct fuse_req *req);
|
||||
|
||||
/**
|
||||
* Send a request (synchronous, non-interruptible except by SIGKILL)
|
||||
*/
|
||||
void request_send_nonint(struct fuse_conn *fc, struct fuse_req *req);
|
||||
|
||||
/**
|
||||
* Send a request with no reply
|
||||
*/
|
||||
void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
|
||||
|
||||
/**
|
||||
* Send a request in the background
|
||||
*/
|
||||
void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
|
||||
|
||||
/**
|
||||
* Send the INIT message
|
||||
*/
|
||||
void fuse_send_init(struct fuse_conn *fc);
|
||||
|
|
|
@ -151,6 +151,8 @@ static void fuse_put_super(struct super_block *sb)
|
|||
mount_count --;
|
||||
fc->sb = NULL;
|
||||
fc->user_id = 0;
|
||||
/* Flush all readers on this fs */
|
||||
wake_up_all(&fc->waitq);
|
||||
fuse_release_conn(fc);
|
||||
*get_fuse_conn_super_p(sb) = NULL;
|
||||
spin_unlock(&fuse_lock);
|
||||
|
@ -229,9 +231,22 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void free_conn(struct fuse_conn *fc)
|
||||
{
|
||||
while (!list_empty(&fc->unused_list)) {
|
||||
struct fuse_req *req;
|
||||
req = list_entry(fc->unused_list.next, struct fuse_req, list);
|
||||
list_del(&req->list);
|
||||
fuse_request_free(req);
|
||||
}
|
||||
kfree(fc);
|
||||
}
|
||||
|
||||
/* Must be called with the fuse lock held */
|
||||
void fuse_release_conn(struct fuse_conn *fc)
|
||||
{
|
||||
kfree(fc);
|
||||
if (!fc->sb && !fc->file)
|
||||
free_conn(fc);
|
||||
}
|
||||
|
||||
static struct fuse_conn *new_conn(void)
|
||||
|
@ -240,11 +255,27 @@ static struct fuse_conn *new_conn(void)
|
|||
|
||||
fc = kmalloc(sizeof(*fc), GFP_KERNEL);
|
||||
if (fc != NULL) {
|
||||
int i;
|
||||
memset(fc, 0, sizeof(*fc));
|
||||
fc->sb = NULL;
|
||||
fc->file = NULL;
|
||||
fc->user_id = 0;
|
||||
init_waitqueue_head(&fc->waitq);
|
||||
INIT_LIST_HEAD(&fc->pending);
|
||||
INIT_LIST_HEAD(&fc->processing);
|
||||
INIT_LIST_HEAD(&fc->unused_list);
|
||||
sema_init(&fc->outstanding_sem, 0);
|
||||
for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
|
||||
struct fuse_req *req = fuse_request_alloc();
|
||||
if (!req) {
|
||||
free_conn(fc);
|
||||
return NULL;
|
||||
}
|
||||
list_add(&req->list, &fc->unused_list);
|
||||
}
|
||||
fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
|
||||
fc->bdi.unplug_io_fn = default_unplug_io_fn;
|
||||
fc->reqctr = 0;
|
||||
}
|
||||
return fc;
|
||||
}
|
||||
|
@ -253,11 +284,20 @@ static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
|
|||
{
|
||||
struct fuse_conn *fc;
|
||||
|
||||
if (file->f_op != &fuse_dev_operations)
|
||||
return ERR_PTR(-EINVAL);
|
||||
fc = new_conn();
|
||||
if (fc == NULL)
|
||||
return NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
spin_lock(&fuse_lock);
|
||||
fc->sb = sb;
|
||||
if (file->private_data) {
|
||||
free_conn(fc);
|
||||
fc = ERR_PTR(-EINVAL);
|
||||
} else {
|
||||
file->private_data = fc;
|
||||
fc->sb = sb;
|
||||
fc->file = file;
|
||||
}
|
||||
spin_unlock(&fuse_lock);
|
||||
return fc;
|
||||
}
|
||||
|
@ -315,8 +355,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
|
|||
|
||||
fc = get_conn(file, sb);
|
||||
fput(file);
|
||||
if (fc == NULL)
|
||||
return -EINVAL;
|
||||
if (IS_ERR(fc))
|
||||
return PTR_ERR(fc);
|
||||
|
||||
fc->user_id = d.user_id;
|
||||
|
||||
|
@ -336,6 +376,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
|
|||
iput(root);
|
||||
goto err;
|
||||
}
|
||||
fuse_send_init(fc);
|
||||
return 0;
|
||||
|
||||
err:
|
||||
|
@ -411,8 +452,14 @@ static int __init fuse_init(void)
|
|||
if (res)
|
||||
goto err;
|
||||
|
||||
res = fuse_dev_init();
|
||||
if (res)
|
||||
goto err_fs_cleanup;
|
||||
|
||||
return 0;
|
||||
|
||||
err_fs_cleanup:
|
||||
fuse_fs_cleanup();
|
||||
err:
|
||||
return res;
|
||||
}
|
||||
|
@ -422,6 +469,7 @@ static void __exit fuse_exit(void)
|
|||
printk(KERN_DEBUG "fuse exit\n");
|
||||
|
||||
fuse_fs_cleanup();
|
||||
fuse_dev_cleanup();
|
||||
}
|
||||
|
||||
module_init(fuse_init);
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
#include <asm/types.h>
|
||||
|
||||
/** Version number of this interface */
|
||||
#define FUSE_KERNEL_VERSION 5
|
||||
#define FUSE_KERNEL_VERSION 6
|
||||
|
||||
/** Minor version number of this interface */
|
||||
#define FUSE_KERNEL_MINOR_VERSION 1
|
||||
|
@ -19,6 +19,12 @@
|
|||
/** The node ID of the root inode */
|
||||
#define FUSE_ROOT_ID 1
|
||||
|
||||
/** The major number of the fuse character device */
|
||||
#define FUSE_MAJOR 10
|
||||
|
||||
/** The minor number of the fuse character device */
|
||||
#define FUSE_MINOR 229
|
||||
|
||||
struct fuse_attr {
|
||||
__u64 ino;
|
||||
__u64 size;
|
||||
|
@ -36,3 +42,31 @@ struct fuse_attr {
|
|||
__u32 rdev;
|
||||
};
|
||||
|
||||
enum fuse_opcode {
|
||||
FUSE_INIT = 26
|
||||
};
|
||||
|
||||
/* Conservative buffer size for the client */
|
||||
#define FUSE_MAX_IN 8192
|
||||
|
||||
struct fuse_init_in_out {
|
||||
__u32 major;
|
||||
__u32 minor;
|
||||
};
|
||||
|
||||
struct fuse_in_header {
|
||||
__u32 len;
|
||||
__u32 opcode;
|
||||
__u64 unique;
|
||||
__u64 nodeid;
|
||||
__u32 uid;
|
||||
__u32 gid;
|
||||
__u32 pid;
|
||||
};
|
||||
|
||||
struct fuse_out_header {
|
||||
__u32 len;
|
||||
__s32 error;
|
||||
__u64 unique;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue