From 3d6392cfbd7dc11f23058e3493683afab4ac13a3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Jul 2007 12:38:05 +0200 Subject: [PATCH 01/25] bsg: support for full generic block layer SG v3 Signed-off-by: Jens Axboe --- block/Kconfig | 7 + block/Makefile | 1 + block/bsg.c | 997 +++++++++++++++++++++++++++++++++++++++ block/ll_rw_blk.c | 8 + block/scsi_ioctl.c | 163 ++++--- drivers/ide/ide-floppy.c | 29 +- drivers/ide/ide.c | 10 +- include/linux/blkdev.h | 12 + include/linux/bsg.h | 21 + include/linux/genhd.h | 2 + 10 files changed, 1171 insertions(+), 79 deletions(-) create mode 100644 block/bsg.c create mode 100644 include/linux/bsg.h diff --git a/block/Kconfig b/block/Kconfig index 285935134bcd..da12f2649cce 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -51,4 +51,11 @@ config LSF endif # BLOCK +config BLK_DEV_BSG + bool "Block layer SG support" + default y + ---help--- + Saying Y here will enable generic SG (SCSI generic) v3 + support for any block device. + source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index 4b84d0d5947b..959feeb253be 100644 --- a/block/Makefile +++ b/block/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_BLOCK) := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o +obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_AS) += as-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o diff --git a/block/bsg.c b/block/bsg.c new file mode 100644 index 000000000000..724b69391cdc --- /dev/null +++ b/block/bsg.c @@ -0,0 +1,997 @@ +/* + * bsg.c - block layer implementation of the sg v3 interface + * + * Copyright (C) 2004 Jens Axboe SUSE Labs + * Copyright (C) 2004 Peter M. Jones + * + * This file is subject to the terms and conditions of the GNU General Public + * License version 2. See the file "COPYING" in the main directory of this + * archive for more details. + * + */ +/* + * TODO + * - Should this get merged, block/scsi_ioctl.c will be migrated into + * this file. 
To keep maintenance down, it's easier to have them + * seperated right now. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static char bsg_version[] = "block layer sg (bsg) 0.4"; + +struct bsg_command; + +struct bsg_device { + struct gendisk *disk; + request_queue_t *queue; + spinlock_t lock; + struct list_head busy_list; + struct list_head done_list; + struct hlist_node dev_list; + atomic_t ref_count; + int minor; + int queued_cmds; + int done_cmds; + unsigned long *cmd_bitmap; + struct bsg_command *cmd_map; + wait_queue_head_t wq_done; + wait_queue_head_t wq_free; + char name[BDEVNAME_SIZE]; + int max_queue; + unsigned long flags; +}; + +enum { + BSG_F_BLOCK = 1, + BSG_F_WRITE_PERM = 2, +}; + +/* + * command allocation bitmap defines + */ +#define BSG_CMDS_PAGE_ORDER (1) +#define BSG_CMDS_PER_LONG (sizeof(unsigned long) * 8) +#define BSG_CMDS_MASK (BSG_CMDS_PER_LONG - 1) +#define BSG_CMDS_BYTES (PAGE_SIZE * (1 << BSG_CMDS_PAGE_ORDER)) +#define BSG_CMDS (BSG_CMDS_BYTES / sizeof(struct bsg_command)) + +#undef BSG_DEBUG + +#ifdef BSG_DEBUG +#define dprintk(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ##args) +#else +#define dprintk(fmt, args...) 
+#endif + +#define list_entry_bc(entry) list_entry((entry), struct bsg_command, list) + +/* + * just for testing + */ +#define BSG_MAJOR (240) + +static DEFINE_MUTEX(bsg_mutex); +static int bsg_device_nr; + +#define BSG_LIST_SIZE (8) +#define bsg_list_idx(minor) ((minor) & (BSG_LIST_SIZE - 1)) +static struct hlist_head bsg_device_list[BSG_LIST_SIZE]; + +static struct class *bsg_class; +static LIST_HEAD(bsg_class_list); + +/* + * our internal command type + */ +struct bsg_command { + struct bsg_device *bd; + struct list_head list; + struct request *rq; + struct bio *bio; + int err; + struct sg_io_hdr hdr; + struct sg_io_hdr __user *uhdr; + char sense[SCSI_SENSE_BUFFERSIZE]; +}; + +static void bsg_free_command(struct bsg_command *bc) +{ + struct bsg_device *bd = bc->bd; + unsigned long bitnr = bc - bd->cmd_map; + unsigned long flags; + + dprintk("%s: command bit offset %lu\n", bd->name, bitnr); + + spin_lock_irqsave(&bd->lock, flags); + bd->queued_cmds--; + __clear_bit(bitnr, bd->cmd_bitmap); + spin_unlock_irqrestore(&bd->lock, flags); + + wake_up(&bd->wq_free); +} + +static struct bsg_command *__bsg_alloc_command(struct bsg_device *bd) +{ + struct bsg_command *bc = NULL; + unsigned long *map; + int free_nr; + + spin_lock_irq(&bd->lock); + + if (bd->queued_cmds >= bd->max_queue) + goto out; + + for (free_nr = 0, map = bd->cmd_bitmap; *map == ~0UL; map++) + free_nr += BSG_CMDS_PER_LONG; + + BUG_ON(*map == ~0UL); + + bd->queued_cmds++; + free_nr += ffz(*map); + __set_bit(free_nr, bd->cmd_bitmap); + spin_unlock_irq(&bd->lock); + + bc = bd->cmd_map + free_nr; + memset(bc, 0, sizeof(*bc)); + bc->bd = bd; + INIT_LIST_HEAD(&bc->list); + dprintk("%s: returning free cmd %p (bit %d)\n", bd->name, bc, free_nr); + return bc; +out: + dprintk("%s: failed (depth %d)\n", bd->name, bd->queued_cmds); + spin_unlock_irq(&bd->lock); + return bc; +} + +static inline void +bsg_del_done_cmd(struct bsg_device *bd, struct bsg_command *bc) +{ + bd->done_cmds--; + list_del(&bc->list); +} + 
+static inline void +bsg_add_done_cmd(struct bsg_device *bd, struct bsg_command *bc) +{ + bd->done_cmds++; + list_add_tail(&bc->list, &bd->done_list); + wake_up(&bd->wq_done); +} + +static inline int bsg_io_schedule(struct bsg_device *bd, int state) +{ + DEFINE_WAIT(wait); + int ret = 0; + + spin_lock_irq(&bd->lock); + + BUG_ON(bd->done_cmds > bd->queued_cmds); + + /* + * -ENOSPC or -ENODATA? I'm going for -ENODATA, meaning "I have no + * work to do", even though we return -ENOSPC after this same test + * during bsg_write() -- there, it means our buffer can't have more + * bsg_commands added to it, thus has no space left. + */ + if (bd->done_cmds == bd->queued_cmds) { + ret = -ENODATA; + goto unlock; + } + + if (!test_bit(BSG_F_BLOCK, &bd->flags)) { + ret = -EAGAIN; + goto unlock; + } + + prepare_to_wait(&bd->wq_done, &wait, state); + spin_unlock_irq(&bd->lock); + io_schedule(); + finish_wait(&bd->wq_done, &wait); + + if ((state == TASK_INTERRUPTIBLE) && signal_pending(current)) + ret = -ERESTARTSYS; + + return ret; +unlock: + spin_unlock_irq(&bd->lock); + return ret; +} + +/* + * get a new free command, blocking if needed and specified + */ +static struct bsg_command *bsg_get_command(struct bsg_device *bd) +{ + struct bsg_command *bc; + int ret; + + do { + bc = __bsg_alloc_command(bd); + if (bc) + break; + + ret = bsg_io_schedule(bd, TASK_INTERRUPTIBLE); + if (ret) { + bc = ERR_PTR(ret); + break; + } + + } while (1); + + return bc; +} + +/* + * Check if sg_io_hdr from user is allowed and valid + */ +static int +bsg_validate_sghdr(request_queue_t *q, struct sg_io_hdr *hdr, int *rw) +{ + if (hdr->interface_id != 'S') + return -EINVAL; + if (hdr->cmd_len > BLK_MAX_CDB) + return -EINVAL; + if (hdr->dxfer_len > (q->max_sectors << 9)) + return -EIO; + + /* + * looks sane, if no data then it should be fine from our POV + */ + if (!hdr->dxfer_len) + return 0; + + switch (hdr->dxfer_direction) { + case SG_DXFER_TO_FROM_DEV: + case SG_DXFER_FROM_DEV: + *rw = READ; + break; 
+ case SG_DXFER_TO_DEV: + *rw = WRITE; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* + * map sg_io_hdr to a request. for scatter-gather sg_io_hdr, we map + * each segment to a bio and string multiple bio's to the request + */ +static struct request * +bsg_map_hdr(struct bsg_device *bd, int rw, struct sg_io_hdr *hdr) +{ + request_queue_t *q = bd->queue; + struct sg_iovec iov; + struct sg_iovec __user *u_iov; + struct request *rq; + int ret, i = 0; + + dprintk("map hdr %p/%d/%d\n", hdr->dxferp, hdr->dxfer_len, + hdr->iovec_count); + + ret = bsg_validate_sghdr(q, hdr, &rw); + if (ret) + return ERR_PTR(ret); + + /* + * map scatter-gather elements seperately and string them to request + */ + rq = blk_get_request(q, rw, GFP_KERNEL); + ret = blk_fill_sghdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM, + &bd->flags)); + if (ret) { + blk_put_request(rq); + return ERR_PTR(ret); + } + + if (!hdr->iovec_count) { + ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); + if (ret) + goto out; + } + + u_iov = hdr->dxferp; + for (ret = 0, i = 0; i < hdr->iovec_count; i++, u_iov++) { + if (copy_from_user(&iov, u_iov, sizeof(iov))) { + ret = -EFAULT; + break; + } + + if (!iov.iov_len || !iov.iov_base) { + ret = -EINVAL; + break; + } + + ret = blk_rq_map_user(q, rq, iov.iov_base, iov.iov_len); + if (ret) + break; + } + + /* + * bugger, cleanup + */ + if (ret) { +out: + dprintk("failed map at %d: %d\n", i, ret); + blk_unmap_sghdr_rq(rq, hdr); + rq = ERR_PTR(ret); + } + + return rq; +} + +/* + * async completion call-back from the block layer, when scsi/ide/whatever + * calls end_that_request_last() on a request + */ +static void bsg_rq_end_io(struct request *rq, int uptodate) +{ + struct bsg_command *bc = rq->end_io_data; + struct bsg_device *bd = bc->bd; + unsigned long flags; + + dprintk("%s: finished rq %p bio %p, bc %p offset %ld stat %d\n", + bd->name, rq, bc, bc->bio, bc - bd->cmd_map, uptodate); + + bc->hdr.duration = jiffies_to_msecs(jiffies - 
bc->hdr.duration); + + spin_lock_irqsave(&bd->lock, flags); + list_del(&bc->list); + bsg_add_done_cmd(bd, bc); + spin_unlock_irqrestore(&bd->lock, flags); +} + +/* + * do final setup of a 'bc' and submit the matching 'rq' to the block + * layer for io + */ +static void bsg_add_command(struct bsg_device *bd, request_queue_t *q, + struct bsg_command *bc, struct request *rq) +{ + rq->sense = bc->sense; + rq->sense_len = 0; + + /* + * add bc command to busy queue and submit rq for io + */ + bc->rq = rq; + bc->bio = rq->bio; + bc->hdr.duration = jiffies; + spin_lock_irq(&bd->lock); + list_add_tail(&bc->list, &bd->busy_list); + spin_unlock_irq(&bd->lock); + + dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc); + + rq->end_io_data = bc; + blk_execute_rq_nowait(q, bd->disk, rq, 1, bsg_rq_end_io); +} + +static inline struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd) +{ + struct bsg_command *bc = NULL; + + spin_lock_irq(&bd->lock); + if (bd->done_cmds) { + bc = list_entry_bc(bd->done_list.next); + bsg_del_done_cmd(bd, bc); + } + spin_unlock_irq(&bd->lock); + + return bc; +} + +/* + * Get a finished command from the done list + */ +static struct bsg_command *__bsg_get_done_cmd(struct bsg_device *bd, int state) +{ + struct bsg_command *bc; + int ret; + + do { + bc = bsg_next_done_cmd(bd); + if (bc) + break; + + ret = bsg_io_schedule(bd, state); + if (ret) { + bc = ERR_PTR(ret); + break; + } + } while (1); + + dprintk("%s: returning done %p\n", bd->name, bc); + + return bc; +} + +static struct bsg_command * +bsg_get_done_cmd(struct bsg_device *bd, const struct iovec *iov) +{ + return __bsg_get_done_cmd(bd, TASK_INTERRUPTIBLE); +} + +static struct bsg_command * +bsg_get_done_cmd_nosignals(struct bsg_device *bd) +{ + return __bsg_get_done_cmd(bd, TASK_UNINTERRUPTIBLE); +} + +static int bsg_complete_all_commands(struct bsg_device *bd) +{ + struct bsg_command *bc; + int ret, tret; + + dprintk("%s: entered\n", bd->name); + + set_bit(BSG_F_BLOCK, &bd->flags); + + /* 
+ * wait for all commands to complete + */ + ret = 0; + do { + ret = bsg_io_schedule(bd, TASK_UNINTERRUPTIBLE); + /* + * look for -ENODATA specifically -- we'll sometimes get + * -ERESTARTSYS when we've taken a signal, but we can't + * return until we're done freeing the queue, so ignore + * it. The signal will get handled when we're done freeing + * the bsg_device. + */ + } while (ret != -ENODATA); + + /* + * discard done commands + */ + ret = 0; + do { + bc = bsg_get_done_cmd_nosignals(bd); + + /* + * we _must_ complete before restarting, because + * bsg_release can't handle this failing. + */ + if (PTR_ERR(bc) == -ERESTARTSYS) + continue; + if (IS_ERR(bc)) { + ret = PTR_ERR(bc); + break; + } + + tret = blk_complete_sghdr_rq(bc->rq, &bc->hdr, bc->bio); + if (!ret) + ret = tret; + + bsg_free_command(bc); + } while (1); + + return ret; +} + +typedef struct bsg_command *(*bsg_command_callback)(struct bsg_device *bd, const struct iovec *iov); + +static ssize_t +__bsg_read(char __user *buf, size_t count, bsg_command_callback get_bc, + struct bsg_device *bd, const struct iovec *iov, ssize_t *bytes_read) +{ + struct bsg_command *bc; + int nr_commands, ret; + + if (count % sizeof(struct sg_io_hdr)) + return -EINVAL; + + ret = 0; + nr_commands = count / sizeof(struct sg_io_hdr); + while (nr_commands) { + bc = get_bc(bd, iov); + if (IS_ERR(bc)) { + ret = PTR_ERR(bc); + break; + } + + /* + * this is the only case where we need to copy data back + * after completing the request. 
so do that here, + * bsg_complete_work() cannot do that for us + */ + ret = blk_complete_sghdr_rq(bc->rq, &bc->hdr, bc->bio); + + if (copy_to_user(buf, (char *) &bc->hdr, sizeof(bc->hdr))) + ret = -EFAULT; + + bsg_free_command(bc); + + if (ret) + break; + + buf += sizeof(struct sg_io_hdr); + *bytes_read += sizeof(struct sg_io_hdr); + nr_commands--; + } + + return ret; +} + +static inline void bsg_set_block(struct bsg_device *bd, struct file *file) +{ + if (file->f_flags & O_NONBLOCK) + clear_bit(BSG_F_BLOCK, &bd->flags); + else + set_bit(BSG_F_BLOCK, &bd->flags); +} + +static inline void bsg_set_write_perm(struct bsg_device *bd, struct file *file) +{ + if (file->f_mode & FMODE_WRITE) + set_bit(BSG_F_WRITE_PERM, &bd->flags); + else + clear_bit(BSG_F_WRITE_PERM, &bd->flags); +} + +static inline int err_block_err(int ret) +{ + if (ret && ret != -ENOSPC && ret != -ENODATA && ret != -EAGAIN) + return 1; + + return 0; +} + +static ssize_t +bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + struct bsg_device *bd = file->private_data; + int ret; + ssize_t bytes_read; + + dprintk("%s: read %lu bytes\n", bd->name, count); + + bsg_set_block(bd, file); + bytes_read = 0; + ret = __bsg_read(buf, count, bsg_get_done_cmd, + bd, NULL, &bytes_read); + *ppos = bytes_read; + + if (!bytes_read || (bytes_read && err_block_err(ret))) + bytes_read = ret; + + return bytes_read; +} + +static ssize_t __bsg_write(struct bsg_device *bd, const char __user *buf, + size_t count, ssize_t *bytes_read) +{ + struct bsg_command *bc; + struct request *rq; + int ret, nr_commands; + + if (count % sizeof(struct sg_io_hdr)) + return -EINVAL; + + nr_commands = count / sizeof(struct sg_io_hdr); + rq = NULL; + bc = NULL; + ret = 0; + while (nr_commands) { + request_queue_t *q = bd->queue; + int rw = READ; + + bc = bsg_get_command(bd); + if (!bc) + break; + if (IS_ERR(bc)) { + ret = PTR_ERR(bc); + bc = NULL; + break; + } + + bc->uhdr = (struct sg_io_hdr __user *) buf; + if 
(copy_from_user(&bc->hdr, buf, sizeof(bc->hdr))) { + ret = -EFAULT; + break; + } + + /* + * get a request, fill in the blanks, and add to request queue + */ + rq = bsg_map_hdr(bd, rw, &bc->hdr); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + rq = NULL; + break; + } + + bsg_add_command(bd, q, bc, rq); + bc = NULL; + rq = NULL; + nr_commands--; + buf += sizeof(struct sg_io_hdr); + *bytes_read += sizeof(struct sg_io_hdr); + } + + if (rq) + blk_unmap_sghdr_rq(rq, &bc->hdr); + if (bc) + bsg_free_command(bc); + + return ret; +} + +static ssize_t +bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) +{ + struct bsg_device *bd = file->private_data; + ssize_t bytes_read; + int ret; + + dprintk("%s: write %lu bytes\n", bd->name, count); + + bsg_set_block(bd, file); + bsg_set_write_perm(bd, file); + + bytes_read = 0; + ret = __bsg_write(bd, buf, count, &bytes_read); + *ppos = bytes_read; + + /* + * return bytes written on non-fatal errors + */ + if (!bytes_read || (bytes_read && err_block_err(ret))) + bytes_read = ret; + + dprintk("%s: returning %lu\n", bd->name, bytes_read); + return bytes_read; +} + +static void bsg_free_device(struct bsg_device *bd) +{ + if (bd->cmd_map) + free_pages((unsigned long) bd->cmd_map, BSG_CMDS_PAGE_ORDER); + + kfree(bd->cmd_bitmap); + kfree(bd); +} + +static struct bsg_device *bsg_alloc_device(void) +{ + struct bsg_command *cmd_map; + unsigned long *cmd_bitmap; + struct bsg_device *bd; + int bits; + + bd = kzalloc(sizeof(struct bsg_device), GFP_KERNEL); + if (unlikely(!bd)) + return NULL; + + spin_lock_init(&bd->lock); + + bd->max_queue = BSG_CMDS; + + bits = (BSG_CMDS / BSG_CMDS_PER_LONG) + 1; + cmd_bitmap = kzalloc(bits * sizeof(unsigned long), GFP_KERNEL); + if (!cmd_bitmap) + goto out_free_bd; + bd->cmd_bitmap = cmd_bitmap; + + cmd_map = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, + BSG_CMDS_PAGE_ORDER); + if (!cmd_map) + goto out_free_bitmap; + bd->cmd_map = cmd_map; + + INIT_LIST_HEAD(&bd->busy_list); + 
INIT_LIST_HEAD(&bd->done_list); + INIT_HLIST_NODE(&bd->dev_list); + + init_waitqueue_head(&bd->wq_free); + init_waitqueue_head(&bd->wq_done); + return bd; + +out_free_bitmap: + kfree(cmd_bitmap); +out_free_bd: + kfree(bd); + return NULL; +} + +static int bsg_put_device(struct bsg_device *bd) +{ + int ret = 0; + + mutex_lock(&bsg_mutex); + + if (!atomic_dec_and_test(&bd->ref_count)) + goto out; + + dprintk("%s: tearing down\n", bd->name); + + /* + * close can always block + */ + set_bit(BSG_F_BLOCK, &bd->flags); + + /* + * correct error detection baddies here again. it's the responsibility + * of the app to properly reap commands before close() if it wants + * fool-proof error detection + */ + ret = bsg_complete_all_commands(bd); + + blk_put_queue(bd->queue); + hlist_del(&bd->dev_list); + bsg_free_device(bd); +out: + mutex_unlock(&bsg_mutex); + return ret; +} + +static struct bsg_device *bsg_add_device(struct inode *inode, + struct gendisk *disk, + struct file *file) +{ + struct bsg_device *bd = NULL; +#ifdef BSG_DEBUG + unsigned char buf[32]; +#endif + + bd = bsg_alloc_device(); + if (!bd) + return ERR_PTR(-ENOMEM); + + bd->disk = disk; + bd->queue = disk->queue; + kobject_get(&disk->queue->kobj); + bsg_set_block(bd, file); + + atomic_set(&bd->ref_count, 1); + bd->minor = iminor(inode); + mutex_lock(&bsg_mutex); + hlist_add_head(&bd->dev_list,&bsg_device_list[bsg_list_idx(bd->minor)]); + + strncpy(bd->name, disk->disk_name, sizeof(bd->name) - 1); + dprintk("bound to <%s>, max queue %d\n", + format_dev_t(buf, i->i_rdev), bd->max_queue); + + mutex_unlock(&bsg_mutex); + return bd; +} + +static struct bsg_device *__bsg_get_device(int minor) +{ + struct hlist_head *list = &bsg_device_list[bsg_list_idx(minor)]; + struct bsg_device *bd = NULL; + struct hlist_node *entry; + + mutex_lock(&bsg_mutex); + + hlist_for_each(entry, list) { + bd = hlist_entry(entry, struct bsg_device, dev_list); + if (bd->minor == minor) { + atomic_inc(&bd->ref_count); + break; + } + + bd = NULL; 
+ } + + mutex_unlock(&bsg_mutex); + return bd; +} + +static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file) +{ + struct bsg_device *bd = __bsg_get_device(iminor(inode)); + struct bsg_class_device *bcd, *__bcd; + + if (bd) + return bd; + + /* + * find the class device + */ + bcd = NULL; + mutex_lock(&bsg_mutex); + list_for_each_entry(__bcd, &bsg_class_list, list) { + if (__bcd->minor == iminor(inode)) { + bcd = __bcd; + break; + } + } + mutex_unlock(&bsg_mutex); + + if (!bcd) + return ERR_PTR(-ENODEV); + + return bsg_add_device(inode, bcd->disk, file); +} + +static int bsg_open(struct inode *inode, struct file *file) +{ + struct bsg_device *bd = bsg_get_device(inode, file); + + if (IS_ERR(bd)) + return PTR_ERR(bd); + + file->private_data = bd; + return 0; +} + +static int bsg_release(struct inode *inode, struct file *file) +{ + struct bsg_device *bd = file->private_data; + + file->private_data = NULL; + return bsg_put_device(bd); +} + +static unsigned int bsg_poll(struct file *file, poll_table *wait) +{ + struct bsg_device *bd = file->private_data; + unsigned int mask = 0; + + poll_wait(file, &bd->wq_done, wait); + poll_wait(file, &bd->wq_free, wait); + + spin_lock_irq(&bd->lock); + if (!list_empty(&bd->done_list)) + mask |= POLLIN | POLLRDNORM; + if (bd->queued_cmds >= bd->max_queue) + mask |= POLLOUT; + spin_unlock_irq(&bd->lock); + + return mask; +} + +static int +bsg_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct bsg_device *bd = file->private_data; + int __user *uarg = (int __user *) arg; + + if (!bd) + return -ENXIO; + + switch (cmd) { + /* + * our own ioctls + */ + case SG_GET_COMMAND_Q: + return put_user(bd->max_queue, uarg); + case SG_SET_COMMAND_Q: { + int queue; + + if (get_user(queue, uarg)) + return -EFAULT; + if (queue > BSG_CMDS || queue < 1) + return -EINVAL; + + bd->max_queue = queue; + return 0; + } + + /* + * SCSI/sg ioctls + */ + case SG_GET_VERSION_NUM: + case 
SCSI_IOCTL_GET_IDLUN: + case SCSI_IOCTL_GET_BUS_NUMBER: + case SG_SET_TIMEOUT: + case SG_GET_TIMEOUT: + case SG_GET_RESERVED_SIZE: + case SG_SET_RESERVED_SIZE: + case SG_EMULATED_HOST: + case SG_IO: + case SCSI_IOCTL_SEND_COMMAND: { + void __user *uarg = (void __user *) arg; + return scsi_cmd_ioctl(file, bd->disk, cmd, uarg); + } + /* + * block device ioctls + */ + default: +#if 0 + return ioctl_by_bdev(bd->bdev, cmd, arg); +#else + return -ENOTTY; +#endif + } +} + +static struct file_operations bsg_fops = { + .read = bsg_read, + .write = bsg_write, + .poll = bsg_poll, + .open = bsg_open, + .release = bsg_release, + .ioctl = bsg_ioctl, + .owner = THIS_MODULE, +}; + +void bsg_unregister_disk(struct gendisk *disk) +{ + struct bsg_class_device *bcd = &disk->bsg_dev; + + if (!bcd->class_dev) + return; + + mutex_lock(&bsg_mutex); + sysfs_remove_link(&bcd->disk->queue->kobj, "bsg"); + class_device_destroy(bsg_class, MKDEV(BSG_MAJOR, bcd->minor)); + bcd->class_dev = NULL; + list_del_init(&bcd->list); + mutex_unlock(&bsg_mutex); +} + +int bsg_register_disk(struct gendisk *disk) +{ + request_queue_t *q = disk->queue; + struct bsg_class_device *bcd; + dev_t dev; + + /* + * we need a proper transport to send commands, not a stacked device + */ + if (!q->request_fn) + return 0; + + bcd = &disk->bsg_dev; + memset(bcd, 0, sizeof(*bcd)); + INIT_LIST_HEAD(&bcd->list); + + mutex_lock(&bsg_mutex); + dev = MKDEV(BSG_MAJOR, bsg_device_nr); + bcd->minor = bsg_device_nr; + bsg_device_nr++; + bcd->disk = disk; + bcd->class_dev = class_device_create(bsg_class, NULL, dev, bcd->dev, "%s", disk->disk_name); + list_add_tail(&bcd->list, &bsg_class_list); + sysfs_create_link(&q->kobj, &bcd->class_dev->kobj, "bsg"); + mutex_unlock(&bsg_mutex); + return 0; +} + +static int __init bsg_init(void) +{ + int ret, i; + + for (i = 0; i < BSG_LIST_SIZE; i++) + INIT_HLIST_HEAD(&bsg_device_list[i]); + + bsg_class = class_create(THIS_MODULE, "bsg"); + if (IS_ERR(bsg_class)) + return PTR_ERR(bsg_class); + + 
ret = register_chrdev(BSG_MAJOR, "bsg", &bsg_fops); + if (ret) { + class_destroy(bsg_class); + return ret; + } + + printk(KERN_INFO "%s loaded\n", bsg_version); + return 0; +} + +MODULE_AUTHOR("Jens Axboe"); +MODULE_DESCRIPTION("Block layer SGSI generic (sg) driver"); +MODULE_LICENSE("GPL"); + +subsys_initcall(bsg_init); diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index ef42bb2b12b6..3795e0708a22 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -4091,6 +4091,13 @@ int blk_register_queue(struct gendisk *disk) return ret; } + ret = bsg_register_disk(disk); + if (ret) { + elv_unregister_queue(q); + kobject_unregister(&q->kobj); + return ret; + } + return 0; } @@ -4099,6 +4106,7 @@ void blk_unregister_queue(struct gendisk *disk) request_queue_t *q = disk->queue; if (q && q->request_fn) { + bsg_unregister_disk(disk); elv_unregister_queue(q); kobject_uevent(&q->kobj, KOBJ_REMOVE); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index e83f1dbf7c29..88fd008d38bd 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -41,8 +41,6 @@ const unsigned char scsi_command_size[8] = EXPORT_SYMBOL(scsi_command_size); -#define BLK_DEFAULT_TIMEOUT (60 * HZ) - #include static int sg_get_version(int __user *p) @@ -114,7 +112,7 @@ static int sg_emulated_host(request_queue_t *q, int __user *p) #define safe_for_read(cmd) [cmd] = CMD_READ_SAFE #define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE -static int verify_command(struct file *file, unsigned char *cmd) +static int verify_command(unsigned char *cmd, int has_write_perm) { static unsigned char cmd_type[256] = { @@ -193,18 +191,11 @@ static int verify_command(struct file *file, unsigned char *cmd) safe_for_write(GPCMD_SET_STREAMING), }; unsigned char type = cmd_type[cmd[0]]; - int has_write_perm = 0; /* Anybody who can open the device can do a read-safe command */ if (type & CMD_READ_SAFE) return 0; - /* - * file can be NULL from ioctl_by_bdev()... 
- */ - if (file) - has_write_perm = file->f_mode & FMODE_WRITE; - /* Write-safe commands just require a writable open.. */ if ((type & CMD_WRITE_SAFE) && has_write_perm) return 0; @@ -222,24 +213,104 @@ static int verify_command(struct file *file, unsigned char *cmd) return -EPERM; } +int blk_fill_sghdr_rq(request_queue_t *q, struct request *rq, + struct sg_io_hdr *hdr, int has_write_perm) +{ + memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ + + if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len)) + return -EFAULT; + if (verify_command(rq->cmd, has_write_perm)) + return -EPERM; + + /* + * fill in request structure + */ + rq->cmd_len = hdr->cmd_len; + rq->cmd_type = REQ_TYPE_BLOCK_PC; + + rq->timeout = (hdr->timeout * HZ) / 1000; + if (!rq->timeout) + rq->timeout = q->sg_timeout; + if (!rq->timeout) + rq->timeout = BLK_DEFAULT_SG_TIMEOUT; + + return 0; +} +EXPORT_SYMBOL_GPL(blk_fill_sghdr_rq); + +/* + * unmap a request that was previously mapped to this sg_io_hdr. handles + * both sg and non-sg sg_io_hdr. 
+ */ +int blk_unmap_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr) +{ + struct bio *bio = rq->bio; + + /* + * also releases request + */ + if (!hdr->iovec_count) + return blk_rq_unmap_user(bio, hdr->dxfer_len); + + rq_for_each_bio(bio, rq) + bio_unmap_user(bio); + + blk_put_request(rq); + return 0; +} +EXPORT_SYMBOL_GPL(blk_unmap_sghdr_rq); + +int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, + struct bio *bio) +{ + int r, ret = 0; + + /* + * fill in all the output members + */ + hdr->status = rq->errors & 0xff; + hdr->masked_status = status_byte(rq->errors); + hdr->msg_status = msg_byte(rq->errors); + hdr->host_status = host_byte(rq->errors); + hdr->driver_status = driver_byte(rq->errors); + hdr->info = 0; + if (hdr->masked_status || hdr->host_status || hdr->driver_status) + hdr->info |= SG_INFO_CHECK; + hdr->resid = rq->data_len; + hdr->sb_len_wr = 0; + + if (rq->sense_len && hdr->sbp) { + int len = min((unsigned int) hdr->mx_sb_len, rq->sense_len); + + if (!copy_to_user(hdr->sbp, rq->sense, len)) + hdr->sb_len_wr = len; + else + ret = -EFAULT; + } + + rq->bio = bio; + r = blk_unmap_sghdr_rq(rq, hdr); + if (ret) + r = ret; + + return r; +} +EXPORT_SYMBOL_GPL(blk_complete_sghdr_rq); + static int sg_io(struct file *file, request_queue_t *q, struct gendisk *bd_disk, struct sg_io_hdr *hdr) { - unsigned long start_time, timeout; - int writing = 0, ret = 0; + unsigned long start_time; + int writing = 0, ret = 0, has_write_perm = 0; struct request *rq; char sense[SCSI_SENSE_BUFFERSIZE]; - unsigned char cmd[BLK_MAX_CDB]; struct bio *bio; if (hdr->interface_id != 'S') return -EINVAL; if (hdr->cmd_len > BLK_MAX_CDB) return -EINVAL; - if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len)) - return -EFAULT; - if (verify_command(file, cmd)) - return -EPERM; if (hdr->dxfer_len > (q->max_hw_sectors << 9)) return -EIO; @@ -260,25 +331,14 @@ static int sg_io(struct file *file, request_queue_t *q, if (!rq) return -ENOMEM; - /* - * fill in request structure - */ 
- rq->cmd_len = hdr->cmd_len; - memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ - memcpy(rq->cmd, cmd, hdr->cmd_len); + if (file) + has_write_perm = file->f_mode & FMODE_WRITE; - memset(sense, 0, sizeof(sense)); - rq->sense = sense; - rq->sense_len = 0; - - rq->cmd_type = REQ_TYPE_BLOCK_PC; - - timeout = msecs_to_jiffies(hdr->timeout); - rq->timeout = (timeout < INT_MAX) ? timeout : INT_MAX; - if (!rq->timeout) - rq->timeout = q->sg_timeout; - if (!rq->timeout) - rq->timeout = BLK_DEFAULT_TIMEOUT; + if (blk_fill_sghdr_rq(q, rq, hdr, has_write_perm)) { + blk_rq_unmap_user(bio, hdr->dxfer_len); + blk_put_request(rq); + return -EFAULT; + } if (hdr->iovec_count) { const int size = sizeof(struct sg_iovec) * hdr->iovec_count; @@ -306,6 +366,9 @@ static int sg_io(struct file *file, request_queue_t *q, goto out; bio = rq->bio; + memset(sense, 0, sizeof(sense)); + rq->sense = sense; + rq->sense_len = 0; rq->retries = 0; start_time = jiffies; @@ -316,31 +379,9 @@ static int sg_io(struct file *file, request_queue_t *q, */ blk_execute_rq(q, bd_disk, rq, 0); - /* write to all output members */ - hdr->status = 0xff & rq->errors; - hdr->masked_status = status_byte(rq->errors); - hdr->msg_status = msg_byte(rq->errors); - hdr->host_status = host_byte(rq->errors); - hdr->driver_status = driver_byte(rq->errors); - hdr->info = 0; - if (hdr->masked_status || hdr->host_status || hdr->driver_status) - hdr->info |= SG_INFO_CHECK; - hdr->resid = rq->data_len; hdr->duration = ((jiffies - start_time) * 1000) / HZ; - hdr->sb_len_wr = 0; - if (rq->sense_len && hdr->sbp) { - int len = min((unsigned int) hdr->mx_sb_len, rq->sense_len); - - if (!copy_to_user(hdr->sbp, rq->sense, len)) - hdr->sb_len_wr = len; - } - - if (blk_rq_unmap_user(bio)) - ret = -EFAULT; - - /* may not have succeeded, but output values written to control - * structure (struct sg_io_hdr). 
*/ + return blk_complete_sghdr_rq(rq, hdr, bio); out: blk_put_request(rq); return ret; @@ -427,7 +468,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q, if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) goto error; - err = verify_command(file, rq->cmd); + err = verify_command(rq->cmd, file->f_mode & FMODE_WRITE); if (err) goto error; @@ -454,7 +495,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q, rq->retries = 1; break; default: - rq->timeout = BLK_DEFAULT_TIMEOUT; + rq->timeout = BLK_DEFAULT_SG_TIMEOUT; break; } @@ -501,7 +542,7 @@ static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int c rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->data = NULL; rq->data_len = 0; - rq->timeout = BLK_DEFAULT_TIMEOUT; + rq->timeout = BLK_DEFAULT_SG_TIMEOUT; memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd[0] = cmd; rq->cmd[4] = data; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index f429be88c4f9..a21f585b1caa 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -1258,19 +1258,25 @@ static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t set_bit(PC_DMA_RECOMMENDED, &pc->flags); } -static int +static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy, idefloppy_pc_t *pc, struct request *rq) { - /* - * just support eject for now, it would not be hard to make the - * REQ_BLOCK_PC support fully-featured - */ - if (rq->cmd[0] != IDEFLOPPY_START_STOP_CMD) - return 1; - idefloppy_init_pc(pc); + pc->callback = &idefloppy_rw_callback; memcpy(pc->c, rq->cmd, sizeof(pc->c)); - return 0; + pc->rq = rq; + pc->b_count = rq->data_len; + if (rq->data_len && rq_data_dir(rq) == WRITE) + set_bit(PC_WRITING, &pc->flags); + pc->buffer = rq->data; + if (rq->bio) + set_bit(PC_DMA_RECOMMENDED, &pc->flags); + + /* + * possibly problematic, doesn't look like ide-floppy correctly + * handled scattered requests if dma fails... 
+ */ + pc->request_transfer = pc->buffer_size = rq->data_len; } /* @@ -1317,10 +1323,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request pc = (idefloppy_pc_t *) rq->buffer; } else if (blk_pc_request(rq)) { pc = idefloppy_next_pc_storage(drive); - if (idefloppy_blockpc_cmd(floppy, pc, rq)) { - idefloppy_do_end_request(drive, 0, 0); - return ide_stopped; - } + idefloppy_blockpc_cmd(floppy, pc, rq); } else { blk_dump_rq_flags(rq, "ide-floppy: unsupported command in queue"); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index c948a5c17a5d..9ae60a7400a2 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -1049,9 +1049,13 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device unsigned long flags; ide_driver_t *drv; void __user *p = (void __user *)arg; - int err = 0, (*setfunc)(ide_drive_t *, int); + int err, (*setfunc)(ide_drive_t *, int); u8 *val; + err = scsi_cmd_ioctl(file, bdev->bd_disk, cmd, p); + if (err != -ENOTTY) + return err; + switch (cmd) { case HDIO_GET_32BIT: val = &drive->io_32bit; goto read_val; case HDIO_GET_KEEPSETTINGS: val = &drive->keep_settings; goto read_val; @@ -1171,10 +1175,6 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device return 0; } - case CDROMEJECT: - case CDROMCLOSETRAY: - return scsi_cmd_ioctl(file, bdev->bd_disk, cmd, p); - case HDIO_GET_BUSSTATE: if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fae138bd2207..53002d40efa2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -41,6 +41,8 @@ struct elevator_queue; typedef struct elevator_queue elevator_t; struct request_pm_state; struct blk_trace; +struct request; +struct sg_io_hdr; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -607,6 +609,11 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; #define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT) #define BLK_BOUNCE_ISA 
(ISA_DMA_THRESHOLD) +/* + * default timeout for SG_IO if none specified + */ +#define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) + #ifdef CONFIG_MMU extern int init_emergency_isa_pool(void); extern void blk_queue_bounce(request_queue_t *q, struct bio **bio); @@ -680,6 +687,11 @@ extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *, struct request *, int, rq_end_io_fn *); +extern int blk_fill_sghdr_rq(request_queue_t *, struct request *, + struct sg_io_hdr *, int); +extern int blk_unmap_sghdr_rq(struct request *, struct sg_io_hdr *); +extern int blk_complete_sghdr_rq(struct request *, struct sg_io_hdr *, + struct bio *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) { diff --git a/include/linux/bsg.h b/include/linux/bsg.h new file mode 100644 index 000000000000..dc0d7282c4cb --- /dev/null +++ b/include/linux/bsg.h @@ -0,0 +1,21 @@ +#ifndef BSG_H +#define BSG_H + +#if defined(CONFIG_BLK_DEV_BSG) +struct bsg_class_device { + struct class_device *class_dev; + struct device *dev; + int minor; + struct gendisk *disk; + struct list_head list; +}; + +extern int bsg_register_disk(struct gendisk *); +extern void bsg_unregister_disk(struct gendisk *); +#else +struct bsg_class_device { }; +#define bsg_register_disk(disk) (0) +#define bsg_unregister_disk(disk) do { } while (0) +#endif + +#endif diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9756fc102a83..8c43d7032612 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -67,6 +67,7 @@ struct partition { #include #include #include +#include struct partition { unsigned char boot_ind; /* 0x80 - active */ @@ -91,6 +92,7 @@ struct hd_struct { #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif + struct bsg_class_device bsg_dev; }; #define GENHD_FL_REMOVABLE 1 From ac6b91b8035bd269a1fd42474f907d107c074805 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 20 Dec 2006 
11:17:43 +0100 Subject: [PATCH 02/25] block: changes for blk_rq_unmap_user new API This converts block/scsi_ioctl.c use blk_rq_unmap_user new API. blk_unmap_sghdr_rq is too simple and it might be better to remove it. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/scsi_ioctl.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 88fd008d38bd..daded70ffbb1 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -245,17 +245,7 @@ EXPORT_SYMBOL_GPL(blk_fill_sghdr_rq); */ int blk_unmap_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr) { - struct bio *bio = rq->bio; - - /* - * also releases request - */ - if (!hdr->iovec_count) - return blk_rq_unmap_user(bio, hdr->dxfer_len); - - rq_for_each_bio(bio, rq) - bio_unmap_user(bio); - + blk_rq_unmap_user(rq->bio); blk_put_request(rq); return 0; } @@ -335,7 +325,6 @@ static int sg_io(struct file *file, request_queue_t *q, has_write_perm = file->f_mode & FMODE_WRITE; if (blk_fill_sghdr_rq(q, rq, hdr, has_write_perm)) { - blk_rq_unmap_user(bio, hdr->dxfer_len); blk_put_request(rq); return -EFAULT; } From 9e69fbb5373f7c081acdf2b75d7bac7e95023dd1 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 20 Dec 2006 11:18:22 +0100 Subject: [PATCH 03/25] bsg: minor cleanups This just kills linux/config.h and dprintk warnings. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 724b69391cdc..53a09a52d154 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -16,7 +16,6 @@ * seperated right now. 
* */ -#include #include #include #include @@ -347,8 +346,8 @@ static void bsg_rq_end_io(struct request *rq, int uptodate) struct bsg_device *bd = bc->bd; unsigned long flags; - dprintk("%s: finished rq %p bio %p, bc %p offset %ld stat %d\n", - bd->name, rq, bc, bc->bio, bc - bd->cmd_map, uptodate); + dprintk("%s: finished rq %p bc %p, bio %p offset %d stat %d\n", + bd->name, rq, bc, bc->bio, bc - bd->cmd_map, uptodate); bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration); @@ -562,7 +561,7 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) int ret; ssize_t bytes_read; - dprintk("%s: read %lu bytes\n", bd->name, count); + dprintk("%s: read %Zd bytes\n", bd->name, count); bsg_set_block(bd, file); bytes_read = 0; @@ -642,7 +641,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) ssize_t bytes_read; int ret; - dprintk("%s: write %lu bytes\n", bd->name, count); + dprintk("%s: write %Zd bytes\n", bd->name, count); bsg_set_block(bd, file); bsg_set_write_perm(bd, file); @@ -657,7 +656,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) if (!bytes_read || (bytes_read && err_block_err(ret))) bytes_read = ret; - dprintk("%s: returning %lu\n", bd->name, bytes_read); + dprintk("%s: returning %Zd\n", bd->name, bytes_read); return bytes_read; } @@ -768,7 +767,7 @@ static struct bsg_device *bsg_add_device(struct inode *inode, strncpy(bd->name, disk->disk_name, sizeof(bd->name) - 1); dprintk("bound to <%s>, max queue %d\n", - format_dev_t(buf, i->i_rdev), bd->max_queue); + format_dev_t(buf, inode->i_rdev), bd->max_queue); mutex_unlock(&bsg_mutex); return bd; From 337ad41deae1b56e56731246322a93251df86e79 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 20 Dec 2006 11:18:54 +0100 Subject: [PATCH 04/25] block: export blk_verify_command for SG v4 blk_fill_sghdr_rq doesn't work for SG v4 so verify_command needed to be exported. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/scsi_ioctl.c | 7 ++++--- include/linux/blkdev.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index daded70ffbb1..db53b2c268d3 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -112,7 +112,7 @@ static int sg_emulated_host(request_queue_t *q, int __user *p) #define safe_for_read(cmd) [cmd] = CMD_READ_SAFE #define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE -static int verify_command(unsigned char *cmd, int has_write_perm) +int blk_verify_command(unsigned char *cmd, int has_write_perm) { static unsigned char cmd_type[256] = { @@ -212,6 +212,7 @@ static int verify_command(unsigned char *cmd, int has_write_perm) /* Otherwise fail it with an "Operation not permitted" */ return -EPERM; } +EXPORT_SYMBOL_GPL(blk_verify_command); int blk_fill_sghdr_rq(request_queue_t *q, struct request *rq, struct sg_io_hdr *hdr, int has_write_perm) @@ -220,7 +221,7 @@ int blk_fill_sghdr_rq(request_queue_t *q, struct request *rq, if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len)) return -EFAULT; - if (verify_command(rq->cmd, has_write_perm)) + if (blk_verify_command(rq->cmd, has_write_perm)) return -EPERM; /* @@ -457,7 +458,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q, if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) goto error; - err = verify_command(rq->cmd, file->f_mode & FMODE_WRITE); + err = blk_verify_command(rq->cmd, file->f_mode & FMODE_WRITE); if (err) goto error; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 53002d40efa2..f6bc0d03ffad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -692,6 +692,7 @@ extern int blk_fill_sghdr_rq(request_queue_t *, struct request *, extern int blk_unmap_sghdr_rq(struct request *, struct sg_io_hdr *); extern int blk_complete_sghdr_rq(struct request *, struct sg_io_hdr *, struct bio *); +extern int blk_verify_command(unsigned char *, 
int); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) { From 45977d0e87ac988d04fccfb89221727aaf8d78a4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 20 Dec 2006 11:19:32 +0100 Subject: [PATCH 05/25] bsg: add sg_io_v4 structure This patch adds sg_io_v4 structure that Doug proposed last month. There's one major change from the RFC. I dropped iovec, which needs compat stuff. The bsg code simply calls blk_rq_map_user against dout_xferp/din_xferp. So if possible, the page frames are directly mapped. If not possible, the block layer allocates new page frames and does memory copies. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/bsg.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/include/linux/bsg.h b/include/linux/bsg.h index dc0d7282c4cb..0d212cc06abf 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -1,6 +1,47 @@ #ifndef BSG_H #define BSG_H +struct sg_io_v4 { + int32_t guard; /* [i] 'Q' to differentiate from v3 */ + uint32_t protocol; /* [i] 0 -> SCSI , .... */ + uint32_t subprotocol; /* [i] 0 -> SCSI command, 1 -> SCSI task + management function, .... 
*/ + + uint32_t request_len; /* [i] in bytes */ + uint64_t request; /* [i], [*i] {SCSI: cdb} */ + uint32_t request_attr; /* [i] {SCSI: task attribute} */ + uint32_t request_tag; /* [i] {SCSI: task tag (only if flagged)} */ + uint32_t request_priority; /* [i] {SCSI: task priority} */ + uint32_t max_response_len; /* [i] in bytes */ + uint64_t response; /* [i], [*o] {SCSI: (auto)sense data} */ + + /* "din_" for data in (from device); "dout_" for data out (to device) */ + uint32_t dout_xfer_len; /* [i] bytes to be transferred to device */ + uint32_t din_xfer_len; /* [i] bytes to be transferred from device */ + uint64_t dout_xferp; /* [i], [*i] */ + uint64_t din_xferp; /* [i], [*o] */ + + uint32_t timeout; /* [i] units: millisecond */ + uint32_t flags; /* [i] bit mask */ + uint64_t usr_ptr; /* [i->o] unused internally */ + uint32_t spare_in; /* [i] */ + + uint32_t driver_status; /* [o] 0 -> ok */ + uint32_t transport_status; /* [o] 0 -> ok */ + uint32_t device_status; /* [o] {SCSI: command completion status} */ + uint32_t retry_delay; /* [o] {SCSI: status auxiliary information} */ + uint32_t info; /* [o] additional information */ + uint32_t duration; /* [o] time to complete, in milliseconds */ + uint32_t response_len; /* [o] bytes of response actually written */ + int32_t din_resid; /* [o] actual_din_xfer_len - din_xfer_len */ + uint32_t generated_tag; /* [o] {SCSI: task tag that transport chose} */ + uint32_t spare_out; /* [o] */ + + uint32_t padding; +}; + +#ifdef __KERNEL__ + #if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device { struct class_device *class_dev; @@ -18,4 +59,6 @@ struct bsg_class_device { }; #define bsg_unregister_disk(disk) do { } while (0) #endif +#endif /* __KERNEL__ */ + #endif From 70e36eceaf897da11aa0b4d82b46ca66e65a05f1 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 20 Dec 2006 11:20:15 +0100 Subject: [PATCH 06/25] bsg: replace SG v3 with SG v4 This patch replaces SG v3 in bsg with SG v4 (except for SG_IO). 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 212 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 128 insertions(+), 84 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 53a09a52d154..6d139d20ec99 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -103,8 +103,8 @@ struct bsg_command { struct request *rq; struct bio *bio; int err; - struct sg_io_hdr hdr; - struct sg_io_hdr __user *uhdr; + struct sg_io_v4 hdr; + struct sg_io_v4 __user *uhdr; char sense[SCSI_SENSE_BUFFERSIZE]; }; @@ -235,57 +235,82 @@ static struct bsg_command *bsg_get_command(struct bsg_device *bd) return bc; } -/* - * Check if sg_io_hdr from user is allowed and valid - */ -static int -bsg_validate_sghdr(request_queue_t *q, struct sg_io_hdr *hdr, int *rw) +static int blk_fill_sgv4_hdr_rq(request_queue_t *q, struct request *rq, + struct sg_io_v4 *hdr, int has_write_perm) { - if (hdr->interface_id != 'S') - return -EINVAL; - if (hdr->cmd_len > BLK_MAX_CDB) - return -EINVAL; - if (hdr->dxfer_len > (q->max_sectors << 9)) - return -EIO; + memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ + + if (copy_from_user(rq->cmd, (void *)(unsigned long)hdr->request, + hdr->request_len)) + return -EFAULT; + if (blk_verify_command(rq->cmd, has_write_perm)) + return -EPERM; /* - * looks sane, if no data then it should be fine from our POV + * fill in request structure */ - if (!hdr->dxfer_len) - return 0; + rq->cmd_len = hdr->request_len; + rq->cmd_type = REQ_TYPE_BLOCK_PC; - switch (hdr->dxfer_direction) { - case SG_DXFER_TO_FROM_DEV: - case SG_DXFER_FROM_DEV: - *rw = READ; - break; - case SG_DXFER_TO_DEV: - *rw = WRITE; - break; - default: - return -EINVAL; - } + rq->timeout = (hdr->timeout * HZ) / 1000; + if (!rq->timeout) + rq->timeout = q->sg_timeout; + if (!rq->timeout) + rq->timeout = BLK_DEFAULT_SG_TIMEOUT; return 0; } /* - * map sg_io_hdr to a request. 
for scatter-gather sg_io_hdr, we map - * each segment to a bio and string multiple bio's to the request + * Check if sg_io_v4 from user is allowed and valid + */ +static int +bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw) +{ + if (hdr->guard != 'Q') + return -EINVAL; + if (hdr->request_len > BLK_MAX_CDB) + return -EINVAL; + if (hdr->dout_xfer_len > (q->max_sectors << 9) || + hdr->din_xfer_len > (q->max_sectors << 9)) + return -EIO; + + /* not supported currently */ + if (hdr->protocol || hdr->subprotocol) + return -EINVAL; + + /* + * looks sane, if no data then it should be fine from our POV + */ + if (!hdr->dout_xfer_len && !hdr->din_xfer_len) + return 0; + + /* not supported currently */ + if (hdr->dout_xfer_len && hdr->din_xfer_len) + return -EINVAL; + + *rw = hdr->dout_xfer_len ? WRITE : READ; + + return 0; +} + +/* + * map sg_io_v4 to a request. */ static struct request * -bsg_map_hdr(struct bsg_device *bd, int rw, struct sg_io_hdr *hdr) +bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr) { request_queue_t *q = bd->queue; - struct sg_iovec iov; - struct sg_iovec __user *u_iov; struct request *rq; - int ret, i = 0; + int ret, rw; + unsigned int dxfer_len; + void *dxferp = NULL; - dprintk("map hdr %p/%d/%d\n", hdr->dxferp, hdr->dxfer_len, - hdr->iovec_count); + dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp, + hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp, + hdr->din_xfer_len); - ret = bsg_validate_sghdr(q, hdr, &rw); + ret = bsg_validate_sgv4_hdr(q, hdr, &rw); if (ret) return ERR_PTR(ret); @@ -293,44 +318,29 @@ bsg_map_hdr(struct bsg_device *bd, int rw, struct sg_io_hdr *hdr) * map scatter-gather elements seperately and string them to request */ rq = blk_get_request(q, rw, GFP_KERNEL); - ret = blk_fill_sghdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM, - &bd->flags)); + ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM, + &bd->flags)); if (ret) { blk_put_request(rq); return 
ERR_PTR(ret); } - if (!hdr->iovec_count) { - ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); - if (ret) - goto out; - } + if (hdr->dout_xfer_len) { + dxfer_len = hdr->dout_xfer_len; + dxferp = (void*)(unsigned long)hdr->dout_xferp; + } else if (hdr->din_xfer_len) { + dxfer_len = hdr->din_xfer_len; + dxferp = (void*)(unsigned long)hdr->din_xferp; + } else + dxfer_len = 0; - u_iov = hdr->dxferp; - for (ret = 0, i = 0; i < hdr->iovec_count; i++, u_iov++) { - if (copy_from_user(&iov, u_iov, sizeof(iov))) { - ret = -EFAULT; - break; + if (dxfer_len) { + ret = blk_rq_map_user(q, rq, dxferp, dxfer_len); + if (ret) { + dprintk("failed map at %d\n", ret); + blk_put_request(rq); + rq = ERR_PTR(ret); } - - if (!iov.iov_len || !iov.iov_base) { - ret = -EINVAL; - break; - } - - ret = blk_rq_map_user(q, rq, iov.iov_base, iov.iov_len); - if (ret) - break; - } - - /* - * bugger, cleanup - */ - if (ret) { -out: - dprintk("failed map at %d: %d\n", i, ret); - blk_unmap_sghdr_rq(rq, hdr); - rq = ERR_PTR(ret); } return rq; @@ -346,7 +356,7 @@ static void bsg_rq_end_io(struct request *rq, int uptodate) struct bsg_device *bd = bc->bd; unsigned long flags; - dprintk("%s: finished rq %p bc %p, bio %p offset %d stat %d\n", + dprintk("%s: finished rq %p bc %p, bio %p offset %Zd stat %d\n", bd->name, rq, bc, bc->bio, bc - bd->cmd_map, uptodate); bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration); @@ -434,6 +444,42 @@ bsg_get_done_cmd_nosignals(struct bsg_device *bd) return __bsg_get_done_cmd(bd, TASK_UNINTERRUPTIBLE); } +static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, + struct bio *bio) +{ + int ret = 0; + + dprintk("rq %p bio %p %u\n", rq, bio, rq->errors); + /* + * fill in all the output members + */ + hdr->device_status = status_byte(rq->errors); + hdr->transport_status = host_byte(rq->errors); + hdr->driver_status = driver_byte(rq->errors); + hdr->info = 0; + if (hdr->device_status || hdr->transport_status || hdr->driver_status) + 
hdr->info |= SG_INFO_CHECK; + hdr->din_resid = rq->data_len; + hdr->response_len = 0; + + if (rq->sense_len && hdr->response) { + int len = min((unsigned int) hdr->max_response_len, + rq->sense_len); + + ret = copy_to_user((void*)(unsigned long)hdr->response, + rq->sense, len); + if (!ret) + hdr->response_len = len; + else + ret = -EFAULT; + } + + blk_rq_unmap_user(bio); + blk_put_request(rq); + + return ret; +} + static int bsg_complete_all_commands(struct bsg_device *bd) { struct bsg_command *bc; @@ -476,7 +522,7 @@ static int bsg_complete_all_commands(struct bsg_device *bd) break; } - tret = blk_complete_sghdr_rq(bc->rq, &bc->hdr, bc->bio); + tret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio); if (!ret) ret = tret; @@ -495,11 +541,11 @@ __bsg_read(char __user *buf, size_t count, bsg_command_callback get_bc, struct bsg_command *bc; int nr_commands, ret; - if (count % sizeof(struct sg_io_hdr)) + if (count % sizeof(struct sg_io_v4)) return -EINVAL; ret = 0; - nr_commands = count / sizeof(struct sg_io_hdr); + nr_commands = count / sizeof(struct sg_io_v4); while (nr_commands) { bc = get_bc(bd, iov); if (IS_ERR(bc)) { @@ -512,7 +558,7 @@ __bsg_read(char __user *buf, size_t count, bsg_command_callback get_bc, * after completing the request. 
so do that here, * bsg_complete_work() cannot do that for us */ - ret = blk_complete_sghdr_rq(bc->rq, &bc->hdr, bc->bio); + ret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio); if (copy_to_user(buf, (char *) &bc->hdr, sizeof(bc->hdr))) ret = -EFAULT; @@ -522,8 +568,8 @@ __bsg_read(char __user *buf, size_t count, bsg_command_callback get_bc, if (ret) break; - buf += sizeof(struct sg_io_hdr); - *bytes_read += sizeof(struct sg_io_hdr); + buf += sizeof(struct sg_io_v4); + *bytes_read += sizeof(struct sg_io_v4); nr_commands--; } @@ -582,16 +628,15 @@ static ssize_t __bsg_write(struct bsg_device *bd, const char __user *buf, struct request *rq; int ret, nr_commands; - if (count % sizeof(struct sg_io_hdr)) + if (count % sizeof(struct sg_io_v4)) return -EINVAL; - nr_commands = count / sizeof(struct sg_io_hdr); + nr_commands = count / sizeof(struct sg_io_v4); rq = NULL; bc = NULL; ret = 0; while (nr_commands) { request_queue_t *q = bd->queue; - int rw = READ; bc = bsg_get_command(bd); if (!bc) @@ -602,7 +647,7 @@ static ssize_t __bsg_write(struct bsg_device *bd, const char __user *buf, break; } - bc->uhdr = (struct sg_io_hdr __user *) buf; + bc->uhdr = (struct sg_io_v4 __user *) buf; if (copy_from_user(&bc->hdr, buf, sizeof(bc->hdr))) { ret = -EFAULT; break; @@ -611,7 +656,7 @@ static ssize_t __bsg_write(struct bsg_device *bd, const char __user *buf, /* * get a request, fill in the blanks, and add to request queue */ - rq = bsg_map_hdr(bd, rw, &bc->hdr); + rq = bsg_map_hdr(bd, &bc->hdr); if (IS_ERR(rq)) { ret = PTR_ERR(rq); rq = NULL; @@ -622,12 +667,10 @@ static ssize_t __bsg_write(struct bsg_device *bd, const char __user *buf, bc = NULL; rq = NULL; nr_commands--; - buf += sizeof(struct sg_io_hdr); - *bytes_read += sizeof(struct sg_io_hdr); + buf += sizeof(struct sg_io_v4); + *bytes_read += sizeof(struct sg_io_v4); } - if (rq) - blk_unmap_sghdr_rq(rq, &bc->hdr); if (bc) bsg_free_command(bc); @@ -898,11 +941,12 @@ bsg_ioctl(struct inode *inode, struct file *file, 
unsigned int cmd, case SG_GET_RESERVED_SIZE: case SG_SET_RESERVED_SIZE: case SG_EMULATED_HOST: - case SG_IO: case SCSI_IOCTL_SEND_COMMAND: { void __user *uarg = (void __user *) arg; return scsi_cmd_ioctl(file, bd->disk, cmd, uarg); } + case SG_IO: + return -EINVAL; /* * block device ioctls */ From 10e8855b945193a62801429af3aab9f7e27ef56a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 20 Dec 2006 11:20:57 +0100 Subject: [PATCH 07/25] bsg: add SG_IO to SG v4 This adds SG_IO support to SG v4. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 6d139d20ec99..9dc5d36a52e5 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -945,8 +945,27 @@ bsg_ioctl(struct inode *inode, struct file *file, unsigned int cmd, void __user *uarg = (void __user *) arg; return scsi_cmd_ioctl(file, bd->disk, cmd, uarg); } - case SG_IO: - return -EINVAL; + case SG_IO: { + struct request *rq; + struct bio *bio; + struct sg_io_v4 hdr; + + if (copy_from_user(&hdr, uarg, sizeof(hdr))) + return -EFAULT; + + rq = bsg_map_hdr(bd, &hdr); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + bio = rq->bio; + blk_execute_rq(bd->queue, bd->disk, rq, 0); + blk_complete_sgv4_hdr_rq(rq, &hdr, bio); + + if (copy_to_user(uarg, &hdr, sizeof(hdr))) + return -EFAULT; + else + return 0; + } /* * block device ioctls */ From 1594a3f0eb526c73bc3915e8da13f2abf0ea1acd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Dec 2006 11:23:35 +0100 Subject: [PATCH 08/25] bsg: use u32 etc instead of uint32_t Signed-off-by: Jens Axboe --- include/linux/bsg.h | 58 ++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 0d212cc06abf..f968726cfadc 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -2,42 +2,42 @@ #define BSG_H struct sg_io_v4 { - int32_t guard; /* [i] 'Q' to 
differentiate from v3 */ - uint32_t protocol; /* [i] 0 -> SCSI , .... */ - uint32_t subprotocol; /* [i] 0 -> SCSI command, 1 -> SCSI task + s32 guard; /* [i] 'Q' to differentiate from v3 */ + u32 protocol; /* [i] 0 -> SCSI , .... */ + u32 subprotocol; /* [i] 0 -> SCSI command, 1 -> SCSI task management function, .... */ - uint32_t request_len; /* [i] in bytes */ - uint64_t request; /* [i], [*i] {SCSI: cdb} */ - uint32_t request_attr; /* [i] {SCSI: task attribute} */ - uint32_t request_tag; /* [i] {SCSI: task tag (only if flagged)} */ - uint32_t request_priority; /* [i] {SCSI: task priority} */ - uint32_t max_response_len; /* [i] in bytes */ - uint64_t response; /* [i], [*o] {SCSI: (auto)sense data} */ + u32 request_len; /* [i] in bytes */ + u64 request; /* [i], [*i] {SCSI: cdb} */ + u32 request_attr; /* [i] {SCSI: task attribute} */ + u32 request_tag; /* [i] {SCSI: task tag (only if flagged)} */ + u32 request_priority; /* [i] {SCSI: task priority} */ + u32 max_response_len; /* [i] in bytes */ + u64 response; /* [i], [*o] {SCSI: (auto)sense data} */ /* "din_" for data in (from device); "dout_" for data out (to device) */ - uint32_t dout_xfer_len; /* [i] bytes to be transferred to device */ - uint32_t din_xfer_len; /* [i] bytes to be transferred from device */ - uint64_t dout_xferp; /* [i], [*i] */ - uint64_t din_xferp; /* [i], [*o] */ + u32 dout_xfer_len; /* [i] bytes to be transferred to device */ + u32 din_xfer_len; /* [i] bytes to be transferred from device */ + u64 dout_xferp; /* [i], [*i] */ + u64 din_xferp; /* [i], [*o] */ - uint32_t timeout; /* [i] units: millisecond */ - uint32_t flags; /* [i] bit mask */ - uint64_t usr_ptr; /* [i->o] unused internally */ - uint32_t spare_in; /* [i] */ + u32 timeout; /* [i] units: millisecond */ + u32 flags; /* [i] bit mask */ + u64 usr_ptr; /* [i->o] unused internally */ + u32 spare_in; /* [i] */ - uint32_t driver_status; /* [o] 0 -> ok */ - uint32_t transport_status; /* [o] 0 -> ok */ - uint32_t device_status; /* [o] 
{SCSI: command completion status} */ - uint32_t retry_delay; /* [o] {SCSI: status auxiliary information} */ - uint32_t info; /* [o] additional information */ - uint32_t duration; /* [o] time to complete, in milliseconds */ - uint32_t response_len; /* [o] bytes of response actually written */ - int32_t din_resid; /* [o] actual_din_xfer_len - din_xfer_len */ - uint32_t generated_tag; /* [o] {SCSI: task tag that transport chose} */ - uint32_t spare_out; /* [o] */ + u32 driver_status; /* [o] 0 -> ok */ + u32 transport_status; /* [o] 0 -> ok */ + u32 device_status; /* [o] {SCSI: command completion status} */ + u32 retry_delay; /* [o] {SCSI: status auxiliary information} */ + u32 info; /* [o] additional information */ + u32 duration; /* [o] time to complete, in milliseconds */ + u32 response_len; /* [o] bytes of response actually written */ + s32 din_resid; /* [o] actual_din_xfer_len - din_xfer_len */ + u32 generated_tag; /* [o] {SCSI: task tag that transport chose} */ + u32 spare_out; /* [o] */ - uint32_t padding; + u32 padding; }; #ifdef __KERNEL__ From b711afa6959e5c8f457f1687b5d4a485c7974f32 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Dec 2006 11:25:23 +0100 Subject: [PATCH 09/25] bsg: style cleanup Signed-off-by: Jens Axboe --- block/bsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 9dc5d36a52e5..6f05a397dc3a 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -963,8 +963,8 @@ bsg_ioctl(struct inode *inode, struct file *file, unsigned int cmd, if (copy_to_user(uarg, &hdr, sizeof(hdr))) return -EFAULT; - else - return 0; + + return 0; } /* * block device ioctls From 2ef7086a207d3d899ae88602a135fe1c24e1811f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Dec 2006 11:26:11 +0100 Subject: [PATCH 10/25] bsg: silence a bogus gcc warning Signed-off-by: Jens Axboe --- block/bsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bsg.c b/block/bsg.c index 6f05a397dc3a..9d77a0c72457 
100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -302,7 +302,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr) { request_queue_t *q = bd->queue; struct request *rq; - int ret, rw; + int ret, rw = 0; /* shut up gcc */ unsigned int dxfer_len; void *dxferp = NULL; From 3862153b673516b2efa0447b9b3778f47ac8f8c8 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 22 Dec 2006 09:43:51 +0100 Subject: [PATCH 11/25] Replace s32, u32 and u64 with __s32, __u32 and __u64 in bsg.h for userspace Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/bsg.h | 58 ++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/include/linux/bsg.h b/include/linux/bsg.h index f968726cfadc..2154a6dfbd53 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -2,42 +2,42 @@ #define BSG_H struct sg_io_v4 { - s32 guard; /* [i] 'Q' to differentiate from v3 */ - u32 protocol; /* [i] 0 -> SCSI , .... */ - u32 subprotocol; /* [i] 0 -> SCSI command, 1 -> SCSI task + __s32 guard; /* [i] 'Q' to differentiate from v3 */ + __u32 protocol; /* [i] 0 -> SCSI , .... */ + __u32 subprotocol; /* [i] 0 -> SCSI command, 1 -> SCSI task management function, .... 
*/ - u32 request_len; /* [i] in bytes */ - u64 request; /* [i], [*i] {SCSI: cdb} */ - u32 request_attr; /* [i] {SCSI: task attribute} */ - u32 request_tag; /* [i] {SCSI: task tag (only if flagged)} */ - u32 request_priority; /* [i] {SCSI: task priority} */ - u32 max_response_len; /* [i] in bytes */ - u64 response; /* [i], [*o] {SCSI: (auto)sense data} */ + __u32 request_len; /* [i] in bytes */ + __u64 request; /* [i], [*i] {SCSI: cdb} */ + __u32 request_attr; /* [i] {SCSI: task attribute} */ + __u32 request_tag; /* [i] {SCSI: task tag (only if flagged)} */ + __u32 request_priority; /* [i] {SCSI: task priority} */ + __u32 max_response_len; /* [i] in bytes */ + __u64 response; /* [i], [*o] {SCSI: (auto)sense data} */ /* "din_" for data in (from device); "dout_" for data out (to device) */ - u32 dout_xfer_len; /* [i] bytes to be transferred to device */ - u32 din_xfer_len; /* [i] bytes to be transferred from device */ - u64 dout_xferp; /* [i], [*i] */ - u64 din_xferp; /* [i], [*o] */ + __u32 dout_xfer_len; /* [i] bytes to be transferred to device */ + __u32 din_xfer_len; /* [i] bytes to be transferred from device */ + __u64 dout_xferp; /* [i], [*i] */ + __u64 din_xferp; /* [i], [*o] */ - u32 timeout; /* [i] units: millisecond */ - u32 flags; /* [i] bit mask */ - u64 usr_ptr; /* [i->o] unused internally */ - u32 spare_in; /* [i] */ + __u32 timeout; /* [i] units: millisecond */ + __u32 flags; /* [i] bit mask */ + __u64 usr_ptr; /* [i->o] unused internally */ + __u32 spare_in; /* [i] */ - u32 driver_status; /* [o] 0 -> ok */ - u32 transport_status; /* [o] 0 -> ok */ - u32 device_status; /* [o] {SCSI: command completion status} */ - u32 retry_delay; /* [o] {SCSI: status auxiliary information} */ - u32 info; /* [o] additional information */ - u32 duration; /* [o] time to complete, in milliseconds */ - u32 response_len; /* [o] bytes of response actually written */ - s32 din_resid; /* [o] actual_din_xfer_len - din_xfer_len */ - u32 generated_tag; /* [o] {SCSI: task tag that 
transport chose} */ - u32 spare_out; /* [o] */ + __u32 driver_status; /* [o] 0 -> ok */ + __u32 transport_status; /* [o] 0 -> ok */ + __u32 device_status; /* [o] {SCSI: command completion status} */ + __u32 retry_delay; /* [o] {SCSI: status auxiliary information} */ + __u32 info; /* [o] additional information */ + __u32 duration; /* [o] time to complete, in milliseconds */ + __u32 response_len; /* [o] bytes of response actually written */ + __s32 din_resid; /* [o] actual_din_xfer_len - din_xfer_len */ + __u32 generated_tag; /* [o] {SCSI: task tag that transport chose} */ + __u32 spare_out; /* [o] */ - u32 padding; + __u32 padding; }; #ifdef __KERNEL__ From 5309cb38de65eddd5f7e125da750accf949f29e8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 23 Jan 2007 16:24:41 +0100 Subject: [PATCH 12/25] Add queue resizing support Just get rid of the preallocated command map, use the slab cache to get/free commands instead. Original patch from FUJITA Tomonori , changed by me to not use a mempool. 
Signed-off-by: Jens Axboe --- block/bsg.c | 96 ++++++++++++++++++----------------------------------- 1 file changed, 32 insertions(+), 64 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 9d77a0c72457..c56618ae54c3 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -33,8 +33,6 @@ static char bsg_version[] = "block layer sg (bsg) 0.4"; -struct bsg_command; - struct bsg_device { struct gendisk *disk; request_queue_t *queue; @@ -46,8 +44,6 @@ struct bsg_device { int minor; int queued_cmds; int done_cmds; - unsigned long *cmd_bitmap; - struct bsg_command *cmd_map; wait_queue_head_t wq_done; wait_queue_head_t wq_free; char name[BDEVNAME_SIZE]; @@ -60,14 +56,7 @@ enum { BSG_F_WRITE_PERM = 2, }; -/* - * command allocation bitmap defines - */ -#define BSG_CMDS_PAGE_ORDER (1) -#define BSG_CMDS_PER_LONG (sizeof(unsigned long) * 8) -#define BSG_CMDS_MASK (BSG_CMDS_PER_LONG - 1) -#define BSG_CMDS_BYTES (PAGE_SIZE * (1 << BSG_CMDS_PAGE_ORDER)) -#define BSG_CMDS (BSG_CMDS_BYTES / sizeof(struct bsg_command)) +#define BSG_DEFAULT_CMDS 64 #undef BSG_DEBUG @@ -94,6 +83,8 @@ static struct hlist_head bsg_device_list[BSG_LIST_SIZE]; static struct class *bsg_class; static LIST_HEAD(bsg_class_list); +static struct kmem_cache *bsg_cmd_cachep; + /* * our internal command type */ @@ -111,14 +102,12 @@ struct bsg_command { static void bsg_free_command(struct bsg_command *bc) { struct bsg_device *bd = bc->bd; - unsigned long bitnr = bc - bd->cmd_map; unsigned long flags; - dprintk("%s: command bit offset %lu\n", bd->name, bitnr); + kmem_cache_free(bsg_cmd_cachep, bc); spin_lock_irqsave(&bd->lock, flags); bd->queued_cmds--; - __clear_bit(bitnr, bd->cmd_bitmap); spin_unlock_irqrestore(&bd->lock, flags); wake_up(&bd->wq_free); @@ -127,32 +116,29 @@ static void bsg_free_command(struct bsg_command *bc) static struct bsg_command *__bsg_alloc_command(struct bsg_device *bd) { struct bsg_command *bc = NULL; - unsigned long *map; - int free_nr; spin_lock_irq(&bd->lock); if (bd->queued_cmds >= 
bd->max_queue) goto out; - for (free_nr = 0, map = bd->cmd_bitmap; *map == ~0UL; map++) - free_nr += BSG_CMDS_PER_LONG; - - BUG_ON(*map == ~0UL); - bd->queued_cmds++; - free_nr += ffz(*map); - __set_bit(free_nr, bd->cmd_bitmap); spin_unlock_irq(&bd->lock); - bc = bd->cmd_map + free_nr; + bc = kmem_cache_alloc(bsg_cmd_cachep, GFP_USER); + if (unlikely(!bc)) { + spin_lock_irq(&bd->lock); + goto alloc_fail; + } + memset(bc, 0, sizeof(*bc)); bc->bd = bd; INIT_LIST_HEAD(&bc->list); - dprintk("%s: returning free cmd %p (bit %d)\n", bd->name, bc, free_nr); + dprintk("%s: returning free cmd %p\n", bd->name, bc); return bc; +alloc_fail: + bd->queued_cmds--; out: - dprintk("%s: failed (depth %d)\n", bd->name, bd->queued_cmds); spin_unlock_irq(&bd->lock); return bc; } @@ -356,8 +342,8 @@ static void bsg_rq_end_io(struct request *rq, int uptodate) struct bsg_device *bd = bc->bd; unsigned long flags; - dprintk("%s: finished rq %p bc %p, bio %p offset %Zd stat %d\n", - bd->name, rq, bc, bc->bio, bc - bd->cmd_map, uptodate); + dprintk("%s: finished rq %p bc %p, bio %p stat %d\n", + bd->name, rq, bc, bc->bio, uptodate); bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration); @@ -703,21 +689,9 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) return bytes_read; } -static void bsg_free_device(struct bsg_device *bd) -{ - if (bd->cmd_map) - free_pages((unsigned long) bd->cmd_map, BSG_CMDS_PAGE_ORDER); - - kfree(bd->cmd_bitmap); - kfree(bd); -} - static struct bsg_device *bsg_alloc_device(void) { - struct bsg_command *cmd_map; - unsigned long *cmd_bitmap; struct bsg_device *bd; - int bits; bd = kzalloc(sizeof(struct bsg_device), GFP_KERNEL); if (unlikely(!bd)) @@ -725,19 +699,7 @@ static struct bsg_device *bsg_alloc_device(void) spin_lock_init(&bd->lock); - bd->max_queue = BSG_CMDS; - - bits = (BSG_CMDS / BSG_CMDS_PER_LONG) + 1; - cmd_bitmap = kzalloc(bits * sizeof(unsigned long), GFP_KERNEL); - if (!cmd_bitmap) - goto out_free_bd; - 
bd->cmd_bitmap = cmd_bitmap; - - cmd_map = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, - BSG_CMDS_PAGE_ORDER); - if (!cmd_map) - goto out_free_bitmap; - bd->cmd_map = cmd_map; + bd->max_queue = BSG_DEFAULT_CMDS; INIT_LIST_HEAD(&bd->busy_list); INIT_LIST_HEAD(&bd->done_list); @@ -746,12 +708,6 @@ static struct bsg_device *bsg_alloc_device(void) init_waitqueue_head(&bd->wq_free); init_waitqueue_head(&bd->wq_done); return bd; - -out_free_bitmap: - kfree(cmd_bitmap); -out_free_bd: - kfree(bd); - return NULL; } static int bsg_put_device(struct bsg_device *bd) @@ -779,7 +735,7 @@ static int bsg_put_device(struct bsg_device *bd) blk_put_queue(bd->queue); hlist_del(&bd->dev_list); - bsg_free_device(bd); + kfree(bd); out: mutex_unlock(&bsg_mutex); return ret; @@ -918,15 +874,17 @@ bsg_ioctl(struct inode *inode, struct file *file, unsigned int cmd, */ case SG_GET_COMMAND_Q: return put_user(bd->max_queue, uarg); - case SG_SET_COMMAND_Q: { + case SG_SET_COMMAND_Q: { int queue; if (get_user(queue, uarg)) return -EFAULT; - if (queue > BSG_CMDS || queue < 1) + if (queue < 1) return -EINVAL; + spin_lock_irq(&bd->lock); bd->max_queue = queue; + spin_unlock_irq(&bd->lock); return 0; } @@ -1035,15 +993,25 @@ static int __init bsg_init(void) { int ret, i; + bsg_cmd_cachep = kmem_cache_create("bsg_cmd", + sizeof(struct bsg_command), 0, 0, NULL, NULL); + if (!bsg_cmd_cachep) { + printk(KERN_ERR "bsg: failed creating slab cache\n"); + return -ENOMEM; + } + for (i = 0; i < BSG_LIST_SIZE; i++) INIT_HLIST_HEAD(&bsg_device_list[i]); bsg_class = class_create(THIS_MODULE, "bsg"); - if (IS_ERR(bsg_class)) + if (IS_ERR(bsg_class)) { + kmem_cache_destroy(bsg_cmd_cachep); return PTR_ERR(bsg_class); + } ret = register_chrdev(BSG_MAJOR, "bsg", &bsg_fops); if (ret) { + kmem_cache_destroy(bsg_cmd_cachep); class_destroy(bsg_class); return ret; } From 264a0472183ce7ad05eea3360f5907880a1e5e78 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 23 Jan 2007 16:30:17 +0100 Subject: [PATCH 13/25] 
bsg: add cheasy error checks for sysfs stuff Signed-off-by: Jens Axboe --- block/bsg.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/block/bsg.c b/block/bsg.c index c56618ae54c3..e97e3ec08ffc 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -983,10 +983,19 @@ int bsg_register_disk(struct gendisk *disk) bsg_device_nr++; bcd->disk = disk; bcd->class_dev = class_device_create(bsg_class, NULL, dev, bcd->dev, "%s", disk->disk_name); + if (!bcd->class_dev) + goto err; list_add_tail(&bcd->list, &bsg_class_list); - sysfs_create_link(&q->kobj, &bcd->class_dev->kobj, "bsg"); + if (sysfs_create_link(&q->kobj, &bcd->class_dev->kobj, "bsg")) + goto err; mutex_unlock(&bsg_mutex); return 0; +err: + bsg_device_nr--; + if (bcd->class_dev) + class_device_destroy(bsg_class, MKDEV(BSG_MAJOR, bcd->minor)); + mutex_unlock(&bsg_mutex); + return -ENOMEM; } static int __init bsg_init(void) From 7e75d73080d822d2bbbd5b0f7f293719dd1f9109 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Jan 2007 09:05:54 +0100 Subject: [PATCH 14/25] bsg: simplify __bsg_alloc_command failpath Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index e97e3ec08ffc..c85d961ee41e 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -128,7 +128,8 @@ static struct bsg_command *__bsg_alloc_command(struct bsg_device *bd) bc = kmem_cache_alloc(bsg_cmd_cachep, GFP_USER); if (unlikely(!bc)) { spin_lock_irq(&bd->lock); - goto alloc_fail; + bd->queued_cmds--; + goto out; } memset(bc, 0, sizeof(*bc)); @@ -136,8 +137,6 @@ static struct bsg_command *__bsg_alloc_command(struct bsg_device *bd) INIT_LIST_HEAD(&bc->list); dprintk("%s: returning free cmd %p\n", bd->name, bc); return bc; -alloc_fail: - bd->queued_cmds--; out: spin_unlock_irq(&bd->lock); return bc; From 45e79a3acdcf54113b3d7b23e9e64e6541dbfeb5 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 9 Jul 2007 
12:39:20 +0200 Subject: [PATCH 15/25] bsg: add a request_queue argument to scsi_cmd_ioctl() bsg uses scsi_cmd_ioctl() for some SCSI/sg ioctl commands. scsi_cmd_ioctl() gets a request queue from a gendisk argument. This prevents bsg from being bound to SCSI devices that don't have a gendisk (like OSD). This adds a request_queue argument to scsi_cmd_ioctl(). The SCSI/sg ioctl commands don't use a gendisk so it's safe for any SCSI device to use scsi_cmd_ioctl(). Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 2 +- block/scsi_ioctl.c | 10 +++------- drivers/block/ub.c | 2 +- drivers/cdrom/cdrom.c | 3 ++- drivers/ide/ide.c | 2 +- drivers/scsi/sd.c | 2 +- drivers/scsi/st.c | 3 ++- include/linux/blkdev.h | 3 ++- 8 files changed, 13 insertions(+), 14 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index c85d961ee41e..0427ece9b6d8 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -900,7 +900,7 @@ bsg_ioctl(struct inode *inode, struct file *file, unsigned int cmd, case SG_EMULATED_HOST: case SCSI_IOCTL_SEND_COMMAND: { void __user *uarg = (void __user *) arg; - return scsi_cmd_ioctl(file, bd->disk, cmd, uarg); + return scsi_cmd_ioctl(file, bd->queue, bd->disk, cmd, uarg); } case SG_IO: { struct request *rq; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index db53b2c268d3..a26ba07955fe 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -548,16 +548,12 @@ static inline int blk_send_start_stop(request_queue_t *q, struct gendisk *bd_dis return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data); } -int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, void __user *arg) +int scsi_cmd_ioctl(struct file *file, struct request_queue *q, + struct gendisk *bd_disk, unsigned int cmd, void __user *arg) { - request_queue_t *q; int err; - q = bd_disk->queue; - if (!q) - return -ENXIO; - - if (blk_get_queue(q)) + if (!q || blk_get_queue(q)) return -ENXIO; switch (cmd) { diff --git a/drivers/block/ub.c 
b/drivers/block/ub.c index 18c8b6c0db20..8b13d7d2cb63 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -1709,7 +1709,7 @@ static int ub_bd_ioctl(struct inode *inode, struct file *filp, struct gendisk *disk = inode->i_bdev->bd_disk; void __user *usermem = (void __user *) arg; - return scsi_cmd_ioctl(filp, disk, cmd, usermem); + return scsi_cmd_ioctl(filp, disk->queue, disk, cmd, usermem); } /* diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index aa5468f487ba..499019bf8f40 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2695,11 +2695,12 @@ int cdrom_ioctl(struct file * file, struct cdrom_device_info *cdi, { void __user *argp = (void __user *)arg; int ret; + struct gendisk *disk = ip->i_bdev->bd_disk; /* * Try the generic SCSI command ioctl's first. */ - ret = scsi_cmd_ioctl(file, ip->i_bdev->bd_disk, cmd, argp); + ret = scsi_cmd_ioctl(file, disk->queue, disk, cmd, argp); if (ret != -ENOTTY) return ret; diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 9ae60a7400a2..8cd7694593c9 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -1052,7 +1052,7 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device int err, (*setfunc)(ide_drive_t *, int); u8 *val; - err = scsi_cmd_ioctl(file, bdev->bd_disk, cmd, p); + err = scsi_cmd_ioctl(file, bdev->bd_disk->queue, bdev->bd_disk, cmd, p); if (err != -ENOTTY) return err; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 448d316f12d7..424d557284a9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -684,7 +684,7 @@ static int sd_ioctl(struct inode * inode, struct file * filp, case SCSI_IOCTL_GET_BUS_NUMBER: return scsi_ioctl(sdp, cmd, p); default: - error = scsi_cmd_ioctl(filp, disk, cmd, p); + error = scsi_cmd_ioctl(filp, disk->queue, disk, cmd, p); if (error != -ENOTTY) return error; } diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 55bfeccf68a2..a4f7b8465773 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -3549,7 
+3549,8 @@ static int st_ioctl(struct inode *inode, struct file *file, !capable(CAP_SYS_RAWIO)) i = -EPERM; else - i = scsi_cmd_ioctl(file, STp->disk, cmd_in, p); + i = scsi_cmd_ioctl(file, STp->disk->queue, + STp->disk, cmd_in, p); if (i != -ENOTTY) return i; break; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f6bc0d03ffad..2746632c2267 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -644,7 +644,8 @@ extern void blk_requeue_request(request_queue_t *, struct request *); extern void blk_plug_device(request_queue_t *); extern int blk_remove_plug(request_queue_t *); extern void blk_recount_segments(request_queue_t *, struct bio *); -extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *); +extern int scsi_cmd_ioctl(struct file *, struct request_queue *, + struct gendisk *, unsigned int, void __user *); extern int sg_scsi_ioctl(struct file *, struct request_queue *, struct gendisk *, struct scsi_ioctl_command __user *); From d351af01b9307566135cb0f355ca65d0952c10b5 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 9 Jul 2007 12:40:35 +0200 Subject: [PATCH 16/25] bsg: bind bsg to request_queue instead of gendisk This patch binds bsg devices to request_queue instead of gendisk. Any object (like a transport entity) can define its own request_handler and create its own bsg device. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 37 +++++++++++++++++-------------------- block/ll_rw_blk.c | 4 ++-- include/linux/blkdev.h | 5 +++++ include/linux/bsg.h | 10 +++++----- include/linux/genhd.h | 2 -- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 0427ece9b6d8..4ea4bedb413f 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -34,7 +34,6 @@ static char bsg_version[] = "block layer sg (bsg) 0.4"; struct bsg_device { - struct gendisk *disk; request_queue_t *queue; spinlock_t lock; struct list_head busy_list; @@ -46,7 +45,7 @@ struct bsg_device { int done_cmds; wait_queue_head_t wq_done; wait_queue_head_t wq_free; - char name[BDEVNAME_SIZE]; + char name[BUS_ID_SIZE]; int max_queue; unsigned long flags; }; @@ -375,7 +374,7 @@ static void bsg_add_command(struct bsg_device *bd, request_queue_t *q, dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc); rq->end_io_data = bc; - blk_execute_rq_nowait(q, bd->disk, rq, 1, bsg_rq_end_io); + blk_execute_rq_nowait(q, NULL, rq, 1, bsg_rq_end_io); } static inline struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd) @@ -741,7 +740,7 @@ out: } static struct bsg_device *bsg_add_device(struct inode *inode, - struct gendisk *disk, + struct request_queue *rq, struct file *file) { struct bsg_device *bd = NULL; @@ -753,17 +752,16 @@ static struct bsg_device *bsg_add_device(struct inode *inode, if (!bd) return ERR_PTR(-ENOMEM); - bd->disk = disk; - bd->queue = disk->queue; - kobject_get(&disk->queue->kobj); + bd->queue = rq; + kobject_get(&rq->kobj); bsg_set_block(bd, file); atomic_set(&bd->ref_count, 1); bd->minor = iminor(inode); mutex_lock(&bsg_mutex); - hlist_add_head(&bd->dev_list,&bsg_device_list[bsg_list_idx(bd->minor)]); + hlist_add_head(&bd->dev_list, &bsg_device_list[bsg_list_idx(bd->minor)]); - strncpy(bd->name, disk->disk_name, sizeof(bd->name) - 1); + strncpy(bd->name, rq->bsg_dev.class_dev->class_id, sizeof(bd->name) - 1); 
dprintk("bound to <%s>, max queue %d\n", format_dev_t(buf, inode->i_rdev), bd->max_queue); @@ -817,7 +815,7 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file) if (!bcd) return ERR_PTR(-ENODEV); - return bsg_add_device(inode, bcd->disk, file); + return bsg_add_device(inode, bcd->queue, file); } static int bsg_open(struct inode *inode, struct file *file) @@ -900,7 +898,7 @@ bsg_ioctl(struct inode *inode, struct file *file, unsigned int cmd, case SG_EMULATED_HOST: case SCSI_IOCTL_SEND_COMMAND: { void __user *uarg = (void __user *) arg; - return scsi_cmd_ioctl(file, bd->queue, bd->disk, cmd, uarg); + return scsi_cmd_ioctl(file, bd->queue, NULL, cmd, uarg); } case SG_IO: { struct request *rq; @@ -915,7 +913,7 @@ bsg_ioctl(struct inode *inode, struct file *file, unsigned int cmd, return PTR_ERR(rq); bio = rq->bio; - blk_execute_rq(bd->queue, bd->disk, rq, 0); + blk_execute_rq(bd->queue, NULL, rq, 0); blk_complete_sgv4_hdr_rq(rq, &hdr, bio); if (copy_to_user(uarg, &hdr, sizeof(hdr))) @@ -945,24 +943,23 @@ static struct file_operations bsg_fops = { .owner = THIS_MODULE, }; -void bsg_unregister_disk(struct gendisk *disk) +void bsg_unregister_queue(struct request_queue *q) { - struct bsg_class_device *bcd = &disk->bsg_dev; + struct bsg_class_device *bcd = &q->bsg_dev; if (!bcd->class_dev) return; mutex_lock(&bsg_mutex); - sysfs_remove_link(&bcd->disk->queue->kobj, "bsg"); + sysfs_remove_link(&q->kobj, "bsg"); class_device_destroy(bsg_class, MKDEV(BSG_MAJOR, bcd->minor)); bcd->class_dev = NULL; list_del_init(&bcd->list); mutex_unlock(&bsg_mutex); } -int bsg_register_disk(struct gendisk *disk) +int bsg_register_queue(struct request_queue *q, char *name) { - request_queue_t *q = disk->queue; struct bsg_class_device *bcd; dev_t dev; @@ -972,7 +969,7 @@ int bsg_register_disk(struct gendisk *disk) if (!q->request_fn) return 0; - bcd = &disk->bsg_dev; + bcd = &q->bsg_dev; memset(bcd, 0, sizeof(*bcd)); INIT_LIST_HEAD(&bcd->list); @@ -980,8 +977,8 @@ 
int bsg_register_disk(struct gendisk *disk) dev = MKDEV(BSG_MAJOR, bsg_device_nr); bcd->minor = bsg_device_nr; bsg_device_nr++; - bcd->disk = disk; - bcd->class_dev = class_device_create(bsg_class, NULL, dev, bcd->dev, "%s", disk->disk_name); + bcd->queue = q; + bcd->class_dev = class_device_create(bsg_class, NULL, dev, bcd->dev, "%s", name); if (!bcd->class_dev) goto err; list_add_tail(&bcd->list, &bsg_class_list); diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 3795e0708a22..74a5498c29a1 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -4091,7 +4091,7 @@ int blk_register_queue(struct gendisk *disk) return ret; } - ret = bsg_register_disk(disk); + ret = bsg_register_queue(q, disk->disk_name); if (ret) { elv_unregister_queue(q); kobject_unregister(&q->kobj); @@ -4106,7 +4106,7 @@ void blk_unregister_queue(struct gendisk *disk) request_queue_t *q = disk->queue; if (q && q->request_fn) { - bsg_unregister_disk(disk); + bsg_unregister_queue(q); elv_unregister_queue(q); kobject_uevent(&q->kobj, KOBJ_REMOVE); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2746632c2267..24b474e05a44 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -470,6 +471,10 @@ struct request_queue unsigned int bi_size; struct mutex sysfs_lock; + +#if defined(CONFIG_BLK_DEV_BSG) + struct bsg_class_device bsg_dev; +#endif }; #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 2154a6dfbd53..0475a6d3ff6a 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -47,16 +47,16 @@ struct bsg_class_device { struct class_device *class_dev; struct device *dev; int minor; - struct gendisk *disk; struct list_head list; + struct request_queue *queue; }; -extern int bsg_register_disk(struct gendisk *); -extern void bsg_unregister_disk(struct gendisk *); +extern int bsg_register_queue(struct request_queue *, char *); 
+extern void bsg_unregister_queue(struct request_queue *); #else struct bsg_class_device { }; -#define bsg_register_disk(disk) (0) -#define bsg_unregister_disk(disk) do { } while (0) +#define bsg_register_queue(disk, name) (0) +#define bsg_unregister_queue(disk) do { } while (0) #endif #endif /* __KERNEL__ */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 8c43d7032612..9756fc102a83 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -67,7 +67,6 @@ struct partition { #include #include #include -#include struct partition { unsigned char boot_ind; /* 0x80 - active */ @@ -92,7 +91,6 @@ struct hd_struct { #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif - struct bsg_class_device bsg_dev; }; #define GENHD_FL_REMOVABLE 1 From 4e2872d6b0252d33f28ea67f33704208ca781978 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 28 Mar 2007 13:29:24 +0200 Subject: [PATCH 17/25] bind bsg to all SCSI devices This patch binds bsg to all SCSI devices (their request queues) like the current sg driver does. We can send SCSI commands to non disk and cdrom scsi devices like OSD via bsg. This patch removes bsg_register_queue from blk_register_queue so bsg devices aren't bound to non SCSI block devices. If they want bsg, I'll send a patch to do that. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 59 +++++++++++++++++++++++++++++++++++++++++------ block/ll_rw_blk.c | 8 ------- 2 files changed, 52 insertions(+), 15 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 4ea4bedb413f..cd0221c61bfe 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include static char bsg_version[] = "block layer sg (bsg) 0.4"; @@ -962,6 +964,8 @@ int bsg_register_queue(struct request_queue *q, char *name) { struct bsg_class_device *bcd; dev_t dev; + int ret; + struct class_device *class_dev = NULL; /* * we need a proper transport to send commands, not a stacked device @@ -978,22 +982,54 @@ int bsg_register_queue(struct request_queue *q, char *name) bcd->minor = bsg_device_nr; bsg_device_nr++; bcd->queue = q; - bcd->class_dev = class_device_create(bsg_class, NULL, dev, bcd->dev, "%s", name); - if (!bcd->class_dev) + class_dev = class_device_create(bsg_class, NULL, dev, bcd->dev, "%s", name); + if (IS_ERR(class_dev)) { + ret = PTR_ERR(class_dev); goto err; + } + bcd->class_dev = class_dev; + + if (q->kobj.dentry) { + ret = sysfs_create_link(&q->kobj, &bcd->class_dev->kobj, "bsg"); + if (ret) + goto err; + } + list_add_tail(&bcd->list, &bsg_class_list); - if (sysfs_create_link(&q->kobj, &bcd->class_dev->kobj, "bsg")) - goto err; + mutex_unlock(&bsg_mutex); return 0; err: bsg_device_nr--; - if (bcd->class_dev) + if (class_dev) class_device_destroy(bsg_class, MKDEV(BSG_MAJOR, bcd->minor)); mutex_unlock(&bsg_mutex); - return -ENOMEM; + return ret; } +static int bsg_add(struct class_device *cl_dev, struct class_interface *cl_intf) +{ + int ret; + struct scsi_device *sdp = to_scsi_device(cl_dev->dev); + struct request_queue *rq = sdp->request_queue; + + if (rq->kobj.parent) + ret = bsg_register_queue(rq, kobject_name(rq->kobj.parent)); + else + ret = bsg_register_queue(rq, kobject_name(&sdp->sdev_gendev.kobj)); + return ret; +} + +static void 
bsg_remove(struct class_device *cl_dev, struct class_interface *cl_intf) +{ + bsg_unregister_queue(to_scsi_device(cl_dev->dev)->request_queue); +} + +static struct class_interface bsg_intf = { + .add = bsg_add, + .remove = bsg_remove, +}; + static int __init bsg_init(void) { int ret, i; @@ -1021,6 +1057,15 @@ static int __init bsg_init(void) return ret; } + ret = scsi_register_interface(&bsg_intf); + if (ret) { + printk(KERN_ERR "bsg: failed register scsi interface %d\n", ret); + kmem_cache_destroy(bsg_cmd_cachep); + class_destroy(bsg_class); + unregister_chrdev(BSG_MAJOR, "bsg"); + return ret; + } + printk(KERN_INFO "%s loaded\n", bsg_version); return 0; } @@ -1029,4 +1074,4 @@ MODULE_AUTHOR("Jens Axboe"); MODULE_DESCRIPTION("Block layer SGSI generic (sg) driver"); MODULE_LICENSE("GPL"); -subsys_initcall(bsg_init); +device_initcall(bsg_init); diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 74a5498c29a1..ef42bb2b12b6 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -4091,13 +4091,6 @@ int blk_register_queue(struct gendisk *disk) return ret; } - ret = bsg_register_queue(q, disk->disk_name); - if (ret) { - elv_unregister_queue(q); - kobject_unregister(&q->kobj); - return ret; - } - return 0; } @@ -4106,7 +4099,6 @@ void blk_unregister_queue(struct gendisk *disk) request_queue_t *q = disk->queue; if (q && q->request_fn) { - bsg_unregister_queue(q); elv_unregister_queue(q); kobject_uevent(&q->kobj, KOBJ_REMOVE); From 292b7f27129272c9ec0ee5fa56abb6f9061b1d83 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 28 Mar 2007 13:29:58 +0200 Subject: [PATCH 18/25] improve bsg device allocation This patch addresses two issues with bsg device allocation. - the current maximum number of bsg devices is 256. It's too small if we allocate bsg devices to all SCSI devices, transport entities, etc. This increases the maximum number to 32768 (taken from the sg driver). - SCSI devices are dynamically added and removed. 
Currently, bsg can't handle it well since bsg_device->minor is simply increased. This is dependent on the patchset that I posted yesterday: http://marc.info/?l=linux-scsi&m=117440208726755&w=2 Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 49 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index cd0221c61bfe..4ef3cc550244 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -58,6 +58,7 @@ enum { }; #define BSG_DEFAULT_CMDS 64 +#define BSG_MAX_DEVS 32768 #undef BSG_DEBUG @@ -75,7 +76,7 @@ enum { #define BSG_MAJOR (240) static DEFINE_MUTEX(bsg_mutex); -static int bsg_device_nr; +static int bsg_device_nr, bsg_minor_idx; #define BSG_LIST_SIZE (8) #define bsg_list_idx(minor) ((minor) & (BSG_LIST_SIZE - 1)) @@ -957,14 +958,15 @@ void bsg_unregister_queue(struct request_queue *q) class_device_destroy(bsg_class, MKDEV(BSG_MAJOR, bcd->minor)); bcd->class_dev = NULL; list_del_init(&bcd->list); + bsg_device_nr--; mutex_unlock(&bsg_mutex); } int bsg_register_queue(struct request_queue *q, char *name) { - struct bsg_class_device *bcd; + struct bsg_class_device *bcd, *__bcd; dev_t dev; - int ret; + int ret = -EMFILE; struct class_device *class_dev = NULL; /* @@ -978,10 +980,27 @@ int bsg_register_queue(struct request_queue *q, char *name) INIT_LIST_HEAD(&bcd->list); mutex_lock(&bsg_mutex); - dev = MKDEV(BSG_MAJOR, bsg_device_nr); - bcd->minor = bsg_device_nr; - bsg_device_nr++; + if (bsg_device_nr == BSG_MAX_DEVS) { + printk(KERN_ERR "bsg: too many bsg devices\n"); + goto err; + } + +retry: + list_for_each_entry(__bcd, &bsg_class_list, list) { + if (__bcd->minor == bsg_minor_idx) { + bsg_minor_idx++; + if (bsg_minor_idx == BSG_MAX_DEVS) + bsg_minor_idx = 0; + goto retry; + } + } + + bcd->minor = bsg_minor_idx++; + if (bsg_minor_idx == BSG_MAX_DEVS) + bsg_minor_idx = 0; + bcd->queue = q; + dev = MKDEV(BSG_MAJOR, bcd->minor); class_dev = class_device_create(bsg_class, 
NULL, dev, bcd->dev, "%s", name); if (IS_ERR(class_dev)) { ret = PTR_ERR(class_dev); @@ -996,11 +1015,11 @@ int bsg_register_queue(struct request_queue *q, char *name) } list_add_tail(&bcd->list, &bsg_class_list); + bsg_device_nr++; mutex_unlock(&bsg_mutex); return 0; err: - bsg_device_nr--; if (class_dev) class_device_destroy(bsg_class, MKDEV(BSG_MAJOR, bcd->minor)); mutex_unlock(&bsg_mutex); @@ -1030,6 +1049,11 @@ static struct class_interface bsg_intf = { .remove = bsg_remove, }; +static struct cdev bsg_cdev = { + .kobj = {.name = "bsg", }, + .owner = THIS_MODULE, +}; + static int __init bsg_init(void) { int ret, i; @@ -1050,13 +1074,22 @@ static int __init bsg_init(void) return PTR_ERR(bsg_class); } - ret = register_chrdev(BSG_MAJOR, "bsg", &bsg_fops); + ret = register_chrdev_region(MKDEV(BSG_MAJOR, 0), BSG_MAX_DEVS, "bsg"); if (ret) { kmem_cache_destroy(bsg_cmd_cachep); class_destroy(bsg_class); return ret; } + cdev_init(&bsg_cdev, &bsg_fops); + ret = cdev_add(&bsg_cdev, MKDEV(BSG_MAJOR, 0), BSG_MAX_DEVS); + if (ret) { + kmem_cache_destroy(bsg_cmd_cachep); + class_destroy(bsg_class); + unregister_chrdev_region(MKDEV(BSG_MAJOR, 0), BSG_MAX_DEVS); + return ret; + } + ret = scsi_register_interface(&bsg_intf); if (ret) { printk(KERN_ERR "bsg: failed register scsi interface %d\n", ret); From 4cf0723ac89b5f2189da2ad07ef875de26b83c77 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 30 Mar 2007 11:19:39 +0200 Subject: [PATCH 19/25] bsg: minor bug fixes This fixes the following minor issues: - add EXPORT_SYMBOL_GPL for bsg_register_queue and bsg_unregister_queue. 
- shut up gcc warnings Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 4 +++- include/linux/bsg.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 4ef3cc550244..a333c9337093 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -961,8 +961,9 @@ void bsg_unregister_queue(struct request_queue *q) bsg_device_nr--; mutex_unlock(&bsg_mutex); } +EXPORT_SYMBOL_GPL(bsg_unregister_queue); -int bsg_register_queue(struct request_queue *q, char *name) +int bsg_register_queue(struct request_queue *q, const char *name) { struct bsg_class_device *bcd, *__bcd; dev_t dev; @@ -1025,6 +1026,7 @@ err: mutex_unlock(&bsg_mutex); return ret; } +EXPORT_SYMBOL_GPL(bsg_register_queue); static int bsg_add(struct class_device *cl_dev, struct class_interface *cl_intf) { diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 0475a6d3ff6a..0dd01f90ba5e 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -51,7 +51,7 @@ struct bsg_class_device { struct request_queue *queue; }; -extern int bsg_register_queue(struct request_queue *, char *); +extern int bsg_register_queue(struct request_queue *, const char *); extern void bsg_unregister_queue(struct request_queue *); #else struct bsg_class_device { }; From e7d72173248c29c6f9ba14e40374266e1b954964 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 8 May 2007 15:32:03 +0200 Subject: [PATCH 20/25] bsg: fix a blocking read bug This patch fixes a bug that read() returns ENODATA even with a blocking file descriptor when there are no commands pending. This also includes some cleanups. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 84 +++++++++++++++-------------------------------------- 1 file changed, 23 insertions(+), 61 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index a333c9337093..2f78d7d34b9d 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -115,9 +115,9 @@ static void bsg_free_command(struct bsg_command *bc) wake_up(&bd->wq_free); } -static struct bsg_command *__bsg_alloc_command(struct bsg_device *bd) +static struct bsg_command *bsg_alloc_command(struct bsg_device *bd) { - struct bsg_command *bc = NULL; + struct bsg_command *bc = ERR_PTR(-EINVAL); spin_lock_irq(&bd->lock); @@ -131,6 +131,7 @@ static struct bsg_command *__bsg_alloc_command(struct bsg_device *bd) if (unlikely(!bc)) { spin_lock_irq(&bd->lock); bd->queued_cmds--; + bc = ERR_PTR(-ENOMEM); goto out; } @@ -198,30 +199,6 @@ unlock: return ret; } -/* - * get a new free command, blocking if needed and specified - */ -static struct bsg_command *bsg_get_command(struct bsg_device *bd) -{ - struct bsg_command *bc; - int ret; - - do { - bc = __bsg_alloc_command(bd); - if (bc) - break; - - ret = bsg_io_schedule(bd, TASK_INTERRUPTIBLE); - if (ret) { - bc = ERR_PTR(ret); - break; - } - - } while (1); - - return bc; -} - static int blk_fill_sgv4_hdr_rq(request_queue_t *q, struct request *rq, struct sg_io_v4 *hdr, int has_write_perm) { @@ -397,7 +374,7 @@ static inline struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd) /* * Get a finished command from the done list */ -static struct bsg_command *__bsg_get_done_cmd(struct bsg_device *bd, int state) +static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd) { struct bsg_command *bc; int ret; @@ -407,9 +384,14 @@ static struct bsg_command *__bsg_get_done_cmd(struct bsg_device *bd, int state) if (bc) break; - ret = bsg_io_schedule(bd, state); + if (!test_bit(BSG_F_BLOCK, &bd->flags)) { + bc = ERR_PTR(-EAGAIN); + break; + } + + ret = wait_event_interruptible(bd->wq_done, bd->done_cmds); if 
(ret) { - bc = ERR_PTR(ret); + bc = ERR_PTR(-ERESTARTSYS); break; } } while (1); @@ -419,18 +401,6 @@ static struct bsg_command *__bsg_get_done_cmd(struct bsg_device *bd, int state) return bc; } -static struct bsg_command * -bsg_get_done_cmd(struct bsg_device *bd, const struct iovec *iov) -{ - return __bsg_get_done_cmd(bd, TASK_INTERRUPTIBLE); -} - -static struct bsg_command * -bsg_get_done_cmd_nosignals(struct bsg_device *bd) -{ - return __bsg_get_done_cmd(bd, TASK_UNINTERRUPTIBLE); -} - static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, struct bio *bio) { @@ -496,19 +466,16 @@ static int bsg_complete_all_commands(struct bsg_device *bd) */ ret = 0; do { - bc = bsg_get_done_cmd_nosignals(bd); - - /* - * we _must_ complete before restarting, because - * bsg_release can't handle this failing. - */ - if (PTR_ERR(bc) == -ERESTARTSYS) - continue; - if (IS_ERR(bc)) { - ret = PTR_ERR(bc); + spin_lock_irq(&bd->lock); + if (!bd->queued_cmds) { + spin_unlock_irq(&bd->lock); break; } + bc = bsg_get_done_cmd(bd); + if (IS_ERR(bc)) + break; + tret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio); if (!ret) ret = tret; @@ -519,11 +486,9 @@ static int bsg_complete_all_commands(struct bsg_device *bd) return ret; } -typedef struct bsg_command *(*bsg_command_callback)(struct bsg_device *bd, const struct iovec *iov); - static ssize_t -__bsg_read(char __user *buf, size_t count, bsg_command_callback get_bc, - struct bsg_device *bd, const struct iovec *iov, ssize_t *bytes_read) +__bsg_read(char __user *buf, size_t count, struct bsg_device *bd, + const struct iovec *iov, ssize_t *bytes_read) { struct bsg_command *bc; int nr_commands, ret; @@ -534,7 +499,7 @@ __bsg_read(char __user *buf, size_t count, bsg_command_callback get_bc, ret = 0; nr_commands = count / sizeof(struct sg_io_v4); while (nr_commands) { - bc = get_bc(bd, iov); + bc = bsg_get_done_cmd(bd); if (IS_ERR(bc)) { ret = PTR_ERR(bc); break; @@ -598,8 +563,7 @@ bsg_read(struct file *file, char 
__user *buf, size_t count, loff_t *ppos) bsg_set_block(bd, file); bytes_read = 0; - ret = __bsg_read(buf, count, bsg_get_done_cmd, - bd, NULL, &bytes_read); + ret = __bsg_read(buf, count, bd, NULL, &bytes_read); *ppos = bytes_read; if (!bytes_read || (bytes_read && err_block_err(ret))) @@ -625,9 +589,7 @@ static ssize_t __bsg_write(struct bsg_device *bd, const char __user *buf, while (nr_commands) { request_queue_t *q = bd->queue; - bc = bsg_get_command(bd); - if (!bc) - break; + bc = bsg_alloc_command(bd); if (IS_ERR(bc)) { ret = PTR_ERR(bc); bc = NULL; From efba1a31f3f8fe9672eb96cd26e97fb96891f1c0 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 7 Jun 2007 13:24:06 +0200 Subject: [PATCH 21/25] bsg: fix the deadlock on discarding done commands The previous commit introduced a deadlock in discarding commands, because we forget to unlock the bd spinlock. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/bsg.c b/block/bsg.c index 2f78d7d34b9d..5f4abc902a00 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -471,6 +471,7 @@ static int bsg_complete_all_commands(struct bsg_device *bd) spin_unlock_irq(&bd->lock); break; } + spin_unlock_irq(&bd->lock); bc = bsg_get_done_cmd(bd); if (IS_ERR(bc)) From abae1fde63fcdd2a3abaa0d7930938d8326f83d2 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 16 Jul 2007 08:52:14 +0200 Subject: [PATCH 22/25] add a struct request pointer to the request structure This adds a struct request pointer to the request structure for the second data phase (bidi for now). A request queue supporting bidi requests sets QUEUE_FLAG_BIDI. This prevents sending bidi requests to a non-bidi queue. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/ll_rw_blk.c | 1 + include/linux/blkdev.h | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index ef42bb2b12b6..11e4235d0b0c 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -256,6 +256,7 @@ static void rq_init(request_queue_t *q, struct request *rq) rq->end_io = NULL; rq->end_io_data = NULL; rq->completion_data = NULL; + rq->next_rq = NULL; } /** diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 24b474e05a44..b32564a1e105 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -317,6 +317,9 @@ struct request { */ rq_end_io_fn *end_io; void *end_io_data; + + /* for bidi */ + struct request *next_rq; }; /* @@ -486,6 +489,7 @@ struct request_queue #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ enum { /* @@ -550,6 +554,7 @@ enum { #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) +#define blk_bidi_rq(rq) ((rq)->next_rq != NULL) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) From 2c9ecdf40af0554ee9a2b1cbbbbdbc77f90a40e1 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 16 Jul 2007 08:52:15 +0200 Subject: [PATCH 23/25] bsg: add bidi support bsg uses the rq->next_rq pointer for a bidi request. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 74 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 5f4abc902a00..13ecc951a4c0 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -95,6 +95,7 @@ struct bsg_command { struct list_head list; struct request *rq; struct bio *bio; + struct bio *bidi_bio; int err; struct sg_io_v4 hdr; struct sg_io_v4 __user *uhdr; @@ -243,16 +244,6 @@ bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw) if (hdr->protocol || hdr->subprotocol) return -EINVAL; - /* - * looks sane, if no data then it should be fine from our POV - */ - if (!hdr->dout_xfer_len && !hdr->din_xfer_len) - return 0; - - /* not supported currently */ - if (hdr->dout_xfer_len && hdr->din_xfer_len) - return -EINVAL; - *rw = hdr->dout_xfer_len ? WRITE : READ; return 0; @@ -265,7 +256,7 @@ static struct request * bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr) { request_queue_t *q = bd->queue; - struct request *rq; + struct request *rq, *next_rq = NULL; int ret, rw = 0; /* shut up gcc */ unsigned int dxfer_len; void *dxferp = NULL; @@ -282,11 +273,30 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr) * map scatter-gather elements seperately and string them to request */ rq = blk_get_request(q, rw, GFP_KERNEL); + if (!rq) + return ERR_PTR(-ENOMEM); ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM, &bd->flags)); - if (ret) { - blk_put_request(rq); - return ERR_PTR(ret); + if (ret) + goto out; + + if (rw == WRITE && hdr->din_xfer_len) { + if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) { + ret = -EOPNOTSUPP; + goto out; + } + + next_rq = blk_get_request(q, READ, GFP_KERNEL); + if (!next_rq) { + ret = -ENOMEM; + goto out; + } + rq->next_rq = next_rq; + + dxferp = (void*)(unsigned long)hdr->din_xferp; + ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len); + if (ret) + goto out; } if 
(hdr->dout_xfer_len) { @@ -300,14 +310,17 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr) if (dxfer_len) { ret = blk_rq_map_user(q, rq, dxferp, dxfer_len); - if (ret) { - dprintk("failed map at %d\n", ret); - blk_put_request(rq); - rq = ERR_PTR(ret); - } + if (ret) + goto out; } - return rq; +out: + blk_put_request(rq); + if (next_rq) { + blk_rq_unmap_user(next_rq->bio); + blk_put_request(next_rq); + } + return ERR_PTR(ret); } /* @@ -346,6 +359,8 @@ static void bsg_add_command(struct bsg_device *bd, request_queue_t *q, */ bc->rq = rq; bc->bio = rq->bio; + if (rq->next_rq) + bc->bidi_bio = rq->next_rq->bio; bc->hdr.duration = jiffies; spin_lock_irq(&bd->lock); list_add_tail(&bc->list, &bd->busy_list); @@ -402,7 +417,7 @@ static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd) } static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, - struct bio *bio) + struct bio *bio, struct bio *bidi_bio) { int ret = 0; @@ -431,6 +446,11 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, ret = -EFAULT; } + if (rq->next_rq) { + blk_rq_unmap_user(bidi_bio); + blk_put_request(rq->next_rq); + } + blk_rq_unmap_user(bio); blk_put_request(rq); @@ -477,7 +497,8 @@ static int bsg_complete_all_commands(struct bsg_device *bd) if (IS_ERR(bc)) break; - tret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio); + tret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio, + bc->bidi_bio); if (!ret) ret = tret; @@ -511,7 +532,8 @@ __bsg_read(char __user *buf, size_t count, struct bsg_device *bd, * after completing the request. 
so do that here, * bsg_complete_work() cannot do that for us */ - ret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio); + ret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio, + bc->bidi_bio); if (copy_to_user(buf, (char *) &bc->hdr, sizeof(bc->hdr))) ret = -EFAULT; @@ -868,7 +890,7 @@ bsg_ioctl(struct inode *inode, struct file *file, unsigned int cmd, } case SG_IO: { struct request *rq; - struct bio *bio; + struct bio *bio, *bidi_bio = NULL; struct sg_io_v4 hdr; if (copy_from_user(&hdr, uarg, sizeof(hdr))) @@ -879,8 +901,10 @@ bsg_ioctl(struct inode *inode, struct file *file, unsigned int cmd, return PTR_ERR(rq); bio = rq->bio; + if (rq->next_rq) + bidi_bio = rq->next_rq->bio; blk_execute_rq(bd->queue, NULL, rq, 0); - blk_complete_sgv4_hdr_rq(rq, &hdr, bio); + blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio); if (copy_to_user(uarg, &hdr, sizeof(hdr))) return -EFAULT; From 15d10b611fa94b52f004a08a1d4cf7b39de3cba3 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 16 Jul 2007 08:52:16 +0200 Subject: [PATCH 24/25] bsg: add SCSI transport-level request support This enables bsg to handle SCSI transport-level requests such as the SAS Management Protocol (SMP). - add BSG_SUB_PROTOCOL_{SCSI_CMD, SCSI_TMF, SCSI_TRANSPORT} definitions. - SCSI transport-level requests skip blk_verify_command(). 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/bsg.c | 27 +++++++++++++++++++++------ include/linux/bsg.h | 6 ++++++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 13ecc951a4c0..461c9f56f3ee 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -208,7 +208,11 @@ static int blk_fill_sgv4_hdr_rq(request_queue_t *q, struct request *rq, if (copy_from_user(rq->cmd, (void *)(unsigned long)hdr->request, hdr->request_len)) return -EFAULT; - if (blk_verify_command(rq->cmd, has_write_perm)) + + if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) { + if (blk_verify_command(rq->cmd, has_write_perm)) + return -EPERM; + } else if (!capable(CAP_SYS_RAWIO)) return -EPERM; /* @@ -232,6 +236,8 @@ static int blk_fill_sgv4_hdr_rq(request_queue_t *q, struct request *rq, static int bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw) { + int ret = 0; + if (hdr->guard != 'Q') return -EINVAL; if (hdr->request_len > BLK_MAX_CDB) @@ -240,13 +246,22 @@ bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw) hdr->din_xfer_len > (q->max_sectors << 9)) return -EIO; - /* not supported currently */ - if (hdr->protocol || hdr->subprotocol) - return -EINVAL; + switch (hdr->protocol) { + case BSG_PROTOCOL_SCSI: + switch (hdr->subprotocol) { + case BSG_SUB_PROTOCOL_SCSI_CMD: + case BSG_SUB_PROTOCOL_SCSI_TRANSPORT: + break; + default: + ret = -EINVAL; + } + break; + default: + ret = -EINVAL; + } *rw = hdr->dout_xfer_len ? 
WRITE : READ; - - return 0; + return ret; } /* diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 0dd01f90ba5e..bd998ca6cb2e 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -1,6 +1,12 @@ #ifndef BSG_H #define BSG_H +#define BSG_PROTOCOL_SCSI 0 + +#define BSG_SUB_PROTOCOL_SCSI_CMD 0 +#define BSG_SUB_PROTOCOL_SCSI_TMF 1 +#define BSG_SUB_PROTOCOL_SCSI_TRANSPORT 2 + struct sg_io_v4 { __s32 guard; /* [i] 'Q' to differentiate from v3 */ __u32 protocol; /* [i] 0 -> SCSI , .... */ From 58ff411e0d21592565ac9ab34f33a434f26e018b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 16 Jul 2007 08:52:17 +0200 Subject: [PATCH 25/25] bsg: Kconfig updates This updates the bsg entry in Kconfig: - bsg supports sg v4 - bsg depends on SCSI - it might be better to mark it experimental for a while Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/Kconfig b/block/Kconfig index da12f2649cce..1d16b08e1506 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -53,9 +53,10 @@ endif # BLOCK config BLK_DEV_BSG bool "Block layer SG support" + depends on SCSI && EXPERIMENTAL default y ---help--- - Saying Y here will enable generic SG (SCSI generic) v3 + Saying Y here will enable generic SG (SCSI generic) v4 support for any block device. source block/Kconfig.iosched