[dm-devel] dm-multipath based engenio-lsi hardware handler
Chandra Seetharaman
sekharan at us.ibm.com
Wed Mar 21 19:23:34 UTC 2007
Hi All,
I took an old version of code that was originally written by Mike
Christie and started testing it.
Found some issues, fixed them, added some additional logic for
completeness.
Here is the code in its current state. Please provide me with your
feedback.
Also, find the attached diagram to see the current behavior of the
hardware handler.
Thanks,
chandra
/*
* Engenio/LSI RDAC DM HW handler
*
* Copyright (C) 2005 Mike Christie. All rights reserved.
* Copyright (C) Chandra Seetharaman, IBM Corp. 2007
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include "dm.h"
#include "dm-hw-handler.h"
#define RDAC_DM_HWH_NAME "rdac"
#define RDAC_DM_HWH_VER "0.4"
/*
 * LSI mode page stuff
 *
 * These struct definitions and the forming of the
 * mode page were taken from the LSI RDAC 2.4 GPL'd
 * driver, and then converted to Linux conventions.
 */

/*
 * Quiescence timeout (seconds) programmed into the RDAC mode page.
 * Fix: the original definition had a stray trailing ';', which would
 * break any use of the macro inside an expression.
 */
#define RDAC_QUIESCENCE_TIME 20
/*
 * Page Codes
 */
#define RDAC_PAGE_CODE_REDUNDANT_CONTROLLER 0x2c
/*
 * Controller modes definitions
 */
#define RDAC_MODE_TRANSFER_ALL_LUNS 0x01
#define RDAC_MODE_TRANSFER_SPECIFIED_LUNS 0x02
/*
 * RDAC Options field
 * (NOTE: "QUIESENCE" is a historical misspelling of "quiescence";
 * the name is kept because other code references it.)
 */
#define RDAC_FORCED_QUIESENCE 0x02
/*
 * default failover timeout in jiffies. TODO - make configurable
 */
#define RDAC_FAILOVER_TIMEOUT (60 * HZ)
/* MODE SENSE/SELECT(6) parameter list header (4 bytes, per SPC). */
struct rdac_mode_6_hdr {
u8 data_len;
u8 medium_type;
u8 device_params;
u8 block_desc_len;
};
/*
 * MODE SENSE/SELECT(10) parameter list header (8 bytes, per SPC).
 * NOTE(review): the u16 fields are raw wire data - presumably
 * big-endian on the wire; nothing in this file byte-swaps them.
 * Confirm, since they are only memset to zero here.
 */
struct rdac_mode_10_hdr {
u16 data_len;
u8 medium_type;
u8 device_params;
u16 reserved;
u16 block_desc_len;
};
/* Fields common to the legacy and expanded RDAC mode pages. */
struct rdac_mode_common {
u8 controller_serial[16];
u8 alt_controller_serial[16];
u8 rdac_mode[2];
u8 alt_rdac_mode[2];
u8 quiescence_timeout;
u8 rdac_options;
};
/* Legacy (mode-6) RDAC page 0x2c; limited to 32 LUN entries. */
struct rdac_pg_legacy {
struct rdac_mode_6_hdr hdr;
u8 page_code;
u8 page_len;
struct rdac_mode_common common;
#define MODE6_MAX_LUN 32
u8 lun_table[MODE6_MAX_LUN];
u8 reserved2[32];
u8 reserved3;
u8 reserved4;
};
/* Expanded (mode-10, subpage) RDAC page; room for 256 LUN entries. */
struct rdac_pg_expanded {
struct rdac_mode_10_hdr hdr;
u8 page_code;
u8 subpage_code;
u8 page_len[2];
struct rdac_mode_common common;
u8 lun_table[256];
u8 reserved3;
u8 reserved4;
};
/*
 * Vendor-specific INQUIRY VPD page 0xC9 ("vace"): volume access
 * control.  Bit 0 of avte_cvp is tested by c9_inquiry_endio() to
 * decide whether this controller currently owns the LUN.
 */
struct c9_inquiry {
u8 peripheral_info;
u8 page_code; /* 0xC9 */
u8 reserved1;
u8 page_len;
u8 page_id[4]; /* "vace" */
u8 avte_cvp;
u8 path_prio;
u8 reserved2[38];
};
#define SUBSYS_ID_LEN 16
#define SLOT_ID_LEN 2
/*
 * Vendor-specific INQUIRY VPD page 0xC4 ("subs"): subsystem identity.
 * Used to pair the two controllers of one array (see get_controller()).
 */
struct c4_inquiry {
u8 peripheral_info;
u8 page_code; /* 0xC4 */
u8 reserved1;
u8 page_len;
u8 page_id[4]; /* "subs" */
u8 subsys_id[SUBSYS_ID_LEN];
u8 revision[4];
/* NOTE(review): sized with SUBSYS_ID_LEN (16) although consumers only
 * read SLOT_ID_LEN (2) bytes; looks like a typo, but shrinking it
 * would change the page layout/size sent in the CDB - confirm against
 * the array's VPD page documentation before changing. */
u8 slot_id[SUBSYS_ID_LEN];
u8 reserved[2];
};
/*
 * Per-hardware-handler state, hung off hw_handler->context.
 * NOTE(review): the sense buffer is shared by every request built via
 * get_rdac_req() for this handler, which assumes at most one command
 * in flight per handler at a time - confirm callers serialize.
 */
struct rdac_handler {
unsigned use_10_ms;
unsigned timeout;
struct rdac_controller *ctlr;
/* Sentinel lun values, deliberately outside the valid 0-255 range
 * that a single byte from the c8 page can produce. */
#define UNINITIALIZED_LUN (1 << 8)
#define UNSUPPORTED_LUN (2 << 8)
unsigned lun;
unsigned char sense[SCSI_SENSE_BUFFERSIZE];
};
/* Per-command context, carried in bio->bi_private. */
struct rdac_private {
struct rdac_handler *h;
struct path *path;
struct list_head entry; /* link on rdac_controller->cmd_list */
};
/*
 * One instance per physical controller, identified by (subsys_id,
 * slot_id) and kref-counted by the handlers that point at it.
 * While a MODE SELECT is outstanding (submitted != 0), further
 * ownership checks are parked on cmd_list and re-issued later by
 * rdac_resubmit_all().
 */
struct rdac_controller {
u8 subsys_id[SUBSYS_ID_LEN];
u8 slot_id[SLOT_ID_LEN];
struct kref kref;
struct list_head node; /* list of all controllers */
spinlock_t lock;
int submitted;
struct list_head cmd_list; /* list of commands to be submitted */
};
/*
 * Vendor-specific INQUIRY VPD page 0xC8 ("edid"): extended device id.
 * Only the last byte of lun[] is consumed (see c8_inquiry_endio()).
 */
struct c8_inquiry {
u8 peripheral_info;
u8 page_code; /* 0xC8 */
u8 reserved1;
u8 page_len;
u8 page_id[4]; /* "edid" */
u8 reserved2[3];
u8 vol_uniq_id_len;
u8 vol_uniq_id[16];
u8 vol_user_label_len;
u8 vol_user_label[60];
u8 array_uniq_id_len;
u8 array_unique_id[16];
u8 array_user_label_len;
u8 array_user_label[60];
u8 lun[8];
};
/* Global registry of known RDAC controllers, protected by list_lock. */
static LIST_HEAD(ctlr_list);
/*
 * Fix: SPIN_LOCK_UNLOCKED is deprecated (it defeats lockdep class
 * tracking and was later removed from the kernel); use the
 * DEFINE_SPINLOCK() initializer instead.
 */
static DEFINE_SPINLOCK(list_lock);
/* Release the single data page attached to an rdac bio, then the bio. */
static inline void free_bio(struct bio *bio)
{
	struct page *pg = bio->bi_io_vec[0].bv_page;

	__free_page(pg);
	bio_put(bio);
}
static void submit_inquiry(struct rdac_handler *, struct path *, int,
unsigned int, bio_end_io_t);
static int c9_inquiry_endio(struct bio *, unsigned int, int);
/*
 * Convenience wrappers for the three vendor inquiry pages.  These are
 * macros rather than inline functions so that c4_inquiry_endio and
 * c8_inquiry_endio, which are defined later in the file, need no
 * forward declaration; each argument is expanded exactly once.
 */
#define submit_c9_inquiry(h, path) \
submit_inquiry(h, path, 0xC9, sizeof(struct c9_inquiry), \
c9_inquiry_endio)
#define submit_c4_inquiry(h, path) \
submit_inquiry(h, path, 0xC4, sizeof(struct c4_inquiry), \
c4_inquiry_endio)
#define submit_c8_inquiry(h, path) \
submit_inquiry(h, path, 0xC8, sizeof(struct c8_inquiry), \
c8_inquiry_endio)
/*
 * Re-issue the ownership checks (c9 inquiries) that were queued on the
 * controller while a MODE SELECT was outstanding, then clear the
 * submitted flag so the next check may issue a new MODE SELECT.
 * Called from mode_select_endio() when the failover command finishes.
 */
static void rdac_resubmit_all(struct rdac_handler *h)
{
struct rdac_private *tmp, *p;
struct rdac_controller *ctlr = h->ctlr;
spin_lock(&ctlr->lock);
list_for_each_entry_safe(p, tmp, &ctlr->cmd_list, entry) {
/* each queued entry carries its own handler/path pair */
submit_c9_inquiry(p->h, p->path);
list_del(&p->entry);
kfree(p);
}
ctlr->submitted = 0;
spin_unlock(&ctlr->lock);
}
/*
 * Completion handler for the MODE SELECT (failover) command.
 *
 * Retryable sense codes restart the ownership check (c9 inquiry);
 * anything else completes pg_init with success/failure.  Either way,
 * the checks queued behind this MODE SELECT are then resubmitted.
 *
 * Fix vs. original: p was freed *before* the partial-completion check.
 * When bi_size != 0 this endio returns 1 and will be invoked again,
 * at which point bio->bi_private pointed at freed memory.  The check
 * now comes first; p is freed only on final completion.
 */
static int mode_select_endio(struct bio *bio, unsigned int done, int error)
{
	struct rdac_private *p = bio->bi_private;
	struct rdac_handler *h = p->h;
	struct path *path = p->path;
	int sense;

	if (bio->bi_size)
		return 1;	/* not finished yet - keep p alive */

	sense = bio_sense_value(bio);
	kfree(p);

	/* If it is retryable failure, submit the c9 inquiry again */
	if (sense == 0x59136 || sense == 0x68b02 || sense == 0xb8b02) {
		/* 0x59136 - Command lock contention
		 * 0x[6b]8b02 - Quiescence in progress or achieved
		 */
		submit_c9_inquiry(h, path);
		goto done;
	}
	if (sense)
		DMINFO("dm-rdac: MODE_SELECT failed on %s with sense 0x%x",
		       path->dev->name, sense);
	if (error || sense)
		dm_pg_init_complete(path, MP_FAIL_PATH);
	else
		dm_pg_init_complete(path, 0);
done:
	rdac_resubmit_all(h);
	/* request is freed in block layer */
	free_bio(bio);
	return 0;
}
/*
 * Allocate a one-page bio for an RDAC packet command: attach a freshly
 * allocated rdac_private (handler + path pair) as bi_private and hook
 * up the completion callback.  Returns NULL on any allocation failure,
 * with everything acquired so far released.
 */
static struct bio *get_rdac_bio(struct path *path, unsigned data_size,
		bio_end_io_t endio, int rw, struct rdac_handler *h)
{
	struct rdac_private *priv;
	struct page *pg;
	struct bio *bio = bio_alloc(GFP_ATOMIC, 1);

	if (!bio)
		return NULL;

	bio->bi_bdev = path->dev->bdev;
	bio->bi_sector = 0;
	bio->bi_end_io = endio;
	if (rw == WRITE)
		bio->bi_rw |= (1 << BIO_RW);

	priv = kmalloc(sizeof(*priv), GFP_ATOMIC);
	if (!priv)
		goto put_bio;
	priv->path = path;
	priv->h = h;
	bio->bi_private = priv;

	pg = alloc_page(GFP_ATOMIC);
	if (!pg)
		goto free_priv;
	if (bio_add_page(bio, pg, data_size, 0) == data_size)
		return bio;

	__free_page(pg);
free_priv:
	kfree(priv);
put_bio:
	bio_put(bio);
	return NULL;
}
/*
 * Wrap @bio in a block-layer packet command (REQ_BLOCK_PC) request.
 * The sense buffer is the handler's h->sense - NOTE(review): that
 * buffer is shared, so this assumes one outstanding command per
 * handler; confirm the state machine never overlaps commands.
 * The caller fills in rq->cmd/cmd_len before queueing.
 */
static struct request *get_rdac_req(struct rdac_handler *h,
struct bio *bio, struct path *path, int rw)
{
struct request *rq;
struct block_device *bdev = bio->bi_bdev;
struct request_queue *q = bdev_get_queue(bdev);
rq = blk_get_request(q, rw, GFP_ATOMIC);
if (!rq) {
DMINFO("dm-rdac: get_failover_req: blk_get_request failed");
return NULL;
}
rq->bio = rq->biotail = bio;
blk_rq_bio_prep(q, rq, bio);
rq->rq_disk = bdev->bd_contains->bd_disk;
/* bio backed don't set data */
rq->buffer = rq->data = NULL;
/* rq data_len used for pc cmd's request_bufflen */
rq->data_len = bio->bi_size;
rq->sense = h->sense;
memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
rq->sense_len = 0;
memset(&rq->cmd, 0, BLK_MAX_CDB);
rq->timeout = h->timeout;
/* no retries, no merging - this is a one-shot control command */
rq->flags |= (REQ_BLOCK_PC | REQ_FAILFAST | REQ_NOMERGE);
return rq;
}
/*
 * Build the MODE SELECT request that asks the controller behind @path
 * to take ownership of h->lun.  Uses the expanded (mode-10, subpage 1)
 * page when use_10_ms is set, otherwise the legacy mode-6 page.
 * Returns NULL on allocation failure.
 */
static struct request *rdac_failover_get(struct rdac_handler *h,
struct path *path)
{
struct bio *bio;
struct request *rq;
struct rdac_mode_common *common;
unsigned data_size = h->use_10_ms ? sizeof(struct rdac_pg_expanded) :
sizeof(struct rdac_pg_legacy);
/* get bio backing */
if (data_size > PAGE_SIZE)
/* this should never happen */
return NULL;
bio = get_rdac_bio(path, data_size, mode_select_endio, WRITE, h);
if (!bio) {
DMERR("dm-rdac: rdac_failover_get: no bio");
return NULL;
}
if (h->use_10_ms) {
struct rdac_pg_expanded *rdac_pg;
rdac_pg = (struct rdac_pg_expanded *)bio_data(bio);
memset(rdac_pg, 0, data_size);
common = &rdac_pg->common;
/* 0x40 presumably flags the subpage format of the page code -
 * TODO confirm against the LSI RDAC documentation */
rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40;
rdac_pg->subpage_code = 0x1;
/* page length 0x0128 (296), big-endian byte pair */
rdac_pg->page_len[0] = 0x01;
rdac_pg->page_len[1] = 0x28;
/* 0x81 presumably marks this LUN for transfer - TODO confirm */
rdac_pg->lun_table[h->lun] = 0x81;
} else {
struct rdac_pg_legacy *rdac_pg;
rdac_pg = (struct rdac_pg_legacy *)bio_data(bio);
memset(rdac_pg, 0, data_size);
common = &rdac_pg->common;
rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER;
rdac_pg->page_len = 0x68;
rdac_pg->lun_table[h->lun] = 0x81;
}
/* transfer only the LUNs flagged in lun_table, forcing quiescence */
common->rdac_mode[1] = RDAC_MODE_TRANSFER_SPECIFIED_LUNS;
common->quiescence_timeout = RDAC_QUIESCENCE_TIME;
common->rdac_options = RDAC_FORCED_QUIESENCE;
/* get request for block layer packet command */
rq = get_rdac_req(h, bio, path, WRITE);
if (!rq) {
DMERR("dm-rdac: rdac_failover_get: no rq");
free_bio(bio);
return NULL;
}
/* Prepare the command. */
if (h->use_10_ms) {
rq->cmd[0] = MODE_SELECT_10;
rq->cmd[7] = data_size >> 8;
rq->cmd[8] = data_size & 0xff;
} else {
rq->cmd[0] = MODE_SELECT;
rq->cmd[4] = data_size;
}
rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
return rq;
}
/*
 * Build and queue the MODE SELECT failover command for @path at the
 * head of the device's request queue.  Returns 0 once the command is
 * queued, non-zero if it could not even be built (the caller then
 * fails the path).
 */
static int submit_mode_select(struct rdac_handler *h,
		struct path *path)
{
	struct request_queue *q = bdev_get_queue(path->dev->bdev);
	struct request *rq;

	if (!q) {
		DMINFO("dm-rdac: submit_mode_select: no queue");
		return 1;
	}

	rq = rdac_failover_get(h, path);
	if (!rq) {
		DMERR("dm-rdac: submit_mode_select: no rq");
		return 1;
	}

	DMINFO("dm-rdac: queueing MODE_SELECT command on %s", path->dev->name);
	/* insert at the front so the failover jumps ahead of normal I/O */
	elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1);
	return 0;
}
/*
 * kref release callback: runs when the last handler drops its
 * reference.  Unlink the controller from the global registry and
 * free it.
 */
static void release_ctlr(struct kref *kref)
{
	struct rdac_controller *ctlr =
		container_of(kref, struct rdac_controller, kref);

	spin_lock(&list_lock);
	list_del(&ctlr->node);
	spin_unlock(&list_lock);
	kfree(ctlr);
}
/*
 * Find the rdac_controller matching (subsys_id, slot_id) in the global
 * list and take a reference, or allocate and register a new one (whose
 * initial reference comes from kref_init).  Returns NULL only on
 * allocation failure.  GFP_ATOMIC is required because the allocation
 * happens with list_lock held and we may be in completion context.
 */
static struct rdac_controller *get_controller(u8 *subsys_id, u8 *slot_id)
{
struct rdac_controller *ctlr, *tmp;
spin_lock(&list_lock);
list_for_each_entry(tmp, &ctlr_list, node) {
if ((memcmp(tmp->subsys_id, subsys_id, SUBSYS_ID_LEN) == 0) &&
(memcmp(tmp->slot_id, slot_id, SLOT_ID_LEN) == 0)) {
kref_get(&tmp->kref);
spin_unlock(&list_lock);
return tmp;
}
}
ctlr = kmalloc(sizeof(*ctlr), GFP_ATOMIC);
if (!ctlr)
goto done;
/* initialize fields of controller */
memcpy(ctlr->subsys_id, subsys_id, SUBSYS_ID_LEN);
memcpy(ctlr->slot_id, slot_id, SLOT_ID_LEN);
kref_init(&ctlr->kref);
spin_lock_init(&ctlr->lock);
ctlr->submitted = 0;
INIT_LIST_HEAD(&ctlr->cmd_list);
/* publish only after all fields are initialized */
list_add(&ctlr->node, &ctlr_list);
done:
spin_unlock(&list_lock);
return ctlr;
}
/*
 * Completion handler for the c4 (subsystem identity) inquiry: bind the
 * handler to its rdac_controller, then continue with the ownership
 * check (c9 inquiry).
 *
 * Fix vs. original: like mode_select_endio(), check for partial
 * completion (bi_size != 0 means this endio will be called again)
 * before freeing the private data; the original freed p up front.
 */
static int c4_inquiry_endio(struct bio *bio, unsigned int done, int error)
{
	struct rdac_private *p = bio->bi_private;
	struct rdac_handler *h = p->h;
	struct path *path = p->path;
	struct c4_inquiry *sp;

	if (bio->bi_size)
		return 1;	/* not finished yet - keep p alive */
	kfree(p);

	if (error) {
		dm_pg_init_complete(path, MP_FAIL_PATH);
		goto done;
	}
	/* look up (or create) the controller this path belongs to */
	sp = (struct c4_inquiry *)bio_data(bio);
	h->ctlr = get_controller(sp->subsys_id, sp->slot_id);
	if (h->ctlr)
		submit_c9_inquiry(h, path);
	else
		dm_pg_init_complete(path, MP_FAIL_PATH);
done:
	/* request is freed in block layer */
	free_bio(bio);
	return 0;
}
/*
 * Completion handler for the c9 (volume access control) inquiry: if
 * this controller already owns the LUN, pg_init succeeded; otherwise
 * issue a MODE SELECT to transfer ownership, or queue behind one
 * already in flight.
 *
 * Fix vs. original: check for partial completion (bi_size != 0 means
 * this endio will be called again) before doing any work, matching
 * the convention used by mode_select_endio().
 */
static int c9_inquiry_endio(struct bio *bio, unsigned int done, int error)
{
	struct rdac_private *p = bio->bi_private;
	struct rdac_handler *h = p->h;
	struct path *path = p->path;
	struct c9_inquiry *sp;

	if (bio->bi_size)
		return 1;	/* not finished yet - keep p alive */

	if (error) {
		dm_pg_init_complete(path, MP_FAIL_PATH);
		goto done;
	}
	/* We need to look at the sense keys here to take clear action.
	 * For now simple logic: if controller owns the lun, return
	 * dm_pg_init_complete(), otherwise submit MODE SELECT.
	 */
	sp = (struct c9_inquiry *)bio_data(bio);
	if (sp->avte_cvp & 0x1) {
		/* this controller already owns the LUN */
		dm_pg_init_complete(path, 0);
		goto done;
	}
	if (h->ctlr) {
		spin_lock(&h->ctlr->lock);
		if (!h->ctlr->submitted) {
			if (submit_mode_select(h, path) != 0)
				dm_pg_init_complete(path, MP_FAIL_PATH);
			else
				h->ctlr->submitted = 1;
		} else {
			/*
			 * A MODE SELECT is already in flight; park this
			 * request on the controller's queue.  Ownership of
			 * p passes to the list (rdac_resubmit_all frees it).
			 */
			list_add(&p->entry, &h->ctlr->cmd_list);
			p = NULL; /* Reuse p, do not free */
		}
		spin_unlock(&h->ctlr->lock);
	} else
		submit_c4_inquiry(h, path);
done:
	kfree(p);
	/* request is freed in block layer */
	free_bio(bio);
	return 0;
}
/*
 * Completion handler for the c8 (extended device id) inquiry: extract
 * the LUN (only the last byte of lun[] is used), reject LUNs the
 * legacy mode-6 page cannot address, then continue with the ownership
 * check (c9 inquiry).
 *
 * Fix vs. original: like mode_select_endio(), check for partial
 * completion (bi_size != 0 means this endio will be called again)
 * before freeing the private data; the original freed p up front.
 */
static int c8_inquiry_endio(struct bio *bio, unsigned int done, int error)
{
	struct rdac_private *p = bio->bi_private;
	struct rdac_handler *h = p->h;
	struct path *path = p->path;
	struct c8_inquiry *sp;

	if (bio->bi_size)
		return 1;	/* not finished yet - keep p alive */
	kfree(p);

	if (error) {
		dm_pg_init_complete(path, MP_FAIL_PATH);
		goto done;
	}
	/* We need to look at the sense keys here to take clear action.
	 * For now simple logic: Get the lun from the inquiry page.
	 */
	sp = (struct c8_inquiry *)bio_data(bio);
	if (sp->lun[7] >= MODE6_MAX_LUN && !h->use_10_ms) {
		DMERR("dm-rdac: MODE_SELECT6 cannot support device(%s) with "
		      "lun %d(>=%d)\n", path->dev->name, sp->lun[7],
		      MODE6_MAX_LUN);
		h->lun = UNSUPPORTED_LUN;
		dm_pg_init_complete(path, MP_FAIL_PATH);
		goto done;
	}
	h->lun = sp->lun[7]; /* currently it uses only one byte */
	submit_c9_inquiry(h, path);
done:
	/* request is freed in block layer */
	free_bio(bio);
	return 0;
}
/*
 * Build an INQUIRY request for vendor-specific VPD page @page_code
 * (EVPD bit set in cmd[1]), completing via @endio.  The data buffer
 * is zeroed first so the completion handler never parses stale data.
 * Returns NULL on allocation failure.
 */
static struct request *rdac_inquiry_get(struct rdac_handler *h,
struct path *path, int page_code,
unsigned int len, bio_end_io_t endio)
{
struct bio *bio;
struct request *rq;
bio = get_rdac_bio(path, len, endio, READ, h);
if (!bio) {
DMERR("dm-rdac: rdac_inquiry_get: no bio");
return NULL;
}
memset(bio_data(bio), 0, len);
/* get request for block layer packet command */
rq = get_rdac_req(h, bio, path, READ);
if (!rq) {
DMERR("dm-rdac: rdac_inquiry_get: no rq");
free_bio(bio);
return NULL;
}
/* Prepare the command. */
rq->cmd[0] = INQUIRY;
rq->cmd[1] = 1; /* EVPD: request a vital product data page */
rq->cmd[2] = page_code;
rq->cmd[4] = len;
rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
return rq;
}
/*
* only support subpage2c until we confirm that this is just a matter of
* of updating firmware or not, and RDAC (basic AVT works already) for now
* but we can add these in in when we get time and testers
*/
/*
 * Hardware-handler constructor.  Accepts either no arguments (defaults:
 * expanded mode-10 command, RDAC_FAILOVER_TIMEOUT) or exactly two:
 * <use_10_ms (0|1)> <timeout>.  Allocates the per-handler state and
 * stores it in hwh->context.
 */
static int rdac_create(struct hw_handler *hwh, unsigned argc, char **argv)
{
	struct rdac_handler *h;
	unsigned use_10_ms = 1;
	unsigned timeout = RDAC_FAILOVER_TIMEOUT;

	if (argc == 2) {
		if (sscanf(argv[0], "%u", &use_10_ms) != 1 || use_10_ms > 1) {
			DMWARN("dm-rdac: invalid command mode selected");
			return -EINVAL;
		}
		if (sscanf(argv[1], "%u", &timeout) != 1) {
			DMWARN("dm-rdac: invalid timeout value");
			return -EINVAL;
		}
	} else if (argc != 0) {
		DMWARN("dm-rdac: incorrect number of arguments");
		return -EINVAL;
	}

	h = kzalloc(sizeof(*h), GFP_KERNEL);
	if (!h)
		return -ENOMEM;

	hwh->context = h;
	h->timeout = timeout;
	h->use_10_ms = use_10_ms;
	h->lun = UNINITIALIZED_LUN;	/* resolved by the first c8 inquiry */
	DMWARN("dm-rdac: using %s RDAC command with timeout %u",
	       h->use_10_ms ? "expanded" : "legacy", h->timeout);
	return 0;
}
/*
 * Hardware-handler destructor: drop our controller reference (freeing
 * the controller if we were the last user) and release the handler.
 */
static void rdac_destroy(struct hw_handler *hwh)
{
	struct rdac_handler *h = hwh->context;

	if (h->ctlr)
		kref_put(&h->ctlr->kref, release_ctlr);
	kfree(h);
	hwh->context = NULL;
}
/*
 * Classify an I/O error for dm-multipath based on the RDAC-specific
 * sense data (sense key / asc / ascq packed into one integer);
 * anything unrecognized falls through to the generic SCSI handler.
 */
static unsigned rdac_error(struct hw_handler *hwh, struct bio *bio)
{
	if (bio_sense_valid(bio)) {
		switch (bio_sense_value(bio)) {
		case 0x020481:
			/*
			 * LUN Not Ready - Storage firmware incompatible.
			 * Manual code synchronisation required; nothing we
			 * can do here but bypass the path.
			 */
			return MP_BYPASS_PG;
		case 0x059401:
			/*
			 * Invalid Request - Current Logical Unit Ownership.
			 * This controller does not own the LUN; fail the
			 * path so the alternate one gets used.
			 */
			return MP_FAIL_PATH;
		case 0x0204A1:
			/*
			 * LUN Not Ready - Quiescence in progress.
			 * Just retry and wait.
			 */
			return 0;
		}
	}
	/* Try default handler */
	return dm_scsi_err_handler(hwh, bio);
}
/*
 * Build and queue an inquiry for @page_code at the head of the path's
 * request queue; on any failure the path is failed immediately via
 * dm_pg_init_complete().
 */
static void submit_inquiry(struct rdac_handler *h,
		struct path *path, int page_code,
		unsigned int len, bio_end_io_t endio)
{
	struct request_queue *q = bdev_get_queue(path->dev->bdev);
	struct request *rq;

	if (q) {
		rq = rdac_inquiry_get(h, path, page_code, len, endio);
		if (rq) {
			elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1);
			return;
		}
	}
	dm_pg_init_complete(path, MP_FAIL_PATH);
}
/*
 * Priority-group initialization entry point.  On first use the LUN is
 * still unknown, so start with the c8 inquiry that discovers it; a LUN
 * the legacy command set cannot address fails immediately; otherwise
 * go straight to the ownership check.
 */
static void rdac_pg_init(struct hw_handler *hwh, unsigned bypassed,
		struct path *path)
{
	struct rdac_handler *h = hwh->context;

	if (h->lun == UNINITIALIZED_LUN)
		submit_c8_inquiry(h, path);
	else if (h->lun == UNSUPPORTED_LUN)
		dm_pg_init_complete(path, MP_FAIL_PATH);
	else
		submit_c9_inquiry(h, path);
}
/* dm-multipath hardware handler operations for the "rdac" handler. */
static struct hw_handler_type rdac_handler = {
.name = RDAC_DM_HWH_NAME,
.module = THIS_MODULE,
.create = rdac_create,
.destroy = rdac_destroy,
.pg_init = rdac_pg_init,
.error = rdac_error,
};
/*
 * Module init: register the handler with dm-multipath.
 * Fix vs. original: the "version ... loaded" message was printed even
 * when registration failed; now it is only logged on success.
 */
static int __init rdac_init(void)
{
	int r = dm_register_hw_handler(&rdac_handler);

	if (r < 0) {
		DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r);
		return r;
	}
	DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER);
	return r;
}
/* Module exit: unregister the handler; log if that somehow fails. */
static void __exit rdac_exit(void)
{
	int err = dm_unregister_hw_handler(&rdac_handler);

	if (err < 0)
		DMERR("%s: unregister failed %d", RDAC_DM_HWH_NAME, err);
}
module_init(rdac_init);
module_exit(rdac_exit);
MODULE_DESCRIPTION("DM Multipath LSI/Engenio RDAC support");
MODULE_AUTHOR("Mike Christie");
MODULE_LICENSE("GPL");
MODULE_VERSION(RDAC_DM_HWH_VER);
Pre
ph
i
p
±
h
ph
p
>
aph
Ö©
ÎA
x?
p
aph
raph
p
ap
ph
aph
raph
x
aph
!
(
¸
;
y
$,
aph
8
ªªY
I
p
ÿ(
8
p
ph
Y
h
ph
Y
raph
8
h
H
±
h
raph
I
ph
ph
p
raph
aph
K
p
a
±
h
A
èDi
rap
x?)
a
Y
!
h
raph
(
ph
i
on.H
raph
h
aph
ap
, o
p
±
h
e, oI
aph
*
raph
aph
)1
aph
ÿ
y(
o
a
raph
ph
I
rs *
Y
aph
ph
raph
A
)
aph
,ÿKKKÿ
aph
h
ph
;
y
p
Y
I
/*
i
A
(
y
$,ÿK
ph
@
ph
ph
raph
ph
ap
h
ph
h
ph
1
uq
y
$,ÿK
ph
P
aph
ph
¡
aph
aph
\?
i
307)
Y
!
>
bio
ph
@
h
ªªY
!
o
raph
i
/*
d
©
}ÿe}ÿ
o
ey
¡
Kÿ
y(
DME
ªªªY
8
1
ph
ph
ÿ(
ph
ÐÐÿ
rs *
;
y
$,ÿKKKÿ
!
ph
aph
ph
aph
i
ap
8
h
Pr
1
<< 8)1
I
aph
?«ªªªY
0
aph
ph
ph
aph
aph
ph
ph
ph
ph
ph
ph
NSE
aph
ph
Prepa
ph
ph
aph
DME
r
Xy
(me
d
et;
uctA
aph
bio
n
;
free
rs *
h
2
2 << 8)
1307
ht Q
y
e, o
U
?«ªªªY
--
----------------------------------------------------------------------
Chandra Seetharaman | Be careful what you choose....
- sekharan at us.ibm.com | .......you may get it.
----------------------------------------------------------------------
-------------- next part --------------
A non-text attachment was scrubbed...
Name: dm-rdac.png
Type: image/png
Size: 64406 bytes
Desc: not available
URL: <http://listman.redhat.com/archives/dm-devel/attachments/20070321/a29789c5/attachment.png>
More information about the dm-devel
mailing list