[PATCH] sbp2: fix spinlock recursion

sbp2util_mark_command_completed takes a lock which was already taken by
sbp2scsi_complete_all_commands.  This is a regression in Linux 2.6.15.

 Reported by Kristian Harms at
	https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=187394

[ More complete commentary, as response to questions by Andrew: ]

> This changes the call environment for all implementations of
> ->Current_done().  Are they all safe to call under this lock?

Short answer: Yes, trust me.  ;-) Long answer:

The done() callbacks are passed on to sbp2 from the SCSI stack along
with each SCSI command via the queuecommand hook.  The done() callback
is safe to call in atomic context.  So does
Documentation/scsi/scsi_mid_low_api.txt say, and many if not all SCSI
low-level handlers rely on this fact.  So whatever this callback does,
it is "self-contained" and it won't conflict with sbp2's internal ORB
list handling.  In particular, it won't race with the
sbp2_command_orb_lock.

Moreover, sbp2 already calls the done() handler with
sbp2_command_orb_lock taken in sbp2scsi_complete_all_commands().  I
admit this is ultimately no proof of correctness, especially since this
portion of code introduced the spinlock recursion in the first place and
we didn't realize it since this code's submission before 2.6.15 until
now.  (I have learned a lesson from this.)

I stress-tested my patch on x86 uniprocessor with a preemptible SMP
kernel (alas I have no SMP machine yet) and made sure that all code
paths which involve the sbp2_command_orb_lock were gone through multiple
times.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Stefan Richter 2006-04-01 21:11:41 +02:00 committed by Linus Torvalds
parent b043b673dc
commit 24c7cd0630

View File

@ -496,22 +496,17 @@ static struct sbp2_command_info *sbp2util_find_command_for_orb(
/*
* This function finds the sbp2_command for a given outstanding SCpnt.
* Only looks at the inuse list.
* Must be called with scsi_id->sbp2_command_orb_lock held.
*/
static struct sbp2_command_info *sbp2util_find_command_for_SCpnt(struct scsi_id_instance_data *scsi_id, void *SCpnt)
static struct sbp2_command_info *sbp2util_find_command_for_SCpnt(
struct scsi_id_instance_data *scsi_id, void *SCpnt)
{
struct sbp2_command_info *command;
unsigned long flags;
spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
if (!list_empty(&scsi_id->sbp2_command_orb_inuse)) {
list_for_each_entry(command, &scsi_id->sbp2_command_orb_inuse, list) {
if (command->Current_SCpnt == SCpnt) {
spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
if (!list_empty(&scsi_id->sbp2_command_orb_inuse))
list_for_each_entry(command, &scsi_id->sbp2_command_orb_inuse, list)
if (command->Current_SCpnt == SCpnt)
return command;
}
}
}
spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
return NULL;
}
@ -580,17 +575,15 @@ static void sbp2util_free_command_dma(struct sbp2_command_info *command)
/*
* This function moves a command to the completed orb list.
* Must be called with scsi_id->sbp2_command_orb_lock held.
*/
static void sbp2util_mark_command_completed(struct scsi_id_instance_data *scsi_id,
struct sbp2_command_info *command)
static void sbp2util_mark_command_completed(
struct scsi_id_instance_data *scsi_id,
struct sbp2_command_info *command)
{
unsigned long flags;
spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
list_del(&command->list);
sbp2util_free_command_dma(command);
list_add_tail(&command->list, &scsi_id->sbp2_command_orb_completed);
spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
}
/*
@ -2148,7 +2141,9 @@ static int sbp2_handle_status_write(struct hpsb_host *host, int nodeid, int dest
* Matched status with command, now grab scsi command pointers and check status
*/
SCpnt = command->Current_SCpnt;
spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
sbp2util_mark_command_completed(scsi_id, command);
spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
if (SCpnt) {
@ -2484,6 +2479,7 @@ static int sbp2scsi_abort(struct scsi_cmnd *SCpnt)
(struct scsi_id_instance_data *)SCpnt->device->host->hostdata[0];
struct sbp2scsi_host_info *hi = scsi_id->hi;
struct sbp2_command_info *command;
unsigned long flags;
SBP2_ERR("aborting sbp2 command");
scsi_print_command(SCpnt);
@ -2494,6 +2490,7 @@ static int sbp2scsi_abort(struct scsi_cmnd *SCpnt)
* Right now, just return any matching command structures
* to the free pool.
*/
spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
command = sbp2util_find_command_for_SCpnt(scsi_id, SCpnt);
if (command) {
SBP2_DEBUG("Found command to abort");
@ -2511,6 +2508,7 @@ static int sbp2scsi_abort(struct scsi_cmnd *SCpnt)
command->Current_done(command->Current_SCpnt);
}
}
spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
/*
* Initiate a fetch agent reset.