From 840fe6359c1db978d01fceb8a023f5f6efdf7f1c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 5 Aug 2010 22:20:09 +0200 Subject: [PATCH 1/6] ieee1394: Adjust confusing if indentation Indent the branch of an if. Signed-off-by: Julia Lawall Signed-off-by: Stefan Richter --- drivers/ieee1394/ohci1394.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c index d0dc1db80b29..50815022cff1 100644 --- a/drivers/ieee1394/ohci1394.c +++ b/drivers/ieee1394/ohci1394.c @@ -1106,7 +1106,7 @@ static int ohci_iso_recv_init(struct hpsb_iso *iso) if (recv->block_irq_interval * 4 > iso->buf_packets) recv->block_irq_interval = iso->buf_packets / 4; if (recv->block_irq_interval < 1) - recv->block_irq_interval = 1; + recv->block_irq_interval = 1; /* choose a buffer stride */ /* must be a power of 2, and <= PAGE_SIZE */ From 6c74340bce253ea95c9ee801b3c411a333937edf Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Mon, 16 Aug 2010 21:58:03 +0200 Subject: [PATCH 2/6] firewire: sbp2: fix memory leak in sbp2_cancel_orbs or at send error When an ORB was canceled (Command ORB i.e. SCSI request timed out, or Management ORB timed out), or there was a send error in the initial transaction, we missed to drop one of the ORB's references and thus leaked memory. Background: In total, we hold 3 references to each Operation Request Block: - 1 during sbp2_scsi_queuecommand() or sbp2_send_management_orb() respectively, - 1 for the duration of the write transaction to the ORB_Pointer or Management_Agent register of the target, - 1 for as long as the ORB stays within the lu->orb_list, until the ORB is unlinked from the list and the orb->callback was executed. The latter one of these 3 references is finished - normally by sbp2_status_write() when the target wrote status for a pending ORB, - or by sbp2_cancel_orbs() in case of an ORB time-out, - or by complete_transaction() in case of a send error. Of them, the latter two lacked the kref_put. Add the missing kref_put()s. Add comments to the gets and puts of references for transaction callbacks and ORB callbacks so that it is easier to see what is supposed to happen. Signed-off-by: Stefan Richter --- drivers/firewire/sbp2.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 9f76171717e5..e6cbe491f7ee 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -450,7 +450,7 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request, if (&orb->link != &lu->orb_list) { orb->callback(orb, &status); - kref_put(&orb->kref, free_orb); + kref_put(&orb->kref, free_orb); /* orb callback reference */ } else { fw_error("status write for unknown orb\n"); } @@ -480,12 +480,14 @@ static void complete_transaction(struct fw_card *card, int rcode, if (orb->rcode != RCODE_COMPLETE) { list_del(&orb->link); spin_unlock_irqrestore(&card->lock, flags); + orb->callback(orb, NULL); + kref_put(&orb->kref, free_orb); /* orb callback reference */ } else { spin_unlock_irqrestore(&card->lock, flags); } - kref_put(&orb->kref, free_orb); + kref_put(&orb->kref, free_orb); /* transaction callback reference */ } static void sbp2_send_orb(struct sbp2_orb *orb, struct sbp2_logical_unit *lu, @@ -501,9 +503,8 @@ static void sbp2_send_orb(struct sbp2_orb *orb, struct sbp2_logical_unit *lu, list_add_tail(&orb->link, &lu->orb_list); spin_unlock_irqrestore(&device->card->lock, flags); - /* Take a ref for the orb list and for the transaction callback. */ - kref_get(&orb->kref); - kref_get(&orb->kref); + kref_get(&orb->kref); /* transaction callback reference */ + kref_get(&orb->kref); /* orb callback reference */ fw_send_request(device->card, &orb->t, TCODE_WRITE_BLOCK_REQUEST, node_id, generation, device->max_speed, offset, @@ -530,6 +531,7 @@ static int sbp2_cancel_orbs(struct sbp2_logical_unit *lu) orb->rcode = RCODE_CANCELLED; orb->callback(orb, NULL); + kref_put(&orb->kref, free_orb); /* orb callback reference */ } return retval; From a481e97d3cdc40b9d58271675bd4f0abb79d4872 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Mon, 16 Aug 2010 22:13:34 +0200 Subject: [PATCH 3/6] firewire: sbp2: fix stall with "Unsolicited response" Fix I/O stalls with some 4-bay RAID enclosures which are based on OXUF936QSE: - Onnto dataTale RSM4QO, old firmware (not anymore with current firmware), - inXtron Hydra Super-S LCM, old as well as current firmware when used in RAID-5 mode, perhaps also in other RAID modes. The stalls happen during heavy or moderate disk traffic in periods that are a multiple of 5 minutes, roughly twice per hour. They are caused by the target responding too late to an ORB_Pointer register write: The target responds after Split_Timeout, hence firewire-core cancels the transaction, and firewire-sbp2 fails the SCSI request. The SCSI core retries the request, that fails again (and again), hence SCSI core calls firewire-sbp2's abort handler (and even the Management_Agent register write in the abort handler has the transaction timeout problem). During all that, the process which issued the I/O is stalled in I/O wait state. Meanwhile, the target actually acts on the first failed SCSI request: It responds to the ORB_Pointer write later (seen in the kernel log as "firewire_core: Unsolicited response") and also finishes the SCSI request with proper status (seen in the kernel log as "firewire_sbp2: status write for unknown orb"). So let's just ignore RCODE_CANCELLED in the transaction callback and wait for the target to complete the ORB nevertheless. This requires a small modification is sbp2_cancel_orbs(); it now needs to call orb->callback() regardless whether fw_cancel_transaction() found the transaction unfinished or finished. A different solution is to increase Split_Timeout on the local node. (Tested: 2000ms timeout; maybe 1000ms or something like that works too. 200ms is insufficient. Standard is 100ms.) However, I rather not do this because any software on any node could change the Split_Timeout to something unsuitable. Or such a large Split_Timeout may be undesirable for other purposes. Signed-off-by: Stefan Richter --- drivers/firewire/sbp2.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index e6cbe491f7ee..bfae4b309791 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -472,12 +472,18 @@ static void complete_transaction(struct fw_card *card, int rcode, * So this callback only sets the rcode if it hasn't already * been set and only does the cleanup if the transaction * failed and we didn't already get a status write. + * + * Here we treat RCODE_CANCELLED like RCODE_COMPLETE because some + * OXUF936QSE firmwares occasionally respond after Split_Timeout and + * complete the ORB just fine. Note, we also get RCODE_CANCELLED + * from sbp2_cancel_orbs() if fw_cancel_transaction() == 0. */ spin_lock_irqsave(&card->lock, flags); if (orb->rcode == -1) orb->rcode = rcode; - if (orb->rcode != RCODE_COMPLETE) { + + if (orb->rcode != RCODE_COMPLETE && orb->rcode != RCODE_CANCELLED) { list_del(&orb->link); spin_unlock_irqrestore(&card->lock, flags); @@ -526,8 +532,7 @@ static int sbp2_cancel_orbs(struct sbp2_logical_unit *lu) list_for_each_entry_safe(orb, next, &list, link) { retval = 0; - if (fw_cancel_transaction(device->card, &orb->t) == 0) - continue; + fw_cancel_transaction(device->card, &orb->t); orb->rcode = RCODE_CANCELLED; orb->callback(orb, NULL); From 1bf145fed572583d4cb7c1784689a0b42c997ba6 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Mon, 16 Aug 2010 23:45:54 +0200 Subject: [PATCH 4/6] firewire: net: fix unicast reception RCODE in failure paths The incoming request hander fwnet_receive_packet() expects subsequent datagram handling code to return non-zero on errors. However, almost none of the failure paths did so. Fix them all. (This error reporting is used to send and RCODE_CONFLICT_ERROR to the sender node in such failure cases. Two modes of failure exist: Out of memory, or firewire-net is unaware of any peer node to which a fragment or an ARP packet belongs. However, it is unclear whether a sender can actually make use of such information. A Linux peer apparently can't. Maybe it should all be simplified to void functions.) Reported-by: Julia Lawall Signed-off-by: Stefan Richter --- drivers/firewire/net.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index da17d409a244..33f8421c71cc 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -579,7 +579,7 @@ static int fwnet_finish_incoming_packet(struct net_device *net, if (!peer) { fw_notify("No peer for ARP packet from %016llx\n", (unsigned long long)peer_guid); - goto failed_proto; + goto no_peer; } /* @@ -656,7 +656,7 @@ static int fwnet_finish_incoming_packet(struct net_device *net, return 0; - failed_proto: + no_peer: net->stats.rx_errors++; net->stats.rx_dropped++; @@ -664,7 +664,7 @@ static int fwnet_finish_incoming_packet(struct net_device *net, if (netif_queue_stopped(net)) netif_wake_queue(net); - return 0; + return -ENOENT; } static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, @@ -701,7 +701,7 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, fw_error("out of memory\n"); net->stats.rx_dropped++; - return -1; + return -ENOMEM; } skb_reserve(skb, (net->hard_header_len + 15) & ~15); memcpy(skb_put(skb, len), buf, len); @@ -726,8 +726,10 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, spin_lock_irqsave(&dev->lock, flags); peer = fwnet_peer_find_by_node_id(dev, source_node_id, generation); - if (!peer) - goto bad_proto; + if (!peer) { + retval = -ENOENT; + goto fail; + } pd = fwnet_pd_find(peer, datagram_label); if (pd == NULL) { @@ -741,7 +743,7 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, dg_size, buf, fg_off, len); if (pd == NULL) { retval = -ENOMEM; - goto bad_proto; + goto fail; } peer->pdg_size++; } else { @@ -755,9 +757,9 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, pd = fwnet_pd_new(net, peer, datagram_label, dg_size, buf, fg_off, len); if (pd == NULL) { - retval = -ENOMEM; peer->pdg_size--; - goto bad_proto; + retval = -ENOMEM; + goto fail; } } else { if (!fwnet_pd_update(peer, pd, buf, fg_off, len)) { @@ -768,7 +770,8 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, */ fwnet_pd_delete(pd); peer->pdg_size--; - goto bad_proto; + retval = -ENOMEM; + goto fail; } } } /* new datagram or add to existing one */ @@ -794,14 +797,13 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, spin_unlock_irqrestore(&dev->lock, flags); return 0; - - bad_proto: + fail: spin_unlock_irqrestore(&dev->lock, flags); if (netif_queue_stopped(net)) netif_wake_queue(net); - return 0; + return retval; } static void fwnet_receive_packet(struct fw_card *card, struct fw_request *r, From 2222bcb76790f4f61f39ec1514946a7593b07e02 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Wed, 18 Aug 2010 15:05:02 +0200 Subject: [PATCH 5/6] firewire: core: do not use del_timer_sync() in interrupt context Because we might be in interrupt context, replace del_timer_sync() with del_timer(). If the timer is already running, we know that it will clean up the transaction, so we do not need to do any further processing in the normal transaction handler. Many thanks to Yong Zhang for diagnosing this. Reported-by: Stefan Richter Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter --- drivers/firewire/core-transaction.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index ca7ca56661e0..b42a0bde8494 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -81,6 +81,10 @@ static int close_transaction(struct fw_transaction *transaction, spin_lock_irqsave(&card->lock, flags); list_for_each_entry(t, &card->transaction_list, link) { if (t == transaction) { + if (!del_timer(&t->split_timeout_timer)) { + spin_unlock_irqrestore(&card->lock, flags); + goto timed_out; + } list_del_init(&t->link); card->tlabel_mask &= ~(1ULL << t->tlabel); break; @@ -89,11 +93,11 @@ static int close_transaction(struct fw_transaction *transaction, spin_unlock_irqrestore(&card->lock, flags); if (&t->link != &card->transaction_list) { - del_timer_sync(&t->split_timeout_timer); t->callback(card, rcode, NULL, 0, t->callback_data); return 0; } + timed_out: return -ENOENT; } @@ -921,6 +925,10 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) spin_lock_irqsave(&card->lock, flags); list_for_each_entry(t, &card->transaction_list, link) { if (t->node_id == source && t->tlabel == tlabel) { + if (!del_timer(&t->split_timeout_timer)) { + spin_unlock_irqrestore(&card->lock, flags); + goto timed_out; + } list_del_init(&t->link); card->tlabel_mask &= ~(1ULL << t->tlabel); break; @@ -929,6 +937,7 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) spin_unlock_irqrestore(&card->lock, flags); if (&t->link == &card->transaction_list) { + timed_out: fw_notify("Unsolicited response (source %x, tlabel %x)\n", source, tlabel); return; @@ -963,8 +972,6 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) break; } - del_timer_sync(&t->split_timeout_timer); - /* * The response handler may be executed while the request handler * is still pending. Cancel the request handler. From a4dc090b6cb445257d2a8e44f85395ced6d1ed3e Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 28 Aug 2010 14:21:26 +0200 Subject: [PATCH 6/6] firewire: ohci: work around VIA and NEC PHY packet reception bug VIA VT6306, VIA VT6308, and NEC OrangeLink controllers do not write packet event codes for received PHY packets (or perhaps write evt_no_status, hard to tell). Work around it by overwriting the packet's ACK by ack_complete, so that upper layers that listen to PHY packet reception get to see these packets. (Also tested: TI TSB82AA2, TI TSB43AB22/A, TI XIO2213A, Agere FW643, JMicron JMB381 --- these do not exhibit this bug.) Clemens proposed a quirks flag for that, IOW whitelist known misbehaving controllers for this workaround. Though to me it seems harmless enough to enable for all controllers. The log_ar_at_event() debug log will continue to show the original status from the DMA unit. Reported-by: Clemens Ladisch (VT6308) Signed-off-by: Stefan Richter --- drivers/firewire/ohci.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 7f03540cabe8..be29b0bb2471 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -694,7 +694,15 @@ static __le32 *handle_ar_packet(struct ar_context *ctx, __le32 *buffer) log_ar_at_event('R', p.speed, p.header, evt); /* - * The OHCI bus reset handler synthesizes a phy packet with + * Several controllers, notably from NEC and VIA, forget to + * write ack_complete status at PHY packet reception. + */ + if (evt == OHCI1394_evt_no_status && + (p.header[0] & 0xff) == (OHCI1394_phy_tcode << 4)) + p.ack = ACK_COMPLETE; + + /* + * The OHCI bus reset handler synthesizes a PHY packet with * the new generation number when a bus reset happens (see * section 8.4.2.3). This helps us determine when a request * was received and make sure we send the response in the same