From 5f0c9c48e7265039c3f945aaf44a1c6ae8adbd01 Mon Sep 17 00:00:00 2001
From: Ed Cashin <ecashin@coraid.com>
Date: Mon, 17 Dec 2012 16:03:49 -0800
Subject: [PATCH] aoe: use high-resolution RTTs with fallback to low-res

These changes improve the accuracy of the decision about whether it's time
to retransmit an AoE command by using the microsecond-resolution
gettimeofday instead of jiffies.

Because the system time can jump suddenly, the decision reverts to using
jiffies if the high-resolution time difference is relatively large.
Otherwise the AoE targets could be considered failed inappropriately.

Signed-off-by: Ed Cashin <ecashin@coraid.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/aoe/aoe.h    |  9 +++---
 drivers/block/aoe/aoecmd.c | 57 +++++++++++++++++++++++++++++++++-----
 2 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 9e884acd75fc..9fb68fc3b280 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -88,8 +88,7 @@ enum {
 	TIMERTICK = HZ / 10,
 	RTTSCALE = 8,
 	RTTDSCALE = 3,
-	MAXTIMER = HZ << 1,
-	RTTAVG_INIT = HZ / 4 << RTTSCALE,
+	RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE,
 	RTTDEV_INIT = RTTAVG_INIT / 4,
 };
 
@@ -106,6 +105,8 @@ struct buf {
 struct frame {
 	struct list_head head;
 	u32 tag;
+	struct timeval sent;	/* high-res time packet was sent */
+	u32 sent_jiffs;		/* low-res jiffies-based sent time */
 	ulong waited;
 	struct aoetgt *t;		/* parent target I belong to */
 	sector_t lba;
@@ -143,11 +144,11 @@ struct aoedev {
 	struct aoedev *next;
 	ulong sysminor;
 	ulong aoemajor;
+	u32 rttavg;		/* scaled AoE round trip time average */
+	u32 rttdev;		/* scaled round trip time mean deviation */
 	u16 aoeminor;
 	u16 flags;
 	u16 nopen;		/* (bd_openers isn't available without sleeping) */
-	u16 rttavg;		/* scaled AoE round trip time average */
-	u16 rttdev;		/* scaled round trip time mean deviation */
 	u16 fw_ver;		/* version of blade's firmware */
 	u16 lasttag;		/* last tag sent */
 	u16 useme;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 9aefbe3957ca..a99220ad6262 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -387,6 +387,8 @@ aoecmd_ata_rw(struct aoedev *d)
 	skb->dev = t->ifp->nd;
 	skb = skb_clone(skb, GFP_ATOMIC);
 	if (skb) {
+		do_gettimeofday(&f->sent);
+		f->sent_jiffs = (u32) jiffies;
 		__skb_queue_head_init(&queue);
 		__skb_queue_tail(&queue, skb);
 		aoenet_xmit(&queue);
@@ -475,11 +477,45 @@ resend(struct aoedev *d, struct frame *f)
 	skb = skb_clone(skb, GFP_ATOMIC);
 	if (skb == NULL)
 		return;
+	do_gettimeofday(&f->sent);
+	f->sent_jiffs = (u32) jiffies;
 	__skb_queue_head_init(&queue);
 	__skb_queue_tail(&queue, skb);
 	aoenet_xmit(&queue);
 }
 
+static int
+tsince_hr(struct frame *f)
+{
+	struct timeval now;
+	int n;
+
+	do_gettimeofday(&now);
+	n = now.tv_usec - f->sent.tv_usec;
+	n += (now.tv_sec - f->sent.tv_sec) * USEC_PER_SEC;
+
+	if (n < 0)
+		n = -n;
+
+	/* For relatively long periods, use jiffies to avoid
+	 * discrepancies caused by updates to the system time.
+	 *
+	 * On system with HZ of 1000, 32-bits is over 49 days
+	 * worth of jiffies, or over 71 minutes worth of usecs.
+	 *
+	 * Jiffies overflow is handled by subtraction of unsigned ints:
+	 * (gdb) print (unsigned) 2 - (unsigned) 0xfffffffe
+	 * $3 = 4
+	 * (gdb)
+	 */
+	if (n > USEC_PER_SEC / 4) {
+		n = ((u32) jiffies) - f->sent_jiffs;
+		n *= USEC_PER_SEC / HZ;
+	}
+
+	return n;
+}
+
 static int
 tsince(u32 tag)
 {
@@ -489,7 +525,7 @@ tsince(u32 tag)
 	n -= tag & 0xffff;
 	if (n < 0)
 		n += 1<<16;
-	return n;
+	return jiffies_to_usecs(n + 1);
 }
 
 static struct aoeif *
@@ -552,6 +588,7 @@ sthtith(struct aoedev *d)
 	nf->bv = f->bv;
 	nf->bv_off = f->bv_off;
 	nf->waited = 0;
+	nf->sent_jiffs = f->sent_jiffs;
 	f->skb = skb;
 	aoe_freetframe(f);
 	ht->nout--;
@@ -621,7 +658,7 @@ rexmit_timer(ulong vp)
 		head = &d->factive[i];
 		list_for_each_safe(pos, nx, head) {
 			f = list_entry(pos, struct frame, head);
-			if (tsince(f->tag) < timeout)
+			if (tsince_hr(f) < timeout)
 				break;	/* end of expired frames */
 			/* move to flist for later processing */
 			list_move_tail(pos, &flist);
@@ -632,8 +669,8 @@ rexmit_timer(ulong vp)
 	while (!list_empty(&flist)) {
 		pos = flist.next;
 		f = list_entry(pos, struct frame, head);
-		n = f->waited += tsince(f->tag);
-		n /= HZ;
+		n = f->waited += tsince_hr(f);
+		n /= USEC_PER_SEC;
 		if (n > aoe_deadsecs) {
 			/* Waited too long. Device failure.
 			 * Hang all frames on first hash bucket for downdev
@@ -1193,12 +1230,12 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	n = be32_to_cpu(get_unaligned(&h->tag));
 	f = getframe(d, n);
 	if (f) {
-		calc_rttavg(d, f->t, tsince(n));
+		calc_rttavg(d, f->t, tsince_hr(f));
 		f->t->nout--;
 	} else {
 		f = getframe_deferred(d, n);
 		if (f) {
-			calc_rttavg(d, NULL, tsince(n));
+			calc_rttavg(d, NULL, tsince_hr(f));
 		} else {
 			calc_rttavg(d, NULL, tsince(n));
 			spin_unlock_irqrestore(&d->lock, flags);
@@ -1276,7 +1313,13 @@ aoecmd_ata_id(struct aoedev *d)
 	d->rttdev = RTTDEV_INIT;
 	d->timer.function = rexmit_timer;
 
-	return skb_clone(skb, GFP_ATOMIC);
+	skb = skb_clone(skb, GFP_ATOMIC);
+	if (skb) {
+		do_gettimeofday(&f->sent);
+		f->sent_jiffs = (u32) jiffies;
+	}
+
+	return skb;
 }
 
 static struct aoetgt *