linux/net/tipc/monitor.c
Parthasarathy Bhuvaragan 7b3f522964 tipc: make cluster size threshold for monitoring configurable
In this commit, we introduce support to configure the minimum
threshold to activate the new link monitoring algorithm.

Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-07-26 14:26:42 -07:00

664 lines
18 KiB
C

/*
* net/tipc/monitor.c
*
* Copyright (c) 2016, Ericsson AB
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "core.h"
#include "addr.h"
#include "monitor.h"
#define MAX_MON_DOMAIN 64
#define MON_TIMEOUT 120000
#define MAX_PEER_DOWN_EVENTS 4
/* struct tipc_mon_domain: domain record to be transferred between peers
* @len: actual size of domain record
* @gen: current generation of sender's domain
* @ack_gen: most recent generation of self's domain acked by peer
* @member_cnt: number of domain member nodes described in this record
* @up_map: bit map indicating which of the members the sender considers up
* @members: identity of the domain members
*/
struct tipc_mon_domain {
u16 len;
u16 gen;
u16 ack_gen;
u16 member_cnt;
u64 up_map;
u32 members[MAX_MON_DOMAIN];
};
/* struct tipc_peer: state of a peer node and its domain
* @addr: tipc node identity of peer
* @head_map: shows which other nodes currently consider peer 'up'
* @domain: most recent domain record from peer
* @hash: position in hashed lookup list
* @list: position in linked list, in circular ascending order by 'addr'
* @applied: number of reported domain members applied on this monitor list
* @is_up: peer is up as seen from this node
* @is_head: peer is assigned domain head as seen from this node
* @is_local: peer is in local domain and should be continuously monitored
* @down_cnt: - numbers of other peers which have reported this on lost
*/
struct tipc_peer {
u32 addr;
struct tipc_mon_domain *domain;
struct hlist_node hash;
struct list_head list;
u8 applied;
u8 down_cnt;
bool is_up;
bool is_head;
bool is_local;
};
struct tipc_monitor {
struct hlist_head peers[NODE_HTABLE_SIZE];
int peer_cnt;
struct tipc_peer *self;
rwlock_t lock;
struct tipc_mon_domain cache;
u16 list_gen;
u16 dom_gen;
struct net *net;
struct timer_list timer;
unsigned long timer_intv;
};
static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
{
return tipc_net(net)->monitors[bearer_id];
}
const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
/* dom_rec_len(): actual length of domain record for transport
*/
static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
{
return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32));
}
/* dom_size() : calculate size of own domain based on number of peers
*/
static int dom_size(int peers)
{
int i = 0;
while ((i * i) < peers)
i++;
return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
}
static void map_set(u64 *up_map, int i, unsigned int v)
{
*up_map &= ~(1ULL << i);
*up_map |= ((u64)v << i);
}
static int map_get(u64 up_map, int i)
{
return (up_map & (1 << i)) >> i;
}
static struct tipc_peer *peer_prev(struct tipc_peer *peer)
{
return list_last_entry(&peer->list, struct tipc_peer, list);
}
static struct tipc_peer *peer_nxt(struct tipc_peer *peer)
{
return list_first_entry(&peer->list, struct tipc_peer, list);
}
static struct tipc_peer *peer_head(struct tipc_peer *peer)
{
while (!peer->is_head)
peer = peer_prev(peer);
return peer;
}
static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr)
{
struct tipc_peer *peer;
unsigned int thash = tipc_hashfn(addr);
hlist_for_each_entry(peer, &mon->peers[thash], hash) {
if (peer->addr == addr)
return peer;
}
return NULL;
}
static struct tipc_peer *get_self(struct net *net, int bearer_id)
{
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
return mon->self;
}
static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon)
{
struct tipc_net *tn = tipc_net(net);
return mon->peer_cnt > tn->mon_threshold;
}
/* mon_identify_lost_members() : - identify amd mark potentially lost members
*/
static void mon_identify_lost_members(struct tipc_peer *peer,
struct tipc_mon_domain *dom_bef,
int applied_bef)
{
struct tipc_peer *member = peer;
struct tipc_mon_domain *dom_aft = peer->domain;
int applied_aft = peer->applied;
int i;
for (i = 0; i < applied_bef; i++) {
member = peer_nxt(member);
/* Do nothing if self or peer already see member as down */
if (!member->is_up || !map_get(dom_bef->up_map, i))
continue;
/* Loss of local node must be detected by active probing */
if (member->is_local)
continue;
/* Start probing if member was removed from applied domain */
if (!applied_aft || (applied_aft < i)) {
member->down_cnt = 1;
continue;
}
/* Member loss is confirmed if it is still in applied domain */
if (!map_get(dom_aft->up_map, i))
member->down_cnt++;
}
}
/* mon_apply_domain() : match a peer's domain record against monitor list
*/
static void mon_apply_domain(struct tipc_monitor *mon,
struct tipc_peer *peer)
{
struct tipc_mon_domain *dom = peer->domain;
struct tipc_peer *member;
u32 addr;
int i;
if (!dom || !peer->is_up)
return;
/* Scan across domain members and match against monitor list */
peer->applied = 0;
member = peer_nxt(peer);
for (i = 0; i < dom->member_cnt; i++) {
addr = dom->members[i];
if (addr != member->addr)
return;
peer->applied++;
member = peer_nxt(member);
}
}
/* mon_update_local_domain() : update after peer addition/removal/up/down
*/
static void mon_update_local_domain(struct tipc_monitor *mon)
{
struct tipc_peer *self = mon->self;
struct tipc_mon_domain *cache = &mon->cache;
struct tipc_mon_domain *dom = self->domain;
struct tipc_peer *peer = self;
u64 prev_up_map = dom->up_map;
u16 member_cnt, i;
bool diff;
/* Update local domain size based on current size of cluster */
member_cnt = dom_size(mon->peer_cnt) - 1;
self->applied = member_cnt;
/* Update native and cached outgoing local domain records */
dom->len = dom_rec_len(dom, member_cnt);
diff = dom->member_cnt != member_cnt;
dom->member_cnt = member_cnt;
for (i = 0; i < member_cnt; i++) {
peer = peer_nxt(peer);
diff |= dom->members[i] != peer->addr;
dom->members[i] = peer->addr;
map_set(&dom->up_map, i, peer->is_up);
cache->members[i] = htonl(peer->addr);
}
diff |= dom->up_map != prev_up_map;
if (!diff)
return;
dom->gen = ++mon->dom_gen;
cache->len = htons(dom->len);
cache->gen = htons(dom->gen);
cache->member_cnt = htons(member_cnt);
cache->up_map = cpu_to_be64(dom->up_map);
mon_apply_domain(mon, self);
}
/* mon_update_neighbors() : update preceding neighbors of added/removed peer
*/
static void mon_update_neighbors(struct tipc_monitor *mon,
struct tipc_peer *peer)
{
int dz, i;
dz = dom_size(mon->peer_cnt);
for (i = 0; i < dz; i++) {
mon_apply_domain(mon, peer);
peer = peer_prev(peer);
}
}
/* mon_assign_roles() : reassign peer roles after a network change
* The monitor list is consistent at this stage; i.e., each peer is monitoring
* a set of domain members as matched between domain record and the monitor list
*/
static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head)
{
struct tipc_peer *peer = peer_nxt(head);
struct tipc_peer *self = mon->self;
int i = 0;
for (; peer != self; peer = peer_nxt(peer)) {
peer->is_local = false;
/* Update domain member */
if (i++ < head->applied) {
peer->is_head = false;
if (head == self)
peer->is_local = true;
continue;
}
/* Assign next domain head */
if (!peer->is_up)
continue;
if (peer->is_head)
break;
head = peer;
head->is_head = true;
i = 0;
}
mon->list_gen++;
}
void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
{
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
struct tipc_peer *self = get_self(net, bearer_id);
struct tipc_peer *peer, *prev, *head;
write_lock_bh(&mon->lock);
peer = get_peer(mon, addr);
if (!peer)
goto exit;
prev = peer_prev(peer);
list_del(&peer->list);
hlist_del(&peer->hash);
kfree(peer->domain);
kfree(peer);
mon->peer_cnt--;
head = peer_head(prev);
if (head == self)
mon_update_local_domain(mon);
mon_update_neighbors(mon, prev);
/* Revert to full-mesh monitoring if we reach threshold */
if (!tipc_mon_is_active(net, mon)) {
list_for_each_entry(peer, &self->list, list) {
kfree(peer->domain);
peer->domain = NULL;
peer->applied = 0;
}
}
mon_assign_roles(mon, head);
exit:
write_unlock_bh(&mon->lock);
}
static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr,
struct tipc_peer **peer)
{
struct tipc_peer *self = mon->self;
struct tipc_peer *cur, *prev, *p;
p = kzalloc(sizeof(*p), GFP_ATOMIC);
*peer = p;
if (!p)
return false;
p->addr = addr;
/* Add new peer to lookup list */
INIT_LIST_HEAD(&p->list);
hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]);
/* Sort new peer into iterator list, in ascending circular order */
prev = self;
list_for_each_entry(cur, &self->list, list) {
if ((addr > prev->addr) && (addr < cur->addr))
break;
if (((addr < cur->addr) || (addr > prev->addr)) &&
(prev->addr > cur->addr))
break;
prev = cur;
}
list_add_tail(&p->list, &cur->list);
mon->peer_cnt++;
mon_update_neighbors(mon, p);
return true;
}
void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id)
{
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
struct tipc_peer *self = get_self(net, bearer_id);
struct tipc_peer *peer, *head;
write_lock_bh(&mon->lock);
peer = get_peer(mon, addr);
if (!peer && !tipc_mon_add_peer(mon, addr, &peer))
goto exit;
peer->is_up = true;
head = peer_head(peer);
if (head == self)
mon_update_local_domain(mon);
mon_assign_roles(mon, head);
exit:
write_unlock_bh(&mon->lock);
}
void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
{
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
struct tipc_peer *self = get_self(net, bearer_id);
struct tipc_peer *peer, *head;
struct tipc_mon_domain *dom;
int applied;
write_lock_bh(&mon->lock);
peer = get_peer(mon, addr);
if (!peer) {
pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id);
goto exit;
}
applied = peer->applied;
peer->applied = 0;
dom = peer->domain;
peer->domain = NULL;
if (peer->is_head)
mon_identify_lost_members(peer, dom, applied);
kfree(dom);
peer->is_up = false;
peer->is_head = false;
peer->is_local = false;
peer->down_cnt = 0;
head = peer_head(peer);
if (head == self)
mon_update_local_domain(mon);
mon_assign_roles(mon, head);
exit:
write_unlock_bh(&mon->lock);
}
/* tipc_mon_rcv - process monitor domain event message
*/
void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
struct tipc_mon_state *state, int bearer_id)
{
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
struct tipc_mon_domain *arrv_dom = data;
struct tipc_mon_domain dom_bef;
struct tipc_mon_domain *dom;
struct tipc_peer *peer;
u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
u16 new_gen = ntohs(arrv_dom->gen);
u16 acked_gen = ntohs(arrv_dom->ack_gen);
bool probing = state->probing;
int i, applied_bef;
state->probing = false;
if (!dlen)
return;
/* Sanity check received domain record */
if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) {
pr_warn_ratelimited("Received illegal domain record\n");
return;
}
/* Synch generation numbers with peer if link just came up */
if (!state->synched) {
state->peer_gen = new_gen - 1;
state->acked_gen = acked_gen;
state->synched = true;
}
if (more(acked_gen, state->acked_gen))
state->acked_gen = acked_gen;
/* Drop duplicate unless we are waiting for a probe response */
if (!more(new_gen, state->peer_gen) && !probing)
return;
write_lock_bh(&mon->lock);
peer = get_peer(mon, addr);
if (!peer || !peer->is_up)
goto exit;
/* Peer is confirmed, stop any ongoing probing */
peer->down_cnt = 0;
/* Task is done for duplicate record */
if (!more(new_gen, state->peer_gen))
goto exit;
state->peer_gen = new_gen;
/* Cache current domain record for later use */
dom_bef.member_cnt = 0;
dom = peer->domain;
if (dom)
memcpy(&dom_bef, dom, dom->len);
/* Transform and store received domain record */
if (!dom || (dom->len < new_dlen)) {
kfree(dom);
dom = kmalloc(new_dlen, GFP_ATOMIC);
peer->domain = dom;
if (!dom)
goto exit;
}
dom->len = new_dlen;
dom->gen = new_gen;
dom->member_cnt = new_member_cnt;
dom->up_map = be64_to_cpu(arrv_dom->up_map);
for (i = 0; i < new_member_cnt; i++)
dom->members[i] = ntohl(arrv_dom->members[i]);
/* Update peers affected by this domain record */
applied_bef = peer->applied;
mon_apply_domain(mon, peer);
mon_identify_lost_members(peer, &dom_bef, applied_bef);
mon_assign_roles(mon, peer_head(peer));
exit:
write_unlock_bh(&mon->lock);
}
void tipc_mon_prep(struct net *net, void *data, int *dlen,
struct tipc_mon_state *state, int bearer_id)
{
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
struct tipc_mon_domain *dom = data;
u16 gen = mon->dom_gen;
u16 len;
if (!tipc_mon_is_active(net, mon))
return;
/* Send only a dummy record with ack if peer has acked our last sent */
if (likely(state->acked_gen == gen)) {
len = dom_rec_len(dom, 0);
*dlen = len;
dom->len = htons(len);
dom->gen = htons(gen);
dom->ack_gen = htons(state->peer_gen);
dom->member_cnt = 0;
return;
}
/* Send the full record */
read_lock_bh(&mon->lock);
len = ntohs(mon->cache.len);
*dlen = len;
memcpy(data, &mon->cache, len);
read_unlock_bh(&mon->lock);
dom->ack_gen = htons(state->peer_gen);
}
void tipc_mon_get_state(struct net *net, u32 addr,
struct tipc_mon_state *state,
int bearer_id)
{
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
struct tipc_peer *peer;
/* Used cached state if table has not changed */
if (!state->probing &&
(state->list_gen == mon->list_gen) &&
(state->acked_gen == mon->dom_gen))
return;
read_lock_bh(&mon->lock);
peer = get_peer(mon, addr);
if (peer) {
state->probing = state->acked_gen != mon->dom_gen;
state->probing |= peer->down_cnt;
state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS;
state->monitoring = peer->is_local;
state->monitoring |= peer->is_head;
state->list_gen = mon->list_gen;
}
read_unlock_bh(&mon->lock);
}
static void mon_timeout(unsigned long m)
{
struct tipc_monitor *mon = (void *)m;
struct tipc_peer *self;
int best_member_cnt = dom_size(mon->peer_cnt) - 1;
write_lock_bh(&mon->lock);
self = mon->self;
if (self && (best_member_cnt != self->applied)) {
mon_update_local_domain(mon);
mon_assign_roles(mon, self);
}
write_unlock_bh(&mon->lock);
mod_timer(&mon->timer, jiffies + mon->timer_intv);
}
int tipc_mon_create(struct net *net, int bearer_id)
{
struct tipc_net *tn = tipc_net(net);
struct tipc_monitor *mon;
struct tipc_peer *self;
struct tipc_mon_domain *dom;
if (tn->monitors[bearer_id])
return 0;
mon = kzalloc(sizeof(*mon), GFP_ATOMIC);
self = kzalloc(sizeof(*self), GFP_ATOMIC);
dom = kzalloc(sizeof(*dom), GFP_ATOMIC);
if (!mon || !self || !dom) {
kfree(mon);
kfree(self);
kfree(dom);
return -ENOMEM;
}
tn->monitors[bearer_id] = mon;
rwlock_init(&mon->lock);
mon->net = net;
mon->peer_cnt = 1;
mon->self = self;
self->domain = dom;
self->addr = tipc_own_addr(net);
self->is_up = true;
self->is_head = true;
INIT_LIST_HEAD(&self->list);
setup_timer(&mon->timer, mon_timeout, (unsigned long)mon);
mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
mod_timer(&mon->timer, jiffies + mon->timer_intv);
return 0;
}
void tipc_mon_delete(struct net *net, int bearer_id)
{
struct tipc_net *tn = tipc_net(net);
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
struct tipc_peer *self = get_self(net, bearer_id);
struct tipc_peer *peer, *tmp;
write_lock_bh(&mon->lock);
tn->monitors[bearer_id] = NULL;
list_for_each_entry_safe(peer, tmp, &self->list, list) {
list_del(&peer->list);
hlist_del(&peer->hash);
kfree(peer->domain);
kfree(peer);
}
mon->self = NULL;
write_unlock_bh(&mon->lock);
del_timer_sync(&mon->timer);
kfree(self->domain);
kfree(self);
kfree(mon);
}
int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size)
{
struct tipc_net *tn = tipc_net(net);
if (cluster_size > TIPC_CLUSTER_SIZE)
return -EINVAL;
tn->mon_threshold = cluster_size;
return 0;
}