linux/net/sched/act_police.c
Patrick McHardy 1e9b3d5339 [NET_SCHED]: policer: restore compatibility with old iproute binaries
The tc actions increased the size of struct tc_police, which broke
compatibility with old iproute binaries since both the act_police
and the old NET_CLS_POLICE code check for an exact size match.

Since the new members are not even used, the simple fix is to also
accept the size of the old structure. Dumping is not affected since
old userspace will receive a bigger structure, which is handled fine.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-12-02 21:32:07 -08:00

632 lines
15 KiB
C

/*
* net/sched/police.c Input police filter.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
* J Hadi Salim (action changes)
*/
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/module.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <net/sock.h>
#include <net/act_api.h>
#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log])
#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log])
#define POL_TAB_MASK 15
static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
static u32 police_idx_gen;
static DEFINE_RWLOCK(police_lock);
static struct tcf_hashinfo police_hash_info = {
.htab = tcf_police_ht,
.hmask = POL_TAB_MASK,
.lock = &police_lock,
};
/* old policer structure from before tc actions */
struct tc_police_compat
{
u32 index;
int action;
u32 limit;
u32 burst;
u32 mtu;
struct tc_ratespec rate;
struct tc_ratespec peakrate;
};
/* Each policer is serialized by its individual spinlock */
#ifdef CONFIG_NET_CLS_ACT
static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
int type, struct tc_action *a)
{
struct tcf_common *p;
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
struct rtattr *r;
read_lock(&police_lock);
s_i = cb->args[0];
for (i = 0; i < (POL_TAB_MASK + 1); i++) {
p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)];
for (; p; p = p->tcfc_next) {
index++;
if (index < s_i)
continue;
a->priv = p;
a->order = index;
r = (struct rtattr*) skb->tail;
RTA_PUT(skb, a->order, 0, NULL);
if (type == RTM_DELACTION)
err = tcf_action_dump_1(skb, a, 0, 1);
else
err = tcf_action_dump_1(skb, a, 0, 0);
if (err < 0) {
index--;
skb_trim(skb, (u8*)r - skb->data);
goto done;
}
r->rta_len = skb->tail - (u8*)r;
n_i++;
}
}
done:
read_unlock(&police_lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
rtattr_failure:
skb_trim(skb, (u8*)r - skb->data);
goto done;
}
#endif
void tcf_police_destroy(struct tcf_police *p)
{
unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
struct tcf_common **p1p;
for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
if (*p1p == &p->common) {
write_lock_bh(&police_lock);
*p1p = p->tcf_next;
write_unlock_bh(&police_lock);
#ifdef CONFIG_NET_ESTIMATOR
gen_kill_estimator(&p->tcf_bstats,
&p->tcf_rate_est);
#endif
if (p->tcfp_R_tab)
qdisc_put_rtab(p->tcfp_R_tab);
if (p->tcfp_P_tab)
qdisc_put_rtab(p->tcfp_P_tab);
kfree(p);
return;
}
}
BUG_TRAP(0);
}
#ifdef CONFIG_NET_CLS_ACT
static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
struct tc_action *a, int ovr, int bind)
{
unsigned h;
int ret = 0, err;
struct rtattr *tb[TCA_POLICE_MAX];
struct tc_police *parm;
struct tcf_police *police;
struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
int size;
if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
return -EINVAL;
if (tb[TCA_POLICE_TBF-1] == NULL)
return -EINVAL;
size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
return -EINVAL;
parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
if (tb[TCA_POLICE_RESULT-1] != NULL &&
RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
return -EINVAL;
if (tb[TCA_POLICE_RESULT-1] != NULL &&
RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
return -EINVAL;
if (parm->index) {
struct tcf_common *pc;
pc = tcf_hash_lookup(parm->index, &police_hash_info);
if (pc != NULL) {
a->priv = pc;
police = to_police(pc);
if (bind) {
police->tcf_bindcnt += 1;
police->tcf_refcnt += 1;
}
if (ovr)
goto override;
return ret;
}
}
police = kzalloc(sizeof(*police), GFP_KERNEL);
if (police == NULL)
return -ENOMEM;
ret = ACT_P_CREATED;
police->tcf_refcnt = 1;
spin_lock_init(&police->tcf_lock);
police->tcf_stats_lock = &police->tcf_lock;
if (bind)
police->tcf_bindcnt = 1;
override:
if (parm->rate.rate) {
err = -ENOMEM;
R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
if (R_tab == NULL)
goto failure;
if (parm->peakrate.rate) {
P_tab = qdisc_get_rtab(&parm->peakrate,
tb[TCA_POLICE_PEAKRATE-1]);
if (P_tab == NULL) {
qdisc_put_rtab(R_tab);
goto failure;
}
}
}
/* No failure allowed after this point */
spin_lock_bh(&police->tcf_lock);
if (R_tab != NULL) {
qdisc_put_rtab(police->tcfp_R_tab);
police->tcfp_R_tab = R_tab;
}
if (P_tab != NULL) {
qdisc_put_rtab(police->tcfp_P_tab);
police->tcfp_P_tab = P_tab;
}
if (tb[TCA_POLICE_RESULT-1])
police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
police->tcfp_toks = police->tcfp_burst = parm->burst;
police->tcfp_mtu = parm->mtu;
if (police->tcfp_mtu == 0) {
police->tcfp_mtu = ~0;
if (police->tcfp_R_tab)
police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
}
if (police->tcfp_P_tab)
police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
police->tcf_action = parm->action;
#ifdef CONFIG_NET_ESTIMATOR
if (tb[TCA_POLICE_AVRATE-1])
police->tcfp_ewma_rate =
*(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
if (est)
gen_replace_estimator(&police->tcf_bstats,
&police->tcf_rate_est,
police->tcf_stats_lock, est);
#endif
spin_unlock_bh(&police->tcf_lock);
if (ret != ACT_P_CREATED)
return ret;
PSCHED_GET_TIME(police->tcfp_t_c);
police->tcf_index = parm->index ? parm->index :
tcf_hash_new_index(&police_idx_gen, &police_hash_info);
h = tcf_hash(police->tcf_index, POL_TAB_MASK);
write_lock_bh(&police_lock);
police->tcf_next = tcf_police_ht[h];
tcf_police_ht[h] = &police->common;
write_unlock_bh(&police_lock);
a->priv = police;
return ret;
failure:
if (ret == ACT_P_CREATED)
kfree(police);
return err;
}
static int tcf_act_police_cleanup(struct tc_action *a, int bind)
{
struct tcf_police *p = a->priv;
if (p != NULL)
return tcf_police_release(p, bind);
return 0;
}
static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
struct tcf_result *res)
{
struct tcf_police *police = a->priv;
psched_time_t now;
long toks;
long ptoks = 0;
spin_lock(&police->tcf_lock);
police->tcf_bstats.bytes += skb->len;
police->tcf_bstats.packets++;
#ifdef CONFIG_NET_ESTIMATOR
if (police->tcfp_ewma_rate &&
police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
police->tcf_qstats.overlimits++;
spin_unlock(&police->tcf_lock);
return police->tcf_action;
}
#endif
if (skb->len <= police->tcfp_mtu) {
if (police->tcfp_R_tab == NULL) {
spin_unlock(&police->tcf_lock);
return police->tcfp_result;
}
PSCHED_GET_TIME(now);
toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
police->tcfp_burst);
if (police->tcfp_P_tab) {
ptoks = toks + police->tcfp_ptoks;
if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
ptoks = (long)L2T_P(police, police->tcfp_mtu);
ptoks -= L2T_P(police, skb->len);
}
toks += police->tcfp_toks;
if (toks > (long)police->tcfp_burst)
toks = police->tcfp_burst;
toks -= L2T(police, skb->len);
if ((toks|ptoks) >= 0) {
police->tcfp_t_c = now;
police->tcfp_toks = toks;
police->tcfp_ptoks = ptoks;
spin_unlock(&police->tcf_lock);
return police->tcfp_result;
}
}
police->tcf_qstats.overlimits++;
spin_unlock(&police->tcf_lock);
return police->tcf_action;
}
static int
tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
unsigned char *b = skb->tail;
struct tcf_police *police = a->priv;
struct tc_police opt;
opt.index = police->tcf_index;
opt.action = police->tcf_action;
opt.mtu = police->tcfp_mtu;
opt.burst = police->tcfp_burst;
opt.refcnt = police->tcf_refcnt - ref;
opt.bindcnt = police->tcf_bindcnt - bind;
if (police->tcfp_R_tab)
opt.rate = police->tcfp_R_tab->rate;
else
memset(&opt.rate, 0, sizeof(opt.rate));
if (police->tcfp_P_tab)
opt.peakrate = police->tcfp_P_tab->rate;
else
memset(&opt.peakrate, 0, sizeof(opt.peakrate));
RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
if (police->tcfp_result)
RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
&police->tcfp_result);
#ifdef CONFIG_NET_ESTIMATOR
if (police->tcfp_ewma_rate)
RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
#endif
return skb->len;
rtattr_failure:
skb_trim(skb, b - skb->data);
return -1;
}
MODULE_AUTHOR("Alexey Kuznetsov");
MODULE_DESCRIPTION("Policing actions");
MODULE_LICENSE("GPL");
static struct tc_action_ops act_police_ops = {
.kind = "police",
.hinfo = &police_hash_info,
.type = TCA_ID_POLICE,
.capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_act_police,
.dump = tcf_act_police_dump,
.cleanup = tcf_act_police_cleanup,
.lookup = tcf_hash_search,
.init = tcf_act_police_locate,
.walk = tcf_act_police_walker
};
static int __init
police_init_module(void)
{
return tcf_register_action(&act_police_ops);
}
static void __exit
police_cleanup_module(void)
{
tcf_unregister_action(&act_police_ops);
}
module_init(police_init_module);
module_exit(police_cleanup_module);
#else /* CONFIG_NET_CLS_ACT */
static struct tcf_common *tcf_police_lookup(u32 index)
{
struct tcf_hashinfo *hinfo = &police_hash_info;
struct tcf_common *p;
read_lock(hinfo->lock);
for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
p = p->tcfc_next) {
if (p->tcfc_index == index)
break;
}
read_unlock(hinfo->lock);
return p;
}
static u32 tcf_police_new_index(void)
{
u32 *idx_gen = &police_idx_gen;
u32 val = *idx_gen;
do {
if (++val == 0)
val = 1;
} while (tcf_police_lookup(val));
return (*idx_gen = val);
}
struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
{
unsigned int h;
struct tcf_police *police;
struct rtattr *tb[TCA_POLICE_MAX];
struct tc_police *parm;
int size;
if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
return NULL;
if (tb[TCA_POLICE_TBF-1] == NULL)
return NULL;
size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
return NULL;
parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
if (parm->index) {
struct tcf_common *pc;
pc = tcf_police_lookup(parm->index);
if (pc) {
police = to_police(pc);
police->tcf_refcnt++;
return police;
}
}
police = kzalloc(sizeof(*police), GFP_KERNEL);
if (unlikely(!police))
return NULL;
police->tcf_refcnt = 1;
spin_lock_init(&police->tcf_lock);
police->tcf_stats_lock = &police->tcf_lock;
if (parm->rate.rate) {
police->tcfp_R_tab =
qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
if (police->tcfp_R_tab == NULL)
goto failure;
if (parm->peakrate.rate) {
police->tcfp_P_tab =
qdisc_get_rtab(&parm->peakrate,
tb[TCA_POLICE_PEAKRATE-1]);
if (police->tcfp_P_tab == NULL)
goto failure;
}
}
if (tb[TCA_POLICE_RESULT-1]) {
if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
goto failure;
police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
}
#ifdef CONFIG_NET_ESTIMATOR
if (tb[TCA_POLICE_AVRATE-1]) {
if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
goto failure;
police->tcfp_ewma_rate =
*(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
}
#endif
police->tcfp_toks = police->tcfp_burst = parm->burst;
police->tcfp_mtu = parm->mtu;
if (police->tcfp_mtu == 0) {
police->tcfp_mtu = ~0;
if (police->tcfp_R_tab)
police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
}
if (police->tcfp_P_tab)
police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
PSCHED_GET_TIME(police->tcfp_t_c);
police->tcf_index = parm->index ? parm->index :
tcf_police_new_index();
police->tcf_action = parm->action;
#ifdef CONFIG_NET_ESTIMATOR
if (est)
gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est,
police->tcf_stats_lock, est);
#endif
h = tcf_hash(police->tcf_index, POL_TAB_MASK);
write_lock_bh(&police_lock);
police->tcf_next = tcf_police_ht[h];
tcf_police_ht[h] = &police->common;
write_unlock_bh(&police_lock);
return police;
failure:
if (police->tcfp_R_tab)
qdisc_put_rtab(police->tcfp_R_tab);
kfree(police);
return NULL;
}
int tcf_police(struct sk_buff *skb, struct tcf_police *police)
{
psched_time_t now;
long toks;
long ptoks = 0;
spin_lock(&police->tcf_lock);
police->tcf_bstats.bytes += skb->len;
police->tcf_bstats.packets++;
#ifdef CONFIG_NET_ESTIMATOR
if (police->tcfp_ewma_rate &&
police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
police->tcf_qstats.overlimits++;
spin_unlock(&police->tcf_lock);
return police->tcf_action;
}
#endif
if (skb->len <= police->tcfp_mtu) {
if (police->tcfp_R_tab == NULL) {
spin_unlock(&police->tcf_lock);
return police->tcfp_result;
}
PSCHED_GET_TIME(now);
toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
police->tcfp_burst);
if (police->tcfp_P_tab) {
ptoks = toks + police->tcfp_ptoks;
if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
ptoks = (long)L2T_P(police, police->tcfp_mtu);
ptoks -= L2T_P(police, skb->len);
}
toks += police->tcfp_toks;
if (toks > (long)police->tcfp_burst)
toks = police->tcfp_burst;
toks -= L2T(police, skb->len);
if ((toks|ptoks) >= 0) {
police->tcfp_t_c = now;
police->tcfp_toks = toks;
police->tcfp_ptoks = ptoks;
spin_unlock(&police->tcf_lock);
return police->tcfp_result;
}
}
police->tcf_qstats.overlimits++;
spin_unlock(&police->tcf_lock);
return police->tcf_action;
}
EXPORT_SYMBOL(tcf_police);
int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
{
unsigned char *b = skb->tail;
struct tc_police opt;
opt.index = police->tcf_index;
opt.action = police->tcf_action;
opt.mtu = police->tcfp_mtu;
opt.burst = police->tcfp_burst;
if (police->tcfp_R_tab)
opt.rate = police->tcfp_R_tab->rate;
else
memset(&opt.rate, 0, sizeof(opt.rate));
if (police->tcfp_P_tab)
opt.peakrate = police->tcfp_P_tab->rate;
else
memset(&opt.peakrate, 0, sizeof(opt.peakrate));
RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
if (police->tcfp_result)
RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
&police->tcfp_result);
#ifdef CONFIG_NET_ESTIMATOR
if (police->tcfp_ewma_rate)
RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
#endif
return skb->len;
rtattr_failure:
skb_trim(skb, b - skb->data);
return -1;
}
int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police)
{
struct gnet_dump d;
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
TCA_XSTATS, police->tcf_stats_lock,
&d) < 0)
goto errout;
if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 ||
#endif
gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0)
goto errout;
if (gnet_stats_finish_copy(&d) < 0)
goto errout;
return 0;
errout:
return -1;
}
#endif /* CONFIG_NET_CLS_ACT */