From c68cd6cc21eb329c47ff020ff7412bf58176984e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 17 Jun 2010 06:12:26 +0200 Subject: [PATCH 1/9] netfilter: nf_nat: support user-specified SNAT rules in LOCAL_IN 2.6.34 introduced 'conntrack zones' to deal with cases where packets from multiple identical networks are handled by conntrack/NAT. Packets are looped through veth devices, during which they are NATed to private addresses, after which they can continue normally through the stack and possibly have NAT rules applied a second time. This works well, but is needlessly complicated for cases where only a single SNAT/DNAT mapping needs to be applied to these packets. In that case, all that needs to be done is to assign each network to a seperate zone and perform NAT as usual. However this doesn't work for packets destined for the machine performing NAT itself since its corrently not possible to configure SNAT mappings for the LOCAL_IN chain. This patch adds a new INPUT chain to the NAT table and changes the targets performing SNAT to be usable in that chain. Example usage with two identical networks (192.168.0.0/24) on eth0/eth1: iptables -t raw -A PREROUTING -i eth0 -j CT --zone 1 iptables -t raw -A PREROUTING -i eth0 -j MARK --set-mark 1 iptables -t raw -A PREROUTING -i eth1 -j CT --zone 2 iptabels -t raw -A PREROUTING -i eth1 -j MARK --set-mark 2 iptables -t nat -A INPUT -m mark --mark 1 -j NETMAP --to 10.0.0.0/24 iptables -t nat -A POSTROUTING -m mark --mark 1 -j NETMAP --to 10.0.0.0/24 iptables -t nat -A INPUT -m mark --mark 2 -j NETMAP --to 10.0.1.0/24 iptables -t nat -A POSTROUTING -m mark --mark 2 -j NETMAP --to 10.0.1.0/24 iptables -t raw -A PREROUTING -d 10.0.0.0/24 -j CT --zone 1 iptables -t raw -A OUTPUT -d 10.0.0.0/24 -j CT --zone 1 iptables -t raw -A PREROUTING -d 10.0.1.0/24 -j CT --zone 2 iptables -t raw -A OUTPUT -d 10.0.1.0/24 -j CT --zone 2 iptables -t nat -A PREROUTING -d 10.0.0.0/24 -j NETMAP --to 192.168.0.0/24 iptables -t nat -A OUTPUT -d 10.0.0.0/24 -j NETMAP --to 192.168.0.0/24 iptables -t nat -A PREROUTING -d 10.0.1.0/24 -j NETMAP --to 192.168.0.0/24 iptables -t nat -A OUTPUT -d 10.0.1.0/24 -j NETMAP --to 192.168.0.0/24 Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_nat_rule.h | 2 -- net/ipv4/netfilter/ipt_NETMAP.c | 6 ++++-- net/ipv4/netfilter/nf_nat_rule.c | 10 ++++++---- net/ipv4/netfilter/nf_nat_standalone.c | 8 +------- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/include/net/netfilter/nf_nat_rule.h b/include/net/netfilter/nf_nat_rule.h index e4a18ae361c6..2890bdc4cd92 100644 --- a/include/net/netfilter/nf_nat_rule.h +++ b/include/net/netfilter/nf_nat_rule.h @@ -12,6 +12,4 @@ extern int nf_nat_rule_find(struct sk_buff *skb, const struct net_device *out, struct nf_conn *ct); -extern unsigned int -alloc_null_binding(struct nf_conn *ct, unsigned int hooknum); #endif /* _NF_NAT_RULE_H */ diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index f43867d1697f..6cdb298f1035 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -48,7 +48,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT); + par->hooknum == NF_INET_LOCAL_OUT || + par->hooknum == NF_INET_LOCAL_IN); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); @@ -77,7 +78,8 @@ static struct xt_target netmap_tg_reg __read_mostly = { .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT), + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), .checkentry = netmap_tg_check, .me = THIS_MODULE }; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 98ed78281aee..ebbd319f62f5 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -28,7 +28,8 @@ #define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ (1 << NF_INET_POST_ROUTING) | \ - (1 << NF_INET_LOCAL_OUT)) + (1 << NF_INET_LOCAL_OUT) | \ + (1 << NF_INET_LOCAL_IN)) static const struct xt_table nat_table = { .name = "nat", @@ -45,7 +46,8 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) enum ip_conntrack_info ctinfo; const struct nf_nat_multi_range_compat *mr = par->targinfo; - NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || + par->hooknum == NF_INET_LOCAL_IN); ct = nf_ct_get(skb, &ctinfo); @@ -99,7 +101,7 @@ static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) return 0; } -unsigned int +static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) { /* Force range to this IP; let proto decide mapping for @@ -141,7 +143,7 @@ static struct xt_target ipt_snat_reg __read_mostly = { .target = ipt_snat_target, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", - .hooks = 1 << NF_INET_POST_ROUTING, + .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), .checkentry = ipt_snat_checkentry, .family = AF_INET, }; diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 6723c682250d..95481fee8bdb 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -131,13 +131,7 @@ nf_nat_fn(unsigned int hooknum, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - if (hooknum == NF_INET_LOCAL_IN) - /* LOCAL_IN hook doesn't have a chain! */ - ret = alloc_null_binding(ct, hooknum); - else - ret = nf_nat_rule_find(skb, hooknum, in, out, - ct); - + ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; } else From 26ec037f9841e49cc5c615deb8e1e73e5beab2ca Mon Sep 17 00:00:00 2001 From: Nick Chalk Date: Tue, 22 Jun 2010 08:07:01 +0200 Subject: [PATCH 2/9] IPVS: one-packet scheduling Allow one-packet scheduling for UDP connections. When the fwmark-based or normal virtual service is marked with '-o' or '--ops' options all connections are created only to schedule one packet. Useful to schedule UDP packets from same client port to different real servers. Recommended with RR or WRR schedulers (the connections are not visible with ipvsadm -L). Signed-off-by: Nick Chalk Signed-off-by: Simon Horman Signed-off-by: Patrick McHardy --- include/linux/ip_vs.h | 2 ++ net/netfilter/ipvs/ip_vs_conn.c | 10 +++++++--- net/netfilter/ipvs/ip_vs_core.c | 20 ++++++++++++++++---- net/netfilter/ipvs/ip_vs_ctl.c | 10 ++++++---- 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index dfc170362842..9708de265bb1 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -19,6 +19,7 @@ */ #define IP_VS_SVC_F_PERSISTENT 0x0001 /* persistent port */ #define IP_VS_SVC_F_HASHED 0x0002 /* hashed entry */ +#define IP_VS_SVC_F_ONEPACKET 0x0004 /* one-packet scheduling */ /* * Destination Server Flags @@ -85,6 +86,7 @@ #define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask */ #define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set yet */ #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ +#define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ #define IP_VS_SCHEDNAME_MAXLEN 16 #define IP_VS_IFNAME_MAXLEN 16 diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index d8f7e8ef67b4..717e6233d50f 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -158,6 +158,9 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) unsigned hash; int ret; + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + return 0; + /* Hash by protocol, client address and port */ hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); @@ -355,8 +358,9 @@ struct ip_vs_conn *ip_vs_conn_out_get */ void ip_vs_conn_put(struct ip_vs_conn *cp) { - /* reset it expire in its timeout */ - mod_timer(&cp->timer, jiffies+cp->timeout); + unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ? + 0 : cp->timeout; + mod_timer(&cp->timer, jiffies+t); __ip_vs_conn_put(cp); } @@ -649,7 +653,7 @@ static void ip_vs_conn_expire(unsigned long data) /* * unhash it if it is hashed in the conn table */ - if (!ip_vs_conn_unhash(cp)) + if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) goto expire_later; /* diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1cd6e3fd058b..50907d8472a3 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -194,6 +194,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, struct ip_vs_dest *dest; struct ip_vs_conn *ct; __be16 dport; /* destination port to forward */ + __be16 flags; union nf_inet_addr snet; /* source network of the client, after masking */ @@ -340,6 +341,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, dport = ports[1]; } + flags = (svc->flags & IP_VS_SVC_F_ONEPACKET + && iph.protocol == IPPROTO_UDP)? + IP_VS_CONN_F_ONE_PACKET : 0; + /* * Create a new connection according to the template */ @@ -347,7 +352,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, &iph.saddr, ports[0], &iph.daddr, ports[1], &dest->addr, dport, - 0, + flags, dest); if (cp == NULL) { ip_vs_conn_put(ct); @@ -377,7 +382,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; struct ip_vs_dest *dest; - __be16 _ports[2], *pptr; + __be16 _ports[2], *pptr, flags; ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); @@ -407,6 +412,10 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) return NULL; } + flags = (svc->flags & IP_VS_SVC_F_ONEPACKET + && iph.protocol == IPPROTO_UDP)? + IP_VS_CONN_F_ONE_PACKET : 0; + /* * Create a connection entry. */ @@ -414,7 +423,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) &iph.saddr, pptr[0], &iph.daddr, pptr[1], &dest->addr, dest->port ? dest->port : pptr[1], - 0, + flags, dest); if (cp == NULL) return NULL; @@ -464,6 +473,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { int ret, cs; struct ip_vs_conn *cp; + __u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && + iph.protocol == IPPROTO_UDP)? + IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; ip_vs_service_put(svc); @@ -474,7 +486,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, &iph.saddr, pptr[0], &iph.daddr, pptr[1], &daddr, 0, - IP_VS_CONN_F_BYPASS, + IP_VS_CONN_F_BYPASS | flags, NULL); if (cp == NULL) return NF_DROP; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 36dc1d88c2fa..0f0c079c422a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1864,14 +1864,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) svc->scheduler->name); else #endif - seq_printf(seq, "%s %08X:%04X %s ", + seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - svc->scheduler->name); + svc->scheduler->name, + (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { - seq_printf(seq, "FWM %08X %s ", - svc->fwmark, svc->scheduler->name); + seq_printf(seq, "FWM %08X %s %s", + svc->fwmark, svc->scheduler->name, + (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } if (svc->flags & IP_VS_SVC_F_PERSISTENT) From 600069daf792f497ae1e1da74e2fff6ac5de6c47 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 22 Jun 2010 08:13:31 +0200 Subject: [PATCH 3/9] netfilter: xt_IDLETIMER needs kdev_t.h Add header file to fix build error: net/netfilter/xt_IDLETIMER.c:276: error: implicit declaration of function 'MKDEV' Signed-off-by: Randy Dunlap Signed-off-by: Patrick McHardy --- net/netfilter/xt_IDLETIMER.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index e11090a0675c..be1f22e13545 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include From fe6fb552858f686f39e33d7b0a33fe56dacea0bf Mon Sep 17 00:00:00 2001 From: Arnd Hannemann Date: Tue, 22 Jun 2010 08:22:21 +0200 Subject: [PATCH 4/9] netfilter: fix simple typo in KConfig for netfiltert xt_TEE Destination was spelled wrong in KConfig. Signed-off-by: Arnd Hannemann Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 413ed24a968a..21be53598987 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -515,7 +515,7 @@ config NETFILTER_XT_TARGET_RATEEST To compile it as a module, choose M here. If unsure, say N. config NETFILTER_XT_TARGET_TEE - tristate '"TEE" - packet cloning to alternate destiantion' + tristate '"TEE" - packet cloning to alternate destination' depends on NETFILTER_ADVANCED depends on (IPV6 || IPV6=n) depends on !NF_CONNTRACK || NF_CONNTRACK From a8756201ba4189bca3ee1a6ec4e290f467ee09ab Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Fri, 25 Jun 2010 14:44:07 +0200 Subject: [PATCH 5/9] netfilter: xt_connbytes: Force CT accounting to be enabled Check at rule install time that CT accounting is enabled. Force it to be enabled if not while also emitting a warning since this is not the default state. This is in preparation for deprecating CONFIG_NF_CT_ACCT upon which CONFIG_NETFILTER_XT_MATCH_CONNBYTES depended being set. Added 2 CT accounting support functions: nf_ct_acct_enabled() - Get CT accounting state. nf_ct_set_acct() - Enable/disable CT accountuing. Signed-off-by: Tim Gardner Acked-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_acct.h | 12 ++++++++++++ net/netfilter/xt_connbytes.c | 10 ++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index 03e218f0be43..4e9c63a20db2 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -45,6 +45,18 @@ struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) extern unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir); +/* Check if connection tracking accounting is enabled */ +static inline bool nf_ct_acct_enabled(struct net *net) +{ + return net->ct.sysctl_acct != 0; +} + +/* Enable/disable connection tracking accounting */ +static inline void nf_ct_set_acct(struct net *net, bool enable) +{ + net->ct.sysctl_acct = enable; +} + extern int nf_conntrack_acct_init(struct net *net); extern void nf_conntrack_acct_fini(struct net *net); diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 73517835303d..5b138506690e 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -112,6 +112,16 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); + + /* + * This filter cannot function correctly unless connection tracking + * accounting is enabled, so complain in the hope that someone notices. + */ + if (!nf_ct_acct_enabled(par->net)) { + pr_warning("Forcing CT accounting to be enabled\n"); + nf_ct_set_acct(par->net, true); + } + return ret; } From d70a011dbbaa6335a19deb63ec3eb613f48faafd Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Fri, 25 Jun 2010 14:46:56 +0200 Subject: [PATCH 6/9] netfilter: complete the deprecation of CONFIG_NF_CT_ACCT CONFIG_NF_CT_ACCT has been deprecated for awhile and was originally scheduled for removal by 2.6.29. Removing support for this config option also stops this deprecation warning message in the kernel log. [ 61.669627] nf_conntrack version 0.5.0 (16384 buckets, 65536 max) [ 61.669850] CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use [ 61.669852] nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or [ 61.669853] sysctl net.netfilter.nf_conntrack_acct=1 to enable it. Signed-off-by: Tim Gardner [Patrick: changed default value to 0] Signed-off-by: Patrick McHardy --- Documentation/feature-removal-schedule.txt | 9 --------- Documentation/kernel-parameters.txt | 3 +-- net/netfilter/Kconfig | 22 ---------------------- net/netfilter/nf_conntrack_acct.c | 14 +------------- 4 files changed, 2 insertions(+), 46 deletions(-) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 672be0109d02..92f021aac92c 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -303,15 +303,6 @@ Who: Johannes Berg --------------------------- -What: CONFIG_NF_CT_ACCT -When: 2.6.29 -Why: Accounting can now be enabled/disabled without kernel recompilation. - Currently used only to set a default value for a feature that is also - controlled by a kernel/module/sysfs/sysctl parameter. -Who: Krzysztof Piotr Oledzki - ---------------------------- - What: sysfs ui for changing p4-clockmod parameters When: September 2009 Why: See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1808f1157f30..cee62515ffa1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1597,8 +1597,7 @@ and is between 256 and 4096 characters. It is defined in the file [NETFILTER] Enable connection tracking flow accounting 0 to disable accounting 1 to enable accounting - Default value depends on CONFIG_NF_CT_ACCT that is - going to be removed in 2.6.29. + Default value is 0. nfsaddrs= [NFS] See Documentation/filesystems/nfs/nfsroot.txt. diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 21be53598987..aa2f106347e4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -40,27 +40,6 @@ config NF_CONNTRACK if NF_CONNTRACK -config NF_CT_ACCT - bool "Connection tracking flow accounting" - depends on NETFILTER_ADVANCED - help - If this option is enabled, the connection tracking code will - keep per-flow packet and byte counters. - - Those counters can be used for flow-based accounting or the - `connbytes' match. - - Please note that currently this option only sets a default state. - You may change it at boot time with nf_conntrack.acct=0/1 kernel - parameter or by loading the nf_conntrack module with acct=0/1. - - You may also disable/enable it on a running system with: - sysctl net.netfilter.nf_conntrack_acct=0/1 - - This option will be removed in 2.6.29. - - If unsure, say `N'. - config NF_CONNTRACK_MARK bool 'Connection mark tracking support' depends on NETFILTER_ADVANCED @@ -630,7 +609,6 @@ config NETFILTER_XT_MATCH_CONNBYTES tristate '"connbytes" per-connection counter match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED - select NF_CT_ACCT help This option adds a `connbytes' match, which allows you to match the number of bytes and/or packets for each direction within a connection. diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index ab81b380eae6..5178c691ecbf 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -17,13 +17,7 @@ #include #include -#ifdef CONFIG_NF_CT_ACCT -#define NF_CT_ACCT_DEFAULT 1 -#else -#define NF_CT_ACCT_DEFAULT 0 -#endif - -static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; +static int nf_ct_acct __read_mostly; module_param_named(acct, nf_ct_acct, bool, 0644); MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); @@ -114,12 +108,6 @@ int nf_conntrack_acct_init(struct net *net) net->ct.sysctl_acct = nf_ct_acct; if (net_eq(net, &init_net)) { -#ifdef CONFIG_NF_CT_ACCT - printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n"); - printk(KERN_WARNING "nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or\n"); - printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); -#endif - ret = nf_ct_extend_register(&acct_extend); if (ret < 0) { printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); From cf377eb4aeded926375d4d0fe0b66ba95f0521e1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 28 Jun 2010 14:12:41 +0200 Subject: [PATCH 7/9] netfilter: ipt_LOG/ip6t_LOG: remove comparison within loop Remove the comparison within the loop to print the macheader by prepending the colon to all but the first printk. Based on suggestion by Jan Engelhardt . Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_LOG.c | 12 ++++++------ net/ipv6/netfilter/ip6t_LOG.c | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 5234f4f3499a..0a452a54adbe 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -411,12 +411,12 @@ ipt_log_packet(u_int8_t pf, skb->mac_header != skb->network_header) { int i; const unsigned char *p = skb_mac_header(skb); - for (i = 0; i < skb->dev->hard_header_len; i++,p++) - printk("%02x%c", *p, - i==skb->dev->hard_header_len - 1 - ? ' ':':'); - } else - printk(" "); + + printk("%02x", *p++); + for (i = 1; i < skb->dev->hard_header_len; i++, p++) + printk(":%02x", *p); + } + printk(" "); } dump_packet(loginfo, skb, 0); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index af4ee11f2066..4c7ddac7c62b 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -414,9 +414,9 @@ ip6t_log_packet(u_int8_t pf, p = NULL; if (p != NULL) { - for (i = 0; i < len; i++) - printk("%02x%s", p[i], - i == len - 1 ? "" : ":"); + printk("%02x", *p++); + for (i = 1; i < len; i++) + printk(":%02x", p[i]); } printk(" "); From 7eb9282cd0efac08b8377cbd5037ba297c77e3f7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 28 Jun 2010 14:16:08 +0200 Subject: [PATCH 8/9] netfilter: ipt_LOG/ip6t_LOG: add option to print decoded MAC header The LOG targets print the entire MAC header as one long string, which is not readable very well: IN=eth0 OUT= MAC=00:15:f2:24:91:f8:00:1b:24:dc:61:e6:08:00 ... Add an option to decode known header formats (currently just ARPHRD_ETHER devices) in their individual fields: IN=eth0 OUT= MACSRC=00:1b:24:dc:61:e6 MACDST=00:15:f2:24:91:f8 MACPROTO=0800 ... IN=eth0 OUT= MACSRC=00:1b:24:dc:61:e6 MACDST=00:15:f2:24:91:f8 MACPROTO=86dd ... The option needs to be explicitly enabled by userspace to avoid breaking existing parsers. Signed-off-by: Patrick McHardy --- include/linux/netfilter_ipv4/ipt_LOG.h | 3 +- include/linux/netfilter_ipv6/ip6t_LOG.h | 3 +- net/ipv4/netfilter/ipt_LOG.c | 54 ++++++++++++----- net/ipv6/netfilter/ip6t_LOG.c | 81 ++++++++++++++++--------- 4 files changed, 97 insertions(+), 44 deletions(-) diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h index 90fa6525ef9c..dcdbadf9fd4a 100644 --- a/include/linux/netfilter_ipv4/ipt_LOG.h +++ b/include/linux/netfilter_ipv4/ipt_LOG.h @@ -7,7 +7,8 @@ #define IPT_LOG_IPOPT 0x04 /* Log IP options */ #define IPT_LOG_UID 0x08 /* Log UID owning local socket */ #define IPT_LOG_NFLOG 0x10 /* Unsupported, don't reuse */ -#define IPT_LOG_MASK 0x1f +#define IPT_LOG_MACDECODE 0x20 /* Decode MAC header */ +#define IPT_LOG_MASK 0x2f struct ipt_log_info { unsigned char level; diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h index 0d0119b0458c..9dd5579e02ec 100644 --- a/include/linux/netfilter_ipv6/ip6t_LOG.h +++ b/include/linux/netfilter_ipv6/ip6t_LOG.h @@ -7,7 +7,8 @@ #define IP6T_LOG_IPOPT 0x04 /* Log IP options */ #define IP6T_LOG_UID 0x08 /* Log UID owning local socket */ #define IP6T_LOG_NFLOG 0x10 /* Unsupported, don't use */ -#define IP6T_LOG_MASK 0x1f +#define IP6T_LOG_MACDECODE 0x20 /* Decode MAC header */ +#define IP6T_LOG_MASK 0x2f struct ip6t_log_info { unsigned char level; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 0a452a54adbe..915fc17d7ce2 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -363,6 +364,42 @@ static void dump_packet(const struct nf_loginfo *info, /* maxlen = 230+ 91 + 230 + 252 = 803 */ } +static void dump_mac_header(const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & IPT_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + printk("MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int i; + + printk("%02x", *p++); + for (i = 1; i < dev->hard_header_len; i++, p++) + printk(":%02x", *p); + } + printk(" "); +} + static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { @@ -404,20 +441,9 @@ ipt_log_packet(u_int8_t pf, } #endif - if (in && !out) { - /* MAC logging for input chain only. */ - printk("MAC="); - if (skb->dev && skb->dev->hard_header_len && - skb->mac_header != skb->network_header) { - int i; - const unsigned char *p = skb_mac_header(skb); - - printk("%02x", *p++); - for (i = 1; i < skb->dev->hard_header_len; i++, p++) - printk(":%02x", *p); - } - printk(" "); - } + /* MAC logging for input path only. */ + if (in && !out) + dump_mac_header(loginfo, skb); dump_packet(loginfo, skb, 0); printk("\n"); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 4c7ddac7c62b..0a07ae7b933f 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -373,6 +373,56 @@ static void dump_packet(const struct nf_loginfo *info, printk("MARK=0x%x ", skb->mark); } +static void dump_mac_header(const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & IP6T_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + printk("MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int len = dev->hard_header_len; + unsigned int i; + + if (dev->type == ARPHRD_SIT && + (p -= ETH_HLEN) < skb->head) + p = NULL; + + if (p != NULL) { + printk("%02x", *p++); + for (i = 1; i < len; i++) + printk(":%02x", p[i]); + } + printk(" "); + + if (dev->type == ARPHRD_SIT) { + const struct iphdr *iph = + (struct iphdr *)skb_mac_header(skb); + printk("TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr); + } + } else + printk(" "); +} + static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { @@ -400,35 +450,10 @@ ip6t_log_packet(u_int8_t pf, prefix, in ? in->name : "", out ? out->name : ""); - if (in && !out) { - unsigned int len; - /* MAC logging for input chain only. */ - printk("MAC="); - if (skb->dev && (len = skb->dev->hard_header_len) && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - int i; - if (skb->dev->type == ARPHRD_SIT && - (p -= ETH_HLEN) < skb->head) - p = NULL; - - if (p != NULL) { - printk("%02x", *p++); - for (i = 1; i < len; i++) - printk(":%02x", p[i]); - } - printk(" "); - - if (skb->dev->type == ARPHRD_SIT) { - const struct iphdr *iph = - (struct iphdr *)skb_mac_header(skb); - printk("TUNNEL=%pI4->%pI4 ", - &iph->saddr, &iph->daddr); - } - } else - printk(" "); - } + /* MAC logging for input path only. */ + if (in && !out) + dump_mac_header(loginfo, skb); dump_packet(loginfo, skb, skb_network_offset(skb), 1); printk("\n"); From 4df53d8bab65cf2c18daebd51a5a4847e03f1943 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 2 Jul 2010 09:32:57 +0200 Subject: [PATCH 9/9] bridge: add per bridge device controls for invoking iptables Support more fine grained control of bridge netfilter iptables invocation by adding seperate brnf_call_*tables parameters for each device using the sysfs interface. Packets are passed to layer 3 netfilter when either the global parameter or the per bridge parameter is enabled. Acked-by: Stephen Hemminger Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- net/bridge/br_netfilter.c | 31 ++++++++++++----- net/bridge/br_private.h | 3 ++ net/bridge/br_sysfs_br.c | 72 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 6bb6f7c9e6e1..f1d49ae23411 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -55,6 +55,9 @@ static int brnf_call_arptables __read_mostly = 1; static int brnf_filter_vlan_tagged __read_mostly = 0; static int brnf_filter_pppoe_tagged __read_mostly = 0; #else +#define brnf_call_iptables 1 +#define brnf_call_ip6tables 1 +#define brnf_call_arptables 1 #define brnf_filter_vlan_tagged 0 #define brnf_filter_pppoe_tagged 0 #endif @@ -543,25 +546,30 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct net_bridge_port *p; + struct net_bridge *br; struct iphdr *iph; __u32 len = nf_bridge_encap_header_len(skb); if (unlikely(!pskb_may_pull(skb, len))) goto out; + p = rcu_dereference(in->br_port); + if (p == NULL) + goto out; + br = p->br; + if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) { -#ifdef CONFIG_SYSCTL - if (!brnf_call_ip6tables) + if (!brnf_call_ip6tables && !br->nf_call_ip6tables) return NF_ACCEPT; -#endif + nf_bridge_pull_encap_header_rcsum(skb); return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); } -#ifdef CONFIG_SYSCTL - if (!brnf_call_iptables) + + if (!brnf_call_iptables && !br->nf_call_iptables) return NF_ACCEPT; -#endif if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb)) @@ -714,12 +722,17 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct net_bridge_port *p; + struct net_bridge *br; struct net_device **d = (struct net_device **)(skb->cb); -#ifdef CONFIG_SYSCTL - if (!brnf_call_arptables) + p = rcu_dereference(out->br_port); + if (p == NULL) + return NF_ACCEPT; + br = p->br; + + if (!brnf_call_arptables && !br->nf_call_arptables) return NF_ACCEPT; -#endif if (skb->protocol != htons(ETH_P_ARP)) { if (!IS_VLAN_ARP(skb)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c83519b555bb..7484065da303 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -164,6 +164,9 @@ struct net_bridge unsigned long feature_mask; #ifdef CONFIG_BRIDGE_NETFILTER struct rtable fake_rtable; + bool nf_call_iptables; + bool nf_call_ip6tables; + bool nf_call_arptables; #endif unsigned long flags; #define BR_SET_MAC_ADDR 0x00000001 diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 486b8f3861d2..5c1e5559ebba 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -611,6 +611,73 @@ static DEVICE_ATTR(multicast_startup_query_interval, S_IRUGO | S_IWUSR, show_multicast_startup_query_interval, store_multicast_startup_query_interval); #endif +#ifdef CONFIG_BRIDGE_NETFILTER +static ssize_t show_nf_call_iptables( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->nf_call_iptables); +} + +static int set_nf_call_iptables(struct net_bridge *br, unsigned long val) +{ + br->nf_call_iptables = val ? true : false; + return 0; +} + +static ssize_t store_nf_call_iptables( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_iptables); +} +static DEVICE_ATTR(nf_call_iptables, S_IRUGO | S_IWUSR, + show_nf_call_iptables, store_nf_call_iptables); + +static ssize_t show_nf_call_ip6tables( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->nf_call_ip6tables); +} + +static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val) +{ + br->nf_call_ip6tables = val ? true : false; + return 0; +} + +static ssize_t store_nf_call_ip6tables( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_ip6tables); +} +static DEVICE_ATTR(nf_call_ip6tables, S_IRUGO | S_IWUSR, + show_nf_call_ip6tables, store_nf_call_ip6tables); + +static ssize_t show_nf_call_arptables( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->nf_call_arptables); +} + +static int set_nf_call_arptables(struct net_bridge *br, unsigned long val) +{ + br->nf_call_arptables = val ? true : false; + return 0; +} + +static ssize_t store_nf_call_arptables( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_arptables); +} +static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR, + show_nf_call_arptables, store_nf_call_arptables); +#endif static struct attribute *bridge_attrs[] = { &dev_attr_forward_delay.attr, @@ -644,6 +711,11 @@ static struct attribute *bridge_attrs[] = { &dev_attr_multicast_query_interval.attr, &dev_attr_multicast_query_response_interval.attr, &dev_attr_multicast_startup_query_interval.attr, +#endif +#ifdef CONFIG_BRIDGE_NETFILTER + &dev_attr_nf_call_iptables.attr, + &dev_attr_nf_call_ip6tables.attr, + &dev_attr_nf_call_arptables.attr, #endif NULL };