linux/net/ipv4/tcp_memcontrol.c
Tejun Heo 073219e995 cgroup: clean up cgroup_subsys names and initialization
cgroup_subsys is a bit messier than it needs to be.

* The name of a subsys can be different from its internal identifier
  defined in cgroup_subsys.h.  Most subsystems use the matching name
  but three - cpu, memory and perf_event - use different ones.

* cgroup_subsys_id enums are postfixed with _subsys_id and each
  cgroup_subsys is postfixed with _subsys.  cgroup.h is widely
  included throughout various subsystems, it doesn't and shouldn't
  have claim on such generic names which don't have any qualifier
  indicating that they belong to cgroup.

* cgroup_subsys->subsys_id should always equal the matching
  cgroup_subsys_id enum; however, we require each controller to
  initialize it and then BUG if they don't match, which is a bit
  silly.

This patch cleans up cgroup_subsys names and initialization by doing
the followings.

* cgroup_subsys_id enums are now postfixed with _cgrp_id, and each
  cgroup_subsys with _cgrp_subsys.

* With the above, renaming subsys identifiers to match the userland
  visible names doesn't cause any naming conflicts.  All non-matching
  identifiers are renamed to match the official names.

  cpu_cgroup -> cpu
  mem_cgroup -> memory
  perf -> perf_event

* controllers no longer need to initialize ->subsys_id and ->name.
  They're generated in cgroup core and set automatically during boot.

* Redundant cgroup_subsys declarations removed.

* While updating BUG_ON()s in cgroup_init_early(), convert them to
  WARN()s.  BUGging that early during boot is stupid - the kernel
  can't print anything, even through serial console and the trap
  handler doesn't even link stack frame properly for back-tracing.

This patch doesn't introduce any behavior changes.

v2: Rebased on top of fe1217c4f3 ("net: net_cls: move cgroupfs
    classid handling into core").

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: "David S. Miller" <davem@davemloft.net>
Acked-by: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Acked-by: Ingo Molnar <mingo@redhat.com>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Thomas Graf <tgraf@suug.ch>
2014-02-08 10:36:58 -05:00

226 lines
5.4 KiB
C

#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <net/ip.h>
#include <linux/nsproxy.h>
#include <linux/memcontrol.h>
#include <linux/module.h>
int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
/*
* The root cgroup does not use res_counters, but rather,
* rely on the data already collected by the network
* subsystem
*/
struct res_counter *res_parent = NULL;
struct cg_proto *cg_proto, *parent_cg;
struct mem_cgroup *parent = parent_mem_cgroup(memcg);
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return 0;
cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0];
cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1];
cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2];
cg_proto->memory_pressure = 0;
cg_proto->memcg = memcg;
parent_cg = tcp_prot.proto_cgroup(parent);
if (parent_cg)
res_parent = &parent_cg->memory_allocated;
res_counter_init(&cg_proto->memory_allocated, res_parent);
percpu_counter_init(&cg_proto->sockets_allocated, 0);
return 0;
}
EXPORT_SYMBOL(tcp_init_cgroup);
void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
struct cg_proto *cg_proto;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return;
percpu_counter_destroy(&cg_proto->sockets_allocated);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);
static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
{
struct cg_proto *cg_proto;
int i;
int ret;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return -EINVAL;
if (val > RES_COUNTER_MAX)
val = RES_COUNTER_MAX;
ret = res_counter_set_limit(&cg_proto->memory_allocated, val);
if (ret)
return ret;
for (i = 0; i < 3; i++)
cg_proto->sysctl_mem[i] = min_t(long, val >> PAGE_SHIFT,
sysctl_tcp_mem[i]);
if (val == RES_COUNTER_MAX)
clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
else if (val != RES_COUNTER_MAX) {
/*
* The active bit needs to be written after the static_key
* update. This is what guarantees that the socket activation
* function is the last one to run. See sock_update_memcg() for
* details, and note that we don't mark any socket as belonging
* to this memcg until that flag is up.
*
* We need to do this, because static_keys will span multiple
* sites, but we can't control their order. If we mark a socket
* as accounted, but the accounting functions are not patched in
* yet, we'll lose accounting.
*
* We never race with the readers in sock_update_memcg(),
* because when this value change, the code to process it is not
* patched in yet.
*
* The activated bit is used to guarantee that no two writers
* will do the update in the same memcg. Without that, we can't
* properly shutdown the static key.
*/
if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
static_key_slow_inc(&memcg_socket_limit_enabled);
set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
}
return 0;
}
static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
const char *buffer)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
unsigned long long val;
int ret = 0;
switch (cft->private) {
case RES_LIMIT:
/* see memcontrol.c */
ret = res_counter_memparse_write_strategy(buffer, &val);
if (ret)
break;
ret = tcp_update_limit(memcg, val);
break;
default:
ret = -EINVAL;
break;
}
return ret;
}
static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
{
struct cg_proto *cg_proto;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return default_val;
return res_counter_read_u64(&cg_proto->memory_allocated, type);
}
static u64 tcp_read_usage(struct mem_cgroup *memcg)
{
struct cg_proto *cg_proto;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT;
return res_counter_read_u64(&cg_proto->memory_allocated, RES_USAGE);
}
static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
u64 val;
switch (cft->private) {
case RES_LIMIT:
val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
break;
case RES_USAGE:
val = tcp_read_usage(memcg);
break;
case RES_FAILCNT:
case RES_MAX_USAGE:
val = tcp_read_stat(memcg, cft->private, 0);
break;
default:
BUG();
}
return val;
}
static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
{
struct mem_cgroup *memcg;
struct cg_proto *cg_proto;
memcg = mem_cgroup_from_css(css);
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return 0;
switch (event) {
case RES_MAX_USAGE:
res_counter_reset_max(&cg_proto->memory_allocated);
break;
case RES_FAILCNT:
res_counter_reset_failcnt(&cg_proto->memory_allocated);
break;
}
return 0;
}
static struct cftype tcp_files[] = {
{
.name = "kmem.tcp.limit_in_bytes",
.write_string = tcp_cgroup_write,
.read_u64 = tcp_cgroup_read,
.private = RES_LIMIT,
},
{
.name = "kmem.tcp.usage_in_bytes",
.read_u64 = tcp_cgroup_read,
.private = RES_USAGE,
},
{
.name = "kmem.tcp.failcnt",
.private = RES_FAILCNT,
.trigger = tcp_cgroup_reset,
.read_u64 = tcp_cgroup_read,
},
{
.name = "kmem.tcp.max_usage_in_bytes",
.private = RES_MAX_USAGE,
.trigger = tcp_cgroup_reset,
.read_u64 = tcp_cgroup_read,
},
{ } /* terminate */
};
static int __init tcp_memcontrol_init(void)
{
WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, tcp_files));
return 0;
}
__initcall(tcp_memcontrol_init);