x86/intel_rdt: Limit C-states dynamically when pseudo-locking active

Deeper C-states impact cache content through shrinking of the cache or flushing the entire cache to memory before reducing power to the cache. Deeper C-states will thus negatively impact pseudo-locked regions.

To avoid impacting pseudo-locked regions, C-states are limited on pseudo-locked region creation so that cores associated with the pseudo-locked region are prevented from entering deeper C-states. This is accomplished by requesting a CPU latency target that prevents the core from entering C6 on all supported platforms.

Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: fenghua.yu@intel.com
Cc: tony.luck@intel.com
Cc: vikas.shivappa@linux.intel.com
Cc: gavin.hindman@intel.com
Cc: jithu.joseph@intel.com
Cc: dave.hansen@intel.com
Cc: hpa@zytor.com
Link: https://lkml.kernel.org/r/1ef4f99dd6ba12fa6fb44c5a1141e75f952b9cd9.1529706536.git.reinette.chatre@intel.com
parent f3be1e7b2c
commit 6fc0de37f6
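For orientation before the diff: the mechanism amounts to registering one device PM QoS resume-latency request per CPU that shares the pseudo-locked cache, and dropping those requests again when the region goes away. Below is a condensed, self-contained sketch of that pattern. The names cstates_constrain(), cstates_relax() and struct cstate_req are hypothetical stand-ins used only for illustration; the functions the patch actually adds are pseudo_lock_cstates_constrain() and pseudo_lock_cstates_relax(), shown in full further down. The 30 microsecond value is the constraint the patch picks to stay below the C6 exit latency of all supported platforms.

/*
 * Sketch only: one dev_pm_qos resume-latency request per CPU in the
 * mask keeps cpuidle from selecting idle states whose exit latency
 * exceeds 30 microseconds.
 */
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/list.h>
#include <linux/pm_qos.h>
#include <linux/slab.h>

struct cstate_req {
	struct list_head	list;
	struct dev_pm_qos_request req;
};

/* Remove and free every request previously added to @reqs. */
static void cstates_relax(struct list_head *reqs)
{
	struct cstate_req *r, *tmp;

	list_for_each_entry_safe(r, tmp, reqs, list) {
		dev_pm_qos_remove_request(&r->req);
		list_del(&r->list);
		kfree(r);
	}
}

/* Constrain every CPU in @mask to a 30us resume latency. */
static int cstates_constrain(const struct cpumask *mask, struct list_head *reqs)
{
	struct cstate_req *r;
	int cpu, ret;

	for_each_cpu(cpu, mask) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (!r) {
			ret = -ENOMEM;
			goto err;
		}
		ret = dev_pm_qos_add_request(get_cpu_device(cpu), &r->req,
					     DEV_PM_QOS_RESUME_LATENCY, 30);
		if (ret < 0) {
			kfree(r);
			goto err;
		}
		list_add(&r->list, reqs);
	}
	return 0;
err:
	cstates_relax(reqs);
	return ret;
}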
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -461,8 +461,8 @@ in the cache via carefully configuring the CAT feature and controlling
 application behavior. There is no guarantee that data is placed in
 cache. Instructions like INVD, WBINVD, CLFLUSH, etc. can still evict
 “locked” data from cache. Power management C-states may shrink or
-power off cache. It is thus recommended to limit the processor maximum
-C-state, for example, by setting the processor.max_cstate kernel parameter.
+power off cache. Deeper C-states will automatically be restricted on
+pseudo-locked region creation.
 
 It is required that an application using a pseudo-locked region runs
 with affinity to the cores (or a subset of the cores) associated
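As an aside to the documentation change above: the manual mitigation the old text recommended can also be approximated at run time from user space, by holding /dev/cpu_dma_latency open with a latency value written to it. That constraint is system-wide (unlike the per-core requests this patch adds) and lasts only while the descriptor stays open, and the device node normally requires root. A minimal sketch, reusing the 30 microsecond figure from this patch:

/* Hold a system-wide CPU latency constraint of 30us until killed. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int32_t latency_us = 30;
	int fd = open("/dev/cpu_dma_latency", O_WRONLY);

	if (fd < 0) {
		perror("open /dev/cpu_dma_latency");
		return 1;
	}
	if (write(fd, &latency_us, sizeof(latency_us)) != sizeof(latency_us)) {
		perror("write");
		close(fd);
		return 1;
	}
	/* The constraint is dropped as soon as the descriptor is closed. */
	pause();
	close(fd);
	return 0;
}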
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -142,6 +142,7 @@ struct mongroup {
 *			region
 * @debugfs_dir:	pointer to this region's directory in the debugfs
 *			filesystem
+ * @pm_reqs:		Power management QoS requests related to this region
 */
 struct pseudo_lock_region {
	struct rdt_resource	*r;
@@ -155,6 +156,7 @@ struct pseudo_lock_region {
	void			*kmem;
	unsigned int		minor;
	struct dentry		*debugfs_dir;
+	struct list_head	pm_reqs;
 };
 
 /**
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -17,6 +17,7 @@
 #include <linux/debugfs.h>
 #include <linux/kthread.h>
 #include <linux/mman.h>
+#include <linux/pm_qos.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
@@ -175,6 +176,76 @@ static struct rdtgroup *region_find_by_minor(unsigned int minor)
	return rdtgrp_match;
 }
 
+/**
+ * pseudo_lock_pm_req - A power management QoS request list entry
+ * @list:	Entry within the @pm_reqs list for a pseudo-locked region
+ * @req:	PM QoS request
+ */
+struct pseudo_lock_pm_req {
+	struct list_head list;
+	struct dev_pm_qos_request req;
+};
+
+static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
+{
+	struct pseudo_lock_pm_req *pm_req, *next;
+
+	list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
+		dev_pm_qos_remove_request(&pm_req->req);
+		list_del(&pm_req->list);
+		kfree(pm_req);
+	}
+}
+
+/**
+ * pseudo_lock_cstates_constrain - Restrict cores from entering C6
+ *
+ * To prevent the cache from being affected by power management entering
+ * C6 has to be avoided. This is accomplished by requesting a latency
+ * requirement lower than lowest C6 exit latency of all supported
+ * platforms as found in the cpuidle state tables in the intel_idle driver.
+ * At this time it is possible to do so with a single latency requirement
+ * for all supported platforms.
+ *
+ * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
+ * the ACPI latencies need to be considered while keeping in mind that C2
+ * may be set to map to deeper sleep states. In this case the latency
+ * requirement needs to prevent entering C2 also.
+ */
+static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
+{
+	struct pseudo_lock_pm_req *pm_req;
+	int cpu;
+	int ret;
+
+	for_each_cpu(cpu, &plr->d->cpu_mask) {
+		pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
+		if (!pm_req) {
+			rdt_last_cmd_puts("fail allocating mem for PM QoS\n");
+			ret = -ENOMEM;
+			goto out_err;
+		}
+		ret = dev_pm_qos_add_request(get_cpu_device(cpu),
+					     &pm_req->req,
+					     DEV_PM_QOS_RESUME_LATENCY,
+					     30);
+		if (ret < 0) {
+			rdt_last_cmd_printf("fail to add latency req cpu%d\n",
+					    cpu);
+			kfree(pm_req);
+			ret = -1;
+			goto out_err;
+		}
+		list_add(&pm_req->list, &plr->pm_reqs);
+	}
+
+	return 0;
+
+out_err:
+	pseudo_lock_cstates_relax(plr);
+	return ret;
+}
+
 /**
 * pseudo_lock_region_init - Initialize pseudo-lock region information
 * @plr: pseudo-lock region
@@ -242,6 +313,7 @@ static int pseudo_lock_init(struct rdtgroup *rdtgrp)
		return -ENOMEM;
 
	init_waitqueue_head(&plr->lock_thread_wq);
+	INIT_LIST_HEAD(&plr->pm_reqs);
	rdtgrp->plr = plr;
	return 0;
 }
@@ -1135,6 +1207,12 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
	if (ret < 0)
		return ret;
 
+	ret = pseudo_lock_cstates_constrain(plr);
+	if (ret < 0) {
+		ret = -EINVAL;
+		goto out_region;
+	}
+
	plr->thread_done = 0;
 
	thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
@@ -1143,7 +1221,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
	if (IS_ERR(thread)) {
		ret = PTR_ERR(thread);
		rdt_last_cmd_printf("locking thread returned error %d\n", ret);
-		goto out_region;
+		goto out_cstates;
	}
 
	kthread_bind(thread, plr->cpu);
@@ -1161,7 +1239,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
		 * empty pseudo-locking loop.
		 */
		rdt_last_cmd_puts("locking thread interrupted\n");
-		goto out_region;
+		goto out_cstates;
	}
 
	if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
@@ -1222,6 +1300,8 @@ out_minor:
	pseudo_lock_minor_release(new_minor);
 out_debugfs:
	debugfs_remove_recursive(plr->debugfs_dir);
+out_cstates:
+	pseudo_lock_cstates_relax(plr);
 out_region:
	pseudo_lock_region_clear(plr);
 out:
@@ -1255,6 +1335,7 @@ void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
		goto free;
	}
 
+	pseudo_lock_cstates_relax(plr);
	debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
	device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
	pseudo_lock_minor_release(plr->minor);