lima: implement BO cache

Allocating BOs is expensive, so we should avoid doing that by caching
freed BOs.

The BO cache is modelled after the one in the v3d driver and works as follows:

- in lima_bo_create(): check whether the cache holds a matching BO and
  return it if there is one; otherwise allocate a new BO.
- in lima_bo_unreference() (renamed from lima_bo_free()): put the BO in
  the cache instead of freeing it, and remove all stale BOs from the cache.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
This commit is contained in:
Vasily Khoruzhick 2019-09-07 19:33:07 -07:00
parent 9f897a2b4c
commit d214778753
8 changed files with 212 additions and 30 deletions

View File

@ -30,6 +30,7 @@
#include "drm-uapi/lima_drm.h"
#include "util/u_hash_table.h"
#include "util/u_math.h"
#include "util/os_time.h"
#include "os/os_mman.h"
@ -37,6 +38,7 @@
#include "lima_screen.h"
#include "lima_bo.h"
#include "lima_util.h"
#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
@ -68,6 +70,16 @@ err_out0:
return false;
}
/* Prepare the per-screen BO cache: initialise its lock, the time-ordered
 * list and the list head of every size bucket.
 *
 * Always returns true.
 */
bool lima_bo_cache_init(struct lima_screen *screen)
{
   int bucket = 0;

   mtx_init(&screen->bo_cache_lock, mtx_plain);
   list_inithead(&screen->bo_cache_time);

   while (bucket < NR_BO_CACHE_BUCKETS) {
      list_inithead(&screen->bo_cache_buckets[bucket]);
      bucket++;
   }

   return true;
}
void lima_bo_table_fini(struct lima_screen *screen)
{
mtx_destroy(&screen->bo_table_lock);
@ -75,6 +87,13 @@ void lima_bo_table_fini(struct lima_screen *screen)
util_hash_table_destroy(screen->bo_flink_names);
}
/* Unlink a BO from both cache lists it is threaded on (the time-ordered
 * list and its size bucket). The BO itself is left untouched otherwise.
 */
static void
lima_bo_cache_remove(struct lima_bo *bo)
{
   list_del(&bo->time_list);
   list_del(&bo->size_list);
}
static void lima_close_kms_handle(struct lima_screen *screen, uint32_t handle)
{
struct drm_gem_close args = {
@ -84,6 +103,36 @@ static void lima_close_kms_handle(struct lima_screen *screen, uint32_t handle)
drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &args);
}
/* Destroy a BO for real: drop it from the screen's lookup tables, unmap it
 * if it is mapped, close its GEM handle and release the structure.
 */
static void
lima_bo_free(struct lima_bo *bo)
{
   struct lima_screen *screen = bo->screen;

   /* The handle and flink-name tables are guarded by bo_table_lock. */
   mtx_lock(&screen->bo_table_lock);
   util_hash_table_remove(screen->bo_handles,
                          (void *)(uintptr_t)bo->handle);
   if (bo->flink_name) {
      util_hash_table_remove(screen->bo_flink_names,
                             (void *)(uintptr_t)bo->flink_name);
   }
   mtx_unlock(&screen->bo_table_lock);

   if (bo->map) {
      lima_bo_unmap(bo);
   }

   lima_close_kms_handle(screen, bo->handle);
   free(bo);
}
/* Tear down the BO cache: destroy the cache lock, then take every BO that
 * is still on the time-ordered list out of the cache and free it.
 */
void lima_bo_cache_fini(struct lima_screen *screen)
{
   struct list_head *cached = &screen->bo_cache_time;

   mtx_destroy(&screen->bo_cache_lock);

   /* The _safe iterator is needed because each BO is unlinked and freed
    * while the list is being walked. */
   list_for_each_entry_safe(struct lima_bo, bo, cached, time_list) {
      lima_bo_cache_remove(bo);
      lima_bo_free(bo);
   }
}
static bool lima_bo_get_info(struct lima_bo *bo)
{
struct drm_lima_gem_info req = {
@ -98,10 +147,112 @@ static bool lima_bo_get_info(struct lima_bo *bo)
return true;
}
/* Map a BO size in bytes to the zero-based index of its cache bucket.
 *
 * Buckets are keyed by the base-2 logarithm of the size, limited to the
 * range [MIN_BO_CACHE_BUCKET, MAX_BO_CACHE_BUCKET].
 */
static unsigned
lima_bucket_index(unsigned size)
{
   /* Round down to a power of two to get the raw bucket number. */
   unsigned log2_size = util_logbase2(size);

   /* Small sizes share the first bucket, huge ones share the last. */
   if (log2_size < MIN_BO_CACHE_BUCKET)
      log2_size = MIN_BO_CACHE_BUCKET;
   else if (log2_size > MAX_BO_CACHE_BUCKET)
      log2_size = MAX_BO_CACHE_BUCKET;

   /* Shift so that the smallest bucket has index 0. */
   return log2_size - MIN_BO_CACHE_BUCKET;
}
/* Return the list head of the cache bucket that holds BOs of the given
 * size. The result points into screen->bo_cache_buckets.
 */
static struct list_head *
lima_bo_cache_get_bucket(struct lima_screen *screen, unsigned size)
{
   unsigned idx = lima_bucket_index(size);

   return screen->bo_cache_buckets + idx;
}
/* Free cached BOs that have been idle for too long.
 *
 * screen: screen whose cache is pruned.
 * time:   current time in whole seconds, on the same clock that was used
 *         to stamp free_time when the BOs were put into the cache.
 *
 * The time-ordered list is walked from its head and the walk stops at the
 * first BO that is still fresh; BOs are appended to that list with
 * list_addtail() as they are cached, so every later entry is expected to
 * be at least as recent.
 */
static void
lima_bo_cache_free_stale_bos(struct lima_screen *screen, time_t time)
{
   /* Timestamps are whole seconds, so a difference greater than this
    * value means the BO has been sitting idle for more than 6 seconds. */
   const time_t max_idle_secs = 6;

   list_for_each_entry_safe(struct lima_bo, entry,
                            &screen->bo_cache_time, time_list) {
      if (time - entry->free_time > max_idle_secs) {
         lima_bo_cache_remove(entry);
         lima_bo_free(entry);
      } else {
         break;
      }
   }
}
/* Try to park an unreferenced BO in the cache instead of destroying it.
 *
 * Returns true when the BO was added to the cache (the caller must not
 * free it), false when the BO is not cacheable or no bucket is available
 * (the caller remains responsible for it).
 *
 * On success the BO is stamped with the current CLOCK_MONOTONIC second,
 * appended to its size bucket and to the time-ordered list, and stale
 * entries are pruned while the cache lock is still held.
 */
static bool
lima_bo_cache_put(struct lima_bo *bo)
{
   struct lima_screen *screen;
   struct list_head *size_bucket;
   struct timespec now;
   bool cached = false;

   if (!bo->cacheable)
      return false;

   screen = bo->screen;
   mtx_lock(&screen->bo_cache_lock);

   size_bucket = lima_bo_cache_get_bucket(screen, bo->size);
   if (size_bucket) {
      clock_gettime(CLOCK_MONOTONIC, &now);
      bo->free_time = now.tv_sec;

      list_addtail(&bo->size_list, size_bucket);
      list_addtail(&bo->time_list, &screen->bo_cache_time);

      lima_bo_cache_free_stale_bos(screen, now.tv_sec);
      cached = true;
   }

   mtx_unlock(&screen->bo_cache_lock);
   return cached;
}
/* Look for a reusable BO in the cache.
 *
 * screen: screen whose cache is searched.
 * size:   minimum size in bytes the BO must have.
 * flags:  creation flags the BO must match exactly.
 *
 * Returns a BO removed from the cache with its reference count reset to 1,
 * or NULL when nothing suitable is available. Only the bucket selected by
 * `size` is searched.
 */
static struct lima_bo *
lima_bo_cache_get(struct lima_screen *screen, uint32_t size, uint32_t flags)
{
   struct lima_bo *bo = NULL;

   mtx_lock(&screen->bo_cache_lock);

   struct list_head *bucket = lima_bo_cache_get_bucket(screen, size);
   if (!bucket) {
      mtx_unlock(&screen->bo_cache_lock);
      /* This function returns a pointer, so report failure with NULL. */
      return NULL;
   }

   list_for_each_entry_safe(struct lima_bo, entry, bucket, size_list) {
      if (entry->size >= size &&
          entry->flags == flags) {
         /* Check if BO is idle (zero timeout). If it's not, stop searching:
          * it's better to allocate a new one. */
         if (!lima_bo_wait(entry, LIMA_GEM_WAIT_WRITE, 0))
            break;

         lima_bo_cache_remove(entry);
         p_atomic_set(&entry->refcnt, 1);
         bo = entry;
         break;
      }
   }

   mtx_unlock(&screen->bo_cache_lock);

   return bo;
}
struct lima_bo *lima_bo_create(struct lima_screen *screen,
uint32_t size, uint32_t flags)
{
struct lima_bo *bo;
/* Try to get bo from cache first */
bo = lima_bo_cache_get(screen, size, flags);
if (bo)
return bo;
size = align(size, LIMA_PAGE_SIZE);
struct drm_lima_gem_create req = {
.size = size,
.flags = flags,
@ -110,12 +261,17 @@ struct lima_bo *lima_bo_create(struct lima_screen *screen,
if (!(bo = calloc(1, sizeof(*bo))))
return NULL;
list_inithead(&bo->time_list);
list_inithead(&bo->size_list);
if (drmIoctl(screen->fd, DRM_IOCTL_LIMA_GEM_CREATE, &req))
goto err_out0;
bo->screen = screen;
bo->size = req.size;
bo->flags = req.flags;
bo->handle = req.handle;
bo->cacheable = !(lima_debug & LIMA_DEBUG_NO_BO_CACHE);
p_atomic_set(&bo->refcnt, 1);
if (!lima_bo_get_info(bo))
@ -130,25 +286,16 @@ err_out0:
return NULL;
}
void lima_bo_free(struct lima_bo *bo)
void lima_bo_unreference(struct lima_bo *bo)
{
if (!p_atomic_dec_zero(&bo->refcnt))
return;
struct lima_screen *screen = bo->screen;
mtx_lock(&screen->bo_table_lock);
util_hash_table_remove(screen->bo_handles,
(void *)(uintptr_t)bo->handle);
if (bo->flink_name)
util_hash_table_remove(screen->bo_flink_names,
(void *)(uintptr_t)bo->flink_name);
mtx_unlock(&screen->bo_table_lock);
/* Try to put it into cache */
if (lima_bo_cache_put(bo))
return;
if (bo->map)
lima_bo_unmap(bo);
lima_close_kms_handle(screen, bo->handle);
free(bo);
lima_bo_free(bo);
}
void *lima_bo_map(struct lima_bo *bo)
@ -175,6 +322,9 @@ bool lima_bo_export(struct lima_bo *bo, struct winsys_handle *handle)
{
struct lima_screen *screen = bo->screen;
/* Don't cache exported BOs */
bo->cacheable = false;
switch (handle->type) {
case WINSYS_HANDLE_TYPE_SHARED:
if (!bo->flink_name) {
@ -271,6 +421,8 @@ struct lima_bo *lima_bo_import(struct lima_screen *screen,
if (bo) {
p_atomic_inc(&bo->refcnt);
/* Don't cache imported BOs */
bo->cacheable = false;
mtx_unlock(&screen->bo_table_lock);
return bo;
}
@ -282,6 +434,10 @@ struct lima_bo *lima_bo_import(struct lima_screen *screen,
return NULL;
}
/* Don't cache imported BOs */
bo->cacheable = false;
list_inithead(&bo->time_list);
list_inithead(&bo->size_list);
bo->screen = screen;
p_atomic_set(&bo->refcnt, 1);

View File

@ -28,12 +28,18 @@
#include <stdint.h>
#include "util/u_atomic.h"
#include "util/list.h"
struct lima_bo {
struct lima_screen *screen;
struct list_head time_list;
struct list_head size_list;
int refcnt;
bool cacheable;
time_t free_time;
uint32_t size;
uint32_t flags;
uint32_t handle;
uint64_t offset;
uint32_t flink_name;
@ -44,10 +50,12 @@ struct lima_bo {
bool lima_bo_table_init(struct lima_screen *screen);
void lima_bo_table_fini(struct lima_screen *screen);
bool lima_bo_cache_init(struct lima_screen *screen);
void lima_bo_cache_fini(struct lima_screen *screen);
struct lima_bo *lima_bo_create(struct lima_screen *screen, uint32_t size,
uint32_t flags);
void lima_bo_free(struct lima_bo *bo);
void lima_bo_unreference(struct lima_bo *bo);
static inline void lima_bo_reference(struct lima_bo *bo)
{

View File

@ -138,13 +138,13 @@ lima_context_destroy(struct pipe_context *pctx)
for (int i = 0; i < LIMA_CTX_PLB_MAX_NUM; i++) {
if (ctx->plb[i])
lima_bo_free(ctx->plb[i]);
lima_bo_unreference(ctx->plb[i]);
if (ctx->gp_tile_heap[i])
lima_bo_free(ctx->gp_tile_heap[i]);
lima_bo_unreference(ctx->gp_tile_heap[i]);
}
if (ctx->plb_gp_stream)
lima_bo_free(ctx->plb_gp_stream);
lima_bo_unreference(ctx->plb_gp_stream);
if (ctx->plb_pp_stream)
assert(!_mesa_hash_table_num_entries(ctx->plb_pp_stream));

View File

@ -302,7 +302,7 @@ lima_delete_fs_state(struct pipe_context *pctx, void *hwcso)
struct lima_fs_shader_state *so = hwcso;
if (so->bo)
lima_bo_free(so->bo);
lima_bo_unreference(so->bo);
ralloc_free(so);
}
@ -396,7 +396,7 @@ lima_delete_vs_state(struct pipe_context *pctx, void *hwcso)
struct lima_vs_shader_state *so = hwcso;
if (so->bo)
lima_bo_free(so->bo);
lima_bo_unreference(so->bo);
ralloc_free(so);
}

View File

@ -259,7 +259,7 @@ lima_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *pres)
struct lima_resource *res = lima_resource(pres);
if (res->bo)
lima_bo_free(res->bo);
lima_bo_unreference(res->bo);
if (res->scanout)
renderonly_scanout_destroy(res->scanout, screen->ro);
@ -528,7 +528,7 @@ lima_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
struct lima_ctx_plb_pp_stream *s = entry->data;
if (--s->refcnt == 0) {
if (s->bo)
lima_bo_free(s->bo);
lima_bo_unreference(s->bo);
_mesa_hash_table_remove(ctx->plb_pp_stream, entry);
ralloc_free(s);
}

View File

@ -61,8 +61,9 @@ lima_screen_destroy(struct pipe_screen *pscreen)
free(screen->ro);
if (screen->pp_buffer)
lima_bo_free(screen->pp_buffer);
lima_bo_unreference(screen->pp_buffer);
lima_bo_cache_fini(screen);
lima_bo_table_fini(screen);
ralloc_free(screen);
}
@ -418,6 +419,8 @@ static const struct debug_named_value debug_options[] = {
"dump GPU command stream to $PWD/lima.dump" },
{ "shaderdb", LIMA_DEBUG_SHADERDB,
"print shader information for shaderdb" },
{ "nobocache", LIMA_DEBUG_NO_BO_CACHE,
"disable BO cache" },
{ NULL }
};
@ -478,16 +481,20 @@ lima_screen_create(int fd, struct renderonly *ro)
if (!lima_screen_query_info(screen))
goto err_out0;
if (!lima_bo_table_init(screen))
if (!lima_bo_cache_init(screen))
goto err_out0;
if (!lima_bo_table_init(screen))
goto err_out1;
screen->pp_ra = ppir_regalloc_init(screen);
if (!screen->pp_ra)
goto err_out1;
goto err_out2;
screen->pp_buffer = lima_bo_create(screen, pp_buffer_size, 0);
if (!screen->pp_buffer)
goto err_out1;
goto err_out2;
screen->pp_buffer->cacheable = false;
/* fs program for clear buffer?
* const0 1 0 0 -1.67773, mov.v0 $0 ^const0.xxxx, stop
@ -534,7 +541,7 @@ lima_screen_create(int fd, struct renderonly *ro)
screen->ro = renderonly_dup(ro);
if (!screen->ro) {
fprintf(stderr, "Failed to dup renderonly object\n");
goto err_out2;
goto err_out3;
}
}
@ -559,10 +566,12 @@ lima_screen_create(int fd, struct renderonly *ro)
return &screen->base;
err_out3:
lima_bo_unreference(screen->pp_buffer);
err_out2:
lima_bo_free(screen->pp_buffer);
err_out1:
lima_bo_table_fini(screen);
err_out1:
lima_bo_cache_fini(screen);
err_out0:
ralloc_free(screen);
return NULL;

View File

@ -37,6 +37,7 @@
#define LIMA_DEBUG_PP (1 << 1)
#define LIMA_DEBUG_DUMP (1 << 2)
#define LIMA_DEBUG_SHADERDB (1 << 3)
#define LIMA_DEBUG_NO_BO_CACHE (1 << 4)
extern uint32_t lima_debug;
extern FILE *lima_dump_command_stream;
@ -46,6 +47,11 @@ extern int lima_ppir_force_spilling;
struct ra_regs;
#define MIN_BO_CACHE_BUCKET (12) /* 2^12 = 4KB */
#define MAX_BO_CACHE_BUCKET (22) /* 2^22 = 4MB */
#define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1)
struct lima_screen {
struct pipe_screen base;
struct renderonly *ro;
@ -60,8 +66,11 @@ struct lima_screen {
/* bo table */
mtx_t bo_table_lock;
mtx_t bo_cache_lock;
struct util_hash_table *bo_handles;
struct util_hash_table *bo_flink_names;
struct list_head bo_cache_buckets[NR_BO_CACHE_BUCKETS];
struct list_head bo_cache_time;
struct slab_parent_pool transfer_pool;

View File

@ -145,7 +145,7 @@ bool lima_submit_start(struct lima_submit *submit, void *frame, uint32_t size)
bool ret = drmIoctl(submit->screen->fd, DRM_IOCTL_LIMA_GEM_SUBMIT, &req) == 0;
util_dynarray_foreach(&submit->bos, struct lima_bo *, bo) {
lima_bo_free(*bo);
lima_bo_unreference(*bo);
}
util_dynarray_clear(&submit->gem_bos);