darling-xnu/osfmk/ipc/ipc_entry.c
2023-05-16 21:41:14 -07:00

779 lines
20 KiB
C

/*
* Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
*/
/*
* Mach Operating System
* Copyright (c) 1991,1990,1989 Carnegie Mellon University
* All Rights Reserved.
*
* Permission to use, copy, modify and distribute this software and its
* documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/
/*
*/
/*
* File: ipc/ipc_entry.c
* Author: Rich Draves
* Date: 1989
*
* Primitive functions to manipulate translation entries.
*/
#include <mach_debug.h>
#include <mach/kern_return.h>
#include <mach/port.h>
#include <kern/assert.h>
#include <kern/sched_prim.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <ipc/port.h>
#include <ipc/ipc_entry.h>
#include <ipc/ipc_space.h>
#include <ipc/ipc_object.h>
#include <ipc/ipc_hash.h>
#include <ipc/ipc_table.h>
#include <ipc/ipc_port.h>
#include <string.h>
#include <sys/kdebug.h>
/*
* Routine: ipc_entry_lookup
* Purpose:
* Searches for an entry, given its name.
* Conditions:
* The space must be read or write locked throughout.
* The space must be active.
*/
ipc_entry_t
ipc_entry_lookup(
ipc_space_t space,
mach_port_name_t name)
{
mach_port_index_t index;
ipc_entry_t entry;
assert(is_active(space));
index = MACH_PORT_INDEX(name);
if (index < space->is_table_size) {
entry = &space->is_table[index];
if (IE_BITS_GEN(entry->ie_bits) != MACH_PORT_GEN(name) ||
IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE) {
entry = IE_NULL;
}
} else {
entry = IE_NULL;
}
assert((entry == IE_NULL) || IE_BITS_TYPE(entry->ie_bits));
return entry;
}
/*
* Routine: ipc_entries_hold
* Purpose:
* Verifies that there are at least 'entries_needed'
* free list members
* Conditions:
* The space is write-locked and active throughout.
* An object may be locked. Will not allocate memory.
* Returns:
* KERN_SUCCESS Free entries were found.
* KERN_NO_SPACE No entry allocated.
*/
kern_return_t
ipc_entries_hold(
ipc_space_t space,
uint32_t entries_needed)
{
ipc_entry_t table;
mach_port_index_t next_free = 0;
uint32_t i;
/*
* Assume that all new entries will need hashing.
* If the table is more than 87.5% full pretend we didn't have space.
*/
if (space->is_table_hashed + entries_needed >
space->is_table_size * 7 / 8) {
return KERN_NO_SPACE;
}
assert(is_active(space));
table = &space->is_table[0];
for (i = 0; i < entries_needed; i++) {
next_free = table[next_free].ie_next;
if (next_free == 0) {
return KERN_NO_SPACE;
}
assert(next_free < space->is_table_size);
assert(table[next_free].ie_object == IO_NULL);
}
return KERN_SUCCESS;
}
/*
* Routine: ipc_entry_claim
* Purpose:
* Take formal ownership of a held entry.
* Conditions:
* The space is write-locked and active throughout.
* An object may be locked. Will not allocate memory.
*
* Note: The returned entry must be marked as modified before
* releasing the space lock
*/
kern_return_t
ipc_entry_claim(
ipc_space_t space,
mach_port_name_t *namep,
ipc_entry_t *entryp)
{
ipc_entry_t entry;
ipc_entry_t table;
mach_port_index_t first_free;
mach_port_gen_t gen;
mach_port_name_t new_name;
table = &space->is_table[0];
first_free = table->ie_next;
assert(first_free != 0);
entry = &table[first_free];
table->ie_next = entry->ie_next;
space->is_table_free--;
assert(table->ie_next < space->is_table_size);
/*
* Initialize the new entry: increment gencount and reset
* rollover point if it rolled over, and clear ie_request.
*/
gen = ipc_entry_new_gen(entry->ie_bits);
if (__improbable(ipc_entry_gen_rolled(entry->ie_bits, gen))) {
ipc_entry_bits_t roll = ipc_space_get_rollpoint(space);
gen = ipc_entry_new_rollpoint(roll);
}
entry->ie_bits = gen;
entry->ie_request = IE_REQ_NONE;
/*
* The new name can't be MACH_PORT_NULL because index
* is non-zero. It can't be MACH_PORT_DEAD because
* the table isn't allowed to grow big enough.
* (See comment in ipc/ipc_table.h.)
*/
new_name = MACH_PORT_MAKE(first_free, gen);
assert(MACH_PORT_VALID(new_name));
*namep = new_name;
*entryp = entry;
return KERN_SUCCESS;
}
/*
* Routine: ipc_entry_alloc
* Purpose:
* Allocate an entry out of the space.
* Conditions:
* The space is not locked before, but it is write-locked after
* if the call is successful. May allocate memory.
* Returns:
* KERN_SUCCESS An entry was allocated.
* KERN_INVALID_TASK The space is dead.
* KERN_NO_SPACE No room for an entry in the space.
* KERN_RESOURCE_SHORTAGE Couldn't allocate memory for an entry.
*/
kern_return_t
ipc_entry_alloc(
ipc_space_t space,
mach_port_name_t *namep,
ipc_entry_t *entryp)
{
kern_return_t kr;
is_write_lock(space);
for (;;) {
if (!is_active(space)) {
is_write_unlock(space);
return KERN_INVALID_TASK;
}
kr = ipc_entries_hold(space, 1);
if (kr == KERN_SUCCESS) {
return ipc_entry_claim(space, namep, entryp);
}
kr = ipc_entry_grow_table(space, ITS_SIZE_NONE);
if (kr != KERN_SUCCESS) {
return kr; /* space is unlocked */
}
}
}
/*
* Routine: ipc_entry_alloc_name
* Purpose:
* Allocates/finds an entry with a specific name.
* If an existing entry is returned, its type will be nonzero.
* Conditions:
* The space is not locked before, but it is write-locked after
* if the call is successful. May allocate memory.
* Returns:
* KERN_SUCCESS Found existing entry with same name.
* KERN_SUCCESS Allocated a new entry.
* KERN_INVALID_TASK The space is dead.
* KERN_RESOURCE_SHORTAGE Couldn't allocate memory.
* KERN_FAILURE Couldn't allocate requested name.
*/
kern_return_t
ipc_entry_alloc_name(
ipc_space_t space,
mach_port_name_t name,
ipc_entry_t *entryp)
{
mach_port_index_t index = MACH_PORT_INDEX(name);
mach_port_gen_t gen = MACH_PORT_GEN(name);
if (index > ipc_table_max_entries()) {
return KERN_NO_SPACE;
}
assert(MACH_PORT_VALID(name));
is_write_lock(space);
for (;;) {
ipc_entry_t entry;
if (!is_active(space)) {
is_write_unlock(space);
return KERN_INVALID_TASK;
}
/*
* If we are under the table cutoff,
* there are usually four cases:
* 1) The entry is reserved (index 0)
* 2) The entry is inuse, for the same name
* 3) The entry is inuse, for a different name
* 4) The entry is free
* For a task with a "fast" IPC space, we disallow
* cases 1) and 3), because ports cannot be renamed.
*/
if (index < space->is_table_size) {
ipc_entry_t table = space->is_table;
entry = &table[index];
if (index == 0) {
/* case #1 - the entry is reserved */
assert(!IE_BITS_TYPE(entry->ie_bits));
assert(!IE_BITS_GEN(entry->ie_bits));
is_write_unlock(space);
return KERN_FAILURE;
} else if (IE_BITS_TYPE(entry->ie_bits)) {
if (IE_BITS_GEN(entry->ie_bits) == gen) {
/* case #2 -- the entry is inuse, for the same name */
*entryp = entry;
return KERN_SUCCESS;
} else {
/* case #3 -- the entry is inuse, for a different name. */
/* Collisions are not allowed */
is_write_unlock(space);
return KERN_FAILURE;
}
} else {
mach_port_index_t free_index, next_index;
/*
* case #4 -- the entry is free
* Rip the entry out of the free list.
*/
for (free_index = 0;
(next_index = table[free_index].ie_next)
!= index;
free_index = next_index) {
continue;
}
table[free_index].ie_next =
table[next_index].ie_next;
space->is_table_free--;
/* mark the previous entry modified - reconstructing the name */
ipc_entry_modified(space,
MACH_PORT_MAKE(free_index,
IE_BITS_GEN(table[free_index].ie_bits)),
&table[free_index]);
entry->ie_bits = gen;
entry->ie_request = IE_REQ_NONE;
*entryp = entry;
assert(entry->ie_object == IO_NULL);
return KERN_SUCCESS;
}
}
/*
* We grow the table so that the name
* index fits in the array space.
* Because the space will be unlocked,
* we must restart.
*/
kern_return_t kr;
kr = ipc_entry_grow_table(space, index + 1);
if (kr != KERN_SUCCESS) {
/* space is unlocked */
return kr;
}
continue;
}
}
/*
* Routine: ipc_entry_dealloc
* Purpose:
* Deallocates an entry from a space.
* Conditions:
* The space must be write-locked throughout.
* The space must be active.
*/
void
ipc_entry_dealloc(
ipc_space_t space,
mach_port_name_t name,
ipc_entry_t entry)
{
ipc_entry_t table;
ipc_entry_num_t size;
mach_port_index_t index;
assert(is_active(space));
assert(entry->ie_object == IO_NULL);
assert(entry->ie_request == IE_REQ_NONE);
#if 1
if (entry->ie_request != IE_REQ_NONE) {
panic("ipc_entry_dealloc()\n");
}
#endif
index = MACH_PORT_INDEX(name);
table = space->is_table;
size = space->is_table_size;
if ((index < size) && (entry == &table[index])) {
assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name));
entry->ie_bits &= (IE_BITS_GEN_MASK | IE_BITS_ROLL_MASK);
entry->ie_next = table->ie_next;
table->ie_next = index;
space->is_table_free++;
} else {
/*
* Nothing to do. The entry does not match
* so there is nothing to deallocate.
*/
assert(index < size);
assert(entry == &table[index]);
assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name));
}
ipc_entry_modified(space, name, entry);
}
/*
* Routine: ipc_entry_modified
* Purpose:
* Note that an entry was modified in a space.
* Conditions:
* Assumes exclusive write access to the space,
* either through a write lock or being the cleaner
* on an inactive space.
*/
void
ipc_entry_modified(
ipc_space_t space,
mach_port_name_t name,
__assert_only ipc_entry_t entry)
{
ipc_entry_t table;
ipc_entry_num_t size;
mach_port_index_t index;
index = MACH_PORT_INDEX(name);
table = space->is_table;
size = space->is_table_size;
assert(index < size);
assert(entry == &table[index]);
assert(space->is_low_mod <= size);
assert(space->is_high_mod < size);
if (index < space->is_low_mod) {
space->is_low_mod = index;
}
if (index > space->is_high_mod) {
space->is_high_mod = index;
}
KERNEL_DEBUG_CONSTANT(
MACHDBG_CODE(DBG_MACH_IPC, MACH_IPC_PORT_ENTRY_MODIFY) | DBG_FUNC_NONE,
space->is_task ? task_pid(space->is_task) : 0,
name,
entry->ie_bits,
0,
0);
}
#define IPC_ENTRY_GROW_STATS 1
#if IPC_ENTRY_GROW_STATS
static uint64_t ipc_entry_grow_count = 0;
static uint64_t ipc_entry_grow_rescan = 0;
static uint64_t ipc_entry_grow_rescan_max = 0;
static uint64_t ipc_entry_grow_rescan_entries = 0;
static uint64_t ipc_entry_grow_rescan_entries_max = 0;
static uint64_t ipc_entry_grow_freelist_entries = 0;
static uint64_t ipc_entry_grow_freelist_entries_max = 0;
#endif
/*
* Routine: ipc_entry_grow_table
* Purpose:
* Grows the table in a space.
* Conditions:
* The space must be write-locked and active before.
* If successful, the space is also returned locked.
* On failure, the space is returned unlocked.
* Allocates memory.
* Returns:
* KERN_SUCCESS Grew the table.
* KERN_SUCCESS Somebody else grew the table.
* KERN_SUCCESS The space died.
* KERN_NO_SPACE Table has maximum size already.
* KERN_RESOURCE_SHORTAGE Couldn't allocate a new table.
*/
kern_return_t
ipc_entry_grow_table(
ipc_space_t space,
ipc_table_elems_t target_size)
{
ipc_entry_num_t osize, size, nsize, psize;
ipc_entry_t otable, table;
ipc_table_size_t oits, its, nits;
mach_port_index_t i, free_index;
mach_port_index_t low_mod, hi_mod;
ipc_table_index_t sanity;
#if IPC_ENTRY_GROW_STATS
uint64_t rescan_count = 0;
#endif
assert(is_active(space));
if (is_growing(space)) {
/*
* Somebody else is growing the table.
* We just wait for them to finish.
*/
is_write_sleep(space);
return KERN_SUCCESS;
}
otable = space->is_table;
its = space->is_table_next;
size = its->its_size;
/*
* Since is_table_next points to the next natural size
* we can identify the current size entry.
*/
oits = its - 1;
osize = oits->its_size;
/*
* If there is no target size, then the new size is simply
* specified by is_table_next. If there is a target
* size, then search for the next entry.
*/
if (target_size != ITS_SIZE_NONE) {
if (target_size <= osize) {
/* the space is locked */
return KERN_SUCCESS;
}
psize = osize;
while ((psize != size) && (target_size > size)) {
psize = size;
its++;
size = its->its_size;
}
if (psize == size) {
is_write_unlock(space);
return KERN_NO_SPACE;
}
}
if (osize == size) {
is_write_unlock(space);
return KERN_NO_SPACE;
}
nits = its + 1;
nsize = nits->its_size;
assert((osize < size) && (size <= nsize));
/*
* We'll attempt to grow the table.
*
* Because we will be copying without the space lock, reset
* the lowest_mod index to just beyond the end of the current
* table. Modification of entries (other than hashes) will
* bump this downward, and we only have to reprocess entries
* above that mark. Eventually, we'll get done.
*/
is_start_growing(space);
space->is_low_mod = osize;
space->is_high_mod = 0;
#if IPC_ENTRY_GROW_STATS
ipc_entry_grow_count++;
#endif
is_write_unlock(space);
table = it_entries_alloc(its);
if (table == IE_NULL) {
is_write_lock(space);
is_done_growing(space);
is_write_unlock(space);
thread_wakeup((event_t) space);
return KERN_RESOURCE_SHORTAGE;
}
ipc_space_rand_freelist(space, table, osize, size);
/* clear out old entries in new table */
memset((void *)table, 0, osize * sizeof(*table));
low_mod = 0;
hi_mod = osize - 1;
rescan:
/*
* Within the range of the table that changed, determine what we
* have to take action on. For each entry, take a snapshot of the
* corresponding entry in the old table (so it won't change
* during this iteration). The snapshot may not be self-consistent
* (if we caught it in the middle of being changed), so be very
* cautious with the values.
*/
for (i = low_mod; i <= hi_mod; i++) {
ipc_entry_t entry = &table[i];
struct ipc_entry osnap = otable[i];
if (entry->ie_object != osnap.ie_object ||
IE_BITS_TYPE(entry->ie_bits) != IE_BITS_TYPE(osnap.ie_bits)) {
if (entry->ie_object != IO_NULL &&
IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_SEND) {
ipc_hash_table_delete(table, size, entry->ie_object, i, entry);
}
entry->ie_object = osnap.ie_object;
entry->ie_bits = osnap.ie_bits;
entry->ie_request = osnap.ie_request; /* or ie_next */
if (entry->ie_object != IO_NULL &&
IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_SEND) {
ipc_hash_table_insert(table, size, entry->ie_object, i, entry);
}
} else {
assert(entry->ie_object == osnap.ie_object);
entry->ie_bits = osnap.ie_bits;
entry->ie_request = osnap.ie_request; /* or ie_next */
}
}
table[0].ie_next = otable[0].ie_next; /* always rebase the freelist */
/*
* find the end of the freelist (should be short). But be careful,
* the list items can change so only follow through truly free entries
* (no problem stopping short in those cases, because we'll rescan).
*/
free_index = 0;
for (sanity = 0; sanity < osize; sanity++) {
if (table[free_index].ie_object != IPC_OBJECT_NULL) {
break;
}
i = table[free_index].ie_next;
if (i == 0 || i >= osize) {
break;
}
free_index = i;
}
#if IPC_ENTRY_GROW_STATS
ipc_entry_grow_freelist_entries += sanity;
if (sanity > ipc_entry_grow_freelist_entries_max) {
ipc_entry_grow_freelist_entries_max = sanity;
}
#endif
is_write_lock(space);
/*
* We need to do a wakeup on the space,
* to rouse waiting threads. We defer
* this until the space is unlocked,
* because we don't want them to spin.
*/
if (!is_active(space)) {
/*
* The space died while it was unlocked.
*/
is_done_growing(space);
is_write_unlock(space);
thread_wakeup((event_t) space);
it_entries_free(its, table);
is_write_lock(space);
return KERN_SUCCESS;
}
/* If the space changed while unlocked, go back and process the changes */
if (space->is_low_mod < osize) {
assert(space->is_high_mod > 0);
low_mod = space->is_low_mod;
space->is_low_mod = osize;
hi_mod = space->is_high_mod;
space->is_high_mod = 0;
is_write_unlock(space);
#if IPC_ENTRY_GROW_STATS
rescan_count++;
if (rescan_count > ipc_entry_grow_rescan_max) {
ipc_entry_grow_rescan_max = rescan_count;
}
ipc_entry_grow_rescan++;
ipc_entry_grow_rescan_entries += hi_mod - low_mod + 1;
if (hi_mod - low_mod + 1 > ipc_entry_grow_rescan_entries_max) {
ipc_entry_grow_rescan_entries_max = hi_mod - low_mod + 1;
}
#endif
goto rescan;
}
/* link new free entries onto the rest of the freelist */
assert(table[free_index].ie_next == 0 &&
table[free_index].ie_object == IO_NULL);
table[free_index].ie_next = osize;
assert(space->is_table == otable);
assert((space->is_table_next == its) ||
(target_size != ITS_SIZE_NONE));
assert(space->is_table_size == osize);
space->is_table = table;
space->is_table_size = size;
space->is_table_next = nits;
space->is_table_free += size - osize;
is_done_growing(space);
is_write_unlock(space);
thread_wakeup((event_t) space);
/*
* Now we need to free the old table.
*/
it_entries_free(oits, otable);
is_write_lock(space);
return KERN_SUCCESS;
}
/*
* Routine: ipc_entry_name_mask
* Purpose:
* Ensure a mach port name has the default ipc entry
* generation bits set. This can be used to ensure that
* a name passed in by user space matches names generated
* by the kernel.
* Conditions:
* None.
* Returns:
* 'name' input with default generation bits masked or added
* as appropriate.
*/
mach_port_name_t
ipc_entry_name_mask(mach_port_name_t name)
{
#ifndef NO_PORT_GEN
static mach_port_name_t null_name = MACH_PORT_MAKE(0, IE_BITS_GEN_MASK + IE_BITS_GEN_ONE);
return name | null_name;
#else
static mach_port_name_t null_name = MACH_PORT_MAKE(0, ~(IE_BITS_GEN_MASK + IE_BITS_GEN_ONE));
return name & ~null_name;
#endif
}