From de165b287446733c41efdb455eca895dccb3d674 Mon Sep 17 00:00:00 2001
From: Thomas A
Date: Thu, 13 Jan 2022 20:14:23 -0800
Subject: [PATCH] Add Back `dthread` Folder

---
 dthread/README.md | 4 +
 dthread/include/kern/kern_internal.h | 1 +
 dthread/include/kern/kern_trace.h | 1 +
 dthread/include/kern/synch_internal.h | 1 +
 dthread/include/kern/workqueue_internal.h | 1 +
 dthread/include/private/qos_private.h | 225 ++
 dthread/include/pthread/qos.h | 304 ++
 .../include/sys/_pthread/_pthread_attr_t.h | 32 +
 dthread/include/sys/_pthread/_pthread_t.h | 32 +
 dthread/include/sys/_pthread/_pthread_types.h | 120 +
 dthread/include/sys/qos.h | 200 ++
 dthread/include/sys/qos_private.h | 40 +
 dthread/kern_init.c | 65 +
 dthread/kern_internal.h | 227 ++
 dthread/kern_support.c | 1020 ++++++
 dthread/kern_synch.c | 2835 +++++++++++++++++
 dthread/kern_trace.h | 164 +
 dthread/pthread_kext.c | 201 ++
 dthread/pthread_kext.h | 7 +
 dthread/synch_internal.h | 173 +
 dthread/workqueue_internal.h | 54 +
 21 files changed, 5707 insertions(+)
 create mode 100644 dthread/README.md
 create mode 120000 dthread/include/kern/kern_internal.h
 create mode 120000 dthread/include/kern/kern_trace.h
 create mode 120000 dthread/include/kern/synch_internal.h
 create mode 120000 dthread/include/kern/workqueue_internal.h
 create mode 100644 dthread/include/private/qos_private.h
 create mode 100644 dthread/include/pthread/qos.h
 create mode 100644 dthread/include/sys/_pthread/_pthread_attr_t.h
 create mode 100644 dthread/include/sys/_pthread/_pthread_t.h
 create mode 100644 dthread/include/sys/_pthread/_pthread_types.h
 create mode 100644 dthread/include/sys/qos.h
 create mode 100644 dthread/include/sys/qos_private.h
 create mode 100644 dthread/kern_init.c
 create mode 100644 dthread/kern_internal.h
 create mode 100644 dthread/kern_support.c
 create mode 100644 dthread/kern_synch.c
 create mode 100644 dthread/kern_trace.h
 create mode 100644 dthread/pthread_kext.c
 create mode 100644 dthread/pthread_kext.h
 create mode 100644 dthread/synch_internal.h
 create mode 100644 dthread/workqueue_internal.h

diff --git a/dthread/README.md b/dthread/README.md
new file mode 100644
index 0000000..2c216a8
--- /dev/null
+++ b/dthread/README.md
@@ -0,0 +1,4 @@
+# dthread
+This directory is home to Darling's port of Apple's pthread kext. The original source is from libpthread-416.60.2
+
+The goal here is to have Apple's pthread code work with our LKM with as few modifications as possible.
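[Editor's orientation note, not part of the patch: the glue between this vendored code and the rest of the kernel lives in dthread/kern_init.c, added later in this patch. The sketch below condenses that file to its core: a table of pthread entry points handed to the kernel via pthread_kext_register(), which in turn fills in the pthread_kern callback table the rest of the code uses to call back into XNU / Darling's LKM. The initializer is heavily abbreviated and the kernel header includes are elided, so treat it as an illustration of the registration flow rather than a compilable unit.]

/* Condensed, illustrative view of dthread/kern_init.c (see the full file below).
 * Kernel includes and most table entries are omitted. */
#include "kern_internal.h"

/* Callback table; filled in by the kernel/LKM when we register. */
pthread_callbacks_t pthread_kern;

/* Entry points this kext exports to the kernel (abbreviated). */
const struct pthread_functions_s pthread_internal_functions = {
	.pthread_init       = _pthread_init,
	.bsdthread_create   = _bsdthread_create,
	.bsdthread_register = _bsdthread_register,
	.psynch_mutexwait   = _psynch_mutexwait,
	.psynch_cvwait      = _psynch_cvwait,
	/* ... remaining psynch/workqueue hooks, as in kern_init.c ... */
};

kern_return_t
pthread_start(__unused kmod_info_t *ki, __unused void *d)
{
	/* Hand our function table to the kernel; receive its callbacks in pthread_kern. */
	pthread_kext_register((pthread_functions_t)&pthread_internal_functions,
	    &pthread_kern);
	return KERN_SUCCESS;
}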
diff --git a/dthread/include/kern/kern_internal.h b/dthread/include/kern/kern_internal.h new file mode 120000 index 0000000..f42288c --- /dev/null +++ b/dthread/include/kern/kern_internal.h @@ -0,0 +1 @@ +../../kern_internal.h \ No newline at end of file diff --git a/dthread/include/kern/kern_trace.h b/dthread/include/kern/kern_trace.h new file mode 120000 index 0000000..79d0d08 --- /dev/null +++ b/dthread/include/kern/kern_trace.h @@ -0,0 +1 @@ +../../kern_trace.h \ No newline at end of file diff --git a/dthread/include/kern/synch_internal.h b/dthread/include/kern/synch_internal.h new file mode 120000 index 0000000..eb6820e --- /dev/null +++ b/dthread/include/kern/synch_internal.h @@ -0,0 +1 @@ +../../synch_internal.h \ No newline at end of file diff --git a/dthread/include/kern/workqueue_internal.h b/dthread/include/kern/workqueue_internal.h new file mode 120000 index 0000000..3333ce8 --- /dev/null +++ b/dthread/include/kern/workqueue_internal.h @@ -0,0 +1 @@ +../../workqueue_internal.h \ No newline at end of file diff --git a/dthread/include/private/qos_private.h b/dthread/include/private/qos_private.h new file mode 100644 index 0000000..0a456ea --- /dev/null +++ b/dthread/include/private/qos_private.h @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2013-2014 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _QOS_PRIVATE_H +#define _QOS_PRIVATE_H + +#include +#include +#include /* qos_class_t */ +#include + +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL +// allow __DARWIN_C_LEVEL to turn off the use of mach_port_t +#include +#endif + +// redeffed here to avoid leaving __QOS_ENUM defined in the public header +#define __QOS_ENUM(name, type, ...) enum { __VA_ARGS__ }; typedef type name##_t +#define __QOS_AVAILABLE_10_10 +#define __QOS_AVAILABLE_10_11 +#define __QOS_AVAILABLE_10_12 +#ifdef __DARLING__ +// i'm pretty sure Apple should be defining this too +#define __QOS_AVAILABLE_10_15_1 +#endif + +#if defined(__has_feature) && defined(__has_extension) +#if __has_feature(objc_fixed_enum) || __has_extension(cxx_strong_enums) +#undef __QOS_ENUM +#define __QOS_ENUM(name, type, ...) 
typedef enum : type { __VA_ARGS__ } name##_t +#endif +#if __has_feature(enumerator_attributes) +#undef __QOS_AVAILABLE_10_10 +#define __QOS_AVAILABLE_10_10 __API_AVAILABLE(macos(10.10), ios(8.0)) +#undef __QOS_AVAILABLE_10_11 +#define __QOS_AVAILABLE_10_11 __API_AVAILABLE(macos(10.11), ios(9.0), tvos(9.0), watchos(2.0)) +#undef __QOS_AVAILABLE_10_12 +#define __QOS_AVAILABLE_10_12 __API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0)) +#undef __QOS_AVAILABLE_10_15_1 +#define __QOS_AVAILABLE_10_15_1 __API_AVAILABLE(macos(10.15.1), ios(13.2), tvos(13.2), watchos(6.2)) +#endif +#endif + +// This enum matches workq_set_self_flags in +// xnu's workqueue_internal.h. +__QOS_ENUM(_pthread_set_flags, unsigned int, + _PTHREAD_SET_SELF_QOS_FLAG __QOS_AVAILABLE_10_10 = 0x1, + _PTHREAD_SET_SELF_VOUCHER_FLAG __QOS_AVAILABLE_10_10 = 0x2, + _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG __QOS_AVAILABLE_10_11 = 0x4, + _PTHREAD_SET_SELF_TIMESHARE_FLAG __QOS_AVAILABLE_10_11 = 0x8, + _PTHREAD_SET_SELF_WQ_KEVENT_UNBIND __QOS_AVAILABLE_10_12 = 0x10, + _PTHREAD_SET_SELF_ALTERNATE_AMX __QOS_AVAILABLE_10_15_1 = 0x20, +); + +#undef __QOS_ENUM +#undef __QOS_AVAILABLE_10_10 +#undef __QOS_AVAILABLE_10_11 +#undef __QOS_AVAILABLE_10_12 + +#ifndef KERNEL + +__BEGIN_DECLS + +/*! + * @function pthread_set_qos_class_np + * + * @abstract + * Sets the requested QOS class and relative priority of the current thread. + * + * @discussion + * The QOS class and relative priority represent an overall combination of + * system quality of service attributes on a thread. + * + * Subsequent calls to interfaces such as pthread_setschedparam() that are + * incompatible or in conflict with the QOS class system will unset the QOS + * class requested with this interface and pthread_get_qos_class_np() will + * return QOS_CLASS_UNSPECIFIED thereafter. A thread so modified is permanently + * opted-out of the QOS class system and calls to this function to request a QOS + * class for such a thread will fail and return EPERM. + * + * @param __pthread + * The current thread as returned by pthread_self(). + * EINVAL will be returned if any other thread is provided. + * + * @param __qos_class + * A QOS class value: + * - QOS_CLASS_USER_INTERACTIVE + * - QOS_CLASS_USER_INITIATED + * - QOS_CLASS_DEFAULT + * - QOS_CLASS_UTILITY + * - QOS_CLASS_BACKGROUND + * - QOS_CLASS_MAINTENANCE + * EINVAL will be returned if any other value is provided. + * + * @param __relative_priority + * A relative priority within the QOS class. This value is a negative offset + * from the maximum supported scheduler priority for the given class. + * EINVAL will be returned if the value is greater than zero or less than + * QOS_MIN_RELATIVE_PRIORITY. + * + * @return + * Zero if successful, othwerise an errno value. + */ +__API_DEPRECATED_WITH_REPLACEMENT("pthread_set_qos_class_self_np", macos(10.10, 10.10), ios(8.0, 8.0)) +int +pthread_set_qos_class_np(pthread_t __pthread, + qos_class_t __qos_class, + int __relative_priority); + +/* Private interfaces for libdispatch to encode/decode specific values of pthread_priority_t. */ + +// Encode a class+priority pair into a pthread_priority_t, +__API_AVAILABLE(macos(10.10), ios(8.0)) +pthread_priority_t +_pthread_qos_class_encode(qos_class_t qos_class, int relative_priority, unsigned long flags); + +// Decode a pthread_priority_t into a class+priority pair. 
+__API_AVAILABLE(macos(10.10), ios(8.0)) +qos_class_t +_pthread_qos_class_decode(pthread_priority_t priority, int *relative_priority, unsigned long *flags); + +// Encode a legacy workqueue API priority into a pthread_priority_t. This API +// is deprecated and can be removed when the simulator no longer uses it. +__API_DEPRECATED("no longer used", macos(10.10, 10.13), ios(8.0, 11.0)) +pthread_priority_t +_pthread_qos_class_encode_workqueue(int queue_priority, unsigned long flags); + +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL + +// Set QoS or voucher, or both, on pthread_self() +__API_AVAILABLE(macos(10.10), ios(8.0)) +int +_pthread_set_properties_self(_pthread_set_flags_t flags, pthread_priority_t priority, mach_port_t voucher); + +// Set self to fixed priority without disturbing QoS or priority +__API_AVAILABLE(macos(10.10), ios(8.0)) +int +pthread_set_fixedpriority_self(void); + +// Inverse of pthread_set_fixedpriority_self() +__API_AVAILABLE(macos(10.10), ios(8.0)) +int +pthread_set_timeshare_self(void); + +// Set self to avoid running on the same AMX as +// other work in this group. +// Only allowed on non-workqueue pthreads +__API_AVAILABLE(macos(10.15.1), ios(13.2), tvos(13.2), watchos(6.2)) +int +pthread_prefer_alternate_amx_self(void); + +/*! + * @const PTHREAD_MAX_PARALLELISM_PHYSICAL + * Flag that can be used with pthread_qos_max_parallelism() and + * pthread_time_constraint_max_parallelism() to ask for a count of physical + * compute units available for parallelism (default is logical). + */ +#define PTHREAD_MAX_PARALLELISM_PHYSICAL 0x1 + +/*! + * @function pthread_qos_max_parallelism + * + * @abstract + * Returns the number of compute units available for parallel computation at + * a specified QoS class. + * + * @param qos + * The specified QoS class. + * + * @param flags + * 0 or PTHREAD_MAX_PARALLELISM_PHYSICAL. + * + * @return + * The number of compute units available for parallel computation for the + * specified QoS, or -1 on failure (with errno set accordingly). + */ +__API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0)) +int +pthread_qos_max_parallelism(qos_class_t qos, unsigned long flags); + +/*! + * @function pthread_time_constraint_max_parallelism() + * + * @abstract + * Returns the number of compute units available for parallel computation on + * realtime threads. + * + * @param flags + * 0 or PTHREAD_MAX_PARALLELISM_PHYSICAL. + * + * @return + * The number of compute units available for parallel computation on realtime + * threads, or -1 on failure (with errno set accordingly). + */ +__API_AVAILABLE(macos(10.13), ios(11.0), tvos(11.0), watchos(4.0)) +int +pthread_time_constraint_max_parallelism(unsigned long flags); + +#endif // __DARWIN_C_LEVEL >= __DARWIN_C_FULL + +__END_DECLS + +#endif // KERNEL + +#endif //_QOS_PRIVATE_H diff --git a/dthread/include/pthread/qos.h b/dthread/include/pthread/qos.h new file mode 100644 index 0000000..9c1bfd8 --- /dev/null +++ b/dthread/include/pthread/qos.h @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2013-2014 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _PTHREAD_QOS_H +#define _PTHREAD_QOS_H + +#include +#include /* pthread_attr_t */ +#include /* pthread_t */ +#include + +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL + +#include + +#ifndef KERNEL + +#if __has_feature(assume_nonnull) +_Pragma("clang assume_nonnull begin") +#endif +__BEGIN_DECLS + +/*! + * @function pthread_attr_set_qos_class_np + * + * @abstract + * Sets the QOS class and relative priority of a pthread attribute structure + * which may be used to specify the requested QOS class of newly created + * threads. + * + * @discussion + * The QOS class and relative priority represent an overall combination of + * system quality of service attributes on a thread. + * + * Subsequent calls to interfaces such as pthread_attr_setschedparam() that are + * incompatible or in conflict with the QOS class system will unset the QOS + * class requested with this interface and pthread_attr_get_qos_class_np() will + * return QOS_CLASS_UNSPECIFIED. + * + * @param __attr + * The pthread attribute structure to modify. + * + * @param __qos_class + * A QOS class value: + * - QOS_CLASS_USER_INTERACTIVE + * - QOS_CLASS_USER_INITIATED + * - QOS_CLASS_DEFAULT + * - QOS_CLASS_UTILITY + * - QOS_CLASS_BACKGROUND + * EINVAL will be returned if any other value is provided. + * + * @param __relative_priority + * A relative priority within the QOS class. This value is a negative offset + * from the maximum supported scheduler priority for the given class. + * EINVAL will be returned if the value is greater than zero or less than + * QOS_MIN_RELATIVE_PRIORITY. + * + * @return + * Zero if successful, otherwise an errno value. + */ +__API_AVAILABLE(macos(10.10), ios(8.0)) +int +pthread_attr_set_qos_class_np(pthread_attr_t *__attr, + qos_class_t __qos_class, int __relative_priority); + +/*! + * @function pthread_attr_get_qos_class_np + * + * @abstract + * Gets the QOS class and relative priority of a pthread attribute structure. + * + * @param __attr + * The pthread attribute structure to inspect. + * + * @param __qos_class + * On output, a QOS class value: + * - QOS_CLASS_USER_INTERACTIVE + * - QOS_CLASS_USER_INITIATED + * - QOS_CLASS_DEFAULT + * - QOS_CLASS_UTILITY + * - QOS_CLASS_BACKGROUND + * - QOS_CLASS_UNSPECIFIED + * This value may be NULL in which case no value is returned. + * + * @param __relative_priority + * On output, a relative priority offset within the QOS class. + * This value may be NULL in which case no value is returned. + * + * @return + * Zero if successful, otherwise an errno value. + */ +__API_AVAILABLE(macos(10.10), ios(8.0)) +int +pthread_attr_get_qos_class_np(pthread_attr_t * __restrict __attr, + qos_class_t * _Nullable __restrict __qos_class, + int * _Nullable __restrict __relative_priority); + +/*! + * @function pthread_set_qos_class_self_np + * + * @abstract + * Sets the requested QOS class and relative priority of the current thread. 
+ * + * @discussion + * The QOS class and relative priority represent an overall combination of + * system quality of service attributes on a thread. + * + * Subsequent calls to interfaces such as pthread_setschedparam() that are + * incompatible or in conflict with the QOS class system will unset the QOS + * class requested with this interface and pthread_get_qos_class_np() will + * return QOS_CLASS_UNSPECIFIED thereafter. A thread so modified is permanently + * opted-out of the QOS class system and calls to this function to request a QOS + * class for such a thread will fail and return EPERM. + * + * @param __qos_class + * A QOS class value: + * - QOS_CLASS_USER_INTERACTIVE + * - QOS_CLASS_USER_INITIATED + * - QOS_CLASS_DEFAULT + * - QOS_CLASS_UTILITY + * - QOS_CLASS_BACKGROUND + * EINVAL will be returned if any other value is provided. + * + * @param __relative_priority + * A relative priority within the QOS class. This value is a negative offset + * from the maximum supported scheduler priority for the given class. + * EINVAL will be returned if the value is greater than zero or less than + * QOS_MIN_RELATIVE_PRIORITY. + * + * @return + * Zero if successful, otherwise an errno value. + */ +__API_AVAILABLE(macos(10.10), ios(8.0)) +int +pthread_set_qos_class_self_np(qos_class_t __qos_class, + int __relative_priority); + +/*! + * @function pthread_get_qos_class_np + * + * @abstract + * Gets the requested QOS class and relative priority of a thread. + * + * @param __pthread + * The target thread to inspect. + * + * @param __qos_class + * On output, a QOS class value: + * - QOS_CLASS_USER_INTERACTIVE + * - QOS_CLASS_USER_INITIATED + * - QOS_CLASS_DEFAULT + * - QOS_CLASS_UTILITY + * - QOS_CLASS_BACKGROUND + * - QOS_CLASS_UNSPECIFIED + * This value may be NULL in which case no value is returned. + * + * @param __relative_priority + * On output, a relative priority offset within the QOS class. + * This value may be NULL in which case no value is returned. + * + * @return + * Zero if successful, otherwise an errno value. + */ +__API_AVAILABLE(macos(10.10), ios(8.0)) +int +pthread_get_qos_class_np(pthread_t __pthread, + qos_class_t * _Nullable __restrict __qos_class, + int * _Nullable __restrict __relative_priority); + +/*! + * @typedef pthread_override_t + * + * @abstract + * An opaque object representing a QOS class override of a thread. + * + * @discussion + * A QOS class override of a target thread expresses that an item of pending + * work classified with a specific QOS class and relative priority depends on + * the completion of the work currently being executed by the thread (e.g. due + * to ordering requirements). + * + * While overrides are in effect, the target thread will execute at the maximum + * QOS class and relative priority of all overrides and of the QOS class + * requested by the thread itself. + * + * A QOS class override does not modify the target thread's requested QOS class + * value and the effect of an override is not visible to the qos_class_self() + * and pthread_get_qos_class_np() interfaces. + */ + +typedef struct pthread_override_s* pthread_override_t; + +/*! + * @function pthread_override_qos_class_start_np + * + * @abstract + * Starts a QOS class override of the specified target thread. + * + * @discussion + * Starting a QOS class override of the specified target thread expresses that + * an item of pending work classified with the specified QOS class and relative + * priority depends on the completion of the work currently being executed by + * the thread (e.g. 
due to ordering requirements). + * + * While overrides are in effect, the specified target thread will execute at + * the maximum QOS class and relative priority of all overrides and of the QOS + * class requested by the thread itself. + * + * Starting a QOS class override does not modify the target thread's requested + * QOS class value and the effect of an override is not visible to the + * qos_class_self() and pthread_get_qos_class_np() interfaces. + * + * The returned newly allocated override object is intended to be associated + * with the item of pending work in question. Once the dependency has been + * satisfied and enabled that work to begin executing, the QOS class override + * must be ended by passing the associated override object to + * pthread_override_qos_class_end_np(). Failure to do so will result in the + * associated resources to be leaked and the target thread to be permanently + * executed at an inappropriately elevated QOS class. + * + * @param __pthread + * The target thread to modify. + * + * @param __qos_class + * A QOS class value: + * - QOS_CLASS_USER_INTERACTIVE + * - QOS_CLASS_USER_INITIATED + * - QOS_CLASS_DEFAULT + * - QOS_CLASS_UTILITY + * - QOS_CLASS_BACKGROUND + * NULL will be returned if any other value is provided. + * + * @param __relative_priority + * A relative priority within the QOS class. This value is a negative offset + * from the maximum supported scheduler priority for the given class. + * NULL will be returned if the value is greater than zero or less than + * QOS_MIN_RELATIVE_PRIORITY. + * + * @return + * A newly allocated override object if successful, or NULL if the override + * could not be started. + */ +__API_AVAILABLE(macos(10.10), ios(8.0)) +pthread_override_t +pthread_override_qos_class_start_np(pthread_t __pthread, + qos_class_t __qos_class, int __relative_priority); + +/*! + * @function pthread_override_qos_class_end_np + * + * @abstract + * Ends a QOS class override. + * + * @discussion + * Passing an override object returned by pthread_override_qos_class_start_np() + * ends the QOS class override started by that call and deallocates all + * associated resources as well as the override object itself. + * + * The thread starting and the thread ending a QOS class override need not be + * identical. If the thread ending the override is the the target thread of the + * override itself, it should take care to elevate its requested QOS class + * appropriately with pthread_set_qos_class_self_np() before ending the + * override. + * + * @param __override + * An override object returned by pthread_override_qos_class_start_np(). + * + * @return + * Zero if successful, otherwise an errno value. + */ +__API_AVAILABLE(macos(10.10), ios(8.0)) +int +pthread_override_qos_class_end_np(pthread_override_t __override); + +__END_DECLS +#if __has_feature(assume_nonnull) +_Pragma("clang assume_nonnull end") +#endif + +#endif // KERNEL + +#endif // __DARWIN_C_LEVEL >= __DARWIN_C_FULL + +#endif // _PTHREAD_QOS_H diff --git a/dthread/include/sys/_pthread/_pthread_attr_t.h b/dthread/include/sys/_pthread/_pthread_attr_t.h new file mode 100644 index 0000000..cba5882 --- /dev/null +++ b/dthread/include/sys/_pthread/_pthread_attr_t.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). 
You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PTHREAD_ATTR_T +#define _PTHREAD_ATTR_T +#include /* __darwin_pthread_attr_t */ +typedef __darwin_pthread_attr_t pthread_attr_t; +#endif /* _PTHREAD_ATTR_T */ diff --git a/dthread/include/sys/_pthread/_pthread_t.h b/dthread/include/sys/_pthread/_pthread_t.h new file mode 100644 index 0000000..4d9e3da --- /dev/null +++ b/dthread/include/sys/_pthread/_pthread_t.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PTHREAD_T +#define _PTHREAD_T +#include /* __darwin_pthread_t */ +typedef __darwin_pthread_t pthread_t; +#endif /* _PTHREAD_T */ diff --git a/dthread/include/sys/_pthread/_pthread_types.h b/dthread/include/sys/_pthread/_pthread_types.h new file mode 100644 index 0000000..d9d51b8 --- /dev/null +++ b/dthread/include/sys/_pthread/_pthread_types.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2003-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS__PTHREAD_TYPES_H_ +#define _SYS__PTHREAD_TYPES_H_ + +#include + +// pthread opaque structures +#if defined(__LP64__) +#define __PTHREAD_SIZE__ 8176 +#define __PTHREAD_ATTR_SIZE__ 56 +#define __PTHREAD_MUTEXATTR_SIZE__ 8 +#define __PTHREAD_MUTEX_SIZE__ 56 +#define __PTHREAD_CONDATTR_SIZE__ 8 +#define __PTHREAD_COND_SIZE__ 40 +#define __PTHREAD_ONCE_SIZE__ 8 +#define __PTHREAD_RWLOCK_SIZE__ 192 +#define __PTHREAD_RWLOCKATTR_SIZE__ 16 +#else // !__LP64__ +#define __PTHREAD_SIZE__ 4088 +#define __PTHREAD_ATTR_SIZE__ 36 +#define __PTHREAD_MUTEXATTR_SIZE__ 8 +#define __PTHREAD_MUTEX_SIZE__ 40 +#define __PTHREAD_CONDATTR_SIZE__ 4 +#define __PTHREAD_COND_SIZE__ 24 +#define __PTHREAD_ONCE_SIZE__ 4 +#define __PTHREAD_RWLOCK_SIZE__ 124 +#define __PTHREAD_RWLOCKATTR_SIZE__ 12 +#endif // !__LP64__ + +struct __darwin_pthread_handler_rec { + void (*__routine)(void *); // Routine to call + void *__arg; // Argument to pass + struct __darwin_pthread_handler_rec *__next; +}; + +struct _opaque_pthread_attr_t { + long __sig; + char __opaque[__PTHREAD_ATTR_SIZE__]; +}; + +struct _opaque_pthread_cond_t { + long __sig; + char __opaque[__PTHREAD_COND_SIZE__]; +}; + +struct _opaque_pthread_condattr_t { + long __sig; + char __opaque[__PTHREAD_CONDATTR_SIZE__]; +}; + +struct _opaque_pthread_mutex_t { + long __sig; + char __opaque[__PTHREAD_MUTEX_SIZE__]; +}; + +struct _opaque_pthread_mutexattr_t { + long __sig; + char __opaque[__PTHREAD_MUTEXATTR_SIZE__]; +}; + +struct _opaque_pthread_once_t { + long __sig; + char __opaque[__PTHREAD_ONCE_SIZE__]; +}; + +struct _opaque_pthread_rwlock_t { + long __sig; + char __opaque[__PTHREAD_RWLOCK_SIZE__]; +}; + +struct _opaque_pthread_rwlockattr_t { + long __sig; + char __opaque[__PTHREAD_RWLOCKATTR_SIZE__]; +}; + +struct _opaque_pthread_t { + long __sig; + struct __darwin_pthread_handler_rec *__cleanup_stack; + char __opaque[__PTHREAD_SIZE__]; +}; + +typedef struct _opaque_pthread_attr_t __darwin_pthread_attr_t; +typedef struct _opaque_pthread_cond_t __darwin_pthread_cond_t; +typedef struct _opaque_pthread_condattr_t __darwin_pthread_condattr_t; +typedef unsigned long __darwin_pthread_key_t; +typedef struct _opaque_pthread_mutex_t __darwin_pthread_mutex_t; +typedef struct _opaque_pthread_mutexattr_t __darwin_pthread_mutexattr_t; +typedef struct _opaque_pthread_once_t __darwin_pthread_once_t; +typedef struct _opaque_pthread_rwlock_t __darwin_pthread_rwlock_t; +typedef struct _opaque_pthread_rwlockattr_t __darwin_pthread_rwlockattr_t; +typedef struct _opaque_pthread_t *__darwin_pthread_t; + +#endif // 
_SYS__PTHREAD_TYPES_H_ diff --git a/dthread/include/sys/qos.h b/dthread/include/sys/qos.h new file mode 100644 index 0000000..2aa7dcd --- /dev/null +++ b/dthread/include/sys/qos.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2013-2014 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_QOS_H +#define _SYS_QOS_H + +#include +#include + +/*! + * @typedef qos_class_t + * + * @abstract + * An abstract thread quality of service (QOS) classification. + * + * @discussion + * Thread quality of service (QOS) classes are ordered abstract representations + * of the nature of work that is expected to be performed by a pthread, dispatch + * queue, or NSOperation. Each class specifies a maximum thread scheduling + * priority for that band (which may be used in combination with a relative + * priority offset within the band), as well as quality of service + * characteristics for timer latency, CPU throughput, I/O throughput, network + * socket traffic management behavior and more. + * + * A best effort is made to allocate available system resources to every QOS + * class. Quality of service degredation only occurs during system resource + * contention, proportionally to the QOS class. That said, QOS classes + * representing user-initiated work attempt to achieve peak throughput while + * QOS classes for other work attempt to achieve peak energy and thermal + * efficiency, even in the absence of contention. Finally, the use of QOS + * classes does not allow threads to supersede any limits that may be applied + * to the overall process. + */ + +/*! + * @constant QOS_CLASS_USER_INTERACTIVE + * @abstract A QOS class which indicates work performed by this thread + * is interactive with the user. + * @discussion Such work is requested to run at high priority relative to other + * work on the system. Specifying this QOS class is a request to run with + * nearly all available system CPU and I/O bandwidth even under contention. + * This is not an energy-efficient QOS class to use for large tasks. The use of + * this QOS class should be limited to critical interaction with the user such + * as handling events on the main event loop, view drawing, animation, etc. + * + * @constant QOS_CLASS_USER_INITIATED + * @abstract A QOS class which indicates work performed by this thread + * was initiated by the user and that the user is likely waiting for the + * results. + * @discussion Such work is requested to run at a priority below critical user- + * interactive work, but relatively higher than other work on the system. This + * is not an energy-efficient QOS class to use for large tasks. 
Its use + * should be limited to operations of short enough duration that the user is + * unlikely to switch tasks while waiting for the results. Typical + * user-initiated work will have progress indicated by the display of + * placeholder content or modal user interface. + * + * @constant QOS_CLASS_DEFAULT + * @abstract A default QOS class used by the system in cases where more specific + * QOS class information is not available. + * @discussion Such work is requested to run at a priority below critical user- + * interactive and user-initiated work, but relatively higher than utility and + * background tasks. Threads created by pthread_create() without an attribute + * specifying a QOS class will default to QOS_CLASS_DEFAULT. This QOS class + * value is not intended to be used as a work classification, it should only be + * set when propagating or restoring QOS class values provided by the system. + * + * @constant QOS_CLASS_UTILITY + * @abstract A QOS class which indicates work performed by this thread + * may or may not be initiated by the user and that the user is unlikely to be + * immediately waiting for the results. + * @discussion Such work is requested to run at a priority below critical user- + * interactive and user-initiated work, but relatively higher than low-level + * system maintenance tasks. The use of this QOS class indicates the work + * should be run in an energy and thermally-efficient manner. The progress of + * utility work may or may not be indicated to the user, but the effect of such + * work is user-visible. + * + * @constant QOS_CLASS_BACKGROUND + * @abstract A QOS class which indicates work performed by this thread was not + * initiated by the user and that the user may be unaware of the results. + * @discussion Such work is requested to run at a priority below other work. + * The use of this QOS class indicates the work should be run in the most energy + * and thermally-efficient manner. + * + * @constant QOS_CLASS_UNSPECIFIED + * @abstract A QOS class value which indicates the absence or removal of QOS + * class information. + * @discussion As an API return value, may indicate that threads or pthread + * attributes were configured with legacy API incompatible or in conflict with + * the QOS class system. + */ + +#define __QOS_ENUM(name, type, ...) enum { __VA_ARGS__ }; typedef type name##_t +#define __QOS_CLASS_AVAILABLE(...) + +#if defined(__cplusplus) || defined(__OBJC__) || __LP64__ +#if defined(__has_feature) && defined(__has_extension) +#if __has_feature(objc_fixed_enum) || __has_extension(cxx_strong_enums) +#undef __QOS_ENUM +#define __QOS_ENUM(name, type, ...) typedef enum : type { __VA_ARGS__ } name##_t +#endif +#endif +#if __has_feature(enumerator_attributes) +#undef __QOS_CLASS_AVAILABLE +#define __QOS_CLASS_AVAILABLE __API_AVAILABLE +#endif +#endif + +__QOS_ENUM(qos_class, unsigned int, + QOS_CLASS_USER_INTERACTIVE + __QOS_CLASS_AVAILABLE(macos(10.10), ios(8.0)) = 0x21, + QOS_CLASS_USER_INITIATED + __QOS_CLASS_AVAILABLE(macos(10.10), ios(8.0)) = 0x19, + QOS_CLASS_DEFAULT + __QOS_CLASS_AVAILABLE(macos(10.10), ios(8.0)) = 0x15, + QOS_CLASS_UTILITY + __QOS_CLASS_AVAILABLE(macos(10.10), ios(8.0)) = 0x11, + QOS_CLASS_BACKGROUND + __QOS_CLASS_AVAILABLE(macos(10.10), ios(8.0)) = 0x09, + QOS_CLASS_UNSPECIFIED + __QOS_CLASS_AVAILABLE(macos(10.10), ios(8.0)) = 0x00, +); + +#undef __QOS_ENUM + +/*! + * @constant QOS_MIN_RELATIVE_PRIORITY + * @abstract The minimum relative priority that may be specified within a + * QOS class. 
These priorities are relative only within a given QOS class + * and meaningful only for the current process. + */ +#define QOS_MIN_RELATIVE_PRIORITY (-15) + +/* Userspace (only) definitions */ + +#ifndef KERNEL + +__BEGIN_DECLS + +/*! + * @function qos_class_self + * + * @abstract + * Returns the requested QOS class of the current thread. + * + * @return + * One of the QOS class values in qos_class_t. + */ +__API_AVAILABLE(macos(10.10), ios(8.0)) +qos_class_t +qos_class_self(void); + +/*! + * @function qos_class_main + * + * @abstract + * Returns the initial requested QOS class of the main thread. + * + * @discussion + * The QOS class that the main thread of a process is created with depends on + * the type of process (e.g. application or daemon) and on how it has been + * launched. + * + * This function returns that initial requested QOS class value chosen by the + * system to enable propagation of that classification to matching work not + * executing on the main thread. + * + * @return + * One of the QOS class values in qos_class_t. + */ +__API_AVAILABLE(macos(10.10), ios(8.0)) +qos_class_t +qos_class_main(void); + +__END_DECLS + +#endif // KERNEL + +#endif // _SYS_QOS_H diff --git a/dthread/include/sys/qos_private.h b/dthread/include/sys/qos_private.h new file mode 100644 index 0000000..b968f87 --- /dev/null +++ b/dthread/include/sys/qos_private.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2014 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _QOS_SYS_PRIVATE_H +#define _QOS_SYS_PRIVATE_H + +/*! + * @constant QOS_CLASS_MAINTENANCE + * @abstract A QOS class which indicates work performed by this thread was not + * initiated by the user and that the user may be unaware of the results. + * @discussion Such work is requested to run at a priority far below other work + * including significant I/O throttling. The use of this QOS class indicates + * the work should be run in the most energy and thermally-efficient manner + * possible, and may be deferred for a long time in order to preserve + * system responsiveness for the user. + * This is SPI for use by Spotlight and Time Machine only. + */ +#define QOS_CLASS_MAINTENANCE ((qos_class_t)0x05) + +#endif //_QOS_SYS_PRIVATE_H diff --git a/dthread/kern_init.c b/dthread/kern_init.c new file mode 100644 index 0000000..3321483 --- /dev/null +++ b/dthread/kern_init.c @@ -0,0 +1,65 @@ +// +// pthread.c +// pthread +// +// Created by Matt Wright on 9/13/12. +// Copyright (c) 2012 Matt Wright. All rights reserved. 
+// + +#include +#include +#include "kern_internal.h" + +kern_return_t pthread_start(kmod_info_t * ki, void *d); +kern_return_t pthread_stop(kmod_info_t *ki, void *d); + +pthread_callbacks_t pthread_kern; + +const struct pthread_functions_s pthread_internal_functions = { + .pthread_init = _pthread_init, + .pth_proc_hashinit = _pth_proc_hashinit, + .pth_proc_hashdelete = _pth_proc_hashdelete, + .bsdthread_create = _bsdthread_create, + .bsdthread_register = _bsdthread_register, + .bsdthread_terminate = _bsdthread_terminate, + .thread_selfid = _thread_selfid, + + .psynch_mutexwait = _psynch_mutexwait, + .psynch_mutexdrop = _psynch_mutexdrop, + .psynch_cvbroad = _psynch_cvbroad, + .psynch_cvsignal = _psynch_cvsignal, + .psynch_cvwait = _psynch_cvwait, + .psynch_cvclrprepost = _psynch_cvclrprepost, + .psynch_rw_longrdlock = _psynch_rw_longrdlock, + .psynch_rw_rdlock = _psynch_rw_rdlock, + .psynch_rw_unlock = _psynch_rw_unlock, + .psynch_rw_wrlock = _psynch_rw_wrlock, + .psynch_rw_yieldwrlock = _psynch_rw_yieldwrlock, + + .pthread_find_owner = _pthread_find_owner, + .pthread_get_thread_kwq = _pthread_get_thread_kwq, + + .workq_create_threadstack = workq_create_threadstack, + .workq_destroy_threadstack = workq_destroy_threadstack, + .workq_setup_thread = workq_setup_thread, + .workq_handle_stack_events = workq_handle_stack_events, + .workq_markfree_threadstack = workq_markfree_threadstack, +}; + +kern_return_t pthread_start(__unused kmod_info_t * ki, __unused void *d) +{ + pthread_kext_register((pthread_functions_t)&pthread_internal_functions, &pthread_kern); + return KERN_SUCCESS; +} + +kern_return_t pthread_stop(__unused kmod_info_t *ki, __unused void *d) +{ + return KERN_FAILURE; +} + +struct uthread* +current_uthread(void) +{ + thread_t th = current_thread(); + return pthread_kern->get_bsdthread_info(th); +} diff --git a/dthread/kern_internal.h b/dthread/kern_internal.h new file mode 100644 index 0000000..a6b4214 --- /dev/null +++ b/dthread/kern_internal.h @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +// header name modified for Darling +#ifndef _DTHREAD_KERN_INTERNAL_H +#define _DTHREAD_KERN_INTERNAL_H + +#include +#include +#include + +#ifdef KERNEL +struct ksyn_waitq_element; +#include +#include +#include +#ifdef __DARLING__ +#include +#else +#include +#endif +#include +#include + +#ifdef __arm64__ +#define PTHREAD_INLINE_RMW_ATOMICS 0 +#else +#define PTHREAD_INLINE_RMW_ATOMICS 1 +#endif +#endif // KERNEL + +#include "kern/synch_internal.h" +#include "kern/workqueue_internal.h" +#include "kern/kern_trace.h" +#include "pthread/qos.h" +#include "private/qos_private.h" + +/* pthread userspace SPI feature checking, these constants are returned from bsdthread_register, + * as a bitmask, to inform userspace of the supported feature set. Old releases of OS X return + * from this call either zero or -1, allowing us to return a positive number for feature bits. + */ +#define PTHREAD_FEATURE_DISPATCHFUNC 0x01 /* same as WQOPS_QUEUE_NEWSPISUPP, checks for dispatch function support */ +#define PTHREAD_FEATURE_FINEPRIO 0x02 /* are fine grained prioirities available */ +#define PTHREAD_FEATURE_BSDTHREADCTL 0x04 /* is the bsdthread_ctl syscall available */ +#define PTHREAD_FEATURE_SETSELF 0x08 /* is the BSDTHREAD_CTL_SET_SELF command of bsdthread_ctl available */ +#define PTHREAD_FEATURE_QOS_MAINTENANCE 0x10 /* is QOS_CLASS_MAINTENANCE available */ +#define PTHREAD_FEATURE_RESERVED 0x20 /* burnt, shipped in OSX 10.11 & iOS 9 with partial kevent delivery support */ +#define PTHREAD_FEATURE_KEVENT 0x40 /* supports direct kevent delivery */ +#define PTHREAD_FEATURE_WORKLOOP 0x80 /* supports workloops */ +#define PTHREAD_FEATURE_QOS_DEFAULT 0x40000000 /* the kernel supports QOS_CLASS_DEFAULT */ + +/* userspace <-> kernel registration struct, for passing data to/from the kext during main thread init. */ +struct _pthread_registration_data { + /* + * version == sizeof(struct _pthread_registration_data) + * + * The structure can only grow, so we use its size as the version. + * Userspace initializes this to the size of its structure and the kext + * will copy out the version that was actually consumed. + * + * n.b. you must make sure the size of this structure isn't LP64-dependent + */ + uint64_t version; + + uint64_t dispatch_queue_offset; /* copy-in */ + uint64_t /* pthread_priority_t */ main_qos; /* copy-out */ + uint32_t tsd_offset; /* copy-in */ + uint32_t return_to_kernel_offset; /* copy-in */ + uint32_t mach_thread_self_offset; /* copy-in */ + mach_vm_address_t stack_addr_hint; /* copy-out */ + uint32_t mutex_default_policy; /* copy-out */ +} __attribute__ ((packed)); + +/* + * "error" flags returned by fail condvar syscalls + */ +#define ECVCLEARED 0x100 +#define ECVPREPOST 0x200 + +#ifdef KERNEL + +/* The set of features, from the feature bits above, that we support. 
*/ +#define PTHREAD_FEATURE_SUPPORTED ( \ + PTHREAD_FEATURE_DISPATCHFUNC | \ + PTHREAD_FEATURE_FINEPRIO | \ + PTHREAD_FEATURE_BSDTHREADCTL | \ + PTHREAD_FEATURE_SETSELF | \ + PTHREAD_FEATURE_QOS_MAINTENANCE | \ + PTHREAD_FEATURE_QOS_DEFAULT | \ + PTHREAD_FEATURE_KEVENT | \ + PTHREAD_FEATURE_WORKLOOP ) + +#ifdef __DARLING__ +// since we had to move the `pthread_shims.h` include down, we need to declare this differently +struct pthread_callbacks_s; +extern const struct pthread_callbacks_s* pthread_kern; +#else +extern pthread_callbacks_t pthread_kern; +#endif + +struct ksyn_waitq_element { + TAILQ_ENTRY(ksyn_waitq_element) kwe_list; /* link to other list members */ + void * kwe_kwqqueue; /* queue blocked on */ + thread_t kwe_thread; + uint16_t kwe_state; /* state */ + uint16_t kwe_flags; + uint32_t kwe_lockseq; /* the sequence of the entry */ + uint32_t kwe_count; /* upper bound on number of matches still pending */ + uint32_t kwe_psynchretval; /* thread retval */ + void *kwe_uth; /* uthread */ +}; +typedef struct ksyn_waitq_element * ksyn_waitq_element_t; + +#define PTH_DEFAULT_STACKSIZE 512*1024 +#define MAX_PTHREAD_SIZE 64*1024 + +/* exported from the kernel but not present in any headers. */ +extern thread_t port_name_to_thread(mach_port_name_t port_name); + +/* function declarations for pthread_kext.c */ +void pthread_init(void); +void psynch_zoneinit(void); +void _pth_proc_hashinit(proc_t p); +void _pth_proc_hashdelete(proc_t p); +void pth_global_hashinit(void); +void psynch_wq_cleanup(void*, void*); + +void _pthread_init(void); +int _fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo); +uint32_t _get_pwq_state_kdp(proc_t p); +void _workqueue_exit(struct proc *p); +void _workqueue_mark_exiting(struct proc *p); +void _workqueue_thread_yielded(void); +sched_call_t _workqueue_get_sched_callback(void); + +int _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval); +int _bsdthread_register(struct proc *p, user_addr_t threadstart, user_addr_t wqthread, int pthsize, user_addr_t dummy_value, user_addr_t targetconc_ptr, uint64_t dispatchqueue_offset, int32_t *retval); +int _bsdthread_terminate(struct proc *p, user_addr_t stackaddr, size_t size, uint32_t kthport, uint32_t sem, int32_t *retval); +int _bsdthread_ctl_set_qos(struct proc *p, user_addr_t cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval); +int _bsdthread_ctl_set_self(struct proc *p, user_addr_t cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int *retval); +int _bsdthread_ctl_qos_override_start(struct proc *p, user_addr_t cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int *retval); +int _bsdthread_ctl_qos_override_end(struct proc *p, user_addr_t cmd, mach_port_name_t kport, user_addr_t resource, user_addr_t arg3, int *retval); +int _bsdthread_ctl_qos_override_dispatch(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t arg3, int __unused *retval); +int _bsdthread_ctl_qos_override_reset(struct proc __unused *p, user_addr_t __unused cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int __unused *retval); +int _bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval); +int 
_bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused *p, user_addr_t __unused cmd, int reset_all, user_addr_t resource, user_addr_t arg3, int __unused *retval); +int _bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval); +int _thread_selfid(__unused struct proc *p, uint64_t *retval); +int _workq_kernreturn(struct proc *p, int options, user_addr_t item, int arg2, int arg3, int32_t *retval); +int _workq_open(struct proc *p, int32_t *retval); + +int _psynch_mutexwait(proc_t p, user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags, uint32_t * retval); +int _psynch_mutexdrop(proc_t p, user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags, uint32_t * retval); +int _psynch_cvbroad(proc_t p, user_addr_t cv, uint64_t cvlsgen, uint64_t cvudgen, uint32_t flags, user_addr_t mutex, uint64_t mugen, uint64_t tid, uint32_t *retval); +int _psynch_cvsignal(proc_t p, user_addr_t cv, uint64_t cvlsgen, uint32_t cvugen, int thread_port, user_addr_t mutex, uint64_t mugen, uint64_t tid, uint32_t flags, uint32_t * retval); +int _psynch_cvwait(proc_t p, user_addr_t cv, uint64_t cvlsgen, uint32_t cvugen, user_addr_t mutex, uint64_t mugen, uint32_t flags, int64_t sec, uint32_t nsec, uint32_t * retval); +int _psynch_cvclrprepost(proc_t p, user_addr_t cv, uint32_t cvgen, uint32_t cvugen, uint32_t cvsgen, uint32_t prepocnt, uint32_t preposeq, uint32_t flags, int *retval); +int _psynch_rw_longrdlock(proc_t p, user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t * retval); +int _psynch_rw_rdlock(proc_t p, user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval); +int _psynch_rw_unlock(proc_t p, user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval); +int _psynch_rw_wrlock(proc_t p, user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval); +int _psynch_rw_yieldwrlock(proc_t p, user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval); + +void _pthread_find_owner(thread_t thread, struct stackshot_thread_waitinfo *waitinfo); +void * _pthread_get_thread_kwq(thread_t thread); + +extern lck_grp_attr_t *pthread_lck_grp_attr; +extern lck_grp_t *pthread_lck_grp; +extern lck_attr_t *pthread_lck_attr; +extern lck_mtx_t *pthread_list_mlock; +extern thread_call_t psynch_thcall; + +struct uthread* current_uthread(void); + +int +workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr); + +int +workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr); + +void +workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr, + mach_port_name_t kport, int th_qos, int setup_flags, int upcall_flags); + +int +workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map, + user_addr_t stackaddr, mach_port_name_t kport, + user_addr_t events, int nevents, int upcall_flags); + +void +workq_markfree_threadstack(proc_t p, thread_t th, vm_map_t vmap, + user_addr_t stackaddr); + +#ifdef __DARLING__ +// move it down here because it needs `struct ksyn_waitq_element` to be defined +#include +#endif + +#endif // KERNEL + +#endif // _DTHREAD_KERN_INTERNAL_H + diff --git a/dthread/kern_support.c b/dthread/kern_support.c new file mode 100644 index 0000000..0e576c2 --- /dev/null +++ b/dthread/kern_support.c @@ -0,0 +1,1020 @@ +/* + * Copyright (c) 2000-2017 
Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */ +/* + * pthread_synch.c + */ + +#pragma mark - Front Matter + +#define _PTHREAD_CONDATTR_T +#define _PTHREAD_COND_T +#define _PTHREAD_MUTEXATTR_T +#define _PTHREAD_MUTEX_T +#define _PTHREAD_RWLOCKATTR_T +#define _PTHREAD_RWLOCK_T + +#undef pthread_mutexattr_t +#undef pthread_mutex_t +#undef pthread_condattr_t +#undef pthread_cond_t +#undef pthread_rwlockattr_t +#undef pthread_rwlock_t + +#include +#include + +// panic() should be marked noreturn +extern void panic(const char *string, ...) __printflike(1,2) __dead2; + +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include /* for coredump */ +#include /* for fill_procworkqueue */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for thread_exception_return */ +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include /* for thread_resume */ +#include +#include + +#include +#include + +#include "kern_internal.h" + +#ifndef WQ_SETUP_EXIT_THREAD +#define WQ_SETUP_EXIT_THREAD 8 +#endif + +// XXX: Ditto for thread tags from kern/thread.h +#define THREAD_TAG_MAINTHREAD 0x1 +#define THREAD_TAG_PTHREAD 0x10 +#define THREAD_TAG_WORKQUEUE 0x20 + +lck_grp_attr_t *pthread_lck_grp_attr; +lck_grp_t *pthread_lck_grp; +lck_attr_t *pthread_lck_attr; + +#define C_32_STK_ALIGN 16 +#define C_64_STK_ALIGN 16 + +// WORKQ use the largest alignment any platform needs +#define C_WORKQ_STK_ALIGN 16 + +#if defined(__arm64__) +/* Pull the pthread_t into the same page as the top of the stack so we dirty one less page. 
+ * The _pthread struct at the top of the stack shouldn't be page-aligned + */ +#define PTHREAD_T_OFFSET (12*1024) +#else +#define PTHREAD_T_OFFSET 0 +#endif + +/* + * Flags filed passed to bsdthread_create and back in pthread_start +31 <---------------------------------> 0 +_________________________________________ +| flags(8) | policy(8) | importance(16) | +----------------------------------------- +*/ + +#define PTHREAD_START_CUSTOM 0x01000000 // +#define PTHREAD_START_SETSCHED 0x02000000 +// was PTHREAD_START_DETACHED 0x04000000 +#define PTHREAD_START_QOSCLASS 0x08000000 +#define PTHREAD_START_TSD_BASE_SET 0x10000000 +#define PTHREAD_START_SUSPENDED 0x20000000 +#define PTHREAD_START_QOSCLASS_MASK 0x00ffffff +#define PTHREAD_START_POLICY_BITSHIFT 16 +#define PTHREAD_START_POLICY_MASK 0xff +#define PTHREAD_START_IMPORTANCE_MASK 0xffff + +#define SCHED_OTHER POLICY_TIMESHARE +#define SCHED_FIFO POLICY_FIFO +#define SCHED_RR POLICY_RR + +#define BASEPRI_DEFAULT 31 + +uint32_t pthread_debug_tracing = 1; + +static uint32_t pthread_mutex_default_policy; + +SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED, + &pthread_mutex_default_policy, 0, ""); + +#pragma mark - Process/Thread Setup/Teardown syscalls + +static mach_vm_offset_t +stack_addr_hint(proc_t p, vm_map_t vmap) +{ + mach_vm_offset_t stackaddr; + mach_vm_offset_t aslr_offset; + bool proc64bit = proc_is64bit(p); + bool proc64bit_data = proc_is64bit_data(p); + + // We can't safely take random values % something unless its a power-of-two + _Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two"); + +#if defined(__i386__) || defined(__x86_64__) + (void)proc64bit_data; + if (proc64bit) { + // Matches vm_map_get_max_aslr_slide_pages's image shift in xnu + aslr_offset = random() % (1 << 28); // about 512 stacks + } else { + // Actually bigger than the image shift, we've got ~256MB to work with + aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE); + } + aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap)); + if (proc64bit) { + // Above nanomalloc range (see NANOZONE_SIGNATURE) + stackaddr = 0x700000000000 + aslr_offset; + } else { + stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset; + } +#elif defined(__arm__) || defined(__arm64__) + user_addr_t main_thread_stack_top = 0; + if (pthread_kern->proc_get_user_stack) { + main_thread_stack_top = pthread_kern->proc_get_user_stack(p); + } + if (proc64bit && main_thread_stack_top) { + // The main thread stack position is randomly slid by xnu (c.f. + // load_main() in mach_loader.c), so basing pthread stack allocations + // where the main thread stack ends is already ASLRd and doing so + // avoids creating a gap in the process address space that may cause + // extra PTE memory usage. rdar://problem/33328206 + stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top, + vm_map_page_mask(vmap)); + } else { + // vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better + aslr_offset = random() % ((proc64bit ? 
4 : 2) * PTH_DEFAULT_STACKSIZE); + aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset, + vm_map_page_mask(vmap)); + if (proc64bit) { + // 64 stacks below shared region + stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset; + } else { + // If you try to slide down from this point, you risk ending up in memory consumed by malloc + if (proc64bit_data) { + stackaddr = SHARED_REGION_BASE_ARM64_32; + } else { + stackaddr = SHARED_REGION_BASE_ARM; + } + + stackaddr -= 32 * PTH_DEFAULT_STACKSIZE + aslr_offset; + } + } +#else +#error Need to define a stack address hint for this architecture +#endif + return stackaddr; +} + +static bool +_pthread_priority_to_policy(pthread_priority_t priority, + thread_qos_policy_data_t *data) +{ + data->qos_tier = _pthread_priority_thread_qos(priority); + data->tier_importance = _pthread_priority_relpri(priority); + if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 || + data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) { + return false; + } + return true; +} + +/** + * bsdthread_create system call. Used by pthread_create. + */ +int +_bsdthread_create(struct proc *p, + __unused user_addr_t user_func, __unused user_addr_t user_funcarg, + user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, + user_addr_t *retval) +{ + kern_return_t kret; + void * sright; + int error = 0; + mach_vm_offset_t th_tsd_base; + mach_port_name_t th_thport; + thread_t th; + task_t ctask = current_task(); + unsigned int policy, importance; + uint32_t tsd_offset; + bool start_suspended = (flags & PTHREAD_START_SUSPENDED); + + if (pthread_kern->proc_get_register(p) == 0) { + return EINVAL; + } + + PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0); + + kret = pthread_kern->thread_create(ctask, &th); + if (kret != KERN_SUCCESS) + return(ENOMEM); + thread_reference(th); + + pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD); + + sright = (void *)pthread_kern->convert_thread_to_port(th); + th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask)); + if (!MACH_PORT_VALID(th_thport)) { + error = EMFILE; // userland will convert this into a crash + goto out; + } + + if ((flags & PTHREAD_START_CUSTOM) == 0) { + error = EINVAL; + goto out; + } + + PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3); + + tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p); + if (tsd_offset) { + th_tsd_base = user_pthread + tsd_offset; + kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base); + if (kret == KERN_SUCCESS) { + flags |= PTHREAD_START_TSD_BASE_SET; + } + } + /* + * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel + * supports this flag (after the fact). + */ + flags &= ~PTHREAD_START_SUSPENDED; + + /* + * Set up registers & function call. 
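+ * Summary of the per-architecture code below: the six start-up values -- pthread_t,
+ * kernel thread port, func, funcarg, stack and flags -- are handed over in registers
+ * (rdi..r9 on x86_64, x[0]..x[5] on arm64; the 32-bit variants use eax/ebx/ecx/edx/edi/esi
+ * and r[0]..r[5]), and the stack pointer is set to user_stack.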
+ */ +#if defined(__i386__) || defined(__x86_64__) + if (proc_is64bit_data(p)) { + x86_thread_state64_t state = { + .rip = (uint64_t)pthread_kern->proc_get_threadstart(p), + .rdi = (uint64_t)user_pthread, + .rsi = (uint64_t)th_thport, + .rdx = (uint64_t)user_func, /* golang wants this */ + .rcx = (uint64_t)user_funcarg, /* golang wants this */ + .r8 = (uint64_t)user_stack, /* golang wants this */ + .r9 = (uint64_t)flags, + + .rsp = (uint64_t)user_stack, + }; + + (void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state); + } else { + x86_thread_state32_t state = { + .eip = (uint32_t)pthread_kern->proc_get_threadstart(p), + .eax = (uint32_t)user_pthread, + .ebx = (uint32_t)th_thport, + .ecx = (uint32_t)user_func, /* golang wants this */ + .edx = (uint32_t)user_funcarg, /* golang wants this */ + .edi = (uint32_t)user_stack, /* golang wants this */ + .esi = (uint32_t)flags, + + .esp = (uint32_t)user_stack, + }; + + (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); + } +#elif defined(__arm__) || defined(__arm64__) + if (proc_is64bit_data(p)) { +#ifdef __arm64__ + arm_thread_state64_t state = { + .pc = (uint64_t)pthread_kern->proc_get_threadstart(p), + .x[0] = (uint64_t)user_pthread, + .x[1] = (uint64_t)th_thport, + .x[2] = (uint64_t)user_func, /* golang wants this */ + .x[3] = (uint64_t)user_funcarg, /* golang wants this */ + .x[4] = (uint64_t)user_stack, /* golang wants this */ + .x[5] = (uint64_t)flags, + + .sp = (uint64_t)user_stack, + }; + + (void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state); +#else + panic("Shouldn't have a 64-bit thread on a 32-bit kernel..."); +#endif // defined(__arm64__) + } else { + arm_thread_state_t state = { + .pc = (uint32_t)pthread_kern->proc_get_threadstart(p), + .r[0] = (uint32_t)user_pthread, + .r[1] = (uint32_t)th_thport, + .r[2] = (uint32_t)user_func, /* golang wants this */ + .r[3] = (uint32_t)user_funcarg, /* golang wants this */ + .r[4] = (uint32_t)user_stack, /* golang wants this */ + .r[5] = (uint32_t)flags, + + .sp = (uint32_t)user_stack, + }; + + (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); + } +#else +#error bsdthread_create not defined for this architecture +#endif + + if (flags & PTHREAD_START_SETSCHED) { + /* Set scheduling parameters if needed */ + thread_extended_policy_data_t extinfo; + thread_precedence_policy_data_t precedinfo; + + importance = (flags & PTHREAD_START_IMPORTANCE_MASK); + policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK; + + if (policy == SCHED_OTHER) { + extinfo.timeshare = 1; + } else { + extinfo.timeshare = 0; + } + + thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT); + + precedinfo.importance = (importance - BASEPRI_DEFAULT); + thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT); + } else if (flags & PTHREAD_START_QOSCLASS) { + /* Set thread QoS class if requested. 
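+ * The class is carried in the low 24 bits of flags (PTHREAD_START_QOSCLASS_MASK) as a
+ * pthread_priority_t; _pthread_priority_to_policy() rejects it unless it maps to a valid
+ * QoS tier with an in-range, non-positive relative priority.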
*/ + thread_qos_policy_data_t qos; + + if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) { + error = EINVAL; + goto out; + } + pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, + (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT); + } + + if (pthread_kern->proc_get_mach_thread_self_tsd_offset) { + uint64_t mach_thread_self_offset = + pthread_kern->proc_get_mach_thread_self_tsd_offset(p); + if (mach_thread_self_offset && tsd_offset) { + bool proc64bit = proc_is64bit(p); + if (proc64bit) { + uint64_t th_thport_tsd = (uint64_t)th_thport; + error = copyout(&th_thport_tsd, user_pthread + tsd_offset + + mach_thread_self_offset, sizeof(th_thport_tsd)); + } else { + uint32_t th_thport_tsd = (uint32_t)th_thport; + error = copyout(&th_thport_tsd, user_pthread + tsd_offset + + mach_thread_self_offset, sizeof(th_thport_tsd)); + } + if (error) { + goto out; + } + } + } + + if (!start_suspended) { + kret = pthread_kern->thread_resume(th); + if (kret != KERN_SUCCESS) { + error = EINVAL; + goto out; + } + } + thread_deallocate(th); /* drop the creator reference */ + + PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_END, error, user_pthread, 0, 0); + + *retval = user_pthread; + return(0); + +out: + (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport); + if (pthread_kern->thread_will_park_or_terminate) { + pthread_kern->thread_will_park_or_terminate(th); + } + (void)thread_terminate(th); + (void)thread_deallocate(th); + return(error); +} + +/** + * bsdthread_terminate system call. Used by pthread_terminate + */ +int +_bsdthread_terminate(__unused struct proc *p, + user_addr_t stackaddr, + size_t size, + uint32_t kthport, + uint32_t sem, + __unused int32_t *retval) +{ + mach_vm_offset_t freeaddr; + mach_vm_size_t freesize; + kern_return_t kret; + thread_t th = current_thread(); + + freeaddr = (mach_vm_offset_t)stackaddr; + freesize = size; + + PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff); + + if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) { + if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){ + vm_map_t user_map = pthread_kern->current_map(); + freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map)); + kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE); +#if MACH_ASSERT + if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) { + os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret); + } +#endif + kret = kret ? 
kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE); + assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS); + } else { + kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize); + if (kret != KERN_SUCCESS) { + PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0); + } + } + } + + if (pthread_kern->thread_will_park_or_terminate) { + pthread_kern->thread_will_park_or_terminate(th); + } + (void)thread_terminate(th); + if (sem != MACH_PORT_NULL) { + kret = pthread_kern->semaphore_signal_internal_trap(sem); + if (kret != KERN_SUCCESS) { + PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0); + } + } + + if (kthport != MACH_PORT_NULL) { + pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport); + } + + PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0); + + pthread_kern->thread_exception_return(); + __builtin_unreachable(); +} + +/** + * bsdthread_register system call. Performs per-process setup. Responsible for + * returning capabilitiy bits to userspace and receiving userspace function addresses. + */ +int +_bsdthread_register(struct proc *p, + user_addr_t threadstart, + user_addr_t wqthread, + int pthsize, + user_addr_t pthread_init_data, + user_addr_t pthread_init_data_size, + uint64_t dispatchqueue_offset, + int32_t *retval) +{ + struct _pthread_registration_data data = {}; + uint32_t max_tsd_offset; + kern_return_t kr; + size_t pthread_init_sz = 0; + + /* syscall randomizer test can pass bogus values */ + if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) { + return(EINVAL); + } + /* + * if we have pthread_init_data, then we use that and target_concptr + * (which is an offset) get data. + */ + if (pthread_init_data != 0) { + if (pthread_init_data_size < sizeof(data.version)) { + return EINVAL; + } + pthread_init_sz = MIN(sizeof(data), (size_t)pthread_init_data_size); + int ret = copyin(pthread_init_data, &data, pthread_init_sz); + if (ret) { + return ret; + } + if (data.version != (size_t)pthread_init_data_size) { + return EINVAL; + } + } else { + data.dispatch_queue_offset = dispatchqueue_offset; + } + + /* We have to do this before proc_get_register so that it resets after fork */ + mach_vm_offset_t stackaddr = stack_addr_hint(p, pthread_kern->current_map()); + pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stackaddr); + + /* prevent multiple registrations */ + if (pthread_kern->proc_get_register(p) != 0) { + return(EINVAL); + } + + pthread_kern->proc_set_threadstart(p, threadstart); + pthread_kern->proc_set_wqthread(p, wqthread); + pthread_kern->proc_set_pthsize(p, pthsize); + pthread_kern->proc_set_register(p); + + uint32_t tsd_slot_sz = proc_is64bit(p) ? 
sizeof(uint64_t) : sizeof(uint32_t); + if ((uint32_t)pthsize >= tsd_slot_sz && + data.tsd_offset <= (uint32_t)(pthsize - tsd_slot_sz)) { + max_tsd_offset = ((uint32_t)pthsize - data.tsd_offset - tsd_slot_sz); + } else { + data.tsd_offset = 0; + max_tsd_offset = 0; + } + pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset); + + if (data.dispatch_queue_offset > max_tsd_offset) { + data.dispatch_queue_offset = 0; + } + pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset); + + if (pthread_kern->proc_set_return_to_kernel_offset) { + if (data.return_to_kernel_offset > max_tsd_offset) { + data.return_to_kernel_offset = 0; + } + pthread_kern->proc_set_return_to_kernel_offset(p, + data.return_to_kernel_offset); + } + + if (pthread_kern->proc_set_mach_thread_self_tsd_offset) { + if (data.mach_thread_self_offset > max_tsd_offset) { + data.mach_thread_self_offset = 0; + } + pthread_kern->proc_set_mach_thread_self_tsd_offset(p, + data.mach_thread_self_offset); + } + + if (pthread_init_data != 0) { + /* Outgoing data that userspace expects as a reply */ + data.version = sizeof(struct _pthread_registration_data); + data.main_qos = _pthread_unspecified_priority(); + + if (pthread_kern->qos_main_thread_active()) { + mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT; + thread_qos_policy_data_t qos; + boolean_t gd = FALSE; + + kr = pthread_kern->thread_policy_get(current_thread(), + THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd); + if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) { + /* + * Unspecified threads means the kernel wants us + * to impose legacy upon the thread. + */ + qos.qos_tier = THREAD_QOS_LEGACY; + qos.tier_importance = 0; + + kr = pthread_kern->thread_policy_set_internal(current_thread(), + THREAD_QOS_POLICY, (thread_policy_t)&qos, + THREAD_QOS_POLICY_COUNT); + } + + if (kr == KERN_SUCCESS) { + data.main_qos = _pthread_priority_make_from_thread_qos( + qos.qos_tier, 0, 0); + } + } + + data.stack_addr_hint = stackaddr; + data.mutex_default_policy = pthread_mutex_default_policy; + + kr = copyout(&data, pthread_init_data, pthread_init_sz); + if (kr != KERN_SUCCESS) { + return EINVAL; + } + } + + /* return the supported feature set as the return value. 
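+ * (Presumably consulted by libpthread after bsdthread_register, together with the
+ *  _pthread_registration_data copied out above, to decide which optional kernel
+ *  features it may rely on.)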
*/ + *retval = PTHREAD_FEATURE_SUPPORTED; + + return(0); +} + + +#pragma mark - Workqueue Thread Support + +static mach_vm_size_t +workq_thread_allocsize(proc_t p, vm_map_t wq_map, + mach_vm_size_t *guardsize_out) +{ + mach_vm_size_t guardsize = vm_map_page_size(wq_map); + mach_vm_size_t pthread_size = vm_map_round_page_mask( + pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, + vm_map_page_mask(wq_map)); + if (guardsize_out) *guardsize_out = guardsize; + return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size; +} + +int +workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr) +{ + mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p); + mach_vm_size_t guardsize, th_allocsize; + kern_return_t kret; + + th_allocsize = workq_thread_allocsize(p, vmap, &guardsize); + kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1, + VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE, + VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); + + if (kret != KERN_SUCCESS) { + kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize, + VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE); + } + + if (kret != KERN_SUCCESS) { + goto fail; + } + + /* + * The guard page is at the lowest address + * The stack base is the highest address + */ + kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE); + if (kret != KERN_SUCCESS) { + goto fail_vm_deallocate; + } + + if (out_addr) { + *out_addr = stackaddr; + } + return 0; + +fail_vm_deallocate: + (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize); +fail: + return kret; +} + +int +workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr) +{ + return mach_vm_deallocate(vmap, stackaddr, + workq_thread_allocsize(p, vmap, NULL)); +} + +void +workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th, + vm_map_t vmap, user_addr_t stackaddr) +{ + // Keep this in sync with workq_setup_thread() + const vm_size_t guardsize = vm_map_page_size(vmap); + const user_addr_t freeaddr = (user_addr_t)stackaddr + guardsize; + const vm_map_offset_t freesize = vm_map_trunc_page_mask( + (PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1, + vm_map_page_mask(vmap)) - guardsize; + + __assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr, + freesize, VM_BEHAVIOR_REUSABLE); +#if MACH_ASSERT + if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) { + os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr); + } +#endif +} + +struct workq_thread_addrs { + user_addr_t self; + user_addr_t stack_bottom; + user_addr_t stack_top; +}; + +static inline void +workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr) +{ + th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN); +} + +static void +workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr, + struct workq_thread_addrs *th_addrs) +{ + const vm_size_t guardsize = vm_map_page_size(map); + + th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE + + guardsize + PTHREAD_T_OFFSET); + workq_thread_set_top_addr(th_addrs, th_addrs->self); + th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize); +} + +static inline void +workq_set_register_state(proc_t p, thread_t th, + struct workq_thread_addrs *addrs, mach_port_name_t kport, + user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count) +{ + user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p); + if (!wqstart_fnptr) { + panic("workqueue thread start function pointer is NULL"); + } + +#if 
defined(__i386__) || defined(__x86_64__) + if (proc_is64bit_data(p) == 0) { + x86_thread_state32_t state = { + .eip = (unsigned int)wqstart_fnptr, + .eax = /* arg0 */ (unsigned int)addrs->self, + .ebx = /* arg1 */ (unsigned int)kport, + .ecx = /* arg2 */ (unsigned int)addrs->stack_bottom, + .edx = /* arg3 */ (unsigned int)kevent_list, + .edi = /* arg4 */ (unsigned int)upcall_flags, + .esi = /* arg5 */ (unsigned int)kevent_count, + + .esp = (int)((vm_offset_t)addrs->stack_top), + }; + + int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); + if (error != KERN_SUCCESS) { + panic(__func__ ": thread_set_wq_state failed: %d", error); + } + } else { + x86_thread_state64_t state64 = { + // x86-64 already passes all the arguments in registers, so we just put them in their final place here + .rip = (uint64_t)wqstart_fnptr, + .rdi = (uint64_t)addrs->self, + .rsi = (uint64_t)kport, + .rdx = (uint64_t)addrs->stack_bottom, + .rcx = (uint64_t)kevent_list, + .r8 = (uint64_t)upcall_flags, + .r9 = (uint64_t)kevent_count, + + .rsp = (uint64_t)(addrs->stack_top) + }; + + int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64); + if (error != KERN_SUCCESS) { + panic(__func__ ": thread_set_wq_state failed: %d", error); + } + } +#elif defined(__arm__) || defined(__arm64__) + if (!proc_is64bit_data(p)) { + arm_thread_state_t state = { + .pc = (int)wqstart_fnptr, + .r[0] = (unsigned int)addrs->self, + .r[1] = (unsigned int)kport, + .r[2] = (unsigned int)addrs->stack_bottom, + .r[3] = (unsigned int)kevent_list, + // will be pushed onto the stack as arg4/5 + .r[4] = (unsigned int)upcall_flags, + .r[5] = (unsigned int)kevent_count, + + .sp = (int)(addrs->stack_top) + }; + + int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); + if (error != KERN_SUCCESS) { + panic(__func__ ": thread_set_wq_state failed: %d", error); + } + } else { +#if defined(__arm64__) + arm_thread_state64_t state = { + .pc = (uint64_t)wqstart_fnptr, + .x[0] = (uint64_t)addrs->self, + .x[1] = (uint64_t)kport, + .x[2] = (uint64_t)addrs->stack_bottom, + .x[3] = (uint64_t)kevent_list, + .x[4] = (uint64_t)upcall_flags, + .x[5] = (uint64_t)kevent_count, + + .sp = (uint64_t)((vm_offset_t)addrs->stack_top), + }; + + int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state); + if (error != KERN_SUCCESS) { + panic(__func__ ": thread_set_wq_state failed: %d", error); + } +#else /* defined(__arm64__) */ + panic("Shouldn't have a 64-bit thread on a 32-bit kernel..."); +#endif /* defined(__arm64__) */ + } +#else +#error setup_wqthread not defined for this architecture +#endif +} + +static inline int +workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs, + user_addr_t eventlist, int nevents, int kevent_flags, + user_addr_t *kevent_list_out, int *kevent_count_out) +{ + int ret; + + user_addr_t kevent_list = th_addrs->self - + WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s); + user_addr_t data_buf = kevent_list - WQ_KEVENT_DATA_SIZE; + user_size_t data_available = WQ_KEVENT_DATA_SIZE; + + ret = pthread_kern->kevent_workq_internal(p, eventlist, nevents, + kevent_list, WQ_KEVENT_LIST_LEN, + data_buf, &data_available, + kevent_flags, kevent_count_out); + + // squash any errors into just empty output + if (ret != 0 || *kevent_count_out == -1) { + *kevent_list_out = NULL; + *kevent_count_out = 0; + return ret; + } + + workq_thread_set_top_addr(th_addrs, data_buf + data_available); + *kevent_list_out = kevent_list; + return ret; +} + +/** + * configures initial thread 
stack/registers to jump into: + * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents); + * to get there we jump through assembily stubs in pthread_asm.s. Those + * routines setup a stack frame, using the current stack pointer, and marshall + * arguments from registers to the stack as required by the ABI. + * + * One odd thing we do here is to start the pthread_t 4k below what would be the + * top of the stack otherwise. This is because usually only the first 4k of the + * pthread_t will be used and so we want to put it on the same 16k page as the + * top of the stack to save memory. + * + * When we are done the stack will look like: + * |-----------| th_stackaddr + th_allocsize + * |pthread_t | th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_STACK_OFFSET + * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events + * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes + * |stack gap | bottom aligned to 16 bytes + * | STACK | + * | ⇓ | + * | | + * |guard page | guardsize + * |-----------| th_stackaddr + */ +__attribute__((noreturn,noinline)) +void +workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr, + mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags) +{ + struct workq_thread_addrs th_addrs; + bool first_use = (setup_flags & WQ_SETUP_FIRST_USE); + user_addr_t kevent_list = NULL; + int kevent_count = 0; + + workq_thread_get_addrs(map, stackaddr, &th_addrs); + + if (first_use) { + uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p); + if (tsd_offset) { + mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset; + kern_return_t kret = pthread_kern->thread_set_tsd_base(th, + th_tsd_base); + if (kret == KERN_SUCCESS) { + upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET; + } + } + + /* + * Pre-fault the first page of the new thread's stack and the page that will + * contain the pthread_t structure. 
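+ * Faulting these pages in now, while still in the kernel, means the new thread does not
+ * immediately take back-to-back page faults the moment it starts running on its stack
+ * and touching its TSD.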
+ */ + vm_map_offset_t mask = vm_map_page_mask(map); + vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask); + vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask); + if (th_page != stk_page) { + vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0); + } + vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0); + } + + if (setup_flags & WQ_SETUP_EXIT_THREAD) { + kevent_count = WORKQ_EXIT_THREAD_NKEVENT; + } else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) { + unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE; + workq_kevent(p, &th_addrs, NULL, 0, flags, &kevent_list, &kevent_count); + } + + workq_set_register_state(p, th, &th_addrs, kport, + kevent_list, upcall_flags, kevent_count); + + if (first_use) { + pthread_kern->thread_bootstrap_return(); + } else { + pthread_kern->unix_syscall_return(EJUSTRETURN); + } + __builtin_unreachable(); +} + +int +workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map, + user_addr_t stackaddr, mach_port_name_t kport, + user_addr_t events, int nevents, int upcall_flags) +{ + struct workq_thread_addrs th_addrs; + user_addr_t kevent_list = NULL; + int kevent_count = 0, error; + __assert_only kern_return_t kr; + + workq_thread_get_addrs(map, stackaddr, &th_addrs); + + unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE | + KEVENT_FLAG_PARKING; + error = workq_kevent(p, &th_addrs, events, nevents, flags, + &kevent_list, &kevent_count); + + if (error || kevent_count == 0) { + return error; + } + + kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL); + assert(kr == KERN_SUCCESS); + + workq_set_register_state(p, th, &th_addrs, kport, + kevent_list, upcall_flags, kevent_count); + + pthread_kern->unix_syscall_return(EJUSTRETURN); + __builtin_unreachable(); +} + +int +_thread_selfid(__unused struct proc *p, uint64_t *retval) +{ + thread_t thread = current_thread(); + *retval = thread_tid(thread); + return KERN_SUCCESS; +} + +void +_pthread_init(void) +{ + pthread_lck_grp_attr = lck_grp_attr_alloc_init(); + pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr); + + /* + * allocate the lock attribute for pthread synchronizers + */ + pthread_lck_attr = lck_attr_alloc_init(); + pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr); + + pth_global_hashinit(); + psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL); + psynch_zoneinit(); + + int policy_bootarg; + if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) { + pthread_mutex_default_policy = policy_bootarg; + } + + sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy); +} diff --git a/dthread/kern_synch.c b/dthread/kern_synch.c new file mode 100644 index 0000000..0fdcc1e --- /dev/null +++ b/dthread/kern_synch.c @@ -0,0 +1,2835 @@ +/* + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */ +/* + * pthread_support.c + */ + +#ifdef __DARLING__ +#include +#include +#endif + +#ifdef __DARLING__ +#include "kern_internal.h" +#endif + +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +//#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +//#include +#include +#include + +#include + +#include + +#ifndef __DARLING__ +#include "kern_internal.h" +#endif +#include "synch_internal.h" +#include "kern_trace.h" + +#ifdef __DARLING__ +#include + +#include +#endif + +typedef struct uthread *uthread_t; + +//#define __FAILEDUSERTEST__(s) do { panic(s); } while (0) +#define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0) +#define __FAILEDUSERTEST2__(s, x...) 
do { printf("PSYNCH: pid[%d]: " s "\n", proc_pid(current_proc()), x); } while (0) + +lck_mtx_t *pthread_list_mlock; + +#define PTH_HASHSIZE 100 + +static LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl; +static unsigned long pthhash; + +static LIST_HEAD(, ksyn_wait_queue) pth_free_list; + +static zone_t kwq_zone; /* zone for allocation of ksyn_queue */ +static zone_t kwe_zone; /* zone for allocation of ksyn_waitq_element */ + +#define SEQFIT 0 +#define FIRSTFIT 1 + +struct ksyn_queue { + TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist; + uint32_t ksynq_count; /* number of entries in queue */ + uint32_t ksynq_firstnum; /* lowest seq in queue */ + uint32_t ksynq_lastnum; /* highest seq in queue */ +}; +typedef struct ksyn_queue *ksyn_queue_t; + +typedef enum { + KSYN_QUEUE_READ = 0, + KSYN_QUEUE_WRITE, + KSYN_QUEUE_MAX, +} kwq_queue_type_t; + +typedef enum { + KWQ_INTR_NONE = 0, + KWQ_INTR_READ = 0x1, + KWQ_INTR_WRITE = 0x2, +} kwq_intr_type_t; + +struct ksyn_wait_queue { + LIST_ENTRY(ksyn_wait_queue) kw_hash; + LIST_ENTRY(ksyn_wait_queue) kw_list; + user_addr_t kw_addr; + thread_t kw_owner; /* current owner or THREAD_NULL, has a +1 */ + uint64_t kw_object; /* object backing in shared mode */ + uint64_t kw_offset; /* offset inside the object in shared mode */ + int kw_pflags; /* flags under listlock protection */ + struct timeval kw_ts; /* timeval need for upkeep before free */ + int kw_iocount; /* inuse reference */ + int kw_dropcount; /* current users unlocking... */ + + int kw_type; /* queue type like mutex, cvar, etc */ + uint32_t kw_inqueue; /* num of waiters held */ + uint32_t kw_fakecount; /* number of error/prepost fakes */ + uint32_t kw_highseq; /* highest seq in the queue */ + uint32_t kw_lowseq; /* lowest seq in the queue */ + uint32_t kw_lword; /* L value from userland */ + uint32_t kw_uword; /* U world value from userland */ + uint32_t kw_sword; /* S word value from userland */ + uint32_t kw_lastunlockseq; /* the last seq that unlocked */ + /* for CV to be used as the seq kernel has seen so far */ +#define kw_cvkernelseq kw_lastunlockseq + uint32_t kw_lastseqword; /* the last seq that unlocked */ + /* for mutex and cvar we need to track I bit values */ + uint32_t kw_nextseqword; /* the last seq that unlocked; with num of waiters */ + struct { + uint32_t count; /* prepost count */ + uint32_t lseq; /* prepost target seq */ + uint32_t sseq; /* prepost target sword, in cvar used for mutexowned */ + } kw_prepost; + struct { + kwq_intr_type_t type; /* type of failed wakueps */ + uint32_t count; /* prepost of missed wakeup due to intrs */ + uint32_t seq; /* prepost of missed wakeup limit seq */ + uint32_t returnbits; /* return bits value for missed wakeup threads */ + } kw_intr; + + int kw_kflags; + int kw_qos_override; /* QoS of max waiter during contention period */ + struct turnstile *kw_turnstile; + struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX]; /* queues to hold threads */ + lck_spin_t kw_lock; /* spinlock protecting this structure */ + struct __wait_queue_head linux_wq; +}; +typedef struct ksyn_wait_queue * ksyn_wait_queue_t; + +#define TID_ZERO (uint64_t)0 + +/* bits needed in handling the rwlock unlock */ +#define PTH_RW_TYPE_READ 0x01 +#define PTH_RW_TYPE_WRITE 0x04 +#define PTH_RW_TYPE_MASK 0xff +#define PTH_RW_TYPE_SHIFT 8 + +#define PTH_RWSHFT_TYPE_READ 0x0100 +#define PTH_RWSHFT_TYPE_WRITE 0x0400 +#define PTH_RWSHFT_TYPE_MASK 0xff00 + +/* + * Mutex pshared attributes + */ +#define PTHREAD_PROCESS_SHARED _PTHREAD_MTX_OPT_PSHARED +#define 
PTHREAD_PROCESS_PRIVATE 0x20 +#define PTHREAD_PSHARED_FLAGS_MASK 0x30 + +/* + * Mutex policy attributes + */ +#define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE 0x040 /* 1 */ +#define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT 0x080 /* 2 */ +#define _PTHREAD_MTX_OPT_POLICY_MASK 0x1c0 + +/* pflags */ +#define KSYN_WQ_INHASH 2 +#define KSYN_WQ_SHARED 4 +#define KSYN_WQ_WAITING 8 /* threads waiting for this wq to be available */ +#define KSYN_WQ_FLIST 0X10 /* in free list to be freed after a short delay */ + +/* kflags */ +#define KSYN_KWF_INITCLEARED 0x1 /* the init status found and preposts cleared */ +#define KSYN_KWF_ZEROEDOUT 0x2 /* the lword, etc are inited to 0 */ +#define KSYN_KWF_QOS_APPLIED 0x4 /* QoS override applied to owner */ +#define KSYN_KWF_OVERLAP_GUARD 0x8 /* overlap guard */ + +#define KSYN_CLEANUP_DEADLINE 10 +static int psynch_cleanupset; +thread_call_t psynch_thcall; + +#define KSYN_WQTYPE_INWAIT 0x1000 +#define KSYN_WQTYPE_INDROP 0x2000 +#define KSYN_WQTYPE_MTX 0x01 +#define KSYN_WQTYPE_CVAR 0x02 +#define KSYN_WQTYPE_RWLOCK 0x04 +#define KSYN_WQTYPE_SEMA 0x08 +#define KSYN_WQTYPE_MASK 0xff + +#define KSYN_WQTYPE_MUTEXDROP (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX) + +static inline int +_kwq_type(ksyn_wait_queue_t kwq) +{ + return (kwq->kw_type & KSYN_WQTYPE_MASK); +} + +static inline bool +_kwq_use_turnstile(ksyn_wait_queue_t kwq) +{ + // If we had writer-owner information from the + // rwlock then we could use the turnstile to push on it. For now, only + // plain mutexes use it. + return (_kwq_type(kwq) == KSYN_WQTYPE_MTX); +} + +#define KW_UNLOCK_PREPOST 0x01 +#define KW_UNLOCK_PREPOST_READLOCK 0x08 +#define KW_UNLOCK_PREPOST_WRLOCK 0x20 + +static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t object, uint64_t offset); +static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype , ksyn_wait_queue_t *wq); +static void ksyn_wqrelease(ksyn_wait_queue_t mkwq, int qfreenow, int wqtype); +static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp); + +static int _wait_result_to_errno(wait_result_t result); + +static int ksyn_wait(ksyn_wait_queue_t, kwq_queue_type_t, uint32_t, int, uint64_t, uint16_t, thread_continue_t, block_hint_t); +static kern_return_t ksyn_signal(ksyn_wait_queue_t, kwq_queue_type_t, ksyn_waitq_element_t, uint32_t); +static void ksyn_freeallkwe(ksyn_queue_t kq); + +static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t, thread_t *); + +static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen); + +static void ksyn_queue_init(ksyn_queue_t kq); +static int ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int firstfit); +static void ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe); +static void ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all); + +static void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq); +static uint32_t find_nextlowseq(ksyn_wait_queue_t kwq); +static uint32_t find_nexthighseq(ksyn_wait_queue_t kwq); +static int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp); + +static uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto); + +static ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen); +static void 
ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep); +static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep); +static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq); + +static void __dead2 psynch_cvcontinue(void *, wait_result_t); +static void __dead2 psynch_mtxcontinue(void *, wait_result_t); +static void __dead2 psynch_rw_rdcontinue(void *, wait_result_t); +static void __dead2 psynch_rw_wrcontinue(void *, wait_result_t); + +static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp); +static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]); +static ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq); + +static void +UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc) +{ + int sinit = ((rw_wc & PTH_RWS_CV_CBIT) != 0); + + // assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR); + + if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) { + /* the values of L,U and S are cleared out due to L==S in previous transition */ + kwq->kw_lword = mgen; + kwq->kw_uword = ugen; + kwq->kw_sword = rw_wc; + kwq->kw_kflags &= ~KSYN_KWF_ZEROEDOUT; + } else { + if (is_seqhigher(mgen, kwq->kw_lword)) { + kwq->kw_lword = mgen; + } + if (is_seqhigher(ugen, kwq->kw_uword)) { + kwq->kw_uword = ugen; + } + if (sinit && is_seqhigher(rw_wc, kwq->kw_sword)) { + kwq->kw_sword = rw_wc; + } + } + if (sinit && is_seqlower(kwq->kw_cvkernelseq, rw_wc)) { + kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK); + } +} + +static inline void +_kwq_clear_preposted_wakeup(ksyn_wait_queue_t kwq) +{ + kwq->kw_prepost.lseq = 0; + kwq->kw_prepost.sseq = PTHRW_RWS_INIT; + kwq->kw_prepost.count = 0; +} + +static inline void +_kwq_mark_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t count, + uint32_t lseq, uint32_t sseq) +{ + kwq->kw_prepost.count = count; + kwq->kw_prepost.lseq = lseq; + kwq->kw_prepost.sseq = sseq; +} + +static inline void +_kwq_clear_interrupted_wakeup(ksyn_wait_queue_t kwq) +{ + kwq->kw_intr.type = KWQ_INTR_NONE; + kwq->kw_intr.count = 0; + kwq->kw_intr.seq = 0; + kwq->kw_intr.returnbits = 0; +} + +static inline void +_kwq_mark_interruped_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type, + uint32_t count, uint32_t lseq, uint32_t returnbits) +{ + kwq->kw_intr.count = count; + kwq->kw_intr.seq = lseq; + kwq->kw_intr.returnbits = returnbits; + kwq->kw_intr.type = type; +} + +static void +_kwq_destroy(ksyn_wait_queue_t kwq) +{ + if (kwq->kw_owner) { + thread_deallocate(kwq->kw_owner); + } + lck_spin_destroy(&kwq->kw_lock, pthread_lck_grp); + zfree(kwq_zone, kwq); +} + +#define KWQ_SET_OWNER_TRANSFER_REF 0x1 + +static inline thread_t +_kwq_set_owner(ksyn_wait_queue_t kwq, thread_t new_owner, int flags) +{ + thread_t old_owner = kwq->kw_owner; + if (old_owner == new_owner) { + if (flags & KWQ_SET_OWNER_TRANSFER_REF) return new_owner; + return THREAD_NULL; + } + if ((flags & KWQ_SET_OWNER_TRANSFER_REF) == 0) { + thread_reference(new_owner); + } + kwq->kw_owner = new_owner; + return old_owner; +} + +static inline thread_t +_kwq_clear_owner(ksyn_wait_queue_t kwq) +{ + return _kwq_set_owner(kwq, THREAD_NULL, KWQ_SET_OWNER_TRANSFER_REF); +} + +static inline void +_kwq_cleanup_old_owner(thread_t *thread) +{ + if (*thread) { + thread_deallocate(*thread); + *thread = THREAD_NULL; + } +} + +static void 
+CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq) +{ + if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) { + if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) { + panic("CV:entries in queue durinmg reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount); + } + }; + if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) { + kwq->kw_nextseqword = PTHRW_RWS_INIT; + kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD; + }; + _kwq_clear_preposted_wakeup(kwq); + kwq->kw_lastunlockseq = PTHRW_RWL_INIT; + kwq->kw_lastseqword = PTHRW_RWS_INIT; + _kwq_clear_interrupted_wakeup(kwq); + kwq->kw_lword = 0; + kwq->kw_uword = 0; + kwq->kw_sword = PTHRW_RWS_INIT; +} + +static bool +_kwq_handle_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t type, + uint32_t lseq, uint32_t *retval) +{ + if (kwq->kw_prepost.count == 0 || + !is_seqlower_eq(lseq, kwq->kw_prepost.lseq)) { + return false; + } + + kwq->kw_prepost.count--; + if (kwq->kw_prepost.count > 0) { + return false; + } + + int error, should_block = 0; + uint32_t updatebits = 0; + uint32_t pp_lseq = kwq->kw_prepost.lseq; + uint32_t pp_sseq = kwq->kw_prepost.sseq; + _kwq_clear_preposted_wakeup(kwq); + + kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED; + + error = kwq_handle_unlock(kwq, pp_lseq, pp_sseq, &updatebits, + (type | KW_UNLOCK_PREPOST), &should_block, lseq); + if (error) { + panic("_kwq_handle_preposted_wakeup: kwq_handle_unlock failed %d", + error); + } + + if (should_block) { + return false; + } + *retval = updatebits; + return true; +} + +static bool +_kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t type, uint32_t lgenval, + uint32_t rw_wc, uint32_t *retval) +{ + int res = 0; + + // overlaps only occur on read lockers + if (type != PTH_RW_TYPE_READ) { + return false; + } + + // check for overlap and no pending W bit (indicates writers) + if ((kwq->kw_kflags & KSYN_KWF_OVERLAP_GUARD) && + !is_rws_savemask_set(rw_wc) && !is_rwl_wbit_set(lgenval)) { + /* overlap is set, so no need to check for valid state for overlap */ + + if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) { + /* increase the next expected seq by one */ + kwq->kw_nextseqword += PTHRW_INC; + /* set count by one & bits from the nextseq and add M bit */ + *retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT); + res = 1; + } + } + return res; +} + +static inline bool +_kwq_is_used(ksyn_wait_queue_t kwq) +{ + return (kwq->kw_inqueue != 0 || kwq->kw_prepost.count != 0 || + kwq->kw_intr.count != 0); +} + +/* + * consumes a pending interrupted waiter, returns true if the current + * thread should return back to userspace because it was previously + * interrupted. 
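+ * A pending wakeup is consumed only when its type matches and the waiter's lockseq is at
+ * or below kw_intr.seq (or no limit seq was recorded); the waiter is handed
+ * kw_intr.returnbits as its return value.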
+ */ +static inline bool +_kwq_handle_interrupted_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type, + uint32_t lseq, uint32_t *retval) +{ + if (kwq->kw_intr.count != 0 && kwq->kw_intr.type == type && + (!kwq->kw_intr.seq || is_seqlower_eq(lseq, kwq->kw_intr.seq))) { + kwq->kw_intr.count--; + *retval = kwq->kw_intr.returnbits; + if (kwq->kw_intr.returnbits == 0) { + _kwq_clear_interrupted_wakeup(kwq); + } + return true; + } + return false; +} + +static void +pthread_list_lock(void) +{ + lck_mtx_lock_spin(pthread_list_mlock); +} + +static void +pthread_list_unlock(void) +{ + lck_mtx_unlock(pthread_list_mlock); +} + +static void +ksyn_wqlock(ksyn_wait_queue_t kwq) +{ + lck_spin_lock(&kwq->kw_lock); +} + +static void +ksyn_wqunlock(ksyn_wait_queue_t kwq) +{ + lck_spin_unlock(&kwq->kw_lock); +} + +#ifdef __DARLING__ +// custom implementations of some functions in BSD's `kern_synch.c` +static int msleep(void* chan, lck_mtx_t* mtx, int pri, const char* wmesg, struct timespec* ts) { + ksyn_wait_queue_t kwq = container_of(chan, struct ksyn_wait_queue, kw_pflags); + pthread_list_unlock(); + if (wait_event_interruptible(kwq->linux_wq, !(kwq->kw_pflags & KSYN_WQ_WAITING) || darling_thread_canceled()) != 0 || darling_thread_canceled()) { + return EINTR; + } + return 0; +}; + +static void wakeup(void* chan) { + ksyn_wait_queue_t kwq = container_of(chan, struct ksyn_wait_queue, kw_pflags); + wake_up_interruptible(&kwq->linux_wq); +}; +#endif + +/* routine to drop the mutex unlocks , used both for mutexunlock system call and drop during cond wait */ +static uint32_t +_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, + int flags) +{ + kern_return_t ret; + uint32_t returnbits = 0; + uint32_t updatebits = 0; + int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) == + _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; + uint32_t nextgen = (ugen + PTHRW_INC); + thread_t old_owner = THREAD_NULL; + + ksyn_wqlock(kwq); + kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK); + +redrive: + updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | + (PTH_RWL_EBIT | PTH_RWL_KBIT); + + if (firstfit) { + if (kwq->kw_inqueue == 0) { + uint32_t count = kwq->kw_prepost.count + 1; + // Increment the number of preposters we have waiting + _kwq_mark_preposted_wakeup(kwq, count, mgen & PTHRW_COUNT_MASK, 0); + // We don't know the current owner as we've determined this mutex + // drop should have a preposted locker inbound into the kernel but + // we have no way of knowing who it is. When it arrives, the lock + // path will update the turnstile owner and return it to userspace. + old_owner = _kwq_clear_owner(kwq); + pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL, + &kwq->kw_turnstile); + PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr, + kwq->kw_prepost.lseq, count, 0); + } else { + // signal first waiter + ret = ksyn_mtxsignal(kwq, NULL, updatebits, &old_owner); + if (ret == KERN_NOT_WAITING) { + // ksyn_mtxsignal attempts to signal + // the thread but it sets up the turnstile inheritor first. + // That means we can't redrive the mutex in a loop without + // dropping the wq lock and cleaning up the turnstile state. + ksyn_wqunlock(kwq); + pthread_kern->psynch_wait_cleanup(); + _kwq_cleanup_old_owner(&old_owner); + ksyn_wqlock(kwq); + goto redrive; + } + } + } else { + bool prepost = false; + if (kwq->kw_inqueue == 0) { + // No waiters in the queue. 
+ prepost = true; + } else { + uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum & PTHRW_COUNT_MASK); + if (low_writer == nextgen) { + /* next seq to be granted found */ + /* since the grant could be cv, make sure mutex wait is set incase the thread interrupted out */ + ret = ksyn_mtxsignal(kwq, NULL, + updatebits | PTH_RWL_MTX_WAIT, &old_owner); + if (ret == KERN_NOT_WAITING) { + /* interrupt post */ + _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1, + nextgen, updatebits); + } + } else if (is_seqhigher(low_writer, nextgen)) { + prepost = true; + } else { + //__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one is queue\n"); + ksyn_waitq_element_t kwe; + kwe = ksyn_queue_find_seq(kwq, + &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], nextgen); + if (kwe != NULL) { + /* next seq to be granted found */ + /* since the grant could be cv, make sure mutex wait is set incase the thread interrupted out */ + ret = ksyn_mtxsignal(kwq, kwe, + updatebits | PTH_RWL_MTX_WAIT, &old_owner); + if (ret == KERN_NOT_WAITING) { + goto redrive; + } + } else { + prepost = true; + } + } + } + if (prepost) { + if (kwq->kw_prepost.count != 0) { + __FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n"); + } else { + _kwq_mark_preposted_wakeup(kwq, 1, nextgen & PTHRW_COUNT_MASK, + 0); + } + old_owner = _kwq_clear_owner(kwq); + pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL, + &kwq->kw_turnstile); + } + } + + ksyn_wqunlock(kwq); + pthread_kern->psynch_wait_cleanup(); + _kwq_cleanup_old_owner(&old_owner); + ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP); + return returnbits; +} + +static int +_ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval) +{ + int res = (lgenval & PTHRW_RWL_INIT) != 0; + if (res) { + if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) { + /* first to notice the reset of the lock, clear preposts */ + CLEAR_REINIT_BITS(kwq); + kwq->kw_kflags |= KSYN_KWF_INITCLEARED; + } + } + return res; +} + +/* + * psynch_mutexwait: This system call is used for contended psynch mutexes to + * block. 
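+ * mgen carries the waiter's lock sequence in its PTHRW_COUNT_MASK bits; with the
+ * first-fit policy (_PTHREAD_MTX_OPT_POLICY_FIRSTFIT in flags) any prepost may be
+ * consumed, otherwise the sequence must match kw_prepost.lseq. The tid argument is
+ * only an ownership hint and may be 0 or PTHREAD_MTX_TID_SWITCHING.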
+ */ +int +_psynch_mutexwait(__unused proc_t p, user_addr_t mutex, uint32_t mgen, + uint32_t ugen, uint64_t tid, uint32_t flags, uint32_t *retval) +{ + ksyn_wait_queue_t kwq; + int error = 0; + int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) + == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; + int ins_flags = SEQFIT; + uint32_t lseq = (mgen & PTHRW_COUNT_MASK); + uint32_t updatebits = 0; + thread_t tid_th = THREAD_NULL, old_owner = THREAD_NULL; + + if (firstfit) { + /* first fit */ + ins_flags = FIRSTFIT; + } + + error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, + (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX), &kwq); + if (error != 0) { + return error; + } + +again: + ksyn_wqlock(kwq); + + if (_kwq_handle_interrupted_wakeup(kwq, KWQ_INTR_WRITE, lseq, retval)) { + old_owner = _kwq_set_owner(kwq, current_thread(), 0); + pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner, + &kwq->kw_turnstile); + ksyn_wqunlock(kwq); + goto out; + } + + if (kwq->kw_prepost.count && (firstfit || (lseq == kwq->kw_prepost.lseq))) { + /* got preposted lock */ + kwq->kw_prepost.count--; + + if (!firstfit) { + if (kwq->kw_prepost.count > 0) { + __FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n"); + kwq->kw_prepost.lseq += PTHRW_INC; /* look for next one */ + ksyn_wqunlock(kwq); + error = EINVAL; + goto out; + } + _kwq_clear_preposted_wakeup(kwq); + } + + if (kwq->kw_inqueue == 0) { + updatebits = lseq | (PTH_RWL_KBIT | PTH_RWL_EBIT); + } else { + updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | + (PTH_RWL_KBIT | PTH_RWL_EBIT); + } + updatebits &= ~PTH_RWL_MTX_WAIT; + + if (updatebits == 0) { + __FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT \n"); + } + + PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr, + kwq->kw_prepost.lseq, kwq->kw_prepost.count, 1); + + old_owner = _kwq_set_owner(kwq, current_thread(), 0); + pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner, + &kwq->kw_turnstile); + + ksyn_wqunlock(kwq); + *retval = updatebits; + goto out; + } + + // mutexwait passes in an owner hint at the time userspace contended for + // the mutex, however, the owner tid in the userspace data structure may be + // unset or SWITCHING (-1), or it may correspond to a stale snapshot after + // the lock has subsequently been unlocked by another thread. + if (tid == thread_tid(kwq->kw_owner)) { + // userspace and kernel agree + } else if (tid == 0) { + // contender came in before owner could write TID + // let's assume that what the kernel knows is accurate + // for all we know this waiter came in late in the kernel + } else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && + is_seqlower(ugen, kwq->kw_lastunlockseq)) { + // owner is stale, someone has come in and unlocked since this + // contended read the TID, so assume what is known in the kernel is + // accurate + } else if (tid == PTHREAD_MTX_TID_SWITCHING) { + // userspace didn't know the owner because it was being unlocked, but + // that unlocker hasn't reached the kernel yet. 
So assume what is known + // in the kernel is accurate + } else { + // hint is being passed in for a specific thread, and we have no reason + // not to trust it (like the kernel unlock sequence being higher) + // + // So resolve the hint to a thread_t if we haven't done so yet + // and redrive as we dropped the lock + if (tid_th == THREAD_NULL) { + ksyn_wqunlock(kwq); + tid_th = pthread_kern->task_findtid(current_task(), tid); + if (tid_th == THREAD_NULL) tid = 0; + goto again; + } + tid_th = _kwq_set_owner(kwq, tid_th, KWQ_SET_OWNER_TRANSFER_REF); + } + + if (tid_th) { + // We are on our way to block, and can't drop the spinlock anymore + pthread_kern->thread_deallocate_safe(tid_th); + tid_th = THREAD_NULL; + } + assert(old_owner == THREAD_NULL); + error = ksyn_wait(kwq, KSYN_QUEUE_WRITE, mgen, ins_flags, 0, 0, + psynch_mtxcontinue, kThreadWaitPThreadMutex); + // ksyn_wait drops wait queue lock +out: + pthread_kern->psynch_wait_cleanup(); + ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX)); + if (tid_th) { + thread_deallocate(tid_th); + } + if (old_owner) { + thread_deallocate(old_owner); + } + return error; +} + +void __dead2 +psynch_mtxcontinue(void *parameter, wait_result_t result) +{ + uthread_t uth = current_uthread(); + ksyn_wait_queue_t kwq = parameter; + ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); + + ksyn_wqlock(kwq); + + int error = _wait_result_to_errno(result); + if (error != 0) { + if (kwe->kwe_kwqqueue) { + ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe); + } + } else { + uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT; + pthread_kern->uthread_set_returnval(uth, updatebits); + + if (updatebits == 0) { + __FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT \n"); + } + } + + pthread_kern->psynch_wait_complete(kwq, &kwq->kw_turnstile); + + ksyn_wqunlock(kwq); + pthread_kern->psynch_wait_cleanup(); + ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX)); + pthread_kern->unix_syscall_return(error); + __builtin_unreachable(); +} + +static void __dead2 +_psynch_rw_continue(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, + wait_result_t result) +{ + uthread_t uth = current_uthread(); + ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); + + ksyn_wqlock(kwq); + + int error = _wait_result_to_errno(result); + if (error != 0) { + if (kwe->kwe_kwqqueue) { + ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe); + } + } else { + pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval); + } + + ksyn_wqunlock(kwq); + ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK)); + + pthread_kern->unix_syscall_return(error); + __builtin_unreachable(); +} + +void __dead2 +psynch_rw_rdcontinue(void *parameter, wait_result_t result) +{ + _psynch_rw_continue(parameter, KSYN_QUEUE_READ, result); +} + +void __dead2 +psynch_rw_wrcontinue(void *parameter, wait_result_t result) +{ + _psynch_rw_continue(parameter, KSYN_QUEUE_WRITE, result); +} + +/* + * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes. 
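+ * Thin wrapper: looks up the wait queue (KSYN_WQTYPE_MUTEXDROP) and returns whatever
+ * update bits _psynch_mutexdrop_internal() computed while signalling or preposting the
+ * next waiter; the internal call also drops the kwq reference taken here.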
+ */ +int +_psynch_mutexdrop(__unused proc_t p, user_addr_t mutex, uint32_t mgen, + uint32_t ugen, uint64_t tid __unused, uint32_t flags, uint32_t *retval) +{ + int res; + ksyn_wait_queue_t kwq; + + res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq); + if (res == 0) { + uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags); + /* drops the kwq reference */ + if (retval) { + *retval = updateval; + } + } + + return res; +} + +static kern_return_t +ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, + uint32_t updateval, thread_t *old_owner) +{ + kern_return_t ret; + + if (!kwe) { + kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_kwelist); + if (!kwe) { + panic("ksyn_mtxsignal: panic signaling empty queue"); + } + } + + PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_START, kwq->kw_addr, kwe, + thread_tid(kwe->kwe_thread), kwq->kw_inqueue); + + ret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, kwe, updateval); + if (ret == KERN_SUCCESS) { + *old_owner = _kwq_set_owner(kwq, kwe->kwe_thread, 0); + } else { + *old_owner = _kwq_clear_owner(kwq); + } + PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_END, kwq->kw_addr, kwe, + ret, 0); + return ret; +} + + +static void +ksyn_prepost(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t state, + uint32_t lockseq) +{ + bzero(kwe, sizeof(*kwe)); + kwe->kwe_state = state; + kwe->kwe_lockseq = lockseq; + kwe->kwe_count = 1; + + (void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITE, kwe, lockseq, SEQFIT); + kwq->kw_fakecount++; +} + +static void +ksyn_cvsignal(ksyn_wait_queue_t ckwq, thread_t th, uint32_t uptoseq, + uint32_t signalseq, uint32_t *updatebits, int *broadcast, + ksyn_waitq_element_t *nkwep) +{ + ksyn_waitq_element_t kwe = NULL; + ksyn_waitq_element_t nkwe = NULL; + ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE]; + + uptoseq &= PTHRW_COUNT_MASK; + + // Find the specified thread to wake. + if (th != THREAD_NULL) { + uthread_t uth = pthread_kern->get_bsdthread_info(th); + kwe = pthread_kern->uthread_get_uukwe(uth); + if (kwe->kwe_kwqqueue != ckwq || + is_seqhigher(kwe->kwe_lockseq, uptoseq)) { + // Unless it's no longer waiting on this CV... + kwe = NULL; + // ...in which case we post a broadcast instead. + *broadcast = 1; + return; + } + } + + // If no thread was specified, find any thread to wake (with the right + // sequence number). + while (th == THREAD_NULL) { + if (kwe == NULL) { + kwe = ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq); + } + if (kwe == NULL && nkwe == NULL) { + // No eligible entries; need to allocate a new + // entry to prepost. Loop to rescan after + // reacquiring the lock after allocation in + // case anything new shows up. + ksyn_wqunlock(ckwq); + nkwe = (ksyn_waitq_element_t)zalloc(kwe_zone); + ksyn_wqlock(ckwq); + } else { + break; + } + } + + if (kwe != NULL) { + // If we found a thread to wake... + if (kwe->kwe_state == KWE_THREAD_INWAIT) { + if (is_seqlower(kwe->kwe_lockseq, signalseq)) { + /* + * A valid thread in our range, but lower than our signal. + * Matching it may leave our match with nobody to wake it if/when + * it arrives (the signal originally meant for this thread might + * not successfully wake it). + * + * Convert to broadcast - may cause some spurious wakeups + * (allowed by spec), but avoids starvation (better choice). 
+ */ + *broadcast = 1; + } else { + (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT); + *updatebits += PTHRW_INC; + } + } else if (kwe->kwe_state == KWE_THREAD_PREPOST) { + // Merge with existing prepost at same uptoseq. + kwe->kwe_count += 1; + } else if (kwe->kwe_state == KWE_THREAD_BROADCAST) { + // Existing broadcasts subsume this signal. + } else { + panic("unknown kwe state\n"); + } + if (nkwe) { + /* + * If we allocated a new kwe above but then found a different kwe to + * use then we need to deallocate the spare one. + */ + zfree(kwe_zone, nkwe); + nkwe = NULL; + } + } else if (nkwe != NULL) { + // ... otherwise, insert the newly allocated prepost. + ksyn_prepost(ckwq, nkwe, KWE_THREAD_PREPOST, uptoseq); + nkwe = NULL; + } else { + panic("failed to allocate kwe\n"); + } + + *nkwep = nkwe; +} + +static int +__psynch_cvsignal(user_addr_t cv, uint32_t cgen, uint32_t cugen, + uint32_t csgen, uint32_t flags, int broadcast, + mach_port_name_t threadport, uint32_t *retval) +{ + int error = 0; + thread_t th = THREAD_NULL; + ksyn_wait_queue_t kwq; + + uint32_t uptoseq = cgen & PTHRW_COUNT_MASK; + uint32_t fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC; + + // validate sane L, U, and S values + if ((threadport == 0 && is_seqhigher(fromseq, uptoseq)) || is_seqhigher(csgen, uptoseq)) { + __FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n"); + return EINVAL; + } + + if (threadport != 0) { + th = port_name_to_thread((mach_port_name_t)threadport); + if (th == THREAD_NULL) { + return ESRCH; + } + } + + error = ksyn_wqfind(cv, cgen, cugen, csgen, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &kwq); + if (error == 0) { + uint32_t updatebits = 0; + ksyn_waitq_element_t nkwe = NULL; + + ksyn_wqlock(kwq); + + // update L, U and S... + UPDATE_CVKWQ(kwq, cgen, cugen, csgen); + + PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_START, kwq->kw_addr, + fromseq, uptoseq, broadcast); + + if (!broadcast) { + // No need to signal if the CV is already balanced. + if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) { + ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, + &broadcast, &nkwe); + PTHREAD_TRACE(psynch_cvar_signal, kwq->kw_addr, broadcast, 0,0); + } + } + + if (broadcast) { + ksyn_handle_cvbroad(kwq, uptoseq, &updatebits); + } + + kwq->kw_sword += (updatebits & PTHRW_COUNT_MASK); + // set C or P bits and free if needed + ksyn_cvupdate_fixup(kwq, &updatebits); + *retval = updatebits; + + PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_END, kwq->kw_addr, + updatebits, 0, 0); + + ksyn_wqunlock(kwq); + + pthread_kern->psynch_wait_cleanup(); + + if (nkwe != NULL) { + zfree(kwe_zone, nkwe); + } + + ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR)); + } + + if (th != NULL) { + thread_deallocate(th); + } + + return error; +} + +/* + * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars. 
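+ * Argument packing, as unpacked below: cvlsgen carries the S word in its high 32 bits
+ * and the L word in its low 32 bits; cvudgen carries the U word in its high 32 bits and
+ * the diff word in its low 32 bits, whose upper bits (>> PTHRW_COUNT_SHIFT) bound the
+ * number of waiters being woken.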
+ */ +int +_psynch_cvbroad(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen, + uint64_t cvudgen, uint32_t flags, __unused user_addr_t mutex, + __unused uint64_t mugen, __unused uint64_t tid, uint32_t *retval) +{ + uint32_t diffgen = cvudgen & 0xffffffff; + uint32_t count = diffgen >> PTHRW_COUNT_SHIFT; + if (count > pthread_kern->get_task_threadmax()) { + __FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n"); + return EBUSY; + } + + uint32_t csgen = (cvlsgen >> 32) & 0xffffffff; + uint32_t cgen = cvlsgen & 0xffffffff; + uint32_t cugen = (cvudgen >> 32) & 0xffffffff; + + return __psynch_cvsignal(cv, cgen, cugen, csgen, flags, 1, 0, retval); +} + +/* + * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars. + */ +int +_psynch_cvsignal(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen, + uint32_t cvugen, int threadport, __unused user_addr_t mutex, + __unused uint64_t mugen, __unused uint64_t tid, uint32_t flags, + uint32_t *retval) +{ + uint32_t csgen = (cvlsgen >> 32) & 0xffffffff; + uint32_t cgen = cvlsgen & 0xffffffff; + + return __psynch_cvsignal(cv, cgen, cvugen, csgen, flags, 0, threadport, retval); +} + +/* + * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel. + */ +int +_psynch_cvwait(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen, + uint32_t cvugen, user_addr_t mutex, uint64_t mugen, uint32_t flags, + int64_t sec, uint32_t nsec, uint32_t *retval) +{ + int error = 0; + uint32_t updatebits = 0; + ksyn_wait_queue_t ckwq = NULL; + ksyn_waitq_element_t kwe, nkwe = NULL; + + /* for conformance reasons */ + pthread_kern->__pthread_testcancel(0); + + uint32_t csgen = (cvlsgen >> 32) & 0xffffffff; + uint32_t cgen = cvlsgen & 0xffffffff; + uint32_t ugen = (mugen >> 32) & 0xffffffff; + uint32_t mgen = mugen & 0xffffffff; + + uint32_t lockseq = (cgen & PTHRW_COUNT_MASK); + + /* + * In cvwait U word can be out of range as cv could be used only for + * timeouts. However S word needs to be within bounds and validated at + * user level as well. + */ + if (is_seqhigher_eq(csgen, lockseq) != 0) { + __FAILEDUSERTEST__("psync_cvwait; invalid sequence numbers\n"); + return EINVAL; + } + + PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_START, cv, mutex, cgen, 0); + + error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq); + if (error != 0) { + return error; + } + + if (mutex != 0) { + uint32_t mutexrv = 0; + error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, &mutexrv); + if (error != 0) { + goto out; + } + } + + ksyn_wqlock(ckwq); + + // update L, U and S... + UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen); + + /* Look for the sequence for prepost (or conflicting thread */ + ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE]; + kwe = ksyn_queue_find_cvpreposeq(kq, lockseq); + if (kwe != NULL) { + if (kwe->kwe_state == KWE_THREAD_PREPOST) { + if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) { + /* we can safely consume a reference, so do so */ + if (--kwe->kwe_count == 0) { + ksyn_queue_remove_item(ckwq, kq, kwe); + ckwq->kw_fakecount--; + nkwe = kwe; + } + } else { + /* + * consuming a prepost higher than our lock sequence is valid, but + * can leave the higher thread without a match. Convert the entry + * to a broadcast to compensate for this. 
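+				 * (The ksyn_handle_cvbroad() call just below performs that
+				 * conversion in place.)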
+ */ + ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits); +#if __TESTPANICS__ + if (updatebits != 0) + panic("psync_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n", updatebits); +#endif /* __TESTPANICS__ */ + } + } else if (kwe->kwe_state == KWE_THREAD_BROADCAST) { + // XXX + // Nothing to do. + } else if (kwe->kwe_state == KWE_THREAD_INWAIT) { + __FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n"); + error = EBUSY; + } else { + panic("psync_cvwait: unexpected wait queue element type\n"); + } + + if (error == 0) { + updatebits |= PTHRW_INC; + ckwq->kw_sword += PTHRW_INC; + + /* set C or P bits and free if needed */ + ksyn_cvupdate_fixup(ckwq, &updatebits); + *retval = updatebits; + } + } else { + uint64_t abstime = 0; + uint16_t kwe_flags = 0; + + if (sec != 0 || (nsec & 0x3fffffff) != 0) { + struct timespec ts; + ts.tv_sec = (__darwin_time_t)sec; + ts.tv_nsec = (nsec & 0x3fffffff); + nanoseconds_to_absolutetime( + (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime); + clock_absolutetime_interval_to_deadline(abstime, &abstime); + } + + PTHREAD_TRACE(psynch_cvar_kwait, cv, mutex, kwe_flags, 1); + + error = ksyn_wait(ckwq, KSYN_QUEUE_WRITE, cgen, SEQFIT, abstime, + kwe_flags, psynch_cvcontinue, kThreadWaitPThreadCondVar); + // ksyn_wait drops wait queue lock + } + + ksyn_wqunlock(ckwq); + + if (nkwe != NULL) { + zfree(kwe_zone, nkwe); + } +out: + + PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, cv, error, updatebits, 2); + + ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR)); + return error; +} + + +void __dead2 +psynch_cvcontinue(void *parameter, wait_result_t result) +{ + uthread_t uth = current_uthread(); + ksyn_wait_queue_t ckwq = parameter; + ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); + + int error = _wait_result_to_errno(result); + if (error != 0) { + ksyn_wqlock(ckwq); + /* just in case it got woken up as we were granting */ + int retval = kwe->kwe_psynchretval; + pthread_kern->uthread_set_returnval(uth, retval); + + if (kwe->kwe_kwqqueue) { + ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe); + } + if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) { + /* the condition var granted. + * reset the error so that the thread returns back. + */ + error = 0; + /* no need to set any bits just return as cvsig/broad covers this */ + } else { + ckwq->kw_sword += PTHRW_INC; + + /* set C and P bits, in the local error */ + if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) { + PTHREAD_TRACE(psynch_cvar_zeroed, ckwq->kw_addr, + ckwq->kw_lword, ckwq->kw_sword, ckwq->kw_inqueue); + error |= ECVCLEARED; + if (ckwq->kw_inqueue != 0) { + ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 1); + } + ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0; + ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT; + } else { + /* everythig in the queue is a fake entry ? 
*/ + if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) { + error |= ECVPREPOST; + } + } + } + ksyn_wqunlock(ckwq); + + PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr, + error, 0, 3); + } else { + int val = 0; + // PTH_RWL_MTX_WAIT is removed + if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) { + val = PTHRW_INC | PTH_RWS_CV_CBIT; + } + PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr, + val, 0, 4); + pthread_kern->uthread_set_returnval(uth, val); + } + + ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR)); + pthread_kern->unix_syscall_return(error); + __builtin_unreachable(); +} + +/* + * psynch_cvclrprepost: This system call clears pending prepost if present. + */ +int +_psynch_cvclrprepost(__unused proc_t p, user_addr_t cv, uint32_t cvgen, + uint32_t cvugen, uint32_t cvsgen, __unused uint32_t prepocnt, + uint32_t preposeq, uint32_t flags, int *retval) +{ + int error = 0; + int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX); + int wqtype = (mutex ? KSYN_WQTYPE_MTX : KSYN_WQTYPE_CVAR) | KSYN_WQTYPE_INDROP; + ksyn_wait_queue_t kwq = NULL; + + *retval = 0; + + error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype, + &kwq); + if (error != 0) { + return error; + } + + ksyn_wqlock(kwq); + + if (mutex) { + int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) + == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; + if (firstfit && kwq->kw_prepost.count) { + if (is_seqlower_eq(kwq->kw_prepost.lseq, cvgen)) { + PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr, + kwq->kw_prepost.lseq, 0, 2); + _kwq_clear_preposted_wakeup(kwq); + } + } + } else { + PTHREAD_TRACE(psynch_cvar_clrprepost, kwq->kw_addr, wqtype, + preposeq, 0); + ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITE, preposeq, 0); + } + + ksyn_wqunlock(kwq); + ksyn_wqrelease(kwq, 1, wqtype); + return error; +} + +/* ***************** pthread_rwlock ************************ */ + +static int +__psynch_rw_lock(int type, user_addr_t rwlock, uint32_t lgenval, + uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval) +{ + uint32_t lockseq = lgenval & PTHRW_COUNT_MASK; + ksyn_wait_queue_t kwq; + int error, prepost_type, kqi; + thread_continue_t tc; + + if (type == PTH_RW_TYPE_READ) { + prepost_type = KW_UNLOCK_PREPOST_READLOCK; + kqi = KSYN_QUEUE_READ; + tc = psynch_rw_rdcontinue; + } else { + prepost_type = KW_UNLOCK_PREPOST_WRLOCK; + kqi = KSYN_QUEUE_WRITE; + tc = psynch_rw_wrcontinue; + } + + error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, + (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq); + if (error != 0) { + return error; + } + + ksyn_wqlock(kwq); + _ksyn_check_init(kwq, lgenval); + if (_kwq_handle_interrupted_wakeup(kwq, type, lockseq, retval) || + // handle overlap first as they are not counted against pre_rwwc + // handle_overlap uses the flags in lgenval (vs. lockseq) + _kwq_handle_overlap(kwq, type, lgenval, rw_wc, retval) || + _kwq_handle_preposted_wakeup(kwq, prepost_type, lockseq, retval)) { + ksyn_wqunlock(kwq); + goto out; + } + + block_hint_t block_hint = type == PTH_RW_TYPE_READ ? + kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite; + error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, 0, tc, block_hint); + // ksyn_wait drops wait queue lock +out: + ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK)); + return error; +} + +/* + * psynch_rw_rdlock: This system call is used for psync rwlock readers to block. 
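+ * It forwards to __psynch_rw_lock() with PTH_RW_TYPE_READ, which queues the
+ * caller on the rwlock's read queue unless an interrupted wakeup, an overlap,
+ * or a preposted grant already satisfies the request.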
+ */ +int +_psynch_rw_rdlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval, + uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval) +{ + return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, + flags, retval); +} + +/* + * psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block. + */ +int +_psynch_rw_longrdlock(__unused proc_t p, __unused user_addr_t rwlock, + __unused uint32_t lgenval, __unused uint32_t ugenval, + __unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval) +{ + return ESRCH; +} + + +/* + * psynch_rw_wrlock: This system call is used for psync rwlock writers to block. + */ +int +_psynch_rw_wrlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval, + uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval) +{ + return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, + rw_wc, flags, retval); +} + +/* + * psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block. + */ +int +_psynch_rw_yieldwrlock(__unused proc_t p, __unused user_addr_t rwlock, + __unused uint32_t lgenval, __unused uint32_t ugenval, + __unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval) +{ + return ESRCH; +} + +/* + * psynch_rw_unlock: This system call is used for unlock state postings. This will grant appropriate + * reader/writer variety lock. + */ +int +_psynch_rw_unlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval, + uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval) +{ + int error = 0; + ksyn_wait_queue_t kwq; + uint32_t updatebits = 0; + int diff; + uint32_t count = 0; + uint32_t curgen = lgenval & PTHRW_COUNT_MASK; + int clearedkflags = 0; + + error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, + (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq); + if (error != 0) { + return(error); + } + + ksyn_wqlock(kwq); + int isinit = _ksyn_check_init(kwq, lgenval); + + /* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */ + if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && + (is_seqlower(ugenval, kwq->kw_lastunlockseq)!= 0)) { + error = 0; + goto out; + } + + /* If L-U != num of waiters, then it needs to be preposted or spr */ + diff = find_diff(lgenval, ugenval); + + if (find_seq_till(kwq, curgen, diff, &count) == 0) { + if ((count == 0) || (count < (uint32_t)diff)) + goto prepost; + } + + /* no prepost and all threads are in place, reset the bit */ + if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)){ + kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED; + clearedkflags = 1; + } + + /* can handle unlock now */ + + _kwq_clear_preposted_wakeup(kwq); + + error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0); +#if __TESTPANICS__ + if (error != 0) + panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n",error); +#endif /* __TESTPANICS__ */ +out: + if (error == 0) { + /* update bits?? */ + *retval = updatebits; + } + + // If any of the wakeups failed because they + // already returned to userspace because of a signal then we need to ensure + // that the reset state is not cleared when that thread returns. Otherwise, + // _pthread_rwlock_lock will clear the interrupted state before it is read. 
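+	// A non-zero kw_intr.count here means such an interrupted wakeup is still
+	// pending, so KSYN_KWF_INITCLEARED is restored in that case.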
+ if (clearedkflags != 0 && kwq->kw_intr.count > 0) { + kwq->kw_kflags |= KSYN_KWF_INITCLEARED; + } + + ksyn_wqunlock(kwq); + pthread_kern->psynch_wait_cleanup(); + ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK)); + + return(error); + +prepost: + /* update if the new seq is higher than prev prepost, or first set */ + if (is_rws_sbit_set(kwq->kw_prepost.sseq) || + is_seqhigher_eq(rw_wc, kwq->kw_prepost.sseq)) { + _kwq_mark_preposted_wakeup(kwq, diff - count, curgen, rw_wc); + updatebits = lgenval; /* let this not do unlock handling */ + } + error = 0; + goto out; +} + + +/* ************************************************************************** */ +void +pth_global_hashinit(void) +{ + pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash); +} + +void +_pth_proc_hashinit(proc_t p) +{ +#ifdef __DARLING__ + void *ptr = hashinit(PTH_HASHSIZE, M_PROC, &pthhash); +#else + void *ptr = hashinit(PTH_HASHSIZE, M_PCB, &pthhash); +#endif + if (ptr == NULL) { + panic("pth_proc_hashinit: hash init returned 0\n"); + } + + pthread_kern->proc_set_pthhash(p, ptr); +} + + +static int +ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, + ksyn_wait_queue_t *out_kwq, struct pthhashhead **out_hashptr, + uint64_t object, uint64_t offset) +{ + int res = 0; + ksyn_wait_queue_t kwq; + struct pthhashhead *hashptr; + if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) { + hashptr = pth_glob_hashtbl; + LIST_FOREACH(kwq, &hashptr[object & pthhash], kw_hash) { + if (kwq->kw_object == object && kwq->kw_offset == offset) { + break; + } + } + } else { + hashptr = pthread_kern->proc_get_pthhash(p); + LIST_FOREACH(kwq, &hashptr[uaddr & pthhash], kw_hash) { + if (kwq->kw_addr == uaddr) { + break; + } + } + } + *out_kwq = kwq; + *out_hashptr = hashptr; + return res; +} + +void +_pth_proc_hashdelete(proc_t p) +{ + struct pthhashhead * hashptr; + ksyn_wait_queue_t kwq; + unsigned long hashsize = pthhash + 1; + unsigned long i; + + hashptr = pthread_kern->proc_get_pthhash(p); + pthread_kern->proc_set_pthhash(p, NULL); + if (hashptr == NULL) { + return; + } + + pthread_list_lock(); + for(i= 0; i < hashsize; i++) { + while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) { + if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) { + kwq->kw_pflags &= ~KSYN_WQ_INHASH; + LIST_REMOVE(kwq, kw_hash); + } + if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) { + kwq->kw_pflags &= ~KSYN_WQ_FLIST; + LIST_REMOVE(kwq, kw_list); + } + pthread_list_unlock(); + /* release fake entries if present for cvars */ + if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0)) + ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE]); + _kwq_destroy(kwq); + pthread_list_lock(); + } + } + pthread_list_unlock(); + FREE(hashptr, M_PROC); +} + +/* no lock held for this as the waitqueue is getting freed */ +void +ksyn_freeallkwe(ksyn_queue_t kq) +{ + ksyn_waitq_element_t kwe; + while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) { + TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list); + if (kwe->kwe_state != KWE_THREAD_INWAIT) { + zfree(kwe_zone, kwe); + } + } +} + +static inline void +_kwq_report_inuse(ksyn_wait_queue_t kwq) +{ + if (kwq->kw_prepost.count != 0) { + __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [pre %d:0x%x:0x%x]", + (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_prepost.count, + kwq->kw_prepost.lseq, kwq->kw_prepost.sseq); + PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, + kwq->kw_type, 1, 0); + } + if (kwq->kw_intr.count != 0) { + __FAILEDUSERTEST2__("uaddr 0x%llx 
busy for synch type 0x%x [intr %d:0x%x:0x%x:0x%x]", + (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_intr.count, + kwq->kw_intr.type, kwq->kw_intr.seq, + kwq->kw_intr.returnbits); + PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, + kwq->kw_type, 2, 0); + } + if (kwq->kw_iocount) { + __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [ioc %d:%d]", + (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_iocount, + kwq->kw_dropcount); + PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, + kwq->kw_type, 3, 0); + } + if (kwq->kw_inqueue) { + __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [inq %d:%d]", + (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_inqueue, + kwq->kw_fakecount); + PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, kwq->kw_type, + 4, 0); + } +} + +/* find kernel waitqueue, if not present create one. Grants a reference */ +int +ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, + int flags, int wqtype, ksyn_wait_queue_t *kwqp) +{ + int res = 0; + ksyn_wait_queue_t kwq = NULL; + ksyn_wait_queue_t nkwq = NULL; + struct pthhashhead *hashptr; + proc_t p = current_proc(); + + uint64_t object = 0, offset = 0; + if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) { + res = ksyn_findobj(uaddr, &object, &offset); + hashptr = pth_glob_hashtbl; + } else { + hashptr = pthread_kern->proc_get_pthhash(p); + } + + while (res == 0) { + pthread_list_lock(); + res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, + object, offset); + if (res != 0) { + pthread_list_unlock(); + break; + } + if (kwq == NULL && nkwq == NULL) { + // Drop the lock to allocate a new kwq and retry. + pthread_list_unlock(); + + nkwq = (ksyn_wait_queue_t)zalloc(kwq_zone); + bzero(nkwq, sizeof(struct ksyn_wait_queue)); +#ifdef __DARLING__ + init_waitqueue_head(&nkwq->linux_wq); +#endif + int i; + for (i = 0; i < KSYN_QUEUE_MAX; i++) { + ksyn_queue_init(&nkwq->kw_ksynqueues[i]); + } + lck_spin_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr); + continue; + } else if (kwq == NULL && nkwq != NULL) { + // Still not found, add the new kwq to the hash. + kwq = nkwq; + nkwq = NULL; // Don't free. + if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) { + kwq->kw_pflags |= KSYN_WQ_SHARED; + LIST_INSERT_HEAD(&hashptr[object & pthhash], kwq, kw_hash); + } else { + LIST_INSERT_HEAD(&hashptr[uaddr & pthhash], kwq, kw_hash); + } + kwq->kw_pflags |= KSYN_WQ_INHASH; + } else if (kwq != NULL) { + // Found an existing kwq, use it. + if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) { + LIST_REMOVE(kwq, kw_list); + kwq->kw_pflags &= ~KSYN_WQ_FLIST; + } + if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) { + if (!_kwq_is_used(kwq)) { + if (kwq->kw_iocount == 0) { + kwq->kw_type = 0; // mark for reinitialization + } else if (kwq->kw_iocount == 1 && + kwq->kw_dropcount == kwq->kw_iocount) { + /* if all users are unlockers then wait for it to finish */ + kwq->kw_pflags |= KSYN_WQ_WAITING; + // Drop the lock and wait for the kwq to be free. 
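+					// msleep() is called with PDROP, so pthread_list_mlock is
+					// released while sleeping; the continue below then retries
+					// the lookup after re-taking the list lock at the top of
+					// the loop.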
+#ifdef __DARLING__ + res = msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0); +#else + (void)msleep(&kwq->kw_pflags, pthread_list_mlock, + PDROP, "ksyn_wqfind", 0); +#endif + continue; + } else { + _kwq_report_inuse(kwq); + res = EINVAL; + } + } else { + _kwq_report_inuse(kwq); + res = EINVAL; + } + } + } + if (res == 0) { + if (kwq->kw_type == 0) { + kwq->kw_addr = uaddr; + kwq->kw_object = object; + kwq->kw_offset = offset; + kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK); + CLEAR_REINIT_BITS(kwq); + kwq->kw_lword = mgen; + kwq->kw_uword = ugen; + kwq->kw_sword = sgen; + kwq->kw_owner = THREAD_NULL; + kwq->kw_kflags = 0; + kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED; + PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_START, uaddr, + kwq->kw_type, kwq, 0); + PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_END, uaddr, + mgen, ugen, sgen); + } + kwq->kw_iocount++; + if (wqtype == KSYN_WQTYPE_MUTEXDROP) { + kwq->kw_dropcount++; + } + } + pthread_list_unlock(); + break; + } + if (kwqp != NULL) { + *kwqp = kwq; + } + if (nkwq) { + _kwq_destroy(nkwq); + } + return res; +} + +/* Reference from find is dropped here. Starts the free process if needed */ +void +ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype) +{ + uint64_t deadline; + ksyn_wait_queue_t free_elem = NULL; + + pthread_list_lock(); + if (wqtype == KSYN_WQTYPE_MUTEXDROP) { + kwq->kw_dropcount--; + } + if (--kwq->kw_iocount == 0) { + if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) { + /* some one is waiting for the waitqueue, wake them up */ + kwq->kw_pflags &= ~KSYN_WQ_WAITING; + wakeup(&kwq->kw_pflags); + } + + if (!_kwq_is_used(kwq)) { + if (kwq->kw_turnstile) { + panic("kw_turnstile still non-null upon release"); + } + + PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_START, + kwq->kw_addr, kwq->kw_type, qfreenow, 0); + PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_END, + kwq->kw_addr, kwq->kw_lword, kwq->kw_uword, kwq->kw_sword); + + if (qfreenow == 0) { + microuptime(&kwq->kw_ts); + LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list); + kwq->kw_pflags |= KSYN_WQ_FLIST; + if (psynch_cleanupset == 0) { + struct timeval t; + microuptime(&t); + t.tv_sec += KSYN_CLEANUP_DEADLINE; + deadline = tvtoabstime(&t); + thread_call_enter_delayed(psynch_thcall, deadline); + psynch_cleanupset = 1; + } + } else { + kwq->kw_pflags &= ~KSYN_WQ_INHASH; + LIST_REMOVE(kwq, kw_hash); + free_elem = kwq; + } + } + } + pthread_list_unlock(); + if (free_elem != NULL) { + _kwq_destroy(free_elem); + } +} + +/* responsible to free the waitqueues */ +void +psynch_wq_cleanup(__unused void *param, __unused void * param1) +{ + ksyn_wait_queue_t kwq, tmp; + struct timeval t; + int reschedule = 0; + uint64_t deadline = 0; + LIST_HEAD(, ksyn_wait_queue) freelist; + LIST_INIT(&freelist); + + pthread_list_lock(); + + microuptime(&t); + + LIST_FOREACH(kwq, &pth_free_list, kw_list) { + if (_kwq_is_used(kwq) || kwq->kw_iocount != 0) { + // still in use + continue; + } + __darwin_time_t diff = t.tv_sec - kwq->kw_ts.tv_sec; + if (diff < 0) + diff *= -1; + if (diff >= KSYN_CLEANUP_DEADLINE) { + kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH); + LIST_REMOVE(kwq, kw_hash); + LIST_REMOVE(kwq, kw_list); + LIST_INSERT_HEAD(&freelist, kwq, kw_list); + } else { + reschedule = 1; + } + + } + if (reschedule != 0) { + t.tv_sec += KSYN_CLEANUP_DEADLINE; + deadline = tvtoabstime(&t); + thread_call_enter_delayed(psynch_thcall, deadline); + psynch_cleanupset = 1; + } else { + psynch_cleanupset = 0; + } + pthread_list_unlock(); + + 
LIST_FOREACH_SAFE(kwq, &freelist, kw_list, tmp) { + _kwq_destroy(kwq); + } +} + +static int +_wait_result_to_errno(wait_result_t result) +{ + int res = 0; + switch (result) { + case THREAD_TIMED_OUT: + res = ETIMEDOUT; + break; + case THREAD_INTERRUPTED: + res = EINTR; + break; + } + return res; +} + +int +ksyn_wait(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, uint32_t lockseq, + int fit, uint64_t abstime, uint16_t kwe_flags, + thread_continue_t continuation, block_hint_t block_hint) +{ + thread_t th = current_thread(); + uthread_t uth = pthread_kern->get_bsdthread_info(th); + struct turnstile **tstore = NULL; + int res; + + assert(continuation != THREAD_CONTINUE_NULL); + + ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); + bzero(kwe, sizeof(*kwe)); + kwe->kwe_count = 1; + kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK; + kwe->kwe_state = KWE_THREAD_INWAIT; + kwe->kwe_uth = uth; + kwe->kwe_thread = th; + kwe->kwe_flags = kwe_flags; + + res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit); + if (res != 0) { + //panic("psynch_rw_wrlock: failed to enqueue\n"); // XXX + ksyn_wqunlock(kwq); + return res; + } + + PTHREAD_TRACE(psynch_mutex_kwqwait, kwq->kw_addr, kwq->kw_inqueue, + kwq->kw_prepost.count, kwq->kw_intr.count); + + if (_kwq_use_turnstile(kwq)) { + // pthread mutexes and rwlocks both (at least sometimes) know their + // owner and can use turnstiles. Otherwise, we pass NULL as the + // tstore to the shims so they wait on the global waitq. + tstore = &kwq->kw_turnstile; + } + + pthread_kern->psynch_wait_prepare((uintptr_t)kwq, tstore, kwq->kw_owner, + block_hint, abstime); + + ksyn_wqunlock(kwq); + + if (tstore) { + pthread_kern->psynch_wait_update_complete(kwq->kw_turnstile); + } + + thread_block_parameter(continuation, kwq); + + // NOT REACHED + panic("ksyn_wait continuation returned"); + __builtin_unreachable(); +} + +kern_return_t +ksyn_signal(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, + ksyn_waitq_element_t kwe, uint32_t updateval) +{ + kern_return_t ret; + struct turnstile **tstore = NULL; + + // If no wait element was specified, wake the first. 
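+	// ksyn_signal() dequeues the element, stores updateval in kwe_psynchretval
+	// for the waking thread to report back, and then wakes it through
+	// psynch_wait_wakeup() (via the kwq's turnstile when one is in use).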
+ if (!kwe) { + kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[kqi].ksynq_kwelist); + if (!kwe) { + panic("ksyn_signal: panic signaling empty queue"); + } + } + + if (kwe->kwe_state != KWE_THREAD_INWAIT) { + panic("ksyn_signal: panic signaling non-waiting element"); + } + + ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe); + kwe->kwe_psynchretval = updateval; + + if (_kwq_use_turnstile(kwq)) { + tstore = &kwq->kw_turnstile; + } + + ret = pthread_kern->psynch_wait_wakeup(kwq, kwe, tstore); + + if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) { + panic("ksyn_signal: panic waking up thread %x\n", ret); + } + return ret; +} + +#ifdef __DARLING__ +#undef current_map +#endif + +int +ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp) +{ +#ifdef __DARLING__ + return ENOTSUP; // TODO vm_map_page_info +#else + kern_return_t ret; + vm_page_info_basic_data_t info; + mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT; + ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, + VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count); + if (ret != KERN_SUCCESS) { + return EINVAL; + } + + if (objectp != NULL) { + *objectp = (uint64_t)info.object_id; + } + if (offsetp != NULL) { + *offsetp = (uint64_t)info.offset; + } + + return(0); +#endif +} + + +/* lowest of kw_fr, kw_flr, kw_fwr, kw_fywr */ +int +kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, + int *typep, uint32_t lowest[]) +{ + uint32_t kw_fr, kw_fwr, low; + int type = 0, lowtype, typenum[2] = { 0 }; + uint32_t numbers[2] = { 0 }; + int count = 0, i; + + if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || + ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) { + type |= PTH_RWSHFT_TYPE_READ; + /* read entries are present */ + if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) { + kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum; + if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && + (is_seqlower(premgen, kw_fr) != 0)) + kw_fr = premgen; + } else + kw_fr = premgen; + + lowest[KSYN_QUEUE_READ] = kw_fr; + numbers[count]= kw_fr; + typenum[count] = PTH_RW_TYPE_READ; + count++; + } else + lowest[KSYN_QUEUE_READ] = 0; + + if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) || + ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) { + type |= PTH_RWSHFT_TYPE_WRITE; + /* read entries are present */ + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) { + kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum; + if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && + (is_seqlower(premgen, kw_fwr) != 0)) + kw_fwr = premgen; + } else + kw_fwr = premgen; + + lowest[KSYN_QUEUE_WRITE] = kw_fwr; + numbers[count]= kw_fwr; + typenum[count] = PTH_RW_TYPE_WRITE; + count++; + } else + lowest[KSYN_QUEUE_WRITE] = 0; + +#if __TESTPANICS__ + if (count == 0) + panic("nothing in the queue???\n"); +#endif /* __TESTPANICS__ */ + + low = numbers[0]; + lowtype = typenum[0]; + if (count > 1) { + for (i = 1; i< count; i++) { + if (is_seqlower(numbers[i] , low) != 0) { + low = numbers[i]; + lowtype = typenum[i]; + } + } + } + type |= lowtype; + + if (typep != 0) + *typep = type; + return(0); +} + +/* wakeup readers to upto the writer limits */ +int +ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, + uint32_t updatebits, int *wokenp) +{ + ksyn_queue_t kq; + int failedwakeup = 0; + int numwoken = 0; + kern_return_t kret = KERN_SUCCESS; + uint32_t lbits = 0; + + lbits = updatebits; + + kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ]; + while ((kq->ksynq_count != 0) && + 
(allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) { + kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits); + if (kret == KERN_NOT_WAITING) { + failedwakeup++; + } + numwoken++; + } + + if (wokenp != NULL) + *wokenp = numwoken; + return(failedwakeup); +} + + +/* + * This handles the unlock grants for next set on rw_unlock() or on arrival + * of all preposted waiters. + */ +int +kwq_handle_unlock(ksyn_wait_queue_t kwq, __unused uint32_t mgen, uint32_t rw_wc, + uint32_t *updatep, int flags, int *blockp, uint32_t premgen) +{ + uint32_t low_writer, limitrdnum; + int rwtype, error=0; + int allreaders, nfailed; + uint32_t updatebits=0, numneeded = 0;; + int prepost = flags & KW_UNLOCK_PREPOST; + thread_t preth = THREAD_NULL; + ksyn_waitq_element_t kwe; + uthread_t uth; + thread_t th; + int woken = 0; + int block = 1; + uint32_t lowest[KSYN_QUEUE_MAX]; /* np need for upgrade as it is handled separately */ + kern_return_t kret = KERN_SUCCESS; + ksyn_queue_t kq; + int curthreturns = 0; + + if (prepost != 0) { + preth = current_thread(); + } + + kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ]; + kwq->kw_lastseqword = rw_wc; + kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK); + kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD; + + error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest); +#if __TESTPANICS__ + if (error != 0) + panic("rwunlock: cannot fails to slot next round of threads"); +#endif /* __TESTPANICS__ */ + + low_writer = lowest[KSYN_QUEUE_WRITE]; + + allreaders = 0; + updatebits = 0; + + switch (rwtype & PTH_RW_TYPE_MASK) { + case PTH_RW_TYPE_READ: { + // XXX + /* what about the preflight which is LREAD or READ ?? */ + if ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) { + if (rwtype & PTH_RWSHFT_TYPE_WRITE) { + updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT); + } + } + limitrdnum = 0; + if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) { + limitrdnum = low_writer; + } else { + allreaders = 1; + } + + numneeded = 0; + + if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) { + limitrdnum = low_writer; + numneeded = ksyn_queue_count_tolowest(kq, limitrdnum); + if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) { + curthreturns = 1; + numneeded += 1; + } + } else { + // no writers at all + // no other waiters only readers + kwq->kw_kflags |= KSYN_KWF_OVERLAP_GUARD; + numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count; + if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) { + curthreturns = 1; + numneeded += 1; + } + } + + updatebits += (numneeded << PTHRW_COUNT_SHIFT); + + kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits; + + if (curthreturns != 0) { + block = 0; + uth = current_uthread(); + kwe = pthread_kern->uthread_get_uukwe(uth); + kwe->kwe_psynchretval = updatebits; + } + + + nfailed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, + updatebits, &woken); + if (nfailed != 0) { + _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_READ, nfailed, + limitrdnum, updatebits); + } + + error = 0; + + if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) && + ((updatebits & PTH_RWL_WBIT) == 0)) { + panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits); + } + } + break; + + case PTH_RW_TYPE_WRITE: { + + /* only one thread is goin to be granted */ + updatebits |= (PTHRW_INC); + updatebits |= PTH_RWL_KBIT| PTH_RWL_EBIT; + + if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) { + block = 0; + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) { + updatebits |= PTH_RWL_WBIT; + } + th = preth; + uth = 
pthread_kern->get_bsdthread_info(th); + kwe = pthread_kern->uthread_get_uukwe(uth); + kwe->kwe_psynchretval = updatebits; + } else { + /* we are not granting writelock to the preposting thread */ + /* if there are writers present or the preposting write thread then W bit is to be set */ + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count > 1 || + (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) { + updatebits |= PTH_RWL_WBIT; + } + /* setup next in the queue */ + kret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, NULL, updatebits); + if (kret == KERN_NOT_WAITING) { + _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1, + low_writer, updatebits); + } + error = 0; + } + kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits; + if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != + (PTH_RWL_KBIT | PTH_RWL_EBIT)) { + panic("kwq_handle_unlock: writer lock granted but no ke set %x\n", updatebits); + } + } + break; + + default: + panic("rwunlock: invalid type for lock grants"); + + }; + + if (updatep != NULL) + *updatep = updatebits; + if (blockp != NULL) + *blockp = block; + return(error); +} + +/************* Indiv queue support routines ************************/ +void +ksyn_queue_init(ksyn_queue_t kq) +{ + TAILQ_INIT(&kq->ksynq_kwelist); + kq->ksynq_count = 0; + kq->ksynq_firstnum = 0; + kq->ksynq_lastnum = 0; +} + +int +ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, + uint32_t mgen, int fit) +{ + ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi]; + uint32_t lockseq = mgen & PTHRW_COUNT_MASK; + int res = 0; + + if (kwe->kwe_kwqqueue != NULL) { + panic("adding enqueued item to another queue"); + } + + if (kq->ksynq_count == 0) { + TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list); + kq->ksynq_firstnum = lockseq; + kq->ksynq_lastnum = lockseq; + } else if (fit == FIRSTFIT) { + /* TBD: if retry bit is set for mutex, add it to the head */ + /* firstfit, arriving order */ + TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list); + if (is_seqlower(lockseq, kq->ksynq_firstnum)) { + kq->ksynq_firstnum = lockseq; + } + if (is_seqhigher(lockseq, kq->ksynq_lastnum)) { + kq->ksynq_lastnum = lockseq; + } + } else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) { + /* During prepost when a thread is getting cancelled, we could have + * two with same seq */ + res = EBUSY; + if (kwe->kwe_state == KWE_THREAD_PREPOST) { + ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq); + if (tmp != NULL && tmp->kwe_uth != NULL && + pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) { + TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list); + res = 0; + } + } + } else if (is_seqlower(kq->ksynq_lastnum, lockseq)) { // XXX is_seqhigher + TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list); + kq->ksynq_lastnum = lockseq; + } else if (is_seqlower(lockseq, kq->ksynq_firstnum)) { + TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list); + kq->ksynq_firstnum = lockseq; + } else { + ksyn_waitq_element_t q_kwe, r_kwe; + + res = ESRCH; + TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) { + if (is_seqhigher(q_kwe->kwe_lockseq, lockseq)) { + TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list); + res = 0; + break; + } + } + } + + if (res == 0) { + kwe->kwe_kwqqueue = kwq; + kq->ksynq_count++; + kwq->kw_inqueue++; + update_low_high(kwq, lockseq); + } + return res; +} + +void +ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, + ksyn_waitq_element_t kwe) +{ + if (kq->ksynq_count == 0) { + panic("removing item from empty queue"); + } + + if (kwe->kwe_kwqqueue != kwq) { + 
panic("removing item from wrong queue"); + } + + TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list); + kwe->kwe_list.tqe_next = NULL; + kwe->kwe_list.tqe_prev = NULL; + kwe->kwe_kwqqueue = NULL; + + if (--kq->ksynq_count > 0) { + ksyn_waitq_element_t tmp; + tmp = TAILQ_FIRST(&kq->ksynq_kwelist); + kq->ksynq_firstnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK; + tmp = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head); + kq->ksynq_lastnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK; + } else { + kq->ksynq_firstnum = 0; + kq->ksynq_lastnum = 0; + } + + if (--kwq->kw_inqueue > 0) { + uint32_t curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK; + if (kwq->kw_lowseq == curseq) { + kwq->kw_lowseq = find_nextlowseq(kwq); + } + if (kwq->kw_highseq == curseq) { + kwq->kw_highseq = find_nexthighseq(kwq); + } + } else { + kwq->kw_lowseq = 0; + kwq->kw_highseq = 0; + } +} + +ksyn_waitq_element_t +ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, + uint32_t seq) +{ + ksyn_waitq_element_t kwe; + + // XXX: should stop searching when higher sequence number is seen + TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) { + if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) { + return kwe; + } + } + return NULL; +} + +/* find the thread at the target sequence (or a broadcast/prepost at or above) */ +ksyn_waitq_element_t +ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen) +{ + ksyn_waitq_element_t result = NULL; + ksyn_waitq_element_t kwe; + uint32_t lgen = (cgen & PTHRW_COUNT_MASK); + + TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) { + if (is_seqhigher_eq(kwe->kwe_lockseq, cgen)) { + result = kwe; + + // KWE_THREAD_INWAIT must be strictly equal + if (kwe->kwe_state == KWE_THREAD_INWAIT && + (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) { + result = NULL; + } + break; + } + } + return result; +} + +/* look for a thread at lockseq, a */ +ksyn_waitq_element_t +ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, + uint32_t uptoseq, uint32_t signalseq) +{ + ksyn_waitq_element_t result = NULL; + ksyn_waitq_element_t q_kwe, r_kwe; + + // XXX + /* case where wrap in the tail of the queue exists */ + TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) { + if (q_kwe->kwe_state == KWE_THREAD_PREPOST) { + if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) { + return result; + } + } + if (q_kwe->kwe_state == KWE_THREAD_PREPOST | + q_kwe->kwe_state == KWE_THREAD_BROADCAST) { + /* match any prepost at our same uptoseq or any broadcast above */ + if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) { + continue; + } + return q_kwe; + } else if (q_kwe->kwe_state == KWE_THREAD_INWAIT) { + /* + * Match any (non-cancelled) thread at or below our upto sequence - + * but prefer an exact match to our signal sequence (if present) to + * keep exact matches happening. 
+ */ + if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) { + return result; + } + if (q_kwe->kwe_kwqqueue == kwq) { + if (!pthread_kern->uthread_is_cancelled(q_kwe->kwe_uth)) { + /* if equal or higher than our signal sequence, return this one */ + if (is_seqhigher_eq(q_kwe->kwe_lockseq, signalseq)) { + return q_kwe; + } + + /* otherwise, just remember this eligible thread and move on */ + if (result == NULL) { + result = q_kwe; + } + } + } + } else { + panic("ksyn_queue_find_signalseq(): unknown wait queue element type (%d)\n", q_kwe->kwe_state); + } + } + return result; +} + +void +ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all) +{ + ksyn_waitq_element_t kwe; + uint32_t tseq = upto & PTHRW_COUNT_MASK; + ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi]; + uint32_t freed = 0, signaled = 0; + + PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_START, kwq->kw_addr, + kqi, upto, all); + + while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) { + if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) { + break; + } + if (kwe->kwe_state == KWE_THREAD_INWAIT) { + /* + * This scenario is typically noticed when the cvar is + * reinited and the new waiters are waiting. We can + * return them as spurious wait so the cvar state gets + * reset correctly. + */ + + PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe, + kwq->kw_inqueue, 1); + + /* skip canceled ones */ + /* wake the rest */ + /* set M bit to indicate to waking CV to retun Inc val */ + (void)ksyn_signal(kwq, kqi, kwe, + PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT); + signaled++; + } else { + PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe, + kwq->kw_inqueue, 2); + ksyn_queue_remove_item(kwq, kq, kwe); + zfree(kwe_zone, kwe); + kwq->kw_fakecount--; + freed++; + } + } + + PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_END, kwq->kw_addr, freed, + signaled, kwq->kw_inqueue); +} + +/*************************************************************************/ + +void +update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq) +{ + if (kwq->kw_inqueue == 1) { + kwq->kw_lowseq = lockseq; + kwq->kw_highseq = lockseq; + } else { + if (is_seqlower(lockseq, kwq->kw_lowseq)) { + kwq->kw_lowseq = lockseq; + } + if (is_seqhigher(lockseq, kwq->kw_highseq)) { + kwq->kw_highseq = lockseq; + } + } +} + +uint32_t +find_nextlowseq(ksyn_wait_queue_t kwq) +{ + uint32_t lowest = 0; + int first = 1; + int i; + + for (i = 0; i < KSYN_QUEUE_MAX; i++) { + if (kwq->kw_ksynqueues[i].ksynq_count > 0) { + uint32_t current = kwq->kw_ksynqueues[i].ksynq_firstnum; + if (first || is_seqlower(current, lowest)) { + lowest = current; + first = 0; + } + } + } + + return lowest; +} + +uint32_t +find_nexthighseq(ksyn_wait_queue_t kwq) +{ + uint32_t highest = 0; + int first = 1; + int i; + + for (i = 0; i < KSYN_QUEUE_MAX; i++) { + if (kwq->kw_ksynqueues[i].ksynq_count > 0) { + uint32_t current = kwq->kw_ksynqueues[i].ksynq_lastnum; + if (first || is_seqhigher(current, highest)) { + highest = current; + first = 0; + } + } + } + + return highest; +} + +int +find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, + uint32_t *countp) +{ + int i; + uint32_t count = 0; + + for (i = 0; i< KSYN_QUEUE_MAX; i++) { + count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto); + if (count >= nwaiters) { + break; + } + } + + if (countp != NULL) { + *countp = count; + } + + if (count == 0) { + return 0; + } else if (count >= nwaiters) { + return 1; + } else { + return 0; + } +} + + +uint32_t +ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t 
upto) +{ + uint32_t i = 0; + ksyn_waitq_element_t kwe, newkwe; + + if (kq->ksynq_count == 0 || is_seqhigher(kq->ksynq_firstnum, upto)) { + return 0; + } + if (upto == kq->ksynq_firstnum) { + return 1; + } + TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) { + uint32_t curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK); + if (is_seqhigher(curval, upto)) { + break; + } + ++i; + if (upto == curval) { + break; + } + } + return i; +} + +/* handles the cond broadcast of cvar and returns number of woken threads and bits for syscall return */ +void +ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep) +{ + ksyn_waitq_element_t kwe, newkwe; + uint32_t updatebits = 0; + ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE]; + + struct ksyn_queue kfreeq; + ksyn_queue_init(&kfreeq); + + PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_START, ckwq->kw_addr, upto, + ckwq->kw_inqueue, 0); + +retry: + TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) { + if (is_seqhigher(kwe->kwe_lockseq, upto)) { + // outside our range + break; + } + + if (kwe->kwe_state == KWE_THREAD_INWAIT) { + // Wake only non-canceled threads waiting on this CV. + if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) { + PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe, 0, 1); + (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT); + updatebits += PTHRW_INC; + } + } else if (kwe->kwe_state == KWE_THREAD_BROADCAST || + kwe->kwe_state == KWE_THREAD_PREPOST) { + PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe, + kwe->kwe_state, 2); + ksyn_queue_remove_item(ckwq, kq, kwe); + TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list); + ckwq->kw_fakecount--; + } else { + panic("unknown kwe state\n"); + } + } + + /* Need to enter a broadcast in the queue (if not already at L == S) */ + + if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) { + PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, ckwq->kw_lword, + ckwq->kw_sword, 3); + + newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist); + if (newkwe == NULL) { + ksyn_wqunlock(ckwq); + newkwe = (ksyn_waitq_element_t)zalloc(kwe_zone); + TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list); + ksyn_wqlock(ckwq); + goto retry; + } else { + TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list); + ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto); + PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, newkwe, 0, 4); + } + } + + // free up any remaining things stumbled across above + while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) { + TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list); + zfree(kwe_zone, kwe); + } + + PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_END, ckwq->kw_addr, + updatebits, 0, 0); + + if (updatep != NULL) { + *updatep |= updatebits; + } +} + +void +ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits) +{ + if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) { + if (ckwq->kw_inqueue != 0) { + /* FREE THE QUEUE */ + ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 0); +#if __TESTPANICS__ + if (ckwq->kw_inqueue != 0) + panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S"); +#endif /* __TESTPANICS__ */ + } + ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0; + ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT; + *updatebits |= PTH_RWS_CV_CBIT; + } else if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) { + // only fake entries are present in the queue + *updatebits |= PTH_RWS_CV_PBIT; + } +} + +void +psynch_zoneinit(void) 
+{ + kwq_zone = zinit(sizeof(struct ksyn_wait_queue), + 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue"); + kwe_zone = zinit(sizeof(struct ksyn_waitq_element), + 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element"); +} + +void * +_pthread_get_thread_kwq(thread_t thread) +{ + assert(thread); + struct uthread * uthread = pthread_kern->get_bsdthread_info(thread); + assert(uthread); + ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uthread); + assert(kwe); + ksyn_wait_queue_t kwq = kwe->kwe_kwqqueue; + return kwq; +} + +/* This function is used by stackshot to determine why a thread is blocked, and report + * who owns the object that the thread is blocked on. It should *only* be called if the + * `block_hint' field in the relevant thread's struct is populated with something related + * to pthread sync objects. + */ +void +_pthread_find_owner(thread_t thread, + struct stackshot_thread_waitinfo * waitinfo) +{ + ksyn_wait_queue_t kwq = _pthread_get_thread_kwq(thread); + switch (waitinfo->wait_type) { + case kThreadWaitPThreadMutex: + assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX); + waitinfo->owner = thread_tid(kwq->kw_owner); + waitinfo->context = kwq->kw_addr; + break; + /* Owner of rwlock not stored in kernel space due to races. Punt + * and hope that the userspace address is helpful enough. */ + case kThreadWaitPThreadRWLockRead: + case kThreadWaitPThreadRWLockWrite: + assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK); + waitinfo->owner = 0; + waitinfo->context = kwq->kw_addr; + break; + /* Condvars don't have owners, so just give the userspace address. */ + case kThreadWaitPThreadCondVar: + assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR); + waitinfo->owner = 0; + waitinfo->context = kwq->kw_addr; + break; + case kThreadWaitNone: + default: + waitinfo->owner = 0; + waitinfo->context = 0; + break; + } +} diff --git a/dthread/kern_trace.h b/dthread/kern_trace.h new file mode 100644 index 0000000..2e59edc --- /dev/null +++ b/dthread/kern_trace.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _KERN_TRACE_H_ +#define _KERN_TRACE_H_ + +/* pthread kext, or userspace, kdebug trace points. 
Defined here and output to + * /usr/share/misc/pthread.codes during build. + */ + +// userspace trace points force slow-paths, so must be compiled in +#define ENABLE_USERSPACE_TRACE 0 + +// pthread tracing subclasses +# define _TRACE_SUB_DEFAULT 0 +# define _TRACE_SUB_WORKQUEUE 1 +// WQ_TRACE_REQUESTS_SUBCLASS is 2, in xnu +# define _TRACE_SUB_MUTEX 3 +# define _TRACE_SUB_CONDVAR 4 + +#ifndef _PTHREAD_BUILDING_CODES_ + +#include + +#ifndef DBG_PTHREAD +#define DBG_PTHREAD DBG_WORKQUEUE +#endif + +#if KERNEL +#include + +extern uint32_t pthread_debug_tracing; + +static __unused void* +VM_UNSLIDE(void* ptr) +{ + vm_offset_t unslid_ptr; + vm_kernel_unslide_or_perm_external(ptr, &unslid_ptr); + return (void*)unslid_ptr; +} + +# define PTHREAD_TRACE(x,a,b,c,d) \ + { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, a, b, c, d, 0); } } + +# define PTHREAD_TRACE_WQ(x,a,b,c,d) \ + { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, VM_UNSLIDE(a), b, c, d, 0); } } + +# define PTHREAD_TRACE_WQ_REQ(x,a,b,c,d,e) \ + { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, VM_UNSLIDE(a), VM_UNSLIDE(b), c, d, e); } } + +#else // KERNEL + +#if ENABLE_USERSPACE_TRACE +# include +# define PTHREAD_TRACE(x, a, b, c, d) kdebug_trace(TRACE_##x, a, b, c, d) +#else // ENABLE_USERSPACE_TRACE +# define PTHREAD_TRACE(x, a, b, c, d) do { } while(0) +#endif // ENABLE_USERSPACE_TRACE + +#endif // KERNEL + +# define TRACE_CODE(name, subclass, code) \ + static const int TRACE_##name = KDBG_CODE(DBG_PTHREAD, subclass, code) + +#else // _PTHREAD_BUILDING_CODES_ +/* When not included as a header, this file is pre-processed into perl source to generate + * the pthread.codes file during build. + */ +# define DBG_PTHREAD 9 +# define STR(x) #x + +# define TRACE_CODE(name, subclass, code) \ + printf("0x%x\t%s\n", ((DBG_PTHREAD << 24) | ((subclass & 0xff) << 16) | ((code & 0x3fff) << 2)), STR(name)) +#endif // _PTHREAD_BUILDING_CODES_ + +/* These defines translate into TRACE_ when used in source code, and are + * pre-processed out to a codes file by the build system. 
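+ *
+ * For example, TRACE_CODE(psynch_cvar_signal, _TRACE_SUB_CONDVAR, 0x3) defines
+ * a constant TRACE_psynch_cvar_signal from KDBG_CODE(DBG_PTHREAD,
+ * _TRACE_SUB_CONDVAR, 0x3); PTHREAD_TRACE(psynch_cvar_signal, ...) then emits
+ * it through KERNEL_DEBUG_CONSTANT whenever pthread_debug_tracing is set,
+ * while the _PTHREAD_BUILDING_CODES_ path prints the same codes for
+ * /usr/share/misc/pthread.codes.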
+ */ + +// "default" trace points +TRACE_CODE(pthread_thread_create, _TRACE_SUB_DEFAULT, 0x10); +TRACE_CODE(pthread_thread_terminate, _TRACE_SUB_DEFAULT, 0x20); +TRACE_CODE(pthread_set_qos_self, _TRACE_SUB_DEFAULT, 0x30); + +// workqueue trace points +TRACE_CODE(wq_pthread_exit, _TRACE_SUB_WORKQUEUE, 0x01); +TRACE_CODE(wq_workqueue_exit, _TRACE_SUB_WORKQUEUE, 0x02); +TRACE_CODE(wq_runthread, _TRACE_SUB_WORKQUEUE, 0x03); +TRACE_CODE(wq_runitem, _TRACE_SUB_WORKQUEUE, 0x04); +TRACE_CODE(wq_thread_block, _TRACE_SUB_WORKQUEUE, 0x9); +TRACE_CODE(wq_thactive_update, _TRACE_SUB_WORKQUEUE, 0xa); +TRACE_CODE(wq_add_timer, _TRACE_SUB_WORKQUEUE, 0xb); +TRACE_CODE(wq_start_add_timer, _TRACE_SUB_WORKQUEUE, 0x0c); +TRACE_CODE(wq_override_start, _TRACE_SUB_WORKQUEUE, 0x12); +TRACE_CODE(wq_override_end, _TRACE_SUB_WORKQUEUE, 0x13); +TRACE_CODE(wq_override_dispatch, _TRACE_SUB_WORKQUEUE, 0x14); +TRACE_CODE(wq_override_reset, _TRACE_SUB_WORKQUEUE, 0x15); +TRACE_CODE(wq_thread_create_failed, _TRACE_SUB_WORKQUEUE, 0x1d); +TRACE_CODE(wq_thread_create, _TRACE_SUB_WORKQUEUE, 0x1f); +TRACE_CODE(wq_run_threadreq, _TRACE_SUB_WORKQUEUE, 0x20); +TRACE_CODE(wq_run_threadreq_mgr_merge, _TRACE_SUB_WORKQUEUE, 0x21); +TRACE_CODE(wq_run_threadreq_req_select, _TRACE_SUB_WORKQUEUE, 0x22); +TRACE_CODE(wq_run_threadreq_thread_select, _TRACE_SUB_WORKQUEUE, 0x23); +TRACE_CODE(wq_thread_reset_priority, _TRACE_SUB_WORKQUEUE, 0x24); +TRACE_CODE(wq_constrained_admission, _TRACE_SUB_WORKQUEUE, 0x25); +TRACE_CODE(wq_wqops_reqthreads, _TRACE_SUB_WORKQUEUE, 0x26); +TRACE_CODE(wq_kevent_reqthreads, _TRACE_SUB_WORKQUEUE, 0x27); +TRACE_CODE(wq_thread_park, _TRACE_SUB_WORKQUEUE, 0x28); +TRACE_CODE(wq_thread_squash, _TRACE_SUB_WORKQUEUE, 0x29); + +// synch trace points +TRACE_CODE(psynch_mutex_ulock, _TRACE_SUB_MUTEX, 0x0); +TRACE_CODE(psynch_mutex_utrylock_failed, _TRACE_SUB_MUTEX, 0x1); +TRACE_CODE(psynch_mutex_uunlock, _TRACE_SUB_MUTEX, 0x2); +TRACE_CODE(psynch_ksyn_incorrect_owner, _TRACE_SUB_MUTEX, 0x3); +TRACE_CODE(psynch_mutex_lock_updatebits, _TRACE_SUB_MUTEX, 0x4); +TRACE_CODE(psynch_mutex_unlock_updatebits, _TRACE_SUB_MUTEX, 0x5); +TRACE_CODE(psynch_mutex_clearprepost, _TRACE_SUB_MUTEX, 0x6); +TRACE_CODE(psynch_mutex_kwqallocate, _TRACE_SUB_MUTEX, 0x7); +TRACE_CODE(psynch_mutex_kwqdeallocate, _TRACE_SUB_MUTEX, 0x8); +TRACE_CODE(psynch_mutex_kwqprepost, _TRACE_SUB_MUTEX, 0x9); +TRACE_CODE(psynch_mutex_markprepost, _TRACE_SUB_MUTEX, 0x10); +TRACE_CODE(psynch_mutex_kwqcollision, _TRACE_SUB_MUTEX, 0x11); +TRACE_CODE(psynch_ffmutex_lock_updatebits, _TRACE_SUB_MUTEX, 0x12); +TRACE_CODE(psynch_ffmutex_unlock_updatebits, _TRACE_SUB_MUTEX, 0x13); +TRACE_CODE(psynch_ffmutex_wake, _TRACE_SUB_MUTEX, 0x14); +TRACE_CODE(psynch_mutex_kwqsignal, _TRACE_SUB_MUTEX, 0x15); +TRACE_CODE(psynch_ffmutex_wait, _TRACE_SUB_MUTEX, 0x16); +TRACE_CODE(psynch_mutex_kwqwait, _TRACE_SUB_MUTEX, 0x17); + +TRACE_CODE(psynch_cvar_kwait, _TRACE_SUB_CONDVAR, 0x0); +TRACE_CODE(psynch_cvar_clrprepost, _TRACE_SUB_CONDVAR, 0x1); +TRACE_CODE(psynch_cvar_freeitems, _TRACE_SUB_CONDVAR, 0x2); +TRACE_CODE(psynch_cvar_signal, _TRACE_SUB_CONDVAR, 0x3); +TRACE_CODE(psynch_cvar_broadcast, _TRACE_SUB_CONDVAR, 0x5); +TRACE_CODE(psynch_cvar_zeroed, _TRACE_SUB_CONDVAR, 0x6); +TRACE_CODE(psynch_cvar_updateval, _TRACE_SUB_CONDVAR, 0x7); + +#endif // _KERN_TRACE_H_ diff --git a/dthread/pthread_kext.c b/dthread/pthread_kext.c new file mode 100644 index 0000000..21518c5 --- /dev/null +++ b/dthread/pthread_kext.c @@ -0,0 +1,201 @@ +/** + * a brief rationale on why we need this + * --- + * + * 
so, sometime around the release of macOS 10.9 (with xnu-2422.1.72), + * Apple decided to stuff pthread support in-kernel into a kext (pthread.kext) + * + * we already have most of the functions implemented in `psync_support.c`, this file just takes care of the rest + * and the necessary plumbing for XNU's `pthread_shims.c` so that we can use XNU's own interface as much as possible + * + * ignore the following; i've actually decided against building `pthread_workqueue.c` + * (turns out its usage in `turnstile.c` only matters when workqueues are already being used for something else) + * + * > up until recently, we didn't really build any parts of XNU that needed it + * > + * > however, the new turnstile subsystem requires functions that are implemented in `pthread_workqueue.c`, + * > and it's relatively simple to build that file by adding support for the functions it calls out to + * > (and easy enough that it's better than stubbing them) + * + * so like i said, that reason is no longer valid. i decided to leave this in anyways because i had already completely + * added it in and it works so ¯\_(ツ)_/¯ + */ + +#include + +#include +#include +#include +#include "kern_internal.h" +#include + +#include "pthread_kext.h" + +/** + * + * `pthread_shims.c` plumbing + * + */ + +static const struct pthread_functions_s _darling_pthread_functions = { + .pthread_init = _pthread_init, + + .pth_proc_hashinit = _pth_proc_hashinit, + .pth_proc_hashdelete = _pth_proc_hashdelete, + + .bsdthread_create = _bsdthread_create, + .bsdthread_register = _bsdthread_register, + .bsdthread_terminate = _bsdthread_terminate, + + .thread_selfid = _thread_selfid, + + .psynch_mutexwait = _psynch_mutexwait, + .psynch_mutexdrop = _psynch_mutexdrop, + .psynch_cvbroad = _psynch_cvbroad, + .psynch_cvsignal = _psynch_cvsignal, + .psynch_cvwait = _psynch_cvwait, + .psynch_cvclrprepost = _psynch_cvclrprepost, + .psynch_rw_longrdlock = _psynch_rw_longrdlock, + .psynch_rw_rdlock = _psynch_rw_rdlock, + .psynch_rw_unlock = _psynch_rw_unlock, + .psynch_rw_wrlock = _psynch_rw_wrlock, + .psynch_rw_yieldwrlock = _psynch_rw_yieldwrlock, + + .pthread_find_owner = _pthread_find_owner, + .pthread_get_thread_kwq = _pthread_get_thread_kwq, + + .workq_create_threadstack = workq_create_threadstack, + .workq_destroy_threadstack = workq_destroy_threadstack, + .workq_setup_thread = workq_setup_thread, + .workq_handle_stack_events = workq_handle_stack_events, + .workq_markfree_threadstack = workq_markfree_threadstack, +}; + +// called by our kernel module during initialization +// +// this is different from `darling_pthread_init`, because this function is the one that sets up +// the pthread kext plumbing, while the `pthread_init` is only called by some BSD code after the kext has already been set up +void darling_pthread_kext_init(void) { + // we don't really need the callbacks, since we're not actually a kext and we have full access to the whole kernel, + // but it's easier to provide the callbacks than it is to modify every instance of `pthread_kern->whatever(...)`. + // plus, `pthread_shims.c` won't take "no" for an answer (it'll panic if we give it `NULL`). 
+ // we have this as a local variable though because, since we *aren't* a kext, `pthread_kern` is already defined in `pthread_shims.c`
+ pthread_callbacks_t callbacks = NULL;
+
+ pthread_kext_register(&_darling_pthread_functions, &callbacks);
+};
+
+// called by our kernel module when it's going to be unloaded
+void darling_pthread_kext_exit(void) {};
+
+// temporarily copied over from kern_support.c (until we start building that file)
+//
+#ifdef __DARLING__
+uint32_t pthread_debug_tracing = 0;
+#else
+uint32_t pthread_debug_tracing = 1;
+#endif
+
+#ifdef __DARLING__
+static lck_grp_attr_t the_real_pthread_lck_grp_attr;
+static lck_grp_t the_real_pthread_lck_grp;
+static lck_attr_t the_real_pthread_lck_attr;
+static lck_mtx_t the_real_pthread_list_mlock;
+
+lck_grp_attr_t* pthread_lck_grp_attr = &the_real_pthread_lck_grp_attr;
+lck_grp_t* pthread_lck_grp = &the_real_pthread_lck_grp;
+lck_attr_t* pthread_lck_attr = &the_real_pthread_lck_attr;
+#else
+lck_grp_attr_t *pthread_lck_grp_attr;
+lck_grp_t *pthread_lck_grp;
+lck_attr_t *pthread_lck_attr;
+#endif
+
+void
+_pthread_init(void)
+{
+#ifdef __DARLING__
+ lck_grp_attr_setdefault(pthread_lck_grp_attr);
+ lck_grp_init(pthread_lck_grp, "pthread", pthread_lck_grp_attr);
+
+ lck_attr_setdefault(pthread_lck_attr);
+ pthread_list_mlock = &the_real_pthread_list_mlock;
+ lck_mtx_init(pthread_list_mlock, pthread_lck_grp, pthread_lck_attr);
+#else
+ pthread_lck_grp_attr = lck_grp_attr_alloc_init();
+ pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);
+
+ /*
+ * allocate the lock attribute for pthread synchronizers
+ */
+ pthread_lck_attr = lck_attr_alloc_init();
+ pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
+#endif
+
+ pth_global_hashinit();
+ psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
+ psynch_zoneinit();
+
+#ifndef __DARLING__
+ int policy_bootarg;
+ if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
+ pthread_mutex_default_policy = policy_bootarg;
+ }
+
+ sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);
+#endif
+}
+//
+
+/**
+ * stubbed functions
+ */
+
+/**
+ * nobody really needs this next set of functions right now,
+ * so we can just stub them for now
+ */
+
+int _bsdthread_create(proc_t p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t* retval) {
+ return ENOTSUP;
+};
+
+int _bsdthread_register(proc_t p, user_addr_t threadstart, user_addr_t wqthread, int pthsize, user_addr_t dummy_value, user_addr_t targetconc_ptr, uint64_t dispatchqueue_offset, int32_t* retval) {
+ return ENOTSUP;
+};
+
+int _bsdthread_terminate(proc_t p, user_addr_t stackaddr, size_t size, uint32_t kthport, uint32_t sem, int32_t* retval) {
+ return ENOTSUP;
+};
+
+int _thread_selfid(proc_t p, uint64_t* retval) {
+ return ENOTSUP;
+};
+
+int _bsdthread_register2(proc_t p, user_addr_t threadstart, user_addr_t wqthread, uint32_t flags, user_addr_t stack_addr_hint, user_addr_t targetconc_ptr, uint32_t dispatchqueue_offset, uint32_t tsd_offset, int32_t* retval) {
+ return ENOTSUP;
+};
+
+/**
+ * now these are actually needed by `pthread_workqueue.c`
+ */
+
+int workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr, mach_port_name_t kport, user_addr_t events, int nevents, int upcall_flags) {
+ return ENOTSUP;
+};
+
+int workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t* out_addr) {
+ return ENOTSUP;
+};
+
+int 
workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr) { + return ENOTSUP; +}; + +void workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr, mach_port_name_t kport, int th_qos, int setup_flags, int upcall_flags) { + +}; + +void workq_markfree_threadstack(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr) { + +}; diff --git a/dthread/pthread_kext.h b/dthread/pthread_kext.h new file mode 100644 index 0000000..43f0ec3 --- /dev/null +++ b/dthread/pthread_kext.h @@ -0,0 +1,7 @@ +#ifndef _DARLING_LKM_PTHREAD_KEXT_H_ +#define _DARLING_LKM_PTHREAD_KEXT_H_ + +void darling_pthread_kext_init(void); +void darling_pthread_kext_exit(void); + +#endif // _DARLING_LKM_PTHREAD_KEXT_H_ diff --git a/dthread/synch_internal.h b/dthread/synch_internal.h new file mode 100644 index 0000000..1b9d6c2 --- /dev/null +++ b/dthread/synch_internal.h @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef __SYNCH_INTERNAL_H__ +#define __SYNCH_INTERNAL_H__ + +// kwe_state +enum { + KWE_THREAD_INWAIT = 1, + KWE_THREAD_PREPOST, + KWE_THREAD_BROADCAST, +}; + +#define _PTHREAD_MTX_OPT_PSHARED 0x010 +#define _PTHREAD_MTX_OPT_NOTIFY 0x1000 /* notify to drop mutex handling in cvwait */ +#define _PTHREAD_MTX_OPT_MUTEX 0x2000 /* this is a mutex type */ + + +#define PTHRW_COUNT_SHIFT 8 +#define PTHRW_INC (1 << PTHRW_COUNT_SHIFT) +#define PTHRW_BIT_MASK ((1 << PTHRW_COUNT_SHIFT) - 1) +#define PTHRW_COUNT_MASK ((uint32_t)~PTHRW_BIT_MASK) +#define PTHRW_MAX_READERS PTHRW_COUNT_MASK + +// L word +#define PTH_RWL_KBIT 0x01 // cannot acquire in user mode +#define PTH_RWL_EBIT 0x02 // exclusive lock in progress +#define PTH_RWL_WBIT 0x04 // write waiters pending in kernel +#define PTH_RWL_PBIT 0x04 // prepost (cv) pending in kernel + +#define PTH_RWL_MTX_WAIT 0x20 // in cvar in mutex wait +#define PTH_RWL_UBIT 0x40 // lock is unlocked (no readers or writers) +#define PTH_RWL_MBIT 0x40 // overlapping grants from kernel (only in updateval) +#define PTH_RWL_IBIT 0x80 // lock reset, held until first successful unlock + +#define PTHRW_RWL_INIT PTH_RWL_IBIT // reset on the lock bits (U) +#define PTHRW_RWLOCK_INIT (PTH_RWL_IBIT | PTH_RWL_UBIT) // reset on the lock bits (U) + +// S word +#define PTH_RWS_SBIT 0x01 // kernel transition seq not set yet +#define PTH_RWS_IBIT 0x02 // Sequence is not set on return from kernel + +#define PTH_RWS_CV_CBIT PTH_RWS_SBIT // kernel has cleared all info w.r.s.t CV +#define PTH_RWS_CV_PBIT PTH_RWS_IBIT // kernel has prepost/fake structs only,no waiters +#define PTH_RWS_CV_BITSALL (PTH_RWS_CV_CBIT | PTH_RWS_CV_PBIT) +#define PTH_RWS_CV_MBIT PTH_RWL_MBIT // to indicate prepost return from kernel +#define PTH_RWS_CV_RESET_PBIT ((uint32_t)~PTH_RWS_CV_PBIT) + +#define PTH_RWS_WSVBIT 0x04 // save W bit + +#define PTHRW_RWS_SAVEMASK (PTH_RWS_WSVBIT) // save bits mask + +#define PTHRW_RWS_INIT PTH_RWS_SBIT // reset on the lock bits (U) + +// rw_flags +#define PTHRW_KERN_PROCESS_SHARED 0x10 +#define PTHRW_KERN_PROCESS_PRIVATE 0x20 + +#define PTHREAD_MTX_TID_SWITCHING (uint64_t)-1 + +// L word tests +#define is_rwl_ebit_set(x) (((x) & PTH_RWL_EBIT) != 0) +#define is_rwl_wbit_set(x) (((x) & PTH_RWL_WBIT) != 0) +#define is_rwl_ebit_clear(x) (((x) & PTH_RWL_EBIT) == 0) +#define is_rwl_readoverlap(x) (((x) & PTH_RWL_MBIT) != 0) + +// S word tests +#define is_rws_sbit_set(x) (((x) & PTH_RWS_SBIT) != 0) +#define is_rws_unlockinit_set(x) (((x) & PTH_RWS_IBIT) != 0) +#define is_rws_savemask_set(x) (((x) & PTHRW_RWS_SAVEMASK) != 0) +#define is_rws_pbit_set(x) (((x) & PTH_RWS_CV_PBIT) != 0) + +// kwe_flags +#define KWE_FLAG_LOCKPREPOST 0x1 // cvwait caused a lock prepost + +static inline int +is_seqlower(uint32_t x, uint32_t y) +{ + x &= PTHRW_COUNT_MASK; + y &= PTHRW_COUNT_MASK; + if (x < y) { + return ((y - x) < (PTHRW_MAX_READERS / 2)); + } else { + return ((x - y) > (PTHRW_MAX_READERS / 2)); + } +} + +static inline int +is_seqlower_eq(uint32_t x, uint32_t y) +{ + if ((x & PTHRW_COUNT_MASK) == (y & PTHRW_COUNT_MASK)) { + return 1; + } else { + return is_seqlower(x, y); + } +} + +static inline int +is_seqhigher(uint32_t x, uint32_t y) +{ + x &= PTHRW_COUNT_MASK; + y &= PTHRW_COUNT_MASK; + if (x > y) { + return ((x - y) < (PTHRW_MAX_READERS / 2)); + } else { + return ((y - x) > (PTHRW_MAX_READERS / 2)); + } +} + +static inline int +is_seqhigher_eq(uint32_t x, uint32_t y) +{ + if ((x & PTHRW_COUNT_MASK) == (y & PTHRW_COUNT_MASK)) { + return 1; + } 
else { + return is_seqhigher(x,y); + } +} + +static inline int +diff_genseq(uint32_t x, uint32_t y) +{ + x &= PTHRW_COUNT_MASK; + y &= PTHRW_COUNT_MASK; + if (x == y) { + return 0; + } else if (x > y) { + return x - y; + } else { + return ((PTHRW_MAX_READERS - y) + x + PTHRW_INC); + } +} + +static inline int +find_diff(uint32_t upto, uint32_t lowest) +{ + uint32_t diff; + + if (upto == lowest) + return(0); +#if 0 + diff = diff_genseq(upto, lowest); +#else + if (is_seqlower(upto, lowest) != 0) + diff = diff_genseq(lowest, upto); + else + diff = diff_genseq(upto, lowest); +#endif + diff = (diff >> PTHRW_COUNT_SHIFT); + return(diff); +} + +#endif /* __SYNCH_INTERNAL_H__ */ diff --git a/dthread/workqueue_internal.h b/dthread/workqueue_internal.h new file mode 100644 index 0000000..e631030 --- /dev/null +++ b/dthread/workqueue_internal.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2014 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef DTHREAD_WORKQUEUE_INTERNAL_H // header name changed for Darling +#define DTHREAD_WORKQUEUE_INTERNAL_H + +#ifdef __DARLING__ +#include +#endif + +/* These definitions are shared between the kext and userspace inside the pthread project. Consolidating + * duplicate definitions that used to exist in both projects, when separate. + */ + +// Sometimes something gets passed a bucket number and we need a way to express +// that it's actually the event manager. Use the (0)th bucket for that. +#define WORKQ_THREAD_QOS_MIN (THREAD_QOS_MAINTENANCE) +#define WORKQ_THREAD_QOS_MAX (THREAD_QOS_LAST - 1) +#define WORKQ_THREAD_QOS_CLEANUP (THREAD_QOS_LEGACY) +#define WORKQ_THREAD_QOS_MANAGER (THREAD_QOS_LAST) // outside of MIN/MAX + +#define WORKQ_NUM_QOS_BUCKETS (WORKQ_THREAD_QOS_MAX) +#define WORKQ_NUM_BUCKETS (WORKQ_THREAD_QOS_MAX + 1) +#define WORKQ_IDX(qos) ((qos) - 1) // 0 based index + +// magical `nkevents` values for _pthread_wqthread +#define WORKQ_EXIT_THREAD_NKEVENT (-1) + +#endif // DTHREAD_WORKQUEUE_INTERNAL_H