// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCUDACXX_THREADING_SUPPORT
#define _LIBCUDACXX_THREADING_SUPPORT

#ifndef __cuda_std__
#include <__config>
#include <errno.h>
#else
#include "__cuda/atomic_prelude.h"
#endif // __cuda_std__

#include "__assert" // all public C++ headers provide the assertion handler
#include "__functional/hash.h"
#include "chrono"
#include "climits"
#include "iosfwd"

#ifndef __cuda_std__
#include <__pragma_push>
#endif // __cuda_std__

#if defined(_LIBCUDACXX_USE_PRAGMA_GCC_SYSTEM_HEADER)
#pragma GCC system_header
#endif

#if defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL)
# ifndef __cuda_std__
#  include <__external_threading>
# else
#  define _LIBCUDACXX_THREAD_ABI_VISIBILITY inline _LIBCUDACXX_INLINE_VISIBILITY
# endif
#elif !defined(_LIBCUDACXX_HAS_NO_THREADS)

#if defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD)
# include <pthread.h>
# include <sched.h>
# include <semaphore.h>
# if defined(__APPLE__)
#  include <dispatch/dispatch.h>
# endif
# if defined(__linux__)
#  include <unistd.h>
#  include <linux/futex.h>
#  include <sys/syscall.h>
# endif
#endif

#if defined(_LIBCUDACXX_HAS_THREAD_API_WIN32)
# include <process.h>
# include <windows.h>
#endif

#if defined(_LIBCUDACXX_HAS_THREAD_LIBRARY_EXTERNAL) || \
    defined(_LIBCUDACXX_BUILDING_THREAD_LIBRARY_EXTERNAL)
#define _LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_FUNC_VIS
#else
#define _LIBCUDACXX_THREAD_ABI_VISIBILITY inline _LIBCUDACXX_INLINE_VISIBILITY
#endif

#if defined(__FreeBSD__) && defined(__clang__) && __has_attribute(no_thread_safety_analysis)
#define _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis))
#else
#define _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS
#endif

typedef ::timespec __libcpp_timespec_t;
#endif // !defined(_LIBCUDACXX_HAS_NO_THREADS)

_LIBCUDACXX_BEGIN_NAMESPACE_STD

#if !defined(_LIBCUDACXX_HAS_NO_THREADS)

#define _LIBCUDACXX_POLLING_COUNT 16

_LIBCUDACXX_INLINE_VISIBILITY
inline void __libcpp_thread_yield_processor()
{
#if defined(__aarch64__)
# define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile ("yield" :::);)
#elif defined(__x86_64__)
# define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile ("pause" :::);)
#elif defined (__powerpc__)
# define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile ("or 27,27,27":::);)
#else
# define __LIBCUDACXX_ASM_THREAD_YIELD (;)
#endif
    NV_IF_TARGET(
        NV_IS_HOST,
            __LIBCUDACXX_ASM_THREAD_YIELD
    )
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
void __libcpp_thread_yield();

_LIBCUDACXX_THREAD_ABI_VISIBILITY
void __libcpp_thread_sleep_for(chrono::nanoseconds __ns);

template<class _Fn>
_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_thread_poll_with_backoff(_Fn && __f, chrono::nanoseconds __max = chrono::nanoseconds::zero());

#if defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD)
// Mutex
typedef pthread_mutex_t __libcpp_mutex_t;
#define _LIBCUDACXX_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER

typedef pthread_mutex_t __libcpp_recursive_mutex_t;

// Condition Variable
typedef pthread_cond_t __libcpp_condvar_t;
#define _LIBCUDACXX_CONDVAR_INITIALIZER PTHREAD_COND_INITIALIZER

// Semaphore
#if defined(__APPLE__)
typedef dispatch_semaphore_t __libcpp_semaphore_t;
# define _LIBCUDACXX_SEMAPHORE_MAX numeric_limits<long>::max()
#else
typedef sem_t __libcpp_semaphore_t;
# define _LIBCUDACXX_SEMAPHORE_MAX SEM_VALUE_MAX
#endif

// Execute once
typedef pthread_once_t __libcpp_exec_once_flag;
#define _LIBCUDACXX_EXEC_ONCE_INITIALIZER PTHREAD_ONCE_INIT

// Thread id
typedef pthread_t __libcpp_thread_id;

// Thread
#define _LIBCUDACXX_NULL_THREAD 0U

typedef pthread_t __libcpp_thread_t;

// Thread Local Storage
typedef pthread_key_t __libcpp_tls_key;

#define _LIBCUDACXX_TLS_DESTRUCTOR_CC
#elif !defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL)
// Mutex
typedef void* __libcpp_mutex_t;
#define _LIBCUDACXX_MUTEX_INITIALIZER 0

#if defined(_M_IX86) || defined(__i386__) || defined(_M_ARM) || defined(__arm__)
typedef void* __libcpp_recursive_mutex_t[6];
#elif defined(_M_AMD64) || defined(__x86_64__) || defined(_M_ARM64) || defined(__aarch64__)
typedef void* __libcpp_recursive_mutex_t[5];
#else
# error Unsupported architecture
#endif

// Condition Variable
typedef void* __libcpp_condvar_t;
#define _LIBCUDACXX_CONDVAR_INITIALIZER 0

// Semaphore
typedef void* __libcpp_semaphore_t;

// Execute Once
typedef void* __libcpp_exec_once_flag;
#define _LIBCUDACXX_EXEC_ONCE_INITIALIZER 0

// Thread ID
typedef long __libcpp_thread_id;

// Thread
#define _LIBCUDACXX_NULL_THREAD 0U

typedef void* __libcpp_thread_t;

// Thread Local Storage
typedef long __libcpp_tls_key;

#define _LIBCUDACXX_TLS_DESTRUCTOR_CC __stdcall
#endif // !defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) && !defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL)

#if !defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL)

_LIBCUDACXX_THREAD_ABI_VISIBILITY
__libcpp_timespec_t __libcpp_to_timespec(const chrono::nanoseconds& __ns);

// Mutex
_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t *__m);

_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS
int __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t *__m);

_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS
bool __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t *__m);

_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS
int __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t *__m);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_recursive_mutex_destroy(__libcpp_recursive_mutex_t *__m);

_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS
int __libcpp_mutex_lock(__libcpp_mutex_t *__m);

_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS
bool __libcpp_mutex_trylock(__libcpp_mutex_t *__m);

_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS
int __libcpp_mutex_unlock(__libcpp_mutex_t *__m);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_mutex_destroy(__libcpp_mutex_t *__m);

// Condition variable
_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_condvar_signal(__libcpp_condvar_t* __cv);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_condvar_broadcast(__libcpp_condvar_t* __cv);

_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS
int __libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m);

_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS
int __libcpp_condvar_timedwait(__libcpp_condvar_t *__cv, __libcpp_mutex_t *__m,
                               __libcpp_timespec_t *__ts);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_condvar_destroy(__libcpp_condvar_t* __cv);

// Semaphore
_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, chrono::nanoseconds const& __ns);

// Execute once
_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_execute_once(__libcpp_exec_once_flag *flag,
                          void (*init_routine)());

// Thread id
_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_thread_id_equal(__libcpp_thread_id t1, __libcpp_thread_id t2);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_thread_id_less(__libcpp_thread_id t1, __libcpp_thread_id t2);

// Thread
_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_thread_isnull(const __libcpp_thread_t *__t);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_thread_create(__libcpp_thread_t *__t, void *(*__func)(void *),
                           void *__arg);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
__libcpp_thread_id __libcpp_thread_get_current_id();

_LIBCUDACXX_THREAD_ABI_VISIBILITY
__libcpp_thread_id __libcpp_thread_get_id(const __libcpp_thread_t *__t);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_thread_join(__libcpp_thread_t *__t);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_thread_detach(__libcpp_thread_t *__t);

// Thread local storage
_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_tls_create(__libcpp_tls_key* __key,
                        void(_LIBCUDACXX_TLS_DESTRUCTOR_CC* __at_exit)(void*));

_LIBCUDACXX_THREAD_ABI_VISIBILITY
void *__libcpp_tls_get(__libcpp_tls_key __key);

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_tls_set(__libcpp_tls_key __key, void *__p);

#endif // !defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL)

#if !defined(_LIBCUDACXX_HAS_THREAD_LIBRARY_EXTERNAL) || defined(_LIBCUDACXX_BUILDING_THREAD_LIBRARY_EXTERNAL)

#if defined(_LIBCUDACXX_HAS_THREAD_API_CUDA)

_LIBCUDACXX_THREAD_ABI_VISIBILITY
void __libcpp_thread_yield() {}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
void __libcpp_thread_sleep_for(chrono::nanoseconds __ns)
{
    NV_IF_TARGET(
        NV_IS_DEVICE, (
            auto const __step = __ns.count();
            assert(__step < numeric_limits<unsigned>::max());
            asm volatile("nanosleep.u32 %0;"::"r"((unsigned)__step):);
        )
    )
}

#elif defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD)

_LIBCUDACXX_THREAD_ABI_VISIBILITY
__libcpp_timespec_t __libcpp_to_timespec(const chrono::nanoseconds& __ns)
{
     using namespace chrono;
     seconds __s = duration_cast<seconds>(__ns);
     __libcpp_timespec_t __ts;
     typedef decltype(__ts.tv_sec) ts_sec;
     _LIBCUDACXX_CONSTEXPR ts_sec __ts_sec_max = numeric_limits<ts_sec>::max();

     if (__s.count() < __ts_sec_max)
     {
         __ts.tv_sec = static_cast<ts_sec>(__s.count());
         __ts.tv_nsec = static_cast<decltype(__ts.tv_nsec)>((__ns - __s).count());
     }
     else
     {
         __ts.tv_sec = __ts_sec_max;
         __ts.tv_nsec = 999999999; // (10^9 - 1)
     }
     return __ts;
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t *__m)
{
    pthread_mutexattr_t attr;
    int __ec = pthread_mutexattr_init(&attr);
    if (__ec)
        return __ec;
    __ec = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
    if (__ec) {
        pthread_mutexattr_destroy(&attr);
        return __ec;
    }
    __ec = pthread_mutex_init(__m, &attr);
    if (__ec) {
        pthread_mutexattr_destroy(&attr);
        return __ec;
    }
    __ec = pthread_mutexattr_destroy(&attr);
    if (__ec) {
        pthread_mutex_destroy(__m);
        return __ec;
    }
    return 0;
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t *__m)
{
    return pthread_mutex_lock(__m);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t *__m)
{
    return pthread_mutex_trylock(__m) == 0;
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_recursive_mutex_unlock(__libcpp_mutex_t *__m)
{
    return pthread_mutex_unlock(__m);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_recursive_mutex_destroy(__libcpp_recursive_mutex_t *__m)
{
    return pthread_mutex_destroy(__m);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_mutex_lock(__libcpp_mutex_t *__m)
{
    return pthread_mutex_lock(__m);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_mutex_trylock(__libcpp_mutex_t *__m)
{
    return pthread_mutex_trylock(__m) == 0;
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_mutex_unlock(__libcpp_mutex_t *__m)
{
    return pthread_mutex_unlock(__m);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_mutex_destroy(__libcpp_mutex_t *__m)
{
  return pthread_mutex_destroy(__m);
}

// Condition Variable
_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_condvar_signal(__libcpp_condvar_t *__cv)
{
    return pthread_cond_signal(__cv);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_condvar_broadcast(__libcpp_condvar_t *__cv)
{
    return pthread_cond_broadcast(__cv);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_condvar_wait(__libcpp_condvar_t *__cv, __libcpp_mutex_t *__m)
{
    return pthread_cond_wait(__cv, __m);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_condvar_timedwait(__libcpp_condvar_t *__cv, __libcpp_mutex_t *__m,
                               __libcpp_timespec_t *__ts)
{
    return pthread_cond_timedwait(__cv, __m, __ts);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_condvar_destroy(__libcpp_condvar_t *__cv)
{
    return pthread_cond_destroy(__cv);
}

// Semaphore
#if defined(__APPLE__)

bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init)
{
    return (*__sem = dispatch_semaphore_create(__init)) != NULL;
}

bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem)
{
    dispatch_release(*__sem);
    return true;
}

bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem)
{
    dispatch_semaphore_signal(*__sem);
    return true;
}

bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem)
{
    return dispatch_semaphore_wait(*__sem, DISPATCH_TIME_FOREVER) == 0;
}

bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, chrono::nanoseconds const& __ns)
{
    return dispatch_semaphore_wait(*__sem, dispatch_time(DISPATCH_TIME_NOW, __ns.count())) == 0;
}

#else

_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init)
{
    return sem_init(__sem, 0, __init) == 0;
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem)
{
    return sem_destroy(__sem) == 0;
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem)
{
    return sem_post(__sem) == 0;
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem)
{
    return sem_wait(__sem) == 0;
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, chrono::nanoseconds const& __ns)
{
    __libcpp_timespec_t __ts = __libcpp_to_timespec(__ns);
    return sem_timedwait(__sem, &__ts) == 0;
}

#endif //__APPLE__

// Execute once
_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_execute_once(__libcpp_exec_once_flag *flag, void (*init_routine)())
{
    return pthread_once(flag, init_routine);
}

// Thread id
// Returns non-zero if the thread ids are equal, otherwise 0
_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_thread_id_equal(__libcpp_thread_id t1, __libcpp_thread_id t2)
{
    return pthread_equal(t1, t2) != 0;
}

// Returns non-zero if t1 < t2, otherwise 0
_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_thread_id_less(__libcpp_thread_id t1, __libcpp_thread_id t2)
{
    return t1 < t2;
}

// Thread
_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_thread_isnull(const __libcpp_thread_t *__t)
{
    return *__t == 0;
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_thread_create(__libcpp_thread_t *__t, void *(*__func)(void *),
                           void *__arg)
{
    return pthread_create(__t, 0, __func, __arg);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
__libcpp_thread_id __libcpp_thread_get_current_id()
{
    return pthread_self();
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
__libcpp_thread_id __libcpp_thread_get_id(const __libcpp_thread_t *__t)
{
    return *__t;
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_thread_join(__libcpp_thread_t *__t)
{
    return pthread_join(*__t, 0);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_thread_detach(__libcpp_thread_t *__t)
{
    return pthread_detach(*__t);
}

// Thread local storage
_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_tls_create(__libcpp_tls_key *__key, void (*__at_exit)(void *))
{
    return pthread_key_create(__key, __at_exit);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
void *__libcpp_tls_get(__libcpp_tls_key __key)
{
    return pthread_getspecific(__key);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
int __libcpp_tls_set(__libcpp_tls_key __key, void *__p)
{
    return pthread_setspecific(__key, __p);
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
void __libcpp_thread_yield()
{
    sched_yield();
}

_LIBCUDACXX_THREAD_ABI_VISIBILITY
void __libcpp_thread_sleep_for(chrono::nanoseconds __ns)
{
    __libcpp_timespec_t __ts = __libcpp_to_timespec(__ns);
    while (nanosleep(&__ts, &__ts) == -1 && errno == EINTR);
}

#if defined(__linux__) && !defined(_LIBCUDACXX_HAS_NO_PLATFORM_WAIT)

#define _LIBCUDACXX_HAS_PLATFORM_WAIT

typedef int __libcpp_platform_wait_t;

template<typename _Tp>
struct __libcpp_platform_wait_uses_type {
    enum { __value = is_same<__remove_cv_t<_Tp>, __libcpp_platform_wait_t>::value };
};

template <class _Tp, typename enable_if<__libcpp_platform_wait_uses_type<_Tp>::__value, int>::type = 1>
void __libcpp_platform_wait(_Tp const* ptr, _Tp val, void const* timeout) {
    syscall(SYS_futex, ptr, FUTEX_WAIT_PRIVATE, val, timeout, 0, 0);
}

template <class _Tp, typename enable_if<__libcpp_platform_wait_uses_type<_Tp>::__value, int>::type = 1>
void __libcpp_platform_wake(_Tp const* ptr, bool all) {
    syscall(SYS_futex, ptr, FUTEX_WAKE_PRIVATE, all ? INT_MAX : 1, 0, 0, 0);
}

#endif // defined(__linux__) && !defined(_LIBCUDACXX_HAS_NO_PLATFORM_WAIT)

#elif defined(_LIBCUDACXX_HAS_THREAD_API_WIN32)

void __libcpp_thread_yield()
{
    SwitchToThread();
}

void __libcpp_thread_sleep_for(chrono::nanoseconds __ns)
{
    using namespace chrono;
    // round-up to the nearest milisecond
    milliseconds __ms =
        duration_cast<milliseconds>(__ns + chrono::nanoseconds(999999));
    Sleep(static_cast<DWORD>(__ms.count()));
}

#endif // defined(_LIBCUDACXX_HAS_THREAD_API_WIN32)

#endif // !defined(_LIBCUDACXX_HAS_THREAD_LIBRARY_EXTERNAL) || defined(_LIBCUDACXX_BUILDING_THREAD_LIBRARY_EXTERNAL)

template<class _Fn>
_LIBCUDACXX_THREAD_ABI_VISIBILITY
bool __libcpp_thread_poll_with_backoff(_Fn && __f, chrono::nanoseconds __max)
{
    chrono::high_resolution_clock::time_point const __start = chrono::high_resolution_clock::now();
    for(int __count = 0;;) {
      if(__f())
        return true;
      if(__count < _LIBCUDACXX_POLLING_COUNT) {
        if(__count > (_LIBCUDACXX_POLLING_COUNT >> 1))
          __libcpp_thread_yield_processor();
        __count += 1;
        continue;
      }
      chrono::high_resolution_clock::duration const __elapsed = chrono::high_resolution_clock::now() - __start;
      if(__max != chrono::nanoseconds::zero() &&
         __max < __elapsed)
         return false;
      chrono::nanoseconds const __step = __elapsed / 4;
      if(__step >= chrono::milliseconds(1))
        __libcpp_thread_sleep_for(chrono::milliseconds(1));
      else if(__step >= chrono::microseconds(10))
        __libcpp_thread_sleep_for(__step);
      else
        __libcpp_thread_yield();
    }
}

#if _LIBCUDACXX_STD_VER < 11
# define _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE
#endif

#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE

struct alignas(64) __libcpp_contention_t {
#if defined(_LIBCUDACXX_HAS_PLATFORM_WAIT)
    ptrdiff_t                __waiters = 0;
    __libcpp_platform_wait_t __version = 0;
#else
    ptrdiff_t                __credit = 0;
    __libcpp_mutex_t         __mutex = _LIBCUDACXX_MUTEX_INITIALIZER;
    __libcpp_condvar_t       __condvar = _LIBCUDACXX_CONDVAR_INITIALIZER;
#endif
};

_LIBCUDACXX_FUNC_VIS
__libcpp_contention_t * __libcpp_contention_state(void const volatile * p) _NOEXCEPT;

#endif // _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE

#if !defined(_LIBCUDACXX_HAS_NO_TREE_BARRIER) && !defined(_LIBCUDACXX_HAS_NO_THREAD_FAVORITE_BARRIER_INDEX)

_LIBCUDACXX_EXPORTED_FROM_ABI
extern thread_local ptrdiff_t __libcpp_thread_favorite_barrier_index;

#endif

#ifndef __cuda_std__

class _LIBCUDACXX_TYPE_VIS thread;
class _LIBCUDACXX_TYPE_VIS __thread_id;

namespace this_thread
{

_LIBCUDACXX_INLINE_VISIBILITY __thread_id get_id() _NOEXCEPT;

}  // this_thread

template<> struct hash<__thread_id>;

class _LIBCUDACXX_TEMPLATE_VIS __thread_id
{
    // FIXME: pthread_t is a pointer on Darwin but a long on Linux.
    // NULL is the no-thread value on Darwin.  Someone needs to check
    // on other platforms.  We assume 0 works everywhere for now.
    __libcpp_thread_id __id_;

public:
    _LIBCUDACXX_INLINE_VISIBILITY
    __thread_id() _NOEXCEPT : __id_(0) {}

    friend _LIBCUDACXX_INLINE_VISIBILITY
        bool operator==(__thread_id __x, __thread_id __y) _NOEXCEPT
        { // don't pass id==0 to underlying routines
        if (__x.__id_ == 0) return __y.__id_ == 0;
        if (__y.__id_ == 0) return false;
        return __libcpp_thread_id_equal(__x.__id_, __y.__id_);
        }
    friend _LIBCUDACXX_INLINE_VISIBILITY
        bool operator!=(__thread_id __x, __thread_id __y) _NOEXCEPT
        {return !(__x == __y);}
    friend _LIBCUDACXX_INLINE_VISIBILITY
        bool operator< (__thread_id __x, __thread_id __y) _NOEXCEPT
        { // id==0 is always less than any other thread_id
        if (__x.__id_ == 0) return __y.__id_ != 0;
        if (__y.__id_ == 0) return false;
        return  __libcpp_thread_id_less(__x.__id_, __y.__id_);
        }
    friend _LIBCUDACXX_INLINE_VISIBILITY
        bool operator<=(__thread_id __x, __thread_id __y) _NOEXCEPT
        {return !(__y < __x);}
    friend _LIBCUDACXX_INLINE_VISIBILITY
        bool operator> (__thread_id __x, __thread_id __y) _NOEXCEPT
        {return   __y < __x ;}
    friend _LIBCUDACXX_INLINE_VISIBILITY
        bool operator>=(__thread_id __x, __thread_id __y) _NOEXCEPT
        {return !(__x < __y);}

    _LIBCUDACXX_INLINE_VISIBILITY
    void __reset() { __id_ = 0; }

#ifndef __cuda_std__
    template<class _CharT, class _Traits>
    friend
    _LIBCUDACXX_INLINE_VISIBILITY
    basic_ostream<_CharT, _Traits>&
    operator<<(basic_ostream<_CharT, _Traits>& __os, __thread_id __id);
#endif

private:
    _LIBCUDACXX_INLINE_VISIBILITY
    __thread_id(__libcpp_thread_id __id) : __id_(__id) {}

    friend __thread_id this_thread::get_id() _NOEXCEPT;
    friend class _LIBCUDACXX_TYPE_VIS thread;
    friend struct _LIBCUDACXX_TEMPLATE_VIS hash<__thread_id>;
};

namespace this_thread
{

inline _LIBCUDACXX_INLINE_VISIBILITY
__thread_id
get_id() _NOEXCEPT
{
    return __libcpp_thread_get_current_id();
}

}  // this_thread

#endif // __cuda_std__

#endif // !_LIBCUDACXX_HAS_NO_THREADS

_LIBCUDACXX_END_NAMESPACE_STD

#ifndef __cuda_std__
#include <__pragma_pop>
#endif

#endif // _LIBCUDACXX_THREADING_SUPPORT
