Skip to content

FAST_IO_HAS_BUILTIN, fix win9x mutex, fix use of SSE2 instructions in SSE-only code paths #1149

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 4 commits on Jun 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 8 additions & 36 deletions include/fast_io_core_impl/allocation/c_malloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,8 @@ class c_malloc_allocator
n = 1;
}
void *p =
#if defined(__has_builtin)
#if __has_builtin(__builtin_malloc)
#if FAST_IO_HAS_BUILTIN(__builtin_malloc)
__builtin_malloc(n)
#else
::std::malloc(n)
#endif
#else
::std::malloc(n)
#endif
Expand All @@ -66,12 +62,8 @@ class c_malloc_allocator
}
::std::size_t const to_allocate{n};
p =
#if defined(__has_builtin)
#if __has_builtin(__builtin_realloc)
#if FAST_IO_HAS_BUILTIN(__builtin_realloc)
__builtin_realloc
#else
::std::realloc
#endif
#else
::std::realloc
#endif
Expand All @@ -93,12 +85,8 @@ class c_malloc_allocator
n = 1;
}
void *p =
#if defined(__has_builtin)
#if __has_builtin(__builtin_calloc)
#if FAST_IO_HAS_BUILTIN(__builtin_calloc)
__builtin_calloc
#else
::std::calloc
#endif
#else
::std::calloc
#endif
Expand Down Expand Up @@ -142,12 +130,8 @@ class c_malloc_allocator
if (alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__)
{
p =
#if defined(__has_builtin)
#if __has_builtin(__builtin_malloc)
#if FAST_IO_HAS_BUILTIN(__builtin_malloc)
__builtin_malloc
#else
::std::malloc
#endif
#else
::std::malloc
#endif
Expand Down Expand Up @@ -175,12 +159,8 @@ class c_malloc_allocator
if (alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__)
{
p =
#if defined(__has_builtin)
#if __has_builtin(__builtin_realloc)
#if FAST_IO_HAS_BUILTIN(__builtin_realloc)
__builtin_realloc
#else
::std::realloc
#endif
#else
::std::realloc
#endif
Expand All @@ -205,12 +185,8 @@ class c_malloc_allocator
}
if (alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__)
{
#if defined(__has_builtin)
#if __has_builtin(__builtin_free)
#if FAST_IO_HAS_BUILTIN(__builtin_free)
__builtin_free
#else
::std::free
#endif
#else
::std::free
#endif
Expand All @@ -228,12 +204,8 @@ class c_malloc_allocator
{
return;
}
#if defined(__has_builtin)
#if __has_builtin(__builtin_free)
__builtin_free
#else
::std::free
#endif
#if FAST_IO_HAS_BUILTIN(__builtin_free)
__builtin_free
#else
::std::free
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ struct gdi_teb_batch
::std::uint_least32_t Buffer[310];
};

// NOLINTBEGIN(*-optin.performance.Padding)
struct teb
{
nt_tib NtTib;
Expand Down Expand Up @@ -265,6 +266,7 @@ struct teb
::std::uint_least32_t SpinCallCount;
::std::uint_least64_t ExtendedFeatureDisableMask;
};
// NOLINTEND(*-optin.performance.Padding)

FAST_IO_DLLIMPORT FAST_IO_GNU_MALLOC void *FAST_IO_WINSTDCALL RtlAllocateHeap(void *, ::std::uint_least32_t, ::std::size_t) noexcept FAST_IO_WINSTDCALL_RENAME(RtlAllocateHeap, 12);
FAST_IO_DLLIMPORT char unsigned FAST_IO_WINSTDCALL RtlFreeHeap(void *, ::std::uint_least32_t, void *) noexcept FAST_IO_WINSTDCALL_RENAME(RtlFreeHeap, 12);
Expand Down
16 changes: 8 additions & 8 deletions include/fast_io_core_impl/codecvt/general.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
else
{
#if (defined(_MSC_VER) && defined(_M_AMD64) && !defined(__clang__)) || \
(defined(__SSE__) && defined(__x86_64__) && __cpp_lib_is_constant_evaluated >= 201811L)
(defined(__SSE__) && defined(__SSE2__) && defined(__x86_64__) && __cpp_lib_is_constant_evaluated >= 201811L)
if constexpr (src_encoding != encoding_scheme::utf_ebcdic && encoding != encoding_scheme::utf_ebcdic &&
1 == sizeof(src_char_type) && (1 == sizeof(dest_char_type) || encoding_is_utf(encoding)))
{
Expand Down Expand Up @@ -221,7 +221,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
}
else
{
dst += get_utf_code_units<encoding>(code, dst);
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
}
}
else
Expand All @@ -235,7 +235,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
}
else
{
dst += get_utf_code_units<encoding>(code, dst);
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
}
}
}
Expand Down Expand Up @@ -265,7 +265,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
}
else
{
dst += get_utf_code_units<encoding>(code, dst);
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
}
}
else
Expand Down Expand Up @@ -309,7 +309,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
}
else
{
dst += get_utf_code_units<encoding>(code, dst);
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
}
}
else
Expand All @@ -331,7 +331,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
}
else
{
dst += get_utf_code_units<encoding>(code, dst);
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
}
}
}
Expand Down Expand Up @@ -468,7 +468,7 @@ inline constexpr dest_char_type *general_code_cvt(state_type &__restrict state,
}
else
{
dst += get_utf_code_units<encoding>(code, dst);
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
}
src_first += static_cast<::std::size_t>(bytes_src - bytes - state_size);
}
Expand All @@ -492,7 +492,7 @@ inline constexpr dest_char_type *general_code_cvt(state_type &__restrict state,
}
else
{
dst += get_utf_code_units<encoding>(code, dst);
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
}
src_first += static_cast<::std::size_t>(static_cast<::std::size_t>(adv) - state_size);
}
Expand Down
6 changes: 3 additions & 3 deletions include/fast_io_core_impl/codecvt/utf.h
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ inline constexpr char32_t utf16_surrogate_to_utf32(char16_t high, char16_t low)
return static_cast<char32_t>((static_cast<::std::uint_least32_t>(high) << 10u) + low - 0x35fdc00u);
}

#if (defined(_MSC_VER) && defined(_M_AMD64) && !defined(__clang__)) || (defined(__SSE__) && defined(__x86_64__))
#if (defined(_MSC_VER) && defined(_M_AMD64) && !defined(__clang__)) || (defined(__SSE__) && defined(__SSE2__) && defined(__x86_64__))
template <::std::integral T, ::std::integral U>
requires((sizeof(T) == 1) && (sizeof(U) == 1 || sizeof(U) == 2 || sizeof(U) == 4))
inline code_cvt_result<T, U> convert_ascii_with_sse(T const *__restrict pSrc, U *__restrict pDst) noexcept
Expand All @@ -520,7 +520,7 @@ inline code_cvt_result<T, U> convert_ascii_with_sse(T const *__restrict pSrc, U
x86_64_v16qi chunk;
__builtin_memcpy(__builtin_addressof(chunk), pSrc, m128i_size);
mask = static_cast<::std::uint_least32_t>(__builtin_ia32_pmovmskb128(chunk));
#if __has_builtin(__builtin_shufflevector)
#if FAST_IO_HAS_BUILTIN(__builtin_shufflevector)
x86_64_v16qi half{__builtin_shufflevector(chunk, zero, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 16 + 4, 5,
16 + 5, 6, 16 + 6, 7, 16 + 7)};
__builtin_memcpy(pDst, __builtin_addressof(half), m128i_size);
Expand All @@ -539,7 +539,7 @@ inline code_cvt_result<T, U> convert_ascii_with_sse(T const *__restrict pSrc, U
x86_64_v16qi chunk;
__builtin_memcpy(__builtin_addressof(chunk), pSrc, m128i_size);
mask = static_cast<::std::uint_least32_t>(__builtin_ia32_pmovmskb128(chunk));
#if __has_builtin(__builtin_shufflevector)
#if FAST_IO_HAS_BUILTIN(__builtin_shufflevector)
x86_64_v16qi half_result{__builtin_shufflevector(chunk, zero, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,
16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7)};
x86_64_v8hi half;
Expand Down
36 changes: 7 additions & 29 deletions include/fast_io_core_impl/freestanding/algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,43 +243,31 @@ namespace fast_io::freestanding
{

inline
#if defined(__has_builtin)
#if __has_builtin(__builtin_memcpy)
#if FAST_IO_HAS_BUILTIN(__builtin_memcpy)
constexpr
#endif
#endif
void *
my_memcpy(void *dest, void const *src, ::std::size_t count) noexcept
{
return
#if defined(__has_builtin)
#if __has_builtin(__builtin_memcpy)
#if FAST_IO_HAS_BUILTIN(__builtin_memcpy)
__builtin_memcpy
#else
::std::memcpy
#endif
#else
::std::memcpy
#endif
(dest, src, count);
}

inline
#if defined(__has_builtin)
#if __has_builtin(__builtin_memmove)
#if FAST_IO_HAS_BUILTIN(__builtin_memmove)
constexpr
#endif
#endif
void *
my_memmove(void *dest, void const *src, ::std::size_t count) noexcept
{
return
#if defined(__has_builtin)
#if __has_builtin(__builtin_memmove)
#if FAST_IO_HAS_BUILTIN(__builtin_memmove)
__builtin_memmove
#else
::std::memmove
#endif
#else
::std::memmove
#endif
Expand All @@ -289,34 +277,24 @@ inline
inline void *my_memset(void *dest, int ch, ::std::size_t count) noexcept
{
return
#if defined(__has_builtin)
#if __has_builtin(__builtin_memset)
#if FAST_IO_HAS_BUILTIN(__builtin_memset)
__builtin_memset
#else
::std::memset
#endif
#else
::std::memset
#endif
(dest, ch, count);
}

inline
#if defined(__has_builtin)
#if __has_builtin(__builtin_memcmp)
#if FAST_IO_HAS_BUILTIN(__builtin_memcmp)
constexpr
#endif
#endif
int
my_memcmp(void const *dest, void const *src, ::std::size_t count) noexcept
{
return
#if defined(__has_builtin)
#if __has_builtin(__builtin_memcmp)
#if FAST_IO_HAS_BUILTIN(__builtin_memcmp)
__builtin_memcmp
#else
::std::memcmp
#endif
#else
::std::memcmp
#endif
Expand Down
8 changes: 4 additions & 4 deletions include/fast_io_core_impl/freestanding/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ struct allocator
{
__builtin_trap();
}
#if __has_builtin(__builtin_operator_new)
return static_cast<T *>(__builtin_operator_new(n * sizeof(T)));
#if FAST_IO_HAS_BUILTIN(__builtin_operator_new)
return static_cast<T *>(__builtin_operator_new(n * sizeof(T)));
#else
__builtin_trap();
return nullptr;
#endif
}
inline constexpr void deallocate([[maybe_unused]] T *ptr, [[maybe_unused]] ::std::size_t n) noexcept
{
#if __has_builtin(__builtin_operator_delete)
__builtin_operator_delete(ptr, sizeof(T) * n);
#if FAST_IO_HAS_BUILTIN(__builtin_operator_delete)
__builtin_operator_delete(ptr, sizeof(T) * n);
#endif
}
};
Expand Down
Loading