From 804bde962de4819138951aed24b2c8ba768d7344 Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Wed, 15 Apr 2026 18:04:29 +0200 Subject: [PATCH] libstdc++: Implement P4012R1 while reverting P3844R2 (consteval simd broadcast) P3844R2 added a consteval broadcast constructor for value-preserving conversions from constants. It had been approved by LEWG in Kona. Therefore, the current implementation has the consteval broadcast constructor. In Croydon, LEWG reversed the decision but changed the overload set to keep the design space open for C++29. This patch removes the consteval constructor and changes the broadcast constructor according to P4012R1, to keep the design space open. libstdc++-v3/ChangeLog: * include/bits/simd_details.h (__simd_vec_bcast_consteval): Remove. * include/bits/simd_mask.h (basic_mask): Replace plain 0 and 1 literals with cw<0> and cw<1>. Replace explicit basic_vec construction from 0 and 1 with default init and broadcast from _Up(1). (_M_to_uint): Replace 1 with cw<1>. * include/bits/simd_vec.h (basic_vec): Remove consteval broadcast overload. Remove explicit broadcast from non-value-preserving types. * testsuite/std/simd/arithmetic.cc: Replace ill-formed integer literals with an explicit cast to T or with cw. * testsuite/std/simd/mask.cc: Likewise. * testsuite/std/simd/simd_alg.cc: Likewise. * testsuite/std/simd/traits_common.cc: Adjust for resulting traits changes. * testsuite/std/simd/traits_math.cc: Likewise. 
Signed-off-by: Matthias Kretz --- libstdc++-v3/include/bits/simd_details.h | 9 ----- libstdc++-v3/include/bits/simd_mask.h | 17 ++++----- libstdc++-v3/include/bits/simd_vec.h | 22 +++--------- libstdc++-v3/testsuite/std/simd/arithmetic.cc | 35 ++++++++++--------- libstdc++-v3/testsuite/std/simd/mask.cc | 6 ++-- libstdc++-v3/testsuite/std/simd/simd_alg.cc | 5 ++- .../testsuite/std/simd/traits_common.cc | 4 +-- .../testsuite/std/simd/traits_math.cc | 2 +- 8 files changed, 39 insertions(+), 61 deletions(-) diff --git a/libstdc++-v3/include/bits/simd_details.h b/libstdc++-v3/include/bits/simd_details.h index 31bd6ac45ab..3880d8a1916 100644 --- a/libstdc++-v3/include/bits/simd_details.h +++ b/libstdc++-v3/include/bits/simd_details.h @@ -1241,15 +1241,6 @@ namespace simd return static_cast<_To>(__x); } - template - concept __simd_vec_bcast_consteval - = __explicitly_convertible_to<_From, _To> - && is_arithmetic_v> && convertible_to<_From, _To> - && !__value_preserving_convertible_to, _To> - && (is_same_v, _To> - || (is_same_v, int> && is_integral_v<_To>) - || (is_same_v, unsigned> && unsigned_integral<_To>)); - /** @internal * std::pair is not trivially copyable, this one is */ diff --git a/libstdc++-v3/include/bits/simd_mask.h b/libstdc++-v3/include/bits/simd_mask.h index 27eff4ca01a..0a7cfa03ced 100644 --- a/libstdc++-v3/include/bits/simd_mask.h +++ b/libstdc++-v3/include/bits/simd_mask.h @@ -865,11 +865,12 @@ namespace simd using _Ip = typename _VecType::value_type; _VecType __v0 = _Ip(__val); constexpr int __bits_per_element = sizeof(_Ip) * __CHAR_BIT__; - constexpr _VecType __pow2 = _VecType(1) << (__iota<_VecType> % __bits_per_element); + constexpr _VecType __pow2 = _VecType(cw<1>) + << (__iota<_VecType> % cw<__bits_per_element>); if constexpr (_S_size < __bits_per_element) - return ((__v0 & __pow2) > 0)._M_concat_data(); + return ((__v0 & __pow2) > cw<0>)._M_concat_data(); else if constexpr (_S_size == __bits_per_element) - return ((__v0 & __pow2) != 
0)._M_concat_data(); + return ((__v0 & __pow2) != cw<0>)._M_concat_data(); else { static_assert(_Bytes == 1); @@ -886,7 +887,7 @@ namespace simd }; __v1 *= 0x0101'0101'0101'0101ull; __v0 = __builtin_bit_cast(_VecType, __v1); - return ((__v0 & __pow2) != 0)._M_data; + return ((__v0 & __pow2) != cw<0>)._M_data; } else { @@ -895,7 +896,7 @@ namespace simd __v0 = _VecType::_S_static_permute(__v1, [](int __i) { return __i / __CHAR_BIT__; }); - return ((__v0 & __pow2) != 0)._M_data; + return ((__v0 & __pow2) != cw<0>)._M_data; } } } @@ -991,7 +992,7 @@ namespace simd else { using _UV = basic_vec<_Up, _UAbi>; - return __select_impl(static_cast<_UV::mask_type>(*this), _UV(1), _UV(0)); + return __select_impl(static_cast<_UV::mask_type>(*this), _Up(1), _UV()); } } @@ -1066,7 +1067,7 @@ namespace simd constexpr int __n = _IV::size(); if constexpr (_Bytes * __CHAR_BIT__ >= __n) // '1 << __iota' cannot overflow { // reduce(select(k, powers_of_2, 0)) - constexpr _IV __pow2 = _IV(1) << __iota<_IV>; + constexpr _IV __pow2 = _IV(cw<1>) << __iota<_IV>; return _Ur(_U0(__select_impl(__k, __pow2, _IV()) ._M_reduce(bit_or<>()))) << _Offset; } @@ -1079,7 +1080,7 @@ namespace simd } else { // limit powers_of_2 to 1, 2, 4, ..., 128 - constexpr _IV __pow2 = _IV(1) << (__iota<_IV> % _IV(__CHAR_BIT__)); + constexpr _IV __pow2 = _IV(cw<1>) << (__iota<_IV> % _IV(cw<__CHAR_BIT__>)); _IV __x = __select_impl(__k, __pow2, _IV()); // partial reductions of 8 neighboring elements __x |= _IV::_S_static_permute(__x, _SwapNeighbors<4>()); diff --git a/libstdc++-v3/include/bits/simd_vec.h b/libstdc++-v3/include/bits/simd_vec.h index 2574bf7c3ad..5f3bd7fd2f6 100644 --- a/libstdc++-v3/include/bits/simd_vec.h +++ b/libstdc++-v3/include/bits/simd_vec.h @@ -1135,20 +1135,13 @@ namespace simd * * @note The constructor is implicit if the conversion (if any) is value-preserving. 
*/ - template <__explicitly_convertible_to _Up> + template <__broadcast_constructible _Up> [[__gnu__::__always_inline__]] - constexpr explicit(!__broadcast_constructible<_Up, value_type>) + constexpr basic_vec(_Up&& __x) noexcept : _M_data(_DataType() == _DataType() ? static_cast(__x) : value_type()) {} - template <__simd_vec_bcast_consteval _Up> - consteval - basic_vec(_Up&& __x) - : _M_data(_DataType() == _DataType() - ? __value_preserving_cast(__x) : value_type()) - {} - // [simd.ctor] conversion constructor ----------------------------------- template requires (_S_size == _UAbi::_S_size) @@ -2037,20 +2030,13 @@ namespace simd { return _M_concat_data(); } // [simd.ctor] broadcast constructor ------------------------------------ - template <__explicitly_convertible_to _Up> + template <__broadcast_constructible _Up> [[__gnu__::__always_inline__]] - constexpr explicit(!__broadcast_constructible<_Up, value_type>) + constexpr basic_vec(_Up&& __x) noexcept : _M_data0(static_cast(__x)), _M_data1(static_cast(__x)) {} - template <__simd_vec_bcast_consteval _Up> - consteval - basic_vec(_Up&& __x) - : _M_data0(__value_preserving_cast(__x)), - _M_data1(__value_preserving_cast(__x)) - {} - // [simd.ctor] conversion constructor ----------------------------------- template requires (_S_size == _UAbi::_S_size) diff --git a/libstdc++-v3/testsuite/std/simd/arithmetic.cc b/libstdc++-v3/testsuite/std/simd/arithmetic.cc index e662a26866c..adf94f0a87f 100644 --- a/libstdc++-v3/testsuite/std/simd/arithmetic.cc +++ b/libstdc++-v3/testsuite/std/simd/arithmetic.cc @@ -88,7 +88,8 @@ template }; ADD_TEST(multiplication) { - std::tuple {V(), V(RealV(1), RealV()), V(RealV(), RealV(1)), init_vec}, + std::tuple {V(), V(RealV(Real(1)), RealV()), V(RealV(), RealV(Real(1))), + init_vec}, [](auto& t, V x, V one, V I, V z) { t.verify_equal(x * x, x); t.verify_equal(x * z, x); @@ -181,11 +182,11 @@ template t.verify_equal(y, x - T(1)); t.verify_equal(x - x, y); t.verify_equal(x = z - x, init_vec); - 
t.verify_equal(x = z - x, V(1)); + t.verify_equal(x = z - x, T(1)); t.verify_equal(z -= x, init_vec); t.verify_equal(z, init_vec); - t.verify_equal(z -= z, V(0)); - t.verify_equal(z, V(0)); + t.verify_equal(z -= z, V()); + t.verify_equal(z, V()); } }; @@ -259,10 +260,10 @@ template ADD_TEST(divide0, std::is_floating_point_v && !is_iec559) { std::tuple{T(2), init_vec}, [](auto& t, V x, V y) { - t.verify_equal_to_ulp(x / x, V(T(1)), 1); - t.verify_equal_to_ulp(T(3) / x, V(T(3) / T(2)), 1); - t.verify_equal_to_ulp(x / T(3), V(T(2) / T(3)), 1); - t.verify_equal_to_ulp(y / x, init_vec, 1); + t.verify_equal_to_ulp(x / x, V(T(1)), std::cw<1>); + t.verify_equal_to_ulp(T(3) / x, V(T(3) / T(2)), std::cw<1>); + t.verify_equal_to_ulp(x / T(3), V(T(2) / T(3)), std::cw<1>); + t.verify_equal_to_ulp(y / x, init_vec, std::cw<1>); } }; @@ -272,18 +273,18 @@ template [](auto& t, V a) { V b = std::cw<2>; V ref([&](int i) { return a[i] / 2; }); - t.verify_equal_to_ulp(a / b, ref, 1); + t.verify_equal_to_ulp(a / b, ref, std::cw<1>); a = select(a == std::cw<0>, T(1), a); // -freciprocal-math together with flush-to-zero makes // the following range restriction necessary (i.e. // 1/|a| must be >= min). Intel vrcpps and vrcp14ps // need some extra slack (use 1.1 instead of 1). 
a = select(fabs(a) >= T(1.1) / norm_min, T(1), a); - t.verify_equal_to_ulp(a / a, V(1), 1)("\na = ", a); + t.verify_equal_to_ulp(a / a, V(std::cw<1>), std::cw<1>)("\na = ", a); ref = V([&](int i) { return 2 / a[i]; }); - t.verify_equal_to_ulp(b / a, ref, 1)("\na = ", a); - t.verify_equal_to_ulp(b /= a, ref, 1); - t.verify_equal_to_ulp(b, ref, 1); + t.verify_equal_to_ulp(b / a, ref, std::cw<1>)("\na = ", a); + t.verify_equal_to_ulp(b /= a, ref, std::cw<1>); + t.verify_equal_to_ulp(b, ref, std::cw<1>); } }; @@ -291,15 +292,15 @@ template std::tuple{T(2), init_vec, init_vec, init_vec, init_vec}, [](auto& t, V x, V y, V z, V a, V b) { - t.verify_equal(x / x, V(1)); - t.verify_equal(T(3) / x, V(T(3) / T(2))); - t.verify_equal(x / T(3), V(T(2) / T(3))); + t.verify_equal(x / x, T(1)); + t.verify_equal(T(3) / x, T(T(3) / T(2))); + t.verify_equal(x / T(3), T(T(2) / T(3))); t.verify_equal(y / x, init_vec); V ref = init_vec; t.verify_equal(z / x, ref); ref = init_vec; t.verify_equal(a / x, ref); - t.verify_equal(b / b, V(1)); + t.verify_equal(b / b, T(1)); ref = init_vec; t.verify_equal(x / b, ref); t.verify_equal(x /= b, ref); diff --git a/libstdc++-v3/testsuite/std/simd/mask.cc b/libstdc++-v3/testsuite/std/simd/mask.cc index ebd9dc7728d..117cf37beaa 100644 --- a/libstdc++-v3/testsuite/std/simd/mask.cc +++ b/libstdc++-v3/testsuite/std/simd/mask.cc @@ -54,14 +54,14 @@ template return i % 13 == 0 || i % 7 == 0; })}, [](auto& t, const M k, const M tr, const M fa, const M k2) { - t.verify_equal(V(+tr), V(1)); + t.verify_equal(V(+tr), T(1)); t.verify_equal(V(+fa), V()); t.verify_equal(V(+k), init_vec); if constexpr (std::is_integral_v) { - t.verify_equal(V(~tr), ~V(1)); - t.verify_equal(V(~fa), ~V(0)); + t.verify_equal(V(~tr), ~V(std::cw<1>)); + t.verify_equal(V(~fa), ~V()); t.verify_equal(V(~k), ~init_vec); } diff --git a/libstdc++-v3/testsuite/std/simd/simd_alg.cc b/libstdc++-v3/testsuite/std/simd/simd_alg.cc index 03ba83de8d4..8cfd61802e1 100644 --- 
a/libstdc++-v3/testsuite/std/simd/simd_alg.cc +++ b/libstdc++-v3/testsuite/std/simd/simd_alg.cc @@ -13,8 +13,7 @@ template using M = typename V::mask_type; using pair = std::pair; - static constexpr std::conditional_t, short, T> x_max - = test_iota_max; + static constexpr T x_max = test_iota_max; static constexpr int x_max_int = static_cast(x_max); static constexpr V @@ -26,7 +25,7 @@ template return static_cast(std::to_underlying(x_max) - static_cast(x)); } else - return x_max - x; + return std::cw - x; } ADD_TEST(Select) { diff --git a/libstdc++-v3/testsuite/std/simd/traits_common.cc b/libstdc++-v3/testsuite/std/simd/traits_common.cc index ecbceef06bf..a026e359bac 100644 --- a/libstdc++-v3/testsuite/std/simd/traits_common.cc +++ b/libstdc++-v3/testsuite/std/simd/traits_common.cc @@ -59,7 +59,7 @@ namespace test02 // ensure 'true ? int : vec' doesn't work template concept has_type_member = requires { typename T::type; }; - static_assert(has_type_member>>); + static_assert(!has_type_member>>); } #if defined __AVX__ && !defined __AVX2__ @@ -90,7 +90,7 @@ static_assert( std::convertible_to, simd::vec>); static_assert(!std::convertible_to, simd::vec>); static_assert(!std::convertible_to, simd::vec>); static_assert(!std::convertible_to, simd::vec>); -static_assert( std::convertible_to>); +static_assert(!std::convertible_to>); static_assert( std::convertible_to, simd::vec>); template diff --git a/libstdc++-v3/testsuite/std/simd/traits_math.cc b/libstdc++-v3/testsuite/std/simd/traits_math.cc index fc71ff9e359..1f90cf34c2e 100644 --- a/libstdc++-v3/testsuite/std/simd/traits_math.cc +++ b/libstdc++-v3/testsuite/std/simd/traits_math.cc @@ -25,7 +25,7 @@ namespace math_tests concept has_deduced_vec = requires { typename simd::__deduced_vec_t; }; static_assert(!has_common_type); - static_assert( has_common_type); + static_assert(!has_common_type); template struct holder